From: Fangyu Yu riscv_kexec_relocate (copied into control_code_buffer) uses an stvec trick to drop the MMU and land on the PA of the next loop label. Under VS-mode, KVM cannot emulate this single-step transition, and the VCPU dies with "kvm run failed Operation not supported". Route normal kexec through riscv_kexec_relocate_entry, the trampoline wrapper added in the previous patch. It drops SATP with PC already on a PA, then hands off to control_code_buffer, where the relocate body runs with SATP=0. Drop the stvec trick from the relocate body and pass first_ind_entry as a physical address since the body now starts with SATP=0. The ".align 2" plus filler "nop" that ensured the PA of the loop top was 4-byte aligned -- required because the legacy stvec trick wrote that PA into stvec.BASE, whose low two bits are MODE and are discarded by the hardware -- is no longer load-bearing and is removed as well. Signed-off-by: Fangyu Yu --- arch/riscv/kernel/kexec_relocate.S | 26 ++++++-------------------- arch/riscv/kernel/machine_kexec.c | 27 +++++++++++++++++++-------- 2 files changed, 25 insertions(+), 28 deletions(-) diff --git a/arch/riscv/kernel/kexec_relocate.S b/arch/riscv/kernel/kexec_relocate.S index 6c624560c9ac..7ffb83ea45fc 100644 --- a/arch/riscv/kernel/kexec_relocate.S +++ b/arch/riscv/kernel/kexec_relocate.S @@ -34,27 +34,13 @@ SYM_CODE_START(riscv_kexec_relocate) csrw CSR_SIP, zero /* - * When we switch SATP.MODE to "Bare" we'll only - * play with physical addresses. However the first time - * we try to jump somewhere, the offset on the jump - * will be relative to pc which will still be on VA. To - * deal with this we set stvec to the physical address at - * the start of the loop below so that we jump there in - * any case. + * The trampoline wrapper (riscv_kexec_relocate_entry) has already + * dropped the MMU and handed control to us at this PA copy of the + * relocate code. 
From here on the entire loop runs with SATP=0 and + * every address (s0, s5, source/dest pointers) is a physical one. */ - la s6, 1f - sub s6, s6, s4 - csrw CSR_STVEC, s6 - - /* - * With C-extension, here we get 42 Bytes and the next - * .align directive would pad zeros here up to 44 Bytes. - * So manually put a nop here to avoid zeros padding. - */ - nop /* Process entries in a loop */ -.align 2 1: REG_L t0, 0(s0) /* t0 = *image->entry */ addi s0, s0, RISCV_SZPTR /* image->entry++ */ @@ -70,8 +56,8 @@ SYM_CODE_START(riscv_kexec_relocate) andi t1, t0, 0x2 beqz t1, 2f andi s0, t0, ~0x2 - csrw CSR_SATP, zero - jr s6 + /* MMU is already off; the entry wrapper handled the transition. */ + j 1b 2: /* IND_DONE entry ? -> jump to done label */ diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index 71688c63af65..82fcb84a03ec 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -164,9 +164,6 @@ machine_kexec_prepare(struct kimage *image) memcpy(control_code_buffer, riscv_kexec_relocate, riscv_kexec_relocate_size); - /* Mark the control page executable */ - set_memory_x((unsigned long) control_code_buffer, 1); - WRITE_ONCE(riscv_kexec_relocate_entry_pa, __pa_symbol(&riscv_kexec_relocate_entry)); } else { @@ -262,11 +259,15 @@ machine_kexec(struct kimage *image) { struct kimage_arch *internal = &image->arch; unsigned long jump_addr = (unsigned long) image->start; - unsigned long first_ind_entry = (unsigned long) &image->head; + /* + * The relocate body runs entirely with the MMU off (the wrapper + * drops SATP before jumping into control_code_buffer), so the very + * first entry must be a physical address. 
+ */ + unsigned long first_ind_entry = __pa(&image->head); unsigned long this_cpu_id = __smp_processor_id(); unsigned long this_hart_id = cpuid_to_hartid_map(this_cpu_id); unsigned long fdt_addr = internal->fdt_addr; - void *control_code_buffer = page_address(image->control_code_page); riscv_kexec_method kexec_method = NULL; #ifdef CONFIG_SMP @@ -274,10 +275,20 @@ machine_kexec(struct kimage *image) "Some CPUs may be stale, kdump will be unreliable.\n"); #endif - if (image->type != KEXEC_TYPE_CRASH) - kexec_method = control_code_buffer; - else + if (image->type != KEXEC_TYPE_CRASH) { + kexec_method = (riscv_kexec_method) &riscv_kexec_relocate_entry; + /* + * Publish the per-image control_code_buffer PA at dispatch + * time rather than in machine_kexec_prepare(). machine_kexec() + * only runs once the image has been fully loaded and committed + * as kexec_image, so the global cannot be left pointing at a + * page freed by a failed load. + */ + WRITE_ONCE(riscv_kexec_cc_buffer_pa, + __pa(page_address(image->control_code_page))); + } else { kexec_method = (riscv_kexec_method) &riscv_kexec_norelocate; + } pr_notice("Will call new kernel at %08lx from hart id %lx\n", jump_addr, this_hart_id); -- 2.50.1