If we directly compile the switch.S file into the kernel, the address of the kvm_exc_entry function will definitely be within the DMW memory area. Therefore, we will no longer need to perform a copy relocation of kvm_exc_entry. Based on the above description, compile switch.S directly into the kernel, and then remove the copy relocation execution logic for the kvm_exc_entry function. Cc: stable@vger.kernel.org Signed-off-by: Xianglai Li --- Cc: Huacai Chen Cc: WANG Xuerui Cc: Tianrui Zhao Cc: Bibo Mao Cc: Charlie Jenkins Cc: Xianglai Li Cc: Thomas Gleixner Cc: Tiezhu Yang arch/loongarch/Kbuild | 2 +- arch/loongarch/include/asm/asm-prototypes.h | 21 +++++++++++++ arch/loongarch/include/asm/kvm_host.h | 3 -- arch/loongarch/kvm/Makefile | 2 +- arch/loongarch/kvm/main.c | 35 ++------------------- arch/loongarch/kvm/switch.S | 22 ++++++++++--- 6 files changed, 43 insertions(+), 42 deletions(-) diff --git a/arch/loongarch/Kbuild b/arch/loongarch/Kbuild index beb8499dd8ed..1c7a0dbe5e72 100644 --- a/arch/loongarch/Kbuild +++ b/arch/loongarch/Kbuild @@ -3,7 +3,7 @@ obj-y += mm/ obj-y += net/ obj-y += vdso/ -obj-$(CONFIG_KVM) += kvm/ +obj-$(subst m,y,$(CONFIG_KVM)) += kvm/ # for cleaning subdir- += boot diff --git a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h index 704066b4f736..e8ce153691e5 100644 --- a/arch/loongarch/include/asm/asm-prototypes.h +++ b/arch/loongarch/include/asm/asm-prototypes.h @@ -20,3 +20,24 @@ asmlinkage void noinstr __no_stack_protector ret_from_kernel_thread(struct task_ struct pt_regs *regs, int (*fn)(void *), void *fn_arg); + +struct kvm_run; +struct kvm_vcpu; + +void kvm_exc_entry(void); +int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu); + +struct loongarch_fpu; + +#ifdef CONFIG_CPU_HAS_LSX +void kvm_save_lsx(struct loongarch_fpu *fpu); +void kvm_restore_lsx(struct loongarch_fpu *fpu); +#endif + +#ifdef CONFIG_CPU_HAS_LASX +void kvm_save_lasx(struct loongarch_fpu *fpu); +void kvm_restore_lasx(struct loongarch_fpu *fpu); +#endif + +void kvm_save_fpu(struct loongarch_fpu *fpu); +void kvm_restore_fpu(struct loongarch_fpu *fpu); diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index e4fe5b8e8149..1a1be10e3803 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -85,7 +85,6 @@ struct kvm_context { struct kvm_world_switch { int (*exc_entry)(void); int (*enter_guest)(struct kvm_run *run, struct kvm_vcpu *vcpu); - unsigned long page_order; }; #define MAX_PGTABLE_LEVELS 4 @@ -347,8 +346,6 @@ void kvm_exc_entry(void); int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu); extern unsigned long vpid_mask; -extern const unsigned long kvm_exception_size; -extern const unsigned long kvm_enter_guest_size; extern struct kvm_world_switch *kvm_loongarch_ops; #define SW_GCSR (1 << 0) diff --git a/arch/loongarch/kvm/Makefile b/arch/loongarch/kvm/Makefile index cb41d9265662..fe665054f824 100644 --- a/arch/loongarch/kvm/Makefile +++ b/arch/loongarch/kvm/Makefile @@ -11,7 +11,7 @@ kvm-y += exit.o kvm-y += interrupt.o kvm-y += main.o kvm-y += mmu.o -kvm-y += switch.o +obj-y += switch.o kvm-y += timer.o kvm-y += tlb.o kvm-y += vcpu.o diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index 80ea63d465b8..67d234540ed4 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -340,8 +340,7 @@ void kvm_arch_disable_virtualization_cpu(void) static int kvm_loongarch_env_init(void) { - int cpu, order, ret; - void *addr; + int cpu, ret; struct kvm_context *context; vmcs = alloc_percpu(struct kvm_context); @@ -357,30 +356,8 @@ static int kvm_loongarch_env_init(void) return -ENOMEM; } - /* - * PGD register is shared between root kernel and kvm hypervisor. - * So world switch entry should be in DMW area rather than TLB area - * to avoid page fault reenter. - * - * In future if hardware pagetable walking is supported, we won't - * need to copy world switch code to DMW area. - */ - order = get_order(kvm_exception_size + kvm_enter_guest_size); - addr = (void *)__get_free_pages(GFP_KERNEL, order); - if (!addr) { - free_percpu(vmcs); - vmcs = NULL; - kfree(kvm_loongarch_ops); - kvm_loongarch_ops = NULL; - return -ENOMEM; - } - - memcpy(addr, kvm_exc_entry, kvm_exception_size); - memcpy(addr + kvm_exception_size, kvm_enter_guest, kvm_enter_guest_size); - flush_icache_range((unsigned long)addr, (unsigned long)addr + kvm_exception_size + kvm_enter_guest_size); - kvm_loongarch_ops->exc_entry = addr; - kvm_loongarch_ops->enter_guest = addr + kvm_exception_size; - kvm_loongarch_ops->page_order = order; + kvm_loongarch_ops->exc_entry = (void *)kvm_exc_entry; + kvm_loongarch_ops->enter_guest = (void *)kvm_enter_guest; vpid_mask = read_csr_gstat(); vpid_mask = (vpid_mask & CSR_GSTAT_GIDBIT) >> CSR_GSTAT_GIDBIT_SHIFT; @@ -414,16 +391,10 @@ static int kvm_loongarch_env_init(void) static void kvm_loongarch_env_exit(void) { - unsigned long addr; - if (vmcs) free_percpu(vmcs); if (kvm_loongarch_ops) { - if (kvm_loongarch_ops->exc_entry) { - addr = (unsigned long)kvm_loongarch_ops->exc_entry; - free_pages(addr, kvm_loongarch_ops->page_order); - } kfree(kvm_loongarch_ops); } diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S index f1768b7a6194..93845ce53651 100644 --- a/arch/loongarch/kvm/switch.S +++ b/arch/loongarch/kvm/switch.S @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -100,10 +101,18 @@ * - is still in guest mode, such as pgd table/vmid registers etc, * - will fix with hw page walk enabled in future * load kvm_vcpu from reserved CSR KVM_VCPU_KS, and save a2 to KVM_TEMP_KS + * + * PGD register is shared between root kernel and kvm hypervisor. + * So world switch entry should be in DMW area rather than TLB area + * to avoid page fault reenter. + * + * In future if hardware pagetable walking is supported, we won't + * need to copy world switch code to DMW area. */ .text .cfi_sections .debug_frame SYM_CODE_START(kvm_exc_entry) + .p2align PAGE_SHIFT UNWIND_HINT_UNDEFINED csrwr a2, KVM_TEMP_KS csrrd a2, KVM_VCPU_KS @@ -190,8 +199,8 @@ ret_to_host: kvm_restore_host_gpr a2 jr ra -SYM_INNER_LABEL(kvm_exc_entry_end, SYM_L_LOCAL) SYM_CODE_END(kvm_exc_entry) +EXPORT_SYMBOL(kvm_exc_entry) /* * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu) @@ -215,8 +224,8 @@ SYM_FUNC_START(kvm_enter_guest) /* Save kvm_vcpu to kscratch */ csrwr a1, KVM_VCPU_KS kvm_switch_to_guest -SYM_INNER_LABEL(kvm_enter_guest_end, SYM_L_LOCAL) SYM_FUNC_END(kvm_enter_guest) +EXPORT_SYMBOL(kvm_enter_guest) SYM_FUNC_START(kvm_save_fpu) fpu_save_csr a0 t1 @@ -224,6 +233,7 @@ SYM_FUNC_START(kvm_save_fpu) fpu_save_cc a0 t1 t2 jr ra SYM_FUNC_END(kvm_save_fpu) +EXPORT_SYMBOL(kvm_save_fpu) SYM_FUNC_START(kvm_restore_fpu) fpu_restore_double a0 t1 @@ -231,6 +241,7 @@ SYM_FUNC_START(kvm_restore_fpu) fpu_restore_cc a0 t1 t2 jr ra SYM_FUNC_END(kvm_restore_fpu) +EXPORT_SYMBOL(kvm_restore_fpu) #ifdef CONFIG_CPU_HAS_LSX SYM_FUNC_START(kvm_save_lsx) @@ -239,6 +250,7 @@ SYM_FUNC_START(kvm_save_lsx) lsx_save_data a0 t1 jr ra SYM_FUNC_END(kvm_save_lsx) +EXPORT_SYMBOL(kvm_save_lsx) SYM_FUNC_START(kvm_restore_lsx) lsx_restore_data a0 t1 @@ -246,6 +258,7 @@ SYM_FUNC_START(kvm_restore_lsx) fpu_restore_csr a0 t1 t2 jr ra SYM_FUNC_END(kvm_restore_lsx) +EXPORT_SYMBOL(kvm_restore_lsx) #endif #ifdef CONFIG_CPU_HAS_LASX @@ -255,6 +268,7 @@ SYM_FUNC_START(kvm_save_lasx) lasx_save_data a0 t1 jr ra SYM_FUNC_END(kvm_save_lasx) +EXPORT_SYMBOL(kvm_save_lasx) SYM_FUNC_START(kvm_restore_lasx) lasx_restore_data a0 t1 @@ -262,10 +276,8 @@ SYM_FUNC_START(kvm_restore_lasx) fpu_restore_csr a0 t1 t2 jr ra SYM_FUNC_END(kvm_restore_lasx) +EXPORT_SYMBOL(kvm_restore_lasx) #endif - .section ".rodata" -SYM_DATA(kvm_exception_size, .quad kvm_exc_entry_end - kvm_exc_entry) -SYM_DATA(kvm_enter_guest_size, .quad kvm_enter_guest_end - kvm_enter_guest) #ifdef CONFIG_CPU_HAS_LBT STACK_FRAME_NON_STANDARD kvm_restore_fpu -- 2.39.1 Insert the appropriate UNWIND macro definition into the kvm_exc_entry in the assembly function to guide the generation of correct ORC table entries, thereby solving the timeout problem of loading the livepatch-sample module on a physical machine running multiple vcpus virtual machines. While solving the above problems, we have gained an additional benefit, that is, we can obtain more call stack information Stack information that can be obtained before the problem is fixed: [<0>] kvm_vcpu_block+0x88/0x120 [kvm] [<0>] kvm_vcpu_halt+0x68/0x580 [kvm] [<0>] kvm_emu_idle+0xd4/0xf0 [kvm] [<0>] kvm_handle_gspr+0x7c/0x700 [kvm] [<0>] kvm_handle_exit+0x160/0x270 [kvm] [<0>] kvm_exc_entry+0x100/0x1e0 Stack information that can be obtained after the problem is fixed: [<0>] kvm_vcpu_block+0x88/0x120 [kvm] [<0>] kvm_vcpu_halt+0x68/0x580 [kvm] [<0>] kvm_emu_idle+0xd4/0xf0 [kvm] [<0>] kvm_handle_gspr+0x7c/0x700 [kvm] [<0>] kvm_handle_exit+0x160/0x270 [kvm] [<0>] kvm_exc_entry+0x104/0x1e4 [<0>] kvm_enter_guest+0x38/0x11c [<0>] kvm_arch_vcpu_ioctl_run+0x26c/0x498 [kvm] [<0>] kvm_vcpu_ioctl+0x200/0xcf8 [kvm] [<0>] sys_ioctl+0x498/0xf00 [<0>] do_syscall+0x98/0x1d0 [<0>] handle_syscall+0xb8/0x158 Cc: stable@vger.kernel.org Signed-off-by: Xianglai Li --- Cc: Huacai Chen Cc: WANG Xuerui Cc: Tianrui Zhao Cc: Bibo Mao Cc: Charlie Jenkins Cc: Xianglai Li Cc: Thomas Gleixner Cc: Tiezhu Yang arch/loongarch/kvm/switch.S | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S index 93845ce53651..a3ea9567dbe5 100644 --- a/arch/loongarch/kvm/switch.S +++ b/arch/loongarch/kvm/switch.S @@ -10,6 +10,7 @@ #include #include #include +#include #define HGPR_OFFSET(x) (PT_R0 + 8*x) #define GGPR_OFFSET(x) (KVM_ARCH_GGPR + 8*x) @@ -110,9 +111,9 @@ * need to copy world switch code to DMW area. */ .text + .p2align PAGE_SHIFT .cfi_sections .debug_frame SYM_CODE_START(kvm_exc_entry) - .p2align PAGE_SHIFT UNWIND_HINT_UNDEFINED csrwr a2, KVM_TEMP_KS csrrd a2, KVM_VCPU_KS @@ -170,6 +171,7 @@ SYM_CODE_START(kvm_exc_entry) /* restore per cpu register */ ld.d u0, a2, KVM_ARCH_HPERCPU addi.d sp, sp, -PT_SIZE + UNWIND_HINT_REGS /* Prepare handle exception */ or a0, s0, zero @@ -200,7 +202,7 @@ ret_to_host: jr ra SYM_CODE_END(kvm_exc_entry) -EXPORT_SYMBOL(kvm_exc_entry) +EXPORT_SYMBOL_FOR_KVM(kvm_exc_entry) /* * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu) @@ -215,6 +217,14 @@ SYM_FUNC_START(kvm_enter_guest) /* Save host GPRs */ kvm_save_host_gpr a2 + /* + * The csr_era member variable of the pt_regs structure is required + * for unwinding orc to perform stack traceback, so we need to put + * pc into csr_era member variable here. + */ + pcaddi t0, 0 + st.d t0, a2, PT_ERA + addi.d a2, a1, KVM_VCPU_ARCH st.d sp, a2, KVM_ARCH_HSP st.d tp, a2, KVM_ARCH_HTP @@ -225,7 +235,7 @@ SYM_FUNC_START(kvm_enter_guest) csrwr a1, KVM_VCPU_KS kvm_switch_to_guest SYM_FUNC_END(kvm_enter_guest) -EXPORT_SYMBOL(kvm_enter_guest) +EXPORT_SYMBOL_FOR_KVM(kvm_enter_guest) SYM_FUNC_START(kvm_save_fpu) fpu_save_csr a0 t1 @@ -233,7 +243,7 @@ SYM_FUNC_START(kvm_save_fpu) fpu_save_cc a0 t1 t2 jr ra SYM_FUNC_END(kvm_save_fpu) -EXPORT_SYMBOL(kvm_save_fpu) +EXPORT_SYMBOL_FOR_KVM(kvm_save_fpu) SYM_FUNC_START(kvm_restore_fpu) fpu_restore_double a0 t1 @@ -241,7 +251,7 @@ SYM_FUNC_START(kvm_restore_fpu) fpu_restore_cc a0 t1 t2 jr ra SYM_FUNC_END(kvm_restore_fpu) -EXPORT_SYMBOL(kvm_restore_fpu) +EXPORT_SYMBOL_FOR_KVM(kvm_restore_fpu) #ifdef CONFIG_CPU_HAS_LSX SYM_FUNC_START(kvm_save_lsx) @@ -250,7 +260,7 @@ SYM_FUNC_START(kvm_save_lsx) lsx_save_data a0 t1 jr ra SYM_FUNC_END(kvm_save_lsx) -EXPORT_SYMBOL(kvm_save_lsx) +EXPORT_SYMBOL_FOR_KVM(kvm_save_lsx) SYM_FUNC_START(kvm_restore_lsx) lsx_restore_data a0 t1 @@ -258,7 +268,7 @@ SYM_FUNC_START(kvm_restore_lsx) fpu_restore_csr a0 t1 t2 jr ra SYM_FUNC_END(kvm_restore_lsx) -EXPORT_SYMBOL(kvm_restore_lsx) +EXPORT_SYMBOL_FOR_KVM(kvm_restore_lsx) #endif #ifdef CONFIG_CPU_HAS_LASX @@ -268,7 +278,7 @@ SYM_FUNC_START(kvm_save_lasx) lasx_save_data a0 t1 jr ra SYM_FUNC_END(kvm_save_lasx) -EXPORT_SYMBOL(kvm_save_lasx) +EXPORT_SYMBOL_FOR_KVM(kvm_save_lasx) SYM_FUNC_START(kvm_restore_lasx) lasx_restore_data a0 t1 @@ -276,7 +286,7 @@ SYM_FUNC_START(kvm_restore_lasx) fpu_restore_csr a0 t1 t2 jr ra SYM_FUNC_END(kvm_restore_lasx) -EXPORT_SYMBOL(kvm_restore_lasx) +EXPORT_SYMBOL_FOR_KVM(kvm_restore_lasx) #endif #ifdef CONFIG_CPU_HAS_LBT -- 2.39.1