Add a signal_exits counter for s390, as exists on arm64, loongarch, mips,
powerpc, riscv and x86. This is used by kvm_handle_signal_exit(), which we
will use when we later enable CONFIG_VIRT_XFER_TO_GUEST_WORK.

Signed-off-by: Andrew Donnellan
---
 arch/s390/include/asm/kvm_host.h | 1 +
 arch/s390/kvm/kvm-s390.c         | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index c2ba3d4398c5371526ddfd53b43607c00abc35a1..1b08a250fb341f7bd2d19810392c1c6e21673b64 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -146,6 +146,7 @@ struct kvm_vcpu_stat {
 	u64 instruction_diagnose_500;
 	u64 instruction_diagnose_other;
 	u64 pfault_sync;
+	u64 signal_exits;
 };
 
 #define PGM_OPERATION			0x01
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 16ba04062854a99ab7d48ac427b690006ea8e7eb..fa6b5150ca31e4d9f0bdafabc1fb1d90ef3f3d0d 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -185,7 +185,8 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
 	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
-	STATS_DESC_COUNTER(VCPU, pfault_sync)
+	STATS_DESC_COUNTER(VCPU, pfault_sync),
+	STATS_DESC_COUNTER(VCPU, signal_exits)
 };
 
 const struct kvm_stats_header kvm_vcpu_stats_header = {
@@ -5364,6 +5365,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 
 	if (signal_pending(current) && !rc) {
 		kvm_run->exit_reason = KVM_EXIT_INTR;
+		vcpu->stat.signal_exits++;
 		rc = -EINTR;
 	}

-- 
2.52.0
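For context: kvm_handle_signal_exit() is the generic helper in
include/linux/kvm_host.h that consumes this counter once the generic
transfer-to-guest work handling is enabled. Sketched from memory (details
may differ between kernel versions), it does roughly:

	/* Record a signal-induced guest exit in the vcpu stats. */
	static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu)
	{
		vcpu->run->exit_reason = KVM_EXIT_INTR;
		vcpu->stat.signal_exits++;
	}

which is why each architecture selecting the generic infrastructure needs
a signal_exits field in its kvm_vcpu_stat.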
From: Heiko Carstens

Move enabling and disabling of interrupts around the SIE instruction to
entry code. Enabling interrupts only after the __TI_sie flag has been set
guarantees that the SIE instruction is not executed if an interrupt happens
between enabling interrupts and the execution of the SIE instruction:
interrupt handlers and the machine check handler forward the PSW to the
sie_exit label in such cases.

This is a prerequisite for VIRT_XFER_TO_GUEST_WORK, to prevent guest
context from being entered when e.g. a scheduler IPI, indicating that a
reschedule is required, arrives right before the SIE instruction, which
could lead to long delays.

Signed-off-by: Heiko Carstens
Tested-by: Andrew Donnellan
Signed-off-by: Andrew Donnellan
---
 arch/s390/include/asm/stacktrace.h | 1 +
 arch/s390/kernel/asm-offsets.c     | 1 +
 arch/s390/kernel/entry.S           | 2 ++
 arch/s390/kvm/kvm-s390.c           | 5 -----
 4 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index 810a6b9d96280f73311de873ad180c59a0cfbd5f..c9ae680a28af910c4703eee179be4db6c1ec9ad1 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -66,6 +66,7 @@ struct stack_frame {
 			unsigned long sie_flags;
 			unsigned long sie_control_block_phys;
 			unsigned long sie_guest_asce;
+			unsigned long sie_irq;
 		};
 	};
 	unsigned long gprs[10];
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index a8915663e917faed4551276b64013ee073662cc9..730449f464aff25761264b00d63d92e907f17f78 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -64,6 +64,7 @@ int main(void)
 	OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags);
 	OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys);
 	OFFSET(__SF_SIE_GUEST_ASCE, stack_frame, sie_guest_asce);
+	OFFSET(__SF_SIE_IRQ, stack_frame, sie_irq);
 	DEFINE(STACK_FRAME_OVERHEAD, sizeof(struct stack_frame));
 	BLANK();
 	OFFSET(__SFUSER_BACKCHAIN, stack_frame_user, back_chain);
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 75b0fbb236d05f420b20cac6bac925e8ac36fa68..e906f4ab6cf35e53061a27192911629c10c347ed 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -189,6 +189,7 @@ SYM_FUNC_START(__sie64a)
 	mvc	__SF_SIE_FLAGS(8,%r15),__TI_flags(%r14) # copy thread flags
 	lmg	%r0,%r13,0(%r4)			# load guest gprs 0-13
 	mvi	__TI_sie(%r14),1
+	stosm	__SF_SIE_IRQ(%r15),0x03		# enable interrupts
 	lctlg	%c1,%c1,__SF_SIE_GUEST_ASCE(%r15) # load primary asce
 	lg	%r14,__SF_SIE_CONTROL(%r15)	# get control block pointer
 	oi	__SIE_PROG0C+3(%r14),1		# we are going into SIE now
@@ -212,6 +213,7 @@ SYM_FUNC_START(__sie64a)
 	lg	%r14,__LC_CURRENT(%r14)
 	mvi	__TI_sie(%r14),0
 SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL)
+	stnsm	__SF_SIE_IRQ(%r15),0xfc		# disable interrupts
 	lg	%r14,__SF_SIE_SAVEAREA(%r15)	# load guest register save area
 	stmg	%r0,%r13,0(%r14)		# save guest gprs 0-13
 	xgr	%r0,%r0				# clear guest registers to
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index fa6b5150ca31e4d9f0bdafabc1fb1d90ef3f3d0d..3cad08662b3d80aaf6f5f8891fc08b383c3c44d4 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -5075,13 +5075,8 @@ int noinstr kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb,
 	 * The guest_state_{enter,exit}_irqoff() functions inform lockdep and
 	 * tracing that entry to the guest will enable host IRQs, and exit from
 	 * the guest will disable host IRQs.
-	 *
-	 * We must not use lockdep/tracing/RCU in this critical section, so we
-	 * use the low-level arch_local_irq_*() helpers to enable/disable IRQs.
 	 */
-	arch_local_irq_enable();
 	ret = sie64a(scb, gprs, gasce);
-	arch_local_irq_disable();
 
 	guest_state_exit_irqoff();

-- 
2.52.0
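With the stosm/stnsm pair in entry code, the C-level wrapper no longer
touches the IRQ state itself. Based on the hunk above (a sketch of the
resulting function, not a verbatim copy), kvm_s390_enter_exit_sie() reduces
to roughly:

	int noinstr kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb,
					    u64 *gprs, unsigned long gasce)
	{
		int ret;

		/* Tell lockdep/tracing that IRQs will be enabled in SIE. */
		guest_state_enter_irqoff();
		/* entry.S enables IRQs only after __TI_sie has been set. */
		ret = sie64a(scb, gprs, gasce);
		/* IRQs are disabled again at the sie_exit label. */
		guest_state_exit_irqoff();

		return ret;
	}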
Switch to using the generic infrastructure to check for and handle pending
work before transitioning into guest mode. xfer_to_guest_mode_handle_work()
does a few more things than the current code does when deciding whether or
not to exit the __vcpu_run() loop.

In my testing, the exittime tests from kvm-unit-tests were within a few
percent of their results from before this series, which is within noise
tolerance.

Co-developed-by: Heiko Carstens
Signed-off-by: Heiko Carstens
Signed-off-by: Andrew Donnellan
---
v2: if work is handled, recheck for outstanding work with interrupts
    disabled before entering the guest (Heiko)

The way I've implemented this, the check happens between vcpu_pre_run() and
entering the guest, and we bail out of the loop if
kvm_xfer_to_guest_mode_handle_work() returns nonzero, without calling
vcpu_post_run(). My impression is that this is safe, but it does mean there
is an sie_enter vcpu event and trace event that isn't matched by
corresponding exit events. Is this a problem?
---
 arch/s390/kvm/Kconfig    |  1 +
 arch/s390/kvm/kvm-s390.c | 25 ++++++++++++++++++-------
 arch/s390/kvm/vsie.c     | 18 +++++++++++++-----
 3 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index cae908d645501ef7eb4edbe87b8431f6499370a4..0ca9d6587243c98034d086c0ebd4ef085e504faf 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -30,6 +30,7 @@ config KVM
 	select HAVE_KVM_NO_POLL
 	select KVM_VFIO
 	select MMU_NOTIFIER
+	select VIRT_XFER_TO_GUEST_WORK
 	help
 	  Support hosting paravirtualized guest machines using the SIE
 	  virtualization capability on the mainframe. This should work
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 3cad08662b3d80aaf6f5f8891fc08b383c3c44d4..34a4b8d112d4d2572336200feff04ea395fa70c6 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -14,6 +14,7 @@
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
 #include
+#include
 #include
 #include
 #include
@@ -4788,9 +4789,6 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
 
-	if (need_resched())
-		schedule();
-
 	if (!kvm_is_ucontrol(vcpu->kvm)) {
 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
 		if (rc || guestdbg_exit_pending(vcpu))
@@ -5095,12 +5093,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 	 */
 	kvm_vcpu_srcu_read_lock(vcpu);
 
-	do {
+	while (true) {
 		rc = vcpu_pre_run(vcpu);
+		kvm_vcpu_srcu_read_unlock(vcpu);
 		if (rc || guestdbg_exit_pending(vcpu))
 			break;
 
-		kvm_vcpu_srcu_read_unlock(vcpu);
 		/*
 		 * As PF_VCPU will be used in fault handler, between
 		 * guest_timing_enter_irqoff and guest_timing_exit_irqoff
@@ -5112,7 +5110,17 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 				       sizeof(sie_page->pv_grregs));
 		}
 
+xfer_to_guest_mode_check:
 		local_irq_disable();
+		xfer_to_guest_mode_prepare();
+		if (xfer_to_guest_mode_work_pending()) {
+			local_irq_enable();
+			rc = kvm_xfer_to_guest_mode_handle_work(vcpu);
+			if (rc)
+				break;
+			goto xfer_to_guest_mode_check;
+		}
+
 		guest_timing_enter_irqoff();
 		__disable_cpu_timer_accounting(vcpu);
@@ -5142,9 +5150,12 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 		kvm_vcpu_srcu_read_lock(vcpu);
 
 		rc = vcpu_post_run(vcpu, exit_reason);
-	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
+		if (rc || guestdbg_exit_pending(vcpu)) {
+			kvm_vcpu_srcu_read_unlock(vcpu);
+			break;
+		}
+	}
 
-	kvm_vcpu_srcu_read_unlock(vcpu);
 	return rc;
 }
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 347268f89f2f186bea623a3adff7376cabc305b2..fb630fba822efadbdb959ed646b45f00e138898f 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -1180,12 +1180,23 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 	current->thread.gmap_int_code = 0;
 	barrier();
 	if (!kvm_s390_vcpu_sie_inhibited(vcpu)) {
+xfer_to_guest_mode_check:
 		local_irq_disable();
+		xfer_to_guest_mode_prepare();
+		if (xfer_to_guest_mode_work_pending()) {
+			local_irq_enable();
+			rc = kvm_xfer_to_guest_mode_handle_work(vcpu);
+			if (rc)
+				goto skip_sie;
+			goto xfer_to_guest_mode_check;
+		}
 		guest_timing_enter_irqoff();
 		rc = kvm_s390_enter_exit_sie(scb_s, vcpu->run->s.regs.gprs,
 					     vsie_page->gmap->asce);
 		guest_timing_exit_irqoff();
 		local_irq_enable();
 	}
+
+skip_sie:
 	barrier();
 	vcpu->arch.sie_block->prog0c &= ~PROG_IN_SIE;
@@ -1345,13 +1356,11 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 		 * but rewind the PSW to re-enter SIE once that's completed
 		 * instead of passing a "no action" intercept to the guest.
 		 */
-		if (signal_pending(current) ||
-		    kvm_s390_vcpu_has_irq(vcpu, 0) ||
+		if (kvm_s390_vcpu_has_irq(vcpu, 0) ||
 		    kvm_s390_vcpu_sie_inhibited(vcpu)) {
 			kvm_s390_rewind_psw(vcpu, 4);
 			break;
 		}
-		cond_resched();
 	}
 
 	if (rc == -EFAULT) {
@@ -1483,8 +1492,7 @@ int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu)
 	if (unlikely(scb_addr & 0x1ffUL))
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	if (signal_pending(current) || kvm_s390_vcpu_has_irq(vcpu, 0) ||
-	    kvm_s390_vcpu_sie_inhibited(vcpu)) {
+	if (kvm_s390_vcpu_has_irq(vcpu, 0) || kvm_s390_vcpu_sie_inhibited(vcpu)) {
 		kvm_s390_rewind_psw(vcpu, 4);
 		return 0;
 	}

-- 
2.52.0
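For reference, the "few more things" the generic helper does: before each
guest entry, kvm_xfer_to_guest_mode_handle_work() loops over the pending
thread flags until none are left. A sketch of the core loop in the generic
entry code, from memory (names and details vary between kernel versions):

	static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu,
					   unsigned long ti_work)
	{
		do {
			int ret;

			/* Replaces the open-coded signal_pending() checks. */
			if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) {
				kvm_handle_signal_exit(vcpu);
				return -EINTR;
			}

			/* Replaces need_resched()/schedule() and cond_resched(). */
			if (ti_work & _TIF_NEED_RESCHED)
				schedule();

			if (ti_work & _TIF_NOTIFY_RESUME)
				resume_user_mode_work(NULL);

			ret = arch_xfer_to_guest_mode_handle_work(vcpu, ti_work);
			if (ret)
				return ret;

			ti_work = read_thread_flags();
		} while (ti_work & XFER_TO_GUEST_MODE_WORK || need_resched());
		return 0;
	}

This is why the open-coded signal_pending(), need_resched()/schedule() and
cond_resched() checks can simply be dropped from the s390 hunks above.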