From: Fred Griffoul Introduce selftest to verify nested VMX APIC virtualization page cache invalidation and refresh mechanisms for pfncache implementation. The test exercises the nested VMX APIC cache invalidation path through: - L2 guest setup: creates a nested environment where L2 accesses the APIC access page that is cached by KVM using pfncache. - Cache invalidation triggers: a separate update thread periodically invalidates the cached pages using either: - madvise(MADV_DONTNEED) to trigger MMU notifications. - vm_mem_region_move() to trigger memslot changes. The test validates that: - L2 can successfully access APIC page before and after invalidation. - KVM properly handles cache refresh without guest-visible errors. - Both MMU notification and memslot change invalidation paths work correctly. Signed-off-by: Fred Griffoul --- tools/testing/selftests/kvm/Makefile.kvm | 1 + .../selftests/kvm/x86/vmx_apic_update_test.c | 302 ++++++++++++++++++ 2 files changed, 303 insertions(+) create mode 100644 tools/testing/selftests/kvm/x86/vmx_apic_update_test.c diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 90f03f00cb04..5d4505c7f6f0 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -136,6 +136,7 @@ TEST_GEN_PROGS_x86 += x86/max_vcpuid_cap_test TEST_GEN_PROGS_x86 += x86/triple_fault_event_test TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test TEST_GEN_PROGS_x86 += x86/aperfmperf_test +TEST_GEN_PROGS_x86 += x86/vmx_apic_update_test TEST_GEN_PROGS_x86 += access_tracking_perf_test TEST_GEN_PROGS_x86 += coalesced_io_test TEST_GEN_PROGS_x86 += dirty_log_perf_test diff --git a/tools/testing/selftests/kvm/x86/vmx_apic_update_test.c b/tools/testing/selftests/kvm/x86/vmx_apic_update_test.c new file mode 100644 index 000000000000..22f82cf6dd0c --- /dev/null +++ b/tools/testing/selftests/kvm/x86/vmx_apic_update_test.c @@ -0,0 +1,302 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vmx_apic_update_test + * + * Copyright (C) 2025, mazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Test L2 guest APIC access page writes with concurrent MMU + * notifications and memslot move updates. + */ +#include +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#define VAPIC_GPA 0xc0000000 +#define VAPIC_SLOT 1 + +#define L2_GUEST_STACK_SIZE 64 + +#define L2_DELAY (100) + +static void l2_guest_code(void) +{ + uint32_t *vapic_addr = (uint32_t *) (VAPIC_GPA + 0x80); + + /* Unroll the loop to avoid any compiler side effect */ + + WRITE_ONCE(*vapic_addr, 1 << 0); + udelay(msecs_to_usecs(L2_DELAY)); + + WRITE_ONCE(*vapic_addr, 1 << 1); + udelay(msecs_to_usecs(L2_DELAY)); + + WRITE_ONCE(*vapic_addr, 1 << 2); + udelay(msecs_to_usecs(L2_DELAY)); + + WRITE_ONCE(*vapic_addr, 1 << 3); + udelay(msecs_to_usecs(L2_DELAY)); + + WRITE_ONCE(*vapic_addr, 1 << 4); + udelay(msecs_to_usecs(L2_DELAY)); + + WRITE_ONCE(*vapic_addr, 1 << 5); + udelay(msecs_to_usecs(L2_DELAY)); + + WRITE_ONCE(*vapic_addr, 1 << 6); + udelay(msecs_to_usecs(L2_DELAY)); + + WRITE_ONCE(*vapic_addr, 0); + udelay(msecs_to_usecs(L2_DELAY)); + + /* Exit to L1 */ + vmcall(); +} + +static void l1_guest_code(struct vmx_pages *vmx_pages) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uint32_t control, exit_reason; + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* Enable APIC access */ + control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); + control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; + vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); + control = vmreadz(SECONDARY_VM_EXEC_CONTROL); + control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + vmwrite(SECONDARY_VM_EXEC_CONTROL, control); + vmwrite(APIC_ACCESS_ADDR, VAPIC_GPA); + + GUEST_SYNC1(0); + GUEST_ASSERT(!vmlaunch()); +again: + exit_reason = vmreadz(VM_EXIT_REASON); + if (exit_reason == EXIT_REASON_APIC_ACCESS) { + uint64_t guest_rip = vmreadz(GUEST_RIP); + uint64_t instr_len = vmreadz(VM_EXIT_INSTRUCTION_LEN); + + vmwrite(GUEST_RIP, guest_rip + instr_len); + GUEST_ASSERT(!vmresume()); + goto again; + } + + GUEST_SYNC1(exit_reason); + GUEST_ASSERT(exit_reason == EXIT_REASON_VMCALL); + GUEST_DONE(); +} + +static const char *progname; +static int update_period_ms = L2_DELAY / 4; + +struct update_control { + pthread_mutex_t mutex; + pthread_cond_t start_cond; + struct kvm_vm *vm; + bool running; + bool started; + int updates; +}; + +static void wait_for_start_signal(struct update_control *ctrl) +{ + pthread_mutex_lock(&ctrl->mutex); + while (!ctrl->started) + pthread_cond_wait(&ctrl->start_cond, &ctrl->mutex); + + pthread_mutex_unlock(&ctrl->mutex); + printf("%s: starting update\n", progname); +} + +static bool is_running(struct update_control *ctrl) +{ + return READ_ONCE(ctrl->running); +} + +static void set_running(struct update_control *ctrl, bool running) +{ + WRITE_ONCE(ctrl->running, running); +} + +static void signal_thread_start(struct update_control *ctrl) +{ + pthread_mutex_lock(&ctrl->mutex); + if (!ctrl->started) { + ctrl->started = true; + pthread_cond_signal(&ctrl->start_cond); + } + pthread_mutex_unlock(&ctrl->mutex); +} + +static void *update_madvise(void *arg) +{ + struct update_control *ctrl = arg; + void *hva; + + wait_for_start_signal(ctrl); + + hva = addr_gpa2hva(ctrl->vm, VAPIC_GPA); + memset(hva, 0x45, ctrl->vm->page_size); + + while (is_running(ctrl)) { + usleep(update_period_ms * 1000); + madvise(hva, ctrl->vm->page_size, MADV_DONTNEED); + ctrl->updates++; + } + + return NULL; +} + +static void *update_move_memslot(void *arg) +{ + struct update_control *ctrl = arg; + uint64_t gpa = VAPIC_GPA; + + wait_for_start_signal(ctrl); + + while (is_running(ctrl)) { + usleep(update_period_ms * 1000); + gpa += 0x10000; + vm_mem_region_move(ctrl->vm, VAPIC_SLOT, gpa); + ctrl->updates++; + } + + return NULL; +} + +static void run(void * (*update)(void *), const char *name) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + struct vmx_pages *vmx; + struct update_control ctrl; + struct ucall uc; + vm_vaddr_t vmx_pages_gva; + pthread_t update_thread; + bool done = false; + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + + /* Allocate VMX pages */ + vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); + + /* Allocate memory and create VAPIC memslot */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, VAPIC_GPA, + VAPIC_SLOT, 1, 0); + + /* Allocate guest page table */ + virt_map(vm, VAPIC_GPA, VAPIC_GPA, 1); + + /* Set up nested EPT */ + prepare_eptp(vmx, vm, 0); + nested_map_memslot(vmx, vm, 0); + nested_map_memslot(vmx, vm, VAPIC_SLOT); + nested_map(vmx, vm, VAPIC_GPA, VAPIC_GPA, vm->page_size); + + vcpu_args_set(vcpu, 1, vmx_pages_gva); + + pthread_mutex_init(&ctrl.mutex, NULL); + pthread_cond_init(&ctrl.start_cond, NULL); + ctrl.vm = vm; + ctrl.running = true; + ctrl.started = false; + ctrl.updates = 0; + + pthread_create(&update_thread, NULL, update, &ctrl); + + printf("%s: running %s (tsc_khz %lu)\n", progname, name, guest_tsc_khz); + + while (!done) { + vcpu_run(vcpu); + + switch (vcpu->run->exit_reason) { + case KVM_EXIT_IO: + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + printf("%s: sync(%ld)\n", progname, uc.args[0]); + if (uc.args[0] == 0) + signal_thread_start(&ctrl); + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + /* NOT REACHED */ + case UCALL_DONE: + done = true; + break; + default: + TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd); + } + break; + case KVM_EXIT_MMIO: + /* Handle APIC MMIO access after memslot move */ + printf + ("%s: APIC MMIO access at 0x%llx (memslot move effect)\n", + progname, vcpu->run->mmio.phys_addr); + break; + default: + TEST_FAIL("%s: Unexpected exit reason: %d (flags 0x%x)", + progname, + vcpu->run->exit_reason, vcpu->run->flags); + } + } + + set_running(&ctrl, false); + if (!ctrl.started) + signal_thread_start(&ctrl); + pthread_join(update_thread, NULL); + printf("%s: completed with %d updates\n", progname, ctrl.updates); + + pthread_mutex_destroy(&ctrl.mutex); + pthread_cond_destroy(&ctrl.start_cond); + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + int opt_madvise = 0; + int opt_memslot_move = 0; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has_ept()); + + if (argc == 1) { + opt_madvise = 1; + opt_memslot_move = 1; + } else { + int opt; + + while ((opt = getopt(argc, argv, "amp:")) != -1) { + switch (opt) { + case 'a': + opt_madvise = 1; + break; + case 'm': + opt_memslot_move = 1; + break; + case 'p': + update_period_ms = atoi(optarg); + break; + default: + exit(1); + } + } + } + + TEST_ASSERT(opt_madvise + || opt_memslot_move, "No update test configured"); + + progname = argv[0]; + + if (opt_madvise) + run(update_madvise, "madvise"); + + if (opt_memslot_move) + run(update_move_memslot, "move memslot"); + + return 0; +} -- 2.51.0