Display the address as hex if the asserts for the vmxon_pa and vmcs12_pa fail, and assert that the flags are 0 as expected. Reviewed-by: Jim Mattson Signed-off-by: Yosry Ahmed --- .../selftests/kvm/x86/vmx_set_nested_state_test.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c index 67a62a5a88951..c4c400d2824c1 100644 --- a/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c @@ -241,8 +241,14 @@ void test_vmx_nested_state(struct kvm_vcpu *vcpu) TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz, "Size must be between %ld and %d. The size returned was %d.", sizeof(*state), state_sz, state->size); - TEST_ASSERT(state->hdr.vmx.vmxon_pa == -1ull, "vmxon_pa must be -1ull."); - TEST_ASSERT(state->hdr.vmx.vmcs12_pa == -1ull, "vmcs_pa must be -1ull."); + TEST_ASSERT(state->hdr.vmx.vmxon_pa == -1ull, + "vmxon_pa must be 0x%llx, but was 0x%llx", + -1ull, state->hdr.vmx.vmxon_pa); + TEST_ASSERT(state->hdr.vmx.vmcs12_pa == -1ull, + "vmcs12_pa must be 0x%llx, but was 0x%llx", + -1llu, state->hdr.vmx.vmcs12_pa); + TEST_ASSERT(state->flags == 0, + "Flags must be equal to 0, but was 0x%hx", state->flags); free(state); } -- 2.51.0.869.ge66316f041-goog Add test cases for the validation checks in svm_set_nested_state(), and allow the test to run with SVM as well as VMX. The SVM test also makes sure that KVM_SET_NESTED_STATE accepts GIF being set or cleared if EFER.SVME is cleared, verifying a recently fixed bug where GIF was incorrectly expected to always be set when EFER.SVME is cleared. Signed-off-by: Yosry Ahmed --- tools/testing/selftests/kvm/Makefile.kvm | 2 +- ...d_state_test.c => set_nested_state_test.c} | 125 ++++++++++++++++-- 2 files changed, 116 insertions(+), 11 deletions(-) rename tools/testing/selftests/kvm/x86/{vmx_set_nested_state_test.c => set_nested_state_test.c} (70%) diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index b9279ce4eaab8..acfa22206e6f3 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -100,6 +100,7 @@ TEST_GEN_PROGS_x86 += x86/set_sregs_test TEST_GEN_PROGS_x86 += x86/smaller_maxphyaddr_emulation_test TEST_GEN_PROGS_x86 += x86/smm_test TEST_GEN_PROGS_x86 += x86/state_test +TEST_GEN_PROGS_x86 += x86/set_nested_state_test TEST_GEN_PROGS_x86 += x86/vmx_preemption_timer_test TEST_GEN_PROGS_x86 += x86/svm_vmcall_test TEST_GEN_PROGS_x86 += x86/svm_int_ctl_test @@ -116,7 +117,6 @@ TEST_GEN_PROGS_x86 += x86/vmx_dirty_log_test TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state TEST_GEN_PROGS_x86 += x86/vmx_msrs_test TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state -TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test TEST_GEN_PROGS_x86 += x86/vmx_la57_nested_state_test TEST_GEN_PROGS_x86 += x86/vmx_tsc_adjust_test TEST_GEN_PROGS_x86 += x86/vmx_nested_tsc_scaling_test diff --git a/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86/set_nested_state_test.c similarity index 70% rename from tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c rename to tools/testing/selftests/kvm/x86/set_nested_state_test.c index c4c400d2824c1..fe12fffacd2ec 100644 --- a/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c +++ b/tools/testing/selftests/kvm/x86/set_nested_state_test.c @@ -1,6 
+1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * vmx_set_nested_state_test + * set_nested_state_test * * Copyright (C) 2019, Google LLC. * @@ -11,6 +11,7 @@ #include "kvm_util.h" #include "processor.h" #include "vmx.h" +#include "svm_util.h" #include #include @@ -253,6 +254,107 @@ void test_vmx_nested_state(struct kvm_vcpu *vcpu) free(state); } +static void vcpu_efer_enable_svm(struct kvm_vcpu *vcpu) +{ + uint64_t old_efer = vcpu_get_msr(vcpu, MSR_EFER); + + vcpu_set_msr(vcpu, MSR_EFER, old_efer | EFER_SVME); +} + +static void vcpu_efer_disable_svm(struct kvm_vcpu *vcpu) +{ + uint64_t old_efer = vcpu_get_msr(vcpu, MSR_EFER); + + vcpu_set_msr(vcpu, MSR_EFER, old_efer & ~EFER_SVME); +} + +void set_default_svm_state(struct kvm_nested_state *state, int size) +{ + memset(state, 0, size); + state->format = 1; + state->size = size; + state->hdr.svm.vmcb_pa = 0x3000; +} + +void test_svm_nested_state(struct kvm_vcpu *vcpu) +{ + /* Add a page for VMCB. */ + const int state_sz = sizeof(struct kvm_nested_state) + getpagesize(); + struct kvm_nested_state *state = + (struct kvm_nested_state *)malloc(state_sz); + + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_SVM); + + /* The format must be set to 1. 0 for VMX, 1 for SVM. */ + set_default_svm_state(state, state_sz); + state->format = 0; + test_nested_state_expect_einval(vcpu, state); + + /* Invalid flags are rejected, KVM_STATE_NESTED_EVMCS is VMX-only */ + set_default_svm_state(state, state_sz); + state->flags = KVM_STATE_NESTED_EVMCS; + test_nested_state_expect_einval(vcpu, state); + + /* + * If EFER.SVME is clear, guest mode is disallowed and GIF can be set or + * cleared. + */ + vcpu_efer_disable_svm(vcpu); + + set_default_svm_state(state, state_sz); + state->flags = KVM_STATE_NESTED_GUEST_MODE; + test_nested_state_expect_einval(vcpu, state); + + state->flags = 0; + test_nested_state(vcpu, state); + + state->flags = KVM_STATE_NESTED_GIF_SET; + test_nested_state(vcpu, state); + + /* Enable SVM in the guest EFER. */ + vcpu_efer_enable_svm(vcpu); + + /* Setting vmcb_pa to a non-aligned address is only fine when not entering guest mode */ + set_default_svm_state(state, state_sz); + state->hdr.svm.vmcb_pa = -1ull; + state->flags = 0; + test_nested_state(vcpu, state); + state->flags = KVM_STATE_NESTED_GUEST_MODE; + test_nested_state_expect_einval(vcpu, state); + + /* + * Size must be large enough to fit kvm_nested_state and VMCB + * only when entering guest mode. + */ + set_default_svm_state(state, state_sz/2); + state->flags = 0; + test_nested_state(vcpu, state); + state->flags = KVM_STATE_NESTED_GUEST_MODE; + test_nested_state_expect_einval(vcpu, state); + + /* + * Test that if we leave nesting the state reflects that when we get it + * again, except for vmcb_pa, which is always returned as 0 when not in + * guest mode. + */ + set_default_svm_state(state, state_sz); + state->hdr.svm.vmcb_pa = -1ull; + state->flags = KVM_STATE_NESTED_GIF_SET; + test_nested_state(vcpu, state); + vcpu_nested_state_get(vcpu, state); + TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz, + "Size must be between %ld and %d. 
The size returned was %d.", + sizeof(*state), state_sz, state->size); + TEST_ASSERT(state->hdr.svm.vmcb_pa == 0, + "vmcb_pa must be 0, but was %llx", + state->hdr.svm.vmcb_pa); + TEST_ASSERT(state->flags == KVM_STATE_NESTED_GIF_SET, + "Flags must be equal to 0x%hx, but was 0x%hx", + KVM_STATE_NESTED_GIF_SET, state->flags); + + free(state); +} + int main(int argc, char *argv[]) { struct kvm_vm *vm; @@ -261,20 +363,20 @@ int main(int argc, char *argv[]) have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || + kvm_cpu_has(X86_FEATURE_SVM)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); - /* - * AMD currently does not implement set_nested_state, so for now we - * just early out. - */ - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - vm = vm_create_with_one_vcpu(&vcpu, NULL); /* - * First run tests with VMX disabled to check error handling. + * First run tests with VMX/SVM disabled to check error handling. + * test_{vmx/svm}_nested_state() will re-enable as needed. */ - vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX); + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX); + else + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_SVM); /* Passing a NULL kvm_nested_state causes a EFAULT. */ test_nested_state_expect_efault(vcpu, NULL); @@ -303,7 +405,10 @@ int main(int argc, char *argv[]) state.flags = KVM_STATE_NESTED_RUN_PENDING; test_nested_state_expect_einval(vcpu, &state); - test_vmx_nested_state(vcpu); + if (kvm_cpu_has(X86_FEATURE_VMX)) + test_vmx_nested_state(vcpu); + else + test_svm_nested_state(vcpu); kvm_vm_free(vm); return 0; -- 2.51.0.869.ge66316f041-goog Add SVM L1 code to run the nested guest, and allow the test to run with SVM as well as VMX. Reviewed-by: Jim Mattson Signed-off-by: Yosry Ahmed --- tools/testing/selftests/kvm/Makefile.kvm | 2 +- ...ested_test.c => close_while_nested_test.c} | 42 +++++++++++++++---- 2 files changed, 35 insertions(+), 9 deletions(-) rename tools/testing/selftests/kvm/x86/{vmx_close_while_nested_test.c => close_while_nested_test.c} (64%) diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index acfa22206e6f3..e70a844a52bdc 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -112,7 +112,7 @@ TEST_GEN_PROGS_x86 += x86/ucna_injection_test TEST_GEN_PROGS_x86 += x86/userspace_io_test TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test -TEST_GEN_PROGS_x86 += x86/vmx_close_while_nested_test +TEST_GEN_PROGS_x86 += x86/close_while_nested_test TEST_GEN_PROGS_x86 += x86/vmx_dirty_log_test TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state TEST_GEN_PROGS_x86 += x86/vmx_msrs_test diff --git a/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86/close_while_nested_test.c similarity index 64% rename from tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c rename to tools/testing/selftests/kvm/x86/close_while_nested_test.c index dad988351493e..cf5f24c83c448 100644 --- a/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c +++ b/tools/testing/selftests/kvm/x86/close_while_nested_test.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * vmx_close_while_nested + * close_while_nested_test * * Copyright (C) 2019, Red Hat, Inc. 
* @@ -12,6 +12,7 @@ #include "kvm_util.h" #include "processor.h" #include "vmx.h" +#include "svm_util.h" #include #include @@ -22,6 +23,8 @@ enum { PORT_L0_EXIT = 0x2000, }; +#define L2_GUEST_STACK_SIZE 64 + static void l2_guest_code(void) { /* Exit to L0 */ @@ -29,9 +32,8 @@ static void l2_guest_code(void) : : [port] "d" (PORT_L0_EXIT) : "rax"); } -static void l1_guest_code(struct vmx_pages *vmx_pages) +static void l1_vmx_code(struct vmx_pages *vmx_pages) { -#define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); @@ -45,19 +47,43 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_ASSERT(0); } +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + /* Prepare the VMCB for L2 execution. */ + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(0); +} + +static void l1_guest_code(void *data) +{ + if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(data); + else + l1_svm_code(data); +} + int main(int argc, char *argv[]) { - vm_vaddr_t vmx_pages_gva; + vm_vaddr_t guest_gva; struct kvm_vcpu *vcpu; struct kvm_vm *vm; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || + kvm_cpu_has(X86_FEATURE_SVM)); vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - /* Allocate VMX pages and shared descriptors (vmx_pages). */ - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &guest_gva); + else + vcpu_alloc_svm(vm, &guest_gva); + + vcpu_args_set(vcpu, 1, guest_gva); for (;;) { volatile struct kvm_run *run = vcpu->run; -- 2.51.0.869.ge66316f041-goog Add SVM L1 code to run the nested guest, and allow the test to run with SVM as well as VMX. 
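For reference, SVM exposes the nested TSC multiplier through the MSR_AMD64_TSC_RATIO MSR rather than through a VMCS field. The MSR holds an 8.32 fixed-point ratio (integer part in bits 39:32, fraction in bits 31:0), which is why the patch below programs an integer scale factor as L2_SCALE_FACTOR << 32. A minimal sketch of how a fractional ratio would be encoded, with a purely illustrative helper name:

	/* 8.32 fixed point: ratio = mult / div (sketch, not part of the patch) */
	static inline uint64_t svm_tsc_ratio(uint64_t mult, uint64_t div)
	{
		return ((mult / div) << 32) | (((mult % div) << 32) / div);
	}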
Signed-off-by: Yosry Ahmed --- tools/testing/selftests/kvm/Makefile.kvm | 2 +- ...aling_test.c => nested_tsc_scaling_test.c} | 48 +++++++++++++++++-- 2 files changed, 44 insertions(+), 6 deletions(-) rename tools/testing/selftests/kvm/x86/{vmx_nested_tsc_scaling_test.c => nested_tsc_scaling_test.c} (83%) diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index e70a844a52bdc..bb2ff7927ef57 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -119,7 +119,7 @@ TEST_GEN_PROGS_x86 += x86/vmx_msrs_test TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state TEST_GEN_PROGS_x86 += x86/vmx_la57_nested_state_test TEST_GEN_PROGS_x86 += x86/vmx_tsc_adjust_test -TEST_GEN_PROGS_x86 += x86/vmx_nested_tsc_scaling_test +TEST_GEN_PROGS_x86 += x86/nested_tsc_scaling_test TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test TEST_GEN_PROGS_x86 += x86/xapic_ipi_test TEST_GEN_PROGS_x86 += x86/xapic_state_test diff --git a/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c similarity index 83% rename from tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c rename to tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c index 1759fa5cb3f29..4260c9e4f4891 100644 --- a/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c +++ b/tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c @@ -13,6 +13,7 @@ #include "kvm_util.h" #include "vmx.h" +#include "svm_util.h" #include "kselftest.h" /* L2 is scaled up (from L1's perspective) by this factor */ @@ -79,7 +80,30 @@ static void l2_guest_code(void) __asm__ __volatile__("vmcall"); } -static void l1_guest_code(struct vmx_pages *vmx_pages) +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + /* check that L1's frequency looks alright before launching L2 */ + check_tsc_freq(UCHECK_L1); + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* enable TSC scaling for L2 */ + wrmsr(MSR_AMD64_TSC_RATIO, L2_SCALE_FACTOR << 32); + + /* launch L2 */ + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL); + + /* check that L1's frequency still looks good */ + check_tsc_freq(UCHECK_L1); + + GUEST_DONE(); +} + +static void l1_vmx_code(struct vmx_pages *vmx_pages) { unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; uint32_t control; @@ -116,11 +140,19 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_DONE(); } +static void l1_guest_code(void *data) +{ + if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(data); + else + l1_svm_code(data); +} + int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; - vm_vaddr_t vmx_pages_gva; + vm_vaddr_t guest_gva = 0; uint64_t tsc_start, tsc_end; uint64_t tsc_khz; @@ -129,7 +161,8 @@ int main(int argc, char *argv[]) uint64_t l1_tsc_freq = 0; uint64_t l2_tsc_freq = 0; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || + kvm_cpu_has(X86_FEATURE_SVM)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL)); TEST_REQUIRE(sys_clocksource_is_based_on_tsc()); @@ -152,8 +185,13 @@ int main(int argc, char *argv[]) printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq); vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); + + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, 
&guest_gva); + else + vcpu_alloc_svm(vm, &guest_gva); + + vcpu_args_set(vcpu, 1, guest_gva); tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL); TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed"); -- 2.51.0.869.ge66316f041-goog vmx_tsc_adjust_test currently verifies that a nested VMLAUNCH fails with an invalid CR3. This is unrelated to TSC adjustment testing, so move it to a standalone test. Signed-off-by: Yosry Ahmed --- tools/testing/selftests/kvm/Makefile.kvm | 1 + .../kvm/x86/nested_invalid_cr3_test.c | 81 +++++++++++++++++++ .../selftests/kvm/x86/vmx_tsc_adjust_test.c | 10 --- 3 files changed, 82 insertions(+), 10 deletions(-) create mode 100644 tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index bb2ff7927ef57..b78700c574fc7 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -88,6 +88,7 @@ TEST_GEN_PROGS_x86 += x86/kvm_pv_test TEST_GEN_PROGS_x86 += x86/kvm_buslock_test TEST_GEN_PROGS_x86 += x86/monitor_mwait_test TEST_GEN_PROGS_x86 += x86/msrs_test +TEST_GEN_PROGS_x86 += x86/nested_invalid_cr3_test TEST_GEN_PROGS_x86 += x86/nested_emulation_test TEST_GEN_PROGS_x86 += x86/nested_exceptions_test TEST_GEN_PROGS_x86 += x86/platform_info_test diff --git a/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c new file mode 100644 index 0000000000000..b9853ab532cfe --- /dev/null +++ b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * nested_invalid_cr3_test + * + * Copyright (C) 2025, Google LLC. + * + * This test verifies that L1 fails to enter L2 with an invalid CR3, and + * succeeds otherwise.
+ */ +#include "kvm_util.h" +#include "vmx.h" +#include "kselftest.h" + + +#define L2_GUEST_STACK_SIZE 64 + +static void l2_guest_code(void) +{ + vmcall(); +} + +static void l1_vmx_code(struct vmx_pages *vmx_pages) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uintptr_t save_cr3; + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* Try to run L2 with invalid CR3 and make sure it fails */ + save_cr3 = vmreadz(GUEST_CR3); + vmwrite(GUEST_CR3, -1ull); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == + (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE)); + + /* Now restore CR3 and make sure L2 runs successfully */ + vmwrite(GUEST_CR3, save_cr3); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + vm_vaddr_t guest_gva = 0; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + + vm = vm_create_with_one_vcpu(&vcpu, l1_vmx_code); + vcpu_alloc_vmx(vm, &guest_gva); + vcpu_args_set(vcpu, 1, guest_gva); + + for (;;) { + struct ucall uc; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + case UCALL_SYNC: + break; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } + +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c index 2ceb5c78c4427..2dcc0306a0d9b 100644 --- a/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c @@ -77,7 +77,6 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; uint32_t control; - uintptr_t save_cr3; GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE); wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE); @@ -94,15 +93,6 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE); - /* Jump into L2. First, test failure to load guest CR3. */ - save_cr3 = vmreadz(GUEST_CR3); - vmwrite(GUEST_CR3, -1ull); - GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == - (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE)); - check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); - vmwrite(GUEST_CR3, save_cr3); - GUEST_ASSERT(!vmlaunch()); GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); -- 2.51.0.869.ge66316f041-goog Add SVM L1 code to run the nested guest, and allow the test to run with SVM as well as VMX. 
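The interesting difference from the VMX side is how the failed entry is reported: VMX reflects it in VM_EXIT_REASON as a failed VMENTRY with invalid guest state, while SVM fails the VMRUN with the catch-all SVM_EXIT_ERR exit code. Roughly, the two L1-side checks used in this series are:

	/* VMX: entry fails with a failed-VMENTRY exit reason */
	GUEST_ASSERT(!vmlaunch());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
		     (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));

	/* SVM: VMRUN fails with the generic error exit code */
	run_guest(svm->vmcb, svm->vmcb_gpa);
	GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_ERR);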
Signed-off-by: Yosry Ahmed --- .../kvm/x86/nested_invalid_cr3_test.c | 43 +++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c index b9853ab532cfe..2a472440cb962 100644 --- a/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c +++ b/tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c @@ -9,6 +9,7 @@ */ #include "kvm_util.h" #include "vmx.h" +#include "svm_util.h" #include "kselftest.h" @@ -19,6 +20,28 @@ static void l2_guest_code(void) vmcall(); } +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uintptr_t save_cr3; + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* Try to run L2 with invalid CR3 and make sure it fails */ + save_cr3 = svm->vmcb->save.cr3; + svm->vmcb->save.cr3 = -1ull; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_ERR); + + /* Now restore CR3 and make sure L2 runs successfully */ + svm->vmcb->save.cr3 = save_cr3; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL); + + GUEST_DONE(); +} + static void l1_vmx_code(struct vmx_pages *vmx_pages) { unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; @@ -45,16 +68,30 @@ static void l1_vmx_code(struct vmx_pages *vmx_pages) GUEST_DONE(); } +static void l1_guest_code(void *data) +{ + if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(data); + else + l1_svm_code(data); +} + int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; vm_vaddr_t guest_gva = 0; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || + kvm_cpu_has(X86_FEATURE_SVM)); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &guest_gva); + else + vcpu_alloc_svm(vm, &guest_gva); - vm = vm_create_with_one_vcpu(&vcpu, l1_vmx_code); - vcpu_alloc_vmx(vm, &guest_gva); vcpu_args_set(vcpu, 1, guest_gva); for (;;) { -- 2.51.0.869.ge66316f041-goog Add SVM L1 code to run the nested guest, and allow the test to run with SVM as well as VMX. 
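The invariant being exercised is that a WRMSR to IA32_TSC also moves IA32_TSC_ADJUST by the same delta (ADJUST_new = ADJUST_old + value_written - current_TSC). Because L1 does not intercept L2's TSC write, the write performed in L2 lands in L1's TSC_ADJUST as well. In rough outline, using the helpers from the test:

	/* In L1: */
	wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE);
	check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);

	/* ... enter L2, which performs the same non-intercepted WRMSR ... */

	/* Back in L1: */
	check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);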
Reviewed-by: Jim Mattson Signed-off-by: Yosry Ahmed --- tools/testing/selftests/kvm/Makefile.kvm | 2 +- ...adjust_test.c => nested_tsc_adjust_test.c} | 69 ++++++++++++------- 2 files changed, 46 insertions(+), 25 deletions(-) rename tools/testing/selftests/kvm/x86/{vmx_tsc_adjust_test.c => nested_tsc_adjust_test.c} (61%) diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index b78700c574fc7..6625ac53545e8 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -119,7 +119,7 @@ TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state TEST_GEN_PROGS_x86 += x86/vmx_msrs_test TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state TEST_GEN_PROGS_x86 += x86/vmx_la57_nested_state_test -TEST_GEN_PROGS_x86 += x86/vmx_tsc_adjust_test +TEST_GEN_PROGS_x86 += x86/nested_tsc_adjust_test TEST_GEN_PROGS_x86 += x86/nested_tsc_scaling_test TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test TEST_GEN_PROGS_x86 += x86/xapic_ipi_test diff --git a/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c similarity index 61% rename from tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c rename to tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c index 2dcc0306a0d9b..cc825a0b41dbf 100644 --- a/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * vmx_tsc_adjust_test + * nested_tsc_adjust_test * * Copyright (C) 2018, Google LLC. * @@ -22,6 +22,7 @@ #include "kvm_util.h" #include "processor.h" #include "vmx.h" +#include "svm_util.h" #include #include @@ -35,6 +36,8 @@ #define TSC_ADJUST_VALUE (1ll << 32) #define TSC_OFFSET_VALUE -(1ll << 48) +#define L2_GUEST_STACK_SIZE 64 + enum { PORT_ABORT = 0x1000, PORT_REPORT, @@ -72,32 +75,47 @@ static void l2_guest_code(void) __asm__ __volatile__("vmcall"); } -static void l1_guest_code(struct vmx_pages *vmx_pages) +static void l1_guest_code(void *data) { -#define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; - uint32_t control; + /* Set TSC from L1 and make sure TSC_ADJUST is updated correctly */ GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE); wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE); check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); - GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); - GUEST_ASSERT(load_vmcs(vmx_pages)); - - /* Prepare the VMCS for L2 execution. */ - prepare_vmcs(vmx_pages, l2_guest_code, - &l2_guest_stack[L2_GUEST_STACK_SIZE]); - control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); - control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING; - vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); - vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE); - - GUEST_ASSERT(!vmlaunch()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + /* + * Run L2 with TSC_OFFSET. L2 will write to TSC, and L1 is not + * intercepting the write so it should update L1's TSC_ADJUST. 
+ */ + if (this_cpu_has(X86_FEATURE_VMX)) { + struct vmx_pages *vmx_pages = data; + uint32_t control; + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); + control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING; + vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); + vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE); + + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + } else { + struct svm_test_data *svm = data; + + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + svm->vmcb->control.tsc_offset = TSC_OFFSET_VALUE; + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL); + } check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); - GUEST_DONE(); } @@ -109,16 +127,19 @@ static void report(int64_t val) int main(int argc, char *argv[]) { - vm_vaddr_t vmx_pages_gva; + vm_vaddr_t nested_gva; struct kvm_vcpu *vcpu; - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || + kvm_cpu_has(X86_FEATURE_SVM)); - vm = vm_create_with_one_vcpu(&vcpu, (void *) l1_guest_code); + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + if (kvm_cpu_has(X86_FEATURE_VMX)) + vcpu_alloc_vmx(vm, &nested_gva); + else + vcpu_alloc_svm(vm, &nested_gva); - /* Allocate VMX pages and shared descriptors (vmx_pages). */ - vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); + vcpu_args_set(vcpu, 1, nested_gva); for (;;) { struct ucall uc; -- 2.51.0.869.ge66316f041-goog Use PAGE_SIZE instead of 4096. Signed-off-by: Yosry Ahmed --- .../selftests/kvm/x86/hyperv_features.c | 2 +- tools/testing/selftests/kvm/x86/hyperv_ipi.c | 18 +++++++++--------- .../testing/selftests/kvm/x86/sev_smoke_test.c | 2 +- tools/testing/selftests/kvm/x86/state_test.c | 2 +- .../selftests/kvm/x86/userspace_io_test.c | 2 +- .../selftests/kvm/x86/vmx_dirty_log_test.c | 10 +++++----- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/tools/testing/selftests/kvm/x86/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c index 99d327084172f..130b9ce7e5ddd 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86/hyperv_features.c @@ -94,7 +94,7 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) { input = pgs_gpa; - output = pgs_gpa + 4096; + output = pgs_gpa + PAGE_SIZE; } else { input = output = 0; } diff --git a/tools/testing/selftests/kvm/x86/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c index 2b5b4bc6ef7ec..ca61836c4e325 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_ipi.c +++ b/tools/testing/selftests/kvm/x86/hyperv_ipi.c @@ -102,7 +102,7 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) /* 'Slow' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */ ipi->vector = IPI_VECTOR; ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1; - hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + 4096); + hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + PAGE_SIZE); nop_loop(); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); @@ -116,13 +116,13 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) GUEST_SYNC(stage++); /* 'Slow' HvCallSendSyntheticClusterIpiEx to 
RECEIVER_VCPU_ID_1 */ - memset(hcall_page, 0, 4096); + memset(hcall_page, 0, PAGE_SIZE); ipi_ex->vector = IPI_VECTOR; ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; ipi_ex->vp_set.valid_bank_mask = 1 << 0; ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1); hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET), - pgs_gpa, pgs_gpa + 4096); + pgs_gpa, pgs_gpa + PAGE_SIZE); nop_loop(); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]); @@ -138,13 +138,13 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) GUEST_SYNC(stage++); /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */ - memset(hcall_page, 0, 4096); + memset(hcall_page, 0, PAGE_SIZE); ipi_ex->vector = IPI_VECTOR; ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; ipi_ex->vp_set.valid_bank_mask = 1 << 1; ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_2 - 64); hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET), - pgs_gpa, pgs_gpa + 4096); + pgs_gpa, pgs_gpa + PAGE_SIZE); nop_loop(); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); @@ -160,14 +160,14 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) GUEST_SYNC(stage++); /* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */ - memset(hcall_page, 0, 4096); + memset(hcall_page, 0, PAGE_SIZE); ipi_ex->vector = IPI_VECTOR; ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K; ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1; ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1); ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64); hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET), - pgs_gpa, pgs_gpa + 4096); + pgs_gpa, pgs_gpa + PAGE_SIZE); nop_loop(); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); @@ -183,10 +183,10 @@ static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa) GUEST_SYNC(stage++); /* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */ - memset(hcall_page, 0, 4096); + memset(hcall_page, 0, PAGE_SIZE); ipi_ex->vector = IPI_VECTOR; ipi_ex->vp_set.format = HV_GENERIC_SET_ALL; - hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + 4096); + hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + PAGE_SIZE); nop_loop(); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]); GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]); diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c index 77256c89bb8de..86ad1c7d068f2 100644 --- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c @@ -104,7 +104,7 @@ static void test_sync_vmsa(uint32_t type, uint64_t policy) vm_sev_launch(vm, policy, NULL); /* This page is shared, so make it decrypted. 
*/ - memset(hva, 0, 4096); + memset(hva, 0, PAGE_SIZE); vcpu_run(vcpu); diff --git a/tools/testing/selftests/kvm/x86/state_test.c b/tools/testing/selftests/kvm/x86/state_test.c index 141b7fc0c965b..f2c7a1c297e37 100644 --- a/tools/testing/selftests/kvm/x86/state_test.c +++ b/tools/testing/selftests/kvm/x86/state_test.c @@ -141,7 +141,7 @@ static void __attribute__((__flatten__)) guest_code(void *arg) if (this_cpu_has(X86_FEATURE_XSAVE)) { uint64_t supported_xcr0 = this_cpu_supported_xcr0(); - uint8_t buffer[4096]; + uint8_t buffer[PAGE_SIZE]; memset(buffer, 0xcc, sizeof(buffer)); diff --git a/tools/testing/selftests/kvm/x86/userspace_io_test.c b/tools/testing/selftests/kvm/x86/userspace_io_test.c index 9481cbcf284f6..be7d72f3c029f 100644 --- a/tools/testing/selftests/kvm/x86/userspace_io_test.c +++ b/tools/testing/selftests/kvm/x86/userspace_io_test.c @@ -85,7 +85,7 @@ int main(int argc, char *argv[]) regs.rcx = 1; if (regs.rcx == 3) regs.rcx = 8192; - memset((void *)run + run->io.data_offset, 0xaa, 4096); + memset((void *)run + run->io.data_offset, 0xaa, PAGE_SIZE); vcpu_regs_set(vcpu, ®s); } diff --git a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c index fa512d033205f..34a57fe747f64 100644 --- a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c @@ -122,15 +122,15 @@ static void test_vmx_dirty_log(bool enable_ept) if (enable_ept) { prepare_eptp(vmx, vm, 0); nested_map_memslot(vmx, vm, 0); - nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096); - nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096); + nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, PAGE_SIZE); + nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, PAGE_SIZE); } bmap = bitmap_zalloc(TEST_MEM_PAGES); host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM); while (!done) { - memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096); + memset(host_test_mem, 0xaa, TEST_MEM_PAGES * PAGE_SIZE); vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); @@ -153,9 +153,9 @@ static void test_vmx_dirty_log(bool enable_ept) } TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty"); - TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest"); + TEST_ASSERT(host_test_mem[PAGE_SIZE / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest"); TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty"); - TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest"); + TEST_ASSERT(host_test_mem[PAGE_SIZE*2 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest"); break; case UCALL_DONE: done = true; -- 2.51.0.869.ge66316f041-goog eptp_memslot is unused, remove it. No functional change intended. 
Signed-off-by: Yosry Ahmed --- tools/testing/selftests/kvm/include/x86/vmx.h | 3 +-- tools/testing/selftests/kvm/lib/x86/memstress.c | 2 +- tools/testing/selftests/kvm/lib/x86/vmx.c | 3 +-- tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c | 2 +- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h index edb3c391b9824..96e2b4c630a9b 100644 --- a/tools/testing/selftests/kvm/include/x86/vmx.h +++ b/tools/testing/selftests/kvm/include/x86/vmx.h @@ -568,8 +568,7 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, uint64_t addr, uint64_t size); bool kvm_cpu_has_ept(void); -void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, - uint32_t eptp_memslot); +void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm); void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm); #endif /* SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/lib/x86/memstress.c b/tools/testing/selftests/kvm/lib/x86/memstress.c index 7f5d62a65c68a..0b1f288ad5564 100644 --- a/tools/testing/selftests/kvm/lib/x86/memstress.c +++ b/tools/testing/selftests/kvm/lib/x86/memstress.c @@ -63,7 +63,7 @@ void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) { uint64_t start, end; - prepare_eptp(vmx, vm, 0); + prepare_eptp(vmx, vm); /* * Identity map the first 4G and the test region with 1G pages so that diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c index 1b6d4a0077980..f0023a3b0137e 100644 --- a/tools/testing/selftests/kvm/lib/x86/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86/vmx.c @@ -535,8 +535,7 @@ bool kvm_cpu_has_ept(void) return ctrl & SECONDARY_EXEC_ENABLE_EPT; } -void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, - uint32_t eptp_memslot) +void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm) { TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT"); diff --git a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c index 34a57fe747f64..98cb6bdab3e6d 100644 --- a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c @@ -120,7 +120,7 @@ static void test_vmx_dirty_log(bool enable_ept) * GPAs as the EPT enabled case. */ if (enable_ept) { - prepare_eptp(vmx, vm, 0); + prepare_eptp(vmx, vm); nested_map_memslot(vmx, vm, 0); nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, PAGE_SIZE); nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, PAGE_SIZE); -- 2.51.0.869.ge66316f041-goog Replace __virt_pg_map() calls in tests by high-level equivalent functions, removing some loops in the process. No functional change intended. Signed-off-by: Yosry Ahmed --- tools/testing/selftests/kvm/mmu_stress_test.c | 6 ++---- tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c index 6a437d2be9fa4..21c0f8e6552cd 100644 --- a/tools/testing/selftests/kvm/mmu_stress_test.c +++ b/tools/testing/selftests/kvm/mmu_stress_test.c @@ -361,11 +361,9 @@ int main(int argc, char *argv[]) #ifdef __x86_64__ /* Identity map memory in the guest using 1gb pages. 
*/ - for (i = 0; i < slot_size; i += SZ_1G) - __virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G); + virt_map_level(vm, gpa, gpa, slot_size, PG_LEVEL_1G); #else - for (i = 0; i < slot_size; i += vm->page_size) - virt_pg_map(vm, gpa + i, gpa + i); + virt_map(vm, gpa, gpa, slot_size >> vm->page_shift); #endif } diff --git a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c index 077cd0ec3040e..a3b7ce1559812 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c +++ b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c @@ -621,7 +621,7 @@ int main(int argc, char *argv[]) for (i = 0; i < NTEST_PAGES; i++) { pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE); gpa = addr_hva2gpa(vm, pte); - __virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK, PG_LEVEL_4K); + virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK); data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK); } -- 2.51.0.869.ge66316f041-goog Call paths leading to __virt_pg_map() are currently: (a) virt_pg_map() -> virt_arch_pg_map() -> __virt_pg_map() (b) virt_map_level() -> __virt_pg_map() For (a), calls to virt_pg_map() from kvm_util.c make sure they update vm->vpages_mapped, but other callers do not. Move the sparsebit_set() call into virt_pg_map() to make sure all callers are captured. For (b), call sparsebit_set_num() from virt_map_level(). It's tempting to have a single call inside __virt_pg_map(), however: - The call path in (a) is not x86-specific, while (b) is. Moving the call into __virt_pg_map() would require doing something similar for other archs implementing virt_pg_map(). - Incoming changes will reuse __virt_pg_map() for nested PTEs, which should not update vm->vpages_mapped. This means we'd need to break out a triple underscore version that does not update vm->vpages_mapped.
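Concretely, after this patch the tracking for (a) lives in the arch-neutral inline wrapper, mirroring the kvm_util.h hunk below:

	static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr,
				       uint64_t paddr)
	{
		virt_arch_pg_map(vm, vaddr, paddr);
		sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
	}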
Signed-off-by: Yosry Ahmed --- tools/testing/selftests/kvm/include/kvm_util.h | 1 + tools/testing/selftests/kvm/lib/kvm_util.c | 3 --- tools/testing/selftests/kvm/lib/x86/processor.c | 2 ++ 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index bcbcfb4deb9a2..15c69af11a684 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -1206,6 +1206,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr); static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) { virt_arch_pg_map(vm, vaddr, paddr); + sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); } diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 5e6b51768f67f..473c533774898 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1490,8 +1490,6 @@ static vm_vaddr_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, pages--, vaddr += vm->page_size, paddr += vm->page_size) { virt_pg_map(vm, vaddr, paddr); - - sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); } return vaddr_start; @@ -1605,7 +1603,6 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, while (npages--) { virt_pg_map(vm, vaddr, paddr); - sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift); vaddr += page_size; paddr += page_size; diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index c484e83255c70..4ee9fc844ee66 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -285,6 +285,8 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, for (i = 0; i < nr_pages; i++) { __virt_pg_map(vm, vaddr, paddr, level); + sparsebit_set_num(vm->vpages_mapped, vaddr >> vm->page_shift, + nr_bytes / PAGE_SIZE); vaddr += pg_size; paddr += pg_size; -- 2.51.0.869.ge66316f041-goog Move the PTE bitmasks into a struct and define one for x86 page tables. Move all internal functions in lib/x86/processor.c to take in and use the structs, but do not expose them to external callers. Drop the 'global' bit definition as it's currently unused, but leave the 'user' bit as it will be used in coming changes. Following changes will add support to use the virt mapping functions for EPTs and NPTs, so they will define their own bitmask structs. Leave PHYSICAL_PAGE_MASK alone; it's fixed in all page table formats and a lot of other macros depend on it. It's tempting to move all the other macros to be per-struct instead, but it would be too much noise for little benefit. While at it, make __vm_get_page_table_entry() static as it's not used in any other files. No functional change intended.
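As a quick orientation for the diff below: the x86 masks become a const struct instance (x86_pte_masks) and every internal helper takes a pointer to it, while the external entry points keep their signatures. For example, virt_arch_pg_map() reduces to:

	void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
	{
		__virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K, &x86_pte_masks);
	}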
Signed-off-by: Yosry Ahmed --- .../selftests/kvm/include/x86/processor.h | 27 +++--- .../testing/selftests/kvm/lib/x86/processor.c | 92 +++++++++++-------- 2 files changed, 69 insertions(+), 50 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index 51cd84b9ca664..8debe0df3ffca 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -362,16 +362,6 @@ static inline unsigned int x86_model(unsigned int eax) return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f); } -/* Page table bitfield declarations */ -#define PTE_PRESENT_MASK BIT_ULL(0) -#define PTE_WRITABLE_MASK BIT_ULL(1) -#define PTE_USER_MASK BIT_ULL(2) -#define PTE_ACCESSED_MASK BIT_ULL(5) -#define PTE_DIRTY_MASK BIT_ULL(6) -#define PTE_LARGE_MASK BIT_ULL(7) -#define PTE_GLOBAL_MASK BIT_ULL(8) -#define PTE_NX_MASK BIT_ULL(63) - #define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12) #define PAGE_SHIFT 12 @@ -1367,8 +1357,6 @@ static inline bool kvm_is_ignore_msrs(void) return get_kvm_param_bool("ignore_msrs"); } -uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, - int *level); uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr); uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, @@ -1451,7 +1439,20 @@ enum pg_level { #define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M) #define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G) -void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level); +struct pte_masks { + uint64_t present; + uint64_t writeable; + uint64_t user; + uint64_t accessed; + uint64_t dirty; + uint64_t large; + uint64_t nx; +}; + +extern const struct pte_masks x86_pte_masks; + +void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, + int level, const struct pte_masks *masks); void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, uint64_t nr_bytes, int level); diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index 4ee9fc844ee66..8a838f208abe4 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -169,14 +169,25 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) } } +const struct pte_masks x86_pte_masks = { + .present = BIT_ULL(0), + .writeable = BIT_ULL(1), + .user = BIT_ULL(2), + .accessed = BIT_ULL(5), + .dirty = BIT_ULL(6), + .large = BIT_ULL(7), + .nx = BIT_ULL(63), +}; + static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte, - uint64_t vaddr, int level) + uint64_t vaddr, int level, + const struct pte_masks *masks) { uint64_t pt_gpa = PTE_GET_PA(*parent_pte); uint64_t *page_table = addr_gpa2hva(vm, pt_gpa); int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; - TEST_ASSERT((*parent_pte & PTE_PRESENT_MASK) || parent_pte == &vm->pgd, + TEST_ASSERT((*parent_pte & masks->present) || parent_pte == &vm->pgd, "Parent PTE (level %d) not PRESENT for gva: 0x%08lx", level + 1, vaddr); @@ -188,16 +199,17 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int current_level, - int target_level) + int target_level, + const struct pte_masks *masks) { - uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level); + uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level, masks); paddr = vm_untag_gpa(vm, paddr); - if (!(*pte & PTE_PRESENT_MASK)) { - *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK; + if (!(*pte & 
masks->present)) { + *pte = masks->present | masks->writeable; if (current_level == target_level) - *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK); + *pte |= masks->large | (paddr & PHYSICAL_PAGE_MASK); else *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK; } else { @@ -209,14 +221,15 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, TEST_ASSERT(current_level != target_level, "Cannot create hugepage at level: %u, vaddr: 0x%lx", current_level, vaddr); - TEST_ASSERT(!(*pte & PTE_LARGE_MASK), + TEST_ASSERT(!(*pte & masks->large), "Cannot create page table at level: %u, vaddr: 0x%lx", current_level, vaddr); } return pte; } -void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) +void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, + int level, const struct pte_masks *masks) { const uint64_t pg_size = PG_LEVEL_SIZE(level); uint64_t *pte = &vm->pgd; @@ -246,16 +259,16 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) * early if a hugepage was created. */ for (current_level = vm->pgtable_levels; current_level > PG_LEVEL_4K; current_level--) { - pte = virt_create_upper_pte(vm, pte, vaddr, paddr, current_level, level); - if (*pte & PTE_LARGE_MASK) + pte = virt_create_upper_pte(vm, pte, vaddr, paddr, current_level, level, masks); + if (*pte & masks->large) return; } /* Fill in page table entry. */ - pte = virt_get_pte(vm, pte, vaddr, PG_LEVEL_4K); - TEST_ASSERT(!(*pte & PTE_PRESENT_MASK), + pte = virt_get_pte(vm, pte, vaddr, PG_LEVEL_4K, masks); + TEST_ASSERT(!(*pte & masks->present), "PTE already present for 4k page at vaddr: 0x%lx", vaddr); - *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK); + *pte = masks->present | masks->writeable | (paddr & PHYSICAL_PAGE_MASK); /* * Neither SEV nor TDX supports shared page tables, so only the final @@ -269,7 +282,7 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) { - __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K); + __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K, &x86_pte_masks); } void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, @@ -284,7 +297,7 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, nr_bytes, pg_size); for (i = 0; i < nr_pages; i++) { - __virt_pg_map(vm, vaddr, paddr, level); + __virt_pg_map(vm, vaddr, paddr, level, &x86_pte_masks); sparsebit_set_num(vm->vpages_mapped, vaddr >> vm->page_shift, nr_bytes / PAGE_SIZE); @@ -293,9 +306,10 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, } } -static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level) +static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level, + const struct pte_masks *masks) { - if (*pte & PTE_LARGE_MASK) { + if (*pte & masks->large) { TEST_ASSERT(*level == PG_LEVEL_NONE || *level == current_level, "Unexpected hugepage at level %d", current_level); @@ -305,8 +319,10 @@ static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level) return *level == current_level; } -uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, - int *level) +static uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, + uint64_t vaddr, + int *level, + const struct pte_masks *masks) { uint64_t *pte = &vm->pgd; int current_level; @@ -332,8 +348,8 @@ uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, "Canonical check failed. 
The virtual address is invalid."); for (current_level = vm->pgtable_levels; current_level >= PG_LEVEL_4K; current_level--) { - pte = virt_get_pte(vm, pte, vaddr, current_level); - if (vm_is_target_pte(pte, level, current_level)) + pte = virt_get_pte(vm, pte, vaddr, current_level, masks); + if (vm_is_target_pte(pte, level, current_level, masks)) return pte; } @@ -344,11 +360,12 @@ uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr) { int level = PG_LEVEL_4K; - return __vm_get_page_table_entry(vm, vaddr, &level); + return __vm_get_page_table_entry(vm, vaddr, &level, &x86_pte_masks); } void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) { + const struct pte_masks *masks = &x86_pte_masks; uint64_t *pml4e, *pml4e_start; uint64_t *pdpe, *pdpe_start; uint64_t *pde, *pde_start; @@ -365,44 +382,44 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd); for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) { pml4e = &pml4e_start[n1]; - if (!(*pml4e & PTE_PRESENT_MASK)) + if (!(*pml4e & masks->present)) continue; fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u " " %u\n", indent, "", pml4e - pml4e_start, pml4e, addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e), - !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK)); + !!(*pml4e & masks->writeable), !!(*pml4e & masks->nx)); pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK); for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) { pdpe = &pdpe_start[n2]; - if (!(*pdpe & PTE_PRESENT_MASK)) + if (!(*pdpe & masks->present)) continue; fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10llx " "%u %u\n", indent, "", pdpe - pdpe_start, pdpe, addr_hva2gpa(vm, pdpe), - PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK), - !!(*pdpe & PTE_NX_MASK)); + PTE_GET_PFN(*pdpe), !!(*pdpe & masks->writeable), + !!(*pdpe & masks->nx)); pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK); for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) { pde = &pde_start[n3]; - if (!(*pde & PTE_PRESENT_MASK)) + if (!(*pde & masks->present)) continue; fprintf(stream, "%*spde 0x%-3zx %p " "0x%-12lx 0x%-10llx %u %u\n", indent, "", pde - pde_start, pde, addr_hva2gpa(vm, pde), - PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK), - !!(*pde & PTE_NX_MASK)); + PTE_GET_PFN(*pde), !!(*pde & masks->writeable), + !!(*pde & masks->nx)); pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK); for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) { pte = &pte_start[n4]; - if (!(*pte & PTE_PRESENT_MASK)) + if (!(*pte & masks->present)) continue; fprintf(stream, "%*spte 0x%-3zx %p " "0x%-12lx 0x%-10llx %u %u " @@ -411,9 +428,9 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) pte - pte_start, pte, addr_hva2gpa(vm, pte), PTE_GET_PFN(*pte), - !!(*pte & PTE_WRITABLE_MASK), - !!(*pte & PTE_NX_MASK), - !!(*pte & PTE_DIRTY_MASK), + !!(*pte & masks->writeable), + !!(*pte & masks->nx), + !!(*pte & masks->dirty), ((uint64_t) n1 << 27) | ((uint64_t) n2 << 18) | ((uint64_t) n3 << 9) @@ -493,10 +510,11 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_segment *segp) vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) { + const struct pte_masks *masks = &x86_pte_masks; int level = PG_LEVEL_NONE; - uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level); + uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level, masks); - TEST_ASSERT(*pte & PTE_PRESENT_MASK, + TEST_ASSERT(*pte & masks->present, "Leaf PTE not PRESENT for gva: 0x%08lx", gva); /* -- 2.51.0.869.ge66316f041-goog Instead of hardcoding 
the root check against vm->pgd, pass the root_gpa into virt_get_pte(). There's a subtle change here: instead of checking that the parent pointer holds the address of vm->pgd, check whether the value pointed to by the parent pointer is the root_gpa. No change in behavior expected, but this will be required for following changes that generalize __virt_pg_map() to other MMUs. Signed-off-by: Yosry Ahmed --- tools/testing/selftests/kvm/lib/x86/processor.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index 8a838f208abe4..92a2b5aefd880 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -179,15 +179,15 @@ const struct pte_masks x86_pte_masks = { .nx = BIT_ULL(63), }; -static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte, - uint64_t vaddr, int level, +static void *virt_get_pte(struct kvm_vm *vm, vm_paddr_t root_gpa, + uint64_t *parent_pte, uint64_t vaddr, int level, const struct pte_masks *masks) { uint64_t pt_gpa = PTE_GET_PA(*parent_pte); uint64_t *page_table = addr_gpa2hva(vm, pt_gpa); int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; - TEST_ASSERT((*parent_pte & masks->present) || parent_pte == &vm->pgd, + TEST_ASSERT((*parent_pte == root_gpa) || (*parent_pte & masks->present), "Parent PTE (level %d) not PRESENT for gva: 0x%08lx", level + 1, vaddr); @@ -195,6 +195,7 @@ static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte, } static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, + vm_paddr_t root_gpa, uint64_t *parent_pte, uint64_t vaddr, uint64_t paddr, @@ -202,7 +203,8 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, int target_level, const struct pte_masks *masks) { - uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level, masks); + uint64_t *pte = virt_get_pte(vm, root_gpa, parent_pte, + vaddr, current_level, masks); paddr = vm_untag_gpa(vm, paddr); @@ -259,13 +261,14 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, * early if a hugepage was created. */ for (current_level = vm->pgtable_levels; current_level > PG_LEVEL_4K; current_level--) { - pte = virt_create_upper_pte(vm, pte, vaddr, paddr, current_level, level, masks); + pte = virt_create_upper_pte(vm, vm->pgd, pte, vaddr, paddr, + current_level, level, masks); if (*pte & masks->large) return; } /* Fill in page table entry. */ - pte = virt_get_pte(vm, pte, vaddr, PG_LEVEL_4K, masks); + pte = virt_get_pte(vm, vm->pgd, pte, vaddr, PG_LEVEL_4K, masks); TEST_ASSERT(!(*pte & masks->present), "PTE already present for 4k page at vaddr: 0x%lx", vaddr); *pte = masks->present | masks->writeable | (paddr & PHYSICAL_PAGE_MASK); @@ -348,7 +351,7 @@ static uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, "Canonical check failed. The virtual address is invalid."); for (current_level = vm->pgtable_levels; current_level >= PG_LEVEL_4K; current_level--) { - pte = virt_get_pte(vm, pte, vaddr, current_level, masks); + pte = virt_get_pte(vm, vm->pgd, pte, vaddr, current_level, masks); if (vm_is_target_pte(pte, level, current_level, masks)) return pte; } -- 2.51.0.869.ge66316f041-goog Pass the root_gpa into __virt_pg_map() instead of assuming vm->pgd in preparation for using it with other MMUs. No functional change intended.
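To see where this is headed, a future nested-MMU user would supply its own root and mask set instead of vm->pgd. Something like the following hypothetical call, where ept_root and ept_pte_masks are placeholders this series has not introduced at this point, and the "vaddr" argument is really an L2 guest-physical address:

	__virt_pg_map(vm, ept_root, nested_paddr, paddr, PG_LEVEL_4K,
		      &ept_pte_masks);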
Signed-off-by: Yosry Ahmed --- .../testing/selftests/kvm/include/x86/processor.h | 4 ++-- tools/testing/selftests/kvm/lib/x86/processor.c | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index 8debe0df3ffca..2f0d83b6e5952 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -1451,8 +1451,8 @@ struct pte_masks { extern const struct pte_masks x86_pte_masks; -void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - int level, const struct pte_masks *masks); +void __virt_pg_map(struct kvm_vm *vm, vm_paddr_t root_gpa, uint64_t vaddr, + uint64_t paddr, int level, const struct pte_masks *masks); void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, uint64_t nr_bytes, int level); diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index 92a2b5aefd880..13e9376d5f545 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -230,11 +230,11 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, return pte; } -void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - int level, const struct pte_masks *masks) +void __virt_pg_map(struct kvm_vm *vm, vm_paddr_t root_gpa, uint64_t vaddr, + uint64_t paddr, int level, const struct pte_masks *masks) { const uint64_t pg_size = PG_LEVEL_SIZE(level); - uint64_t *pte = &vm->pgd; + uint64_t *pte = &root_gpa; int current_level; TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K || @@ -261,14 +261,14 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, * early if a hugepage was created. */ for (current_level = vm->pgtable_levels; current_level > PG_LEVEL_4K; current_level--) { - pte = virt_create_upper_pte(vm, vm->pgd, pte, vaddr, paddr, + pte = virt_create_upper_pte(vm, root_gpa, pte, vaddr, paddr, current_level, level, masks); if (*pte & masks->large) return; } /* Fill in page table entry. */ - pte = virt_get_pte(vm, vm->pgd, pte, vaddr, PG_LEVEL_4K, masks); + pte = virt_get_pte(vm, root_gpa, pte, vaddr, PG_LEVEL_4K, masks); TEST_ASSERT(!(*pte & masks->present), "PTE already present for 4k page at vaddr: 0x%lx", vaddr); *pte = masks->present | masks->writeable | (paddr & PHYSICAL_PAGE_MASK); @@ -285,7 +285,7 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) { - __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K, &x86_pte_masks); + __virt_pg_map(vm, vm->pgd, vaddr, paddr, PG_LEVEL_4K, &x86_pte_masks); } void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, @@ -300,7 +300,7 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, nr_bytes, pg_size); for (i = 0; i < nr_pages; i++) { - __virt_pg_map(vm, vaddr, paddr, level, &x86_pte_masks); + __virt_pg_map(vm, vm->pgd, vaddr, paddr, level, &x86_pte_masks); sparsebit_set_num(vm->vpages_mapped, vaddr >> vm->page_shift, nr_bytes / PAGE_SIZE); -- 2.51.0.869.ge66316f041-goog When new nested EPT entries are created, the accessed and dirty (AD) bits are set. This behavior was introduced by commit 094444204570 ("selftests: kvm: add test for dirty logging inside nested guests"), which added vmx_dirty_log_test. It's unclear why that was needed at the time, but regardless, the test passes without the AD bits, so they are probably no longer needed.
dirty_log_perf_test (with -n to run in L2) also passes, and these are the only tests currently using nested EPT mappings. Signed-off-by: Yosry Ahmed --- tools/testing/selftests/kvm/lib/x86/vmx.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c index f0023a3b0137e..36e60016fa7b2 100644 --- a/tools/testing/selftests/kvm/lib/x86/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86/vmx.c @@ -436,14 +436,6 @@ void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, pt = addr_gpa2hva(vm, pte->address * vm->page_size); } - - /* - * For now mark these as accessed and dirty because the only - * testcase we have needs that. Can be reconsidered later. - */ - pte->accessed = true; - pte->dirty = true; - } void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, -- 2.51.0.869.ge66316f041-goog __nested_pg_map() bears a lot of resemblance to __virt_pg_map(). The main difference is using the EPT struct overlay instead of the PTE masks. Now that PTE masks are passed into __virt_pg_map() as a struct, define a similar struct for EPTs and use __virt_pg_map() instead of __nested_pg_map(). EPTs have no 'present' or 'user' bits, so use the 'readable' bit instead like shadow_{present/user}_mask, ignoring the fact that entries can be present and not readable if the CPU has VMX_EPT_EXECUTE_ONLY_BIT. This is simple and sufficient for testing. Opportunistically drop nested_pg_map() since it has no callers. Add an executable bitmask to struct pte_masks, and update __virt_pg_map() and friends to set the bit on newly created entries to match the EPT behavior. It's a no-op for x86 page tables. Another benefit of reusing the code is having separate handling for upper-level PTEs vs 4K PTEs, which avoids some quirks like setting the large bit on a 4K PTE.
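As an illustrative aside (values derived from the masks defined in this patch, not part of the change itself), a 4K leaf EPT entry for paddr 0x2000 built by __virt_pg_map() works out to:

	/*
	 * readable (bit 0, doubling as 'present' and 'user'), writable
	 * (bit 1) and executable (bit 2) set, with the remaining bits
	 * holding the physical address:
	 */
	*pte = masks->present | masks->writeable | masks->x
	       | (0x2000 & PHYSICAL_PAGE_MASK);		/* == 0x2007 */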
Signed-off-by: Yosry Ahmed Suggested-by: Sean Christopherson --- .../selftests/kvm/include/x86/processor.h | 1 + tools/testing/selftests/kvm/include/x86/vmx.h | 4 +- .../testing/selftests/kvm/lib/x86/processor.c | 9 +- tools/testing/selftests/kvm/lib/x86/vmx.c | 113 +++--------------- 4 files changed, 27 insertions(+), 100 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index 2f0d83b6e5952..0d6d335d309ef 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -1447,6 +1447,7 @@ struct pte_masks { uint64_t dirty; uint64_t large; uint64_t nx; + uint64_t x; }; extern const struct pte_masks x86_pte_masks; diff --git a/tools/testing/selftests/kvm/include/x86/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h index 96e2b4c630a9b..5aa14ceed050a 100644 --- a/tools/testing/selftests/kvm/include/x86/vmx.h +++ b/tools/testing/selftests/kvm/include/x86/vmx.h @@ -99,6 +99,8 @@ #define VMX_EPT_VPID_CAP_1G_PAGES 0x00020000 #define VMX_EPT_VPID_CAP_AD_BITS 0x00200000 +extern const struct pte_masks ept_pte_masks; + #define EXIT_REASON_FAILED_VMENTRY 0x80000000 enum vmcs_field { @@ -559,8 +561,6 @@ bool load_vmcs(struct vmx_pages *vmx); bool ept_1g_pages_supported(void); -void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr); void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr, uint64_t size); void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index 13e9376d5f545..caad8a9b3f067 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -177,6 +177,7 @@ const struct pte_masks x86_pte_masks = { .dirty = BIT_ULL(6), .large = BIT_ULL(7), .nx = BIT_ULL(63), + .x = 0, }; static void *virt_get_pte(struct kvm_vm *vm, vm_paddr_t root_gpa, @@ -209,7 +210,7 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, paddr = vm_untag_gpa(vm, paddr); if (!(*pte & masks->present)) { - *pte = masks->present | masks->writeable; + *pte = masks->present | masks->writeable | masks->x; if (current_level == target_level) *pte |= masks->large | (paddr & PHYSICAL_PAGE_MASK); else @@ -256,6 +257,9 @@ void __virt_pg_map(struct kvm_vm *vm, vm_paddr_t root_gpa, uint64_t vaddr, TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr, "Unexpected bits in paddr: %lx", paddr); + TEST_ASSERT(!masks->x || !masks->nx, + "X and NX bit masks cannot be used simultaneously"); + /* * Allocate upper level page tables, if not already present. Return * early if a hugepage was created. 
@@ -271,7 +275,8 @@ void __virt_pg_map(struct kvm_vm *vm, vm_paddr_t root_gpa, uint64_t vaddr, pte = virt_get_pte(vm, root_gpa, pte, vaddr, PG_LEVEL_4K, masks); TEST_ASSERT(!(*pte & masks->present), "PTE already present for 4k page at vaddr: 0x%lx", vaddr); - *pte = masks->present | masks->writeable | (paddr & PHYSICAL_PAGE_MASK); + *pte = masks->present | masks->writeable | masks->x + | (paddr & PHYSICAL_PAGE_MASK); /* * Neither SEV nor TDX supports shared page tables, so only the final diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c index 36e60016fa7b2..46a491eb083c9 100644 --- a/tools/testing/selftests/kvm/lib/x86/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86/vmx.c @@ -19,19 +19,21 @@ bool enable_evmcs; struct hv_enlightened_vmcs *current_evmcs; struct hv_vp_assist_page *current_vp_assist; -struct eptPageTableEntry { - uint64_t readable:1; - uint64_t writable:1; - uint64_t executable:1; - uint64_t memory_type:3; - uint64_t ignore_pat:1; - uint64_t page_size:1; - uint64_t accessed:1; - uint64_t dirty:1; - uint64_t ignored_11_10:2; - uint64_t address:40; - uint64_t ignored_62_52:11; - uint64_t suppress_ve:1; +const struct pte_masks ept_pte_masks = { + /* + * EPTs do not have 'present' or 'user' bits, instead bit 0 is the + * 'readable' bit. In some cases, EPTs can be execute-only and an entry + * is present but not readable. However, for the purposes of testing we + * assume present == user == readable for simplicity. + */ + .present = BIT_ULL(0), + .user = BIT_ULL(0), + .writeable = BIT_ULL(1), + .x = BIT_ULL(2), + .accessed = BIT_ULL(5), + .dirty = BIT_ULL(6), + .large = BIT_ULL(7), + .nx = 0, }; struct eptPageTablePointer { @@ -362,88 +364,6 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp) init_vmcs_guest_state(guest_rip, guest_rsp); } -static void nested_create_pte(struct kvm_vm *vm, - struct eptPageTableEntry *pte, - uint64_t nested_paddr, - uint64_t paddr, - int current_level, - int target_level) -{ - if (!pte->readable) { - pte->writable = true; - pte->readable = true; - pte->executable = true; - pte->page_size = (current_level == target_level); - if (pte->page_size) - pte->address = paddr >> vm->page_shift; - else - pte->address = vm_alloc_page_table(vm) >> vm->page_shift; - } else { - /* - * Entry already present. Assert that the caller doesn't want - * a hugepage at this level, and that there isn't a hugepage at - * this level. 
- */ - TEST_ASSERT(current_level != target_level, - "Cannot create hugepage at level: %u, nested_paddr: 0x%lx", - current_level, nested_paddr); - TEST_ASSERT(!pte->page_size, - "Cannot create page table at level: %u, nested_paddr: 0x%lx", - current_level, nested_paddr); - } -} - - -void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr, int target_level) -{ - const uint64_t page_size = PG_LEVEL_SIZE(target_level); - struct eptPageTableEntry *pt = vmx->eptp_hva, *pte; - uint16_t index; - - TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K || - vm->mode == VM_MODE_PXXV57_4K, - "Unknown or unsupported guest mode: 0x%x", vm->mode); - - TEST_ASSERT((nested_paddr >> 48) == 0, - "Nested physical address 0x%lx is > 48-bits and requires 5-level EPT", - nested_paddr); - TEST_ASSERT((nested_paddr % page_size) == 0, - "Nested physical address not on page boundary,\n" - " nested_paddr: 0x%lx page_size: 0x%lx", - nested_paddr, page_size); - TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn, - "Physical address beyond beyond maximum supported,\n" - " nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", - paddr, vm->max_gfn, vm->page_size); - TEST_ASSERT((paddr % page_size) == 0, - "Physical address not on page boundary,\n" - " paddr: 0x%lx page_size: 0x%lx", - paddr, page_size); - TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, - "Physical address beyond beyond maximum supported,\n" - " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", - paddr, vm->max_gfn, vm->page_size); - - for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) { - index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; - pte = &pt[index]; - - nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level); - - if (pte->page_size) - break; - - pt = addr_gpa2hva(vm, pte->address * vm->page_size); - } -} - -void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr) -{ - __nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K); -} - /* * Map a range of EPT guest physical addresses to the VM's physical address * @@ -472,7 +392,8 @@ void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); while (npages--) { - __nested_pg_map(vmx, vm, nested_paddr, paddr, level); + __virt_pg_map(vm, vmx->eptp_gpa, nested_paddr, paddr, + level, &ept_pte_masks); nested_paddr += page_size; paddr += page_size; } -- 2.51.0.869.ge66316f041-goog Replace the struct overlay with explicit bitmasks, which is clearer and less error-prone. See commit f18b4aebe107 ("kvm: selftests: do not use bitfields larger than 32-bits for PTEs") for an example of why bitfields are not preferable. Remove the unused PAGE_SHIFT_4K definition while at it. No functional change intended.
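As a worked example (illustrative only; X86_MEMTYPE_WB is 6 in the kernel's memtype encoding), an EPT root at GPA 0x7000 with A/D bits supported yields the pointer value:

	eptp = 0x7000 | EPTP_WB | EPTP_PWL_4 | EPTP_AD_ENABLED
	     = 0x7000 | 0x6 | (3ULL << 3) | (1ULL << 6)
	     = 0x705e;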
Signed-off-by: Yosry Ahmed --- tools/testing/selftests/kvm/lib/x86/vmx.c | 37 +++++++++++------------ 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c index 46a491eb083c9..75996fc00501e 100644 --- a/tools/testing/selftests/kvm/lib/x86/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86/vmx.c @@ -10,10 +10,16 @@ #include "processor.h" #include "vmx.h" -#define PAGE_SHIFT_4K 12 - #define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000 +#define EPTP_MT_SHIFT 0 /* EPTP memtype bits 2:0 */ +#define EPTP_PWL_SHIFT 3 /* EPTP page walk length bits 5:3 */ +#define EPTP_AD_ENABLED_SHIFT 6 /* EPTP AD enabled bit 6 */ + +#define EPTP_WB (X86_MEMTYPE_WB << EPTP_MT_SHIFT) +#define EPTP_PWL_4 (3ULL << EPTP_PWL_SHIFT) /* PWL is (levels - 1) */ +#define EPTP_AD_ENABLED (1ULL << EPTP_AD_ENABLED_SHIFT) + bool enable_evmcs; struct hv_enlightened_vmcs *current_evmcs; struct hv_vp_assist_page *current_vp_assist; @@ -36,14 +42,6 @@ const struct pte_masks ept_pte_masks = { .nx = 0, }; -struct eptPageTablePointer { - uint64_t memory_type:3; - uint64_t page_walk_length:3; - uint64_t ad_enabled:1; - uint64_t reserved_11_07:5; - uint64_t address:40; - uint64_t reserved_63_52:12; -}; int vcpu_enable_evmcs(struct kvm_vcpu *vcpu) { uint16_t evmcs_ver; @@ -198,16 +196,15 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx) vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS)); if (vmx->eptp_gpa) { - uint64_t ept_paddr; - struct eptPageTablePointer eptp = { - .memory_type = X86_MEMTYPE_WB, - .page_walk_length = 3, /* + 1 */ - .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS), - .address = vmx->eptp_gpa >> PAGE_SHIFT_4K, - }; - - memcpy(&ept_paddr, &eptp, sizeof(ept_paddr)); - vmwrite(EPT_POINTER, ept_paddr); + uint64_t eptp = vmx->eptp_gpa | EPTP_WB | EPTP_PWL_4; + + TEST_ASSERT((vmx->eptp_gpa & ~PHYSICAL_PAGE_MASK) == 0, + "Illegal bits set in vmx->eptp_gpa"); + + if (ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS)) + eptp |= EPTP_AD_ENABLED; + + vmwrite(EPT_POINTER, eptp); sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT; } -- 2.51.0.869.ge66316f041-goog Instead of passing in a pointer to struct vmx_pages, pass in the GPA of the root of the EPTs, as that's the only member being used. Furthermore, only use ept_pte_masks for VMX, and use x86_pte_masks otherwise (which is what NPT uses). This is in preparation for supporting NPTs as well. No functional change intended.
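For example (a sketch only; ncr3_gpa is not added to struct svm_test_data until later in this series), call sites end up looking like:

	/* VMX: pass the EPT root directly */
	nested_map(vm, vmx->eptp_gpa, nested_paddr, paddr, size);

	/* SVM (later in the series): pass the NPT root the same way */
	nested_map(vm, svm->ncr3_gpa, nested_paddr, paddr, size);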
Signed-off-by: Yosry Ahmed --- tools/testing/selftests/kvm/include/x86/vmx.h | 6 +++--- .../testing/selftests/kvm/lib/x86/memstress.c | 4 ++-- tools/testing/selftests/kvm/lib/x86/vmx.c | 20 ++++++++++--------- .../selftests/kvm/x86/vmx_dirty_log_test.c | 6 +++--- 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h index 5aa14ceed050a..4429e83e1f52c 100644 --- a/tools/testing/selftests/kvm/include/x86/vmx.h +++ b/tools/testing/selftests/kvm/include/x86/vmx.h @@ -561,11 +561,11 @@ bool load_vmcs(struct vmx_pages *vmx); bool ept_1g_pages_supported(void); -void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, +void nested_map(struct kvm_vm *vm, vm_paddr_t root_gpa, uint64_t nested_paddr, uint64_t paddr, uint64_t size); -void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, +void nested_map_memslot(struct kvm_vm *vm, vm_paddr_t root_gpa, uint32_t memslot); -void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, +void nested_identity_map_1g(struct kvm_vm *vm, vm_paddr_t root_gpa, uint64_t addr, uint64_t size); bool kvm_cpu_has_ept(void); void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/lib/x86/memstress.c b/tools/testing/selftests/kvm/lib/x86/memstress.c index 0b1f288ad5564..5ca970a8a5c14 100644 --- a/tools/testing/selftests/kvm/lib/x86/memstress.c +++ b/tools/testing/selftests/kvm/lib/x86/memstress.c @@ -70,11 +70,11 @@ void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) * KVM can shadow the EPT12 with the maximum huge page size supported * by the backing source. */ - nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL); + nested_identity_map_1g(vm, vmx->eptp_gpa, 0, 0x100000000ULL); start = align_down(memstress_args.gpa, PG_SIZE_1G); end = align_up(memstress_args.gpa + memstress_args.size, PG_SIZE_1G); - nested_identity_map_1g(vmx, vm, start, end - start); + nested_identity_map_1g(vm, vmx->eptp_gpa, start, end - start); } void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]) diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c index 75996fc00501e..0573b3ea717cb 100644 --- a/tools/testing/selftests/kvm/lib/x86/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86/vmx.c @@ -378,34 +378,36 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp) * Within the VM given by vm, creates a nested guest translation for the * page range starting at nested_paddr to the page range starting at paddr. */ -void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, +void __nested_map(struct kvm_vm *vm, vm_paddr_t root_gpa, uint64_t nested_paddr, uint64_t paddr, uint64_t size, int level) { size_t page_size = PG_LEVEL_SIZE(level); size_t npages = size / page_size; + const struct pte_masks *masks; + + masks = kvm_cpu_has(X86_FEATURE_VMX) ? 
&ept_pte_masks : &x86_pte_masks; TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow"); TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); while (npages--) { - __virt_pg_map(vm, vmx->eptp_gpa, nested_paddr, paddr, - level, &ept_pte_masks); + __virt_pg_map(vm, root_gpa, nested_paddr, paddr, level, masks); nested_paddr += page_size; paddr += page_size; } } -void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, +void nested_map(struct kvm_vm *vm, vm_paddr_t root_gpa, uint64_t nested_paddr, uint64_t paddr, uint64_t size) { - __nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K); + __nested_map(vm, root_gpa, nested_paddr, paddr, size, PG_LEVEL_4K); } /* Prepare an identity extended page table that maps all the * physical pages in VM. */ -void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, +void nested_map_memslot(struct kvm_vm *vm, vm_paddr_t root_gpa, uint32_t memslot) { sparsebit_idx_t i, last; @@ -419,7 +421,7 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, if (i > last) break; - nested_map(vmx, vm, + nested_map(vm, root_gpa, (uint64_t)i << vm->page_shift, (uint64_t)i << vm->page_shift, 1 << vm->page_shift); @@ -427,10 +429,10 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, } /* Identity map a region with 1GiB Pages. */ -void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, +void nested_identity_map_1g(struct kvm_vm *vm, vm_paddr_t root_gpa, uint64_t addr, uint64_t size) { - __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G); + __nested_map(vm, root_gpa, addr, addr, size, PG_LEVEL_1G); } bool kvm_cpu_has_ept(void) diff --git a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c index 98cb6bdab3e6d..e54e6111164e7 100644 --- a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c @@ -121,9 +121,9 @@ static void test_vmx_dirty_log(bool enable_ept) */ if (enable_ept) { prepare_eptp(vmx, vm); - nested_map_memslot(vmx, vm, 0); - nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, PAGE_SIZE); - nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, PAGE_SIZE); + nested_map_memslot(vm, vmx->eptp_gpa, 0); + nested_map(vm, vmx->eptp_gpa, NESTED_TEST_MEM1, GUEST_TEST_MEM, PAGE_SIZE); + nested_map(vm, vmx->eptp_gpa, NESTED_TEST_MEM2, GUEST_TEST_MEM, PAGE_SIZE); } bmap = bitmap_zalloc(TEST_MEM_PAGES); -- 2.51.0.869.ge66316f041-goog Now that the functions are no longer VMX-specific, move them to processor.c. Expose ept_pte_masks in vmx.h to make it accessible by __nested_map(). No functional change intended. 
Signed-off-by: Yosry Ahmed --- .../selftests/kvm/include/x86/processor.h | 7 ++ tools/testing/selftests/kvm/include/x86/vmx.h | 8 +- .../testing/selftests/kvm/lib/x86/processor.c | 75 +++++++++++++++++++ tools/testing/selftests/kvm/lib/x86/vmx.c | 74 ------------------ 4 files changed, 84 insertions(+), 80 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index 0d6d335d309ef..13e8f4a1f589d 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -1457,6 +1457,13 @@ void __virt_pg_map(struct kvm_vm *vm, vm_paddr_t root_gpa, uint64_t vaddr, void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, uint64_t nr_bytes, int level); +void nested_map(struct kvm_vm *vm, vm_paddr_t root_gpa, + uint64_t nested_paddr, uint64_t paddr, uint64_t size); +void nested_map_memslot(struct kvm_vm *vm, vm_paddr_t root_gpa, + uint32_t memslot); +void nested_identity_map_1g(struct kvm_vm *vm, vm_paddr_t root_gpa, + uint64_t addr, uint64_t size); + /* * Basic CPU control in CR0 */ diff --git a/tools/testing/selftests/kvm/include/x86/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h index 4429e83e1f52c..b832774d99cdb 100644 --- a/tools/testing/selftests/kvm/include/x86/vmx.h +++ b/tools/testing/selftests/kvm/include/x86/vmx.h @@ -559,14 +559,10 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx); void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp); bool load_vmcs(struct vmx_pages *vmx); +extern const struct pte_masks ept_pte_masks; + bool ept_1g_pages_supported(void); -void nested_map(struct kvm_vm *vm, vm_paddr_t root_gpa, - uint64_t nested_paddr, uint64_t paddr, uint64_t size); -void nested_map_memslot(struct kvm_vm *vm, vm_paddr_t root_gpa, - uint32_t memslot); -void nested_identity_map_1g(struct kvm_vm *vm, vm_paddr_t root_gpa, - uint64_t addr, uint64_t size); bool kvm_cpu_has_ept(void); void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm); void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index caad8a9b3f067..1725f8fde2aa5 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -9,6 +9,7 @@ #include "pmu.h" #include "processor.h" #include "sev.h" +#include "vmx.h" #ifndef NUM_INTERRUPTS #define NUM_INTERRUPTS 256 @@ -449,6 +450,80 @@ void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) } } +/* + * Map a range of EPT guest physical addresses to the VM's physical address + * + * Input Args: + * vm - Virtual Machine + * nested_paddr - Nested guest physical address to map + * paddr - VM Physical Address + * size - The size of the range to map + * level - The level at which to map the range + * + * Output Args: None + * + * Return: None + * + * Within the VM given by vm, creates a nested guest translation for the + * page range starting at nested_paddr to the page range starting at paddr. + */ +void __nested_map(struct kvm_vm *vm, vm_paddr_t root_gpa, + uint64_t nested_paddr, uint64_t paddr, uint64_t size, + int level) +{ + size_t page_size = PG_LEVEL_SIZE(level); + size_t npages = size / page_size; + const struct pte_masks *masks; + + masks = kvm_cpu_has(X86_FEATURE_VMX) ? 
&ept_pte_masks : &x86_pte_masks; + + TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow"); + TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); + + while (npages--) { + __virt_pg_map(vm, root_gpa, nested_paddr, paddr, level, masks); + nested_paddr += page_size; + paddr += page_size; + } +} + +void nested_map(struct kvm_vm *vm, vm_paddr_t root_gpa, + uint64_t nested_paddr, uint64_t paddr, uint64_t size) +{ + __nested_map(vm, root_gpa, nested_paddr, paddr, size, PG_LEVEL_4K); +} + +/* Prepare an identity extended page table that maps all the + * physical pages in VM. + */ +void nested_map_memslot(struct kvm_vm *vm, vm_paddr_t root_gpa, + uint32_t memslot) +{ + sparsebit_idx_t i, last; + struct userspace_mem_region *region = + memslot2region(vm, memslot); + + i = (region->region.guest_phys_addr >> vm->page_shift) - 1; + last = i + (region->region.memory_size >> vm->page_shift); + for (;;) { + i = sparsebit_next_clear(region->unused_phy_pages, i); + if (i > last) + break; + + nested_map(vm, root_gpa, + (uint64_t)i << vm->page_shift, + (uint64_t)i << vm->page_shift, + 1 << vm->page_shift); + } +} + +/* Identity map a region with 1GiB Pages. */ +void nested_identity_map_1g(struct kvm_vm *vm, vm_paddr_t root_gpa, + uint64_t addr, uint64_t size) +{ + __nested_map(vm, root_gpa, addr, addr, size, PG_LEVEL_1G); +} + /* * Set Unusable Segment * diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c index 0573b3ea717cb..1a9743cabcf4b 100644 --- a/tools/testing/selftests/kvm/lib/x86/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86/vmx.c @@ -361,80 +361,6 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp) init_vmcs_guest_state(guest_rip, guest_rsp); } -/* - * Map a range of EPT guest physical addresses to the VM's physical address - * - * Input Args: - * vm - Virtual Machine - * nested_paddr - Nested guest physical address to map - * paddr - VM Physical Address - * size - The size of the range to map - * level - The level at which to map the range - * - * Output Args: None - * - * Return: None - * - * Within the VM given by vm, creates a nested guest translation for the - * page range starting at nested_paddr to the page range starting at paddr. - */ -void __nested_map(struct kvm_vm *vm, vm_paddr_t root_gpa, - uint64_t nested_paddr, uint64_t paddr, uint64_t size, - int level) -{ - size_t page_size = PG_LEVEL_SIZE(level); - size_t npages = size / page_size; - const struct pte_masks *masks; - - masks = kvm_cpu_has(X86_FEATURE_VMX) ? &ept_pte_masks : &x86_pte_masks; - - TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow"); - TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); - - while (npages--) { - __virt_pg_map(vm, root_gpa, nested_paddr, paddr, level, masks); - nested_paddr += page_size; - paddr += page_size; - } -} - -void nested_map(struct kvm_vm *vm, vm_paddr_t root_gpa, - uint64_t nested_paddr, uint64_t paddr, uint64_t size) -{ - __nested_map(vm, root_gpa, nested_paddr, paddr, size, PG_LEVEL_4K); -} - -/* Prepare an identity extended page table that maps all the - * physical pages in VM. 
- */ -void nested_map_memslot(struct kvm_vm *vm, vm_paddr_t root_gpa, - uint32_t memslot) -{ - sparsebit_idx_t i, last; - struct userspace_mem_region *region = - memslot2region(vm, memslot); - - i = (region->region.guest_phys_addr >> vm->page_shift) - 1; - last = i + (region->region.memory_size >> vm->page_shift); - for (;;) { - i = sparsebit_next_clear(region->unused_phy_pages, i); - if (i > last) - break; - - nested_map(vm, root_gpa, - (uint64_t)i << vm->page_shift, - (uint64_t)i << vm->page_shift, - 1 << vm->page_shift); - } -} - -/* Identity map a region with 1GiB Pages. */ -void nested_identity_map_1g(struct kvm_vm *vm, vm_paddr_t root_gpa, - uint64_t addr, uint64_t size) -{ - __nested_map(vm, root_gpa, addr, addr, size, PG_LEVEL_1G); -} - bool kvm_cpu_has_ept(void) { uint64_t ctrl; -- 2.51.0.869.ge66316f041-goog On x86, KVM selftests use memslot 0 for all the default regions used by the test infrastructure. This is an implementation detail. nested_map_memslot() is currently used to map the default regions by explicitly passing slot 0, which leaks the library implementation into the caller. Rename the function to a very verbose nested_identity_map_default_memslots() to reflect what it actually does. Add an assertion that only memslot 0 is being used so that the implementation does not change from under us. No functional change intended. Signed-off-by: Yosry Ahmed --- tools/testing/selftests/kvm/include/x86/processor.h | 3 +-- tools/testing/selftests/kvm/lib/x86/processor.c | 11 +++++++---- tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index 13e8f4a1f589d..2608152b2fa27 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -1459,8 +1459,7 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, void nested_map(struct kvm_vm *vm, vm_paddr_t root_gpa, uint64_t nested_paddr, uint64_t paddr, uint64_t size); -void nested_map_memslot(struct kvm_vm *vm, vm_paddr_t root_gpa, - uint32_t memslot); +void nested_identity_map_default_memslots(struct kvm_vm *vm, vm_paddr_t root_gpa); void nested_identity_map_1g(struct kvm_vm *vm, vm_paddr_t root_gpa, uint64_t addr, uint64_t size); diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index 1725f8fde2aa5..958389ec1722d 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -496,12 +496,15 @@ void nested_map(struct kvm_vm *vm, vm_paddr_t root_gpa, /* Prepare an identity extended page table that maps all the * physical pages in VM. 
*/ -void nested_map_memslot(struct kvm_vm *vm, vm_paddr_t root_gpa, - uint32_t memslot) +void nested_identity_map_default_memslots(struct kvm_vm *vm, vm_paddr_t root_gpa) { + uint32_t s, memslot = 0; sparsebit_idx_t i, last; - struct userspace_mem_region *region = - memslot2region(vm, memslot); + struct userspace_mem_region *region = memslot2region(vm, memslot); + + /* Only memslot 0 is mapped here, ensure it's the only one being used */ + for (s = 0; s < NR_MEM_REGIONS; s++) + TEST_ASSERT_EQ(vm->memslots[s], 0); i = (region->region.guest_phys_addr >> vm->page_shift) - 1; last = i + (region->region.memory_size >> vm->page_shift); diff --git a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c index e54e6111164e7..b8ebb246aaf15 100644 --- a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c @@ -121,7 +121,7 @@ static void test_vmx_dirty_log(bool enable_ept) */ if (enable_ept) { prepare_eptp(vmx, vm); - nested_map_memslot(vm, vmx->eptp_gpa, 0); + nested_identity_map_default_memslots(vm, vmx->eptp_gpa); nested_map(vm, vmx->eptp_gpa, NESTED_TEST_MEM1, GUEST_TEST_MEM, PAGE_SIZE); nested_map(vm, vmx->eptp_gpa, NESTED_TEST_MEM2, GUEST_TEST_MEM, PAGE_SIZE); } -- 2.51.0.869.ge66316f041-goog In preparation for generalizing the nested dirty logging test, checking whether either EPT or NPT is supported will be needed. To avoid needing to gate the kvm_cpu_has_ept() call by the CPU type, make sure the function returns false if VMX is not available instead of trying to read VMX-only MSRs. No functional change intended. Signed-off-by: Yosry Ahmed --- tools/testing/selftests/kvm/lib/x86/vmx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/kvm/lib/x86/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c index 1a9743cabcf4b..b1f22e78aca1a 100644 --- a/tools/testing/selftests/kvm/lib/x86/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86/vmx.c @@ -365,6 +365,9 @@ bool kvm_cpu_has_ept(void) { uint64_t ctrl; + if (!kvm_cpu_has(X86_FEATURE_VMX)) + return false; + ctrl = kvm_get_feature_msr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32; if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) return false; -- 2.51.0.869.ge66316f041-goog Similar to KVM's shadow MMU (in most cases), set the user bit on nested PTEs. This is in preparation for supporting NPT mappings, which require the user bit to be set: NPT walks treat guest-physical accesses as user accesses, so translations fault if the user bit is clear. This should be a no-op for VMX.
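Concretely (illustrative values, not part of the patch): with x86_pte_masks, where 'user' is the architectural U/S bit 2 and 'x' is 0, a 4K leaf for paddr 0x3000 becomes:

	*pte = masks->present | masks->writeable | masks->x | masks->user
	       | (0x3000 & PHYSICAL_PAGE_MASK);		/* == 0x3007 */

For ept_pte_masks, 'user' aliases bit 0 ('readable'), which is already set, hence the no-op for VMX.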
Signed-off-by: Yosry Ahmed --- tools/testing/selftests/kvm/lib/x86/processor.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index 958389ec1722d..c2912b0a49e90 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -211,7 +211,7 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, paddr = vm_untag_gpa(vm, paddr); if (!(*pte & masks->present)) { - *pte = masks->present | masks->writeable | masks->x; + *pte = masks->present | masks->writeable | masks->x | masks->user; if (current_level == target_level) *pte |= masks->large | (paddr & PHYSICAL_PAGE_MASK); else @@ -276,7 +276,7 @@ void __virt_pg_map(struct kvm_vm *vm, vm_paddr_t root_gpa, uint64_t vaddr, pte = virt_get_pte(vm, root_gpa, pte, vaddr, PG_LEVEL_4K, masks); TEST_ASSERT(!(*pte & masks->present), "PTE already present for 4k page at vaddr: 0x%lx", vaddr); - *pte = masks->present | masks->writeable | masks->x + *pte = masks->present | masks->writeable | masks->x | masks->user | (paddr & PHYSICAL_PAGE_MASK); /* -- 2.51.0.869.ge66316f041-goog Add prepare_ncr3() to initialize the nested NPTs, and enable NPT from L1 accordingly. Everything else should work for creating nested NPT mappings. Generalize the code in vmx_dirty_log_test.c by adding SVM-specific L1 code, doing some renaming (e.g. EPT -> TDP), and having setup code for both SVM and VMX in test_dirty_log(). Having multiple points to check for SVM vs VMX is not ideal, but the alternatives involve either a lot of redundancy or a lot of abstraction helpers that would make the test logic harder to follow. Signed-off-by: Yosry Ahmed --- tools/testing/selftests/kvm/Makefile.kvm | 2 +- .../selftests/kvm/include/x86/svm_util.h | 8 ++ tools/testing/selftests/kvm/lib/x86/svm.c | 19 ++++ ...rty_log_test.c => nested_dirty_log_test.c} | 96 +++++++++++++------ 4 files changed, 97 insertions(+), 28 deletions(-) rename tools/testing/selftests/kvm/x86/{vmx_dirty_log_test.c => nested_dirty_log_test.c} (62%) diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 6625ac53545e8..5da95776fc9c2 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -114,7 +114,7 @@ TEST_GEN_PROGS_x86 += x86/userspace_io_test TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test TEST_GEN_PROGS_x86 += x86/close_while_nested_test -TEST_GEN_PROGS_x86 += x86/vmx_dirty_log_test +TEST_GEN_PROGS_x86 += x86/nested_dirty_log_test TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state TEST_GEN_PROGS_x86 += x86/vmx_msrs_test TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state diff --git a/tools/testing/selftests/kvm/include/x86/svm_util.h b/tools/testing/selftests/kvm/include/x86/svm_util.h index b74c6dcddcbd6..70bb22dd6de54 100644 --- a/tools/testing/selftests/kvm/include/x86/svm_util.h +++ b/tools/testing/selftests/kvm/include/x86/svm_util.h @@ -27,6 +27,11 @@ struct svm_test_data { void *msr; /* gva */ void *msr_hva; uint64_t msr_gpa; + + /* NPT */ + void *ncr3; /* gva */ + void *ncr3_hva; + uint64_t ncr3_gpa; }; static inline void vmmcall(void) @@ -57,6 +62,9 @@ struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva); void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp); void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa); +bool 
kvm_cpu_has_npt(void); +void prepare_ncr3(struct svm_test_data *svm, struct kvm_vm *vm); + int open_sev_dev_path_or_exit(void); #endif /* SELFTEST_KVM_SVM_UTILS_H */ diff --git a/tools/testing/selftests/kvm/lib/x86/svm.c b/tools/testing/selftests/kvm/lib/x86/svm.c index d239c20973918..190a8044dba0e 100644 --- a/tools/testing/selftests/kvm/lib/x86/svm.c +++ b/tools/testing/selftests/kvm/lib/x86/svm.c @@ -59,6 +59,20 @@ static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector, seg->base = base; } +bool kvm_cpu_has_npt(void) +{ + return kvm_cpu_has(X86_FEATURE_NPT); +} + +void prepare_ncr3(struct svm_test_data *svm, struct kvm_vm *vm) +{ + TEST_ASSERT(kvm_cpu_has_npt(), "KVM doesn't support nested NPT"); + + svm->ncr3 = (void *)vm_vaddr_alloc_page(vm); + svm->ncr3_hva = addr_gva2hva(vm, (uintptr_t)svm->ncr3); + svm->ncr3_gpa = addr_gva2gpa(vm, (uintptr_t)svm->ncr3); +} + void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp) { struct vmcb *vmcb = svm->vmcb; @@ -102,6 +116,11 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r vmcb->save.rip = (u64)guest_rip; vmcb->save.rsp = (u64)guest_rsp; guest_regs.rdi = (u64)svm; + + if (svm->ncr3_gpa) { + ctrl->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE; + ctrl->nested_cr3 = svm->ncr3_gpa; + } } /* diff --git a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c similarity index 62% rename from tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c rename to tools/testing/selftests/kvm/x86/nested_dirty_log_test.c index b8ebb246aaf15..06c94e77b44bd 100644 --- a/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86/nested_dirty_log_test.c @@ -12,6 +12,7 @@ #include "test_util.h" #include "kvm_util.h" #include "processor.h" +#include "svm_util.h" #include "vmx.h" /* The memory slot index to track dirty pages */ @@ -25,6 +26,8 @@ #define NESTED_TEST_MEM1 0xc0001000 #define NESTED_TEST_MEM2 0xc0002000 +#define L2_GUEST_STACK_SIZE 64 + static void l2_guest_code(u64 *a, u64 *b) { READ_ONCE(*a); @@ -42,20 +45,19 @@ static void l2_guest_code(u64 *a, u64 *b) vmcall(); } -static void l2_guest_code_ept_enabled(void) +static void l2_guest_code_tdp_enabled(void) { l2_guest_code((u64 *)NESTED_TEST_MEM1, (u64 *)NESTED_TEST_MEM2); } -static void l2_guest_code_ept_disabled(void) +static void l2_guest_code_tdp_disabled(void) { - /* Access the same L1 GPAs as l2_guest_code_ept_enabled() */ + /* Access the same L1 GPAs as l2_guest_code_tdp_enabled() */ l2_guest_code((u64 *)GUEST_TEST_MEM, (u64 *)GUEST_TEST_MEM); } -void l1_guest_code(struct vmx_pages *vmx) +void l1_vmx_code(struct vmx_pages *vmx) { -#define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; void *l2_rip; @@ -64,23 +66,51 @@ void l1_guest_code(struct vmx_pages *vmx) GUEST_ASSERT(load_vmcs(vmx)); if (vmx->eptp_gpa) - l2_rip = l2_guest_code_ept_enabled; + l2_rip = l2_guest_code_tdp_enabled; else - l2_rip = l2_guest_code_ept_disabled; + l2_rip = l2_guest_code_tdp_disabled; prepare_vmcs(vmx, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]); GUEST_SYNC(false); GUEST_ASSERT(!vmlaunch()); GUEST_SYNC(false); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL); + GUEST_DONE(); +} + +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + void *l2_rip; + + if (svm->ncr3_gpa) + l2_rip = l2_guest_code_tdp_enabled; + else + 
l2_rip = l2_guest_code_tdp_disabled; + + generic_svm_setup(svm, l2_rip, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_SYNC(false); + run_guest(svm->vmcb, svm->vmcb_gpa); + GUEST_SYNC(false); + GUEST_ASSERT_EQ(svm->vmcb->control.exit_code, SVM_EXIT_VMMCALL); GUEST_DONE(); } -static void test_vmx_dirty_log(bool enable_ept) +static void l1_guest_code(void *data) { - vm_vaddr_t vmx_pages_gva = 0; - struct vmx_pages *vmx; + if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(data); + else + l1_svm_code(data); +} + +static void test_dirty_log(bool nested_tdp) +{ + struct svm_test_data *svm = NULL; + struct vmx_pages *vmx = NULL; + vm_vaddr_t nested_gva = 0; unsigned long *bmap; uint64_t *host_test_mem; @@ -89,12 +119,16 @@ static void test_vmx_dirty_log(bool enable_ept) struct ucall uc; bool done = false; - pr_info("Nested EPT: %s\n", enable_ept ? "enabled" : "disabled"); + pr_info("Nested TDP: %s\n", nested_tdp ? "enabled" : "disabled"); /* Create VM */ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vcpu, 1, vmx_pages_gva); + if (kvm_cpu_has(X86_FEATURE_VMX)) + vmx = vcpu_alloc_vmx(vm, &nested_gva); + else + svm = vcpu_alloc_svm(vm, &nested_gva); + + vcpu_args_set(vcpu, 1, nested_gva); /* Add an extra memory slot for testing dirty logging */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, @@ -113,17 +147,25 @@ static void test_vmx_dirty_log(bool enable_ept) * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to * 0xc0000000. * - * Note that prepare_eptp should be called only L1's GPA map is done, - * meaning after the last call to virt_map. + * Note that prepare_eptp()/prepare_ncr3() should be called only when + * L1's GPA map is done, meaning after the last call to virt_map. * - * When EPT is disabled, the L2 guest code will still access the same L1 - * GPAs as the EPT enabled case. + * When TDP is disabled, the L2 guest code will still access the same L1 + * GPAs as the TDP enabled case. */ - if (enable_ept) { - prepare_eptp(vmx, vm); - nested_identity_map_default_memslots(vm, vmx->eptp_gpa); - nested_map(vm, vmx->eptp_gpa, NESTED_TEST_MEM1, GUEST_TEST_MEM, PAGE_SIZE); - nested_map(vm, vmx->eptp_gpa, NESTED_TEST_MEM2, GUEST_TEST_MEM, PAGE_SIZE); + if (nested_tdp) { + uint64_t root_gpa; + + if (kvm_cpu_has(X86_FEATURE_VMX)) { + prepare_eptp(vmx, vm); + root_gpa = vmx->eptp_gpa; + } else { + prepare_ncr3(svm, vm); + root_gpa = svm->ncr3_gpa; + } + nested_identity_map_default_memslots(vm, root_gpa); + nested_map(vm, root_gpa, NESTED_TEST_MEM1, GUEST_TEST_MEM, PAGE_SIZE); + nested_map(vm, root_gpa, NESTED_TEST_MEM2, GUEST_TEST_MEM, PAGE_SIZE); } bmap = bitmap_zalloc(TEST_MEM_PAGES); @@ -168,12 +210,12 @@ static void test_vmx_dirty_log(bool enable_ept) int main(int argc, char *argv[]) { - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) || kvm_cpu_has(X86_FEATURE_SVM)); - test_vmx_dirty_log(/*enable_ept=*/false); + test_dirty_log(/*nested_tdp=*/false); - if (kvm_cpu_has_ept()) - test_vmx_dirty_log(/*enable_ept=*/true); + if (kvm_cpu_has_ept() || kvm_cpu_has_npt()) + test_dirty_log(/*nested_tdp=*/true); return 0; } -- 2.51.0.869.ge66316f041-goog