Currently, the virq_inject test advances the guest RIP regardless of what instruction caused the nested VM exit. This is an issue when INTERCEPT_VINTR is set and sti_nop_cli() is called in L2 with a pending interrupt. vmcb->save.rip will point to the nop instruction on exit due to the one-instruction interrupt shadow. The unconditional advance of the guest RIP then moves it three bytes, which is past the entire sti_nop_cli() call. This produces unintended/inconsistent behavior, including test failures. Only advance the guest RIP if the exiting instruction was vmmcall(). Signed-off-by: Kevin Cheng --- x86/svm_tests.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/x86/svm_tests.c b/x86/svm_tests.c index 3761647642542..11d0e3d39f5ba 100644 --- a/x86/svm_tests.c +++ b/x86/svm_tests.c @@ -1790,7 +1790,9 @@ static void virq_inject_test(struct svm_test *test) static bool virq_inject_finished(struct svm_test *test) { - vmcb->save.rip += 3; + /* Only jump over VMMCALL instruction */ + if (vmcb->control.exit_code == SVM_EXIT_VMMCALL) + vmcb->save.rip += 3; switch (get_test_stage(test)) { case 0: -- 2.52.0.322.g1dd061c0dc-goog Nested page table entries that were touched during nested page table walks for guest PTEs always have their dirty and accessed bits set. Write a test that verifies this behavior for guest reads and writes. Note that non-leaf NPT levels encountered during the GPA to HPA translation for guest PTEs only have their accessed bits set. The nVMX tests already have coverage for TDP A/D bits. Add a similar test for nSVM to improve test parity between nSVM and nVMX. Signed-off-by: Kevin Cheng --- x86/svm_npt.c | 176 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) diff --git a/x86/svm_npt.c b/x86/svm_npt.c index bd5e8f351e343..e436c43fb1c4c 100644 --- a/x86/svm_npt.c +++ b/x86/svm_npt.c @@ -380,6 +380,181 @@ skip_pte_test: vmcb->save.cr4 = sg_cr4; } + +static void clear_npt_ad_pte(unsigned long *pml4e, void *gpa) +{ + unsigned long *pte; + int l; + + for (l = PAGE_LEVEL; l > 0; --l) { + pte = get_pte_level(pml4e, gpa, l); + *pte &= ~(PT_AD_MASK); + if (*pte & PT_PAGE_SIZE_MASK) + break; + } +} + +/* + * clear_npt_ad : Clear NPT A/D bits for the page table walk and the final + * GPA of a guest address. + */ +static void clear_npt_ad(u64 *pml4e, unsigned long *guest_cr3, + void *gva) +{ + unsigned long *pte = guest_cr3, gpa; + u64 offset_in_page; + int l; + + for (l = PAGE_LEVEL; l > 0; --l) { + pte = get_pte_level(guest_cr3, gva, l); + + clear_npt_ad_pte(pml4e, (void *)pte); + + assert(*pte & PT_PRESENT_MASK); + if (*pte & PT_PAGE_SIZE_MASK) + break; + } + + pte = get_pte_level(guest_cr3, gva, l); + offset_in_page = (u64)gva & ((1 << PGDIR_BITS(l)) - 1); + gpa = (*pte & PT_ADDR_MASK) | ((u64)gva & offset_in_page); + clear_npt_ad_pte(pml4e, (void *)gpa); +} + +/* + * check_npt_ad_pte : Check the NPT A/D bits at each level for GPA being + * translated. Note that non-leaf NPT levels encountered during translation + * only have the access bit set.
+ */ +static bool check_npt_ad_pte(u64 *pml4e, void *gpa, int guest_level, + int expected_ad) +{ + int l, expected_ad_level; + unsigned long *pte; + bool leaf; + + for (l = PAGE_LEVEL; l > 0; --l) { + pte = get_pte_level(pml4e, gpa, l); + leaf = (l == 1) || (*pte & PT_PAGE_SIZE_MASK); + expected_ad_level = expected_ad; + + /* The dirty bit is only set on leaf PTEs */ + if (!leaf) + expected_ad_level = expected_ad & ~PT_DIRTY_MASK; + + if ((*pte & PT_AD_MASK) != expected_ad_level) { + report_fail("NPT - guest level %d npt level %d page table received: A=%d/D=%d, expected A=%d/D=%d", + guest_level, + l, + !!(*pte & PT_ACCESSED_MASK), + !!(*pte & PT_DIRTY_MASK), + !!(expected_ad & PT_ACCESSED_MASK), + !!(expected_ad & PT_DIRTY_MASK)); + return true; + } + + if (leaf) + break; + } + + return false; +} + +/* + * check_npt_ad : Check the content of NPT A/D bits for the page table walk + * and the final GPA of a guest address. + */ +static void check_npt_ad(u64 *pml4e, unsigned long *guest_cr3, + void *gva, int expected_gpa_ad) +{ + unsigned long *pte = guest_cr3, gpa; + u64 *npt_pte, offset_in_page; + bool bad_pt_ad = false; + int l; + + for (l = PAGE_LEVEL; l > 0; --l) { + pte = get_pte_level(guest_cr3, gva, l); + npt_pte = npt_get_pte((u64) pte); + + if (!npt_pte) { + report_fail("NPT - guest level %d page table is not mapped.\n", l); + return; + } + + if (!bad_pt_ad) + bad_pt_ad |= check_npt_ad_pte(pml4e, (void *)pte, l, PT_AD_MASK); + + assert(*pte & PT_PRESENT_MASK); + if (*pte & PT_PAGE_SIZE_MASK) + break; + } + + pte = get_pte_level(guest_cr3, gva, l); + offset_in_page = (u64)gva & ((1 << PGDIR_BITS(l)) - 1); + gpa = (*pte & PT_ADDR_MASK) | ((u64)gva & offset_in_page); + + npt_pte = npt_get_pte(gpa); + + if (!npt_pte) { + report_fail("NPT - guest physical address is not mapped"); + return; + } + + check_npt_ad_pte(pml4e, (void *)gpa, l, expected_gpa_ad); + report((*npt_pte & PT_AD_MASK) == expected_gpa_ad, + "NPT - guest physical address received: A=%d/D=%d, expected A=%d/D=%d", + !!(*npt_pte & PT_ACCESSED_MASK), + !!(*npt_pte & PT_DIRTY_MASK), + !!(expected_gpa_ad & PT_ACCESSED_MASK), + !!(expected_gpa_ad & PT_DIRTY_MASK)); +} + +static void npt_ad_read_guest(struct svm_test *test) +{ + (void)*(volatile u64 *)scratch_page; +} + +static void npt_ad_write_guest(struct svm_test *test) +{ + *((u64 *)scratch_page) = 42; +} + +static void npt_ad_test(void) +{ + unsigned long *guest_cr3 = (unsigned long *) vmcb->save.cr3; + + if (!npt_supported()) { + report_skip("NPT not supported"); + return; + } + + scratch_page = alloc_page(); + + clear_npt_ad(npt_get_pml4e(), guest_cr3, scratch_page); + + test_set_guest(npt_ad_read_guest); + svm_vmrun(); + + /* + * NPT walks for guest page tables are write accesses by default unless + * read-only guest page tables are used. As a result, we expect the + * dirty bit to be set on NPT mappings of guest page tables. Since the + * access itself is a read, we expect the final translation to not have + * the dirty bit set. 
+ */ + check_npt_ad(npt_get_pml4e(), guest_cr3, scratch_page, PT_ACCESSED_MASK); + + test_set_guest(npt_ad_write_guest); + svm_vmrun(); + + check_npt_ad(npt_get_pml4e(), guest_cr3, scratch_page, PT_AD_MASK); + + report(*((u64 *)scratch_page) == 42, "Expected: 42, received: %ld", + *((u64 *)scratch_page)); + + clear_npt_ad(npt_get_pml4e(), guest_cr3, scratch_page); +} + #define NPT_V1_TEST(name, prepare, guest_code, check) \ { #name, npt_supported, prepare, default_prepare_gif_clear, guest_code, \ default_finished, check } @@ -395,6 +570,7 @@ static struct svm_test npt_tests[] = { NPT_V1_TEST(npt_l1mmio, npt_l1mmio_prepare, npt_l1mmio_test, npt_l1mmio_check), NPT_V1_TEST(npt_rw_l1mmio, npt_rw_l1mmio_prepare, npt_rw_l1mmio_test, npt_rw_l1mmio_check), NPT_V2_TEST(svm_npt_rsvd_bits_test), + NPT_V2_TEST(npt_ad_test), { NULL, NULL, NULL, NULL, NULL, NULL, NULL } }; -- 2.52.0.322.g1dd061c0dc-goog Add two tests to the SVM test suite to validate APIC passthrough capabilities. In the apic_passthrough test, the guest asserts an IRQ line to trigger a level-triggered interrupt that should be injected directly into the guest. Confirm that the remote_irr bit is set before the guest's EOI and cleared after the guest's EOI. Include a variant that uses a separate thread to trigger the interrupt to ensure cross-CPU delivery is handled correctly. The svm_apic_passthrough_tpr_threshold_test validates that a guest can directly modify the host's APIC TPR. The host queues a pending self-IPI by disabling interrupts and raising the TPR to a high value. The test then runs a guest that lowers the TPR to 0, and upon returning to the host, it confirms that the pending interrupt is delivered once interrupts are enabled. The nVMX tests already have coverage for APIC passthrough. Add a similar test for nSVM to improve test parity between nSVM and nVMX. This test uses the old V1 test framework to utilize the test stage for specific test event sequencing.
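For readers unfamiliar with the TPR-threshold flow described above, the intended ordering is sketched below. This is a minimal illustration, reusing the kvm-unit-tests helpers that appear in the patch itself (cli(), apic_set_tpr(), apic_icr_write(), test_set_guest(), svm_vmrun(), sti_nop(), and the guest function defined in the diff); it is not part of the patch, which carries the real svm_apic_passthrough_tpr_threshold_test() further down.

static void tpr_threshold_flow_sketch(void)
{
        int vector = 0xe1;      /* arbitrary test vector, mirrors the patch */

        /* 1. Block delivery in L1 and raise TPR above the vector's priority class. */
        cli();
        apic_set_tpr((vector >> 4) + 1);

        /* 2. Queue a fixed self-IPI; it stays pending in the LAPIC IRR. */
        apic_icr_write(APIC_DEST_SELF | APIC_DEST_PHYSICAL |
                       APIC_DM_FIXED | vector, 0);

        /* 3. Run L2 with APIC passthrough; the guest writes TPR = 0. */
        test_set_guest(svm_apic_passthrough_tpr_threshold_guest);
        svm_vmrun();

        /* 4. Back in L1, enabling interrupts must deliver the pending self-IPI. */
        sti_nop();
}

Step 4 only succeeds if the guest's TPR write in step 3 reached the host's physical APIC; the real test additionally brackets the VMRUN with clgi()/stgi(), which is omitted here for brevity.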
Signed-off-by: Kevin Cheng --- x86/svm_tests.c | 175 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) diff --git a/x86/svm_tests.c b/x86/svm_tests.c index 11d0e3d39f5ba..0eedad6bc3af5 100644 --- a/x86/svm_tests.c +++ b/x86/svm_tests.c @@ -12,6 +12,7 @@ #include "util.h" #include "x86/usermode.h" #include "vmalloc.h" +#include "fwcfg.h" #define SVM_EXIT_MAX_DR_INTERCEPT 0x3f @@ -3575,6 +3576,172 @@ static void svm_shutdown_intercept_test(void) report(vmcb->control.exit_code == SVM_EXIT_SHUTDOWN, "shutdown test passed"); } + +static void set_irq_line_thread(void *data) +{ + /* Wait until other CPU entered L2 */ + while (get_test_stage(data) != 1) + ; + + /* Set irq-line 0xf to raise vector 0x78 for vCPU 0 */ + ioapic_set_redir(0xf, 0x78, TRIGGER_LEVEL); + set_test_stage(data, 2); +} + +static void irq_78_handler_guest(isr_regs_t *regs) +{ + set_irq_line(0xf, 0); + vmmcall(); + eoi(); + vmmcall(); +} + +static void svm_apic_passthrough_guest(struct svm_test *test) +{ + handle_irq(0x78, irq_78_handler_guest); + sti(); + + /* If requested, wait for other CPU to trigger ioapic scan */ + if (get_test_stage(test) < 1) { + set_test_stage(test, 1); + while (get_test_stage(test) != 2) + ; + } + + set_irq_line(0xf, 1); +} + +static void svm_disable_intercept_for_x2apic_msrs(void) +{ + for (u32 msr = APIC_BASE_MSR; msr <= (APIC_BASE_MSR+0xff); ++msr) { + int bit_nr = get_msrpm_bit_nr(msr); + + __clear_bit(bit_nr, msr_bitmap); + __clear_bit(bit_nr + 1, msr_bitmap); + } +} + +static void svm_apic_passthrough_prepare(struct svm_test *test, + bool set_irq_line_from_thread) +{ + if (set_irq_line_from_thread && (cpu_count() < 2)) { + report_skip("%s : CPU count < 2", __func__); + return; + } + + /* Test device is required for generating IRQs */ + if (!test_device_enabled()) { + report_skip("%s : No test device enabled", __func__); + return; + } + + vmcb->control.intercept &= ~(1ULL << INTERCEPT_MSR_PROT); + svm_disable_intercept_for_x2apic_msrs(); + + vmcb->control.intercept &= ~(1ULL << INTERCEPT_INTR); + + if (set_irq_line_from_thread) { + on_cpu_async(1, set_irq_line_thread, test); + } else { + ioapic_set_redir(0xf, 0x78, TRIGGER_LEVEL); + set_test_stage(test, 2); + } +} + +static void svm_apic_passthrough_test_prepare(struct svm_test *test) +{ + svm_apic_passthrough_prepare(test, false); +} + +static void svm_apic_passthrough_thread_test_prepare(struct svm_test *test) +{ + svm_apic_passthrough_prepare(test, true); +} + +static bool svm_apic_passthrough_test_finished(struct svm_test *test) +{ + u32 exit_code = vmcb->control.exit_code; + + report(exit_code == SVM_EXIT_VMMCALL, "Expected VMMCALL VM-Exit, got exit reason 0x%x", + exit_code); + + switch (get_test_stage(test)) { + case 2: + /* Jump over VMMCALL instruction */ + vmcb->save.rip += 3; + + /* Before EOI remote_irr should still be set */ + report(1 == (int)ioapic_read_redir(0xf).remote_irr, + "IOAPIC pass-through: remote_irr=1 before EOI"); + set_test_stage(test, 3); + return false; + case 3: + /* Jump over VMMCALL instruction */ + vmcb->save.rip += 3; + + /* After EOI remote_irr should be cleared */ + report(0 == (int)ioapic_read_redir(0xf).remote_irr, + "IOAPIC pass-through: remote_irr=0 after EOI"); + set_test_stage(test, 4); + return false; + case 4: + break; + default: + report_fail("Unexpected stage %d", get_test_stage(test)); + } + + return true; +} + +static bool svm_apic_passthrough_test_check(struct svm_test *test) +{ + return get_test_stage(test) == 4; +} + +static void 
svm_apic_passthrough_tpr_threshold_guest(struct svm_test *test) +{ + cli(); + apic_set_tpr(0); +} + +static bool svm_apic_passthrough_tpr_threshold_ipi_isr_fired; +static void svm_apic_passthrough_tpr_threshold_ipi_isr(isr_regs_t *regs) +{ + svm_apic_passthrough_tpr_threshold_ipi_isr_fired = true; + eoi(); +} + +static void svm_apic_passthrough_tpr_threshold_test(void) +{ + int ipi_vector = 0xe1; + + vmcb->control.intercept &= ~(1ULL << INTERCEPT_MSR_PROT); + svm_disable_intercept_for_x2apic_msrs(); + + vmcb->control.intercept &= ~(1ULL << INTERCEPT_INTR); + vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK; + + /* Raise L0 TPR-threshold by queueing vector in LAPIC IRR */ + cli(); + apic_set_tpr((ipi_vector >> 4) + 1); + apic_icr_write(APIC_DEST_SELF | APIC_DEST_PHYSICAL | + APIC_DM_FIXED | ipi_vector, + 0); + + test_set_guest(svm_apic_passthrough_tpr_threshold_guest); + clgi(); + svm_vmrun(); + stgi(); + + report(apic_get_tpr() == 0, "TPR was zero by guest"); + + /* Clean pending self-IPI */ + svm_apic_passthrough_tpr_threshold_ipi_isr_fired = false; + handle_irq(ipi_vector, svm_apic_passthrough_tpr_threshold_ipi_isr); + sti_nop(); + report(svm_apic_passthrough_tpr_threshold_ipi_isr_fired, "self-IPI fired"); +} + struct svm_test svm_tests[] = { { "null", default_supported, default_prepare, default_prepare_gif_clear, null_test, @@ -3694,6 +3861,14 @@ struct svm_test svm_tests[] = { { "vgif", vgif_supported, prepare_vgif_enabled, default_prepare_gif_clear, test_vgif, vgif_finished, vgif_check }, + { "apic_passthrough", default_supported, svm_apic_passthrough_test_prepare, + default_prepare_gif_clear, svm_apic_passthrough_guest, + svm_apic_passthrough_test_finished, svm_apic_passthrough_test_check}, + { "apic_passthrough_thread", default_supported, + svm_apic_passthrough_thread_test_prepare, default_prepare_gif_clear, + svm_apic_passthrough_guest, svm_apic_passthrough_test_finished, + svm_apic_passthrough_test_check}, + TEST(svm_apic_passthrough_tpr_threshold_test), TEST(svm_cr4_osxsave_test), TEST(svm_guest_state_test), TEST(svm_vmrun_errata_test), -- 2.52.0.322.g1dd061c0dc-goog Add test cases for instruction intercepts. Enable instruction intercepts and check that exit codes and exit info are equal to the expected results after running corresponding instructions in guest. The nVMX tests already have coverage for instruction intercepts. Add a similar test for nSVM to improve test parity between nSVM and nVMX. Signed-off-by: Kevin Cheng --- lib/x86/processor.h | 6 ++ x86/svm.c | 14 +++ x86/svm.h | 7 ++ x86/svm_tests.c | 202 ++++++++++++++++++++++++++++++++++++++++++++ x86/unittests.cfg | 16 +++- x86/vmx_tests.c | 5 -- 6 files changed, 244 insertions(+), 6 deletions(-) diff --git a/lib/x86/processor.h b/lib/x86/processor.h index 42dd2d2a4787c..b073ee168ce4b 100644 --- a/lib/x86/processor.h +++ b/lib/x86/processor.h @@ -358,6 +358,7 @@ struct x86_cpu_feature { * Extended Leafs, a.k.a. 
AMD defined */ #define X86_FEATURE_SVM X86_CPU_FEATURE(0x80000001, 0, ECX, 2) +#define X86_FEATURE_SKINIT X86_CPU_FEATURE(0x80000001, 0, ECX, 12) #define X86_FEATURE_PERFCTR_CORE X86_CPU_FEATURE(0x80000001, 0, ECX, 23) #define X86_FEATURE_NX X86_CPU_FEATURE(0x80000001, 0, EDX, 20) #define X86_FEATURE_GBPAGES X86_CPU_FEATURE(0x80000001, 0, EDX, 26) @@ -522,6 +523,11 @@ static inline u64 this_cpu_supported_xcr0(void) ((u64)this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32); } +static inline bool this_cpu_has_mwait(void) +{ + return this_cpu_has(X86_FEATURE_MWAIT); +} + struct far_pointer32 { u32 offset; u16 selector; diff --git a/x86/svm.c b/x86/svm.c index de9eb19443caa..014feae3b48cc 100644 --- a/x86/svm.c +++ b/x86/svm.c @@ -83,6 +83,20 @@ bool pause_threshold_supported(void) return this_cpu_has(X86_FEATURE_PFTHRESHOLD); } +bool rdpru_supported(void) +{ + return this_cpu_has(X86_FEATURE_RDPRU); +} + +bool skinit_supported(void) +{ + return this_cpu_has(X86_FEATURE_SKINIT); +} + +bool xsave_supported(void) +{ + return this_cpu_has(X86_FEATURE_XSAVE); +} void default_prepare(struct svm_test *test) { diff --git a/x86/svm.h b/x86/svm.h index 264583a6547ef..052324c683019 100644 --- a/x86/svm.h +++ b/x86/svm.h @@ -49,6 +49,8 @@ enum { INTERCEPT_MONITOR, INTERCEPT_MWAIT, INTERCEPT_MWAIT_COND, + INTERCEPT_XSETBV, + INTERCEPT_RDPRU, }; enum { @@ -352,6 +354,8 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_EXIT_MONITOR 0x08a #define SVM_EXIT_MWAIT 0x08b #define SVM_EXIT_MWAIT_COND 0x08c +#define SVM_EXIT_XSETBV 0x08d +#define SVM_EXIT_RDPRU 0x08e #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_ERR -1 @@ -423,6 +427,9 @@ bool lbrv_supported(void); bool tsc_scale_supported(void); bool pause_filter_supported(void); bool pause_threshold_supported(void); +bool rdpru_supported(void); +bool skinit_supported(void); +bool xsave_supported(void); void default_prepare(struct svm_test *test); void default_prepare_gif_clear(struct svm_test *test); bool default_finished(struct svm_test *test); diff --git a/x86/svm_tests.c b/x86/svm_tests.c index 0eedad6bc3af5..8dbc033533c00 100644 --- a/x86/svm_tests.c +++ b/x86/svm_tests.c @@ -2547,6 +2547,207 @@ static void test_dr(void) vmcb->save.dr7 = dr_saved; } +asm( + "insn_sidt: sidt idt_descr;ret\n\t" + "insn_sgdt: sgdt gdt_descr;ret\n\t" + "insn_sldt: sldt %ax;ret\n\t" + "insn_str: str %ax;ret\n\t" + "insn_ltr: ltr %ax;ret\n\t" + "insn_pushf: pushf;ret\n\t" + "insn_popf: popf;ret\n\t" + "insn_intn: int $0x22; vmmcall; ret\n\t" + "insn_int_handler: iretq\n\t" + "insn_stgi: stgi;ret\n\t" + "insn_clgi: clgi;ret\n\t" + "insn_rdtscp: rdtscp;ret\n\t" + "insn_wbinvd: wbinvd;ret\n\t" + "insn_lidt: lidt idt_descr;ret\n\t" + "insn_lgdt: lgdt gdt_descr;ret\n\t" + "insn_lldt: xor %eax, %eax; lldt %ax;ret\n\t" + "insn_rdpmc: xor %ecx, %ecx; rdpmc;ret\n\t" + "insn_cpuid: mov $10, %eax; cpuid;ret\n\t" + "insn_invd: invd;ret\n\t" + "insn_pause: pause;ret\n\t" + "insn_hlt: hlt;ret\n\t" + "insn_invlpg: invlpg 0x12345678;ret\n\t" + "insn_rdtsc: rdtsc;ret\n\t" + "insn_monitor: xor %eax, %eax; xor %ecx, %ecx; xor %edx, %edx; monitor;ret\n\t" + "insn_mwait: xor %eax, %eax; xor %ecx, %ecx; mwait;ret\n\t" + "insn_skinit: skinit;ret\n\t" + "insn_icebp: .byte 0xf1; ret\n\t" + "insn_xsetbv: mov $0, %ecx; xor %edx, %edx; xor %eax, %eax; xgetbv; xsetbv;ret\n\t" + "insn_rdpru: xor %ecx, %ecx; .byte 0x0f,0x01,0xfd;ret\n\t" +); + +extern void insn_sidt(struct svm_test *test); +extern void insn_sgdt(struct svm_test *test); +extern void insn_sldt(struct svm_test *test); +extern void 
insn_str(struct svm_test *test); +extern void insn_ltr(struct svm_test *test); +extern void insn_pushf(struct svm_test *test); +extern void insn_popf(struct svm_test *test); +extern void insn_intn(struct svm_test *test); +extern void insn_int_handler(struct svm_test *test); +extern void insn_stgi(struct svm_test *test); +extern void insn_clgi(struct svm_test *test); +extern void insn_rdtscp(struct svm_test *test); +extern void insn_wbinvd(struct svm_test *test); +extern void insn_lidt(struct svm_test *test); +extern void insn_lgdt(struct svm_test *test); +extern void insn_lldt(struct svm_test *test); +extern void insn_rdpmc(struct svm_test *test); +extern void insn_cpuid(struct svm_test *test); +extern void insn_invd(struct svm_test *test); +extern void insn_pause(struct svm_test *test); +extern void insn_hlt(struct svm_test *test); +extern void insn_invlpg(struct svm_test *test); +extern void insn_rdtsc(struct svm_test *test); +extern void insn_monitor(struct svm_test *test); +extern void insn_mwait(struct svm_test *test); +extern void insn_skinit(struct svm_test *test); +extern void insn_icebp(struct svm_test *test); +extern void insn_xsetbv(struct svm_test *test); +extern void insn_rdpru(struct svm_test *test); + +u64 orig_cr4; + +static void insn_intercept_xsetbv_setup(void) +{ + orig_cr4 = vmcb->save.cr4; + vmcb->save.cr4 |= X86_CR4_OSXSAVE; +} + +static void insn_intercept_xsetbv_cleanup(void) +{ + vmcb->save.cr4 = orig_cr4; +} + +static idt_entry_t old_idt_entry; + +static void insn_int_setup(void) +{ + memcpy(&old_idt_entry, &boot_idt[0x22], sizeof(idt_entry_t)); + set_idt_entry(0x22, insn_int_handler, 0); +} + +static void insn_int_cleanup(void) +{ + memcpy(&boot_idt[0x22], &old_idt_entry, sizeof(idt_entry_t)); +} + +struct insn_table { + const char *name; + u64 intercept; + void (*insn_func)(struct svm_test *test); + u32 reason; + u64 exit_info_1; + u64 exit_info_2; + bool corrupts_guest; + bool (*supported_fn)(void); + void (*setup)(void); + void (*cleanup)(void); +}; + +static struct insn_table insn_table[] = { + { "STORE IDTR", INTERCEPT_STORE_IDTR, insn_sidt, SVM_EXIT_IDTR_READ, 0, + 0 }, + { "STORE GDTR", INTERCEPT_STORE_GDTR, insn_sgdt, SVM_EXIT_GDTR_READ, 0, + 0 }, + { "STORE LDTR", INTERCEPT_STORE_LDTR, insn_sldt, SVM_EXIT_LDTR_READ, 0, + 0 }, + { "STORE TR", INTERCEPT_STORE_TR, insn_str, SVM_EXIT_TR_READ, 0, 0 }, + /* corrupts_guest: LTR causes a #GP if done with a busy selector */ + { "LOAD TR", INTERCEPT_LOAD_TR, insn_ltr, SVM_EXIT_TR_WRITE, 0, 0, true }, + /* corrupts_guest: PUSHF and POPF corrupt the L2 stack */ + { "PUSHF", INTERCEPT_PUSHF, insn_pushf, SVM_EXIT_PUSHF, 0, 0, true }, + { "POPF", INTERCEPT_POPF, insn_popf, SVM_EXIT_POPF, 0, 0, true }, + { "IRET", INTERCEPT_IRET, insn_intn, SVM_EXIT_IRET, 0, 0, false, + NULL, insn_int_setup, insn_int_cleanup }, + { "INTn", INTERCEPT_INTn, insn_intn, SVM_EXIT_SWINT, 0x22, 0, false, + NULL, insn_int_setup, insn_int_cleanup }, + { "STGI", INTERCEPT_STGI, insn_stgi, SVM_EXIT_STGI, 0, 0 }, + { "CLGI", INTERCEPT_CLGI, insn_clgi, SVM_EXIT_CLGI, 0, 0 }, + { "RDTSCP", INTERCEPT_RDTSCP, insn_rdtscp, SVM_EXIT_RDTSCP, 0, 0 }, + { "WBINVD", INTERCEPT_WBINVD, insn_wbinvd, SVM_EXIT_WBINVD, 0, 0 }, + { "LOAD IDTR", INTERCEPT_LOAD_IDTR, insn_lidt, SVM_EXIT_IDTR_WRITE, 0, + 0 }, + { "LOAD GDTR", INTERCEPT_LOAD_GDTR, insn_lgdt, SVM_EXIT_GDTR_WRITE, 0, + 0 }, + { "LOAD LDTR", INTERCEPT_LOAD_LDTR, insn_lldt, SVM_EXIT_LDTR_WRITE, 0, + 0 }, + { "RDPMC", INTERCEPT_RDPMC, insn_rdpmc, SVM_EXIT_RDPMC, 0, 0 }, + { "CPUID", INTERCEPT_CPUID, 
insn_cpuid, SVM_EXIT_CPUID, 0, 0 }, + { "INVD", INTERCEPT_INVD, insn_invd, SVM_EXIT_INVD, 0, 0 }, + { "PAUSE", INTERCEPT_PAUSE, insn_pause, SVM_EXIT_PAUSE, 0, 0 }, + /* corrupts_guest: HLT causes guest to hang */ + { "HLT", INTERCEPT_HLT, insn_hlt, SVM_EXIT_HLT, 0, 0, true }, + { "INVLPG", INTERCEPT_INVLPG, insn_invlpg, SVM_EXIT_INVLPG, 0x12345678, 0 }, + { "RDTSC", INTERCEPT_RDTSC, insn_rdtsc, SVM_EXIT_RDTSC, 0, 0 }, + { "MONITOR", INTERCEPT_MONITOR, insn_monitor, SVM_EXIT_MONITOR, 0, 0, + false, this_cpu_has_mwait }, + { "MWAIT", INTERCEPT_MWAIT, insn_mwait, SVM_EXIT_MWAIT, 0, 0, + false, this_cpu_has_mwait }, + { "SKINIT", INTERCEPT_SKINIT, insn_skinit, SVM_EXIT_SKINIT, 0, 0, + false, skinit_supported }, + /* corrupts_guest: ICEBP triggers a #DB exception */ + { "ICEBP", INTERCEPT_ICEBP, insn_icebp, SVM_EXIT_ICEBP, 0, 0, true }, + { "XSETBV", INTERCEPT_XSETBV, insn_xsetbv, SVM_EXIT_XSETBV, 0, 0, + false, xsave_supported, insn_intercept_xsetbv_setup, + insn_intercept_xsetbv_cleanup }, + { "RDPRU", INTERCEPT_RDPRU, insn_rdpru, SVM_EXIT_RDPRU, 0, 0, + false, rdpru_supported }, + { NULL }, +}; + +static void svm_insn_intercept_test(void) +{ + u64 exit_info_1; + u64 exit_info_2; + u32 exit_code; + u32 cur_insn; + + for (cur_insn = 0; insn_table[cur_insn].name != NULL; ++cur_insn) { + struct insn_table insn = insn_table[cur_insn]; + + if (insn.supported_fn && !insn.supported_fn()) { + report_skip("\tFeature required for %s is not supported.\n", + insn_table[cur_insn].name); + continue; + } + + test_set_guest(insn.insn_func); + + if (insn.setup) + insn.setup(); + + if (!insn.corrupts_guest) + report(svm_vmrun() == SVM_EXIT_VMMCALL, "execute %s", insn.name); + + vmcb->control.intercept |= (1ULL << insn.intercept); + svm_vmrun(); + + exit_code = vmcb->control.exit_code; + exit_info_1 = vmcb->control.exit_info_1; + exit_info_2 = vmcb->control.exit_info_2; + + report(exit_code == insn.reason, + "Expected exit code: 0x%x, received exit code: 0x%x", + insn.reason, exit_code); + + report(exit_info_1 == insn.exit_info_1, + "Expected exit_info_1: 0x%lx, received exit_info_1: 0x%lx", + insn.exit_info_1, exit_info_1); + + report(exit_info_2 == insn.exit_info_2, + "Expected exit_info_2: 0x%lx, received exit_info_2: 0x%lx", + insn.exit_info_2, exit_info_2); + + if (insn.cleanup) + insn.cleanup(); + vmcb->control.intercept &= ~(1 << insn.intercept); + } +} + /* TODO: verify if high 32-bits are sign- or zero-extended on bare metal */ #define TEST_BITMAP_ADDR(save_intercept, type, addr, exit_code, \ msg) { \ @@ -3891,6 +4092,7 @@ struct svm_test svm_tests[] = { TEST(svm_tsc_scale_test), TEST(pause_filter_test), TEST(svm_shutdown_intercept_test), + TEST(svm_insn_intercept_test), { NULL, NULL, NULL, NULL, NULL, NULL, NULL } }; diff --git a/x86/unittests.cfg b/x86/unittests.cfg index 522318d32bf68..1a89101a5b2dd 100644 --- a/x86/unittests.cfg +++ b/x86/unittests.cfg @@ -253,7 +253,21 @@ arch = x86_64 [svm] file = svm.flat smp = 2 -test_args = "-pause_filter_test" +test_args = "-pause_filter_test -svm_intr_intercept_mix_smi -svm_insn_intercept_test" +qemu_params = -cpu max,+svm -m 4g +arch = x86_64 +groups = svm + +[svm_insn_intercept_test] +file = svm.flat +test_args = svm_insn_intercept_test +qemu_params = -cpu max,+svm -m 4g +arch = x86_64 +groups = svm + +[svm_intr_intercept_mix_smi] +file = svm.flat +test_args = svm_intr_intercept_mix_smi qemu_params = -cpu max,+svm -m 4g arch = x86_64 groups = svm diff --git a/x86/vmx_tests.c b/x86/vmx_tests.c index 5ffb80a3d866b..aa875ef722163 100644 --- a/x86/vmx_tests.c 
+++ b/x86/vmx_tests.c @@ -855,11 +855,6 @@ u64 cr3; typedef bool (*supported_fn)(void); -static bool this_cpu_has_mwait(void) -{ - return this_cpu_has(X86_FEATURE_MWAIT); -} - struct insn_table { const char *name; u32 flag; -- 2.52.0.322.g1dd061c0dc-goog The nVMX tests already has coverage for PF exception testing by running ac_test_run() in L2. Add a similar test for nSVM to improve test parity between nSVM and nVMX. The VMX tests intercept MSR accesses and CPUID by default, hence why there is special handling for MSR and CPUID VM exits. The svm tests do not intercept the EFER accesses by default and EFER updates in L2 do not affect L1, so we do not need to handle MSR exits like the VMX tests. CPUID should be emulated by KVM so the interception is not required there either. Signed-off-by: Kevin Cheng --- x86/Makefile.common | 2 ++ x86/svm.h | 1 + x86/svm_tests.c | 71 +++++++++++++++++++++++++++++++++++++++++++++ x86/unittests.cfg | 33 ++++++++++++++++++++- 4 files changed, 106 insertions(+), 1 deletion(-) diff --git a/x86/Makefile.common b/x86/Makefile.common index ef0e09a65b07f..31d1ed5777123 100644 --- a/x86/Makefile.common +++ b/x86/Makefile.common @@ -117,6 +117,8 @@ $(TEST_DIR)/access_test.$(bin): $(TEST_DIR)/access.o $(TEST_DIR)/vmx.$(bin): $(TEST_DIR)/access.o +$(TEST_DIR)/svm.$(bin): $(TEST_DIR)/access.o + $(TEST_DIR)/svm_npt.$(bin): $(TEST_DIR)/svm.o $(TEST_DIR)/kvmclock_test.$(bin): $(TEST_DIR)/kvmclock.o diff --git a/x86/svm.h b/x86/svm.h index 052324c683019..66733570f0e37 100644 --- a/x86/svm.h +++ b/x86/svm.h @@ -112,6 +112,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define TLB_CONTROL_DO_NOTHING 0 #define TLB_CONTROL_FLUSH_ALL_ASID 1 +#define TLB_CONTROL_FLUSH_ASID 3 #define V_TPR_MASK 0x0f diff --git a/x86/svm_tests.c b/x86/svm_tests.c index 8dbc033533c00..a40468693b396 100644 --- a/x86/svm_tests.c +++ b/x86/svm_tests.c @@ -13,6 +13,7 @@ #include "x86/usermode.h" #include "vmalloc.h" #include "fwcfg.h" +#include "access.h" #define SVM_EXIT_MAX_DR_INTERCEPT 0x3f @@ -3943,6 +3944,60 @@ static void svm_apic_passthrough_tpr_threshold_test(void) report(svm_apic_passthrough_tpr_threshold_ipi_isr_fired, "self-IPI fired"); } +static bool tlb_control_flush; + +static void svm_pf_inv_asid_test_prepare(struct svm_test *test) +{ + vmcb->control.intercept |= BIT_ULL(INTERCEPT_INVLPG); + tlb_control_flush = false; +} + +static void svm_pf_inv_tlb_ctl_test_prepare(struct svm_test *test) +{ + vmcb->control.intercept |= BIT_ULL(INTERCEPT_INVLPG); + tlb_control_flush = true; +} + +static void svm_pf_exception_test(struct svm_test *test) +{ + ac_test_run(PT_LEVEL_PML4, false); +} + +static void svm_pf_exception_forced_emulation_test(struct svm_test *test) +{ + ac_test_run(PT_LEVEL_PML4, true); +} + +static bool svm_pf_exception_test_finished(struct svm_test *test) +{ + switch (vmcb->control.exit_code) { + case SVM_EXIT_VMMCALL: + break; + case SVM_EXIT_INVLPG: + if (tlb_control_flush) { + /* Flush L2 TLB by using TLB Control field */ + vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID; + } else { + /* Flush L2 TLB by assigning a new ASID. */ + vmcb->control.asid++; + } + break; + default: + report_fail("Unexpected exit to L1, Exit reason: 0x%x", + vmcb->control.exit_code); + } + + /* Advance guest RIP to the instruction after exit instruction. 
*/ + vmcb->save.rip = vmcb->control.next_rip; + + return vmcb->control.exit_code == SVM_EXIT_VMMCALL; +} + +static bool svm_pf_exception_test_check(struct svm_test *test) +{ + return vmcb->control.exit_code == SVM_EXIT_VMMCALL; +} + struct svm_test svm_tests[] = { { "null", default_supported, default_prepare, default_prepare_gif_clear, null_test, @@ -4069,6 +4124,22 @@ struct svm_test svm_tests[] = { svm_apic_passthrough_thread_test_prepare, default_prepare_gif_clear, svm_apic_passthrough_guest, svm_apic_passthrough_test_finished, svm_apic_passthrough_test_check}, + { "svm_pf_exception_test", next_rip_supported, + default_prepare, default_prepare_gif_clear, + svm_pf_exception_test, svm_pf_exception_test_finished, + svm_pf_exception_test_check}, + { "svm_pf_exception_forced_emulation_test", next_rip_supported, + default_prepare, default_prepare_gif_clear, + svm_pf_exception_forced_emulation_test, svm_pf_exception_test_finished, + svm_pf_exception_test_check}, + { "svm_pf_inv_asid_test", next_rip_supported, + svm_pf_inv_asid_test_prepare, default_prepare_gif_clear, + svm_pf_exception_test, svm_pf_exception_test_finished, + svm_pf_exception_test_check}, + { "svm_pf_inv_tlb_ctl_test", next_rip_supported, + svm_pf_inv_tlb_ctl_test_prepare, default_prepare_gif_clear, + svm_pf_exception_test, svm_pf_exception_test_finished, + svm_pf_exception_test_check}, TEST(svm_apic_passthrough_tpr_threshold_test), TEST(svm_cr4_osxsave_test), TEST(svm_guest_state_test), diff --git a/x86/unittests.cfg b/x86/unittests.cfg index 1a89101a5b2dd..65dcf8b6cba89 100644 --- a/x86/unittests.cfg +++ b/x86/unittests.cfg @@ -253,7 +253,7 @@ arch = x86_64 [svm] file = svm.flat smp = 2 -test_args = "-pause_filter_test -svm_intr_intercept_mix_smi -svm_insn_intercept_test" +test_args = "-pause_filter_test -svm_intr_intercept_mix_smi -svm_insn_intercept_test -svm_pf_exception_test -svm_pf_exception_forced_emulation_test -svm_pf_inv_asid_test -svm_pf_inv_tlb_ctl_test" qemu_params = -cpu max,+svm -m 4g arch = x86_64 groups = svm @@ -265,6 +265,37 @@ qemu_params = -cpu max,+svm -m 4g arch = x86_64 groups = svm +[svm_pf_inv_asid_test] +file = svm.flat +test_args = svm_pf_inv_asid_test +qemu_params = -cpu max,+svm -m 4g +arch = x86_64 +groups = svm +timeout = 240 + +[svm_pf_inv_tlb_ctl_test] +file = svm.flat +test_args = svm_pf_inv_tlb_ctl_test +qemu_params = -cpu max,+svm -m 4g +arch = x86_64 +groups = svm +timeout = 240 + +[svm_pf_exception_test] +file = svm.flat +test_args = svm_pf_exception_test +qemu_params = -cpu max,+svm -m 4g +arch = x86_64 +groups = svm + +[svm_pf_exception_fep] +file = svm.flat +test_args = svm_pf_exception_forced_emulation_test +qemu_params = -cpu max,+svm -m 4g +arch = x86_64 +groups = svm +timeout = 480 + [svm_intr_intercept_mix_smi] file = svm.flat test_args = svm_intr_intercept_mix_smi -- 2.52.0.322.g1dd061c0dc-goog Currently the npt access tests only modify permission bits on the leaf npt pte. Increase npt testing coverage by modifying permission bits for all levels of the npt. Also add coverage for all different types of accesses for every permission bit. Add this coverage to improve test parity between nSVM and nVMX. 
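The per-level pattern the new tests follow can be summarized as below. This is a condensed sketch built from helpers used later in this patch (npt_get_pml4e(), find_pte_level(), svm_vmrun(), the vmcb exit fields); the real implementation is the npt_twiddle()/do_npt_access() pair in the diff that follows, and this sketch is illustrative only.

static void npt_perm_walk_sketch(unsigned long gpa, unsigned long clear,
                                 unsigned long set, u64 expected_fault)
{
        int level;

        for (level = 1; level <= PAGE_LEVEL; level++) {
                unsigned long *pte = find_pte_level(npt_get_pml4e(),
                                                    (void *)gpa, level).pte;
                unsigned long orig = *pte;

                /* Clear/set permission bits on this level's NPT entry. */
                *pte = (orig & ~clear) | set;

                /* Flush the nested TLB and let the guest touch the mapping. */
                vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
                svm_vmrun();

                if (expected_fault)
                        report(vmcb->control.exit_code == SVM_EXIT_NPF &&
                               vmcb->control.exit_info_1 == expected_fault,
                               "NPF error code at NPT level %d", level);
                else
                        report(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
                               "access allowed at NPT level %d", level);

                /* Restore the entry before moving to the next level. */
                *pte = orig;
        }
}

Running this loop for every access type (read, write, execute) against every permission bit yields the coverage matrix described above.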
Signed-off-by: Kevin Cheng --- lib/x86/vm.c | 2 +- lib/x86/vm.h | 2 + x86/access.c | 7 - x86/access.h | 11 ++ x86/svm.c | 2 +- x86/svm.h | 1 + x86/svm_npt.c | 374 ++++++++++++++++++++++++++++++++++++++++++-------- 7 files changed, 333 insertions(+), 66 deletions(-) diff --git a/lib/x86/vm.c b/lib/x86/vm.c index 27e7bb4004ef9..b8e6657030af2 100644 --- a/lib/x86/vm.c +++ b/lib/x86/vm.c @@ -4,7 +4,7 @@ #include "alloc_page.h" #include "smp.h" -static pteval_t pte_opt_mask; +pteval_t pte_opt_mask; pteval_t *install_pte(pgd_t *cr3, int pte_level, diff --git a/lib/x86/vm.h b/lib/x86/vm.h index cf39787aa8b02..ed0e77597e298 100644 --- a/lib/x86/vm.h +++ b/lib/x86/vm.h @@ -9,6 +9,8 @@ void setup_5level_page_table(void); +extern pteval_t pte_opt_mask; + struct pte_search { int level; pteval_t *pte; diff --git a/x86/access.c b/x86/access.c index d94910bf54052..142c1d9282f83 100644 --- a/x86/access.c +++ b/x86/access.c @@ -17,13 +17,6 @@ static int invalid_mask; #define PT_BASE_ADDR_MASK ((pt_element_t)((((pt_element_t)1 << 36) - 1) & PAGE_MASK)) #define PT_PSE_BASE_ADDR_MASK (PT_BASE_ADDR_MASK & ~(1ull << 21)) -#define PFERR_PRESENT_MASK (1U << 0) -#define PFERR_WRITE_MASK (1U << 1) -#define PFERR_USER_MASK (1U << 2) -#define PFERR_RESERVED_MASK (1U << 3) -#define PFERR_FETCH_MASK (1U << 4) -#define PFERR_PK_MASK (1U << 5) - #define MSR_EFER 0xc0000080 #define EFER_NX_MASK (1ull << 11) diff --git a/x86/access.h b/x86/access.h index 206a1c86fa0eb..9410085668eae 100644 --- a/x86/access.h +++ b/x86/access.h @@ -4,6 +4,17 @@ #define PT_LEVEL_PML4 4 #define PT_LEVEL_PML5 5 +#define PFERR_PRESENT_MASK (1U << 0) +#define PFERR_WRITE_MASK (1U << 1) +#define PFERR_USER_MASK (1U << 2) +#define PFERR_RESERVED_MASK (1U << 3) +#define PFERR_FETCH_MASK (1U << 4) +#define PFERR_PK_MASK (1U << 5) +#define PFERR_SGX_MASK (1U << 15) +#define PFERR_GUEST_RMP_MASK (1ULL << 31) +#define PFERR_GUEST_FINAL_MASK (1ULL << 32) +#define PFERR_GUEST_PAGE_MASK (1ULL << 33) + void ac_test_run(int page_table_levels, bool force_emulation); #endif // X86_ACCESS_H \ No newline at end of file diff --git a/x86/svm.c b/x86/svm.c index 014feae3b48cc..d65616d063bcc 100644 --- a/x86/svm.c +++ b/x86/svm.c @@ -317,7 +317,7 @@ static void set_additional_vcpu_msr(void *msr_efer) wrmsr(MSR_EFER, (ulong)msr_efer | EFER_SVME); } -static void setup_npt(void) +void setup_npt(void) { u64 size = fwcfg_get_u64(FW_CFG_RAM_SIZE); diff --git a/x86/svm.h b/x86/svm.h index 66733570f0e37..c90759670e08f 100644 --- a/x86/svm.h +++ b/x86/svm.h @@ -446,6 +446,7 @@ void svm_setup_vmrun(u64 rip); int __svm_vmrun(u64 rip); int svm_vmrun(void); void test_set_guest(test_guest_func func); +void setup_npt(void); extern struct vmcb *vmcb; diff --git a/x86/svm_npt.c b/x86/svm_npt.c index e436c43fb1c4c..5d70fd69a0c35 100644 --- a/x86/svm_npt.c +++ b/x86/svm_npt.c @@ -3,115 +3,373 @@ #include "vm.h" #include "alloc_page.h" #include "vmalloc.h" +#include "processor.h" +#include "access.h" + +#include static void *scratch_page; -static void null_test(struct svm_test *test) +enum npt_access_op { + OP_READ, + OP_WRITE, + OP_EXEC, + OP_FLUSH_TLB, + OP_EXIT, +}; +static struct npt_access_test_data { + unsigned long gpa; + unsigned long *gva; + unsigned long hpa; + unsigned long *hva; + enum npt_access_op op; +} npt_access_test_data; + +extern unsigned char ret42_start; +extern unsigned char ret42_end; + +/* Returns 42. 
*/ +asm( + ".align 64\n" + "ret42_start:\n" + "mov $42, %eax\n" + "ret\n" + "ret42_end:\n" +); + +#define MAGIC_VAL_1 0x12345678ul +#define MAGIC_VAL_2 0x87654321ul + +#define PAGE_1G_ORDER 18 + +static void *get_1g_page(void) { + static void *alloc; + + if (!alloc) + alloc = alloc_pages(PAGE_1G_ORDER); + return alloc; } -static void npt_np_prepare(struct svm_test *test) +static void +diagnose_npt_violation_exit_code(u64 expected, u64 actual) { - u64 *pte; - scratch_page = alloc_page(); - pte = npt_get_pte((u64) scratch_page); +#define DIAGNOSE(flag) \ +do { \ + if ((expected & flag) != (actual & flag)) \ + printf(#flag " %sexpected\n", \ + (expected & flag) ? "" : "un"); \ +} while (0) - *pte &= ~1ULL; -} + DIAGNOSE(PFERR_PRESENT_MASK); + DIAGNOSE(PFERR_WRITE_MASK); + DIAGNOSE(PFERR_USER_MASK); + DIAGNOSE(PFERR_RESERVED_MASK); + DIAGNOSE(PFERR_FETCH_MASK); + DIAGNOSE(PFERR_PK_MASK); + DIAGNOSE(PFERR_SGX_MASK); + DIAGNOSE(PFERR_GUEST_RMP_MASK); + DIAGNOSE(PFERR_GUEST_FINAL_MASK); -static void npt_np_test(struct svm_test *test) -{ - (void)*(volatile u64 *)scratch_page; +#undef DIAGNOSE } -static bool npt_np_check(struct svm_test *test) +static unsigned long npt_twiddle(unsigned long gpa, bool mkhuge, int level, + unsigned long clear, unsigned long set) { - u64 *pte = npt_get_pte((u64) scratch_page); + struct npt_access_test_data *data = &npt_access_test_data; + unsigned long orig_pte; + unsigned long *pte; - *pte |= 1ULL; + pte = find_pte_level(npt_get_pml4e(), (void *)gpa, level).pte; + orig_pte = *pte; + report(orig_pte, "Get npt pte for gpa 0x%lx at level %d", gpa, level); - return (vmcb->control.exit_code == SVM_EXIT_NPF) - && (vmcb->control.exit_info_1 == 0x100000004ULL); + if (mkhuge) { + assert(IS_ALIGNED(data->hpa, 1ul << PGDIR_BITS(level))); + *pte = (*pte & ~PT_ADDR_MASK) | data->hpa | PT_PAGE_SIZE_MASK; + } + + /* + * No need for a TLB invalidation here since we always flush the TLB + * via TLB_CONTROL before entering the nested guest in do_npt_access(). + */ + *pte = (*pte & ~clear) | set; + + return orig_pte; } -static void npt_nx_prepare(struct svm_test *test) +static void npt_untwiddle(unsigned long gpa, int level, unsigned long orig_pte) { - u64 *pte; + unsigned long *pte = find_pte_level(npt_get_pml4e(), + (void *)gpa, level).pte; + *pte = orig_pte; +} - test->scratch = rdmsr(MSR_EFER); - wrmsr(MSR_EFER, test->scratch | EFER_NX); +static void do_npt_access(enum npt_access_op op, u64 expected_fault, + u64 expected_paddr) +{ + u32 exit_code; + u64 exit_info_1; + u64 exit_info_2; - /* Clear the guest's EFER.NX, it should not affect NPT behavior. */ - vmcb->save.efer &= ~EFER_NX; + /* Try the access and observe the violation. 
*/ + npt_access_test_data.op = op; + vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; + svm_vmrun(); + + exit_code = vmcb->control.exit_code; + exit_info_1 = vmcb->control.exit_info_1; + exit_info_2 = vmcb->control.exit_info_2; - pte = npt_get_pte((u64) null_test); + if (!expected_fault) { + report(exit_code == SVM_EXIT_VMMCALL, + "NPT access exit code: Expected VMMCALL, received exit " + "code 0x%x with exit_info_1 0x%lx and exit_info_2 " + "0x%lx", + exit_code, exit_info_1, exit_info_2); + return; + } - *pte |= PT64_NX_MASK; + TEST_EXPECT_EQ(exit_code, SVM_EXIT_NPF); + TEST_EXPECT_EQ(exit_info_1, expected_fault); + diagnose_npt_violation_exit_code(expected_fault, exit_info_1); + TEST_EXPECT_EQ(exit_info_2, expected_paddr); } -static bool npt_nx_check(struct svm_test *test) +static void npt_access_at_level_mkhuge(bool mkhuge, int level, + unsigned long clear, unsigned long set, + enum npt_access_op op, + u64 expected_fault) { - u64 *pte = npt_get_pte((u64) null_test); + struct npt_access_test_data *data = &npt_access_test_data; + unsigned long orig_pte; - wrmsr(MSR_EFER, test->scratch); + orig_pte = npt_twiddle(data->gpa, mkhuge, level, clear, set); - *pte &= ~PT64_NX_MASK; + do_npt_access(op, expected_fault, data->gpa); - return (vmcb->control.exit_code == SVM_EXIT_NPF) - && (vmcb->control.exit_info_1 == 0x100000015ULL); + npt_untwiddle(data->gpa, level, orig_pte); } -static void npt_us_prepare(struct svm_test *test) +static void npt_access_at_level(int level, unsigned long clear, unsigned long set, + enum npt_access_op op, u64 expected_fault) { - u64 *pte; + npt_access_at_level_mkhuge(false, level, clear, set, op, expected_fault); - scratch_page = alloc_page(); - pte = npt_get_pte((u64) scratch_page); + if (level == 2 || level == 3) + npt_access_at_level_mkhuge(true, level, clear, set, op, expected_fault); +} - *pte &= ~(1ULL << 2); +static void npt_access_npf(unsigned long clear, unsigned long set, + enum npt_access_op op, u64 expected_fault) +{ + for (int i = 1; i <= PAGE_LEVEL; i++) + npt_access_at_level(i, clear, set, op, expected_fault); } -static void npt_us_test(struct svm_test *test) +static void npt_access_allowed(unsigned long clear, unsigned long set, + enum npt_access_op op) { - (void)*(volatile u64 *)scratch_page; + for (int i = 1; i <= PAGE_LEVEL; i++) + npt_access_at_level(i, clear, set, op, 0); } -static bool npt_us_check(struct svm_test *test) +static void npt_access_test_guest(struct svm_test *test) { - u64 *pte = npt_get_pte((u64) scratch_page); + struct npt_access_test_data *data = &npt_access_test_data; + int (*code)(void) = (int (*)(void)) &data->gva[1]; - *pte |= (1ULL << 2); + while (true) { + switch (data->op) { + case OP_READ: + TEST_EXPECT_EQ(*data->gva, MAGIC_VAL_1); + break; + case OP_WRITE: + *data->gva = MAGIC_VAL_2; + TEST_EXPECT_EQ(*data->gva, MAGIC_VAL_2); + *data->gva = MAGIC_VAL_1; + break; + case OP_EXEC: + TEST_EXPECT_EQ(code(), 42); + break; + case OP_FLUSH_TLB: + write_cr3(read_cr3()); + break; + case OP_EXIT: + return; + default: + report_fail("Unknown op %d", data->op); + } + vmmcall(); + } +} - return (vmcb->control.exit_code == SVM_EXIT_NPF) - && (vmcb->control.exit_info_1 == 0x100000005ULL); +static ulong orig_efer; +static ulong orig_cr4; + +/* + * npt_access_test_setup() must be called before modifying cr4 or efer to + * ensure proper restoration on cleanup. 
+ */ +static void npt_access_test_setup(void) +{ + struct npt_access_test_data *data = &npt_access_test_data; + unsigned long npages = 1ul << PAGE_1G_ORDER; + unsigned long size = npages * PAGE_SIZE; + unsigned long *page_table = current_page_table(); + u64 orig_opt_mask = pte_opt_mask; + + if (!npt_supported()) { + report_skip("NPT not supported"); + return; + } + + assert(npt_get_pml4e()); + + test_set_guest(npt_access_test_guest); + + orig_efer = rdmsr(MSR_EFER); + wrmsr(MSR_EFER, orig_efer | EFER_NX | EFER_LMA); + + orig_cr4 = read_cr4(); + write_cr4(orig_cr4 | X86_CR4_PAE); + + /* Clear the guest's EFER.NX, it should not affect NPT behavior. */ + vmcb->save.efer &= ~EFER_NX; + + /* + * We use data->gpa = 1 << 39 so that test data has a separate pml4 + * entry. + */ + if (cpuid_maxphyaddr() < 40) { + report_skip("Test needs MAXPHYADDR >= 40"); + return; + } + + data->hva = get_1g_page(); + report(data->hva, "Allocate 1g page"); + data->hpa = virt_to_phys(data->hva); + + data->gpa = 1ul << 39; + data->gva = (void *) ALIGN((unsigned long) alloc_vpages(npages * 2), + size); + /* install_pages() creates 4K PTEs by default */ + install_pages(page_table, data->gpa, size, data->gva); + + /* + * Make sure nothing's mapped here so the tests that screw with the + * pml4 entry don't inadvertently break something. + */ + report(!npt_get_pte(data->gpa), "Nothing mapped to gpa 0x%lx", + data->gpa); + report(!npt_get_pte(data->gpa + size - 1), + "Nothing mapped to gpa + %lx", + data->gpa + size - 1); + + /* + * pte_opt_mask is used when installing PTEs and its permission bits. + * Since NPT walks are user accesses, ensure that PT_USER_MASK is set + * for NPT entries as it is not set by default. + */ + pte_opt_mask |= PT_USER_MASK; + /* install_pages() creates 4K PTEs by default */ + install_pages(npt_get_pml4e(), data->hpa, size, + (void *)(ulong)data->gpa); + pte_opt_mask = orig_opt_mask; + + data->hva[0] = MAGIC_VAL_1; + memcpy(&data->hva[1], &ret42_start, &ret42_end - &ret42_start); } -static void npt_rw_prepare(struct svm_test *test) +static void npt_access_test_cleanup(void) { + wrmsr(MSR_EFER, orig_efer); + write_cr4(orig_cr4); - u64 *pte; + /* Reset the npt after each test. 
*/ + setup_npt(); +} - pte = npt_get_pte(0x80000); - *pte &= ~(1ULL << 1); +static void null_test(struct svm_test *test) +{ } -static void npt_rw_test(struct svm_test *test) +static void npt_np_test(void) { - u64 *data = (void *)(0x80000); + npt_access_test_setup(); + npt_access_npf(PT_PRESENT_MASK, 0, OP_READ, + PFERR_GUEST_FINAL_MASK | PFERR_USER_MASK); + npt_access_npf(PT_PRESENT_MASK, 0, OP_WRITE, + PFERR_GUEST_FINAL_MASK | PFERR_WRITE_MASK | + PFERR_USER_MASK); + npt_access_npf(PT_PRESENT_MASK, 0, OP_EXEC, + PFERR_GUEST_FINAL_MASK | PFERR_FETCH_MASK | + PFERR_USER_MASK); + npt_access_test_cleanup(); +} - *data = 0; +static void npt_nx_test(void) +{ + npt_access_test_setup(); + npt_access_allowed(PT_WRITABLE_MASK, PT64_NX_MASK, OP_READ); + npt_access_npf(PT_WRITABLE_MASK, PT64_NX_MASK, OP_WRITE, + PFERR_GUEST_FINAL_MASK | PFERR_WRITE_MASK | + PFERR_USER_MASK | PFERR_PRESENT_MASK); + npt_access_npf(PT_WRITABLE_MASK, PT64_NX_MASK, OP_EXEC, + PFERR_GUEST_FINAL_MASK | PFERR_FETCH_MASK | + PFERR_USER_MASK | PFERR_PRESENT_MASK); + npt_access_test_cleanup(); } -static bool npt_rw_check(struct svm_test *test) +static void npt_us_test(void) { - u64 *pte = npt_get_pte(0x80000); + npt_access_test_setup(); + npt_access_npf(PT_USER_MASK, 0, OP_READ, + PFERR_GUEST_FINAL_MASK | PFERR_USER_MASK | + PFERR_PRESENT_MASK); + npt_access_npf(PT_USER_MASK, 0, OP_WRITE, + PFERR_GUEST_FINAL_MASK | PFERR_WRITE_MASK | + PFERR_USER_MASK | PFERR_PRESENT_MASK); + npt_access_npf(PT_USER_MASK, 0, OP_EXEC, + PFERR_GUEST_FINAL_MASK | PFERR_FETCH_MASK | + PFERR_USER_MASK | PFERR_PRESENT_MASK); + npt_access_test_cleanup(); +} - *pte |= (1ULL << 1); +static void npt_ro_test(void) +{ + npt_access_test_setup(); + npt_access_allowed(PT_WRITABLE_MASK, PT64_NX_MASK, OP_READ); + npt_access_npf(PT_WRITABLE_MASK, PT64_NX_MASK, OP_WRITE, + PFERR_GUEST_FINAL_MASK | PFERR_WRITE_MASK | + PFERR_USER_MASK | PFERR_PRESENT_MASK); + npt_access_npf(PT_WRITABLE_MASK, PT64_NX_MASK, OP_EXEC, + PFERR_GUEST_FINAL_MASK | PFERR_FETCH_MASK | + PFERR_USER_MASK | PFERR_PRESENT_MASK); + npt_access_test_cleanup(); +} - return (vmcb->control.exit_code == SVM_EXIT_NPF) - && (vmcb->control.exit_info_1 == 0x100000007ULL); +static void npt_rw_test(void) +{ + npt_access_test_setup(); + npt_access_allowed(0, PT64_NX_MASK, OP_READ); + npt_access_allowed(0, PT64_NX_MASK, OP_WRITE); + npt_access_npf(0, PT64_NX_MASK, OP_EXEC, + PFERR_GUEST_FINAL_MASK | PFERR_FETCH_MASK | + PFERR_USER_MASK | PFERR_PRESENT_MASK); + npt_access_test_cleanup(); +} + +static void npt_rwx_test(void) +{ + npt_access_test_setup(); + npt_access_allowed(0, 0, OP_READ); + npt_access_allowed(0, 0, OP_WRITE); + npt_access_allowed(0, 0, OP_WRITE); + npt_access_test_cleanup(); } static void npt_rw_pfwalk_prepare(struct svm_test *test) @@ -562,15 +820,17 @@ static void npt_ad_test(void) #define NPT_V2_TEST(name) { #name, .v2 = name } static struct svm_test npt_tests[] = { - NPT_V1_TEST(npt_nx, npt_nx_prepare, null_test, npt_nx_check), - NPT_V1_TEST(npt_np, npt_np_prepare, npt_np_test, npt_np_check), - NPT_V1_TEST(npt_us, npt_us_prepare, npt_us_test, npt_us_check), - NPT_V1_TEST(npt_rw, npt_rw_prepare, npt_rw_test, npt_rw_check), NPT_V1_TEST(npt_rw_pfwalk, npt_rw_pfwalk_prepare, null_test, npt_rw_pfwalk_check), NPT_V1_TEST(npt_l1mmio, npt_l1mmio_prepare, npt_l1mmio_test, npt_l1mmio_check), NPT_V1_TEST(npt_rw_l1mmio, npt_rw_l1mmio_prepare, npt_rw_l1mmio_test, npt_rw_l1mmio_check), NPT_V2_TEST(svm_npt_rsvd_bits_test), NPT_V2_TEST(npt_ad_test), + NPT_V2_TEST(npt_nx_test), + NPT_V2_TEST(npt_np_test), 
+ NPT_V2_TEST(npt_us_test), + NPT_V2_TEST(npt_ro_test), + NPT_V2_TEST(npt_rw_test), + NPT_V2_TEST(npt_rwx_test), { NULL, NULL, NULL, NULL, NULL, NULL, NULL } }; -- 2.52.0.322.g1dd061c0dc-goog Bits 62:59 are ignored if memory protection keys are disabled via the PKE CR4 bit. Verify that accesses are allowed when these bits are set while memory protection keys are disabled. Bits 52:58 are available so test that those are ignored as well. Signed-off-by: Kevin Cheng --- x86/svm_npt.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/x86/svm_npt.c b/x86/svm_npt.c index 5d70fd69a0c35..ab744d41824f8 100644 --- a/x86/svm_npt.c +++ b/x86/svm_npt.c @@ -372,6 +372,35 @@ static void npt_rwx_test(void) npt_access_test_cleanup(); } +static void npt_ignored_bit(int bit) +{ + /* Set the bit. */ + npt_access_allowed(0, 1ul << bit, OP_READ); + npt_access_allowed(0, 1ul << bit, OP_WRITE); + npt_access_allowed(0, 1ul << bit, OP_EXEC); + + /* Clear the bit. */ + npt_access_allowed(1ul << bit, 0, OP_READ); + npt_access_allowed(1ul << bit, 0, OP_WRITE); + npt_access_allowed(1ul << bit, 0, OP_EXEC); +} + +static void npt_ignored_bits_test(void) +{ + ulong saved_cr4 = read_cr4(); + + /* Setup must be called first because it saves the original cr4 state */ + npt_access_test_setup(); + + write_cr4(saved_cr4 & ~X86_CR4_PKE); + + for (int i = 52; i <= 62; i++) + npt_ignored_bit(i); + + write_cr4(saved_cr4); + npt_access_test_cleanup(); +} + static void npt_rw_pfwalk_prepare(struct svm_test *test) { @@ -831,6 +860,7 @@ static struct svm_test npt_tests[] = { NPT_V2_TEST(npt_ro_test), NPT_V2_TEST(npt_rw_test), NPT_V2_TEST(npt_rwx_test), + NPT_V2_TEST(npt_ignored_bits_test), { NULL, NULL, NULL, NULL, NULL, NULL, NULL } }; -- 2.52.0.322.g1dd061c0dc-goog Modify permissions for NPT entries that contain guest page table structures. Verify that NPF VM exit correctly reports fault occurred during the guest page table walk and correctly reports the right violation. Signed-off-by: Kevin Cheng --- x86/svm_npt.c | 223 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 221 insertions(+), 2 deletions(-) diff --git a/x86/svm_npt.c b/x86/svm_npt.c index ab744d41824f8..9380697f36ce9 100644 --- a/x86/svm_npt.c +++ b/x86/svm_npt.c @@ -51,6 +51,12 @@ static void *get_1g_page(void) return alloc; } +static void do_npt_access_op(enum npt_access_op op) +{ + npt_access_test_data.op = op; + svm_vmrun(); +} + static void diagnose_npt_violation_exit_code(u64 expected, u64 actual) { @@ -115,9 +121,8 @@ static void do_npt_access(enum npt_access_op op, u64 expected_fault, u64 exit_info_2; /* Try the access and observe the violation. */ - npt_access_test_data.op = op; vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; - svm_vmrun(); + do_npt_access_op(op); exit_code = vmcb->control.exit_code; exit_info_1 = vmcb->control.exit_info_1; @@ -422,6 +427,215 @@ static bool npt_rw_pfwalk_check(struct svm_test *test) && (vmcb->control.exit_info_2 == read_cr3()); } +/* + * This function modifies the NPT entry that maps the GPA that the guest page + * table entry mapping npt_access_test_data.gva resides on. 
+ */ +static void npt_access_paddr(unsigned long npt_clear, unsigned long npt_set, + unsigned long pte_set, enum npt_access_op op, + bool expect_violation, u64 expected_fault) +{ + struct npt_access_test_data *data = &npt_access_test_data; + unsigned long *ptep; + unsigned long gpa; + unsigned long orig_npte; + unsigned long pte; + u64 orig_opt_mask = pte_opt_mask; + int level; + + /* Modify the guest PTE mapping data->gva according to @pte_set. */ + ptep = get_pte_level(current_page_table(), data->gva, 1); + report(ptep, "Get pte for gva 0x%lx", (unsigned long)data->gva); + report((*ptep & PT_ADDR_MASK) == data->gpa, "gva is correctly mapped"); + *ptep = (*ptep & ~PT_AD_MASK) | pte_set; + do_npt_access_op(OP_FLUSH_TLB); + + /* + * Now modify the access bits on the NPT entry for the GPA that the + * guest PTE resides on. Note that by modifying a single NPT entry, + * we're potentially affecting 512 guest PTEs. However, we've carefully + * constructed our test such that those other 511 PTEs aren't used by + * the guest: data->gva is at the beginning of a 1G huge page, thus the + * PTE we're modifying is at the beginning of a 4K page and the + * following 511 entries are also under our control (and not touched by + * the guest). + */ + gpa = virt_to_phys(ptep); + assert((gpa & ~PAGE_MASK) == 0); + + /* + * Make sure the guest page table page is mapped with a 4K NPT entry, + * otherwise our level=1 twiddling below will fail. We use the + * identity map (gpa = gpa) since page tables are shared with the host. + */ + pte_opt_mask |= PT_USER_MASK; + install_pte(npt_get_pml4e(), /*level=*/1, (void *)(ulong)gpa, + gpa | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK, 0); + pte_opt_mask = orig_opt_mask; + + orig_npte = npt_twiddle(gpa, /*mkhuge=*/0, /*level=1*/1, npt_clear, npt_set); + + if (expect_violation) { + do_npt_access(op, expected_fault, gpa); + npt_untwiddle(gpa, /*level=*/1, orig_npte); + do_npt_access_op(op); + TEST_EXPECT_EQ(vmcb->control.exit_code, SVM_EXIT_VMMCALL); + } else { + do_npt_access(op, 0, gpa); + for (level = PAGE_LEVEL; level > 0; level--) { + pte = *find_pte_level(npt_get_pml4e(), (void *)gpa, level).pte; + report(pte & PT_ACCESSED_MASK, + "Access flag set. PTE val: 0x%lx", + pte); + + if (level == 1) + report(pte & PT_DIRTY_MASK, + "Dirty flag set. PTE val: 0x%lx", + pte); + else + report(!(pte & PT_DIRTY_MASK), + "Dirty flag not set. PTE val: 0x%lx level: %d", + pte, level); + } + + npt_untwiddle(gpa, /*level=*/1, orig_npte); + } + + report(*ptep & PT_ACCESSED_MASK, "Access flag set"); + if ((pte_set & PT_DIRTY_MASK) || op == OP_WRITE) + report(*ptep & PT_DIRTY_MASK, "Dirty flag set"); +} + +static void npt_access_allowed_paddr(unsigned long npt_clear, unsigned long npt_set, + unsigned long pte_set, enum npt_access_op op) +{ + npt_access_paddr(npt_clear, npt_set, pte_set, op, false, 0); +} + +static void npt_access_npf_paddr(unsigned long npt_clear, unsigned long npt_set, + unsigned long pte_set, enum npt_access_op op, + u64 expected_fault) +{ + npt_access_paddr(npt_clear, npt_set, pte_set, op, true, expected_fault); +} + +/* + * All accesses to guest paging structures are considered as writes as far as + * NPT translation is concerned. 
+ */ +static void npt_access_paddr_not_present_test(void) +{ + u32 pte_set_combinations[3] = {0, PT_ACCESSED_MASK, PT_DIRTY_MASK}; + + npt_access_test_setup(); + + for (int i = 0; i < ARRAY_SIZE(pte_set_combinations); i++) { + npt_access_npf_paddr(PT_PRESENT_MASK, 0, + pte_set_combinations[i], OP_READ, + PFERR_GUEST_PAGE_MASK | PFERR_USER_MASK | + PFERR_WRITE_MASK); + npt_access_npf_paddr(PT_PRESENT_MASK, 0, + pte_set_combinations[i], OP_WRITE, + PFERR_GUEST_PAGE_MASK | PFERR_USER_MASK | + PFERR_WRITE_MASK); + npt_access_npf_paddr(PT_PRESENT_MASK, 0, + pte_set_combinations[i], OP_EXEC, + PFERR_GUEST_PAGE_MASK | PFERR_USER_MASK | + PFERR_WRITE_MASK); + } + + npt_access_test_cleanup(); +} + +static void npt_access_paddr_read_only_test(void) +{ + u32 pte_set_combinations[3] = {0, PT_ACCESSED_MASK, PT_DIRTY_MASK}; + + npt_access_test_setup(); + + for (int i = 0; i < ARRAY_SIZE(pte_set_combinations); i++) { + npt_access_npf_paddr(PT_WRITABLE_MASK, PT64_NX_MASK, + pte_set_combinations[i], OP_READ, + PFERR_GUEST_PAGE_MASK | PFERR_USER_MASK | + PFERR_WRITE_MASK | + PFERR_PRESENT_MASK); + npt_access_npf_paddr(PT_WRITABLE_MASK, PT64_NX_MASK, + pte_set_combinations[i], OP_WRITE, + PFERR_GUEST_PAGE_MASK | PFERR_USER_MASK | + PFERR_WRITE_MASK | + PFERR_PRESENT_MASK); + npt_access_npf_paddr(PT_WRITABLE_MASK, PT64_NX_MASK, + pte_set_combinations[i], OP_EXEC, + PFERR_GUEST_PAGE_MASK | PFERR_USER_MASK | + PFERR_WRITE_MASK | + PFERR_PRESENT_MASK); + } + + npt_access_test_cleanup(); +} + +static void npt_access_paddr_read_execute_test(void) +{ + u32 pte_set_combinations[3] = {0, PT_ACCESSED_MASK, PT_DIRTY_MASK}; + + npt_access_test_setup(); + + for (int i = 0; i < ARRAY_SIZE(pte_set_combinations); i++) { + npt_access_npf_paddr( + PT_WRITABLE_MASK, 0, pte_set_combinations[i], OP_READ, + PFERR_GUEST_PAGE_MASK | PFERR_USER_MASK | + PFERR_WRITE_MASK | PFERR_PRESENT_MASK); + npt_access_npf_paddr( + PT_WRITABLE_MASK, 0, pte_set_combinations[i], OP_WRITE, + PFERR_GUEST_PAGE_MASK | PFERR_USER_MASK | + PFERR_WRITE_MASK | PFERR_PRESENT_MASK); + npt_access_npf_paddr( + PT_WRITABLE_MASK, 0, pte_set_combinations[i], OP_EXEC, + PFERR_GUEST_PAGE_MASK | PFERR_USER_MASK | + PFERR_WRITE_MASK | PFERR_PRESENT_MASK); + } + + npt_access_test_cleanup(); +} + +static void npt_access_paddr_read_write_test(void) +{ + u32 pte_set_combinations[3] = {0, PT_ACCESSED_MASK, PT_DIRTY_MASK}; + + npt_access_test_setup(); + + /* Read-write access to paging structure. */ + for (int i = 0; i < ARRAY_SIZE(pte_set_combinations); i++) { + npt_access_allowed_paddr(0, PT_WRITABLE_MASK | PT64_NX_MASK, + pte_set_combinations[i], OP_READ); + npt_access_allowed_paddr(0, PT_WRITABLE_MASK | PT64_NX_MASK, + pte_set_combinations[i], OP_WRITE); + npt_access_allowed_paddr(0, PT_WRITABLE_MASK | PT64_NX_MASK, + pte_set_combinations[i], OP_EXEC); + } + + npt_access_test_cleanup(); +} + +static void npt_access_paddr_read_write_execute_test(void) +{ + u32 pte_set_combinations[3] = {0, PT_ACCESSED_MASK, PT_DIRTY_MASK}; + + npt_access_test_setup(); + + /* RWX access to paging structure. 
*/ + for (int i = 0; i < ARRAY_SIZE(pte_set_combinations); i++) { + npt_access_allowed_paddr(0, PT_WRITABLE_MASK, pte_set_combinations[i], + OP_READ); + npt_access_allowed_paddr(0, PT_WRITABLE_MASK, pte_set_combinations[i], + OP_WRITE); + npt_access_allowed_paddr(0, PT_WRITABLE_MASK, pte_set_combinations[i], + OP_EXEC); + } + + npt_access_test_cleanup(); +} + static bool was_x2apic; static void npt_apic_prepare(void) @@ -861,6 +1075,11 @@ static struct svm_test npt_tests[] = { NPT_V2_TEST(npt_rw_test), NPT_V2_TEST(npt_rwx_test), NPT_V2_TEST(npt_ignored_bits_test), + NPT_V2_TEST(npt_access_paddr_not_present_test), + NPT_V2_TEST(npt_access_paddr_read_only_test), + NPT_V2_TEST(npt_access_paddr_read_write_test), + NPT_V2_TEST(npt_access_paddr_read_write_execute_test), + NPT_V2_TEST(npt_access_paddr_read_execute_test), { NULL, NULL, NULL, NULL, NULL, NULL, NULL } }; -- 2.52.0.322.g1dd061c0dc-goog The APM Vol #2 - 15.20 lists illegal combinations related to event injection. Add testing to verify that these illegal combinations cause an invalid VM exit. Also add testing to verify that legal combinations for event injection work as intended. This includes testing all valid injection types and injecting all exceptions when the exception type is specified. Signed-off-by: Kevin Cheng --- x86/svm_tests.c | 192 ++++++++++++++++++++++++++++++------------------ 1 file changed, 121 insertions(+), 71 deletions(-) diff --git a/x86/svm_tests.c b/x86/svm_tests.c index a40468693b396..a069add43d078 100644 --- a/x86/svm_tests.c +++ b/x86/svm_tests.c @@ -1674,74 +1674,6 @@ static bool vnmi_check(struct svm_test *test) return get_test_stage(test) == 3; } -static volatile int count_exc = 0; - -static void my_isr(struct ex_regs *r) -{ - count_exc++; -} - -static void exc_inject_prepare(struct svm_test *test) -{ - default_prepare(test); - handle_exception(DE_VECTOR, my_isr); - handle_exception(NMI_VECTOR, my_isr); -} - - -static void exc_inject_test(struct svm_test *test) -{ - asm volatile ("vmmcall\n\tvmmcall\n\t"); -} - -static bool exc_inject_finished(struct svm_test *test) -{ - switch (get_test_stage(test)) { - case 0: - if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) { - report_fail("VMEXIT not due to vmmcall. Exit reason 0x%x", - vmcb->control.exit_code); - return true; - } - vmcb->save.rip += 3; - vmcb->control.event_inj = NMI_VECTOR | SVM_EVTINJ_TYPE_EXEPT | SVM_EVTINJ_VALID; - break; - - case 1: - if (vmcb->control.exit_code != SVM_EXIT_ERR) { - report_fail("VMEXIT not due to error. Exit reason 0x%x", - vmcb->control.exit_code); - return true; - } - report(count_exc == 0, "exception with vector 2 not injected"); - vmcb->control.event_inj = DE_VECTOR | SVM_EVTINJ_TYPE_EXEPT | SVM_EVTINJ_VALID; - break; - - case 2: - if (vmcb->control.exit_code != SVM_EXIT_VMMCALL) { - report_fail("VMEXIT not due to vmmcall. 
Exit reason 0x%x", - vmcb->control.exit_code); - return true; - } - vmcb->save.rip += 3; - report(count_exc == 1, "divide overflow exception injected"); - report(!(vmcb->control.event_inj & SVM_EVTINJ_VALID), "eventinj.VALID cleared"); - break; - - default: - return true; - } - - inc_test_stage(test); - - return get_test_stage(test) == 3; -} - -static bool exc_inject_check(struct svm_test *test) -{ - return count_exc == 1 && get_test_stage(test) == 3; -} - static volatile bool virq_fired; static volatile unsigned long virq_rip; @@ -2548,6 +2480,126 @@ static void test_dr(void) vmcb->save.dr7 = dr_saved; } +/* Returns true if exception can be injected via the SVM_EVTINJ_TYPE_EXEPT type */ +static bool is_injectable_exception(int vec) +{ + /* + * Vectors that do not correspond to an exception are excluded. NMI is + * not an exception so it is excluded. BR and OF are excluded because + * BOUND and INTO are not legal in 64-bit mode. + * + * The VE vector is excluded because it is Intel only. + * + * The HV and VC vectors are excluded because they are only relevant + * within secure guest VMs. + */ + static u8 exception_vectors[32] = { + [DE_VECTOR] = 1, [DB_VECTOR] = 1, [BP_VECTOR] = 1, + [UD_VECTOR] = 1, [NM_VECTOR] = 1, [DF_VECTOR] = 1, + [TS_VECTOR] = 1, [NP_VECTOR] = 1, [SS_VECTOR] = 1, + [GP_VECTOR] = 1, [PF_VECTOR] = 1, [MF_VECTOR] = 1, + [AC_VECTOR] = 1, [MC_VECTOR] = 1, [XF_VECTOR] = 1, + [CP_VECTOR] = 1, [SX_VECTOR] = 1, + }; + + return exception_vectors[vec]; +} + +static bool is_valid_injection_type_mask(int type_mask) +{ + return type_mask == SVM_EVTINJ_TYPE_INTR || + type_mask == SVM_EVTINJ_TYPE_NMI || + type_mask == SVM_EVTINJ_TYPE_EXEPT || + type_mask == SVM_EVTINJ_TYPE_SOFT; +} + +static volatile bool event_injection_handled; +static void event_injection_irq_handler(isr_regs_t *regs) +{ + event_injection_handled = true; + vmmcall(); +} + +static void event_injection_exception_handler(struct ex_regs *r) +{ + event_injection_handled = true; + vmmcall(); +} + +static void test_event_injection(void) +{ + u32 event_inj_saved = vmcb->control.event_inj, vector = 0x22, event_inj; + int type, type_mask; + bool reserved; + + handle_exception(DE_VECTOR, event_injection_exception_handler); + handle_irq(vector, event_injection_irq_handler); + + /* Setting reserved values of TYPE is illegal */ + for (type = 0; type < 8; type++) { + type_mask = type << SVM_EVTINJ_TYPE_SHIFT; + reserved = !is_valid_injection_type_mask(type_mask); + event_injection_handled = false; + event_inj = SVM_EVTINJ_VALID; + + switch (type_mask) { + case SVM_EVTINJ_TYPE_EXEPT: + event_inj |= DE_VECTOR; + break; + default: + event_inj |= vector; + } + + vmcb->control.event_inj = event_inj | + (type << SVM_EVTINJ_TYPE_SHIFT); + if (reserved) { + report(svm_vmrun() == SVM_EXIT_ERR, + "Test EVENTINJ error code with type %d", type); + report(!event_injection_handled, + "Reserved type %d ignores EVENTINJ vector field", type); + } else { + report(svm_vmrun() == SVM_EXIT_VMMCALL, + "Test EVENTINJ delivers with type %d", type); + } + + if (type_mask == SVM_EVTINJ_TYPE_NMI) + report(!event_injection_handled, + "Injected NMI ignores EVENTINJ vector field"); + else if (!reserved) + report(event_injection_handled, + "Test EVENTINJ IRQ handler invoked with type %d", type); + + vmcb->control.event_inj = event_inj_saved; + } + + /* + * It is illegal to specify event injection type 3 (Exception) with a + * vector that does not correspond to an exception. 
+ */ + event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT; + for (vector = 0; vector < 256; vector++) { + vmcb->control.event_inj = event_inj | vector; + event_injection_handled = false; + + if (vector >= 32 || !is_injectable_exception(vector)) { + report(svm_vmrun() == SVM_EXIT_ERR, + "Test EVENTINJ exception type error code with vector %d", + vector); + } else { + handle_exception(vector, event_injection_exception_handler); + report(svm_vmrun() == SVM_EXIT_VMMCALL, + "Test EVENTINJ exception type delivers with vector %d", + vector); + report(event_injection_handled, + "Test EVENTINJ exception handler invoked with vector %d", + vector); + } + + vmcb->control.event_inj = event_inj_saved; + } +} + + asm( "insn_sidt: sidt idt_descr;ret\n\t" "insn_sgdt: sgdt gdt_descr;ret\n\t" @@ -2893,6 +2945,7 @@ static void svm_guest_state_test(void) test_dr(); test_msrpm_iopm_bitmap_addrs(); test_canonicalization(); + test_event_injection(); } extern void guest_rflags_test_guest(struct svm_test *test); @@ -4074,9 +4127,6 @@ struct svm_test svm_tests[] = { { "latency_svm_insn", default_supported, lat_svm_insn_prepare, default_prepare_gif_clear, null_test, lat_svm_insn_finished, lat_svm_insn_check }, - { "exc_inject", default_supported, exc_inject_prepare, - default_prepare_gif_clear, exc_inject_test, - exc_inject_finished, exc_inject_check }, { "pending_event", default_supported, pending_event_prepare, default_prepare_gif_clear, pending_event_test, pending_event_finished, pending_event_check }, -- 2.52.0.322.g1dd061c0dc-goog
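
For reference, every legal combination exercised by test_event_injection() leaves the EV (error-code valid) bit clear. The sketch below shows how an injection that additionally pushes an error code could be exercised; it reuses the handler and report() pattern from the patch above, but SVM_EVTINJ_VALID_ERR and vmcb->control.event_inj_err are assumed names taken from the Linux svm.h layout and should be verified against the framework's svm.h before use.

/*
 * Sketch only, not part of this series: inject #GP with EV=1 so the supplied
 * error code is pushed onto the guest handler's stack.  SVM_EVTINJ_VALID_ERR
 * and event_inj_err are assumed to mirror the Linux svm.h definitions.
 */
static void test_event_injection_error_code(void)
{
	u32 event_inj_saved = vmcb->control.event_inj;

	handle_exception(GP_VECTOR, event_injection_exception_handler);
	event_injection_handled = false;

	/* #GP architecturally pushes an error code, so set EV and supply one. */
	vmcb->control.event_inj = GP_VECTOR | SVM_EVTINJ_TYPE_EXEPT |
				  SVM_EVTINJ_VALID | SVM_EVTINJ_VALID_ERR;
	vmcb->control.event_inj_err = 0;

	report(svm_vmrun() == SVM_EXIT_VMMCALL && event_injection_handled,
	       "EVENTINJ #GP with error code delivered to guest handler");

	vmcb->control.event_inj = event_inj_saved;
}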