Needed for ARM nested virt support. Generated using util/update_headers.sh. Signed-off-by: Andre Przywara --- arm64/include/asm/kvm.h | 23 +++++++++-- include/linux/kvm.h | 31 +++++++++++++++ include/linux/virtio_net.h | 13 ++++++ include/linux/virtio_pci.h | 1 + riscv/include/asm/kvm.h | 2 + x86/include/asm/kvm.h | 81 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 147 insertions(+), 4 deletions(-) diff --git a/arm64/include/asm/kvm.h b/arm64/include/asm/kvm.h index 568bf858f..ed5f38926 100644 --- a/arm64/include/asm/kvm.h +++ b/arm64/include/asm/kvm.h @@ -105,6 +105,7 @@ struct kvm_regs { #define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */ #define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */ #define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */ +#define KVM_ARM_VCPU_HAS_EL2_E2H0 8 /* Limit NV support to E2H RES0 */ struct kvm_vcpu_init { __u32 target; @@ -371,6 +372,7 @@ enum { #endif }; +/* Vendor hyper call function numbers 0-63 */ #define KVM_REG_ARM_VENDOR_HYP_BMAP KVM_REG_ARM_FW_FEAT_BMAP_REG(2) enum { @@ -381,6 +383,17 @@ enum { #endif }; +/* Vendor hyper call function numbers 64-127 */ +#define KVM_REG_ARM_VENDOR_HYP_BMAP_2 KVM_REG_ARM_FW_FEAT_BMAP_REG(3) + +enum { + KVM_REG_ARM_VENDOR_HYP_BIT_DISCOVER_IMPL_VER = 0, + KVM_REG_ARM_VENDOR_HYP_BIT_DISCOVER_IMPL_CPUS = 1, +#ifdef __KERNEL__ + KVM_REG_ARM_VENDOR_HYP_BMAP_2_BIT_COUNT, +#endif +}; + /* Device Control API on vm fd */ #define KVM_ARM_VM_SMCCC_CTRL 0 #define KVM_ARM_VM_SMCCC_FILTER 0 @@ -403,6 +416,7 @@ enum { #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 #define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8 +#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ 9 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) @@ -417,10 +431,11 @@ enum { /* Device Control API on vcpu fd */ #define KVM_ARM_VCPU_PMU_V3_CTRL 0 -#define KVM_ARM_VCPU_PMU_V3_IRQ 0 -#define KVM_ARM_VCPU_PMU_V3_INIT 1 -#define KVM_ARM_VCPU_PMU_V3_FILTER 2 -#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3 +#define KVM_ARM_VCPU_PMU_V3_IRQ 0 +#define KVM_ARM_VCPU_PMU_V3_INIT 1 +#define KVM_ARM_VCPU_PMU_V3_FILTER 2 +#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3 +#define KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS 4 #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 45e6d8fca..7a4c35ff0 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -178,6 +178,7 @@ struct kvm_xen_exit { #define KVM_EXIT_NOTIFY 37 #define KVM_EXIT_LOONGARCH_IOCSR 38 #define KVM_EXIT_MEMORY_FAULT 39 +#define KVM_EXIT_TDX 40 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -375,6 +376,7 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_WAKEUP 4 #define KVM_SYSTEM_EVENT_SUSPEND 5 #define KVM_SYSTEM_EVENT_SEV_TERM 6 +#define KVM_SYSTEM_EVENT_TDX_FATAL 7 __u32 type; __u32 ndata; union { @@ -446,6 +448,31 @@ struct kvm_run { __u64 gpa; __u64 size; } memory_fault; + /* KVM_EXIT_TDX */ + struct { + __u64 flags; + __u64 nr; + union { + struct { + __u64 ret; + __u64 data[5]; + } unknown; + struct { + __u64 ret; + __u64 gpa; + __u64 size; + } get_quote; + struct { + __u64 ret; + __u64 leaf; + __u64 r11, r12, r13, r14; + } get_tdvmcall_info; + struct { + __u64 ret; + __u64 vector; + } setup_event_notify; + }; + } tdx; /* Fix the size of the union. 
*/ char padding[256]; }; @@ -929,6 +956,10 @@ struct kvm_enable_cap { #define KVM_CAP_PRE_FAULT_MEMORY 236 #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 #define KVM_CAP_X86_GUEST_MODE 238 +#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 +#define KVM_CAP_ARM_EL2 240 +#define KVM_CAP_ARM_EL2_E2H0 241 +#define KVM_CAP_RISCV_MP_STATE_RESET 242 struct kvm_irq_routing_irqchip { __u32 irqchip; diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index ac9174717..963540dea 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -327,6 +327,19 @@ struct virtio_net_rss_config { __u8 hash_key_data[/* hash_key_length */]; }; +struct virtio_net_rss_config_hdr { + __le32 hash_types; + __le16 indirection_table_mask; + __le16 unclassified_queue; + __le16 indirection_table[/* 1 + indirection_table_mask */]; +}; + +struct virtio_net_rss_config_trailer { + __le16 max_tx_vq; + __u8 hash_key_length; + __u8 hash_key_data[/* hash_key_length */]; +}; + #define VIRTIO_NET_CTRL_MQ_RSS_CONFIG 1 /* diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h index 8549d4571..c691ac210 100644 --- a/include/linux/virtio_pci.h +++ b/include/linux/virtio_pci.h @@ -246,6 +246,7 @@ struct virtio_pci_cfg_cap { #define VIRTIO_ADMIN_CMD_LIST_USE 0x1 /* Admin command group type. */ +#define VIRTIO_ADMIN_GROUP_TYPE_SELF 0x0 #define VIRTIO_ADMIN_GROUP_TYPE_SRIOV 0x1 /* Transitional device admin command. */ diff --git a/riscv/include/asm/kvm.h b/riscv/include/asm/kvm.h index f06bc5efc..5f59fd226 100644 --- a/riscv/include/asm/kvm.h +++ b/riscv/include/asm/kvm.h @@ -182,6 +182,8 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_SVVPTC, KVM_RISCV_ISA_EXT_ZABHA, KVM_RISCV_ISA_EXT_ZICCRSE, + KVM_RISCV_ISA_EXT_ZAAMO, + KVM_RISCV_ISA_EXT_ZALRSC, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/x86/include/asm/kvm.h b/x86/include/asm/kvm.h index 9e75da97b..0f15d6838 100644 --- a/x86/include/asm/kvm.h +++ b/x86/include/asm/kvm.h @@ -441,6 +441,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6) #define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7) #define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8) +#define KVM_X86_QUIRK_IGNORE_GUEST_PAT (1 << 9) #define KVM_STATE_NESTED_FORMAT_VMX 0 #define KVM_STATE_NESTED_FORMAT_SVM 1 @@ -559,6 +560,9 @@ struct kvm_x86_mce { #define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) #define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8) +#define KVM_XEN_MSR_MIN_INDEX 0x40000000u +#define KVM_XEN_MSR_MAX_INDEX 0x4fffffffu + struct kvm_xen_hvm_config { __u32 flags; __u32 msr; @@ -841,6 +845,7 @@ struct kvm_sev_snp_launch_start { }; /* Kept in sync with firmware values for simplicity. */ +#define KVM_SEV_PAGE_TYPE_INVALID 0x0 #define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1 #define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3 #define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4 @@ -927,4 +932,80 @@ struct kvm_hyperv_eventfd { #define KVM_X86_SNP_VM 4 #define KVM_X86_TDX_VM 5 +/* Trust Domain eXtension sub-ioctl() commands. */ +enum kvm_tdx_cmd_id { + KVM_TDX_CAPABILITIES = 0, + KVM_TDX_INIT_VM, + KVM_TDX_INIT_VCPU, + KVM_TDX_INIT_MEM_REGION, + KVM_TDX_FINALIZE_VM, + KVM_TDX_GET_CPUID, + + KVM_TDX_CMD_NR_MAX, +}; + +struct kvm_tdx_cmd { + /* enum kvm_tdx_cmd_id */ + __u32 id; + /* flags for sub-commend. If sub-command doesn't use this, set zero. */ + __u32 flags; + /* + * data for each sub-command. An immediate or a pointer to the actual + * data in process virtual address. If sub-command doesn't use it, + * set zero. + */ + __u64 data; + /* + * Auxiliary error code. 
The sub-command may return TDX SEAMCALL + * status code in addition to -Exxx. + */ + __u64 hw_error; +}; + +struct kvm_tdx_capabilities { + __u64 supported_attrs; + __u64 supported_xfam; + + __u64 kernel_tdvmcallinfo_1_r11; + __u64 user_tdvmcallinfo_1_r11; + __u64 kernel_tdvmcallinfo_1_r12; + __u64 user_tdvmcallinfo_1_r12; + + __u64 reserved[250]; + + /* Configurable CPUID bits for userspace */ + struct kvm_cpuid2 cpuid; +}; + +struct kvm_tdx_init_vm { + __u64 attributes; + __u64 xfam; + __u64 mrconfigid[6]; /* sha384 digest */ + __u64 mrowner[6]; /* sha384 digest */ + __u64 mrownerconfig[6]; /* sha384 digest */ + + /* The total space for TD_PARAMS before the CPUIDs is 256 bytes */ + __u64 reserved[12]; + + /* + * Call KVM_TDX_INIT_VM before vcpu creation, thus before + * KVM_SET_CPUID2. + * This configuration supersedes KVM_SET_CPUID2s for VCPUs because the + * TDX module directly virtualizes those CPUIDs without VMM. The user + * space VMM, e.g. qemu, should make KVM_SET_CPUID2 consistent with + * those values. If it doesn't, KVM may have wrong idea of vCPUIDs of + * the guest, and KVM may wrongly emulate CPUIDs or MSRs that the TDX + * module doesn't virtualize. + */ + struct kvm_cpuid2 cpuid; +}; + +#define KVM_TDX_MEASURE_MEMORY_REGION _BITULL(0) + +struct kvm_tdx_init_mem_region { + __u64 source_addr; + __u64 gpa; + __u64 nr_pages; +}; + #endif /* _ASM_X86_KVM_H */ -- 2.25.1 The ARMv8.3 architecture update includes support for nested virtualization. Allow the user to specify "--nested" to start a guest in (virtual) EL2 instead of EL1. This will also change the PSCI conduit from HVC to SMC in the device tree. Signed-off-by: Andre Przywara --- arm64/fdt.c | 5 ++++- arm64/include/kvm/kvm-config-arch.h | 5 ++++- arm64/kvm-cpu.c | 12 +++++++++++- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/arm64/fdt.c b/arm64/fdt.c index df7775876..98f1dd9d4 100644 --- a/arm64/fdt.c +++ b/arm64/fdt.c @@ -205,7 +205,10 @@ static int setup_fdt(struct kvm *kvm) _FDT(fdt_property_string(fdt, "compatible", "arm,psci")); fns = &psci_0_1_fns; } - _FDT(fdt_property_string(fdt, "method", "hvc")); + if (kvm->cfg.arch.nested_virt) + _FDT(fdt_property_string(fdt, "method", "smc")); + else + _FDT(fdt_property_string(fdt, "method", "hvc")); _FDT(fdt_property_cell(fdt, "cpu_suspend", fns->cpu_suspend)); _FDT(fdt_property_cell(fdt, "cpu_off", fns->cpu_off)); _FDT(fdt_property_cell(fdt, "cpu_on", fns->cpu_on)); diff --git a/arm64/include/kvm/kvm-config-arch.h b/arm64/include/kvm/kvm-config-arch.h index ee031f010..a1dac28e6 100644 --- a/arm64/include/kvm/kvm-config-arch.h +++ b/arm64/include/kvm/kvm-config-arch.h @@ -10,6 +10,7 @@ struct kvm_config_arch { bool aarch32_guest; bool has_pmuv3; bool mte_disabled; + bool nested_virt; u64 kaslr_seed; enum irqchip_type irqchip; u64 fw_addr; @@ -57,6 +58,8 @@ int sve_vl_parser(const struct option *opt, const char *arg, int unset); "Type of interrupt controller to emulate in the guest", \ irqchip_parser, NULL), \ OPT_U64('\0', "firmware-address", &(cfg)->fw_addr, \ - "Address where firmware should be loaded"), + "Address where firmware should be loaded"), \ + OPT_BOOLEAN('\0', "nested", &(cfg)->nested_virt, \ + "Start VCPUs in EL2 (for nested virt)"), #endif /* ARM_COMMON__KVM_CONFIG_ARCH_H */ diff --git a/arm64/kvm-cpu.c b/arm64/kvm-cpu.c index 94c08a4d7..42dc11dad 100644 --- a/arm64/kvm-cpu.c +++ b/arm64/kvm-cpu.c @@ -71,6 +71,12 @@ static void kvm_cpu__select_features(struct kvm *kvm, struct kvm_vcpu_init *init /* Enable SVE if available */ if 
(kvm__supports_extension(kvm, KVM_CAP_ARM_SVE))
 		init->features[0] |= 1UL << KVM_ARM_VCPU_SVE;
+
+	if (kvm->cfg.arch.nested_virt) {
+		if (!kvm__supports_extension(kvm, KVM_CAP_ARM_EL2))
+			die("EL2 (nested virt) is not supported");
+		init->features[0] |= 1UL << KVM_ARM_VCPU_HAS_EL2;
+	}
 }
 
 static int vcpu_configure_sve(struct kvm_cpu *vcpu)
@@ -313,7 +319,11 @@ static void reset_vcpu_aarch64(struct kvm_cpu *vcpu)
 	reg.addr = (u64)&data;
 
 	/* pstate = all interrupts masked */
-	data = PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1h;
+	data = PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT;
+	if (vcpu->kvm->cfg.arch.nested_virt)
+		data |= PSR_MODE_EL2h;
+	else
+		data |= PSR_MODE_EL1h;
 	reg.id = ARM64_CORE_REG(regs.pstate);
 	if (ioctl(vcpu->vcpu_fd, KVM_SET_ONE_REG, &reg) < 0)
 		die_perror("KVM_SET_ONE_REG failed (spsr[EL1])");
-- 
2.25.1

Use the new VGIC KVM device attribute to set the maintenance IRQ.
This is fixed to use PPI 9, as a platform decision made by kvmtool,
matching the SBSA recommendation.

Take the opportunity to pass the kvm pointer to gic__generate_fdt_nodes(),
as this simplifies the call and gives us access to the nested_virt flag
along the way.

Signed-off-by: Andre Przywara
---
 arm64/arm-cpu.c         |  2 +-
 arm64/gic.c             | 26 ++++++++++++++++++++++++--
 arm64/include/kvm/gic.h |  2 +-
 3 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/arm64/arm-cpu.c b/arm64/arm-cpu.c
index 69bb2cb2c..0843ac051 100644
--- a/arm64/arm-cpu.c
+++ b/arm64/arm-cpu.c
@@ -14,7 +14,7 @@ static void generate_fdt_nodes(void *fdt, struct kvm *kvm)
 {
 	int timer_interrupts[4] = {13, 14, 11, 10};
 
-	gic__generate_fdt_nodes(fdt, kvm->cfg.arch.irqchip);
+	gic__generate_fdt_nodes(fdt, kvm);
 	timer__generate_fdt_nodes(fdt, kvm, timer_interrupts);
 	pmu__generate_fdt_nodes(fdt, kvm);
 }
diff --git a/arm64/gic.c b/arm64/gic.c
index b0d3a1abb..e35986c06 100644
--- a/arm64/gic.c
+++ b/arm64/gic.c
@@ -11,6 +11,8 @@
 
 #define IRQCHIP_GIC 0
 
+#define GIC_MAINT_IRQ 9
+
 static int gic_fd = -1;
 static u64 gic_redists_base;
 static u64 gic_redists_size;
@@ -302,10 +304,15 @@ static int gic__init_gic(struct kvm *kvm)
 
 	int lines = irq__get_nr_allocated_lines();
 	u32 nr_irqs = ALIGN(lines, 32) + GIC_SPI_IRQ_BASE;
+	u32 maint_irq = GIC_PPI_IRQ_BASE + GIC_MAINT_IRQ;
 	struct kvm_device_attr nr_irqs_attr = {
 		.group = KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
 		.addr = (u64)(unsigned long)&nr_irqs,
 	};
+	struct kvm_device_attr maint_irq_attr = {
+		.group = KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ,
+		.addr = (u64)(unsigned long)&maint_irq,
+	};
 	struct kvm_device_attr vgic_init_attr = {
 		.group = KVM_DEV_ARM_VGIC_GRP_CTRL,
 		.attr = KVM_DEV_ARM_VGIC_CTRL_INIT,
@@ -325,6 +332,13 @@ static int gic__init_gic(struct kvm *kvm)
 			return ret;
 	}
 
+	if (kvm->cfg.arch.nested_virt &&
+	    !ioctl(gic_fd, KVM_HAS_DEVICE_ATTR, &maint_irq_attr)) {
+		ret = ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &maint_irq_attr);
+		if (ret)
+			return ret;
+	}
+
 	irq__routing_init(kvm);
 
 	if (!ioctl(gic_fd, KVM_HAS_DEVICE_ATTR, &vgic_init_attr)) {
@@ -342,7 +356,7 @@ static int gic__init_gic(struct kvm *kvm)
 }
 late_init(gic__init_gic)
 
-void gic__generate_fdt_nodes(void *fdt, enum irqchip_type type)
+void gic__generate_fdt_nodes(void *fdt, struct kvm *kvm)
 {
 	const char *compatible, *msi_compatible = NULL;
 	u64 msi_prop[2];
@@ -350,8 +364,12 @@ void gic__generate_fdt_nodes(void *fdt, enum irqchip_type type)
 		cpu_to_fdt64(ARM_GIC_DIST_BASE), cpu_to_fdt64(ARM_GIC_DIST_SIZE),
 		0, 0, /* to be filled */
 	};
+	u32 maint_irq[] = {
+		cpu_to_fdt32(GIC_FDT_IRQ_TYPE_PPI), cpu_to_fdt32(GIC_MAINT_IRQ),
+		gic__get_fdt_irq_cpumask(kvm) |
IRQ_TYPE_LEVEL_HIGH + }; - switch (type) { + switch (kvm->cfg.arch.irqchip) { case IRQCHIP_GICV2M: msi_compatible = "arm,gic-v2m-frame"; /* fall-through */ @@ -377,6 +395,10 @@ void gic__generate_fdt_nodes(void *fdt, enum irqchip_type type) _FDT(fdt_property_cell(fdt, "#interrupt-cells", GIC_FDT_IRQ_NUM_CELLS)); _FDT(fdt_property(fdt, "interrupt-controller", NULL, 0)); _FDT(fdt_property(fdt, "reg", reg_prop, sizeof(reg_prop))); + if (kvm->cfg.arch.nested_virt) { + _FDT(fdt_property(fdt, "interrupts", maint_irq, + sizeof(maint_irq))); + } _FDT(fdt_property_cell(fdt, "phandle", PHANDLE_GIC)); _FDT(fdt_property_cell(fdt, "#address-cells", 2)); _FDT(fdt_property_cell(fdt, "#size-cells", 2)); diff --git a/arm64/include/kvm/gic.h b/arm64/include/kvm/gic.h index ad8bcbf21..8490cca60 100644 --- a/arm64/include/kvm/gic.h +++ b/arm64/include/kvm/gic.h @@ -36,7 +36,7 @@ struct kvm; int gic__alloc_irqnum(void); int gic__create(struct kvm *kvm, enum irqchip_type type); int gic__create_gicv2m_frame(struct kvm *kvm, u64 msi_frame_addr); -void gic__generate_fdt_nodes(void *fdt, enum irqchip_type type); +void gic__generate_fdt_nodes(void *fdt, struct kvm *kvm); u32 gic__get_fdt_irq_cpumask(struct kvm *kvm); int gic__add_irqfd(struct kvm *kvm, unsigned int gsi, int trigger_fd, -- 2.25.1 From: Marc Zyngier KVM allows the offsetting of the global counter in order to help with migration of a VM. This offset applies cumulatively with the offsets provided by the architecture. Although kvmtool doesn't provide a way to migrate a VM, controlling this offset is useful to test the timer subsystem. Add the command line option --counter-offset to allow setting this value when creating a VM. Signed-off-by: Marc Zyngier Signed-off-by: Andre Przywara --- arm64/include/kvm/kvm-config-arch.h | 3 +++ arm64/kvm.c | 17 +++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/arm64/include/kvm/kvm-config-arch.h b/arm64/include/kvm/kvm-config-arch.h index a1dac28e6..44c43367b 100644 --- a/arm64/include/kvm/kvm-config-arch.h +++ b/arm64/include/kvm/kvm-config-arch.h @@ -14,6 +14,7 @@ struct kvm_config_arch { u64 kaslr_seed; enum irqchip_type irqchip; u64 fw_addr; + u64 counter_offset; unsigned int sve_max_vq; bool no_pvtime; }; @@ -59,6 +60,8 @@ int sve_vl_parser(const struct option *opt, const char *arg, int unset); irqchip_parser, NULL), \ OPT_U64('\0', "firmware-address", &(cfg)->fw_addr, \ "Address where firmware should be loaded"), \ + OPT_U64('\0', "counter-offset", &(cfg)->counter_offset, \ + "Specify the counter offset, defaulting to 0"), \ OPT_BOOLEAN('\0', "nested", &(cfg)->nested_virt, \ "Start VCPUs in EL2 (for nested virt)"), diff --git a/arm64/kvm.c b/arm64/kvm.c index 23b4dab1f..6e971dd78 100644 --- a/arm64/kvm.c +++ b/arm64/kvm.c @@ -119,6 +119,22 @@ static void kvm__arch_enable_mte(struct kvm *kvm) pr_debug("MTE capability enabled"); } +static void kvm__arch_set_counter_offset(struct kvm *kvm) +{ + struct kvm_arm_counter_offset offset = { + .counter_offset = kvm->cfg.arch.counter_offset, + }; + + if (!kvm->cfg.arch.counter_offset) + return; + + if (!kvm__supports_extension(kvm, KVM_CAP_COUNTER_OFFSET)) + die("No support for global counter offset"); + + if (ioctl(kvm->vm_fd, KVM_ARM_SET_COUNTER_OFFSET, &offset)) + die_perror("KVM_ARM_SET_COUNTER_OFFSET"); +} + void kvm__arch_init(struct kvm *kvm) { /* Create the virtual GIC. 
*/ @@ -126,6 +142,7 @@ void kvm__arch_init(struct kvm *kvm) die("Failed to create virtual GIC"); kvm__arch_enable_mte(kvm); + kvm__arch_set_counter_offset(kvm); } static u64 kvm__arch_get_payload_region_size(struct kvm *kvm) -- 2.25.1 From: Marc Zyngier The --nested option allows a guest to boot at EL2 without FEAT_E2H0 (i.e. mandating VHE support). While this is great for "modern" operating systems and hypervisors, a few legacy guests are stuck in a distant past. To support those, add the --e2h0 command line option, that exposes FEAT_E2H0 to the guest, at the expense of a number of other features, such as FEAT_NV2. This is conditioned on the host itself supporting FEAT_E2H0. Signed-off-by: Marc Zyngier Signed-off-by: Andre Przywara --- arm64/include/kvm/kvm-config-arch.h | 5 ++++- arm64/kvm-cpu.c | 5 +++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arm64/include/kvm/kvm-config-arch.h b/arm64/include/kvm/kvm-config-arch.h index 44c43367b..73bf4211a 100644 --- a/arm64/include/kvm/kvm-config-arch.h +++ b/arm64/include/kvm/kvm-config-arch.h @@ -11,6 +11,7 @@ struct kvm_config_arch { bool has_pmuv3; bool mte_disabled; bool nested_virt; + bool e2h0; u64 kaslr_seed; enum irqchip_type irqchip; u64 fw_addr; @@ -63,6 +64,8 @@ int sve_vl_parser(const struct option *opt, const char *arg, int unset); OPT_U64('\0', "counter-offset", &(cfg)->counter_offset, \ "Specify the counter offset, defaulting to 0"), \ OPT_BOOLEAN('\0', "nested", &(cfg)->nested_virt, \ - "Start VCPUs in EL2 (for nested virt)"), + "Start VCPUs in EL2 (for nested virt)"), \ + OPT_BOOLEAN('\0', "e2h0", &(cfg)->e2h0, \ + "Create guest without VHE support"), #endif /* ARM_COMMON__KVM_CONFIG_ARCH_H */ diff --git a/arm64/kvm-cpu.c b/arm64/kvm-cpu.c index 42dc11dad..5e4f3a7dd 100644 --- a/arm64/kvm-cpu.c +++ b/arm64/kvm-cpu.c @@ -76,6 +76,11 @@ static void kvm_cpu__select_features(struct kvm *kvm, struct kvm_vcpu_init *init if (!kvm__supports_extension(kvm, KVM_CAP_ARM_EL2)) die("EL2 (nested virt) is not supported"); init->features[0] |= 1UL << KVM_ARM_VCPU_HAS_EL2; + if (kvm->cfg.arch.e2h0) { + if (!kvm__supports_extension(kvm, KVM_CAP_ARM_EL2_E2H0)) + die("FEAT_E2H0 is not supported"); + init->features[0] |= 1UL << KVM_ARM_VCPU_HAS_EL2_E2H0; + } } } -- 2.25.1 From: Marc Zyngier FEAT_VHE introduced a non-secure EL2 virtual timer, along with its interrupt line. Consequently the arch timer DT binding introduced a fifth interrupt to communicate this interrupt number. Refactor the interrupts property generation code to deal with a variable number of interrupts, and forward five interrupts instead of four in case nested virt is enabled. 
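For reference, the five interrupts forwarded below follow the arch timer
binding order; a minimal sketch of that mapping (the enum and its names are
illustrative only and not part of kvmtool; PPI n corresponds to GIC INTID
n + 16):

  /* Illustrative only: arch timer PPIs in devicetree binding order */
  enum arch_timer_ppi {
  	TIMER_PPI_SEC_PHYS	= 13,	/* secure physical timer */
  	TIMER_PPI_NONSEC_PHYS	= 14,	/* non-secure physical timer */
  	TIMER_PPI_VIRT		= 11,	/* virtual timer */
  	TIMER_PPI_HYP_PHYS	= 10,	/* EL2 physical timer */
  	TIMER_PPI_HYP_VIRT	= 12,	/* EL2 virtual timer, only with --nested */
  };
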
Signed-off-by: Marc Zyngier Signed-off-by: Andre Przywara --- arm64/arm-cpu.c | 4 +--- arm64/include/kvm/timer.h | 2 +- arm64/timer.c | 29 ++++++++++++----------------- 3 files changed, 14 insertions(+), 21 deletions(-) diff --git a/arm64/arm-cpu.c b/arm64/arm-cpu.c index 0843ac051..5b5484d8b 100644 --- a/arm64/arm-cpu.c +++ b/arm64/arm-cpu.c @@ -12,10 +12,8 @@ static void generate_fdt_nodes(void *fdt, struct kvm *kvm) { - int timer_interrupts[4] = {13, 14, 11, 10}; - gic__generate_fdt_nodes(fdt, kvm); - timer__generate_fdt_nodes(fdt, kvm, timer_interrupts); + timer__generate_fdt_nodes(fdt, kvm); pmu__generate_fdt_nodes(fdt, kvm); } diff --git a/arm64/include/kvm/timer.h b/arm64/include/kvm/timer.h index 928e9ea7a..81e093e46 100644 --- a/arm64/include/kvm/timer.h +++ b/arm64/include/kvm/timer.h @@ -1,6 +1,6 @@ #ifndef ARM_COMMON__TIMER_H #define ARM_COMMON__TIMER_H -void timer__generate_fdt_nodes(void *fdt, struct kvm *kvm, int *irqs); +void timer__generate_fdt_nodes(void *fdt, struct kvm *kvm); #endif /* ARM_COMMON__TIMER_H */ diff --git a/arm64/timer.c b/arm64/timer.c index 861f2d994..2ac6144f9 100644 --- a/arm64/timer.c +++ b/arm64/timer.c @@ -5,31 +5,26 @@ #include "kvm/timer.h" #include "kvm/util.h" -void timer__generate_fdt_nodes(void *fdt, struct kvm *kvm, int *irqs) +void timer__generate_fdt_nodes(void *fdt, struct kvm *kvm) { const char compatible[] = "arm,armv8-timer\0arm,armv7-timer"; u32 cpu_mask = gic__get_fdt_irq_cpumask(kvm); - u32 irq_prop[] = { - cpu_to_fdt32(GIC_FDT_IRQ_TYPE_PPI), - cpu_to_fdt32(irqs[0]), - cpu_to_fdt32(cpu_mask | IRQ_TYPE_LEVEL_LOW), + int irqs[5] = {13, 14, 11, 10, 12}; + int nr = ARRAY_SIZE(irqs); + u32 irq_prop[nr * 3]; - cpu_to_fdt32(GIC_FDT_IRQ_TYPE_PPI), - cpu_to_fdt32(irqs[1]), - cpu_to_fdt32(cpu_mask | IRQ_TYPE_LEVEL_LOW), + if (!kvm->cfg.arch.nested_virt) + nr--; - cpu_to_fdt32(GIC_FDT_IRQ_TYPE_PPI), - cpu_to_fdt32(irqs[2]), - cpu_to_fdt32(cpu_mask | IRQ_TYPE_LEVEL_LOW), - - cpu_to_fdt32(GIC_FDT_IRQ_TYPE_PPI), - cpu_to_fdt32(irqs[3]), - cpu_to_fdt32(cpu_mask | IRQ_TYPE_LEVEL_LOW), - }; + for (int i = 0; i < nr; i++) { + irq_prop[i * 3 + 0] = cpu_to_fdt32(GIC_FDT_IRQ_TYPE_PPI); + irq_prop[i * 3 + 1] = cpu_to_fdt32(irqs[i]); + irq_prop[i * 3 + 2] = cpu_to_fdt32(cpu_mask | IRQ_TYPE_LEVEL_LOW); + } _FDT(fdt_begin_node(fdt, "timer")); _FDT(fdt_property(fdt, "compatible", compatible, sizeof(compatible))); - _FDT(fdt_property(fdt, "interrupts", irq_prop, sizeof(irq_prop))); + _FDT(fdt_property(fdt, "interrupts", irq_prop, nr * 3 * sizeof(irq_prop[0]))); _FDT(fdt_property(fdt, "always-on", NULL, 0)); if (kvm->cfg.arch.force_cntfrq > 0) _FDT(fdt_property_cell(fdt, "clock-frequency", kvm->cfg.arch.force_cntfrq)); -- 2.25.1 From: Marc Zyngier When running an EL2 guest, we need to make sure we don't sample SCTLR_EL1 to work out the virtio endianness, as this is likely to be a bit random. 
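The register selection implemented below boils down to this: EL0 accesses
use the E0E bit of whichever SCTLR controls EL0 at that point (SCTLR_EL2
when HCR_EL2.TGE is set, SCTLR_EL1 otherwise), while EL1 and EL2 accesses
use the EE bit of their own SCTLR. A minimal sketch of that mapping (the
helpers and constants here are illustrative, not kvmtool code):

  #include <stdbool.h>

  /* Endianness bits in SCTLR_ELx (same layout at EL1 and, with VHE, EL2) */
  #define SCTLR_E0E_BIT	(1U << 24)	/* endianness of EL0 data accesses */
  #define SCTLR_EE_BIT	(1U << 25)	/* endianness of EL1/EL2 data accesses */

  /* True if SCTLR_EL2 (rather than SCTLR_EL1) governs the access */
  static bool endian_uses_sctlr_el2(unsigned int el, bool hcr_el2_tge)
  {
  	/* With HCR_EL2.TGE set, EL0 is under EL2's control, so use SCTLR_EL2 */
  	return el == 2 || (el == 0 && hcr_el2_tge);
  }

  /* Which bit of the selected SCTLR to test */
  static unsigned int endian_bit(unsigned int el)
  {
  	return el == 0 ? SCTLR_E0E_BIT : SCTLR_EE_BIT;
  }
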
Signed-off-by: Marc Zyngier
Signed-off-by: Andre Przywara
---
 arm64/include/kvm/kvm-cpu-arch.h |  5 ++--
 arm64/kvm-cpu.c                  | 47 +++++++++++++++++++++++++-------
 2 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/arm64/include/kvm/kvm-cpu-arch.h b/arm64/include/kvm/kvm-cpu-arch.h
index 1af394aa3..85646ad4c 100644
--- a/arm64/include/kvm/kvm-cpu-arch.h
+++ b/arm64/include/kvm/kvm-cpu-arch.h
@@ -10,8 +10,9 @@
 #define ARM_MPIDR_HWID_BITMASK 0xFF00FFFFFFUL
 #define ARM_CPU_ID 3, 0, 0, 0
 #define ARM_CPU_ID_MPIDR 5
-#define ARM_CPU_CTRL 3, 0, 1, 0
-#define ARM_CPU_CTRL_SCTLR_EL1 0
+#define SYS_SCTLR_EL1 3, 0, 1, 0, 0
+#define SYS_SCTLR_EL2 3, 4, 1, 0, 0
+#define SYS_HCR_EL2 3, 4, 1, 1, 0
 
 struct kvm_cpu {
 	pthread_t thread;
diff --git a/arm64/kvm-cpu.c b/arm64/kvm-cpu.c
index 5e4f3a7dd..35e1c6396 100644
--- a/arm64/kvm-cpu.c
+++ b/arm64/kvm-cpu.c
@@ -12,6 +12,7 @@
 
 #define SCTLR_EL1_E0E_MASK (1 << 24)
 #define SCTLR_EL1_EE_MASK (1 << 25)
+#define HCR_EL2_TGE (1 << 27)
 
 static int debug_fd;
 
@@ -408,7 +409,8 @@ int kvm_cpu__get_endianness(struct kvm_cpu *vcpu)
 {
 	struct kvm_one_reg reg;
 	u64 psr;
-	u64 sctlr;
+	u64 sctlr, bit;
+	u64 hcr = 0;
 
 	/*
 	 * Quoting the definition given by Peter Maydell:
@@ -419,8 +421,9 @@ int kvm_cpu__get_endianness(struct kvm_cpu *vcpu)
 	 * We first check for an AArch32 guest: its endianness can
 	 * change when using SETEND, which affects the CPSR.E bit.
 	 *
-	 * If we're AArch64, use SCTLR_EL1.E0E if access comes from
-	 * EL0, and SCTLR_EL1.EE if access comes from EL1.
+	 * If we're AArch64, determine which SCTLR register to use,
+	 * depending on NV being used or not. Then use either the E0E
+	 * bit for EL0, or the EE bit for EL1/EL2.
 	 */
 	reg.id = ARM64_CORE_REG(regs.pstate);
 	reg.addr = (u64)&psr;
@@ -430,16 +433,40 @@ int kvm_cpu__get_endianness(struct kvm_cpu *vcpu)
 	if (psr & PSR_MODE32_BIT)
 		return (psr & COMPAT_PSR_E_BIT) ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;
 
-	reg.id = ARM64_SYS_REG(ARM_CPU_CTRL, ARM_CPU_CTRL_SCTLR_EL1);
+	if (vcpu->kvm->cfg.arch.nested_virt) {
+		reg.id = ARM64_SYS_REG(SYS_HCR_EL2);
+		reg.addr = (u64)&hcr;
+		if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
+			die("KVM_GET_ONE_REG failed (HCR_EL2)");
+	}
+
+	switch (psr & PSR_MODE_MASK) {
+	case PSR_MODE_EL0t:
+		if (hcr & HCR_EL2_TGE)
+			reg.id = ARM64_SYS_REG(SYS_SCTLR_EL2);
+		else
+			reg.id = ARM64_SYS_REG(SYS_SCTLR_EL1);
+		bit = SCTLR_EL1_E0E_MASK;
+		break;
+	case PSR_MODE_EL1t:
+	case PSR_MODE_EL1h:
+		reg.id = ARM64_SYS_REG(SYS_SCTLR_EL1);
+		bit = SCTLR_EL1_EE_MASK;
+		break;
+	case PSR_MODE_EL2t:
+	case PSR_MODE_EL2h:
+		reg.id = ARM64_SYS_REG(SYS_SCTLR_EL2);
+		bit = SCTLR_EL1_EE_MASK;
+		break;
+	default:
+		die("What's that mode???\n");
+	}
+
 	reg.addr = (u64)&sctlr;
 	if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
-		die("KVM_GET_ONE_REG failed (SCTLR_EL1)");
+		die("KVM_GET_ONE_REG failed (SCTLR_ELx)");
 
-	if ((psr & PSR_MODE_MASK) == PSR_MODE_EL0t)
-		sctlr &= SCTLR_EL1_E0E_MASK;
-	else
-		sctlr &= SCTLR_EL1_EE_MASK;
-	return sctlr ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;
+	return (sctlr & bit) ? VIRTIO_ENDIAN_BE : VIRTIO_ENDIAN_LE;
 }
 
 void kvm_cpu__show_code(struct kvm_cpu *vcpu)
-- 
2.25.1
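
For completeness, this is roughly the raw KVM UAPI sequence a VMM goes
through to bring up a vCPU at EL2 using the capabilities and feature bits
introduced in this series (a sketch only, with most error handling trimmed;
the helper and its parameters are illustrative, not kvmtool code):

  #include <linux/kvm.h>
  #include <stdbool.h>
  #include <string.h>
  #include <sys/ioctl.h>

  /* sys_fd is the /dev/kvm fd, vm_fd/vcpu_fd come from KVM_CREATE_VM/VCPU */
  static int vcpu_init_el2(int sys_fd, int vm_fd, int vcpu_fd, bool want_e2h0)
  {
  	struct kvm_vcpu_init init;

  	/* EL2 (nested virt) support is advertised via KVM_CAP_ARM_EL2 */
  	if (!ioctl(sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_EL2))
  		return -1;

  	memset(&init, 0, sizeof(init));
  	ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &init);
  	init.features[0] |= 1UL << KVM_ARM_VCPU_HAS_EL2;

  	/* Optionally restrict the guest to a FEAT_E2H0 (non-VHE) view of EL2 */
  	if (want_e2h0 && ioctl(sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_EL2_E2H0))
  		init.features[0] |= 1UL << KVM_ARM_VCPU_HAS_EL2_E2H0;

  	return ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init);
  }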