diff options
149 files changed, 4528 insertions, 1207 deletions
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 278a506..51526d3 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -471,7 +471,9 @@ int kvm_create_vcpu(CPUState *cpu) cpu->kvm_fd = kvm_fd; cpu->kvm_state = s; - cpu->vcpu_dirty = true; + if (!s->guest_state_protected) { + cpu->vcpu_dirty = true; + } cpu->dirty_pages = 0; cpu->throttle_us_per_full = 0; @@ -545,6 +547,11 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); + ret = kvm_arch_pre_create_vcpu(cpu, errp); + if (ret < 0) { + goto err; + } + ret = kvm_create_vcpu(cpu); if (ret < 0) { error_setg_errno(errp, -ret, @@ -2426,7 +2433,7 @@ static int kvm_recommended_vcpus(KVMState *s) static int kvm_max_vcpus(KVMState *s) { - int ret = kvm_check_extension(s, KVM_CAP_MAX_VCPUS); + int ret = kvm_vm_check_extension(s, KVM_CAP_MAX_VCPUS); return (ret) ? ret : kvm_recommended_vcpus(s); } diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build index 97d5e5a..575e92b 100644 --- a/accel/tcg/meson.build +++ b/accel/tcg/meson.build @@ -18,15 +18,15 @@ if get_option('plugins') tcg_ss.add(files('plugin-gen.c')) endif -libuser_ss.add_all(tcg_ss) -libsystem_ss.add_all(tcg_ss) +user_ss.add_all(tcg_ss) +system_ss.add_all(tcg_ss) -libuser_ss.add(files( +user_ss.add(files( 'user-exec.c', 'user-exec-stub.c', )) -libsystem_ss.add(files( +system_ss.add(files( 'cputlb.c', 'icount-common.c', 'monitor.c', diff --git a/configs/devices/i386-softmmu/default.mak b/configs/devices/i386-softmmu/default.mak index 4faf2f0..bc0479a 100644 --- a/configs/devices/i386-softmmu/default.mak +++ b/configs/devices/i386-softmmu/default.mak @@ -18,6 +18,7 @@ #CONFIG_QXL=n #CONFIG_SEV=n #CONFIG_SGA=n +#CONFIG_TDX=n #CONFIG_TEST_DEVICES=n #CONFIG_TPM_CRB=n #CONFIG_TPM_TIS_ISA=n diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index 4715d1e..4203713 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -315,12 +315,6 @@ deprecated; use the new name ``dtb-randomness`` instead. The new name better reflects the way this property affects all random data within the device tree blob, not just the ``kaslr-seed`` node. -Big-Endian variants of MicroBlaze ``petalogix-ml605`` and ``xlnx-zynqmp-pmu`` machines (since 9.2) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Both ``petalogix-ml605`` and ``xlnx-zynqmp-pmu`` were added for little endian -CPUs. Big endian support is not tested. - Mips ``mipssim`` machine (since 10.0) ''''''''''''''''''''''''''''''''''''' @@ -351,6 +345,19 @@ machine must ensure that they're setting the ``spike`` machine in the command line (``-M spike``). +System emulator binaries +------------------------ + +``qemu-system-microblazeel`` (since 10.1) +''''''''''''''''''''''''''''''''''''''''' + +The ``qemu-system-microblaze`` binary can emulate little-endian machines +now, too, so the separate binary ``qemu-system-microblazeel`` (with the +``el`` suffix) for little-endian targets is not required anymore. The +``petalogix-s3adsp1800`` machine can now be switched to little endian by +setting its ``endianness`` property to ``little``. + + Backend options --------------- diff --git a/docs/about/removed-features.rst b/docs/about/removed-features.rst index 4819cb4..d7c2113 100644 --- a/docs/about/removed-features.rst +++ b/docs/about/removed-features.rst @@ -1091,6 +1091,15 @@ This machine was removed because PPC 405 CPU have no known users, firmware images are not available, OpenWRT dropped support in 2019, U-Boot in 2017, and Linux in 2024. +Big-Endian variants of ``petalogix-ml605`` and ``xlnx-zynqmp-pmu`` machines (removed in 10.1) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Both the MicroBlaze ``petalogix-ml605`` and ``xlnx-zynqmp-pmu`` machines +were added for little endian CPUs. Big endian support was never tested +and likely never worked. Starting with QEMU v10.1, the machines are now +only available as little-endian machines. + + linux-user mode CPUs -------------------- diff --git a/docs/system/confidential-guest-support.rst b/docs/system/confidential-guest-support.rst index 0c490db..66129fb 100644 --- a/docs/system/confidential-guest-support.rst +++ b/docs/system/confidential-guest-support.rst @@ -38,6 +38,7 @@ Supported mechanisms Currently supported confidential guest mechanisms are: * AMD Secure Encrypted Virtualization (SEV) (see :doc:`i386/amd-memory-encryption`) +* Intel Trust Domain Extension (TDX) (see :doc:`i386/tdx`) * POWER Protected Execution Facility (PEF) (see :ref:`power-papr-protected-execution-facility-pef`) * s390x Protected Virtualization (PV) (see :doc:`s390x/protvirt`) diff --git a/docs/system/i386/tdx.rst b/docs/system/i386/tdx.rst new file mode 100644 index 0000000..8131750 --- /dev/null +++ b/docs/system/i386/tdx.rst @@ -0,0 +1,161 @@ +Intel Trusted Domain eXtension (TDX) +==================================== + +Intel Trusted Domain eXtensions (TDX) refers to an Intel technology that extends +Virtual Machine Extensions (VMX) and Multi-Key Total Memory Encryption (MKTME) +with a new kind of virtual machine guest called a Trust Domain (TD). A TD runs +in a CPU mode that is designed to protect the confidentiality of its memory +contents and its CPU state from any other software, including the hosting +Virtual Machine Monitor (VMM), unless explicitly shared by the TD itself. + +Prerequisites +------------- + +To run TD, the physical machine needs to have TDX module loaded and initialized +while KVM hypervisor has TDX support and has TDX enabled. If those requirements +are met, the ``KVM_CAP_VM_TYPES`` will report the support of ``KVM_X86_TDX_VM``. + +Trust Domain Virtual Firmware (TDVF) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Trust Domain Virtual Firmware (TDVF) is required to provide TD services to boot +TD Guest OS. TDVF needs to be copied to guest private memory and measured before +the TD boots. + +KVM vcpu ioctl ``KVM_TDX_INIT_MEM_REGION`` can be used to populate the TDVF +content into its private memory. + +Since TDX doesn't support readonly memslot, TDVF cannot be mapped as pflash +device and it actually works as RAM. "-bios" option is chosen to load TDVF. + +OVMF is the opensource firmware that implements the TDVF support. Thus the +command line to specify and load TDVF is ``-bios OVMF.fd`` + +Feature Configuration +--------------------- + +Unlike non-TDX VM, the CPU features (enumerated by CPU or MSR) of a TD are not +under full control of VMM. VMM can only configure part of features of a TD on +``KVM_TDX_INIT_VM`` command of VM scope ``MEMORY_ENCRYPT_OP`` ioctl. + +The configurable features have three types: + +- Attributes: + - PKS (bit 30) controls whether Supervisor Protection Keys is exposed to TD, + which determines related CPUID bit and CR4 bit; + - PERFMON (bit 63) controls whether PMU is exposed to TD. + +- XSAVE related features (XFAM): + XFAM is a 64b mask, which has the same format as XCR0 or IA32_XSS MSR. It + determines the set of extended features available for use by the guest TD. + +- CPUID features: + Only some bits of some CPUID leaves are directly configurable by VMM. + +What features can be configured is reported via TDX capabilities. + +TDX capabilities +~~~~~~~~~~~~~~~~ + +The VM scope ``MEMORY_ENCRYPT_OP`` ioctl provides command ``KVM_TDX_CAPABILITIES`` +to get the TDX capabilities from KVM. It returns a data structure of +``struct kvm_tdx_capabilities``, which tells the supported configuration of +attributes, XFAM and CPUIDs. + +TD attributes +~~~~~~~~~~~~~ + +QEMU supports configuring raw 64-bit TD attributes directly via "attributes" +property of "tdx-guest" object. Note, it's users' responsibility to provide a +valid value because some bits may not supported by current QEMU or KVM yet. + +QEMU also supports the configuration of individual attribute bits that are +supported by it, via properties of "tdx-guest" object. +E.g., "sept-ve-disable" (bit 28). + +MSR based features +~~~~~~~~~~~~~~~~~~ + +Current KVM doesn't support MSR based feature (e.g., MSR_IA32_ARCH_CAPABILITIES) +configuration for TDX, and it's a future work to enable it in QEMU when KVM adds +support of it. + +Feature check +~~~~~~~~~~~~~ + +QEMU checks if the final (CPU) features, determined by given cpu model and +explicit feature adjustment of "+featureA/-featureB", can be supported or not. +It can produce feature not supported warning like + + "warning: host doesn't support requested feature: CPUID.07H:EBX.intel-pt [bit 25]" + +It can also produce warning like + + "warning: TDX forcibly sets the feature: CPUID.80000007H:EDX.invtsc [bit 8]" + +if the fixed-1 feature is requested to be disabled explicitly. This is newly +added to QEMU for TDX because TDX has fixed-1 features that are forcibly enabled +by TDX module and VMM cannot disable them. + +Launching a TD (TDX VM) +----------------------- + +To launch a TD, the necessary command line options are tdx-guest object and +split kernel-irqchip, as below: + +.. parsed-literal:: + + |qemu_system_x86| \\ + -accel kvm \\ + -cpu host \\ + -object tdx-guest,id=tdx0 \\ + -machine ...,confidential-guest-support=tdx0 \\ + -bios OVMF.fd \\ + +Restrictions +------------ + + - kernel-irqchip must be split; + + This is set by default for TDX guest if kernel-irqchip is left on its default + 'auto' setting. + + - No readonly support for private memory; + + - No SMM support: SMM support requires manipulating the guest register states + which is not allowed; + +Debugging +--------- + +Bit 0 of TD attributes, is DEBUG bit, which decides if the TD runs in off-TD +debug mode. When in off-TD debug mode, TD's VCPU state and private memory are +accessible via given SEAMCALLs. This requires KVM to expose APIs to invoke those +SEAMCALLs and corresonponding QEMU change. + +It's targeted as future work. + +TD attestation +-------------- + +In TD guest, the attestation process is used to verify the TDX guest +trustworthiness to other entities before provisioning secrets to the guest. + +TD attestation is initiated first by calling TDG.MR.REPORT inside TD to get the +REPORT. Then the REPORT data needs to be converted into a remotely verifiable +Quote by SGX Quoting Enclave (QE). + +It's a future work in QEMU to add support of TD attestation since it lacks +support in current KVM. + +Live Migration +-------------- + +Future work. + +References +---------- + +- `TDX Homepage <https://www.intel.com/content/www/us/en/developer/articles/technical/intel-trust-domain-extensions.html>`__ + +- `SGX QE <https://github.com/intel/SGXDataCenterAttestationPrimitives/tree/master/QuoteGeneration>`__ diff --git a/docs/system/target-i386.rst b/docs/system/target-i386.rst index ab7af1a..43b09c7 100644 --- a/docs/system/target-i386.rst +++ b/docs/system/target-i386.rst @@ -31,6 +31,7 @@ Architectural features i386/kvm-pv i386/sgx i386/amd-memory-encryption + i386/tdx OS requirements ~~~~~~~~~~~~~~~ diff --git a/gdbstub/meson.build b/gdbstub/meson.build index b25db86..15c666f 100644 --- a/gdbstub/meson.build +++ b/gdbstub/meson.build @@ -5,13 +5,13 @@ # # We build two versions of gdbstub, one for each mode -libuser_ss.add(files( +user_ss.add(files( 'gdbstub.c', 'syscalls.c', 'user.c' )) -libsystem_ss.add(files( +system_ss.add(files( 'gdbstub.c', 'syscalls.c', 'system.c' diff --git a/hw/arm/meson.build b/hw/arm/meson.build index 5098795..d90be8f 100644 --- a/hw/arm/meson.build +++ b/hw/arm/meson.build @@ -8,7 +8,7 @@ arm_common_ss.add(when: 'CONFIG_HIGHBANK', if_true: files('highbank.c')) arm_common_ss.add(when: 'CONFIG_INTEGRATOR', if_true: files('integratorcp.c')) arm_common_ss.add(when: 'CONFIG_MICROBIT', if_true: files('microbit.c')) arm_common_ss.add(when: 'CONFIG_MPS3R', if_true: files('mps3r.c')) -arm_common_ss.add(when: 'CONFIG_MUSICPAL', if_true: [pixman, files('musicpal.c')]) +arm_common_ss.add(when: 'CONFIG_MUSICPAL', if_true: [files('musicpal.c')]) arm_common_ss.add(when: 'CONFIG_NETDUINOPLUS2', if_true: files('netduinoplus2.c')) arm_common_ss.add(when: 'CONFIG_OLIMEX_STM32_H405', if_true: files('olimex-stm32-h405.c')) arm_common_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx.c', 'npcm7xx_boards.c')) @@ -79,7 +79,7 @@ arm_common_ss.add(when: 'CONFIG_SX1', if_true: files('omap_sx1.c')) arm_common_ss.add(when: 'CONFIG_VERSATILE', if_true: files('versatilepb.c')) arm_common_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) -arm_common_ss.add(fdt, files('boot.c')) +arm_common_ss.add(files('boot.c')) hw_arch += {'arm': arm_ss} hw_common_arch += {'arm': arm_common_ss} diff --git a/hw/core/loader.c b/hw/core/loader.c index b792a54..e7056ba 100644 --- a/hw/core/loader.c +++ b/hw/core/loader.c @@ -1333,20 +1333,6 @@ void rom_set_fw(FWCfgState *f) fw_cfg = f; } -void rom_set_order_override(int order) -{ - if (!fw_cfg) - return; - fw_cfg_set_order_override(fw_cfg, order); -} - -void rom_reset_order_override(void) -{ - if (!fw_cfg) - return; - fw_cfg_reset_order_override(fw_cfg); -} - void rom_transaction_begin(void) { Rom *rom; diff --git a/hw/core/machine.c b/hw/core/machine.c index c3f3a50..e869821 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -285,24 +285,6 @@ GlobalProperty hw_compat_2_6[] = { }; const size_t hw_compat_2_6_len = G_N_ELEMENTS(hw_compat_2_6); -GlobalProperty hw_compat_2_5[] = { - { "isa-fdc", "fallback", "144" }, - { "pvscsi", "x-old-pci-configuration", "on" }, - { "pvscsi", "x-disable-pcie", "on" }, - { "vmxnet3", "x-old-msi-offsets", "on" }, - { "vmxnet3", "x-disable-pcie", "on" }, -}; -const size_t hw_compat_2_5_len = G_N_ELEMENTS(hw_compat_2_5); - -GlobalProperty hw_compat_2_4[] = { - { "e1000", "extra_mac_registers", "off" }, - { "virtio-pci", "x-disable-pcie", "on" }, - { "virtio-pci", "migrate-extra", "off" }, - { "fw_cfg_mem", "dma_enabled", "off" }, - { "fw_cfg_io", "dma_enabled", "off" } -}; -const size_t hw_compat_2_4_len = G_N_ELEMENTS(hw_compat_2_4); - MachineState *current_machine; static char *machine_get_kernel(Object *obj, Error **errp) diff --git a/hw/core/meson.build b/hw/core/meson.build index 547de65..b5a545a 100644 --- a/hw/core/meson.build +++ b/hw/core/meson.build @@ -26,7 +26,7 @@ system_ss.add(when: 'CONFIG_XILINX_AXI', if_true: files('stream.c')) system_ss.add(when: 'CONFIG_PLATFORM_BUS', if_true: files('sysbus-fdt.c')) system_ss.add(when: 'CONFIG_EIF', if_true: [files('eif.c'), zlib, libcbor, gnutls]) -libsystem_ss.add(files( +system_ss.add(files( 'cpu-system.c', 'fw-path-provider.c', 'gpio.c', @@ -46,7 +46,7 @@ libsystem_ss.add(files( 'vm-change-state-handler.c', 'clock-vmstate.c', )) -libuser_ss.add(files( +user_ss.add(files( 'cpu-user.c', 'qdev-user.c', )) diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig index d34ce07..eb65bda 100644 --- a/hw/i386/Kconfig +++ b/hw/i386/Kconfig @@ -10,6 +10,11 @@ config SGX bool depends on KVM +config TDX + bool + select X86_FW_OVMF + depends on KVM + config PC bool imply APPLESMC @@ -26,6 +31,7 @@ config PC imply QXL imply SEV imply SGX + imply TDX imply TEST_DEVICES imply TPM_CRB imply TPM_TIS_ISA diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 0775c8f..963aa24 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -1426,7 +1426,6 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) AMDVIState *s = opaque; AMDVIAddressSpace **iommu_as, *amdvi_dev_as; int bus_num = pci_bus_num(bus); - X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); iommu_as = s->address_spaces[bus_num]; @@ -1486,15 +1485,8 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) AMDVI_INT_ADDR_FIRST, &amdvi_dev_as->iommu_ir, 1); - if (!x86_iommu->pt_supported) { - memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, false); - memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu), - true); - } else { - memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu), - false); - memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, true); - } + memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, false); + memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu), true); } return &iommu_as[devfn]->as; } @@ -1723,6 +1715,14 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) exit(EXIT_FAILURE); } + if (s->xtsup) { + if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) { + error_report("AMD IOMMU xtsup=on requires x2APIC support on " + "the KVM side"); + exit(EXIT_FAILURE); + } + } + pci_setup_iommu(bus, &amdvi_iommu_ops, s); amdvi_init(s); } diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c index 39035db..1be9bfe 100644 --- a/hw/i386/kvm/apic.c +++ b/hw/i386/kvm/apic.c @@ -17,6 +17,7 @@ #include "system/hw_accel.h" #include "system/kvm.h" #include "kvm/kvm_i386.h" +#include "kvm/tdx.h" static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic, int reg_id, uint32_t val) @@ -141,6 +142,10 @@ static void kvm_apic_put(CPUState *cs, run_on_cpu_data data) struct kvm_lapic_state kapic; int ret; + if (is_tdx_vm()) { + return; + } + kvm_put_apicbase(s->cpu, s->apicbase); kvm_put_apic_state(s, &kapic); diff --git a/hw/i386/meson.build b/hw/i386/meson.build index 10bdfde..7896f34 100644 --- a/hw/i386/meson.build +++ b/hw/i386/meson.build @@ -32,6 +32,7 @@ i386_ss.add(when: 'CONFIG_PC', if_true: files( 'port92.c')) i386_ss.add(when: 'CONFIG_X86_FW_OVMF', if_true: files('pc_sysfw_ovmf.c'), if_false: files('pc_sysfw_ovmf-stubs.c')) +i386_ss.add(when: 'CONFIG_TDX', if_true: files('tdvf.c', 'tdvf-hob.c')) subdir('kvm') subdir('xen') diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 7065615..b211633 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -44,6 +44,7 @@ #include "system/xen.h" #include "system/reset.h" #include "kvm/kvm_i386.h" +#include "kvm/tdx.h" #include "hw/xen/xen.h" #include "qobject/qlist.h" #include "qemu/error-report.h" @@ -259,28 +260,6 @@ GlobalProperty pc_compat_2_6[] = { }; const size_t pc_compat_2_6_len = G_N_ELEMENTS(pc_compat_2_6); -GlobalProperty pc_compat_2_5[] = {}; -const size_t pc_compat_2_5_len = G_N_ELEMENTS(pc_compat_2_5); - -GlobalProperty pc_compat_2_4[] = { - PC_CPU_MODEL_IDS("2.4.0") - { "Haswell-" TYPE_X86_CPU, "abm", "off" }, - { "Haswell-noTSX-" TYPE_X86_CPU, "abm", "off" }, - { "Broadwell-" TYPE_X86_CPU, "abm", "off" }, - { "Broadwell-noTSX-" TYPE_X86_CPU, "abm", "off" }, - { "host" "-" TYPE_X86_CPU, "host-cache-info", "on" }, - { TYPE_X86_CPU, "check", "off" }, - { "qemu64" "-" TYPE_X86_CPU, "sse4a", "on" }, - { "qemu64" "-" TYPE_X86_CPU, "abm", "on" }, - { "qemu64" "-" TYPE_X86_CPU, "popcnt", "on" }, - { "qemu32" "-" TYPE_X86_CPU, "popcnt", "on" }, - { "Opteron_G2" "-" TYPE_X86_CPU, "rdtscp", "on" }, - { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "on" }, - { "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "on" }, - { "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "on", } -}; -const size_t pc_compat_2_4_len = G_N_ELEMENTS(pc_compat_2_4); - /* * @PC_FW_DATA: * Size of the chunk of memory at the top of RAM for the BIOS ACPI tables @@ -976,21 +955,23 @@ void pc_memory_init(PCMachineState *pcms, /* Initialize PC system firmware */ pc_system_firmware_init(pcms, rom_memory); - option_rom_mr = g_malloc(sizeof(*option_rom_mr)); - if (machine_require_guest_memfd(machine)) { - memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom", - PC_ROM_SIZE, &error_fatal); - } else { - memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, - &error_fatal); - if (pcmc->pci_enabled) { - memory_region_set_readonly(option_rom_mr, true); + if (!is_tdx_vm()) { + option_rom_mr = g_malloc(sizeof(*option_rom_mr)); + if (machine_require_guest_memfd(machine)) { + memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom", + PC_ROM_SIZE, &error_fatal); + } else { + memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, + &error_fatal); + if (pcmc->pci_enabled) { + memory_region_set_readonly(option_rom_mr, true); + } } + memory_region_add_subregion_overlap(rom_memory, + PC_ROM_MIN_VGA, + option_rom_mr, + 1); } - memory_region_add_subregion_overlap(rom_memory, - PC_ROM_MIN_VGA, - option_rom_mr, - 1); fw_cfg = fw_cfg_arch_create(machine, x86ms->boot_cpus, x86ms->apic_id_limit); @@ -999,14 +980,13 @@ void pc_memory_init(PCMachineState *pcms, if (machine->device_memory) { uint64_t *val = g_malloc(sizeof(*val)); - uint64_t res_mem_end = machine->device_memory->base; - - if (!pcmc->broken_reserved_end) { - res_mem_end += memory_region_size(&machine->device_memory->mr); - } + uint64_t res_mem_end; if (pcms->cxl_devices_state.is_enabled) { res_mem_end = cxl_resv_end; + } else { + res_mem_end = machine->device_memory->base + + memory_region_size(&machine->device_memory->mr); } *val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB)); fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val)); @@ -1044,9 +1024,7 @@ uint64_t pc_pci_hole64_start(void) hole64_start = pc_get_cxl_range_end(pcms); } else if (pcmc->has_reserved_memory && (ms->ram_size < ms->maxram_size)) { pc_get_device_memory_range(pcms, &hole64_start, &size); - if (!pcmc->broken_reserved_end) { - hole64_start += size; - } + hole64_start += size; } else { hole64_start = pc_above_4g_end(pcms); } @@ -1058,7 +1036,6 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus) { DeviceState *dev = NULL; - rom_set_order_override(FW_CFG_ORDER_OVERRIDE_VGA); if (pci_bus) { PCIDevice *pcidev = pci_vga_init(pci_bus); dev = pcidev ? &pcidev->qdev : NULL; @@ -1066,7 +1043,7 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus) ISADevice *isadev = isa_vga_init(isa_bus); dev = isadev ? DEVICE(isadev) : NULL; } - rom_reset_order_override(); + return dev; } @@ -1256,8 +1233,6 @@ void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus) bool default_is_ne2k = g_str_equal(mc->default_nic, TYPE_ISA_NE2000); NICInfo *nd; - rom_set_order_override(FW_CFG_ORDER_OVERRIDE_NIC); - while ((nd = qemu_find_nic_info(TYPE_ISA_NE2000, default_is_ne2k, NULL))) { pc_init_ne2k_isa(isa_bus, nd, &error_fatal); } @@ -1266,8 +1241,6 @@ void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus) if (pci_bus) { pci_init_nic_devices(pci_bus, mc->default_nic); } - - rom_reset_order_override(); } void pc_i8259_create(ISABus *isa_bus, qemu_irq *i8259_irqs) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 0dce512..ea7572e 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -285,6 +285,8 @@ static void pc_init1(MachineState *machine, const char *pci_type) pcms->idebus[0] = qdev_get_child_bus(dev, "ide.0"); pcms->idebus[1] = qdev_get_child_bus(dev, "ide.1"); } else { + uint32_t irq; + isa_bus = isa_bus_new(NULL, system_memory, system_io, &error_abort); isa_bus_register_input_irqs(isa_bus, x86ms->gsi); @@ -292,6 +294,9 @@ static void pc_init1(MachineState *machine, const char *pci_type) x86ms->rtc = isa_new(TYPE_MC146818_RTC); qdev_prop_set_int32(DEVICE(x86ms->rtc), "base_year", 2000); isa_realize_and_unref(x86ms->rtc, isa_bus, &error_fatal); + irq = object_property_get_uint(OBJECT(x86ms->rtc), "irq", + &error_fatal); + isa_connect_gpio_out(ISA_DEVICE(x86ms->rtc), 0, irq); i8257_dma_init(OBJECT(machine), isa_bus, 0); pcms->hpet_enabled = false; @@ -778,32 +783,6 @@ static void pc_i440fx_machine_2_6_options(MachineClass *m) DEFINE_I440FX_MACHINE(2, 6); -static void pc_i440fx_machine_2_5_options(MachineClass *m) -{ - X86MachineClass *x86mc = X86_MACHINE_CLASS(m); - - pc_i440fx_machine_2_6_options(m); - x86mc->save_tsc_khz = false; - m->legacy_fw_cfg_order = 1; - compat_props_add(m->compat_props, hw_compat_2_5, hw_compat_2_5_len); - compat_props_add(m->compat_props, pc_compat_2_5, pc_compat_2_5_len); -} - -DEFINE_I440FX_MACHINE(2, 5); - -static void pc_i440fx_machine_2_4_options(MachineClass *m) -{ - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - - pc_i440fx_machine_2_5_options(m); - m->hw_version = "2.4.0"; - pcmc->broken_reserved_end = true; - compat_props_add(m->compat_props, hw_compat_2_4, hw_compat_2_4_len); - compat_props_add(m->compat_props, pc_compat_2_4, pc_compat_2_4_len); -} - -DEFINE_I440FX_MACHINE(2, 4); - #ifdef CONFIG_ISAPC static void isapc_machine_options(MachineClass *m) { diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index c538b3d..33211b1 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -672,29 +672,3 @@ static void pc_q35_machine_2_6_options(MachineClass *m) } DEFINE_Q35_MACHINE(2, 6); - -static void pc_q35_machine_2_5_options(MachineClass *m) -{ - X86MachineClass *x86mc = X86_MACHINE_CLASS(m); - - pc_q35_machine_2_6_options(m); - x86mc->save_tsc_khz = false; - m->legacy_fw_cfg_order = 1; - compat_props_add(m->compat_props, hw_compat_2_5, hw_compat_2_5_len); - compat_props_add(m->compat_props, pc_compat_2_5, pc_compat_2_5_len); -} - -DEFINE_Q35_MACHINE(2, 5); - -static void pc_q35_machine_2_4_options(MachineClass *m) -{ - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - - pc_q35_machine_2_5_options(m); - m->hw_version = "2.4.0"; - pcmc->broken_reserved_end = true; - compat_props_add(m->compat_props, hw_compat_2_4, hw_compat_2_4_len); - compat_props_add(m->compat_props, pc_compat_2_4, pc_compat_2_4_len); -} - -DEFINE_Q35_MACHINE(2, 4); diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c index 1eeb58a..821396c 100644 --- a/hw/i386/pc_sysfw.c +++ b/hw/i386/pc_sysfw.c @@ -37,6 +37,7 @@ #include "hw/block/flash.h" #include "system/kvm.h" #include "target/i386/sev.h" +#include "kvm/tdx.h" #define FLASH_SECTOR_SIZE 4096 @@ -280,5 +281,11 @@ void x86_firmware_configure(hwaddr gpa, void *ptr, int size) } sev_encrypt_flash(gpa, ptr, size, &error_fatal); + } else if (is_tdx_vm()) { + ret = tdx_parse_tdvf(ptr, size); + if (ret) { + error_report("failed to parse TDVF for TDX VM"); + exit(1); + } } } diff --git a/hw/i386/tdvf-hob.c b/hw/i386/tdvf-hob.c new file mode 100644 index 0000000..782b3d1 --- /dev/null +++ b/hw/i386/tdvf-hob.c @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2025 Intel Corporation + * Author: Isaku Yamahata <isaku.yamahata at gmail.com> + * <isaku.yamahata at intel.com> + * Xiaoyao Li <xiaoyao.li@intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "standard-headers/uefi/uefi.h" +#include "hw/pci/pcie_host.h" +#include "tdvf-hob.h" + +typedef struct TdvfHob { + hwaddr hob_addr; + void *ptr; + int size; + + /* working area */ + void *current; + void *end; +} TdvfHob; + +static uint64_t tdvf_current_guest_addr(const TdvfHob *hob) +{ + return hob->hob_addr + (hob->current - hob->ptr); +} + +static void tdvf_align(TdvfHob *hob, size_t align) +{ + hob->current = QEMU_ALIGN_PTR_UP(hob->current, align); +} + +static void *tdvf_get_area(TdvfHob *hob, uint64_t size) +{ + void *ret; + + if (hob->current + size > hob->end) { + error_report("TD_HOB overrun, size = 0x%" PRIx64, size); + exit(1); + } + + ret = hob->current; + hob->current += size; + tdvf_align(hob, 8); + return ret; +} + +static void tdvf_hob_add_memory_resources(TdxGuest *tdx, TdvfHob *hob) +{ + EFI_HOB_RESOURCE_DESCRIPTOR *region; + EFI_RESOURCE_ATTRIBUTE_TYPE attr; + EFI_RESOURCE_TYPE resource_type; + + TdxRamEntry *e; + int i; + + for (i = 0; i < tdx->nr_ram_entries; i++) { + e = &tdx->ram_entries[i]; + + if (e->type == TDX_RAM_UNACCEPTED) { + resource_type = EFI_RESOURCE_MEMORY_UNACCEPTED; + attr = EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED; + } else if (e->type == TDX_RAM_ADDED) { + resource_type = EFI_RESOURCE_SYSTEM_MEMORY; + attr = EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE; + } else { + error_report("unknown TDX_RAM_ENTRY type %d", e->type); + exit(1); + } + + region = tdvf_get_area(hob, sizeof(*region)); + *region = (EFI_HOB_RESOURCE_DESCRIPTOR) { + .Header = { + .HobType = EFI_HOB_TYPE_RESOURCE_DESCRIPTOR, + .HobLength = cpu_to_le16(sizeof(*region)), + .Reserved = cpu_to_le32(0), + }, + .Owner = EFI_HOB_OWNER_ZERO, + .ResourceType = cpu_to_le32(resource_type), + .ResourceAttribute = cpu_to_le32(attr), + .PhysicalStart = cpu_to_le64(e->address), + .ResourceLength = cpu_to_le64(e->length), + }; + } +} + +void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob) +{ + TdvfHob hob = { + .hob_addr = td_hob->address, + .size = td_hob->size, + .ptr = td_hob->mem_ptr, + + .current = td_hob->mem_ptr, + .end = td_hob->mem_ptr + td_hob->size, + }; + + EFI_HOB_GENERIC_HEADER *last_hob; + EFI_HOB_HANDOFF_INFO_TABLE *hit; + + /* Note, Efi{Free}Memory{Bottom,Top} are ignored, leave 'em zeroed. */ + hit = tdvf_get_area(&hob, sizeof(*hit)); + *hit = (EFI_HOB_HANDOFF_INFO_TABLE) { + .Header = { + .HobType = EFI_HOB_TYPE_HANDOFF, + .HobLength = cpu_to_le16(sizeof(*hit)), + .Reserved = cpu_to_le32(0), + }, + .Version = cpu_to_le32(EFI_HOB_HANDOFF_TABLE_VERSION), + .BootMode = cpu_to_le32(0), + .EfiMemoryTop = cpu_to_le64(0), + .EfiMemoryBottom = cpu_to_le64(0), + .EfiFreeMemoryTop = cpu_to_le64(0), + .EfiFreeMemoryBottom = cpu_to_le64(0), + .EfiEndOfHobList = cpu_to_le64(0), /* initialized later */ + }; + + tdvf_hob_add_memory_resources(tdx, &hob); + + last_hob = tdvf_get_area(&hob, sizeof(*last_hob)); + *last_hob = (EFI_HOB_GENERIC_HEADER) { + .HobType = EFI_HOB_TYPE_END_OF_HOB_LIST, + .HobLength = cpu_to_le16(sizeof(*last_hob)), + .Reserved = cpu_to_le32(0), + }; + hit->EfiEndOfHobList = tdvf_current_guest_addr(&hob); +} diff --git a/hw/i386/tdvf-hob.h b/hw/i386/tdvf-hob.h new file mode 100644 index 0000000..4fc6a37 --- /dev/null +++ b/hw/i386/tdvf-hob.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef HW_I386_TD_HOB_H +#define HW_I386_TD_HOB_H + +#include "hw/i386/tdvf.h" +#include "target/i386/kvm/tdx.h" + +void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob); + +#define EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE \ + (EFI_RESOURCE_ATTRIBUTE_PRESENT | \ + EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \ + EFI_RESOURCE_ATTRIBUTE_TESTED) + +#define EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED \ + (EFI_RESOURCE_ATTRIBUTE_PRESENT | \ + EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \ + EFI_RESOURCE_ATTRIBUTE_TESTED) + +#define EFI_RESOURCE_ATTRIBUTE_TDVF_MMIO \ + (EFI_RESOURCE_ATTRIBUTE_PRESENT | \ + EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \ + EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE) + +#endif diff --git a/hw/i386/tdvf.c b/hw/i386/tdvf.c new file mode 100644 index 0000000..bd993ea --- /dev/null +++ b/hw/i386/tdvf.c @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2025 Intel Corporation + * Author: Isaku Yamahata <isaku.yamahata at gmail.com> + * <isaku.yamahata at intel.com> + * Xiaoyao Li <xiaoyao.li@intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" + +#include "hw/i386/pc.h" +#include "hw/i386/tdvf.h" +#include "system/kvm.h" + +#define TDX_METADATA_OFFSET_GUID "e47a6535-984a-4798-865e-4685a7bf8ec2" +#define TDX_METADATA_VERSION 1 +#define TDVF_SIGNATURE 0x46564454 /* TDVF as little endian */ +#define TDVF_ALIGNMENT 4096 + +/* + * the raw structs read from TDVF keeps the name convention in + * TDVF Design Guide spec. + */ +typedef struct { + uint32_t DataOffset; + uint32_t RawDataSize; + uint64_t MemoryAddress; + uint64_t MemoryDataSize; + uint32_t Type; + uint32_t Attributes; +} TdvfSectionEntry; + +typedef struct { + uint32_t Signature; + uint32_t Length; + uint32_t Version; + uint32_t NumberOfSectionEntries; + TdvfSectionEntry SectionEntries[]; +} TdvfMetadata; + +struct tdx_metadata_offset { + uint32_t offset; +}; + +static TdvfMetadata *tdvf_get_metadata(void *flash_ptr, int size) +{ + TdvfMetadata *metadata; + uint32_t offset = 0; + uint8_t *data; + + if ((uint32_t) size != size) { + return NULL; + } + + if (pc_system_ovmf_table_find(TDX_METADATA_OFFSET_GUID, &data, NULL)) { + offset = size - le32_to_cpu(((struct tdx_metadata_offset *)data)->offset); + + if (offset + sizeof(*metadata) > size) { + return NULL; + } + } else { + error_report("Cannot find TDX_METADATA_OFFSET_GUID"); + return NULL; + } + + metadata = flash_ptr + offset; + + /* Finally, verify the signature to determine if this is a TDVF image. */ + metadata->Signature = le32_to_cpu(metadata->Signature); + if (metadata->Signature != TDVF_SIGNATURE) { + error_report("Invalid TDVF signature in metadata!"); + return NULL; + } + + /* Sanity check that the TDVF doesn't overlap its own metadata. */ + metadata->Length = le32_to_cpu(metadata->Length); + if (offset + metadata->Length > size) { + return NULL; + } + + /* Only version 1 is supported/defined. */ + metadata->Version = le32_to_cpu(metadata->Version); + if (metadata->Version != TDX_METADATA_VERSION) { + return NULL; + } + + return metadata; +} + +static int tdvf_parse_and_check_section_entry(const TdvfSectionEntry *src, + TdxFirmwareEntry *entry) +{ + entry->data_offset = le32_to_cpu(src->DataOffset); + entry->data_len = le32_to_cpu(src->RawDataSize); + entry->address = le64_to_cpu(src->MemoryAddress); + entry->size = le64_to_cpu(src->MemoryDataSize); + entry->type = le32_to_cpu(src->Type); + entry->attributes = le32_to_cpu(src->Attributes); + + /* sanity check */ + if (entry->size < entry->data_len) { + error_report("Broken metadata RawDataSize 0x%x MemoryDataSize 0x%lx", + entry->data_len, entry->size); + return -1; + } + if (!QEMU_IS_ALIGNED(entry->address, TDVF_ALIGNMENT)) { + error_report("MemoryAddress 0x%lx not page aligned", entry->address); + return -1; + } + if (!QEMU_IS_ALIGNED(entry->size, TDVF_ALIGNMENT)) { + error_report("MemoryDataSize 0x%lx not page aligned", entry->size); + return -1; + } + + switch (entry->type) { + case TDVF_SECTION_TYPE_BFV: + case TDVF_SECTION_TYPE_CFV: + /* The sections that must be copied from firmware image to TD memory */ + if (entry->data_len == 0) { + error_report("%d section with RawDataSize == 0", entry->type); + return -1; + } + break; + case TDVF_SECTION_TYPE_TD_HOB: + case TDVF_SECTION_TYPE_TEMP_MEM: + /* The sections that no need to be copied from firmware image */ + if (entry->data_len != 0) { + error_report("%d section with RawDataSize 0x%x != 0", + entry->type, entry->data_len); + return -1; + } + break; + default: + error_report("TDVF contains unsupported section type %d", entry->type); + return -1; + } + + return 0; +} + +int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size) +{ + g_autofree TdvfSectionEntry *sections = NULL; + TdvfMetadata *metadata; + ssize_t entries_size; + int i; + + metadata = tdvf_get_metadata(flash_ptr, size); + if (!metadata) { + return -EINVAL; + } + + /* load and parse metadata entries */ + fw->nr_entries = le32_to_cpu(metadata->NumberOfSectionEntries); + if (fw->nr_entries < 2) { + error_report("Invalid number of fw entries (%u) in TDVF Metadata", + fw->nr_entries); + return -EINVAL; + } + + entries_size = fw->nr_entries * sizeof(TdvfSectionEntry); + if (metadata->Length != sizeof(*metadata) + entries_size) { + error_report("TDVF metadata len (0x%x) mismatch, expected (0x%x)", + metadata->Length, + (uint32_t)(sizeof(*metadata) + entries_size)); + return -EINVAL; + } + + fw->entries = g_new(TdxFirmwareEntry, fw->nr_entries); + sections = g_new(TdvfSectionEntry, fw->nr_entries); + + memcpy(sections, (void *)metadata + sizeof(*metadata), entries_size); + + for (i = 0; i < fw->nr_entries; i++) { + if (tdvf_parse_and_check_section_entry(§ions[i], &fw->entries[i])) { + goto err; + } + } + + fw->mem_ptr = flash_ptr; + return 0; + +err: + fw->entries = 0; + g_free(fw->entries); + return -EINVAL; +} diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c index 1b0671c..b1b5f11 100644 --- a/hw/i386/x86-common.c +++ b/hw/i386/x86-common.c @@ -44,6 +44,7 @@ #include "standard-headers/asm-x86/bootparam.h" #include CONFIG_DEVICES #include "kvm/kvm_i386.h" +#include "kvm/tdx.h" #ifdef CONFIG_XEN_EMU #include "hw/xen/xen.h" @@ -1035,11 +1036,14 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, if (machine_require_guest_memfd(MACHINE(x86ms))) { memory_region_init_ram_guest_memfd(&x86ms->bios, NULL, "pc.bios", bios_size, &error_fatal); + if (is_tdx_vm()) { + tdx_set_tdvf_region(&x86ms->bios); + } } else { memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size, &error_fatal); } - if (sev_enabled()) { + if (sev_enabled() || is_tdx_vm()) { /* * The concept of a "reset" simply doesn't exist for * confidential computing guests, we have to destroy and diff --git a/hw/i386/x86.c b/hw/i386/x86.c index e2d0409..f80533d 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -382,7 +382,6 @@ static void x86_machine_class_init(ObjectClass *oc, const void *data) mc->get_default_cpu_node_id = x86_get_default_cpu_node_id; mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids; mc->kvm_type = x86_kvm_type; - x86mc->save_tsc_khz = true; x86mc->fwcfg_dma_enabled = true; nc->nmi_monitor_handler = x86_nmi; diff --git a/hw/microblaze/petalogix_ml605_mmu.c b/hw/microblaze/petalogix_ml605_mmu.c index bea6b68..6e923c4 100644 --- a/hw/microblaze/petalogix_ml605_mmu.c +++ b/hw/microblaze/petalogix_ml605_mmu.c @@ -80,8 +80,6 @@ petalogix_ml605_init(MachineState *machine) MemoryRegion *phys_lmb_bram = g_new(MemoryRegion, 1); MemoryRegion *phys_ram = g_new(MemoryRegion, 1); qemu_irq irq[32]; - EndianMode endianness = TARGET_BIG_ENDIAN ? ENDIAN_MODE_BIG - : ENDIAN_MODE_LITTLE; /* init CPUs */ cpu = MICROBLAZE_CPU(object_new(TYPE_MICROBLAZE_CPU)); @@ -113,7 +111,7 @@ petalogix_ml605_init(MachineState *machine) dev = qdev_new("xlnx.xps-intc"); - qdev_prop_set_enum(dev, "endianness", endianness); + qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE); qdev_prop_set_uint32(dev, "kind-of-intr", 1 << TIMER_IRQ); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, INTC_BASEADDR); @@ -129,7 +127,7 @@ petalogix_ml605_init(MachineState *machine) /* 2 timers at irq 2 @ 100 Mhz. */ dev = qdev_new("xlnx.xps-timer"); - qdev_prop_set_enum(dev, "endianness", endianness); + qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE); qdev_prop_set_uint32(dev, "one-timer-only", 0); qdev_prop_set_uint32(dev, "clock-frequency", 100 * 1000000); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); @@ -177,7 +175,7 @@ petalogix_ml605_init(MachineState *machine) SSIBus *spi; dev = qdev_new("xlnx.xps-spi"); - qdev_prop_set_enum(dev, "endianness", endianness); + qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE); qdev_prop_set_uint8(dev, "num-ss-bits", NUM_SPI_FLASHES); busdev = SYS_BUS_DEVICE(dev); sysbus_realize_and_unref(busdev, &error_fatal); @@ -218,12 +216,7 @@ petalogix_ml605_init(MachineState *machine) static void petalogix_ml605_machine_init(MachineClass *mc) { - if (TARGET_BIG_ENDIAN) { - mc->desc = "PetaLogix linux refdesign for xilinx ml605 (big endian)"; - mc->deprecation_reason = "big endian support is not tested"; - } else { - mc->desc = "PetaLogix linux refdesign for xilinx ml605 (little endian)"; - } + mc->desc = "PetaLogix linux refdesign for xilinx ml605 (little endian)"; mc->init = petalogix_ml605_init; } diff --git a/hw/microblaze/petalogix_s3adsp1800_mmu.c b/hw/microblaze/petalogix_s3adsp1800_mmu.c index 032f6f7..e8d0ddf 100644 --- a/hw/microblaze/petalogix_s3adsp1800_mmu.c +++ b/hw/microblaze/petalogix_s3adsp1800_mmu.c @@ -58,9 +58,20 @@ #define TYPE_PETALOGIX_S3ADSP1800_MACHINE \ MACHINE_TYPE_NAME("petalogix-s3adsp1800") +struct S3Adsp1800MachineState { + MachineState parent_class; + + EndianMode endianness; +}; + +OBJECT_DECLARE_TYPE(S3Adsp1800MachineState, MachineClass, + PETALOGIX_S3ADSP1800_MACHINE) + + static void petalogix_s3adsp1800_init(MachineState *machine) { + S3Adsp1800MachineState *psms = PETALOGIX_S3ADSP1800_MACHINE(machine); ram_addr_t ram_size = machine->ram_size; DeviceState *dev; MicroBlazeCPU *cpu; @@ -71,13 +82,12 @@ petalogix_s3adsp1800_init(MachineState *machine) MemoryRegion *phys_ram = g_new(MemoryRegion, 1); qemu_irq irq[32]; MemoryRegion *sysmem = get_system_memory(); - EndianMode endianness = TARGET_BIG_ENDIAN ? ENDIAN_MODE_BIG - : ENDIAN_MODE_LITTLE; + EndianMode endianness = psms->endianness; cpu = MICROBLAZE_CPU(object_new(TYPE_MICROBLAZE_CPU)); object_property_set_str(OBJECT(cpu), "version", "7.10.d", &error_abort); object_property_set_bool(OBJECT(cpu), "little-endian", - !TARGET_BIG_ENDIAN, &error_abort); + endianness == ENDIAN_MODE_LITTLE, &error_abort); qdev_realize(DEVICE(cpu), NULL, &error_abort); /* Attach emulated BRAM through the LMB. */ @@ -135,20 +145,41 @@ petalogix_s3adsp1800_init(MachineState *machine) create_unimplemented_device("xps_gpio", GPIO_BASEADDR, 0x10000); - microblaze_load_kernel(cpu, !TARGET_BIG_ENDIAN, ddr_base, ram_size, - machine->initrd_filename, + microblaze_load_kernel(cpu, endianness == ENDIAN_MODE_LITTLE, ddr_base, + ram_size, machine->initrd_filename, BINARY_DEVICE_TREE_FILE, NULL); } +static int machine_get_endianness(Object *obj, Error **errp G_GNUC_UNUSED) +{ + S3Adsp1800MachineState *ms = PETALOGIX_S3ADSP1800_MACHINE(obj); + return ms->endianness; +} + +static void machine_set_endianness(Object *obj, int endianness, Error **errp) +{ + S3Adsp1800MachineState *ms = PETALOGIX_S3ADSP1800_MACHINE(obj); + ms->endianness = endianness; +} + static void petalogix_s3adsp1800_machine_class_init(ObjectClass *oc, const void *data) { MachineClass *mc = MACHINE_CLASS(oc); + ObjectProperty *prop; mc->desc = "PetaLogix linux refdesign for xilinx Spartan 3ADSP1800"; mc->init = petalogix_s3adsp1800_init; mc->is_default = true; + + prop = object_class_property_add_enum(oc, "endianness", "EndianMode", + &EndianMode_lookup, + machine_get_endianness, + machine_set_endianness); + object_property_set_default_str(prop, TARGET_BIG_ENDIAN ? "big" : "little"); + object_class_property_set_description(oc, "endianness", + "Defines whether the machine runs in big or little endian mode"); } static const TypeInfo petalogix_s3adsp1800_machine_types[] = { @@ -156,6 +187,7 @@ static const TypeInfo petalogix_s3adsp1800_machine_types[] = { .name = TYPE_PETALOGIX_S3ADSP1800_MACHINE, .parent = TYPE_MACHINE, .class_init = petalogix_s3adsp1800_machine_class_init, + .instance_size = sizeof(S3Adsp1800MachineState), }, }; diff --git a/hw/microblaze/xlnx-zynqmp-pmu.c b/hw/microblaze/xlnx-zynqmp-pmu.c index ed40b5f..e909802 100644 --- a/hw/microblaze/xlnx-zynqmp-pmu.c +++ b/hw/microblaze/xlnx-zynqmp-pmu.c @@ -181,12 +181,7 @@ static void xlnx_zynqmp_pmu_init(MachineState *machine) static void xlnx_zynqmp_pmu_machine_init(MachineClass *mc) { - if (TARGET_BIG_ENDIAN) { - mc->desc = "Xilinx ZynqMP PMU machine (big endian)"; - mc->deprecation_reason = "big endian support is not tested"; - } else { - mc->desc = "Xilinx ZynqMP PMU machine (little endian)"; - } + mc->desc = "Xilinx ZynqMP PMU machine (little endian)"; mc->init = xlnx_zynqmp_pmu_init; } diff --git a/hw/net/e1000.c b/hw/net/e1000.c index cba4999..a80a7b0 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -127,10 +127,8 @@ struct E1000State_st { QEMUTimer *flush_queue_timer; /* Compatibility flags for migration to/from qemu 1.3.0 and older */ -#define E1000_FLAG_MAC_BIT 2 #define E1000_FLAG_TSO_BIT 3 #define E1000_FLAG_VET_BIT 4 -#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT) #define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT) #define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT) @@ -1212,52 +1210,51 @@ enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) }; enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 }; -#define markflag(x) ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED) /* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p] * f - flag bits (up to 6 possible flags) * n - flag needed - * p - partially implenented */ + * p - partially implemented */ static const uint8_t mac_reg_access[0x8000] = { - [IPAV] = markflag(MAC), [WUC] = markflag(MAC), - [IP6AT] = markflag(MAC), [IP4AT] = markflag(MAC), - [FFVT] = markflag(MAC), [WUPM] = markflag(MAC), - [ECOL] = markflag(MAC), [MCC] = markflag(MAC), - [DC] = markflag(MAC), [TNCRS] = markflag(MAC), - [RLEC] = markflag(MAC), [XONRXC] = markflag(MAC), - [XOFFTXC] = markflag(MAC), [RFC] = markflag(MAC), - [TSCTFC] = markflag(MAC), [MGTPRC] = markflag(MAC), - [WUS] = markflag(MAC), [AIT] = markflag(MAC), - [FFLT] = markflag(MAC), [FFMT] = markflag(MAC), - [SCC] = markflag(MAC), [FCRUC] = markflag(MAC), - [LATECOL] = markflag(MAC), [COLC] = markflag(MAC), - [SEQEC] = markflag(MAC), [CEXTERR] = markflag(MAC), - [XONTXC] = markflag(MAC), [XOFFRXC] = markflag(MAC), - [RJC] = markflag(MAC), [RNBC] = markflag(MAC), - [MGTPDC] = markflag(MAC), [MGTPTC] = markflag(MAC), - [RUC] = markflag(MAC), [ROC] = markflag(MAC), - [GORCL] = markflag(MAC), [GORCH] = markflag(MAC), - [GOTCL] = markflag(MAC), [GOTCH] = markflag(MAC), - [BPRC] = markflag(MAC), [MPRC] = markflag(MAC), - [TSCTC] = markflag(MAC), [PRC64] = markflag(MAC), - [PRC127] = markflag(MAC), [PRC255] = markflag(MAC), - [PRC511] = markflag(MAC), [PRC1023] = markflag(MAC), - [PRC1522] = markflag(MAC), [PTC64] = markflag(MAC), - [PTC127] = markflag(MAC), [PTC255] = markflag(MAC), - [PTC511] = markflag(MAC), [PTC1023] = markflag(MAC), - [PTC1522] = markflag(MAC), [MPTC] = markflag(MAC), - [BPTC] = markflag(MAC), - - [TDFH] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [TDFT] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [RDFH] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [RDFT] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [PBM] = markflag(MAC) | MAC_ACCESS_PARTIAL, + [IPAV] = MAC_ACCESS_FLAG_NEEDED, [WUC] = MAC_ACCESS_FLAG_NEEDED, + [IP6AT] = MAC_ACCESS_FLAG_NEEDED, [IP4AT] = MAC_ACCESS_FLAG_NEEDED, + [FFVT] = MAC_ACCESS_FLAG_NEEDED, [WUPM] = MAC_ACCESS_FLAG_NEEDED, + [ECOL] = MAC_ACCESS_FLAG_NEEDED, [MCC] = MAC_ACCESS_FLAG_NEEDED, + [DC] = MAC_ACCESS_FLAG_NEEDED, [TNCRS] = MAC_ACCESS_FLAG_NEEDED, + [RLEC] = MAC_ACCESS_FLAG_NEEDED, [XONRXC] = MAC_ACCESS_FLAG_NEEDED, + [XOFFTXC] = MAC_ACCESS_FLAG_NEEDED, [RFC] = MAC_ACCESS_FLAG_NEEDED, + [TSCTFC] = MAC_ACCESS_FLAG_NEEDED, [MGTPRC] = MAC_ACCESS_FLAG_NEEDED, + [WUS] = MAC_ACCESS_FLAG_NEEDED, [AIT] = MAC_ACCESS_FLAG_NEEDED, + [FFLT] = MAC_ACCESS_FLAG_NEEDED, [FFMT] = MAC_ACCESS_FLAG_NEEDED, + [SCC] = MAC_ACCESS_FLAG_NEEDED, [FCRUC] = MAC_ACCESS_FLAG_NEEDED, + [LATECOL] = MAC_ACCESS_FLAG_NEEDED, [COLC] = MAC_ACCESS_FLAG_NEEDED, + [SEQEC] = MAC_ACCESS_FLAG_NEEDED, [CEXTERR] = MAC_ACCESS_FLAG_NEEDED, + [XONTXC] = MAC_ACCESS_FLAG_NEEDED, [XOFFRXC] = MAC_ACCESS_FLAG_NEEDED, + [RJC] = MAC_ACCESS_FLAG_NEEDED, [RNBC] = MAC_ACCESS_FLAG_NEEDED, + [MGTPDC] = MAC_ACCESS_FLAG_NEEDED, [MGTPTC] = MAC_ACCESS_FLAG_NEEDED, + [RUC] = MAC_ACCESS_FLAG_NEEDED, [ROC] = MAC_ACCESS_FLAG_NEEDED, + [GORCL] = MAC_ACCESS_FLAG_NEEDED, [GORCH] = MAC_ACCESS_FLAG_NEEDED, + [GOTCL] = MAC_ACCESS_FLAG_NEEDED, [GOTCH] = MAC_ACCESS_FLAG_NEEDED, + [BPRC] = MAC_ACCESS_FLAG_NEEDED, [MPRC] = MAC_ACCESS_FLAG_NEEDED, + [TSCTC] = MAC_ACCESS_FLAG_NEEDED, [PRC64] = MAC_ACCESS_FLAG_NEEDED, + [PRC127] = MAC_ACCESS_FLAG_NEEDED, [PRC255] = MAC_ACCESS_FLAG_NEEDED, + [PRC511] = MAC_ACCESS_FLAG_NEEDED, [PRC1023] = MAC_ACCESS_FLAG_NEEDED, + [PRC1522] = MAC_ACCESS_FLAG_NEEDED, [PTC64] = MAC_ACCESS_FLAG_NEEDED, + [PTC127] = MAC_ACCESS_FLAG_NEEDED, [PTC255] = MAC_ACCESS_FLAG_NEEDED, + [PTC511] = MAC_ACCESS_FLAG_NEEDED, [PTC1023] = MAC_ACCESS_FLAG_NEEDED, + [PTC1522] = MAC_ACCESS_FLAG_NEEDED, [MPTC] = MAC_ACCESS_FLAG_NEEDED, + [BPTC] = MAC_ACCESS_FLAG_NEEDED, + + [TDFH] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [TDFT] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [TDFHS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [TDFTS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [TDFPC] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [RDFH] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [RDFT] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [RDFHS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [RDFTS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [RDFPC] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [PBM] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, }; static void @@ -1419,13 +1416,6 @@ static int e1000_tx_tso_post_load(void *opaque, int version_id) return 0; } -static bool e1000_full_mac_needed(void *opaque) -{ - E1000State *s = opaque; - - return chkflag(MAC); -} - static bool e1000_tso_state_needed(void *opaque) { E1000State *s = opaque; @@ -1451,7 +1441,6 @@ static const VMStateDescription vmstate_e1000_full_mac_state = { .name = "e1000/full_mac_state", .version_id = 1, .minimum_version_id = 1, - .needed = e1000_full_mac_needed, .fields = (const VMStateField[]) { VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000), VMSTATE_END_OF_LIST() @@ -1679,8 +1668,6 @@ static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp) static const Property e1000_properties[] = { DEFINE_NIC_PROPERTIES(E1000State, conf), - DEFINE_PROP_BIT("extra_mac_registers", E1000State, - compat_flags, E1000_FLAG_MAC_BIT, true), DEFINE_PROP_BIT("migrate_tso_props", E1000State, compat_flags, E1000_FLAG_TSO_BIT, true), DEFINE_PROP_BIT("init-vet", E1000State, diff --git a/hw/net/rocker/rocker.h b/hw/net/rocker/rocker.h index 6e0962f..ae06c1c 100644 --- a/hw/net/rocker/rocker.h +++ b/hw/net/rocker/rocker.h @@ -36,15 +36,7 @@ static inline G_GNUC_PRINTF(1, 2) int DPRINTF(const char *fmt, ...) } #endif -#define __le16 uint16_t -#define __le32 uint32_t -#define __le64 uint64_t - -#define __be16 uint16_t -#define __be32 uint32_t -#define __be64 uint64_t - -static inline bool ipv4_addr_is_multicast(__be32 addr) +static inline bool ipv4_addr_is_multicast(uint32_t addr) { return (addr & htonl(0xf0000000)) == htonl(0xe0000000); } @@ -52,8 +44,8 @@ static inline bool ipv4_addr_is_multicast(__be32 addr) typedef struct ipv6_addr { union { uint8_t addr8[16]; - __be16 addr16[8]; - __be32 addr32[4]; + uint16_t addr16[8]; + uint32_t addr32[4]; }; } Ipv6Addr; diff --git a/hw/net/rocker/rocker_hw.h b/hw/net/rocker/rocker_hw.h index 1786323..7ec6bfb 100644 --- a/hw/net/rocker/rocker_hw.h +++ b/hw/net/rocker/rocker_hw.h @@ -9,10 +9,6 @@ #ifndef ROCKER_HW_H #define ROCKER_HW_H -#define __le16 uint16_t -#define __le32 uint32_t -#define __le64 uint64_t - /* * Return codes */ @@ -124,12 +120,12 @@ enum { */ typedef struct rocker_desc { - __le64 buf_addr; + uint64_t buf_addr; uint64_t cookie; - __le16 buf_size; - __le16 tlv_size; - __le16 rsvd[5]; /* pad to 32 bytes */ - __le16 comp_err; + uint16_t buf_size; + uint16_t tlv_size; + uint16_t rsvd[5]; /* pad to 32 bytes */ + uint16_t comp_err; } __attribute__((packed, aligned(8))) RockerDesc; /* @@ -137,9 +133,9 @@ typedef struct rocker_desc { */ typedef struct rocker_tlv { - __le32 type; - __le16 len; - __le16 rsvd; + uint32_t type; + uint16_t len; + uint16_t rsvd; } __attribute__((packed, aligned(8))) RockerTlv; /* cmd msg */ diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c index 3378f63..4aed178 100644 --- a/hw/net/rocker/rocker_of_dpa.c +++ b/hw/net/rocker/rocker_of_dpa.c @@ -52,10 +52,10 @@ typedef struct of_dpa_flow_key { uint32_t tunnel_id; /* overlay tunnel id */ uint32_t tbl_id; /* table id */ struct { - __be16 vlan_id; /* 0 if no VLAN */ + uint16_t vlan_id; /* 0 if no VLAN */ MACAddr src; /* ethernet source address */ MACAddr dst; /* ethernet destination address */ - __be16 type; /* ethernet frame type */ + uint16_t type; /* ethernet frame type */ } eth; struct { uint8_t proto; /* IP protocol or ARP opcode */ @@ -66,14 +66,14 @@ typedef struct of_dpa_flow_key { union { struct { struct { - __be32 src; /* IP source address */ - __be32 dst; /* IP destination address */ + uint32_t src; /* IP source address */ + uint32_t dst; /* IP destination address */ } addr; union { struct { - __be16 src; /* TCP/UDP/SCTP source port */ - __be16 dst; /* TCP/UDP/SCTP destination port */ - __be16 flags; /* TCP flags */ + uint16_t src; /* TCP/UDP/SCTP source port */ + uint16_t dst; /* TCP/UDP/SCTP destination port */ + uint16_t flags; /* TCP flags */ } tp; struct { MACAddr sha; /* ARP source hardware address */ @@ -86,11 +86,11 @@ typedef struct of_dpa_flow_key { Ipv6Addr src; /* IPv6 source address */ Ipv6Addr dst; /* IPv6 destination address */ } addr; - __be32 label; /* IPv6 flow label */ + uint32_t label; /* IPv6 flow label */ struct { - __be16 src; /* TCP/UDP/SCTP source port */ - __be16 dst; /* TCP/UDP/SCTP destination port */ - __be16 flags; /* TCP flags */ + uint16_t src; /* TCP/UDP/SCTP source port */ + uint16_t dst; /* TCP/UDP/SCTP destination port */ + uint16_t flags; /* TCP flags */ } tp; struct { Ipv6Addr target; /* ND target address */ @@ -112,13 +112,13 @@ typedef struct of_dpa_flow_action { struct { uint32_t group_id; uint32_t tun_log_lport; - __be16 vlan_id; + uint16_t vlan_id; } write; struct { - __be16 new_vlan_id; + uint16_t new_vlan_id; uint32_t out_pport; uint8_t copy_to_cpu; - __be16 vlan_id; + uint16_t vlan_id; } apply; } OfDpaFlowAction; @@ -143,7 +143,7 @@ typedef struct of_dpa_flow { typedef struct of_dpa_flow_pkt_fields { uint32_t tunnel_id; struct eth_header *ethhdr; - __be16 *h_proto; + uint16_t *h_proto; struct vlan_header *vlanhdr; struct ip_header *ipv4hdr; struct ip6_header *ipv6hdr; @@ -180,7 +180,7 @@ typedef struct of_dpa_group { uint32_t group_id; MACAddr src_mac; MACAddr dst_mac; - __be16 vlan_id; + uint16_t vlan_id; } l2_rewrite; struct { uint16_t group_count; @@ -190,13 +190,13 @@ typedef struct of_dpa_group { uint32_t group_id; MACAddr src_mac; MACAddr dst_mac; - __be16 vlan_id; + uint16_t vlan_id; uint8_t ttl_check; } l3_unicast; }; } OfDpaGroup; -static int of_dpa_mask2prefix(__be32 mask) +static int of_dpa_mask2prefix(uint32_t mask) { int i; int count = 32; @@ -451,7 +451,7 @@ static void of_dpa_flow_pkt_parse(OfDpaFlowContext *fc, fc->iovcnt = iovcnt + 2; } -static void of_dpa_flow_pkt_insert_vlan(OfDpaFlowContext *fc, __be16 vlan_id) +static void of_dpa_flow_pkt_insert_vlan(OfDpaFlowContext *fc, uint16_t vlan_id) { OfDpaFlowPktFields *fields = &fc->fields; uint16_t h_proto = fields->ethhdr->h_proto; @@ -486,7 +486,7 @@ static void of_dpa_flow_pkt_strip_vlan(OfDpaFlowContext *fc) static void of_dpa_flow_pkt_hdr_rewrite(OfDpaFlowContext *fc, uint8_t *src_mac, uint8_t *dst_mac, - __be16 vlan_id) + uint16_t vlan_id) { OfDpaFlowPktFields *fields = &fc->fields; diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 83d942a..7c0ca56 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -41,19 +41,9 @@ #define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1 #define VMXNET3_MSIX_BAR_SIZE 0x2000 -/* Compatibility flags for migration */ -#define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT 0 -#define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS \ - (1 << VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT) -#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT 1 -#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE \ - (1 << VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT) - #define VMXNET3_EXP_EP_OFFSET (0x48) -#define VMXNET3_MSI_OFFSET(s) \ - ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x50 : 0x84) -#define VMXNET3_MSIX_OFFSET(s) \ - ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0 : 0x9c) +#define VMXNET3_MSI_OFFSET (0x84) +#define VMXNET3_MSIX_OFFSET (0x9c) #define VMXNET3_DSN_OFFSET (0x100) #define VMXNET3_BAR0_IDX (0) @@ -61,8 +51,7 @@ #define VMXNET3_MSIX_BAR_IDX (2) #define VMXNET3_OFF_MSIX_TABLE (0x000) -#define VMXNET3_OFF_MSIX_PBA(s) \ - ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x800 : 0x1000) +#define VMXNET3_OFF_MSIX_PBA (0x1000) /* Link speed in Mbps should be shifted by 16 */ #define VMXNET3_LINK_SPEED (1000 << 16) @@ -2122,8 +2111,8 @@ vmxnet3_init_msix(VMXNET3State *s) &s->msix_bar, VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_TABLE, &s->msix_bar, - VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA(s), - VMXNET3_MSIX_OFFSET(s), NULL); + VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA, + VMXNET3_MSIX_OFFSET, NULL); if (0 > res) { VMW_WRPRN("Failed to initialize MSI-X, error %d", res); @@ -2221,7 +2210,7 @@ static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp) /* Interrupt pin A */ pci_dev->config[PCI_INTERRUPT_PIN] = 0x01; - ret = msi_init(pci_dev, VMXNET3_MSI_OFFSET(s), VMXNET3_MAX_NMSIX_INTRS, + ret = msi_init(pci_dev, VMXNET3_MSI_OFFSET, VMXNET3_MAX_NMSIX_INTRS, VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK, NULL); /* Any error other than -ENOTSUP(board's MSI support is broken) * is a programming error. Fall back to INTx silently on -ENOTSUP */ @@ -2249,6 +2238,7 @@ static void vmxnet3_instance_init(Object *obj) device_add_bootindex_property(obj, &s->conf.bootindex, "bootindex", "/ethernet-phy@0", DEVICE(obj)); + PCI_DEVICE(obj)->cap_present |= QEMU_PCI_CAP_EXPRESS; } static void vmxnet3_pci_uninit(PCIDevice *pci_dev) @@ -2472,30 +2462,12 @@ static const VMStateDescription vmstate_vmxnet3 = { static const Property vmxnet3_properties[] = { DEFINE_NIC_PROPERTIES(VMXNET3State, conf), - DEFINE_PROP_BIT("x-old-msi-offsets", VMXNET3State, compat_flags, - VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT, false), - DEFINE_PROP_BIT("x-disable-pcie", VMXNET3State, compat_flags, - VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT, false), }; -static void vmxnet3_realize(DeviceState *qdev, Error **errp) -{ - VMXNET3Class *vc = VMXNET3_DEVICE_GET_CLASS(qdev); - PCIDevice *pci_dev = PCI_DEVICE(qdev); - VMXNET3State *s = VMXNET3(qdev); - - if (!(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE)) { - pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; - } - - vc->parent_dc_realize(qdev, errp); -} - static void vmxnet3_class_init(ObjectClass *class, const void *data) { DeviceClass *dc = DEVICE_CLASS(class); PCIDeviceClass *c = PCI_DEVICE_CLASS(class); - VMXNET3Class *vc = VMXNET3_DEVICE_CLASS(class); c->realize = vmxnet3_pci_realize; c->exit = vmxnet3_pci_uninit; @@ -2506,8 +2478,6 @@ static void vmxnet3_class_init(ObjectClass *class, const void *data) c->class_id = PCI_CLASS_NETWORK_ETHERNET; c->subsystem_vendor_id = PCI_VENDOR_ID_VMWARE; c->subsystem_id = PCI_DEVICE_ID_VMWARE_VMXNET3; - device_class_set_parent_realize(dc, vmxnet3_realize, - &vc->parent_dc_realize); dc->desc = "VMWare Paravirtualized Ethernet v3"; device_class_set_legacy_reset(dc, vmxnet3_qdev_reset); dc->vmsd = &vmstate_vmxnet3; diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c index 237b9f7..aa24050 100644 --- a/hw/nvram/fw_cfg.c +++ b/hw/nvram/fw_cfg.c @@ -817,62 +817,6 @@ void fw_cfg_modify_i64(FWCfgState *s, uint16_t key, uint64_t value) g_free(old); } -void fw_cfg_set_order_override(FWCfgState *s, int order) -{ - assert(s->fw_cfg_order_override == 0); - s->fw_cfg_order_override = order; -} - -void fw_cfg_reset_order_override(FWCfgState *s) -{ - assert(s->fw_cfg_order_override != 0); - s->fw_cfg_order_override = 0; -} - -/* - * This is the legacy order list. For legacy systems, files are in - * the fw_cfg in the order defined below, by the "order" value. Note - * that some entries (VGA ROMs, NIC option ROMS, etc.) go into a - * specific area, but there may be more than one and they occur in the - * order that the user specifies them on the command line. Those are - * handled in a special manner, using the order override above. - * - * For non-legacy, the files are sorted by filename to avoid this kind - * of complexity in the future. - * - * This is only for x86, other arches don't implement versioning so - * they won't set legacy mode. - */ -static struct { - const char *name; - int order; -} fw_cfg_order[] = { - { "etc/boot-menu-wait", 10 }, - { "bootsplash.jpg", 11 }, - { "bootsplash.bmp", 12 }, - { "etc/boot-fail-wait", 15 }, - { "etc/smbios/smbios-tables", 20 }, - { "etc/smbios/smbios-anchor", 30 }, - { "etc/e820", 40 }, - { "etc/reserved-memory-end", 50 }, - { "genroms/kvmvapic.bin", 55 }, - { "genroms/linuxboot.bin", 60 }, - { }, /* VGA ROMs from pc_vga_init come here, 70. */ - { }, /* NIC option ROMs from pc_nic_init come here, 80. */ - { "etc/system-states", 90 }, - { }, /* User ROMs come here, 100. */ - { }, /* Device FW comes here, 110. */ - { "etc/extra-pci-roots", 120 }, - { "etc/acpi/tables", 130 }, - { "etc/table-loader", 140 }, - { "etc/tpm/log", 150 }, - { "etc/acpi/rsdp", 160 }, - { "bootorder", 170 }, - { "etc/msr_feature_control", 180 }, - -#define FW_CFG_ORDER_OVERRIDE_LAST 200 -}; - /* * Any sub-page size update to these table MRs will be lost during migration, * as we use aligned size in ram_load_precopy() -> qemu_ram_resize() path. @@ -890,29 +834,6 @@ static void fw_cfg_acpi_mr_save(FWCfgState *s, const char *filename, size_t len) } } -static int get_fw_cfg_order(FWCfgState *s, const char *name) -{ - int i; - - if (s->fw_cfg_order_override > 0) { - return s->fw_cfg_order_override; - } - - for (i = 0; i < ARRAY_SIZE(fw_cfg_order); i++) { - if (fw_cfg_order[i].name == NULL) { - continue; - } - - if (strcmp(name, fw_cfg_order[i].name) == 0) { - return fw_cfg_order[i].order; - } - } - - /* Stick unknown stuff at the end. */ - warn_report("Unknown firmware file in legacy mode: %s", name); - return FW_CFG_ORDER_OVERRIDE_LAST; -} - void fw_cfg_add_file_callback(FWCfgState *s, const char *filename, FWCfgCallback select_cb, FWCfgWriteCallback write_cb, @@ -921,7 +842,6 @@ void fw_cfg_add_file_callback(FWCfgState *s, const char *filename, { int i, index, count; size_t dsize; - MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); int order = 0; if (!s->files) { @@ -933,22 +853,11 @@ void fw_cfg_add_file_callback(FWCfgState *s, const char *filename, count = be32_to_cpu(s->files->count); assert(count < fw_cfg_file_slots(s)); - /* Find the insertion point. */ - if (mc->legacy_fw_cfg_order) { - /* - * Sort by order. For files with the same order, we keep them - * in the sequence in which they were added. - */ - order = get_fw_cfg_order(s, filename); - for (index = count; - index > 0 && order < s->entry_order[index - 1]; - index--); - } else { - /* Sort by file name. */ - for (index = count; - index > 0 && strcmp(filename, s->files->f[index - 1].name) < 0; - index--); - } + /* Find the insertion point, sorting by file name. */ + for (index = count; + index > 0 && strcmp(filename, s->files->f[index - 1].name) < 0; + index--) + ; /* * Move all the entries from the index point and after down one @@ -1058,7 +967,6 @@ bool fw_cfg_add_file_from_generator(FWCfgState *s, static void fw_cfg_machine_reset(void *opaque) { - MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); FWCfgState *s = opaque; void *ptr; size_t len; @@ -1068,11 +976,9 @@ static void fw_cfg_machine_reset(void *opaque) ptr = fw_cfg_modify_file(s, "bootorder", (uint8_t *)buf, len); g_free(ptr); - if (!mc->legacy_fw_cfg_order) { - buf = get_boot_devices_lchs_list(&len); - ptr = fw_cfg_modify_file(s, "bios-geometry", (uint8_t *)buf, len); - g_free(ptr); - } + buf = get_boot_devices_lchs_list(&len); + ptr = fw_cfg_modify_file(s, "bios-geometry", (uint8_t *)buf, len); + g_free(ptr); } static void fw_cfg_machine_ready(struct Notifier *n, void *data) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index f5ab510..9b4bf48 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -128,6 +128,12 @@ static GSequence *pci_acpi_index_list(void) return used_acpi_index_list; } +static void pci_set_master(PCIDevice *d, bool enable) +{ + memory_region_set_enabled(&d->bus_master_enable_region, enable); + d->is_master = enable; /* cache the status */ +} + static void pci_init_bus_master(PCIDevice *pci_dev) { AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev); @@ -135,7 +141,7 @@ static void pci_init_bus_master(PCIDevice *pci_dev) memory_region_init_alias(&pci_dev->bus_master_enable_region, OBJECT(pci_dev), "bus master", dma_as->root, 0, memory_region_size(dma_as->root)); - memory_region_set_enabled(&pci_dev->bus_master_enable_region, false); + pci_set_master(pci_dev, false); memory_region_add_subregion(&pci_dev->bus_master_container_region, 0, &pci_dev->bus_master_enable_region); } @@ -804,9 +810,8 @@ static int get_pci_config_device(QEMUFile *f, void *pv, size_t size, pci_bridge_update_mappings(PCI_BRIDGE(s)); } - memory_region_set_enabled(&s->bus_master_enable_region, - pci_get_word(s->config + PCI_COMMAND) - & PCI_COMMAND_MASTER); + pci_set_master(s, pci_get_word(s->config + PCI_COMMAND) + & PCI_COMMAND_MASTER); g_free(config); return 0; @@ -1725,7 +1730,7 @@ static void pci_update_mappings(PCIDevice *d) pci_update_vga(d); } -static inline int pci_irq_disabled(PCIDevice *d) +int pci_irq_disabled(PCIDevice *d) { return pci_get_word(d->config + PCI_COMMAND) & PCI_COMMAND_INTX_DISABLE; } @@ -1787,9 +1792,8 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int if (ranges_overlap(addr, l, PCI_COMMAND, 2)) { pci_update_irq_disabled(d, was_irq_disabled); - memory_region_set_enabled(&d->bus_master_enable_region, - (pci_get_word(d->config + PCI_COMMAND) - & PCI_COMMAND_MASTER) && d->enabled); + pci_set_master(d, (pci_get_word(d->config + PCI_COMMAND) & + PCI_COMMAND_MASTER) && d->enabled); } msi_write_config(d, addr, val_in, l); @@ -2935,6 +2939,23 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) return &address_space_memory; } +int pci_iommu_init_iotlb_notifier(PCIDevice *dev, IOMMUNotifier *n, + IOMMUNotify fn, void *opaque) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->init_iotlb_notifier) { + iommu_bus->iommu_ops->init_iotlb_notifier(bus, iommu_bus->iommu_opaque, + devfn, n, fn, opaque); + return 0; + } + + return -ENODEV; +} + bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod, Error **errp) { @@ -2966,6 +2987,170 @@ void pci_device_unset_iommu_device(PCIDevice *dev) } } +int pci_pri_request_page(PCIDevice *dev, uint32_t pasid, bool priv_req, + bool exec_req, hwaddr addr, bool lpig, + uint16_t prgi, bool is_read, bool is_write) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + if (!dev->is_master || + ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) { + return -EPERM; + } + + if (!pcie_pri_enabled(dev)) { + return -EPERM; + } + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->pri_request_page) { + return iommu_bus->iommu_ops->pri_request_page(bus, + iommu_bus->iommu_opaque, + devfn, pasid, priv_req, + exec_req, addr, lpig, prgi, + is_read, is_write); + } + + return -ENODEV; +} + +int pci_pri_register_notifier(PCIDevice *dev, uint32_t pasid, + IOMMUPRINotifier *notifier) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + if (!dev->is_master || + ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) { + return -EPERM; + } + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->pri_register_notifier) { + iommu_bus->iommu_ops->pri_register_notifier(bus, + iommu_bus->iommu_opaque, + devfn, pasid, notifier); + return 0; + } + + return -ENODEV; +} + +void pci_pri_unregister_notifier(PCIDevice *dev, uint32_t pasid) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->pri_unregister_notifier) { + iommu_bus->iommu_ops->pri_unregister_notifier(bus, + iommu_bus->iommu_opaque, + devfn, pasid); + } +} + +ssize_t pci_ats_request_translation(PCIDevice *dev, uint32_t pasid, + bool priv_req, bool exec_req, + hwaddr addr, size_t length, + bool no_write, IOMMUTLBEntry *result, + size_t result_length, + uint32_t *err_count) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + if (!dev->is_master || + ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) { + return -EPERM; + } + + if (result_length == 0) { + return -ENOSPC; + } + + if (!pcie_ats_enabled(dev)) { + return -EPERM; + } + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->ats_request_translation) { + return iommu_bus->iommu_ops->ats_request_translation(bus, + iommu_bus->iommu_opaque, + devfn, pasid, priv_req, + exec_req, addr, length, + no_write, result, + result_length, err_count); + } + + return -ENODEV; +} + +int pci_iommu_register_iotlb_notifier(PCIDevice *dev, uint32_t pasid, + IOMMUNotifier *n) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + if ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev)) { + return -EPERM; + } + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->register_iotlb_notifier) { + iommu_bus->iommu_ops->register_iotlb_notifier(bus, + iommu_bus->iommu_opaque, devfn, + pasid, n); + return 0; + } + + return -ENODEV; +} + +int pci_iommu_unregister_iotlb_notifier(PCIDevice *dev, uint32_t pasid, + IOMMUNotifier *n) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + if ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev)) { + return -EPERM; + } + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->unregister_iotlb_notifier) { + iommu_bus->iommu_ops->unregister_iotlb_notifier(bus, + iommu_bus->iommu_opaque, + devfn, pasid, n); + return 0; + } + + return -ENODEV; +} + +int pci_iommu_get_iotlb_info(PCIDevice *dev, uint8_t *addr_width, + uint32_t *min_page_size) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->get_iotlb_info) { + iommu_bus->iommu_ops->get_iotlb_info(iommu_bus->iommu_opaque, + addr_width, min_page_size); + return 0; + } + + return -ENODEV; +} + void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque) { /* @@ -3100,9 +3285,8 @@ void pci_set_enabled(PCIDevice *d, bool state) d->enabled = state; pci_update_mappings(d); - memory_region_set_enabled(&d->bus_master_enable_region, - (pci_get_word(d->config + PCI_COMMAND) - & PCI_COMMAND_MASTER) && d->enabled); + pci_set_master(d, (pci_get_word(d->config + PCI_COMMAND) + & PCI_COMMAND_MASTER) && d->enabled); if (qdev_is_realized(&d->qdev)) { pci_device_reset(d); } diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 1b12db6..eaeb688 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -1214,3 +1214,81 @@ void pcie_acs_reset(PCIDevice *dev) pci_set_word(dev->config + dev->exp.acs_cap + PCI_ACS_CTRL, 0); } } + +/* PASID */ +void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint8_t pasid_width, + bool exec_perm, bool priv_mod) +{ + static const uint16_t control_reg_rw_mask = 0x07; + uint16_t capability_reg; + + assert(pasid_width <= PCI_EXT_CAP_PASID_MAX_WIDTH); + + pcie_add_capability(dev, PCI_EXT_CAP_ID_PASID, PCI_PASID_VER, offset, + PCI_EXT_CAP_PASID_SIZEOF); + + capability_reg = ((uint16_t)pasid_width) << PCI_PASID_CAP_WIDTH_SHIFT; + capability_reg |= exec_perm ? PCI_PASID_CAP_EXEC : 0; + capability_reg |= priv_mod ? PCI_PASID_CAP_PRIV : 0; + pci_set_word(dev->config + offset + PCI_PASID_CAP, capability_reg); + + /* Everything is disabled by default */ + pci_set_word(dev->config + offset + PCI_PASID_CTRL, 0); + + pci_set_word(dev->wmask + offset + PCI_PASID_CTRL, control_reg_rw_mask); + + dev->exp.pasid_cap = offset; +} + +/* PRI */ +void pcie_pri_init(PCIDevice *dev, uint16_t offset, uint32_t outstanding_pr_cap, + bool prg_response_pasid_req) +{ + static const uint16_t control_reg_rw_mask = 0x3; + static const uint16_t status_reg_rw1_mask = 0x3; + static const uint32_t pr_alloc_reg_rw_mask = 0xffffffff; + uint16_t status_reg; + + status_reg = prg_response_pasid_req ? PCI_PRI_STATUS_PASID : 0; + status_reg |= PCI_PRI_STATUS_STOPPED; /* Stopped by default */ + + pcie_add_capability(dev, PCI_EXT_CAP_ID_PRI, PCI_PRI_VER, offset, + PCI_EXT_CAP_PRI_SIZEOF); + /* Disabled by default */ + + pci_set_word(dev->config + offset + PCI_PRI_STATUS, status_reg); + pci_set_long(dev->config + offset + PCI_PRI_MAX_REQ, outstanding_pr_cap); + + pci_set_word(dev->wmask + offset + PCI_PRI_CTRL, control_reg_rw_mask); + pci_set_word(dev->w1cmask + offset + PCI_PRI_STATUS, status_reg_rw1_mask); + pci_set_long(dev->wmask + offset + PCI_PRI_ALLOC_REQ, pr_alloc_reg_rw_mask); + + dev->exp.pri_cap = offset; +} + +bool pcie_pri_enabled(const PCIDevice *dev) +{ + if (!pci_is_express(dev) || !dev->exp.pri_cap) { + return false; + } + return (pci_get_word(dev->config + dev->exp.pri_cap + PCI_PRI_CTRL) & + PCI_PRI_CTRL_ENABLE) != 0; +} + +bool pcie_pasid_enabled(const PCIDevice *dev) +{ + if (!pci_is_express(dev) || !dev->exp.pasid_cap) { + return false; + } + return (pci_get_word(dev->config + dev->exp.pasid_cap + PCI_PASID_CTRL) & + PCI_PASID_CTRL_ENABLE) != 0; +} + +bool pcie_ats_enabled(const PCIDevice *dev) +{ + if (!pci_is_express(dev) || !dev->exp.ats_cap) { + return false; + } + return (pci_get_word(dev->config + dev->exp.ats_cap + PCI_ATS_CTRL) & + PCI_ATS_CTRL_ENABLE) != 0; +} diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c index d5825b6..7c98b1b 100644 --- a/hw/scsi/vmw_pvscsi.c +++ b/hw/scsi/vmw_pvscsi.c @@ -68,18 +68,7 @@ struct PVSCSIClass { OBJECT_DECLARE_TYPE(PVSCSIState, PVSCSIClass, PVSCSI) -/* Compatibility flags for migration */ -#define PVSCSI_COMPAT_OLD_PCI_CONFIGURATION_BIT 0 -#define PVSCSI_COMPAT_OLD_PCI_CONFIGURATION \ - (1 << PVSCSI_COMPAT_OLD_PCI_CONFIGURATION_BIT) -#define PVSCSI_COMPAT_DISABLE_PCIE_BIT 1 -#define PVSCSI_COMPAT_DISABLE_PCIE \ - (1 << PVSCSI_COMPAT_DISABLE_PCIE_BIT) - -#define PVSCSI_USE_OLD_PCI_CONFIGURATION(s) \ - ((s)->compat_flags & PVSCSI_COMPAT_OLD_PCI_CONFIGURATION) -#define PVSCSI_MSI_OFFSET(s) \ - (PVSCSI_USE_OLD_PCI_CONFIGURATION(s) ? 0x50 : 0x7c) +#define PVSCSI_MSI_OFFSET (0x7c) #define PVSCSI_EXP_EP_OFFSET (0x40) typedef struct PVSCSIRingInfo { @@ -129,8 +118,6 @@ struct PVSCSIState { uint8_t msi_used; /* For migration compatibility */ PVSCSIRingInfo rings; /* Data transfer rings manager */ uint32_t resetting; /* Reset in progress */ - - uint32_t compat_flags; }; typedef struct PVSCSIRequest { @@ -1110,7 +1097,7 @@ pvscsi_init_msi(PVSCSIState *s) int res; PCIDevice *d = PCI_DEVICE(s); - res = msi_init(d, PVSCSI_MSI_OFFSET(s), PVSCSI_MSIX_NUM_VECTORS, + res = msi_init(d, PVSCSI_MSI_OFFSET, PVSCSI_MSIX_NUM_VECTORS, PVSCSI_USE_64BIT, PVSCSI_PER_VECTOR_MASK, NULL); if (res < 0) { trace_pvscsi_init_msi_fail(res); @@ -1158,15 +1145,11 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp) trace_pvscsi_state("init"); /* PCI subsystem ID, subsystem vendor ID, revision */ - if (PVSCSI_USE_OLD_PCI_CONFIGURATION(s)) { - pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID, 0x1000); - } else { - pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID, - PCI_VENDOR_ID_VMWARE); - pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID, - PCI_DEVICE_ID_VMWARE_PVSCSI); - pci_config_set_revision(pci_dev->config, 0x2); - } + pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID, + PCI_VENDOR_ID_VMWARE); + pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID, + PCI_DEVICE_ID_VMWARE_PVSCSI); + pci_config_set_revision(pci_dev->config, 0x2); /* PCI latency timer = 255 */ pci_dev->config[PCI_LATENCY_TIMER] = 0xff; @@ -1234,21 +1217,8 @@ pvscsi_post_load(void *opaque, int version_id) return 0; } -static bool pvscsi_vmstate_need_pcie_device(void *opaque) -{ - PVSCSIState *s = PVSCSI(opaque); - - return !(s->compat_flags & PVSCSI_COMPAT_DISABLE_PCIE); -} - -static bool pvscsi_vmstate_test_pci_device(void *opaque, int version_id) -{ - return !pvscsi_vmstate_need_pcie_device(opaque); -} - static const VMStateDescription vmstate_pvscsi_pcie_device = { .name = "pvscsi/pcie", - .needed = pvscsi_vmstate_need_pcie_device, .fields = (const VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PVSCSIState), VMSTATE_END_OF_LIST() @@ -1262,9 +1232,6 @@ static const VMStateDescription vmstate_pvscsi = { .pre_save = pvscsi_pre_save, .post_load = pvscsi_post_load, .fields = (const VMStateField[]) { - VMSTATE_STRUCT_TEST(parent_obj, PVSCSIState, - pvscsi_vmstate_test_pci_device, 0, - vmstate_pci_device, PCIDevice), VMSTATE_UINT8(msi_used, PVSCSIState), VMSTATE_UINT32(resetting, PVSCSIState), VMSTATE_UINT64(reg_interrupt_status, PVSCSIState), @@ -1298,30 +1265,17 @@ static const VMStateDescription vmstate_pvscsi = { static const Property pvscsi_properties[] = { DEFINE_PROP_UINT8("use_msg", PVSCSIState, use_msg, 1), - DEFINE_PROP_BIT("x-old-pci-configuration", PVSCSIState, compat_flags, - PVSCSI_COMPAT_OLD_PCI_CONFIGURATION_BIT, false), - DEFINE_PROP_BIT("x-disable-pcie", PVSCSIState, compat_flags, - PVSCSI_COMPAT_DISABLE_PCIE_BIT, false), }; -static void pvscsi_realize(DeviceState *qdev, Error **errp) +static void pvscsi_instance_init(Object *obj) { - PVSCSIClass *pvs_c = PVSCSI_GET_CLASS(qdev); - PCIDevice *pci_dev = PCI_DEVICE(qdev); - PVSCSIState *s = PVSCSI(qdev); - - if (!(s->compat_flags & PVSCSI_COMPAT_DISABLE_PCIE)) { - pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; - } - - pvs_c->parent_dc_realize(qdev, errp); + PCI_DEVICE(obj)->cap_present |= QEMU_PCI_CAP_EXPRESS; } static void pvscsi_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - PVSCSIClass *pvs_k = PVSCSI_CLASS(klass); HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass); k->realize = pvscsi_realizefn; @@ -1330,8 +1284,6 @@ static void pvscsi_class_init(ObjectClass *klass, const void *data) k->device_id = PCI_DEVICE_ID_VMWARE_PVSCSI; k->class_id = PCI_CLASS_STORAGE_SCSI; k->subsystem_id = 0x1000; - device_class_set_parent_realize(dc, pvscsi_realize, - &pvs_k->parent_dc_realize); device_class_set_legacy_reset(dc, pvscsi_reset); dc->vmsd = &vmstate_pvscsi; device_class_set_props(dc, pvscsi_properties); @@ -1346,6 +1298,7 @@ static const TypeInfo pvscsi_info = { .class_size = sizeof(PVSCSIClass), .instance_size = sizeof(PVSCSIState), .class_init = pvscsi_class_init, + .instance_init = pvscsi_instance_init, .interfaces = (const InterfaceInfo[]) { { TYPE_HOTPLUG_HANDLER }, { INTERFACE_PCIE_DEVICE }, diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c index d1b7bc5..0fd1337 100644 --- a/hw/timer/hpet.c +++ b/hw/timer/hpet.c @@ -426,30 +426,11 @@ static uint64_t hpet_ram_read(void *opaque, hwaddr addr, uint64_t cur_tick; trace_hpet_ram_read(addr); + addr &= ~4; - /*address range of all TN regs*/ - if (addr >= 0x100 && addr <= 0x3ff) { - uint8_t timer_id = (addr - 0x100) / 0x20; - HPETTimer *timer = &s->timer[timer_id]; - - if (timer_id > s->num_timers) { - trace_hpet_timer_id_out_of_range(timer_id); - return 0; - } - - switch (addr & 0x18) { - case HPET_TN_CFG: // including interrupt capabilities - return timer->config >> shift; - case HPET_TN_CMP: // comparator register - return timer->cmp >> shift; - case HPET_TN_ROUTE: - return timer->fsb >> shift; - default: - trace_hpet_ram_read_invalid(); - break; - } - } else { - switch (addr & ~4) { + /*address range of all global regs*/ + if (addr <= 0xff) { + switch (addr) { case HPET_ID: // including HPET_PERIOD return s->capability >> shift; case HPET_CFG: @@ -468,6 +449,26 @@ static uint64_t hpet_ram_read(void *opaque, hwaddr addr, trace_hpet_ram_read_invalid(); break; } + } else { + uint8_t timer_id = (addr - 0x100) / 0x20; + HPETTimer *timer = &s->timer[timer_id]; + + if (timer_id > s->num_timers) { + trace_hpet_timer_id_out_of_range(timer_id); + return 0; + } + + switch (addr & 0x1f) { + case HPET_TN_CFG: // including interrupt capabilities + return timer->config >> shift; + case HPET_TN_CMP: // comparator register + return timer->cmp >> shift; + case HPET_TN_ROUTE: + return timer->fsb >> shift; + default: + trace_hpet_ram_read_invalid(); + break; + } } return 0; } @@ -482,9 +483,67 @@ static void hpet_ram_write(void *opaque, hwaddr addr, uint64_t old_val, new_val, cleared; trace_hpet_ram_write(addr, value); + addr &= ~4; - /*address range of all TN regs*/ - if (addr >= 0x100 && addr <= 0x3ff) { + /*address range of all global regs*/ + if (addr <= 0xff) { + switch (addr) { + case HPET_ID: + return; + case HPET_CFG: + old_val = s->config; + new_val = deposit64(old_val, shift, len, value); + new_val = hpet_fixup_reg(new_val, old_val, HPET_CFG_WRITE_MASK); + s->config = new_val; + if (activating_bit(old_val, new_val, HPET_CFG_ENABLE)) { + /* Enable main counter and interrupt generation. */ + s->hpet_offset = + ticks_to_ns(s->hpet_counter) - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + for (i = 0; i < s->num_timers; i++) { + if (timer_enabled(&s->timer[i]) && (s->isr & (1 << i))) { + update_irq(&s->timer[i], 1); + } + hpet_set_timer(&s->timer[i]); + } + } else if (deactivating_bit(old_val, new_val, HPET_CFG_ENABLE)) { + /* Halt main counter and disable interrupt generation. */ + s->hpet_counter = hpet_get_ticks(s); + for (i = 0; i < s->num_timers; i++) { + hpet_del_timer(&s->timer[i]); + } + } + /* i8254 and RTC output pins are disabled + * when HPET is in legacy mode */ + if (activating_bit(old_val, new_val, HPET_CFG_LEGACY)) { + qemu_set_irq(s->pit_enabled, 0); + qemu_irq_lower(s->irqs[0]); + qemu_irq_lower(s->irqs[RTC_ISA_IRQ]); + } else if (deactivating_bit(old_val, new_val, HPET_CFG_LEGACY)) { + qemu_irq_lower(s->irqs[0]); + qemu_set_irq(s->pit_enabled, 1); + qemu_set_irq(s->irqs[RTC_ISA_IRQ], s->rtc_irq_level); + } + break; + case HPET_STATUS: + new_val = value << shift; + cleared = new_val & s->isr; + for (i = 0; i < s->num_timers; i++) { + if (cleared & (1 << i)) { + update_irq(&s->timer[i], 0); + } + } + break; + case HPET_COUNTER: + if (hpet_enabled(s)) { + trace_hpet_ram_write_counter_write_while_enabled(); + } + s->hpet_counter = deposit64(s->hpet_counter, shift, len, value); + break; + default: + trace_hpet_ram_write_invalid(); + break; + } + } else { uint8_t timer_id = (addr - 0x100) / 0x20; HPETTimer *timer = &s->timer[timer_id]; @@ -550,63 +609,6 @@ static void hpet_ram_write(void *opaque, hwaddr addr, break; } return; - } else { - switch (addr & ~4) { - case HPET_ID: - return; - case HPET_CFG: - old_val = s->config; - new_val = deposit64(old_val, shift, len, value); - new_val = hpet_fixup_reg(new_val, old_val, HPET_CFG_WRITE_MASK); - s->config = new_val; - if (activating_bit(old_val, new_val, HPET_CFG_ENABLE)) { - /* Enable main counter and interrupt generation. */ - s->hpet_offset = - ticks_to_ns(s->hpet_counter) - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - for (i = 0; i < s->num_timers; i++) { - if (timer_enabled(&s->timer[i]) && (s->isr & (1 << i))) { - update_irq(&s->timer[i], 1); - } - hpet_set_timer(&s->timer[i]); - } - } else if (deactivating_bit(old_val, new_val, HPET_CFG_ENABLE)) { - /* Halt main counter and disable interrupt generation. */ - s->hpet_counter = hpet_get_ticks(s); - for (i = 0; i < s->num_timers; i++) { - hpet_del_timer(&s->timer[i]); - } - } - /* i8254 and RTC output pins are disabled - * when HPET is in legacy mode */ - if (activating_bit(old_val, new_val, HPET_CFG_LEGACY)) { - qemu_set_irq(s->pit_enabled, 0); - qemu_irq_lower(s->irqs[0]); - qemu_irq_lower(s->irqs[RTC_ISA_IRQ]); - } else if (deactivating_bit(old_val, new_val, HPET_CFG_LEGACY)) { - qemu_irq_lower(s->irqs[0]); - qemu_set_irq(s->pit_enabled, 1); - qemu_set_irq(s->irqs[RTC_ISA_IRQ], s->rtc_irq_level); - } - break; - case HPET_STATUS: - new_val = value << shift; - cleared = new_val & s->isr; - for (i = 0; i < s->num_timers; i++) { - if (cleared & (1 << i)) { - update_irq(&s->timer[i], 0); - } - } - break; - case HPET_COUNTER: - if (hpet_enabled(s)) { - trace_hpet_ram_write_counter_write_while_enabled(); - } - s->hpet_counter = deposit64(s->hpet_counter, shift, len, value); - break; - default: - trace_hpet_ram_write_invalid(); - break; - } } } diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 1ab2c11..e20da95 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -594,6 +594,36 @@ static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v) v->shadow_vqs = g_steal_pointer(&shadow_vqs); } +static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) +{ + struct vhost_vdpa *v = dev->opaque; + + uint64_t features; + uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | + 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH | + 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID | + 0x1ULL << VHOST_BACKEND_F_SUSPEND; + int r; + + if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { + return -EFAULT; + } + + features &= f; + + if (vhost_vdpa_first_dev(dev)) { + r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); + if (r) { + return -EFAULT; + } + } + + dev->backend_cap = features; + v->shared->backend_cap = features; + + return 0; +} + static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) { struct vhost_vdpa *v = opaque; @@ -603,7 +633,12 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) v->dev = dev; dev->opaque = opaque ; - v->shared->listener = vhost_vdpa_memory_listener; + + ret = vhost_vdpa_set_backend_cap(dev); + if (unlikely(ret != 0)) { + return ret; + } + vhost_vdpa_init_svq(dev, v); error_propagate(&dev->migration_blocker, v->migration_blocker); @@ -639,6 +674,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER); + v->shared->listener = vhost_vdpa_memory_listener; return 0; } @@ -841,36 +877,6 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev, return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK); } -static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) -{ - struct vhost_vdpa *v = dev->opaque; - - uint64_t features; - uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | - 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH | - 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID | - 0x1ULL << VHOST_BACKEND_F_SUSPEND; - int r; - - if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { - return -EFAULT; - } - - features &= f; - - if (vhost_vdpa_first_dev(dev)) { - r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); - if (r) { - return -EFAULT; - } - } - - dev->backend_cap = features; - v->shared->backend_cap = features; - - return 0; -} - static int vhost_vdpa_get_device_id(struct vhost_dev *dev, uint32_t *device_id) { @@ -888,8 +894,14 @@ static int vhost_vdpa_reset_device(struct vhost_dev *dev) ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status); trace_vhost_vdpa_reset_device(dev); + if (ret) { + return ret; + } + + memory_listener_unregister(&v->shared->listener); + v->shared->listener_registered = false; v->suspended = false; - return ret; + return 0; } static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx) @@ -1373,7 +1385,15 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) "IOMMU and try again"); return -1; } - memory_listener_register(&v->shared->listener, dev->vdev->dma_as); + if (v->shared->listener_registered && + dev->vdev->dma_as != v->shared->listener.address_space) { + memory_listener_unregister(&v->shared->listener); + v->shared->listener_registered = false; + } + if (!v->shared->listener_registered) { + memory_listener_register(&v->shared->listener, dev->vdev->dma_as); + v->shared->listener_registered = true; + } return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); } @@ -1383,8 +1403,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) static void vhost_vdpa_reset_status(struct vhost_dev *dev) { - struct vhost_vdpa *v = dev->opaque; - if (!vhost_vdpa_last_dev(dev)) { return; } @@ -1392,7 +1410,6 @@ static void vhost_vdpa_reset_status(struct vhost_dev *dev) vhost_vdpa_reset_device(dev); vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER); - memory_listener_unregister(&v->shared->listener); } static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, @@ -1526,12 +1543,27 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev, static int vhost_vdpa_set_owner(struct vhost_dev *dev) { + int r; + struct vhost_vdpa *v; + if (!vhost_vdpa_first_dev(dev)) { return 0; } trace_vhost_vdpa_set_owner(dev); - return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL); + r = vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL); + if (unlikely(r < 0)) { + return r; + } + + /* + * Being optimistic and listening address space memory. If the device + * uses vIOMMU, it is changed at vhost_vdpa_dev_start. + */ + v = dev->opaque; + memory_listener_register(&v->shared->listener, &address_space_memory); + v->shared->listener_registered = true; + return 0; } static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev, @@ -1563,7 +1595,6 @@ const VhostOps vdpa_ops = { .vhost_set_vring_kick = vhost_vdpa_set_vring_kick, .vhost_set_vring_call = vhost_vdpa_set_vring_call, .vhost_get_features = vhost_vdpa_get_features, - .vhost_set_backend_cap = vhost_vdpa_set_backend_cap, .vhost_set_owner = vhost_vdpa_set_owner, .vhost_set_vring_endian = NULL, .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit, diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 9b48aa8..fba2372 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -146,9 +146,7 @@ static const VMStateDescription vmstate_virtio_pci = { static bool virtio_pci_has_extra_state(DeviceState *d) { - VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); - - return proxy->flags & VIRTIO_PCI_FLAG_MIGRATE_EXTRA; + return true; } static void virtio_pci_save_extra_state(DeviceState *d, QEMUFile *f) @@ -1215,7 +1213,12 @@ static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign, static bool virtio_pci_query_guest_notifiers(DeviceState *d) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); - return msix_enabled(&proxy->pci_dev); + + if (msix_enabled(&proxy->pci_dev)) { + return true; + } else { + return pci_irq_disabled(&proxy->pci_dev); + } } static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) @@ -2363,12 +2366,8 @@ static void virtio_pci_bus_reset_hold(Object *obj, ResetType type) static const Property virtio_pci_properties[] = { DEFINE_PROP_BIT("virtio-pci-bus-master-bug-migration", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, false), - DEFINE_PROP_BIT("migrate-extra", VirtIOPCIProxy, flags, - VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, true), DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, false), - DEFINE_PROP_BIT("x-disable-pcie", VirtIOPCIProxy, flags, - VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false), DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false), DEFINE_PROP_BOOL("x-ignore-backend-features", VirtIOPCIProxy, @@ -2397,8 +2396,7 @@ static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp) VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev); PCIDevice *pci_dev = &proxy->pci_dev; - if (!(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_PCIE) && - virtio_pci_modern(proxy)) { + if (virtio_pci_modern(proxy)) { pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; } diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 2e98cec..5534251 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -205,6 +205,15 @@ static const char *virtio_id_to_name(uint16_t device_id) return name; } +static void virtio_check_indirect_feature(VirtIODevice *vdev) +{ + if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Device %s: indirect_desc was not negotiated!\n", + vdev->name); + } +} + /* Called within call_rcu(). */ static void virtio_free_region_cache(VRingMemoryRegionCaches *caches) { @@ -1733,6 +1742,7 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) virtio_error(vdev, "Invalid size for indirect buffer table"); goto done; } + virtio_check_indirect_feature(vdev); /* loop over the indirect descriptor table */ len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as, @@ -1870,6 +1880,7 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz) virtio_error(vdev, "Invalid size for indirect buffer table"); goto done; } + virtio_check_indirect_feature(vdev); /* loop over the indirect descriptor table */ len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as, diff --git a/include/hw/boards.h b/include/hw/boards.h index a7b1fcf..f424b2b 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -286,8 +286,7 @@ struct MachineClass { no_parallel:1, no_floppy:1, no_cdrom:1, - pci_allow_0_address:1, - legacy_fw_cfg_order:1; + pci_allow_0_address:1; bool auto_create_sdcard; bool is_default; const char *default_machine_opts; @@ -863,10 +862,4 @@ extern const size_t hw_compat_2_7_len; extern GlobalProperty hw_compat_2_6[]; extern const size_t hw_compat_2_6_len; -extern GlobalProperty hw_compat_2_5[]; -extern const size_t hw_compat_2_5_len; - -extern GlobalProperty hw_compat_2_4[]; -extern const size_t hw_compat_2_4_len; - #endif diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 9563674..79b72c5 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -107,7 +107,6 @@ struct PCMachineClass { /* RAM / address space compat: */ bool gigabyte_align; bool has_reserved_memory; - bool broken_reserved_end; bool enforce_amd_1tb_hole; bool isa_bios_alias; @@ -299,12 +298,6 @@ extern const size_t pc_compat_2_7_len; extern GlobalProperty pc_compat_2_6[]; extern const size_t pc_compat_2_6_len; -extern GlobalProperty pc_compat_2_5[]; -extern const size_t pc_compat_2_5_len; - -extern GlobalProperty pc_compat_2_4[]; -extern const size_t pc_compat_2_4_len; - #define DEFINE_PC_MACHINE(suffix, namestr, initfn, optsfn) \ static void pc_machine_##suffix##_class_init(ObjectClass *oc, \ const void *data) \ diff --git a/include/hw/i386/tdvf.h b/include/hw/i386/tdvf.h new file mode 100644 index 0000000..e75c8d1 --- /dev/null +++ b/include/hw/i386/tdvf.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2025 Intel Corporation + * Author: Isaku Yamahata <isaku.yamahata at gmail.com> + * <isaku.yamahata at intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_I386_TDVF_H +#define HW_I386_TDVF_H + +#include "qemu/osdep.h" + +#define TDVF_SECTION_TYPE_BFV 0 +#define TDVF_SECTION_TYPE_CFV 1 +#define TDVF_SECTION_TYPE_TD_HOB 2 +#define TDVF_SECTION_TYPE_TEMP_MEM 3 + +#define TDVF_SECTION_ATTRIBUTES_MR_EXTEND (1U << 0) +#define TDVF_SECTION_ATTRIBUTES_PAGE_AUG (1U << 1) + +typedef struct TdxFirmwareEntry { + uint32_t data_offset; + uint32_t data_len; + uint64_t address; + uint64_t size; + uint32_t type; + uint32_t attributes; + + void *mem_ptr; +} TdxFirmwareEntry; + +typedef struct TdxFirmware { + void *mem_ptr; + + uint32_t nr_entries; + TdxFirmwareEntry *entries; +} TdxFirmware; + +#define for_each_tdx_fw_entry(fw, e) \ + for (e = (fw)->entries; e != (fw)->entries + (fw)->nr_entries; e++) + +int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size); + +#endif /* HW_I386_TDVF_H */ diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h index 258b134..fc460b8 100644 --- a/include/hw/i386/x86.h +++ b/include/hw/i386/x86.h @@ -27,13 +27,8 @@ #include "qom/object.h" struct X86MachineClass { - /*< private >*/ MachineClass parent; - /*< public >*/ - - /* TSC rate migration: */ - bool save_tsc_khz; /* use DMA capable linuxboot option rom */ bool fwcfg_dma_enabled; /* CPU and apic information: */ diff --git a/include/hw/loader.h b/include/hw/loader.h index d280dc3..c96b5e1 100644 --- a/include/hw/loader.h +++ b/include/hw/loader.h @@ -270,8 +270,6 @@ int rom_add_elf_program(const char *name, GMappedFile *mapped_file, void *data, AddressSpace *as); int rom_check_and_register_reset(void); void rom_set_fw(FWCfgState *f); -void rom_set_order_override(int order); -void rom_reset_order_override(void); /** * rom_transaction_begin: diff --git a/include/hw/nvram/fw_cfg.h b/include/hw/nvram/fw_cfg.h index 47578cc..d41b932 100644 --- a/include/hw/nvram/fw_cfg.h +++ b/include/hw/nvram/fw_cfg.h @@ -42,14 +42,6 @@ struct FWCfgDataGeneratorClass { typedef struct fw_cfg_file FWCfgFile; -#define FW_CFG_ORDER_OVERRIDE_VGA 70 -#define FW_CFG_ORDER_OVERRIDE_NIC 80 -#define FW_CFG_ORDER_OVERRIDE_USER 100 -#define FW_CFG_ORDER_OVERRIDE_DEVICE 110 - -void fw_cfg_set_order_override(FWCfgState *fw_cfg, int order); -void fw_cfg_reset_order_override(FWCfgState *fw_cfg); - typedef struct FWCfgFiles { uint32_t count; FWCfgFile f[]; @@ -75,8 +67,6 @@ struct FWCfgState { uint32_t cur_offset; Notifier machine_ready; - int fw_cfg_order_override; - bool dma_enabled; dma_addr_t dma_addr; AddressSpace *dma_as; diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index c2fe6ca..35d59d7 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -375,6 +375,28 @@ void pci_bus_get_w64_range(PCIBus *bus, Range *range); void pci_device_deassert_intx(PCIDevice *dev); +/* Page Request Interface */ +typedef enum { + IOMMU_PRI_RESP_SUCCESS, + IOMMU_PRI_RESP_INVALID_REQUEST, + IOMMU_PRI_RESP_FAILURE, +} IOMMUPRIResponseCode; + +typedef struct IOMMUPRIResponse { + IOMMUPRIResponseCode response_code; + uint16_t prgi; +} IOMMUPRIResponse; + +struct IOMMUPRINotifier; + +typedef void (*IOMMUPRINotify)(struct IOMMUPRINotifier *notifier, + IOMMUPRIResponse *response); + +typedef struct IOMMUPRINotifier { + IOMMUPRINotify notify; +} IOMMUPRINotifier; + +#define PCI_PRI_PRGI_MASK 0x1ffU /** * struct PCIIOMMUOps: callbacks structure for specific IOMMU handlers @@ -429,6 +451,179 @@ typedef struct PCIIOMMUOps { * @devfn: device and function number of the PCI device. */ void (*unset_iommu_device)(PCIBus *bus, void *opaque, int devfn); + /** + * @get_iotlb_info: get properties required to initialize a device IOTLB. + * + * Callback required if devices are allowed to cache translations. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @addr_width: the address width of the IOMMU (output parameter). + * + * @min_page_size: the page size of the IOMMU (output parameter). + */ + void (*get_iotlb_info)(void *opaque, uint8_t *addr_width, + uint32_t *min_page_size); + /** + * @init_iotlb_notifier: initialize an IOMMU notifier. + * + * Optional callback. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + * + * @n: the notifier to be initialized. + * + * @fn: the callback to be installed. + * + * @user_opaque: a user pointer that can be used to track a state. + */ + void (*init_iotlb_notifier)(PCIBus *bus, void *opaque, int devfn, + IOMMUNotifier *n, IOMMUNotify fn, + void *user_opaque); + /** + * @register_iotlb_notifier: setup an IOTLB invalidation notifier. + * + * Callback required if devices are allowed to cache translations. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + * + * @pasid: the pasid of the address space to watch. + * + * @n: the notifier to register. + */ + void (*register_iotlb_notifier)(PCIBus *bus, void *opaque, int devfn, + uint32_t pasid, IOMMUNotifier *n); + /** + * @unregister_iotlb_notifier: remove an IOTLB invalidation notifier. + * + * Callback required if devices are allowed to cache translations. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + * + * @pasid: the pasid of the address space to stop watching. + * + * @n: the notifier to unregister. + */ + void (*unregister_iotlb_notifier)(PCIBus *bus, void *opaque, int devfn, + uint32_t pasid, IOMMUNotifier *n); + /** + * @ats_request_translation: issue an ATS request. + * + * Callback required if devices are allowed to use the address + * translation service. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + * + * @pasid: the pasid of the address space to use for the request. + * + * @priv_req: privileged mode bit (PASID TLP). + * + * @exec_req: execute request bit (PASID TLP). + * + * @addr: start address of the memory range to be translated. + * + * @length: length of the memory range in bytes. + * + * @no_write: request a read-only translation (if supported). + * + * @result: buffer in which the TLB entries will be stored. + * + * @result_length: result buffer length. + * + * @err_count: number of untranslated subregions. + * + * Returns: the number of translations stored in the result buffer, or + * -ENOMEM if the buffer is not large enough. + */ + ssize_t (*ats_request_translation)(PCIBus *bus, void *opaque, int devfn, + uint32_t pasid, bool priv_req, + bool exec_req, hwaddr addr, + size_t length, bool no_write, + IOMMUTLBEntry *result, + size_t result_length, + uint32_t *err_count); + /** + * @pri_register_notifier: setup the PRI completion callback. + * + * Callback required if devices are allowed to use the page request + * interface. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + * + * @pasid: the pasid of the address space to track. + * + * @notifier: the notifier to register. + */ + void (*pri_register_notifier)(PCIBus *bus, void *opaque, int devfn, + uint32_t pasid, IOMMUPRINotifier *notifier); + /** + * @pri_unregister_notifier: remove the PRI completion callback. + * + * Callback required if devices are allowed to use the page request + * interface. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + * + * @pasid: the pasid of the address space to stop tracking. + */ + void (*pri_unregister_notifier)(PCIBus *bus, void *opaque, int devfn, + uint32_t pasid); + /** + * @pri_request_page: issue a PRI request. + * + * Callback required if devices are allowed to use the page request + * interface. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + * + * @pasid: the pasid of the address space to use for the request. + * + * @priv_req: privileged mode bit (PASID TLP). + * + * @exec_req: execute request bit (PASID TLP). + * + * @addr: untranslated address of the requested page. + * + * @lpig: last page in group. + * + * @prgi: page request group index. + * + * @is_read: request read access. + * + * @is_write: request write access. + */ + int (*pri_request_page)(PCIBus *bus, void *opaque, int devfn, + uint32_t pasid, bool priv_req, bool exec_req, + hwaddr addr, bool lpig, uint16_t prgi, bool is_read, + bool is_write); } PCIIOMMUOps; AddressSpace *pci_device_iommu_address_space(PCIDevice *dev); @@ -437,6 +632,126 @@ bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod, void pci_device_unset_iommu_device(PCIDevice *dev); /** + * pci_iommu_get_iotlb_info: get properties required to initialize a + * device IOTLB. + * + * Returns 0 on success, or a negative errno otherwise. + * + * @dev: the device that wants to get the information. + * @addr_width: the address width of the IOMMU (output parameter). + * @min_page_size: the page size of the IOMMU (output parameter). + */ +int pci_iommu_get_iotlb_info(PCIDevice *dev, uint8_t *addr_width, + uint32_t *min_page_size); + +/** + * pci_iommu_init_iotlb_notifier: initialize an IOMMU notifier. + * + * This function is used by devices before registering an IOTLB notifier. + * + * @dev: the device. + * @n: the notifier to be initialized. + * @fn: the callback to be installed. + * @opaque: a user pointer that can be used to track a state. + */ +int pci_iommu_init_iotlb_notifier(PCIDevice *dev, IOMMUNotifier *n, + IOMMUNotify fn, void *opaque); + +/** + * pci_ats_request_translation: perform an ATS request. + * + * Returns the number of translations stored in @result in case of success, + * a negative error code otherwise. + * -ENOMEM is returned when the result buffer is not large enough to store + * all the translations. + * + * @dev: the ATS-capable PCI device. + * @pasid: the pasid of the address space in which the translation will be done. + * @priv_req: privileged mode bit (PASID TLP). + * @exec_req: execute request bit (PASID TLP). + * @addr: start address of the memory range to be translated. + * @length: length of the memory range in bytes. + * @no_write: request a read-only translation (if supported). + * @result: buffer in which the TLB entries will be stored. + * @result_length: result buffer length. + * @err_count: number of untranslated subregions. + */ +ssize_t pci_ats_request_translation(PCIDevice *dev, uint32_t pasid, + bool priv_req, bool exec_req, + hwaddr addr, size_t length, + bool no_write, IOMMUTLBEntry *result, + size_t result_length, + uint32_t *err_count); + +/** + * pci_pri_request_page: perform a PRI request. + * + * Returns 0 if the PRI request has been sent to the guest OS, + * an error code otherwise. + * + * @dev: the PRI-capable PCI device. + * @pasid: the pasid of the address space in which the translation will be done. + * @priv_req: privileged mode bit (PASID TLP). + * @exec_req: execute request bit (PASID TLP). + * @addr: untranslated address of the requested page. + * @lpig: last page in group. + * @prgi: page request group index. + * @is_read: request read access. + * @is_write: request write access. + */ +int pci_pri_request_page(PCIDevice *dev, uint32_t pasid, bool priv_req, + bool exec_req, hwaddr addr, bool lpig, + uint16_t prgi, bool is_read, bool is_write); + +/** + * pci_pri_register_notifier: register the PRI callback for a given address + * space. + * + * Returns 0 on success, an error code otherwise. + * + * @dev: the PRI-capable PCI device. + * @pasid: the pasid of the address space to track. + * @notifier: the notifier to register. + */ +int pci_pri_register_notifier(PCIDevice *dev, uint32_t pasid, + IOMMUPRINotifier *notifier); + +/** + * pci_pri_unregister_notifier: remove the PRI callback from a given address + * space. + * + * @dev: the PRI-capable PCI device. + * @pasid: the pasid of the address space to stop tracking. + */ +void pci_pri_unregister_notifier(PCIDevice *dev, uint32_t pasid); + +/** + * pci_iommu_register_iotlb_notifier: register a notifier for changes to + * IOMMU translation entries in a specific address space. + * + * Returns 0 on success, or a negative errno otherwise. + * + * @dev: the device that wants to get notified. + * @pasid: the pasid of the address space to track. + * @n: the notifier to register. + */ +int pci_iommu_register_iotlb_notifier(PCIDevice *dev, uint32_t pasid, + IOMMUNotifier *n); + +/** + * pci_iommu_unregister_iotlb_notifier: unregister a notifier that has been + * registerd with pci_iommu_register_iotlb_notifier. + * + * Returns 0 on success, or a negative errno otherwise. + * + * @dev: the device that wants to stop notifications. + * @pasid: the pasid of the address space to stop tracking. + * @n: the notifier to unregister. + */ +int pci_iommu_unregister_iotlb_notifier(PCIDevice *dev, uint32_t pasid, + IOMMUNotifier *n); + +/** * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus * * Let PCI host bridges define specific operations. @@ -668,6 +983,7 @@ void lsi53c8xx_handle_legacy_cmdline(DeviceState *lsi_dev); qemu_irq pci_allocate_irq(PCIDevice *pci_dev); void pci_set_irq(PCIDevice *pci_dev, int level); +int pci_irq_disabled(PCIDevice *d); static inline void pci_irq_assert(PCIDevice *pci_dev) { diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h index e41d95b..eee0338 100644 --- a/include/hw/pci/pci_device.h +++ b/include/hw/pci/pci_device.h @@ -90,6 +90,7 @@ struct PCIDevice { char name[64]; PCIIORegion io_regions[PCI_NUM_REGIONS]; AddressSpace bus_master_as; + bool is_master; MemoryRegion bus_master_container_region; MemoryRegion bus_master_enable_region; diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h index 70a5de0..ff6ce08 100644 --- a/include/hw/pci/pcie.h +++ b/include/hw/pci/pcie.h @@ -70,8 +70,10 @@ struct PCIExpressDevice { uint16_t aer_cap; PCIEAERLog aer_log; - /* Offset of ATS capability in config space */ + /* Offset of ATS, PRI and PASID capabilities in config space */ uint16_t ats_cap; + uint16_t pasid_cap; + uint16_t pri_cap; /* ACS */ uint16_t acs_cap; @@ -150,4 +152,13 @@ void pcie_cap_slot_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); + +void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint8_t pasid_width, + bool exec_perm, bool priv_mod); +void pcie_pri_init(PCIDevice *dev, uint16_t offset, uint32_t outstanding_pr_cap, + bool prg_response_pasid_req); + +bool pcie_pri_enabled(const PCIDevice *dev); +bool pcie_pasid_enabled(const PCIDevice *dev); +bool pcie_ats_enabled(const PCIDevice *dev); #endif /* QEMU_PCIE_H */ diff --git a/include/hw/pci/pcie_regs.h b/include/hw/pci/pcie_regs.h index 9d3b686..33a2222 100644 --- a/include/hw/pci/pcie_regs.h +++ b/include/hw/pci/pcie_regs.h @@ -86,6 +86,14 @@ typedef enum PCIExpLinkWidth { #define PCI_ARI_VER 1 #define PCI_ARI_SIZEOF 8 +/* PASID */ +#define PCI_PASID_VER 1 +#define PCI_EXT_CAP_PASID_MAX_WIDTH 20 +#define PCI_PASID_CAP_WIDTH_SHIFT 8 + +/* PRI */ +#define PCI_PRI_VER 1 + /* AER */ #define PCI_ERR_VER 2 #define PCI_ERR_SIZEOF 0x48 diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h index 0a9575b..449bf5c 100644 --- a/include/hw/virtio/vhost-vdpa.h +++ b/include/hw/virtio/vhost-vdpa.h @@ -43,7 +43,21 @@ typedef struct vhost_vdpa_shared { struct vhost_vdpa_iova_range iova_range; QLIST_HEAD(, vdpa_iommu) iommu_list; - /* IOVA mapping used by the Shadow Virtqueue */ + /* + * IOVA mapping used by the Shadow Virtqueue + * + * It is shared among all ASID for simplicity, whether CVQ shares ASID with + * guest or not: + * - Memory listener need access to guest's memory addresses allocated in + * the IOVA tree. + * - There should be plenty of IOVA address space for both ASID not to + * worry about collisions between them. Guest's translations are still + * validated with virtio virtqueue_pop so there is no risk for the guest + * to access memory that it shouldn't. + * + * To allocate a iova tree per ASID is doable but it complicates the code + * and it is not worth it for the moment. + */ VhostIOVATree *iova_tree; /* Copy of backend features */ @@ -51,6 +65,12 @@ typedef struct vhost_vdpa_shared { bool iotlb_batch_begin_sent; + /* + * The memory listener has been registered, so DMA maps have been sent to + * the device. + */ + bool listener_registered; + /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */ bool shadow_data; diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h index 1dbc385..eab5394 100644 --- a/include/hw/virtio/virtio-pci.h +++ b/include/hw/virtio/virtio-pci.h @@ -32,9 +32,7 @@ DECLARE_OBJ_CHECKERS(VirtioPCIBusState, VirtioPCIBusClass, enum { VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, - VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, - VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, VIRTIO_PCI_FLAG_ATS_BIT, VIRTIO_PCI_FLAG_INIT_DEVERR_BIT, @@ -54,12 +52,6 @@ enum { * vcpu thread using ioeventfd for some devices. */ #define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT) -/* virtio version flags */ -#define VIRTIO_PCI_FLAG_DISABLE_PCIE (1 << VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT) - -/* migrate extra state */ -#define VIRTIO_PCI_FLAG_MIGRATE_EXTRA (1 << VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT) - /* have pio notification for modern device ? */ #define VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY \ (1 << VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT) diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h index ab15577..a88cf8b 100644 --- a/include/io/channel-socket.h +++ b/include/io/channel-socket.h @@ -261,5 +261,18 @@ QIOChannelSocket * qio_channel_socket_accept(QIOChannelSocket *ioc, Error **errp); +/** + * qio_channel_socket_set_send_buffer: + * @ioc: the socket channel object + * @size: buffer size + * @errp: pointer to a NULL-initialized error object + * + * Set the underlying socket send buffer size. + * + * Retruns: 0 on success, or -1 on error. + */ +int qio_channel_socket_set_send_buffer(QIOChannelSocket *ioc, + size_t size, + Error **errp); #endif /* QIO_CHANNEL_SOCKET_H */ diff --git a/include/standard-headers/uefi/uefi.h b/include/standard-headers/uefi/uefi.h new file mode 100644 index 0000000..5256349 --- /dev/null +++ b/include/standard-headers/uefi/uefi.h @@ -0,0 +1,187 @@ +/* + * Copyright (C) 2025 Intel Corporation + * + * Author: Isaku Yamahata <isaku.yamahata at gmail.com> + * <isaku.yamahata at intel.com> + * Xiaoyao Li <xiaoyao.li@intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_I386_UEFI_H +#define HW_I386_UEFI_H + +/***************************************************************************/ +/* + * basic EFI definitions + * supplemented with UEFI Specification Version 2.8 (Errata A) + * released February 2020 + */ +/* UEFI integer is little endian */ + +typedef struct { + uint32_t Data1; + uint16_t Data2; + uint16_t Data3; + uint8_t Data4[8]; +} EFI_GUID; + +typedef enum { + EfiReservedMemoryType, + EfiLoaderCode, + EfiLoaderData, + EfiBootServicesCode, + EfiBootServicesData, + EfiRuntimeServicesCode, + EfiRuntimeServicesData, + EfiConventionalMemory, + EfiUnusableMemory, + EfiACPIReclaimMemory, + EfiACPIMemoryNVS, + EfiMemoryMappedIO, + EfiMemoryMappedIOPortSpace, + EfiPalCode, + EfiPersistentMemory, + EfiUnacceptedMemoryType, + EfiMaxMemoryType +} EFI_MEMORY_TYPE; + +#define EFI_HOB_HANDOFF_TABLE_VERSION 0x0009 + +#define EFI_HOB_TYPE_HANDOFF 0x0001 +#define EFI_HOB_TYPE_MEMORY_ALLOCATION 0x0002 +#define EFI_HOB_TYPE_RESOURCE_DESCRIPTOR 0x0003 +#define EFI_HOB_TYPE_GUID_EXTENSION 0x0004 +#define EFI_HOB_TYPE_FV 0x0005 +#define EFI_HOB_TYPE_CPU 0x0006 +#define EFI_HOB_TYPE_MEMORY_POOL 0x0007 +#define EFI_HOB_TYPE_FV2 0x0009 +#define EFI_HOB_TYPE_LOAD_PEIM_UNUSED 0x000A +#define EFI_HOB_TYPE_UEFI_CAPSULE 0x000B +#define EFI_HOB_TYPE_FV3 0x000C +#define EFI_HOB_TYPE_UNUSED 0xFFFE +#define EFI_HOB_TYPE_END_OF_HOB_LIST 0xFFFF + +typedef struct { + uint16_t HobType; + uint16_t HobLength; + uint32_t Reserved; +} EFI_HOB_GENERIC_HEADER; + +typedef uint64_t EFI_PHYSICAL_ADDRESS; +typedef uint32_t EFI_BOOT_MODE; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; + uint32_t Version; + EFI_BOOT_MODE BootMode; + EFI_PHYSICAL_ADDRESS EfiMemoryTop; + EFI_PHYSICAL_ADDRESS EfiMemoryBottom; + EFI_PHYSICAL_ADDRESS EfiFreeMemoryTop; + EFI_PHYSICAL_ADDRESS EfiFreeMemoryBottom; + EFI_PHYSICAL_ADDRESS EfiEndOfHobList; +} EFI_HOB_HANDOFF_INFO_TABLE; + +#define EFI_RESOURCE_SYSTEM_MEMORY 0x00000000 +#define EFI_RESOURCE_MEMORY_MAPPED_IO 0x00000001 +#define EFI_RESOURCE_IO 0x00000002 +#define EFI_RESOURCE_FIRMWARE_DEVICE 0x00000003 +#define EFI_RESOURCE_MEMORY_MAPPED_IO_PORT 0x00000004 +#define EFI_RESOURCE_MEMORY_RESERVED 0x00000005 +#define EFI_RESOURCE_IO_RESERVED 0x00000006 +#define EFI_RESOURCE_MEMORY_UNACCEPTED 0x00000007 +#define EFI_RESOURCE_MAX_MEMORY_TYPE 0x00000008 + +#define EFI_RESOURCE_ATTRIBUTE_PRESENT 0x00000001 +#define EFI_RESOURCE_ATTRIBUTE_INITIALIZED 0x00000002 +#define EFI_RESOURCE_ATTRIBUTE_TESTED 0x00000004 +#define EFI_RESOURCE_ATTRIBUTE_SINGLE_BIT_ECC 0x00000008 +#define EFI_RESOURCE_ATTRIBUTE_MULTIPLE_BIT_ECC 0x00000010 +#define EFI_RESOURCE_ATTRIBUTE_ECC_RESERVED_1 0x00000020 +#define EFI_RESOURCE_ATTRIBUTE_ECC_RESERVED_2 0x00000040 +#define EFI_RESOURCE_ATTRIBUTE_READ_PROTECTED 0x00000080 +#define EFI_RESOURCE_ATTRIBUTE_WRITE_PROTECTED 0x00000100 +#define EFI_RESOURCE_ATTRIBUTE_EXECUTION_PROTECTED 0x00000200 +#define EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE 0x00000400 +#define EFI_RESOURCE_ATTRIBUTE_WRITE_COMBINEABLE 0x00000800 +#define EFI_RESOURCE_ATTRIBUTE_WRITE_THROUGH_CACHEABLE 0x00001000 +#define EFI_RESOURCE_ATTRIBUTE_WRITE_BACK_CACHEABLE 0x00002000 +#define EFI_RESOURCE_ATTRIBUTE_16_BIT_IO 0x00004000 +#define EFI_RESOURCE_ATTRIBUTE_32_BIT_IO 0x00008000 +#define EFI_RESOURCE_ATTRIBUTE_64_BIT_IO 0x00010000 +#define EFI_RESOURCE_ATTRIBUTE_UNCACHED_EXPORTED 0x00020000 +#define EFI_RESOURCE_ATTRIBUTE_READ_ONLY_PROTECTED 0x00040000 +#define EFI_RESOURCE_ATTRIBUTE_READ_ONLY_PROTECTABLE 0x00080000 +#define EFI_RESOURCE_ATTRIBUTE_READ_PROTECTABLE 0x00100000 +#define EFI_RESOURCE_ATTRIBUTE_WRITE_PROTECTABLE 0x00200000 +#define EFI_RESOURCE_ATTRIBUTE_EXECUTION_PROTECTABLE 0x00400000 +#define EFI_RESOURCE_ATTRIBUTE_PERSISTENT 0x00800000 +#define EFI_RESOURCE_ATTRIBUTE_PERSISTABLE 0x01000000 +#define EFI_RESOURCE_ATTRIBUTE_MORE_RELIABLE 0x02000000 + +typedef uint32_t EFI_RESOURCE_TYPE; +typedef uint32_t EFI_RESOURCE_ATTRIBUTE_TYPE; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; + EFI_GUID Owner; + EFI_RESOURCE_TYPE ResourceType; + EFI_RESOURCE_ATTRIBUTE_TYPE ResourceAttribute; + EFI_PHYSICAL_ADDRESS PhysicalStart; + uint64_t ResourceLength; +} EFI_HOB_RESOURCE_DESCRIPTOR; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; + EFI_GUID Name; + + /* guid specific data follows */ +} EFI_HOB_GUID_TYPE; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; + EFI_PHYSICAL_ADDRESS BaseAddress; + uint64_t Length; +} EFI_HOB_FIRMWARE_VOLUME; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; + EFI_PHYSICAL_ADDRESS BaseAddress; + uint64_t Length; + EFI_GUID FvName; + EFI_GUID FileName; +} EFI_HOB_FIRMWARE_VOLUME2; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; + EFI_PHYSICAL_ADDRESS BaseAddress; + uint64_t Length; + uint32_t AuthenticationStatus; + bool ExtractedFv; + EFI_GUID FvName; + EFI_GUID FileName; +} EFI_HOB_FIRMWARE_VOLUME3; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; + uint8_t SizeOfMemorySpace; + uint8_t SizeOfIoSpace; + uint8_t Reserved[6]; +} EFI_HOB_CPU; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; +} EFI_HOB_MEMORY_POOL; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; + + EFI_PHYSICAL_ADDRESS BaseAddress; + uint64_t Length; +} EFI_HOB_UEFI_CAPSULE; + +#define EFI_HOB_OWNER_ZERO \ + ((EFI_GUID){ 0x00000000, 0x0000, 0x0000, \ + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }) + +#endif diff --git a/include/system/kvm.h b/include/system/kvm.h index b690dda..62ec131 100644 --- a/include/system/kvm.h +++ b/include/system/kvm.h @@ -376,6 +376,7 @@ int kvm_arch_get_default_type(MachineState *ms); int kvm_arch_init(MachineState *ms, KVMState *s); +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp); int kvm_arch_init_vcpu(CPUState *cpu); int kvm_arch_destroy_vcpu(CPUState *cpu); diff --git a/include/system/memory.h b/include/system/memory.h index fbbf4cf..fc35a0d 100644 --- a/include/system/memory.h +++ b/include/system/memory.h @@ -183,6 +183,7 @@ struct IOMMUNotifier { hwaddr start; hwaddr end; int iommu_idx; + void *opaque; QLIST_ENTRY(IOMMUNotifier) node; }; typedef struct IOMMUNotifier IOMMUNotifier; diff --git a/io/channel-socket.c b/io/channel-socket.c index 088b49f..3b7ca92 100644 --- a/io/channel-socket.c +++ b/io/channel-socket.c @@ -78,6 +78,17 @@ qio_channel_socket_new(void) return sioc; } +int qio_channel_socket_set_send_buffer(QIOChannelSocket *ioc, + size_t size, + Error **errp) +{ + if (setsockopt(ioc->fd, SOL_SOCKET, SO_SNDBUF, &size, sizeof(size)) < 0) { + error_setg_errno(errp, errno, "Unable to set socket send buffer size"); + return -1; + } + + return 0; +} static int qio_channel_socket_set_fd(QIOChannelSocket *sioc, diff --git a/meson.build b/meson.build index fdad3fb..ef99467 100644 --- a/meson.build +++ b/meson.build @@ -3272,6 +3272,7 @@ config_devices_mak_list = [] config_devices_h = {} config_target_h = {} config_target_mak = {} +config_base_arch_mak = {} disassemblers = { 'alpha' : ['CONFIG_ALPHA_DIS'], @@ -3463,6 +3464,11 @@ foreach target : target_dirs config_all_devices += config_devices endif config_target_mak += {target: config_target} + + # build a merged config for all targets with the same TARGET_BASE_ARCH + target_base_arch = config_target['TARGET_BASE_ARCH'] + config_base_arch = config_base_arch_mak.get(target_base_arch, {}) + config_target + config_base_arch_mak += {target_base_arch: config_base_arch} endforeach target_dirs = actual_target_dirs @@ -3718,14 +3724,12 @@ io_ss = ss.source_set() qmp_ss = ss.source_set() qom_ss = ss.source_set() system_ss = ss.source_set() -libsystem_ss = ss.source_set() specific_fuzz_ss = ss.source_set() specific_ss = ss.source_set() rust_devices_ss = ss.source_set() stub_ss = ss.source_set() trace_ss = ss.source_set() user_ss = ss.source_set() -libuser_ss = ss.source_set() util_ss = ss.source_set() # accel modules @@ -4102,30 +4106,20 @@ common_ss.add(hwcore) system_ss.add(authz, blockdev, chardev, crypto, io, qmp) common_ss.add(qom, qemuutil) -common_ss.add_all(when: 'CONFIG_SYSTEM_ONLY', if_true: [system_ss]) -common_ss.add_all(when: 'CONFIG_USER_ONLY', if_true: user_ss) - -libuser_ss = libuser_ss.apply({}) libuser = static_library('user', - libuser_ss.sources() + genh, + user_ss.all_sources() + genh, c_args: ['-DCONFIG_USER_ONLY', '-DCOMPILING_SYSTEM_VS_USER'], - dependencies: libuser_ss.dependencies(), + include_directories: common_user_inc, + dependencies: user_ss.all_dependencies(), build_by_default: false) -libuser = declare_dependency(objects: libuser.extract_all_objects(recursive: false), - dependencies: libuser_ss.dependencies()) -common_ss.add(when: 'CONFIG_USER_ONLY', if_true: libuser) -libsystem_ss = libsystem_ss.apply({}) libsystem = static_library('system', - libsystem_ss.sources() + genh, + system_ss.all_sources() + genh, c_args: ['-DCONFIG_SOFTMMU', '-DCOMPILING_SYSTEM_VS_USER'], - dependencies: libsystem_ss.dependencies(), + dependencies: system_ss.all_dependencies(), build_by_default: false) -libsystem = declare_dependency(objects: libsystem.extract_all_objects(recursive: false), - dependencies: libsystem_ss.dependencies()) -common_ss.add(when: 'CONFIG_SYSTEM_ONLY', if_true: libsystem) # Note that this library is never used directly (only through extract_objects) # and is not built by default; therefore, source files not used by the build @@ -4133,65 +4127,71 @@ common_ss.add(when: 'CONFIG_SYSTEM_ONLY', if_true: libsystem) common_all = static_library('common', build_by_default: false, sources: common_ss.all_sources() + genh, - include_directories: common_user_inc, implicit_include_directories: false, dependencies: common_ss.all_dependencies()) # construct common libraries per base architecture -hw_common_arch_libs = {} target_common_arch_libs = {} target_common_system_arch_libs = {} -foreach target : target_dirs +foreach target_base_arch, config_base_arch : config_base_arch_mak config_target = config_target_mak[target] - target_base_arch = config_target['TARGET_BASE_ARCH'] target_inc = [include_directories('target' / target_base_arch)] inc = [common_user_inc + target_inc] + target_common = common_ss.apply(config_target, strict: false) + target_system = system_ss.apply(config_target, strict: false) + target_user = user_ss.apply(config_target, strict: false) + common_deps = [] + system_deps = [] + user_deps = [] + foreach dep: target_common.dependencies() + common_deps += dep.partial_dependency(compile_args: true, includes: true) + endforeach + foreach dep: target_system.dependencies() + system_deps += dep.partial_dependency(compile_args: true, includes: true) + endforeach + foreach dep: target_user.dependencies() + user_deps += dep.partial_dependency(compile_args: true, includes: true) + endforeach + # prevent common code to access cpu compile time definition, # but still allow access to cpu.h target_c_args = ['-DCPU_DEFS_H'] target_system_c_args = target_c_args + ['-DCOMPILING_SYSTEM_VS_USER', '-DCONFIG_SOFTMMU'] - if target_base_arch in hw_common_arch - if target_base_arch not in hw_common_arch_libs - src = hw_common_arch[target_base_arch] - lib = static_library( - 'hw_' + target_base_arch, - build_by_default: false, - sources: src.all_sources() + genh, - include_directories: inc, - c_args: target_system_c_args, - dependencies: src.all_dependencies()) - hw_common_arch_libs += {target_base_arch: lib} - endif - endif - if target_base_arch in target_common_arch - if target_base_arch not in target_common_arch_libs - src = target_common_arch[target_base_arch] - lib = static_library( - 'target_' + target_base_arch, - build_by_default: false, - sources: src.all_sources() + genh, - include_directories: inc, - c_args: target_c_args, - dependencies: src.all_dependencies()) - target_common_arch_libs += {target_base_arch: lib} + src = target_common_arch[target_base_arch] + lib = static_library( + 'common_' + target_base_arch, + build_by_default: false, + sources: src.all_sources() + genh, + include_directories: inc, + c_args: target_c_args, + dependencies: src.all_dependencies() + common_deps + + system_deps + user_deps) + target_common_arch_libs += {target_base_arch: lib} + endif + + # merge hw_common_arch in target_common_system_arch + if target_base_arch in hw_common_arch + hw_src = hw_common_arch[target_base_arch] + if target_base_arch in target_common_system_arch + target_common_system_arch[target_base_arch].add_all(hw_src) + else + target_common_system_arch += {target_base_arch: hw_src} endif endif if target_base_arch in target_common_system_arch - if target_base_arch not in target_common_system_arch_libs - src = target_common_system_arch[target_base_arch] - lib = static_library( - 'target_system_' + target_base_arch, - build_by_default: false, - sources: src.all_sources() + genh, - include_directories: inc, - c_args: target_system_c_args, - dependencies: src.all_dependencies()) - target_common_system_arch_libs += {target_base_arch: lib} - endif + src = target_common_system_arch[target_base_arch] + lib = static_library( + 'system_' + target_base_arch, + build_by_default: false, + sources: src.all_sources() + genh, + include_directories: inc, + c_args: target_system_c_args, + dependencies: src.all_dependencies() + common_deps + system_deps) + target_common_system_arch_libs += {target_base_arch: lib} endif endforeach @@ -4368,10 +4368,14 @@ foreach target : target_dirs objects += lib.extract_objects(src.sources()) arch_deps += src.dependencies() endif - if target_type == 'system' and target_base_arch in hw_common_arch_libs - src = hw_common_arch[target_base_arch].apply(config_target, strict: false) - lib = hw_common_arch_libs[target_base_arch] - objects += lib.extract_objects(src.sources()) + if target_type == 'system' + src = system_ss.apply(config_target, strict: false) + objects += libsystem.extract_objects(src.sources()) + arch_deps += src.dependencies() + endif + if target_type == 'user' + src = user_ss.apply(config_target, strict: false) + objects += libuser.extract_objects(src.sources()) arch_deps += src.dependencies() endif if target_type == 'system' and target_base_arch in target_common_system_arch_libs diff --git a/nbd/client-connection.c b/nbd/client-connection.c index b11e266..79ea97e 100644 --- a/nbd/client-connection.c +++ b/nbd/client-connection.c @@ -31,6 +31,8 @@ #include "qapi/clone-visitor.h" #include "qemu/coroutine.h" +#include "nbd/nbd-internal.h" + struct NBDClientConnection { /* Initialization constants, never change */ SocketAddress *saddr; /* address to connect to */ @@ -140,6 +142,7 @@ static int nbd_connect(QIOChannelSocket *sioc, SocketAddress *addr, return ret; } + nbd_set_socket_send_buffer(sioc); qio_channel_set_delay(QIO_CHANNEL(sioc), false); if (!info) { diff --git a/nbd/common.c b/nbd/common.c index 589a748..2a133a6 100644 --- a/nbd/common.c +++ b/nbd/common.c @@ -18,6 +18,9 @@ #include "qemu/osdep.h" #include "trace.h" +#include "io/channel-socket.h" +#include "qapi/error.h" +#include "qemu/units.h" #include "nbd-internal.h" /* Discard length bytes from channel. Return -errno on failure and 0 on @@ -264,3 +267,26 @@ const char *nbd_mode_lookup(NBDMode mode) return "<unknown>"; } } + +/* + * Testing shows that 2m send buffer is optimal. Changing the receive buffer + * size has no effect on performance. + * On Linux we need to increase net.core.wmem_max to make this effective. + */ +#if defined(__APPLE__) || defined(__linux__) +#define UNIX_STREAM_SOCKET_SEND_BUFFER_SIZE (2 * MiB) +#endif + +void nbd_set_socket_send_buffer(QIOChannelSocket *sioc) +{ +#ifdef UNIX_STREAM_SOCKET_SEND_BUFFER_SIZE + if (sioc->localAddr.ss_family == AF_UNIX) { + size_t size = UNIX_STREAM_SOCKET_SEND_BUFFER_SIZE; + Error *errp = NULL; + + if (qio_channel_socket_set_send_buffer(sioc, size, &errp) < 0) { + warn_report_err(errp); + } + } +#endif /* UNIX_STREAM_SOCKET_SEND_BUFFER_SIZE */ +} diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h index 715d92d..6bafeef 100644 --- a/nbd/nbd-internal.h +++ b/nbd/nbd-internal.h @@ -74,4 +74,9 @@ static inline int nbd_write(QIOChannel *ioc, const void *buffer, size_t size, int nbd_drop(QIOChannel *ioc, size_t size, Error **errp); +/* nbd_set_socket_send_buffer + * Set the socket send buffer size for optimal performance. + */ +void nbd_set_socket_send_buffer(QIOChannelSocket *sioc); + #endif diff --git a/nbd/server.c b/nbd/server.c index 2076fb2..d242be9 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -3291,6 +3291,8 @@ void nbd_client_new(QIOChannelSocket *sioc, client->close_fn = close_fn; client->owner = owner; + nbd_set_socket_send_buffer(sioc); + co = qemu_coroutine_create(nbd_co_client_start, client); qemu_coroutine_enter(co); } diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 7ca8b46..58d7389 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -235,6 +235,7 @@ static void vhost_vdpa_cleanup(NetClientState *nc) return; } qemu_close(s->vhost_vdpa.shared->device_fd); + g_clear_pointer(&s->vhost_vdpa.shared->iova_tree, vhost_iova_tree_delete); g_free(s->vhost_vdpa.shared); } @@ -362,14 +363,8 @@ static int vdpa_net_migration_state_notifier(NotifierWithReturn *notifier, static void vhost_vdpa_net_data_start_first(VhostVDPAState *s) { - struct vhost_vdpa *v = &s->vhost_vdpa; - migration_add_notifier(&s->migration_state, vdpa_net_migration_state_notifier); - if (v->shadow_vqs_enabled) { - v->shared->iova_tree = vhost_iova_tree_new(v->shared->iova_range.first, - v->shared->iova_range.last); - } } static int vhost_vdpa_net_data_start(NetClientState *nc) @@ -416,19 +411,12 @@ static int vhost_vdpa_net_data_load(NetClientState *nc) static void vhost_vdpa_net_client_stop(NetClientState *nc) { VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); - struct vhost_dev *dev; assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); if (s->vhost_vdpa.index == 0) { migration_remove_notifier(&s->migration_state); } - - dev = s->vhost_vdpa.dev; - if (dev->vq_index + dev->nvqs == dev->vq_index_end) { - g_clear_pointer(&s->vhost_vdpa.shared->iova_tree, - vhost_iova_tree_delete); - } } static NetClientInfo net_vhost_vdpa_info = { @@ -600,24 +588,6 @@ out: return 0; } - /* - * If other vhost_vdpa already have an iova_tree, reuse it for simplicity, - * whether CVQ shares ASID with guest or not, because: - * - Memory listener need access to guest's memory addresses allocated in - * the IOVA tree. - * - There should be plenty of IOVA address space for both ASID not to - * worry about collisions between them. Guest's translations are still - * validated with virtio virtqueue_pop so there is no risk for the guest - * to access memory that it shouldn't. - * - * To allocate a iova tree per ASID is doable but it complicates the code - * and it is not worth it for the moment. - */ - if (!v->shared->iova_tree) { - v->shared->iova_tree = vhost_iova_tree_new(v->shared->iova_range.first, - v->shared->iova_range.last); - } - r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_page_len(), false); if (unlikely(r < 0)) { @@ -1726,6 +1696,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, s->vhost_vdpa.shared->device_fd = vdpa_device_fd; s->vhost_vdpa.shared->iova_range = iova_range; s->vhost_vdpa.shared->shadow_data = svq; + s->vhost_vdpa.shared->iova_tree = vhost_iova_tree_new(iova_range.first, + iova_range.last); } else if (!is_datapath) { s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(), PROT_READ | PROT_WRITE, diff --git a/plugins/meson.build b/plugins/meson.build index 5383c7b..b20edfb 100644 --- a/plugins/meson.build +++ b/plugins/meson.build @@ -61,8 +61,8 @@ endif user_ss.add(files('user.c', 'api-user.c')) system_ss.add(files('system.c', 'api-system.c')) -libuser_ss.add(files('api.c', 'core.c')) -libsystem_ss.add(files('api.c', 'core.c')) +user_ss.add(files('api.c', 'core.c')) +system_ss.add(files('api.c', 'core.c')) common_ss.add(files('loader.c')) diff --git a/qapi/acpi.json b/qapi/acpi.json index 045dab6..2d53b82 100644 --- a/qapi/acpi.json +++ b/qapi/acpi.json @@ -80,7 +80,7 @@ ## # @ACPIOSTInfo: # -# OSPM Status Indication for a device For description of possible +# OSPM Status Indication for a device. For description of possible # values of @source and @status fields see "_OST (OSPM Status # Indication)" chapter of ACPI5.0 spec. # diff --git a/qapi/audio.json b/qapi/audio.json index 8de4430..16de231 100644 --- a/qapi/audio.json +++ b/qapi/audio.json @@ -309,9 +309,9 @@ # # @name: name of the sink/source to use # -# @stream-name: name of the PulseAudio stream created by qemu. Can be +# @stream-name: name of the PulseAudio stream created by QEMU. Can be # used to identify the stream in PulseAudio when you create -# multiple PulseAudio devices or run multiple qemu instances +# multiple PulseAudio devices or run multiple QEMU instances # (default: audiodev's id, since 4.2) # # @latency: latency you want PulseAudio to achieve in microseconds @@ -353,9 +353,9 @@ # # @name: name of the sink/source to use # -# @stream-name: name of the PipeWire stream created by qemu. Can be +# @stream-name: name of the PipeWire stream created by QEMU. Can be # used to identify the stream in PipeWire when you create multiple -# PipeWire devices or run multiple qemu instances (default: +# PipeWire devices or run multiple QEMU instances (default: # audiodev's id) # # @latency: latency you want PipeWire to achieve in microseconds diff --git a/qapi/block-core.json b/qapi/block-core.json index b411511..1df6644 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -31,8 +31,8 @@ # @icount: Current instruction count. Appears when execution # record/replay is enabled. Used for "time-traveling" to match # the moment in the recorded execution with the snapshots. This -# counter may be obtained through @query-replay command (since -# 5.2) +# counter may be obtained through @query-replay command +# (since 5.2) # # Since: 1.3 ## @@ -488,7 +488,7 @@ # # @active: true if the backend is active; typical cases for inactive backends # are on the migration source instance after migration completes and on the -# destination before it completes. (since: 10.0) +# destination before it completes. (since: 10.0) # # @encrypted: true if the backing device is encrypted # @@ -510,11 +510,11 @@ # # @bps_max: total throughput limit during bursts, in bytes (Since 1.7) # -# @bps_rd_max: read throughput limit during bursts, in bytes (Since -# 1.7) +# @bps_rd_max: read throughput limit during bursts, in bytes +# (Since 1.7) # -# @bps_wr_max: write throughput limit during bursts, in bytes (Since -# 1.7) +# @bps_wr_max: write throughput limit during bursts, in bytes +# (Since 1.7) # # @iops_max: total I/O operations per second during bursts, in bytes # (Since 1.7) @@ -951,11 +951,11 @@ # @unmap_operations: The number of unmap operations performed by the # device (Since 4.2) # -# @rd_total_time_ns: Total time spent on reads in nanoseconds (since -# 0.15). +# @rd_total_time_ns: Total time spent on reads in nanoseconds +# (since 0.15) # -# @wr_total_time_ns: Total time spent on writes in nanoseconds (since -# 0.15). +# @wr_total_time_ns: Total time spent on writes in nanoseconds +# (since 0.15) # # @zone_append_total_time_ns: Total time spent on zone append writes # in nanoseconds (since 8.1) @@ -1322,8 +1322,8 @@ # @incremental: only copy data described by the dirty bitmap. # (since: 2.4) # -# @bitmap: only copy data described by the dirty bitmap. (since: 4.2) -# Behavior on completion is determined by the BitmapSyncMode. +# @bitmap: only copy data described by the dirty bitmap. Behavior on +# completion is determined by the BitmapSyncMode. (since: 4.2) # # Since: 1.3 ## @@ -1337,7 +1337,7 @@ # bitmap when used for data copy operations. # # @on-success: The bitmap is only synced when the operation is -# successful. This is the behavior always used for 'INCREMENTAL' +# successful. This is the behavior always used for incremental # backups. # # @never: The bitmap is never synchronized with the operation, and is @@ -1417,8 +1417,8 @@ # @auto-finalize: Job will finalize itself when PENDING, moving to the # CONCLUDED state. (since 2.12) # -# @auto-dismiss: Job will dismiss itself when CONCLUDED, moving to the -# NULL state and disappearing from the query list. (since 2.12) +# @auto-dismiss: Job will dismiss itself when CONCLUDED, and +# disappear. (since 2.12) # # @error: Error information if the job did not complete successfully. # Not set if the job completed successfully. (since 2.12.1) @@ -1502,15 +1502,15 @@ # # @device: the name of the device to take a snapshot of. # -# @node-name: graph node name to generate the snapshot from (Since -# 2.0) +# @node-name: graph node name to generate the snapshot from +# (Since 2.0) # # @snapshot-file: the target of the new overlay image. If the file # exists, or if it is a device, the overlay will be created in the # existing file/device. Otherwise, a new file will be created. # -# @snapshot-node-name: the graph node name of the new image (Since -# 2.0) +# @snapshot-node-name: the graph node name of the new image +# (Since 2.0) # # @format: the format of the overlay image, default is 'qcow2'. # @@ -1589,7 +1589,7 @@ # # @bitmap-mode: Specifies the type of data the bitmap should contain # after the operation concludes. Must be present if a bitmap was -# provided, Must NOT be present otherwise. (Since 4.2) +# provided, must **not** be present otherwise. (Since 4.2) # # @compress: true to compress data, if the target format supports it. # (default: false) (since 2.8) @@ -1606,16 +1606,15 @@ # copy-before-write jobs; defaults to break-guest-write. (Since 10.1) # # @auto-finalize: When false, this job will wait in a PENDING state -# after it has finished its work, waiting for @block-job-finalize -# before making any block graph changes. When true, this job will +# after it has finished its work, waiting for @job-finalize before +# making any block graph changes. When true, this job will # automatically perform its abort or commit actions. Defaults to # true. (Since 2.12) # # @auto-dismiss: When false, this job will wait in a CONCLUDED state # after it has completely ceased all work, and awaits -# @block-job-dismiss. When true, this job will automatically -# disappear from the query list without user intervention. -# Defaults to true. (Since 2.12) +# @job-dismiss. When true, this job will automatically disappear +# without user intervention. Defaults to true. (Since 2.12) # # @filter-node-name: the node name that should be assigned to the # filter driver that the backup job inserts into the graph above @@ -1785,8 +1784,7 @@ # If top == base, that is an error. If top has no overlays on top of # it, or if it is in use by a writer, the job will not be completed by # itself. The user needs to complete the job with the -# block-job-complete command after getting the ready event. (Since -# 2.0) +# job-complete command after getting the ready event. (Since 2.0) # # If the base image is smaller than top, then the base image will be # resized to be the same size as top. If top is smaller than the base @@ -1840,7 +1838,7 @@ # @speed: the maximum speed, in bytes per second # # @on-error: the action to take on an error. 'ignore' means that the -# request should be retried. (default: report; Since: 5.0) +# request should be retried. (default: report; since: 5.0) # # @filter-node-name: the node name that should be assigned to the # filter driver that the commit job inserts into the graph above @@ -1848,16 +1846,15 @@ # autogenerated. (Since: 2.9) # # @auto-finalize: When false, this job will wait in a PENDING state -# after it has finished its work, waiting for @block-job-finalize -# before making any block graph changes. When true, this job will +# after it has finished its work, waiting for @job-finalize before +# making any block graph changes. When true, this job will # automatically perform its abort or commit actions. Defaults to # true. (Since 3.1) # # @auto-dismiss: When false, this job will wait in a CONCLUDED state # after it has completely ceased all work, and awaits -# @block-job-dismiss. When true, this job will automatically -# disappear from the query list without user intervention. -# Defaults to true. (Since 3.1) +# @job-dismiss. When true, this job will automatically disappear +# without user intervention. Defaults to true. (Since 3.1) # # Features: # @@ -1895,7 +1892,7 @@ # The status of ongoing drive-backup operations can be checked with # query-block-jobs where the BlockJobInfo.type field has the value # 'backup'. The operation can be stopped before it has completed -# using the block-job-cancel command. +# using the job-cancel or block-job-cancel command. # # Features: # @@ -1926,7 +1923,7 @@ # The status of ongoing blockdev-backup operations can be checked with # query-block-jobs where the BlockJobInfo.type field has the value # 'backup'. The operation can be stopped before it has completed -# using the block-job-cancel command. +# using the job-cancel or block-job-cancel command. # # Errors: # - If @device is not a valid block device, DeviceNotFound @@ -2030,7 +2027,7 @@ # # @id: Block graph node identifier. This @id is generated only for # x-debug-query-block-graph and does not relate to any other -# identifiers in Qemu. +# identifiers in QEMU. # # @type: Type of graph node. Can be one of block-backend, block-job # or block-driver-state. @@ -2169,8 +2166,8 @@ # @format: the format of the new destination, default is to probe if # @mode is 'existing', else the format of the source # -# @node-name: the new block driver state node name in the graph (Since -# 2.1) +# @node-name: the new block driver state node name in the graph +# (Since 2.1) # # @replaces: with sync=full graph node name to be replaced by the new # image when a whole image copy is done. This can be used to @@ -2212,16 +2209,15 @@ # 'background' (Since: 3.0) # # @auto-finalize: When false, this job will wait in a PENDING state -# after it has finished its work, waiting for @block-job-finalize -# before making any block graph changes. When true, this job will +# after it has finished its work, waiting for @job-finalize before +# making any block graph changes. When true, this job will # automatically perform its abort or commit actions. Defaults to # true. (Since 3.1) # # @auto-dismiss: When false, this job will wait in a CONCLUDED state # after it has completely ceased all work, and awaits -# @block-job-dismiss. When true, this job will automatically -# disappear from the query list without user intervention. -# Defaults to true. (Since 3.1) +# @job-dismiss. When true, this job will automatically disappear +# without user intervention. Defaults to true. (Since 3.1) # # Since: 1.3 ## @@ -2531,16 +2527,15 @@ # 'background' (Since: 3.0) # # @auto-finalize: When false, this job will wait in a PENDING state -# after it has finished its work, waiting for @block-job-finalize -# before making any block graph changes. When true, this job will +# after it has finished its work, waiting for @job-finalize before +# making any block graph changes. When true, this job will # automatically perform its abort or commit actions. Defaults to # true. (Since 3.1) # # @auto-dismiss: When false, this job will wait in a CONCLUDED state # after it has completely ceased all work, and awaits -# @block-job-dismiss. When true, this job will automatically -# disappear from the query list without user intervention. -# Defaults to true. (Since 3.1) +# @job-dismiss. When true, this job will automatically disappear +# without user intervention. Defaults to true. (Since 3.1) # # @target-is-zero: Assume the destination reads as all zeroes before # the mirror started. Setting this to true can speed up the @@ -2593,11 +2588,11 @@ # # @bps_max: total throughput limit during bursts, in bytes (Since 1.7) # -# @bps_rd_max: read throughput limit during bursts, in bytes (Since -# 1.7) +# @bps_rd_max: read throughput limit during bursts, in bytes +# (Since 1.7) # -# @bps_wr_max: write throughput limit during bursts, in bytes (Since -# 1.7) +# @bps_wr_max: write throughput limit during bursts, in bytes +# (Since 1.7) # # @iops_max: total I/O operations per second during bursts, in bytes # (Since 1.7) @@ -2667,7 +2662,7 @@ # @iops-total-max: I/O operations burst # # @iops-total-max-length: length of the iops-total-max burst period, -# in seconds It must only be set if @iops-total-max is set as +# in seconds. It must only be set if @iops-total-max is set as # well. # # @iops-read: limit read operations per second @@ -2675,14 +2670,14 @@ # @iops-read-max: I/O operations read burst # # @iops-read-max-length: length of the iops-read-max burst period, in -# seconds It must only be set if @iops-read-max is set as well. +# seconds. It must only be set if @iops-read-max is set as well. # # @iops-write: limit write operations per second # # @iops-write-max: I/O operations write burst # # @iops-write-max-length: length of the iops-write-max burst period, -# in seconds It must only be set if @iops-write-max is set as +# in seconds. It must only be set if @iops-write-max is set as # well. # # @bps-total: limit total bytes per second @@ -2697,14 +2692,14 @@ # @bps-read-max: total bytes read burst # # @bps-read-max-length: length of the bps-read-max burst period, in -# seconds It must only be set if @bps-read-max is set as well. +# seconds. It must only be set if @bps-read-max is set as well. # # @bps-write: limit write bytes per second # # @bps-write-max: total bytes write burst # # @bps-write-max-length: length of the bps-write-max burst period, in -# seconds It must only be set if @bps-write-max is set as well. +# seconds. It must only be set if @bps-write-max is set as well. # # @iops-size: when limiting by iops max size of an I/O in bytes # @@ -2789,12 +2784,12 @@ # immediately once streaming has started. The status of ongoing block # streaming operations can be checked with query-block-jobs. The # operation can be stopped before it has completed using the -# block-job-cancel command. +# job-cancel or block-job-cancel command. # # The node that receives the data is called the top image, can be # located in any part of the chain (but always above the base image; # see below) and can be specified using its device or node name. -# Earlier qemu versions only allowed 'device' to name the top level +# Earlier QEMU versions only allowed 'device' to name the top level # node; presence of the 'base-node' parameter during introspection can # be used as a witness of the enhanced semantics of 'device'. # @@ -2859,16 +2854,15 @@ # autogenerated. (Since: 6.0) # # @auto-finalize: When false, this job will wait in a PENDING state -# after it has finished its work, waiting for @block-job-finalize -# before making any block graph changes. When true, this job will +# after it has finished its work, waiting for @job-finalize before +# making any block graph changes. When true, this job will # automatically perform its abort or commit actions. Defaults to # true. (Since 3.1) # # @auto-dismiss: When false, this job will wait in a CONCLUDED state # after it has completely ceased all work, and awaits -# @block-job-dismiss. When true, this job will automatically -# disappear from the query list without user intervention. -# Defaults to true. (Since 3.1) +# @job-dismiss. When true, this job will automatically disappear +# without user intervention. Defaults to true. (Since 3.1) # # Errors: # - If @device does not exist, DeviceNotFound. @@ -3030,10 +3024,10 @@ # state. Completing the job in any other state is an error. # # This is supported only for drive mirroring, where it also switches -# the device to write to the target path only. Note that drive +# the device to write to the target path only. Note that drive # mirroring includes drive-mirror, blockdev-mirror and block-commit # job (only in case of "active commit", when the node being commited -# is used by the guest). The ability to complete is signaled with a +# is used by the guest). The ability to complete is signaled with a # BLOCK_JOB_READY event. # # This command completes an active background block operation @@ -3068,16 +3062,16 @@ # # Deletes a job that is in the CONCLUDED state. This command only # needs to be run explicitly for jobs that don't have automatic -# dismiss enabled. In turn, automatic dismiss may be enabled only +# dismiss enabled. In turn, automatic dismiss may be enabled only # for jobs that have @auto-dismiss option, which are drive-backup, # blockdev-backup, drive-mirror, blockdev-mirror, block-commit and -# block-stream. @auto-dismiss is enabled by default for these +# block-stream. @auto-dismiss is enabled by default for these # jobs. # # This command will refuse to operate on any job that has not yet -# reached its terminal state, JOB_STATUS_CONCLUDED. For jobs that -# make use of the BLOCK_JOB_READY event, block-job-cancel or -# block-job-complete will still need to be used as appropriate. +# reached its terminal state, CONCLUDED. For jobs that make use of +# the BLOCK_JOB_READY event, job-cancel, block-job-cancel or +# job-complete will still need to be used as appropriate. # # @id: The job identifier. # @@ -3196,7 +3190,7 @@ # # Selects the AIO backend to handle I/O requests # -# @threads: Use qemu's thread pool +# @threads: Use QEMU's thread pool # # @native: Use native AIO backend (only Linux and Windows) # @@ -3415,8 +3409,8 @@ # Driver specific block device options for LUKS. # # @key-secret: the ID of a QCryptoSecret object providing the -# decryption key (since 2.6). Mandatory except when doing a -# metadata-only probe of the image. +# decryption key. Mandatory except when doing a metadata-only +# probe of the image. (since 2.6) # # @header: block device holding a detached LUKS header. (since 9.0) # @@ -3655,8 +3649,8 @@ # this feature. (since 2.5) # # @encrypt: Image decryption options. Mandatory for encrypted images, -# except when doing a metadata-only probe of the image. (since -# 2.10) +# except when doing a metadata-only probe of the image. +# (since 2.10) # # @data-file: reference to or definition of the external data file. # This may only be specified for images that require an external @@ -4326,8 +4320,8 @@ # @user: Ceph id name. # # @auth-client-required: Acceptable authentication modes. This maps -# to Ceph configuration option "auth_client_required". (Since -# 3.0) +# to Ceph configuration option "auth_client_required". +# (Since 3.0) # # @key-secret: ID of a QCryptoSecret object providing a key for cephx # authentication. This maps to Ceph configuration option "key". @@ -4581,8 +4575,8 @@ # error. During the first @reconnect-delay seconds, all requests # are paused and will be rerun on a successful reconnect. After # that time, any delayed requests and all future requests before a -# successful reconnect will immediately fail. Default 0 (Since -# 4.2) +# successful reconnect will immediately fail. Default 0 +# (Since 4.2) # # @open-timeout: In seconds. If zero, the nbd driver tries the # connection only once, and fails to open if the connection fails. @@ -4724,11 +4718,11 @@ # # @driver: block driver name # -# @node-name: the node name of the new node (Since 2.0). This option -# is required on the top level of blockdev-add. Valid node names -# start with an alphabetic character and may contain only -# alphanumeric characters, '-', '.' and '_'. Their maximum length -# is 31 characters. +# @node-name: the node name of the new node. This option is required +# on the top level of blockdev-add. Valid node names start with +# an alphabetic character and may contain only alphanumeric +# characters, '-', '.' and '_'. Their maximum length is 31 +# characters. (Since 2.0) # # @discard: discard-related options (default: ignore) # @@ -4737,7 +4731,7 @@ # @active: whether the block node should be activated (default: true). # Having inactive block nodes is useful primarily for migration because it # allows opening an image on the destination while the source is still -# holding locks for it. (Since 10.0) +# holding locks for it. (Since 10.0) # # @read-only: whether the block device should be read-only (default: # false). Note that some block drivers support only read-only @@ -4947,7 +4941,7 @@ # 3) A reference to a different node: the current child is replaced # with the specified one. # -# 4) NULL: the current child (if any) is detached. +# 4) null: the current child (if any) is detached. # # Options (1) and (2) are supported in all cases. Option (3) is # supported for @file and @backing, and option (4) for @backing only. @@ -4999,14 +4993,14 @@ ## # @blockdev-set-active: # -# Activate or inactivate a block device. Use this to manage the handover of +# Activate or inactivate a block device. Use this to manage the handover of # block devices on migration with qemu-storage-daemon. # # Activating a node automatically activates all of its child nodes first. # Inactivating a node automatically inactivates any of its child nodes that are # not in use by a still active node. # -# @node-name: Name of the graph node to activate or inactivate. By default, all +# @node-name: Name of the graph node to activate or inactivate. By default, all # nodes are affected by the operation. # # @active: true if the nodes should be active when the command returns success, @@ -5157,10 +5151,10 @@ ## # @BlockdevQcow2Version: # -# @v2: The original QCOW2 format as introduced in qemu 0.10 (version +# @v2: The original QCOW2 format as introduced in QEMU 0.10 (version # 2) # -# @v3: The extended QCOW2 format as introduced in qemu 1.1 (version 3) +# @v3: The extended QCOW2 format as introduced in QEMU 1.1 (version 3) # # Since: 2.12 ## @@ -5580,7 +5574,7 @@ # @x-blockdev-amend: # # Starts a job to amend format specific options of an existing open -# block device The job is automatically finalized, but a manual +# block device. The job is automatically finalized, but a manual # job-dismiss is required. # # @job-id: Identifier for the newly created job. @@ -5589,7 +5583,7 @@ # # @options: Options (driver specific) # -# @force: Allow unsafe operations, format specific For luks that +# @force: Allow unsafe operations, format specific. For luks that # allows erase of the last active keyslot (permanent loss of # data), and replacement of an active keyslot (possible loss of # data if IO error happens) @@ -5866,7 +5860,7 @@ # @BLOCK_JOB_PENDING: # # Emitted when a block job is awaiting explicit authorization to -# finalize graph changes via @block-job-finalize. If this job is part +# finalize graph changes via @job-finalize. If this job is part # of a transaction, it will not emit this event until the transaction # has converged first. # diff --git a/qapi/block-export.json b/qapi/block-export.json index c783e01..ed4deb5 100644 --- a/qapi/block-export.json +++ b/qapi/block-export.json @@ -169,7 +169,7 @@ # @growable: Whether writes beyond the EOF should grow the block node # accordingly. (default: false) # -# @allow-other: If this is off, only qemu's user is allowed access to +# @allow-other: If this is off, only QEMU's user is allowed access to # this export. That cannot be changed even with chmod or chown. # Enabling this option will allow other users access to the export # with the FUSE mount option "allow_other". Note that using @@ -373,9 +373,9 @@ # (since: 5.2) # # @allow-inactive: If true, the export allows the exported node to be inactive. -# If it is created for an inactive block node, the node remains inactive. If +# If it is created for an inactive block node, the node remains inactive. If # the export type doesn't support running on an inactive node, an error is -# returned. If false, inactive block nodes are automatically activated before +# returned. If false, inactive block nodes are automatically activated before # creating the export and trying to inactivate them later fails. # (since: 10.0; default: false) # diff --git a/qapi/block.json b/qapi/block.json index f5374bd..1490a1a 100644 --- a/qapi/block.json +++ b/qapi/block.json @@ -48,7 +48,7 @@ ## # @FloppyDriveType: # -# Type of Floppy drive to be emulated by the Floppy Disk Controller. +# Type of floppy drive to be emulated by the Floppy Disk Controller. # # @144: 1.44MB 3.5" drive # diff --git a/qapi/char.json b/qapi/char.json index 447c10b..df6e325 100644 --- a/qapi/char.json +++ b/qapi/char.json @@ -274,7 +274,7 @@ # @reconnect: For a client socket, if a socket is disconnected, then # attempt a reconnect after the given number of seconds. Setting # this to zero disables this function. The use of this member is -# deprecated, use @reconnect-ms instead. (default: 0) (Since: 2.2) +# deprecated, use @reconnect-ms instead. (default: 0) (Since: 2.2) # # @reconnect-ms: For a client socket, if a socket is disconnected, # then attempt a reconnect after the given number of milliseconds. @@ -351,7 +351,7 @@ # Configuration info for stdio chardevs. # # @signal: Allow signals (such as SIGINT triggered by ^C) be delivered -# to qemu. Default: true. +# to QEMU. Default: true. # # Since: 1.5 ## @@ -443,7 +443,7 @@ ## # @ChardevQemuVDAgent: # -# Configuration info for qemu vdagent implementation. +# Configuration info for QEMU vdagent implementation. # # @mouse: enable/disable mouse, default is enabled. # @@ -656,7 +656,7 @@ ## # @ChardevQemuVDAgentWrapper: # -# @data: Configuration info for qemu vdagent implementation +# @data: Configuration info for QEMU vdagent implementation # # Since: 6.1 ## diff --git a/qapi/crypto.json b/qapi/crypto.json index c9d967d..9ec6301 100644 --- a/qapi/crypto.json +++ b/qapi/crypto.json @@ -55,7 +55,8 @@ # @sha512: SHA-512. (since 2.7) # # @ripemd160: RIPEMD-160. (since 2.7) -# @sm3: SM3. (since 9.2.0) +# +# @sm3: SM3. (since 9.2.0) # # Since: 2.6 ## @@ -202,19 +203,19 @@ # # The options that apply to LUKS encryption format initialization # -# @cipher-alg: the cipher algorithm for data encryption Currently +# @cipher-alg: the cipher algorithm for data encryption. Currently # defaults to 'aes-256'. # -# @cipher-mode: the cipher mode for data encryption Currently defaults -# to 'xts' +# @cipher-mode: the cipher mode for data encryption. Currently +# defaults to 'xts' # -# @ivgen-alg: the initialization vector generator Currently defaults +# @ivgen-alg: the initialization vector generator. Currently defaults # to 'plain64' # -# @ivgen-hash-alg: the initialization vector generator hash Currently -# defaults to 'sha256' +# @ivgen-hash-alg: the initialization vector generator hash. +# Currently defaults to 'sha256' # -# @hash-alg: the master key hash algorithm Currently defaults to +# @hash-alg: the master key hash algorithm. Currently defaults to # 'sha256' # # @iter-time: number of milliseconds to spend in PBKDF passphrase @@ -370,11 +371,11 @@ # @new-secret: The ID of a QCryptoSecret object providing the password # to be written into added active keyslots # -# @old-secret: Optional (for deactivation only) If given will +# @old-secret: Optional (for deactivation only). If given will # deactivate all keyslots that match password located in # QCryptoSecret with this ID # -# @iter-time: Optional (for activation only) Number of milliseconds to +# @iter-time: Optional (for activation only). Number of milliseconds to # spend in PBKDF passphrase processing for the newly activated # keyslot. Currently defaults to 2000. # diff --git a/qapi/cryptodev.json b/qapi/cryptodev.json index 28b97eb..b13db26 100644 --- a/qapi/cryptodev.json +++ b/qapi/cryptodev.json @@ -15,7 +15,7 @@ # # @sym: symmetric encryption # -# @asym: asymmetric Encryption +# @asym: asymmetric encryption # # Since: 8.0 ## diff --git a/qapi/cxl.json b/qapi/cxl.json index dd947d3..8f2e923 100644 --- a/qapi/cxl.json +++ b/qapi/cxl.json @@ -117,7 +117,7 @@ # @nibble-mask: Identifies one or more nibbles that the error affects # # @bank-group: Bank group of the memory event location, incorporating -# a number of Banks. +# a number of banks. # # @bank: Bank of the memory event location. A single bank is accessed # per read or write of the memory. diff --git a/qapi/dump.json b/qapi/dump.json index f2835c0..d0ba1f0 100644 --- a/qapi/dump.json +++ b/qapi/dump.json @@ -54,9 +54,9 @@ # @paging: if true, do paging to get guest's memory mapping. This # allows using gdb to process the core file. # -# IMPORTANT: this option can make QEMU allocate several gigabytes -# of RAM. This can happen for a large guest, or a malicious guest -# pretending to be large. +# **Important**: this option can make QEMU allocate several +# gigabytes of RAM. This can happen for a large guest, or a +# malicious guest pretending to be large. # # Also, paging=true has the following limitations: # diff --git a/qapi/introspect.json b/qapi/introspect.json index 01bb242..e9e0297 100644 --- a/qapi/introspect.json +++ b/qapi/introspect.json @@ -26,9 +26,9 @@ # the QAPI schema. # # Furthermore, while we strive to keep the QMP wire format -# backwards-compatible across qemu versions, the introspection output +# backwards-compatible across QEMU versions, the introspection output # is not guaranteed to have the same stability. For example, one -# version of qemu may list an object member as an optional +# version of QEMU may list an object member as an optional # non-variant, while another lists the same member only through the # object's variants; or the type of a member may change from a generic # string into a specific enum or from one specific type into an @@ -154,8 +154,8 @@ # # Additional SchemaInfo members for meta-type 'enum'. # -# @members: the enum type's members, in no particular order (since -# 6.2). +# @members: the enum type's members, in no particular order. +# (since 6.2) # # @values: the enumeration type's member names, in no particular # order. Redundant with @members. Just for backward diff --git a/qapi/job.json b/qapi/job.json index b03f80b..126fa5c 100644 --- a/qapi/job.json +++ b/qapi/job.json @@ -20,14 +20,14 @@ # # @create: image creation job type, see "blockdev-create" (since 3.0) # -# @amend: image options amend job type, see "x-blockdev-amend" (since -# 5.1) +# @amend: image options amend job type, see "x-blockdev-amend" +# (since 5.1) # -# @snapshot-load: snapshot load job type, see "snapshot-load" (since -# 6.0) +# @snapshot-load: snapshot load job type, see "snapshot-load" +# (since 6.0) # -# @snapshot-save: snapshot save job type, see "snapshot-save" (since -# 6.0) +# @snapshot-save: snapshot save job type, see "snapshot-save" +# (since 6.0) # # @snapshot-delete: snapshot delete job type, see "snapshot-delete" # (since 6.0) @@ -74,7 +74,7 @@ # process. # # @concluded: The job has finished all work. If auto-dismiss was set -# to false, the job will remain in the query list until it is +# to false, the job will remain in this state until it is # dismissed via @job-dismiss. # # @null: The job is in the process of being dismantled. This state @@ -191,10 +191,10 @@ # state. Completing the job in any other state is an error. # # This is supported only for drive mirroring, where it also switches -# the device to write to the target path only. Note that drive +# the device to write to the target path only. Note that drive # mirroring includes drive-mirror, blockdev-mirror and block-commit # job (only in case of "active commit", when the node being commited -# is used by the guest). The ability to complete is signaled with a +# is used by the guest). The ability to complete is signaled with a # BLOCK_JOB_READY event. # # This command completes an active background block operation @@ -216,16 +216,16 @@ # # Deletes a job that is in the CONCLUDED state. This command only # needs to be run explicitly for jobs that don't have automatic -# dismiss enabled. In turn, automatic dismiss may be enabled only +# dismiss enabled. In turn, automatic dismiss may be enabled only # for jobs that have @auto-dismiss option, which are drive-backup, # blockdev-backup, drive-mirror, blockdev-mirror, block-commit and -# block-stream. @auto-dismiss is enabled by default for these +# block-stream. @auto-dismiss is enabled by default for these # jobs. # # This command will refuse to operate on any job that has not yet -# reached its terminal state, JOB_STATUS_CONCLUDED. For jobs that -# make use of JOB_READY event, job-cancel or job-complete will still -# need to be used as appropriate. +# reached its terminal state, CONCLUDED. For jobs that make use of +# the JOB_READY event, job-cancel or job-complete will still need to +# be used as appropriate. # # @id: The job identifier. # diff --git a/qapi/machine.json b/qapi/machine.json index 5373e13..0650b8d 100644 --- a/qapi/machine.json +++ b/qapi/machine.json @@ -182,8 +182,8 @@ # @default-cpu-type: default CPU model typename if none is requested # via the -cpu argument. (since 4.2) # -# @default-ram-id: the default ID of initial RAM memory backend (since -# 5.2) +# @default-ram-id: the default ID of initial RAM memory backend +# (since 5.2) # # @acpi: machine type supports ACPI (since 8.0) # @@ -694,7 +694,7 @@ # Structure of HMAT (Heterogeneous Memory Attribute Table) # # For more information about @HmatLBDataType, see chapter 5.2.27.4: -# Table 5-146: Field "Data Type" of ACPI 6.3 spec. +# Table 5-146: Field "Data Type" of ACPI 6.3 spec. # # @access-latency: access latency (nanoseconds) # @@ -811,7 +811,7 @@ # # @policy: the write policy, none/write-back/write-through. # -# @line: the cache Line size in bytes. +# @line: the cache line size in bytes. # # Since: 5.0 ## @@ -1089,7 +1089,7 @@ # :annotated: # # For s390x-virtio-ccw machine type started with -# ``-smp 1,maxcpus=2 -cpu qemu`` (Since: 2.11):: +# ``-smp 1,maxcpus=2 -cpu qemu``:: # # -> { "execute": "query-hotpluggable-cpus" } # <- {"return": [ @@ -1160,7 +1160,7 @@ # # Information about the guest balloon device. # -# @actual: the logical size of the VM in bytes Formula used: +# @actual: the logical size of the VM in bytes. Formula used: # logical_vm_size = vm_ram_size - balloon_size # # Since: 0.14 @@ -1199,7 +1199,7 @@ # is equivalent to the @actual field return by the 'query-balloon' # command # -# @actual: the logical size of the VM in bytes Formula used: +# @actual: the logical size of the VM in bytes. Formula used: # logical_vm_size = vm_ram_size - balloon_size # # .. note:: This event is rate-limited. diff --git a/qapi/migration.json b/qapi/migration.json index 41826bd..4963f6c 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -57,8 +57,8 @@ # # @dirty-sync-missed-zero-copy: Number of times dirty RAM # synchronization could not avoid copying dirty pages. This is -# between 0 and @dirty-sync-count * @multifd-channels. (since -# 7.1) +# between 0 and @dirty-sync-count * @multifd-channels. +# (since 7.1) # # Since: 0.14 ## @@ -137,16 +137,16 @@ # # @active: in the process of doing migration. # -# @postcopy-active: like active, but now in postcopy mode. (since -# 2.5) +# @postcopy-active: like active, but now in postcopy mode. +# (since 2.5) # # @postcopy-paused: during postcopy but paused. (since 3.0) # # @postcopy-recover-setup: setup phase for a postcopy recovery # process, preparing for a recovery phase to start. (since 9.1) # -# @postcopy-recover: trying to recover from a paused postcopy. (since -# 3.0) +# @postcopy-recover: trying to recover from a paused postcopy. +# (since 3.0) # # @completed: migration is finished. # @@ -407,7 +407,7 @@ # @postcopy-ram: Start executing on the migration target before all of # RAM has been migrated, pulling the remaining pages along as # needed. The capacity must have the same setting on both source -# and target or migration will not even start. NOTE: If the +# and target or migration will not even start. **Note:** if the # migration fails during postcopy the VM will fail. (since 2.6) # # @x-colo: If enabled, migration will never end, and the state of the @@ -415,15 +415,15 @@ # on secondary side, this process is called COarse-Grain LOck # Stepping (COLO) for Non-stop Service. (since 2.8) # -# @release-ram: if enabled, qemu will free the migrated ram pages on +# @release-ram: if enabled, QEMU will free the migrated ram pages on # the source during postcopy-ram migration. (since 2.9) # # @return-path: If enabled, migration will use the return path even # for precopy. (since 2.10) # # @pause-before-switchover: Pause outgoing migration before -# serialising device state and before disabling block IO (since -# 2.11) +# serialising device state and before disabling block IO +# (since 2.11) # # @multifd: Use more than one fd for migration (since 4.0) # @@ -697,8 +697,8 @@ # @alias: An alias name for migration (for example the bitmap name on # the opposite site). # -# @transform: Allows the modification of the migrated bitmap. (since -# 6.0) +# @transform: Allows the modification of the migrated bitmap. +# (since 6.0) # # Since: 5.2 ## @@ -760,9 +760,9 @@ # auto-converge detects that migration is not making progress. # The default value is 10. (Since 2.7) # -# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage At -# the tail stage of throttling, the Guest is very sensitive to CPU -# percentage while the @cpu-throttle -increment is excessive +# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage. +# At the tail stage of throttling, the Guest is very sensitive to +# CPU percentage while the @cpu-throttle -increment is excessive # usually at tail stage. If this parameter is true, we will # compute the ideal CPU percentage used by the Guest, which may # exactly make the dirty rate match the dirty rate threshold. @@ -770,8 +770,8 @@ # specified by @cpu-throttle-increment and the one generated by # ideal CPU percentage. Therefore, it is compatible to # traditional throttling, meanwhile the throttle increment won't -# be excessive at tail stage. The default value is false. (Since -# 5.1) +# be excessive at tail stage. The default value is false. +# (Since 5.1) # # @tls-creds: ID of the 'tls-creds' object that provides credentials # for establishing a TLS connection over the migration data @@ -801,10 +801,10 @@ # (Since 2.8) # # @avail-switchover-bandwidth: to set the available bandwidth that -# migration can use during switchover phase. NOTE! This does not -# limit the bandwidth during switchover, but only for calculations -# when making decisions to switchover. By default, this value is -# zero, which means QEMU will estimate the bandwidth +# migration can use during switchover phase. **Note:** this does +# not limit the bandwidth during switchover, but only for +# calculations when making decisions to switchover. By default, +# this value is zero, which means QEMU will estimate the bandwidth # automatically. This can be set when the estimated value is not # accurate, while the user is able to guarantee such bandwidth is # available when switching over. When specified correctly, this @@ -842,9 +842,9 @@ # more CPU. Defaults to 1. (Since 5.0) # # @multifd-qatzip-level: Set the compression level to be used in live -# migration. The level is an integer between 1 and 9, where 1 means +# migration. The level is an integer between 1 and 9, where 1 means # the best compression speed, and 9 means the best compression -# ratio which will consume more CPU. Defaults to 1. (Since 9.2) +# ratio which will consume more CPU. Defaults to 1. (Since 9.2) # # @multifd-zstd-level: Set the compression level to be used in live # migration, the compression level is an integer between 0 and 20, @@ -941,9 +941,9 @@ # auto-converge detects that migration is not making progress. # The default value is 10. (Since 2.7) # -# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage At -# the tail stage of throttling, the Guest is very sensitive to CPU -# percentage while the @cpu-throttle -increment is excessive +# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage. +# At the tail stage of throttling, the Guest is very sensitive to +# CPU percentage while the @cpu-throttle -increment is excessive # usually at tail stage. If this parameter is true, we will # compute the ideal CPU percentage used by the Guest, which may # exactly make the dirty rate match the dirty rate threshold. @@ -951,8 +951,8 @@ # specified by @cpu-throttle-increment and the one generated by # ideal CPU percentage. Therefore, it is compatible to # traditional throttling, meanwhile the throttle increment won't -# be excessive at tail stage. The default value is false. (Since -# 5.1) +# be excessive at tail stage. The default value is false. +# (Since 5.1) # # @tls-creds: ID of the 'tls-creds' object that provides credentials # for establishing a TLS connection over the migration data @@ -982,10 +982,10 @@ # (Since 2.8) # # @avail-switchover-bandwidth: to set the available bandwidth that -# migration can use during switchover phase. NOTE! This does not -# limit the bandwidth during switchover, but only for calculations -# when making decisions to switchover. By default, this value is -# zero, which means QEMU will estimate the bandwidth +# migration can use during switchover phase. **Note:** this does +# not limit the bandwidth during switchover, but only for +# calculations when making decisions to switchover. By default, +# this value is zero, which means QEMU will estimate the bandwidth # automatically. This can be set when the estimated value is not # accurate, while the user is able to guarantee such bandwidth is # available when switching over. When specified correctly, this @@ -1023,9 +1023,9 @@ # more CPU. Defaults to 1. (Since 5.0) # # @multifd-qatzip-level: Set the compression level to be used in live -# migration. The level is an integer between 1 and 9, where 1 means +# migration. The level is an integer between 1 and 9, where 1 means # the best compression speed, and 9 means the best compression -# ratio which will consume more CPU. Defaults to 1. (Since 9.2) +# ratio which will consume more CPU. Defaults to 1. (Since 9.2) # # @multifd-zstd-level: Set the compression level to be used in live # migration, the compression level is an integer between 0 and 20, @@ -1148,16 +1148,16 @@ # percentage. The default value is 50. (Since 5.0) # # @cpu-throttle-initial: Initial percentage of time guest cpus are -# throttled when migration auto-converge is activated. (Since -# 2.7) +# throttled when migration auto-converge is activated. +# (Since 2.7) # # @cpu-throttle-increment: throttle percentage increase each time # auto-converge detects that migration is not making progress. # (Since 2.7) # -# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage At -# the tail stage of throttling, the Guest is very sensitive to CPU -# percentage while the @cpu-throttle -increment is excessive +# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage. +# At the tail stage of throttling, the Guest is very sensitive to +# CPU percentage while the @cpu-throttle -increment is excessive # usually at tail stage. If this parameter is true, we will # compute the ideal CPU percentage used by the Guest, which may # exactly make the dirty rate match the dirty rate threshold. @@ -1165,8 +1165,8 @@ # specified by @cpu-throttle-increment and the one generated by # ideal CPU percentage. Therefore, it is compatible to # traditional throttling, meanwhile the throttle increment won't -# be excessive at tail stage. The default value is false. (Since -# 5.1) +# be excessive at tail stage. The default value is false. +# (Since 5.1) # # @tls-creds: ID of the 'tls-creds' object that provides credentials # for establishing a TLS connection over the migration data @@ -1192,10 +1192,10 @@ # (Since 2.8) # # @avail-switchover-bandwidth: to set the available bandwidth that -# migration can use during switchover phase. NOTE! This does not -# limit the bandwidth during switchover, but only for calculations -# when making decisions to switchover. By default, this value is -# zero, which means QEMU will estimate the bandwidth +# migration can use during switchover phase. **Note:** this does +# not limit the bandwidth during switchover, but only for +# calculations when making decisions to switchover. By default, +# this value is zero, which means QEMU will estimate the bandwidth # automatically. This can be set when the estimated value is not # accurate, while the user is able to guarantee such bandwidth is # available when switching over. When specified correctly, this @@ -1233,9 +1233,9 @@ # more CPU. Defaults to 1. (Since 5.0) # # @multifd-qatzip-level: Set the compression level to be used in live -# migration. The level is an integer between 1 and 9, where 1 means +# migration. The level is an integer between 1 and 9, where 1 means # the best compression speed, and 9 means the best compression -# ratio which will consume more CPU. Defaults to 1. (Since 9.2) +# ratio which will consume more CPU. Defaults to 1. (Since 9.2) # # @multifd-zstd-level: Set the compression level to be used in live # migration, the compression level is an integer between 0 and 20, @@ -1500,7 +1500,7 @@ ## # @x-colo-lost-heartbeat: # -# Tell qemu that heartbeat is lost, request it to do takeover +# Tell QEMU that heartbeat is lost, request it to do takeover # procedures. If this command is sent to the PVM, the Primary side # will exit COLO mode. If sent to the Secondary, the Secondary side # will run failover work, then takes over server operation to become @@ -1729,8 +1729,8 @@ ## # @migrate-incoming: # -# Start an incoming migration, the qemu must have been started with -# -incoming defer +# Start an incoming migration. QEMU must have been started with +# -incoming defer. # # @uri: The Uniform Resource Identifier identifying the source or # address to listen on diff --git a/qapi/misc-i386.json b/qapi/misc-i386.json index 3b53464..5fefa0a 100644 --- a/qapi/misc-i386.json +++ b/qapi/misc-i386.json @@ -195,7 +195,7 @@ # # @cbitpos: C-bit location in page table entry # -# @reduced-phys-bits: Number of physical Address bit reduction when +# @reduced-phys-bits: Number of physical address bit reduction when # SEV is enabled # # Since: 2.12 diff --git a/qapi/misc.json b/qapi/misc.json index dcf9f7d..4b9e601 100644 --- a/qapi/misc.json +++ b/qapi/misc.json @@ -222,8 +222,8 @@ # .. note:: This command only exists as a stop-gap. Its use is highly # discouraged. The semantics of this command are not guaranteed: # this means that command names, arguments and responses can change -# or be removed at ANY time. Applications that rely on long term -# stability guarantees should NOT use this command. +# or be removed at **any** time. Applications that rely on long +# term stability guarantees should **not** use this command. # # Known limitations: # diff --git a/qapi/net.json b/qapi/net.json index 310cc4f..97ea183 100644 --- a/qapi/net.json +++ b/qapi/net.json @@ -150,12 +150,12 @@ # @domainname: guest-visible domain name of the virtual nameserver # (since 3.0) # -# @ipv6-prefix: IPv6 network prefix (default is fec0::) (since 2.6). -# The network prefix is given in the usual hexadecimal IPv6 -# address notation. +# @ipv6-prefix: IPv6 network prefix (default is fec0::). The network +# prefix is given in the usual hexadecimal IPv6 address notation. +# (since 2.6) # -# @ipv6-prefixlen: IPv6 network prefix length (default is 64) (since -# 2.6) +# @ipv6-prefixlen: IPv6 network prefix length (default is 64) +# (since 2.6) # # @ipv6-host: guest-visible IPv6 address of the host (since 2.6) # @@ -387,8 +387,8 @@ # # @hubid: hub identifier number # -# @netdev: used to connect hub to a netdev instead of a device (since -# 2.12) +# @netdev: used to connect hub to a netdev instead of a device +# (since 2.12) # # Since: 1.2 ## @@ -510,8 +510,8 @@ # @queues: number of queues to be created for multiqueue vhost-vdpa # (default: 1) # -# @x-svq: Start device with (experimental) shadow virtqueue. (Since -# 7.1) (default: false) +# @x-svq: Start device with (experimental) shadow virtqueue. +# (Since 7.1) (default: false) # # Features: # diff --git a/qapi/qom.json b/qapi/qom.json index 04c118e..3e8debf 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -870,7 +870,7 @@ # information read from devices and switches in conjunction with # link characteristics read from PCIe Configuration space. # To get the full path latency from CPU to CXL attached DRAM -# CXL device: Add the latency from CPU to Generic Port (from +# CXL device: Add the latency from CPU to Generic Port (from # HMAT indexed via the node ID in this SRAT structure) to # that for CXL bus links, the latency across intermediate switches # and from the EP port to the actual memory. Bandwidth is more @@ -1048,6 +1048,39 @@ '*vcek-disabled': 'bool' } } ## +# @TdxGuestProperties: +# +# Properties for tdx-guest objects. +# +# @attributes: The 'attributes' of a TD guest that is passed to +# KVM_TDX_INIT_VM +# +# @sept-ve-disable: toggle bit 28 of TD attributes to control disabling +# of EPT violation conversion to #VE on guest TD access of PENDING +# pages. Some guest OS (e.g., Linux TD guest) may require this to +# be set, otherwise they refuse to boot. +# +# @mrconfigid: ID for non-owner-defined configuration of the guest TD, +# e.g., run-time or OS configuration (base64 encoded SHA384 digest). +# Defaults to all zeros. +# +# @mrowner: ID for the guest TD’s owner (base64 encoded SHA384 digest). +# Defaults to all zeros. +# +# @mrownerconfig: ID for owner-defined configuration of the guest TD, +# e.g., specific to the workload rather than the run-time or OS +# (base64 encoded SHA384 digest). Defaults to all zeros. +# +# Since: 10.1 +## +{ 'struct': 'TdxGuestProperties', + 'data': { '*attributes': 'uint64', + '*sept-ve-disable': 'bool', + '*mrconfigid': 'str', + '*mrowner': 'str', + '*mrownerconfig': 'str' } } + +## # @ThreadContextProperties: # # Properties for thread context objects. @@ -1132,6 +1165,7 @@ 'sev-snp-guest', 'thread-context', 's390-pv-guest', + 'tdx-guest', 'throttle-group', 'tls-creds-anon', 'tls-creds-psk', @@ -1204,6 +1238,7 @@ 'if': 'CONFIG_SECRET_KEYRING' }, 'sev-guest': 'SevGuestProperties', 'sev-snp-guest': 'SevSnpGuestProperties', + 'tdx-guest': 'TdxGuestProperties', 'thread-context': 'ThreadContextProperties', 'throttle-group': 'ThrottleGroupProperties', 'tls-creds-anon': 'TlsCredsAnonProperties', diff --git a/qapi/run-state.json b/qapi/run-state.json index ce95cfa..fd09beb 100644 --- a/qapi/run-state.json +++ b/qapi/run-state.json @@ -62,7 +62,7 @@ ## # @ShutdownCause: # -# An enumeration of reasons for a Shutdown. +# An enumeration of reasons for a shutdown. # # @none: No shutdown request pending # @@ -135,19 +135,19 @@ ## # @SHUTDOWN: # -# Emitted when the virtual machine has shut down, indicating that qemu +# Emitted when the virtual machine has shut down, indicating that QEMU # is about to exit. # # @guest: If true, the shutdown was triggered by a guest request (such # as a guest-initiated ACPI shutdown request or other # hardware-specific action) rather than a host request (such as -# sending qemu a SIGINT). (since 2.10) +# sending QEMU a SIGINT). (since 2.10) # # @reason: The @ShutdownCause which resulted in the SHUTDOWN. # (since 4.0) # # .. note:: If the command-line option ``-no-shutdown`` has been -# specified, qemu will not exit, and a STOP event will eventually +# specified, QEMU will not exit, and a STOP event will eventually # follow the SHUTDOWN event. # # Since: 0.12 @@ -365,8 +365,8 @@ # @shutdown: Shutdown the VM and exit, according to the shutdown # action # -# @exit-failure: Shutdown the VM and exit with nonzero status (since -# 7.1) +# @exit-failure: Shutdown the VM and exit with nonzero status +# (since 7.1) # # Since: 6.0 ## @@ -501,10 +501,12 @@ # # @s390: s390 guest panic information type (Since: 2.12) # +# @tdx: tdx guest panic information type (Since: 10.1) +# # Since: 2.9 ## { 'enum': 'GuestPanicInformationType', - 'data': [ 'hyper-v', 's390' ] } + 'data': [ 'hyper-v', 's390', 'tdx' ] } ## # @GuestPanicInformation: @@ -519,7 +521,8 @@ 'base': {'type': 'GuestPanicInformationType'}, 'discriminator': 'type', 'data': {'hyper-v': 'GuestPanicInformationHyperV', - 's390': 'GuestPanicInformationS390'}} + 's390': 'GuestPanicInformationS390', + 'tdx' : 'GuestPanicInformationTdx'}} ## # @GuestPanicInformationHyperV: @@ -599,6 +602,30 @@ 'reason': 'S390CrashReason'}} ## +# @GuestPanicInformationTdx: +# +# TDX Guest panic information specific to TDX, as specified in the +# "Guest-Hypervisor Communication Interface (GHCI) Specification", +# section TDG.VP.VMCALL<ReportFatalError>. +# +# @error-code: TD-specific error code +# +# @message: Human-readable error message provided by the guest. Not +# to be trusted. +# +# @gpa: guest-physical address of a page that contains more verbose +# error information, as zero-terminated string. Present when the +# "GPA valid" bit (bit 63) is set in @error-code. +# +# +# Since: 10.1 +## +{'struct': 'GuestPanicInformationTdx', + 'data': {'error-code': 'uint32', + 'message': 'str', + '*gpa': 'uint64'}} + +## # @MEMORY_FAILURE: # # Emitted when a memory failure occurs on host side. diff --git a/qapi/transaction.json b/qapi/transaction.json index 021e383..9d9e7af 100644 --- a/qapi/transaction.json +++ b/qapi/transaction.json @@ -21,7 +21,7 @@ ## # @ActionCompletionMode: # -# An enumeration of Transactional completion modes. +# An enumeration of transactional completion modes. # # @individual: Do not attempt to cancel any other Actions if any # Actions fail after the Transaction request succeeds. All @@ -223,7 +223,7 @@ # exists, the request will be rejected. Only some image formats # support it, for example, qcow2, and rbd, # -# On failure, qemu will try delete the newly created internal snapshot +# On failure, QEMU will try delete the newly created internal snapshot # in the transaction. When an I/O error occurs during deletion, the # user needs to fix it later with qemu-img or other command. # diff --git a/qapi/uefi.json b/qapi/uefi.json index bdfcabe..6592183 100644 --- a/qapi/uefi.json +++ b/qapi/uefi.json @@ -5,7 +5,7 @@ ## # = UEFI Variable Store # -# The qemu efi variable store implementation (hw/uefi/) uses this to +# The QEMU efi variable store implementation (hw/uefi/) uses this to # store non-volatile variables in json format on disk. # # This is an existing format already supported by (at least) two other diff --git a/qapi/ui.json b/qapi/ui.json index 3d0c853..514fa15 100644 --- a/qapi/ui.json +++ b/qapi/ui.json @@ -175,8 +175,8 @@ # @filename: the path of a new file to store the image # # @device: ID of the display device that should be dumped. If this -# parameter is missing, the primary display will be used. (Since -# 2.12) +# parameter is missing, the primary display will be used. +# (Since 2.12) # # @head: head to use in case the device supports multiple heads. If # this parameter is missing, head #0 will be used. Also note that @@ -1526,12 +1526,12 @@ # # Display (user interface) options. # -# @type: Which DisplayType qemu should use. +# @type: Which DisplayType QEMU should use. # # @full-screen: Start user interface in fullscreen mode # (default: off). # -# @window-close: Allow to quit qemu with window close button +# @window-close: Allow to quit QEMU with window close button # (default: on). # # @show-cursor: Force showing the mouse cursor (default: off). diff --git a/rust/hw/char/pl011/meson.build b/rust/hw/char/pl011/meson.build index 547cca5..494b6c1 100644 --- a/rust/hw/char/pl011/meson.build +++ b/rust/hw/char/pl011/meson.build @@ -1,17 +1,11 @@ -subproject('bilge-0.2-rs', required: true) -subproject('bilge-impl-0.2-rs', required: true) - -bilge_dep = dependency('bilge-0.2-rs') -bilge_impl_dep = dependency('bilge-impl-0.2-rs') - _libpl011_rs = static_library( 'pl011', files('src/lib.rs'), override_options: ['rust_std=2021', 'build.rust_std=2021'], rust_abi: 'rust', dependencies: [ - bilge_dep, - bilge_impl_dep, + bilge_rs, + bilge_impl_rs, qemu_api, qemu_api_macros, ], @@ -21,6 +15,6 @@ rust_devices_ss.add(when: 'CONFIG_X_PL011_RUST', if_true: [declare_dependency( link_whole: [_libpl011_rs], # Putting proc macro crates in `dependencies` is necessary for Meson to find # them when compiling the root per-target static rust lib. - dependencies: [bilge_impl_dep, qemu_api_macros], + dependencies: [bilge_impl_rs, qemu_api_macros], variables: {'crate': 'pl011'}, )]) diff --git a/rust/hw/char/pl011/src/device.rs b/rust/hw/char/pl011/src/device.rs index bde3be6..bd5cee0 100644 --- a/rust/hw/char/pl011/src/device.rs +++ b/rust/hw/char/pl011/src/device.rs @@ -480,13 +480,13 @@ impl PL011Registers { } impl PL011State { - /// Initializes a pre-allocated, unitialized instance of `PL011State`. + /// Initializes a pre-allocated, uninitialized instance of `PL011State`. /// /// # Safety /// /// `self` must point to a correctly sized and aligned location for the /// `PL011State` type. It must not be called more than once on the same - /// location/instance. All its fields are expected to hold unitialized + /// location/instance. All its fields are expected to hold uninitialized /// values with the sole exception of `parent_obj`. unsafe fn init(&mut self) { static PL011_OPS: MemoryRegionOps<PL011State> = MemoryRegionOpsBuilder::<PL011State>::new() diff --git a/rust/hw/timer/hpet/src/hpet.rs b/rust/hw/timer/hpet/src/device.rs index 779681d..e3ba62b 100644 --- a/rust/hw/timer/hpet/src/hpet.rs +++ b/rust/hw/timer/hpet/src/device.rs @@ -1,5 +1,5 @@ // Copyright (C) 2024 Intel Corporation. -// Author(s): Zhao Liu <zhai1.liu@intel.com> +// Author(s): Zhao Liu <zhao1.liu@intel.com> // SPDX-License-Identifier: GPL-2.0-or-later use std::{ diff --git a/rust/hw/timer/hpet/src/fw_cfg.rs b/rust/hw/timer/hpet/src/fw_cfg.rs index aa08d28..6c10316 100644 --- a/rust/hw/timer/hpet/src/fw_cfg.rs +++ b/rust/hw/timer/hpet/src/fw_cfg.rs @@ -1,5 +1,5 @@ // Copyright (C) 2024 Intel Corporation. -// Author(s): Zhao Liu <zhai1.liu@intel.com> +// Author(s): Zhao Liu <zhao1.liu@intel.com> // SPDX-License-Identifier: GPL-2.0-or-later use std::ptr::addr_of_mut; diff --git a/rust/hw/timer/hpet/src/lib.rs b/rust/hw/timer/hpet/src/lib.rs index 1954584..a95cf14 100644 --- a/rust/hw/timer/hpet/src/lib.rs +++ b/rust/hw/timer/hpet/src/lib.rs @@ -1,5 +1,5 @@ // Copyright (C) 2024 Intel Corporation. -// Author(s): Zhao Liu <zhai1.liu@intel.com> +// Author(s): Zhao Liu <zhao1.liu@intel.com> // SPDX-License-Identifier: GPL-2.0-or-later //! # HPET QEMU Device Model @@ -7,7 +7,7 @@ //! This library implements a device model for the IA-PC HPET (High //! Precision Event Timers) device in QEMU. +pub mod device; pub mod fw_cfg; -pub mod hpet; pub const TYPE_HPET: &::std::ffi::CStr = c"hpet"; diff --git a/rust/meson.build b/rust/meson.build index 91e52b8..1f0dcce 100644 --- a/rust/meson.build +++ b/rust/meson.build @@ -1,3 +1,19 @@ +subproject('bilge-0.2-rs', required: true) +subproject('bilge-impl-0.2-rs', required: true) +subproject('libc-0.2-rs', required: true) + +bilge_rs = dependency('bilge-0.2-rs') +bilge_impl_rs = dependency('bilge-impl-0.2-rs') +libc_rs = dependency('libc-0.2-rs') + +subproject('proc-macro2-1-rs', required: true) +subproject('quote-1-rs', required: true) +subproject('syn-2-rs', required: true) + +quote_rs_native = dependency('quote-1-rs', native: true) +syn_rs_native = dependency('syn-2-rs', native: true) +proc_macro2_rs_native = dependency('proc-macro2-1-rs', native: true) + subdir('qemu-api-macros') subdir('qemu-api') diff --git a/rust/qemu-api-macros/meson.build b/rust/qemu-api-macros/meson.build index 6f94a4b..8610ce1 100644 --- a/rust/qemu-api-macros/meson.build +++ b/rust/qemu-api-macros/meson.build @@ -1,11 +1,3 @@ -subproject('proc-macro2-1-rs', required: true) -subproject('quote-1-rs', required: true) -subproject('syn-2-rs', required: true) - -quote_dep = dependency('quote-1-rs', native: true) -syn_dep = dependency('syn-2-rs', native: true) -proc_macro2_dep = dependency('proc-macro2-1-rs', native: true) - _qemu_api_macros_rs = rust.proc_macro( 'qemu_api_macros', files('src/lib.rs'), @@ -16,9 +8,9 @@ _qemu_api_macros_rs = rust.proc_macro( '--cfg', 'feature="proc-macro"', ], dependencies: [ - proc_macro2_dep, - quote_dep, - syn_dep, + proc_macro2_rs_native, + quote_rs_native, + syn_rs_native, ], ) diff --git a/rust/qemu-api/meson.build b/rust/qemu-api/meson.build index 1696df7..1ea86b8 100644 --- a/rust/qemu-api/meson.build +++ b/rust/qemu-api/meson.build @@ -2,8 +2,6 @@ _qemu_api_cfg = run_command(rustc_args, '--config-headers', config_host_h, '--features', files('Cargo.toml'), capture: true, check: true).stdout().strip().splitlines() -libc_dep = dependency('libc-0.2-rs') - # _qemu_api_cfg += ['--cfg', 'feature="allocator"'] if get_option('debug_mutex') _qemu_api_cfg += ['--cfg', 'feature="debug_cell"'] @@ -37,7 +35,7 @@ _qemu_api_rs = static_library( override_options: ['rust_std=2021', 'build.rust_std=2021'], rust_abi: 'rust', rust_args: _qemu_api_cfg, - dependencies: [libc_dep, qemu_api_macros], + dependencies: [libc_rs, qemu_api_macros], ) rust.test('rust-qemu-api-tests', _qemu_api_rs, diff --git a/rust/qemu-api/src/bitops.rs b/rust/qemu-api/src/bitops.rs index 023ec1a..b1e3a53 100644 --- a/rust/qemu-api/src/bitops.rs +++ b/rust/qemu-api/src/bitops.rs @@ -1,5 +1,5 @@ // Copyright (C) 2024 Intel Corporation. -// Author(s): Zhao Liu <zhai1.liu@intel.com> +// Author(s): Zhao Liu <zhao1.liu@intel.com> // SPDX-License-Identifier: GPL-2.0-or-later //! This module provides bit operation extensions to integer types. diff --git a/rust/qemu-api/src/qom.rs b/rust/qemu-api/src/qom.rs index 41e5a5e..14f98fe 100644 --- a/rust/qemu-api/src/qom.rs +++ b/rust/qemu-api/src/qom.rs @@ -291,7 +291,7 @@ pub unsafe trait ObjectType: Sized { } /// Return the receiver as a const raw pointer to Object. - /// This is preferrable to `as_object_mut_ptr()` if a C + /// This is preferable to `as_object_mut_ptr()` if a C /// function only needs a `const Object *`. fn as_object_ptr(&self) -> *const bindings::Object { self.as_object().as_ptr() @@ -485,7 +485,7 @@ pub trait ObjectImpl: ObjectType + IsA<Object> { /// `INSTANCE_INIT` functions have been called. const INSTANCE_POST_INIT: Option<fn(&Self)> = None; - /// Called on descendent classes after all parent class initialization + /// Called on descendant classes after all parent class initialization /// has occurred, but before the class itself is initialized. This /// is only useful if a class is not a leaf, and can be used to undo /// the effects of copying the contents of the parent's class struct diff --git a/rust/qemu-api/src/timer.rs b/rust/qemu-api/src/timer.rs index 868bd88..0a2d111 100644 --- a/rust/qemu-api/src/timer.rs +++ b/rust/qemu-api/src/timer.rs @@ -1,5 +1,5 @@ // Copyright (C) 2024 Intel Corporation. -// Author(s): Zhao Liu <zhai1.liu@intel.com> +// Author(s): Zhao Liu <zhao1.liu@intel.com> // SPDX-License-Identifier: GPL-2.0-or-later use std::{ diff --git a/rust/qemu-api/src/vmstate.rs b/rust/qemu-api/src/vmstate.rs index 9c8b239..812f390 100644 --- a/rust/qemu-api/src/vmstate.rs +++ b/rust/qemu-api/src/vmstate.rs @@ -9,7 +9,7 @@ //! * [`vmstate_unused!`](crate::vmstate_unused) and //! [`vmstate_of!`](crate::vmstate_of), which are used to express the //! migration format for a struct. This is based on the [`VMState`] trait, -//! which is defined by all migrateable types. +//! which is defined by all migratable types. //! //! * [`impl_vmstate_forward`](crate::impl_vmstate_forward) and //! [`impl_vmstate_bitsized`](crate::impl_vmstate_bitsized), which help with diff --git a/rust/qemu-api/tests/vmstate_tests.rs b/rust/qemu-api/tests/vmstate_tests.rs index ad0fc5c..bded836 100644 --- a/rust/qemu-api/tests/vmstate_tests.rs +++ b/rust/qemu-api/tests/vmstate_tests.rs @@ -1,5 +1,5 @@ // Copyright (C) 2025 Intel Corporation. -// Author(s): Zhao Liu <zhai1.liu@intel.com> +// Author(s): Zhao Liu <zhao1.liu@intel.com> // SPDX-License-Identifier: GPL-2.0-or-later use std::{ diff --git a/scripts/tracetool/backend/simple.py b/scripts/tracetool/backend/simple.py index a74d61f..2688d4b 100644 --- a/scripts/tracetool/backend/simple.py +++ b/scripts/tracetool/backend/simple.py @@ -36,8 +36,17 @@ def generate_h_begin(events, group): def generate_h(event, group): - out(' _simple_%(api)s(%(args)s);', + event_id = 'TRACE_' + event.name.upper() + if "vcpu" in event.properties: + # already checked on the generic format code + cond = "true" + else: + cond = "trace_event_get_state(%s)" % event_id + out(' if (%(cond)s) {', + ' _simple_%(api)s(%(args)s);', + ' }', api=event.api(), + cond=cond, args=", ".join(event.args.names())) @@ -72,22 +81,10 @@ def generate_c(event, group): if len(event.args) == 0: sizestr = '0' - event_id = 'TRACE_' + event.name.upper() - if "vcpu" in event.properties: - # already checked on the generic format code - cond = "true" - else: - cond = "trace_event_get_state(%s)" % event_id - out('', - ' if (!%(cond)s) {', - ' return;', - ' }', - '', ' if (trace_record_start(&rec, %(event_obj)s.id, %(size_str)s)) {', ' return; /* Trace Buffer Full, Event Dropped ! */', ' }', - cond=cond, event_obj=event.api(event.QEMU_EVENT), size_str=sizestr) diff --git a/system/meson.build b/system/meson.build index c2f0082..7514bf3 100644 --- a/system/meson.build +++ b/system/meson.build @@ -7,7 +7,7 @@ system_ss.add(files( 'vl.c', ), sdl, libpmem, libdaxctl) -libsystem_ss.add(files( +system_ss.add(files( 'balloon.c', 'bootdevice.c', 'cpus.c', diff --git a/system/runstate.c b/system/runstate.c index de74d96..38900c9 100644 --- a/system/runstate.c +++ b/system/runstate.c @@ -590,6 +590,58 @@ static void qemu_system_wakeup(void) } } +static char *tdx_parse_panic_message(char *message) +{ + bool printable = false; + char *buf = NULL; + int len = 0, i; + + /* + * Although message is defined as a json string, we shouldn't + * unconditionally treat it as is because the guest generated it and + * it's not necessarily trustable. + */ + if (message) { + /* The caller guarantees the NULL-terminated string. */ + len = strlen(message); + + printable = len > 0; + for (i = 0; i < len; i++) { + if (!(0x20 <= message[i] && message[i] <= 0x7e)) { + printable = false; + break; + } + } + } + + if (len == 0) { + buf = g_malloc(1); + buf[0] = '\0'; + } else { + if (!printable) { + /* 3 = length of "%02x " */ + buf = g_malloc(len * 3); + for (i = 0; i < len; i++) { + if (message[i] == '\0') { + break; + } else { + sprintf(buf + 3 * i, "%02x ", message[i]); + } + } + if (i > 0) { + /* replace the last ' '(space) to NULL */ + buf[i * 3 - 1] = '\0'; + } else { + buf[0] = '\0'; + } + } else { + buf = g_strdup(message); + } + } + + return buf; +} + void qemu_system_guest_panicked(GuestPanicInformation *info) { qemu_log_mask(LOG_GUEST_ERROR, "Guest crashed"); @@ -631,7 +683,20 @@ void qemu_system_guest_panicked(GuestPanicInformation *info) S390CrashReason_str(info->u.s390.reason), info->u.s390.psw_mask, info->u.s390.psw_addr); + } else if (info->type == GUEST_PANIC_INFORMATION_TYPE_TDX) { + char *message = tdx_parse_panic_message(info->u.tdx.message); + qemu_log_mask(LOG_GUEST_ERROR, + "\nTDX guest reports fatal error." + " error code: 0x%" PRIx32 " error message:\"%s\"\n", + info->u.tdx.error_code, message); + g_free(message); + if (info->u.tdx.gpa != -1ull) { + qemu_log_mask(LOG_GUEST_ERROR, "Additional error information " + "can be found at gpa page: 0x%" PRIx64 "\n", + info->u.tdx.gpa); + } } + qapi_free_GuestPanicInformation(info); } } diff --git a/system/vl.c b/system/vl.c index fd402b8..3b7057e 100644 --- a/system/vl.c +++ b/system/vl.c @@ -1192,10 +1192,7 @@ static int parse_fw_cfg(void *opaque, QemuOpts *opts, Error **errp) return -1; } } - /* For legacy, keep user files in a specific global order. */ - fw_cfg_set_order_override(fw_cfg, FW_CFG_ORDER_OVERRIDE_USER); fw_cfg_add_file(fw_cfg, name, buf, size); - fw_cfg_reset_order_override(fw_cfg); return 0; } @@ -2745,7 +2742,6 @@ static void qemu_create_cli_devices(void) } /* init generic devices */ - rom_set_order_override(FW_CFG_ORDER_OVERRIDE_DEVICE); qemu_opts_foreach(qemu_find_opts("device"), device_init_func, NULL, &error_fatal); QTAILQ_FOREACH(opt, &device_opts, next) { @@ -2756,7 +2752,6 @@ static void qemu_create_cli_devices(void) assert(ret_data == NULL); /* error_fatal aborts */ loc_pop(&opt->loc); } - rom_reset_order_override(); } static bool qemu_machine_creation_done(Error **errp) diff --git a/target/arm/kvm.c b/target/arm/kvm.c index a2791aa..74fda8b 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -1846,6 +1846,11 @@ static int kvm_arm_sve_set_vls(ARMCPU *cpu) #define ARM_CPU_ID_MPIDR 3, 0, 0, 0, 5 +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { int ret; diff --git a/target/arm/meson.build b/target/arm/meson.build index dcba4ef..7aa81e3 100644 --- a/target/arm/meson.build +++ b/target/arm/meson.build @@ -27,7 +27,7 @@ arm_user_ss.add(files( 'vfp_fpscr.c', )) -arm_common_system_ss.add(files('cpu.c'), capstone) +arm_common_system_ss.add(files('cpu.c')) arm_common_system_ss.add(when: 'TARGET_AARCH64', if_false: files( 'cpu32-stubs.c')) arm_common_system_ss.add(when: 'CONFIG_KVM', if_false: files('kvm-stub.c')) diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h index 164be76..48b88db 100644 --- a/target/i386/confidential-guest.h +++ b/target/i386/confidential-guest.h @@ -39,8 +39,10 @@ struct X86ConfidentialGuestClass { /* <public> */ int (*kvm_type)(X86ConfidentialGuest *cg); - uint32_t (*mask_cpuid_features)(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, - int reg, uint32_t value); + void (*cpu_instance_init)(X86ConfidentialGuest *cg, CPUState *cpu); + uint32_t (*adjust_cpuid_features)(X86ConfidentialGuest *cg, uint32_t feature, + uint32_t index, int reg, uint32_t value); + int (*check_features)(X86ConfidentialGuest *cg, CPUState *cs); }; /** @@ -59,25 +61,47 @@ static inline int x86_confidential_guest_kvm_type(X86ConfidentialGuest *cg) } } +static inline void x86_confidential_guest_cpu_instance_init(X86ConfidentialGuest *cg, + CPUState *cpu) +{ + X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); + + if (klass->cpu_instance_init) { + klass->cpu_instance_init(cg, cpu); + } +} + /** - * x86_confidential_guest_mask_cpuid_features: + * x86_confidential_guest_adjust_cpuid_features: * - * Removes unsupported features from a confidential guest's CPUID values, returns - * the value with the bits removed. The bits removed should be those that KVM - * provides independent of host-supported CPUID features, but are not supported by - * the confidential computing firmware. + * Adjust the supported features from a confidential guest's CPUID values, + * returns the adjusted value. There are bits being removed that are not + * supported by the confidential computing firmware or bits being added that + * are forcibly exposed to guest by the confidential computing firmware. */ -static inline int x86_confidential_guest_mask_cpuid_features(X86ConfidentialGuest *cg, +static inline int x86_confidential_guest_adjust_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, int reg, uint32_t value) { X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); - if (klass->mask_cpuid_features) { - return klass->mask_cpuid_features(cg, feature, index, reg, value); + if (klass->adjust_cpuid_features) { + return klass->adjust_cpuid_features(cg, feature, index, reg, value); } else { return value; } } +static inline int x86_confidential_guest_check_features(X86ConfidentialGuest *cg, + CPUState *cs) +{ + X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); + + if (klass->check_features) { + return klass->check_features(cg, cs); + } + + return 0; +} + #endif diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 33afc3e..c9bd344 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -37,6 +37,7 @@ #include "hw/i386/topology.h" #include "exec/watchpoint.h" #ifndef CONFIG_USER_ONLY +#include "confidential-guest.h" #include "system/reset.h" #include "qapi/qapi-commands-machine.h" #include "system/address-spaces.h" @@ -1252,12 +1253,12 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { [FEAT_8000_0021_EAX] = { .type = CPUID_FEATURE_WORD, .feat_names = { - "no-nested-data-bp", NULL, "lfence-always-serializing", NULL, + "no-nested-data-bp", "fs-gs-base-ns", "lfence-always-serializing", NULL, NULL, NULL, "null-sel-clr-base", NULL, "auto-ibrs", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, + "prefetchi", NULL, NULL, NULL, "eraps", NULL, NULL, "sbpb", "ibpb-brtype", "srso-no", "srso-user-kernel-no", NULL, }, @@ -1677,14 +1678,21 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, }; -typedef struct FeatureMask { - FeatureWord index; - uint64_t mask; -} FeatureMask; +bool is_feature_word_cpuid(uint32_t feature, uint32_t index, int reg) +{ + FeatureWordInfo *wi; + FeatureWord w; -typedef struct FeatureDep { - FeatureMask from, to; -} FeatureDep; + for (w = 0; w < FEATURE_WORDS; w++) { + wi = &feature_word_info[w]; + if (wi->type == CPUID_FEATURE_WORD && wi->cpuid.eax == feature && + (!wi->cpuid.needs_ecx || wi->cpuid.ecx == index) && + wi->cpuid.reg == reg) { + return true; + } + } + return false; +} static FeatureDep feature_dependencies[] = { { @@ -1854,9 +1862,6 @@ static const X86RegisterInfo32 x86_reg_info_32[CPU_NB_REGS32] = { }; #undef REGISTER -/* CPUID feature bits available in XSS */ -#define CPUID_XSTATE_XSS_MASK (XSTATE_ARCH_LBR_MASK) - ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = { [XSTATE_FP_BIT] = { /* x87 FP state component is always enabled if XSAVE is supported */ @@ -2206,6 +2211,60 @@ static CPUCaches epyc_v4_cache_info = { }, }; +static CPUCaches epyc_v5_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 64 * KiB, + .line_size = 64, + .associativity = 4, + .partitions = 1, + .sets = 256, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 8 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 8192, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + static const CPUCaches epyc_rome_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, @@ -2314,6 +2373,60 @@ static const CPUCaches epyc_rome_v3_cache_info = { }, }; +static const CPUCaches epyc_rome_v5_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 16 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 16384, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + static const CPUCaches epyc_milan_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, @@ -2422,6 +2535,60 @@ static const CPUCaches epyc_milan_v2_cache_info = { }, }; +static const CPUCaches epyc_milan_v3_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + static const CPUCaches epyc_genoa_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, @@ -2476,6 +2643,114 @@ static const CPUCaches epyc_genoa_cache_info = { }, }; +static const CPUCaches epyc_genoa_v2_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 1 * MiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 2048, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + +static const CPUCaches epyc_turin_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 48 * KiB, + .line_size = 64, + .associativity = 12, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 1 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + /* The following VMX features are not supported by KVM and are left out in the * CPU definitions: * @@ -5233,6 +5508,25 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, .cache_info = &epyc_v4_cache_info }, + { + .version = 5, + .props = (PropValue[]) { + { "overflow-recov", "on" }, + { "succor", "on" }, + { "lbrv", "on" }, + { "tsc-scale", "on" }, + { "vmcb-clean", "on" }, + { "flushbyasid", "on" }, + { "pause-filter", "on" }, + { "pfthreshold", "on" }, + { "v-vmsave-vmload", "on" }, + { "vgif", "on" }, + { "model-id", + "AMD EPYC-v5 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_v5_cache_info + }, { /* end of list */ } } }, @@ -5371,6 +5665,25 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } }, }, + { + .version = 5, + .props = (PropValue[]) { + { "overflow-recov", "on" }, + { "succor", "on" }, + { "lbrv", "on" }, + { "tsc-scale", "on" }, + { "vmcb-clean", "on" }, + { "flushbyasid", "on" }, + { "pause-filter", "on" }, + { "pfthreshold", "on" }, + { "v-vmsave-vmload", "on" }, + { "vgif", "on" }, + { "model-id", + "AMD EPYC-Rome-v5 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_rome_v5_cache_info + }, { /* end of list */ } } }, @@ -5446,6 +5759,25 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, .cache_info = &epyc_milan_v2_cache_info }, + { + .version = 3, + .props = (PropValue[]) { + { "overflow-recov", "on" }, + { "succor", "on" }, + { "lbrv", "on" }, + { "tsc-scale", "on" }, + { "vmcb-clean", "on" }, + { "flushbyasid", "on" }, + { "pause-filter", "on" }, + { "pfthreshold", "on" }, + { "v-vmsave-vmload", "on" }, + { "vgif", "on" }, + { "model-id", + "AMD EPYC-Milan-v3 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_milan_v3_cache_info + }, { /* end of list */ } } }, @@ -5520,6 +5852,31 @@ static const X86CPUDefinition builtin_x86_defs[] = { .xlevel = 0x80000022, .model_id = "AMD EPYC-Genoa Processor", .cache_info = &epyc_genoa_cache_info, + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + .props = (PropValue[]) { + { "overflow-recov", "on" }, + { "succor", "on" }, + { "lbrv", "on" }, + { "tsc-scale", "on" }, + { "vmcb-clean", "on" }, + { "flushbyasid", "on" }, + { "pause-filter", "on" }, + { "pfthreshold", "on" }, + { "v-vmsave-vmload", "on" }, + { "vgif", "on" }, + { "fs-gs-base-ns", "on" }, + { "perfmon-v2", "on" }, + { "model-id", + "AMD EPYC-Genoa-v2 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_genoa_v2_cache_info + }, + { /* end of list */ } + } }, { .name = "YongFeng", @@ -5657,6 +6014,89 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .name = "EPYC-Turin", + .level = 0xd, + .vendor = CPUID_VENDOR_AMD, + .family = 26, + .model = 0, + .stepping = 0, + .features[FEAT_1_ECX] = + CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | + CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | + CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | + CPUID_EXT_PCID | CPUID_EXT_CX16 | CPUID_EXT_FMA | + CPUID_EXT_SSSE3 | CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | + CPUID_EXT_SSE3, + .features[FEAT_1_EDX] = + CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | + CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | + CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | + CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | + CPUID_VME | CPUID_FP87, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, + .features[FEAT_7_0_EBX] = + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | + CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | + CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_AVX512F | + CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_AVX512IFMA | + CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI | + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | + CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI | + CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | + CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG | + CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 | + CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_MOVDIRI | + CPUID_7_0_ECX_MOVDIR64B, + .features[FEAT_7_0_EDX] = + CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_AVX512_VP2INTERSECT, + .features[FEAT_7_1_EAX] = + CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_AVX512_BF16, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | + CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | + CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | + CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, + .features[FEAT_8000_0001_EDX] = + CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | + CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | + CPUID_EXT2_SYSCALL, + .features[FEAT_8000_0007_EBX] = + CPUID_8000_0007_EBX_OVERFLOW_RECOV | CPUID_8000_0007_EBX_SUCCOR, + .features[FEAT_8000_0008_EBX] = + CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | + CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB | + CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP | + CPUID_8000_0008_EBX_STIBP_ALWAYS_ON | + CPUID_8000_0008_EBX_AMD_SSBD | CPUID_8000_0008_EBX_AMD_PSFD, + .features[FEAT_8000_0021_EAX] = + CPUID_8000_0021_EAX_NO_NESTED_DATA_BP | + CPUID_8000_0021_EAX_FS_GS_BASE_NS | + CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING | + CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE | + CPUID_8000_0021_EAX_AUTO_IBRS | CPUID_8000_0021_EAX_PREFETCHI | + CPUID_8000_0021_EAX_SBPB | CPUID_8000_0021_EAX_IBPB_BRTYPE | + CPUID_8000_0021_EAX_SRSO_USER_KERNEL_NO, + .features[FEAT_8000_0022_EAX] = + CPUID_8000_0022_EAX_PERFMON_V2, + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, + .features[FEAT_SVM] = + CPUID_SVM_NPT | CPUID_SVM_LBRV | CPUID_SVM_NRIPSAVE | + CPUID_SVM_TSCSCALE | CPUID_SVM_VMCBCLEAN | CPUID_SVM_FLUSHASID | + CPUID_SVM_PAUSEFILTER | CPUID_SVM_PFTHRESHOLD | + CPUID_SVM_V_VMSAVE_VMLOAD | CPUID_SVM_VGIF | + CPUID_SVM_VNMI | CPUID_SVM_SVME_ADDR_CHK, + .xlevel = 0x80000022, + .model_id = "AMD EPYC-Turin Processor", + .cache_info = &epyc_turin_cache_info, + }, }; /* @@ -5766,7 +6206,7 @@ static const TypeInfo max_x86_cpu_type_info = { .class_init = max_x86_cpu_class_init, }; -static char *feature_word_description(FeatureWordInfo *f, uint32_t bit) +static char *feature_word_description(FeatureWordInfo *f) { assert(f->type == CPUID_FEATURE_WORD || f->type == MSR_FEATURE_WORD); @@ -5775,11 +6215,15 @@ static char *feature_word_description(FeatureWordInfo *f, uint32_t bit) { const char *reg = get_register_name_32(f->cpuid.reg); assert(reg); - return g_strdup_printf("CPUID.%02XH:%s", - f->cpuid.eax, reg); + if (!f->cpuid.needs_ecx) { + return g_strdup_printf("CPUID[eax=%02Xh].%s", f->cpuid.eax, reg); + } else { + return g_strdup_printf("CPUID[eax=%02Xh,ecx=%02Xh].%s", + f->cpuid.eax, f->cpuid.ecx, reg); + } } case MSR_FEATURE_WORD: - return g_strdup_printf("MSR(%02XH)", + return g_strdup_printf("MSR(%02Xh)", f->msr.index); } @@ -5799,12 +6243,13 @@ static bool x86_cpu_have_filtered_features(X86CPU *cpu) return false; } -static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, - const char *verbose_prefix) +void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix) { CPUX86State *env = &cpu->env; FeatureWordInfo *f = &feature_word_info[w]; int i; + g_autofree char *feat_word_str = feature_word_description(f); if (!cpu->force_features) { env->features[w] &= ~mask; @@ -5817,7 +6262,35 @@ static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, for (i = 0; i < 64; ++i) { if ((1ULL << i) & mask) { - g_autofree char *feat_word_str = feature_word_description(f, i); + warn_report("%s: %s%s%s [bit %d]", + verbose_prefix, + feat_word_str, + f->feat_names[i] ? "." : "", + f->feat_names[i] ? f->feat_names[i] : "", i); + } + } +} + +void mark_forced_on_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix) +{ + CPUX86State *env = &cpu->env; + FeatureWordInfo *f = &feature_word_info[w]; + int i; + + if (!cpu->force_features) { + env->features[w] |= mask; + } + + cpu->forced_on_features[w] |= mask; + + if (!verbose_prefix) { + return; + } + + for (i = 0; i < 64; ++i) { + if ((1ULL << i) & mask) { + g_autofree char *feat_word_str = feature_word_description(f); warn_report("%s: %s%s%s [bit %d]", verbose_prefix, feat_word_str, @@ -7044,7 +7517,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, break; case 0x1F: /* V2 Extended Topology Enumeration Leaf */ - if (!x86_has_extended_topo(env->avail_cpu_topo)) { + if (!x86_has_cpuid_0x1f(cpu)) { *eax = *ebx = *ecx = *edx = 0; break; } @@ -7908,7 +8381,7 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) * cpu->vendor_cpuid_only has been unset for compatibility with older * machine types. */ - if (x86_has_extended_topo(env->avail_cpu_topo) && + if (x86_has_cpuid_0x1f(cpu) && (IS_INTEL_CPU(env) || !cpu->vendor_cpuid_only)) { x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x1F); } @@ -8543,6 +9016,13 @@ static void x86_cpu_post_initfn(Object *obj) } accel_cpu_instance_init(CPU(obj)); + +#ifndef CONFIG_USER_ONLY + if (current_machine && current_machine->cgs) { + x86_confidential_guest_cpu_instance_init( + X86_CONFIDENTIAL_GUEST(current_machine->cgs), (CPU(obj))); + } +#endif } static void x86_cpu_init_default_topo(X86CPU *cpu) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index c51e0a4..1146465 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -584,6 +584,7 @@ typedef enum X86Seg { #define XSTATE_OPMASK_BIT 5 #define XSTATE_ZMM_Hi256_BIT 6 #define XSTATE_Hi16_ZMM_BIT 7 +#define XSTATE_PT_BIT 8 #define XSTATE_PKRU_BIT 9 #define XSTATE_ARCH_LBR_BIT 15 #define XSTATE_XTILE_CFG_BIT 17 @@ -597,6 +598,7 @@ typedef enum X86Seg { #define XSTATE_OPMASK_MASK (1ULL << XSTATE_OPMASK_BIT) #define XSTATE_ZMM_Hi256_MASK (1ULL << XSTATE_ZMM_Hi256_BIT) #define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT) +#define XSTATE_PT_MASK (1ULL << XSTATE_PT_BIT) #define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT) #define XSTATE_ARCH_LBR_MASK (1ULL << XSTATE_ARCH_LBR_BIT) #define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT) @@ -619,6 +621,11 @@ typedef enum X86Seg { XSTATE_Hi16_ZMM_MASK | XSTATE_PKRU_MASK | \ XSTATE_XTILE_CFG_MASK | XSTATE_XTILE_DATA_MASK) +/* CPUID feature bits available in XSS */ +#define CPUID_XSTATE_XSS_MASK (XSTATE_ARCH_LBR_MASK) + +#define CPUID_XSTATE_MASK (CPUID_XSTATE_XCR0_MASK | CPUID_XSTATE_XSS_MASK) + /* CPUID feature words */ typedef enum FeatureWord { FEAT_1_EDX, /* CPUID[1].EDX */ @@ -667,6 +674,15 @@ typedef enum FeatureWord { FEATURE_WORDS, } FeatureWord; +typedef struct FeatureMask { + FeatureWord index; + uint64_t mask; +} FeatureMask; + +typedef struct FeatureDep { + FeatureMask from, to; +} FeatureDep; + typedef uint64_t FeatureWordArray[FEATURE_WORDS]; uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); @@ -899,6 +915,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_0_ECX_LA57 (1U << 16) /* Read Processor ID */ #define CPUID_7_0_ECX_RDPID (1U << 22) +/* KeyLocker */ +#define CPUID_7_0_ECX_KeyLocker (1U << 23) /* Bus Lock Debug Exception */ #define CPUID_7_0_ECX_BUS_LOCK_DETECT (1U << 24) /* Cache Line Demote Instruction */ @@ -920,6 +938,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_0_EDX_FSRM (1U << 4) /* AVX512 Vector Pair Intersection to a Pair of Mask Registers */ #define CPUID_7_0_EDX_AVX512_VP2INTERSECT (1U << 8) + /* "md_clear" VERW clears CPU buffers */ +#define CPUID_7_0_EDX_MD_CLEAR (1U << 10) /* SERIALIZE instruction */ #define CPUID_7_0_EDX_SERIALIZE (1U << 14) /* TSX Suspend Load Address Tracking instruction */ @@ -957,6 +977,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_1_EAX_AVX_VNNI (1U << 4) /* AVX512 BFloat16 Instruction */ #define CPUID_7_1_EAX_AVX512_BF16 (1U << 5) +/* Linear address space separation */ +#define CPUID_7_1_EAX_LASS (1U << 6) /* CMPCCXADD Instructions */ #define CPUID_7_1_EAX_CMPCCXADD (1U << 7) /* Fast Zero REP MOVS */ @@ -1070,12 +1092,16 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); /* Processor ignores nested data breakpoints */ #define CPUID_8000_0021_EAX_NO_NESTED_DATA_BP (1U << 0) +/* WRMSR to FS_BASE, GS_BASE, or KERNEL_GS_BASE is non-serializing */ +#define CPUID_8000_0021_EAX_FS_GS_BASE_NS (1U << 1) /* LFENCE is always serializing */ #define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2) /* Null Selector Clears Base */ #define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6) /* Automatic IBRS */ #define CPUID_8000_0021_EAX_AUTO_IBRS (1U << 8) +/* Indicates support for IC prefetch */ +#define CPUID_8000_0021_EAX_PREFETCHI (1U << 20) /* Enhanced Return Address Predictor Scurity */ #define CPUID_8000_0021_EAX_ERAPS (1U << 24) /* Selective Branch Predictor Barrier */ @@ -1100,6 +1126,7 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_XSAVE_XSAVEC (1U << 1) #define CPUID_XSAVE_XGETBV1 (1U << 2) #define CPUID_XSAVE_XSAVES (1U << 3) +#define CPUID_XSAVE_XFD (1U << 4) #define CPUID_6_EAX_ARAT (1U << 2) @@ -2192,6 +2219,9 @@ struct ArchCPU { /* Features that were filtered out because of missing host capabilities */ FeatureWordArray filtered_features; + /* Features that are forced enabled by underlying hypervisor, e.g., TDX */ + FeatureWordArray forced_on_features; + /* Enable PMU CPUID bits. This can't be enabled by default yet because * it doesn't have ABI stability guarantees, as it passes all PMU CPUID * bits returned by GET_SUPPORTED_CPUID (that depend on host CPU and kernel @@ -2239,6 +2269,9 @@ struct ArchCPU { /* Compatibility bits for old machine types: */ bool enable_cpuid_0xb; + /* Force to enable cpuid 0x1f */ + bool enable_cpuid_0x1f; + /* Enable auto level-increase for all CPUID leaves */ bool full_cpuid_auto_level; @@ -2499,6 +2532,17 @@ void cpu_set_apic_feature(CPUX86State *env); void host_cpuid(uint32_t function, uint32_t count, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); bool cpu_has_x2apic_feature(CPUX86State *env); +bool is_feature_word_cpuid(uint32_t feature, uint32_t index, int reg); +void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix); +void mark_forced_on_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix); + +static inline bool x86_has_cpuid_0x1f(X86CPU *cpu) +{ + return cpu->enable_cpuid_0x1f || + x86_has_extended_topo(cpu->env.avail_cpu_topo); +} /* helper.c */ void x86_cpu_set_a20(X86CPU *cpu, int a20_state); diff --git a/target/i386/emulate/x86_flags.c b/target/i386/emulate/x86_flags.c index 47bc197..cc138c7 100644 --- a/target/i386/emulate/x86_flags.c +++ b/target/i386/emulate/x86_flags.c @@ -255,19 +255,19 @@ void lflags_to_rflags(CPUX86State *env) void rflags_to_lflags(CPUX86State *env) { - target_ulong cf_xor_of; + target_ulong cf_af, cf_xor_of; + /* Leave the low byte zero so that parity is always even... */ + env->cc_dst = !(env->eflags & CC_Z) << 8; + + /* ... and therefore cc_src always uses opposite polarity. */ env->cc_src = CC_P; env->cc_src ^= env->eflags & (CC_S | CC_P); /* rotate right by one to move CF and AF into the carry-out positions */ - env->cc_src |= ( - (env->eflags >> 1) | - (env->eflags << (TARGET_LONG_BITS - 1))) & (CC_C | CC_A); + cf_af = env->eflags & (CC_C | CC_A); + env->cc_src |= ((cf_af >> 1) | (cf_af << (TARGET_LONG_BITS - 1))); - cf_xor_of = (env->eflags & (CC_C | CC_O)) + (CC_O - CC_C); + cf_xor_of = ((env->eflags & (CC_C | CC_O)) + (CC_O - CC_C)) & CC_O; env->cc_src |= -cf_xor_of & LF_MASK_PO; - - /* Leave the low byte zero so that parity is not affected. */ - env->cc_dst = !(env->eflags & CC_Z) << 8; } diff --git a/target/i386/host-cpu.c b/target/i386/host-cpu.c index a2d3830..7512567 100644 --- a/target/i386/host-cpu.c +++ b/target/i386/host-cpu.c @@ -15,7 +15,7 @@ #include "system/system.h" /* Note: Only safe for use on x86(-64) hosts */ -static uint32_t host_cpu_phys_bits(void) +uint32_t host_cpu_phys_bits(void) { uint32_t eax; uint32_t host_phys_bits; diff --git a/target/i386/host-cpu.h b/target/i386/host-cpu.h index 6a9bc91..b97ec01 100644 --- a/target/i386/host-cpu.h +++ b/target/i386/host-cpu.h @@ -10,6 +10,7 @@ #ifndef HOST_CPU_H #define HOST_CPU_H +uint32_t host_cpu_phys_bits(void); void host_cpu_instance_init(X86CPU *cpu); void host_cpu_max_instance_init(X86CPU *cpu); bool host_cpu_realizefn(CPUState *cs, Error **errp); diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index c9a3c02..a6bc089 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -38,6 +38,7 @@ #include "kvm_i386.h" #include "../confidential-guest.h" #include "sev.h" +#include "tdx.h" #include "xen-emu.h" #include "hyperv.h" #include "hyperv-proto.h" @@ -192,6 +193,7 @@ static const char *vm_type_name[] = { [KVM_X86_SEV_VM] = "SEV", [KVM_X86_SEV_ES_VM] = "SEV-ES", [KVM_X86_SNP_VM] = "SEV-SNP", + [KVM_X86_TDX_VM] = "TDX", }; bool kvm_is_vm_type_supported(int type) @@ -326,7 +328,7 @@ void kvm_synchronize_all_tsc(void) { CPUState *cpu; - if (kvm_enabled()) { + if (kvm_enabled() && !is_tdx_vm()) { CPU_FOREACH(cpu) { run_on_cpu(cpu, do_kvm_synchronize_tsc, RUN_ON_CPU_NULL); } @@ -392,7 +394,7 @@ static bool host_tsx_broken(void) /* Returns the value for a specific register on the cpuid entry */ -static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg) +uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg) { uint32_t ret = 0; switch (reg) { @@ -414,9 +416,9 @@ static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg) /* Find matching entry for function/index on kvm_cpuid2 struct */ -static struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid, - uint32_t function, - uint32_t index) +struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid, + uint32_t function, + uint32_t index) { int i; for (i = 0; i < cpuid->nent; ++i) { @@ -572,7 +574,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, } if (current_machine->cgs) { - ret = x86_confidential_guest_mask_cpuid_features( + ret = x86_confidential_guest_adjust_cpuid_features( X86_CONFIDENTIAL_GUEST(current_machine->cgs), function, index, reg, ret); } @@ -868,6 +870,15 @@ static int kvm_arch_set_tsc_khz(CPUState *cs) int r, cur_freq; bool set_ioctl = false; + /* + * TSC of TD vcpu is immutable, it cannot be set/changed via vcpu scope + * VM_SET_TSC_KHZ, but only be initialized via VM scope VM_SET_TSC_KHZ + * before ioctl KVM_TDX_INIT_VM in tdx_pre_create_vcpu() + */ + if (is_tdx_vm()) { + return 0; + } + if (!env->tsc_khz) { return 0; } @@ -1779,8 +1790,6 @@ static int hyperv_init_vcpu(X86CPU *cpu) static Error *invtsc_mig_blocker; -#define KVM_MAX_CPUID_ENTRIES 100 - static void kvm_init_xsave(CPUX86State *env) { if (has_xsave2) { @@ -1823,9 +1832,8 @@ static void kvm_init_nested_state(CPUX86State *env) } } -static uint32_t kvm_x86_build_cpuid(CPUX86State *env, - struct kvm_cpuid_entry2 *entries, - uint32_t cpuid_i) +uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries, + uint32_t cpuid_i) { uint32_t limit, i, j; uint32_t unused; @@ -1864,7 +1872,7 @@ static uint32_t kvm_x86_build_cpuid(CPUX86State *env, break; } case 0x1f: - if (!x86_has_extended_topo(env->avail_cpu_topo)) { + if (!x86_has_cpuid_0x1f(env_archcpu(env))) { cpuid_i--; break; } @@ -2052,6 +2060,15 @@ full: abort(); } +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + if (is_tdx_vm()) { + return tdx_pre_create_vcpu(cpu, errp); + } + + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { struct { @@ -2076,6 +2093,14 @@ int kvm_arch_init_vcpu(CPUState *cs) int r; Error *local_err = NULL; + if (current_machine->cgs) { + r = x86_confidential_guest_check_features( + X86_CONFIDENTIAL_GUEST(current_machine->cgs), cs); + if (r < 0) { + return r; + } + } + memset(&cpuid_data, 0, sizeof(cpuid_data)); cpuid_i = 0; @@ -3206,16 +3231,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) Error *local_err = NULL; /* - * Initialize SEV context, if required - * - * If no memory encryption is requested (ms->cgs == NULL) this is - * a no-op. - * - * It's also a no-op if a non-SEV confidential guest support - * mechanism is selected. SEV is the only mechanism available to - * select on x86 at present, so this doesn't arise, but if new - * mechanisms are supported in future (e.g. TDX), they'll need - * their own initialization either here or elsewhere. + * Initialize confidential guest (SEV/TDX) context, if required */ if (ms->cgs) { ret = confidential_guest_kvm_init(ms->cgs, &local_err); @@ -3856,32 +3872,34 @@ static void kvm_init_msrs(X86CPU *cpu) CPUX86State *env = &cpu->env; kvm_msr_buf_reset(cpu); - if (has_msr_arch_capabs) { - kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, - env->features[FEAT_ARCH_CAPABILITIES]); - } - if (has_msr_core_capabs) { - kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, - env->features[FEAT_CORE_CAPABILITY]); - } + if (!is_tdx_vm()) { + if (has_msr_arch_capabs) { + kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, + env->features[FEAT_ARCH_CAPABILITIES]); + } + + if (has_msr_core_capabs) { + kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, + env->features[FEAT_CORE_CAPABILITY]); + } + + if (has_msr_perf_capabs && cpu->enable_pmu) { + kvm_msr_entry_add_perf(cpu, env->features); + } - if (has_msr_perf_capabs && cpu->enable_pmu) { - kvm_msr_entry_add_perf(cpu, env->features); + /* + * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but + * all kernels with MSR features should have them. + */ + if (kvm_feature_msrs && cpu_has_vmx(env)) { + kvm_msr_entry_add_vmx(cpu, env->features); + } } if (has_msr_ucode_rev) { kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev); } - - /* - * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but - * all kernels with MSR features should have them. - */ - if (kvm_feature_msrs && cpu_has_vmx(env)) { - kvm_msr_entry_add_vmx(cpu, env->features); - } - assert(kvm_buf_set_msrs(cpu) == 0); } @@ -6121,6 +6139,16 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) case KVM_EXIT_HYPERCALL: ret = kvm_handle_hypercall(run); break; + case KVM_EXIT_SYSTEM_EVENT: + switch (run->system_event.type) { + case KVM_SYSTEM_EVENT_TDX_FATAL: + ret = tdx_handle_report_fatal_error(cpu, run); + break; + default: + ret = -1; + break; + } + break; default: fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); ret = -1; diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h index 88565e8..5f83e88 100644 --- a/target/i386/kvm/kvm_i386.h +++ b/target/i386/kvm/kvm_i386.h @@ -13,6 +13,8 @@ #include "system/kvm.h" +#define KVM_MAX_CPUID_ENTRIES 100 + /* always false if !CONFIG_KVM */ #define kvm_pit_in_kernel() \ (kvm_irqchip_in_kernel() && !kvm_irqchip_is_split()) @@ -42,6 +44,13 @@ void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask); #ifdef CONFIG_KVM +#include <linux/kvm.h> + +typedef struct KvmCpuidInfo { + struct kvm_cpuid2 cpuid; + struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; +} KvmCpuidInfo; + bool kvm_is_vm_type_supported(int type); bool kvm_has_adjust_clock_stable(void); bool kvm_has_exception_payload(void); @@ -57,6 +66,12 @@ uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address); void kvm_update_msi_routes_all(void *private, bool global, uint32_t index, uint32_t mask); +struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid, + uint32_t function, + uint32_t index); +uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg); +uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries, + uint32_t cpuid_i); #endif /* CONFIG_KVM */ void kvm_pc_setup_irq_routing(bool pci_enabled); diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build index 3996caf..3f44cde 100644 --- a/target/i386/kvm/meson.build +++ b/target/i386/kvm/meson.build @@ -8,6 +8,8 @@ i386_kvm_ss.add(files( i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c')) +i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c'), if_false: files('tdx-stub.c')) + i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c')) i386_system_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss) diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c new file mode 100644 index 0000000..720a4ff --- /dev/null +++ b/target/i386/kvm/tdx-stub.c @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "qemu/osdep.h" + +#include "tdx.h" + +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return -EINVAL; +} + +int tdx_parse_tdvf(void *flash_ptr, int size) +{ + return -EINVAL; +} + +int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run) +{ + return -EINVAL; +} diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c new file mode 100644 index 0000000..0a21ae5 --- /dev/null +++ b/target/i386/kvm/tdx.c @@ -0,0 +1,1289 @@ +/* + * QEMU TDX support + * + * Copyright (c) 2025 Intel Corporation + * + * Author: + * Xiaoyao Li <xiaoyao.li@intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qemu/base64.h" +#include "qemu/mmap-alloc.h" +#include "qapi/error.h" +#include "qom/object_interfaces.h" +#include "crypto/hash.h" +#include "system/kvm_int.h" +#include "system/runstate.h" +#include "system/system.h" +#include "system/ramblock.h" + +#include <linux/kvm_para.h> + +#include "cpu.h" +#include "cpu-internal.h" +#include "host-cpu.h" +#include "hw/i386/e820_memory_layout.h" +#include "hw/i386/tdvf.h" +#include "hw/i386/x86.h" +#include "hw/i386/tdvf-hob.h" +#include "kvm_i386.h" +#include "tdx.h" + +#include "standard-headers/asm-x86/kvm_para.h" + +#define TDX_MIN_TSC_FREQUENCY_KHZ (100 * 1000) +#define TDX_MAX_TSC_FREQUENCY_KHZ (10 * 1000 * 1000) + +#define TDX_TD_ATTRIBUTES_DEBUG BIT_ULL(0) +#define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE BIT_ULL(28) +#define TDX_TD_ATTRIBUTES_PKS BIT_ULL(30) +#define TDX_TD_ATTRIBUTES_PERFMON BIT_ULL(63) + +#define TDX_SUPPORTED_TD_ATTRS (TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE |\ + TDX_TD_ATTRIBUTES_PKS | \ + TDX_TD_ATTRIBUTES_PERFMON) + +#define TDX_SUPPORTED_KVM_FEATURES ((1U << KVM_FEATURE_NOP_IO_DELAY) | \ + (1U << KVM_FEATURE_PV_UNHALT) | \ + (1U << KVM_FEATURE_PV_TLB_FLUSH) | \ + (1U << KVM_FEATURE_PV_SEND_IPI) | \ + (1U << KVM_FEATURE_POLL_CONTROL) | \ + (1U << KVM_FEATURE_PV_SCHED_YIELD) | \ + (1U << KVM_FEATURE_MSI_EXT_DEST_ID)) + +static TdxGuest *tdx_guest; + +static struct kvm_tdx_capabilities *tdx_caps; +static struct kvm_cpuid2 *tdx_supported_cpuid; + +/* Valid after kvm_arch_init()->confidential_guest_kvm_init()->tdx_kvm_init() */ +bool is_tdx_vm(void) +{ + return !!tdx_guest; +} + +enum tdx_ioctl_level { + TDX_VM_IOCTL, + TDX_VCPU_IOCTL, +}; + +static int tdx_ioctl_internal(enum tdx_ioctl_level level, void *state, + int cmd_id, __u32 flags, void *data, + Error **errp) +{ + struct kvm_tdx_cmd tdx_cmd = {}; + int r; + + const char *tdx_ioctl_name[] = { + [KVM_TDX_CAPABILITIES] = "KVM_TDX_CAPABILITIES", + [KVM_TDX_INIT_VM] = "KVM_TDX_INIT_VM", + [KVM_TDX_INIT_VCPU] = "KVM_TDX_INIT_VCPU", + [KVM_TDX_INIT_MEM_REGION] = "KVM_TDX_INIT_MEM_REGION", + [KVM_TDX_FINALIZE_VM] = "KVM_TDX_FINALIZE_VM", + [KVM_TDX_GET_CPUID] = "KVM_TDX_GET_CPUID", + }; + + tdx_cmd.id = cmd_id; + tdx_cmd.flags = flags; + tdx_cmd.data = (__u64)(unsigned long)data; + + switch (level) { + case TDX_VM_IOCTL: + r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd); + break; + case TDX_VCPU_IOCTL: + r = kvm_vcpu_ioctl(state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd); + break; + default: + error_setg(errp, "Invalid tdx_ioctl_level %d", level); + return -EINVAL; + } + + if (r < 0) { + error_setg_errno(errp, -r, "TDX ioctl %s failed, hw_errors: 0x%llx", + tdx_ioctl_name[cmd_id], tdx_cmd.hw_error); + } + return r; +} + +static inline int tdx_vm_ioctl(int cmd_id, __u32 flags, void *data, + Error **errp) +{ + return tdx_ioctl_internal(TDX_VM_IOCTL, NULL, cmd_id, flags, data, errp); +} + +static inline int tdx_vcpu_ioctl(CPUState *cpu, int cmd_id, __u32 flags, + void *data, Error **errp) +{ + return tdx_ioctl_internal(TDX_VCPU_IOCTL, cpu, cmd_id, flags, data, errp); +} + +static int get_tdx_capabilities(Error **errp) +{ + struct kvm_tdx_capabilities *caps; + /* 1st generation of TDX reports 6 cpuid configs */ + int nr_cpuid_configs = 6; + size_t size; + int r; + + do { + Error *local_err = NULL; + size = sizeof(struct kvm_tdx_capabilities) + + nr_cpuid_configs * sizeof(struct kvm_cpuid_entry2); + caps = g_malloc0(size); + caps->cpuid.nent = nr_cpuid_configs; + + r = tdx_vm_ioctl(KVM_TDX_CAPABILITIES, 0, caps, &local_err); + if (r == -E2BIG) { + g_free(caps); + nr_cpuid_configs *= 2; + if (nr_cpuid_configs > KVM_MAX_CPUID_ENTRIES) { + error_report("KVM TDX seems broken that number of CPUID entries" + " in kvm_tdx_capabilities exceeds limit: %d", + KVM_MAX_CPUID_ENTRIES); + error_propagate(errp, local_err); + return r; + } + error_free(local_err); + } else if (r < 0) { + g_free(caps); + error_propagate(errp, local_err); + return r; + } + } while (r == -E2BIG); + + tdx_caps = caps; + + return 0; +} + +void tdx_set_tdvf_region(MemoryRegion *tdvf_mr) +{ + assert(!tdx_guest->tdvf_mr); + tdx_guest->tdvf_mr = tdvf_mr; +} + +static TdxFirmwareEntry *tdx_get_hob_entry(TdxGuest *tdx) +{ + TdxFirmwareEntry *entry; + + for_each_tdx_fw_entry(&tdx->tdvf, entry) { + if (entry->type == TDVF_SECTION_TYPE_TD_HOB) { + return entry; + } + } + error_report("TDVF metadata doesn't specify TD_HOB location."); + exit(1); +} + +static void tdx_add_ram_entry(uint64_t address, uint64_t length, + enum TdxRamType type) +{ + uint32_t nr_entries = tdx_guest->nr_ram_entries; + tdx_guest->ram_entries = g_renew(TdxRamEntry, tdx_guest->ram_entries, + nr_entries + 1); + + tdx_guest->ram_entries[nr_entries].address = address; + tdx_guest->ram_entries[nr_entries].length = length; + tdx_guest->ram_entries[nr_entries].type = type; + tdx_guest->nr_ram_entries++; +} + +static int tdx_accept_ram_range(uint64_t address, uint64_t length) +{ + uint64_t head_start, tail_start, head_length, tail_length; + uint64_t tmp_address, tmp_length; + TdxRamEntry *e; + int i = 0; + + do { + if (i == tdx_guest->nr_ram_entries) { + return -1; + } + + e = &tdx_guest->ram_entries[i++]; + } while (address + length <= e->address || address >= e->address + e->length); + + /* + * The to-be-accepted ram range must be fully contained by one + * RAM entry. + */ + if (e->address > address || + e->address + e->length < address + length) { + return -1; + } + + if (e->type == TDX_RAM_ADDED) { + return 0; + } + + tmp_address = e->address; + tmp_length = e->length; + + e->address = address; + e->length = length; + e->type = TDX_RAM_ADDED; + + head_length = address - tmp_address; + if (head_length > 0) { + head_start = tmp_address; + tdx_add_ram_entry(head_start, head_length, TDX_RAM_UNACCEPTED); + } + + tail_start = address + length; + if (tail_start < tmp_address + tmp_length) { + tail_length = tmp_address + tmp_length - tail_start; + tdx_add_ram_entry(tail_start, tail_length, TDX_RAM_UNACCEPTED); + } + + return 0; +} + +static int tdx_ram_entry_compare(const void *lhs_, const void* rhs_) +{ + const TdxRamEntry *lhs = lhs_; + const TdxRamEntry *rhs = rhs_; + + if (lhs->address == rhs->address) { + return 0; + } + if (le64_to_cpu(lhs->address) > le64_to_cpu(rhs->address)) { + return 1; + } + return -1; +} + +static void tdx_init_ram_entries(void) +{ + unsigned i, j, nr_e820_entries; + + nr_e820_entries = e820_get_table(NULL); + tdx_guest->ram_entries = g_new(TdxRamEntry, nr_e820_entries); + + for (i = 0, j = 0; i < nr_e820_entries; i++) { + uint64_t addr, len; + + if (e820_get_entry(i, E820_RAM, &addr, &len)) { + tdx_guest->ram_entries[j].address = addr; + tdx_guest->ram_entries[j].length = len; + tdx_guest->ram_entries[j].type = TDX_RAM_UNACCEPTED; + j++; + } + } + tdx_guest->nr_ram_entries = j; +} + +static void tdx_post_init_vcpus(void) +{ + TdxFirmwareEntry *hob; + CPUState *cpu; + + hob = tdx_get_hob_entry(tdx_guest); + CPU_FOREACH(cpu) { + tdx_vcpu_ioctl(cpu, KVM_TDX_INIT_VCPU, 0, (void *)hob->address, + &error_fatal); + } +} + +static void tdx_finalize_vm(Notifier *notifier, void *unused) +{ + TdxFirmware *tdvf = &tdx_guest->tdvf; + TdxFirmwareEntry *entry; + RAMBlock *ram_block; + Error *local_err = NULL; + int r; + + tdx_init_ram_entries(); + + for_each_tdx_fw_entry(tdvf, entry) { + switch (entry->type) { + case TDVF_SECTION_TYPE_BFV: + case TDVF_SECTION_TYPE_CFV: + entry->mem_ptr = tdvf->mem_ptr + entry->data_offset; + break; + case TDVF_SECTION_TYPE_TD_HOB: + case TDVF_SECTION_TYPE_TEMP_MEM: + entry->mem_ptr = qemu_ram_mmap(-1, entry->size, + qemu_real_host_page_size(), 0, 0); + if (entry->mem_ptr == MAP_FAILED) { + error_report("Failed to mmap memory for TDVF section %d", + entry->type); + exit(1); + } + if (tdx_accept_ram_range(entry->address, entry->size)) { + error_report("Failed to accept memory for TDVF section %d", + entry->type); + qemu_ram_munmap(-1, entry->mem_ptr, entry->size); + exit(1); + } + break; + default: + error_report("Unsupported TDVF section %d", entry->type); + exit(1); + } + } + + qsort(tdx_guest->ram_entries, tdx_guest->nr_ram_entries, + sizeof(TdxRamEntry), &tdx_ram_entry_compare); + + tdvf_hob_create(tdx_guest, tdx_get_hob_entry(tdx_guest)); + + tdx_post_init_vcpus(); + + for_each_tdx_fw_entry(tdvf, entry) { + struct kvm_tdx_init_mem_region region; + uint32_t flags; + + region = (struct kvm_tdx_init_mem_region) { + .source_addr = (uint64_t)entry->mem_ptr, + .gpa = entry->address, + .nr_pages = entry->size >> 12, + }; + + flags = entry->attributes & TDVF_SECTION_ATTRIBUTES_MR_EXTEND ? + KVM_TDX_MEASURE_MEMORY_REGION : 0; + + do { + error_free(local_err); + local_err = NULL; + r = tdx_vcpu_ioctl(first_cpu, KVM_TDX_INIT_MEM_REGION, flags, + ®ion, &local_err); + } while (r == -EAGAIN || r == -EINTR); + if (r < 0) { + error_report_err(local_err); + exit(1); + } + + if (entry->type == TDVF_SECTION_TYPE_TD_HOB || + entry->type == TDVF_SECTION_TYPE_TEMP_MEM) { + qemu_ram_munmap(-1, entry->mem_ptr, entry->size); + entry->mem_ptr = NULL; + } + } + + /* + * TDVF image has been copied into private region above via + * KVM_MEMORY_MAPPING. It becomes useless. + */ + ram_block = tdx_guest->tdvf_mr->ram_block; + ram_block_discard_range(ram_block, 0, ram_block->max_length); + + tdx_vm_ioctl(KVM_TDX_FINALIZE_VM, 0, NULL, &error_fatal); + CONFIDENTIAL_GUEST_SUPPORT(tdx_guest)->ready = true; +} + +static Notifier tdx_machine_done_notify = { + .notify = tdx_finalize_vm, +}; + +/* + * Some CPUID bits change from fixed1 to configurable bits when TDX module + * supports TDX_FEATURES0.VE_REDUCTION. e.g., MCA/MCE/MTRR/CORE_CAPABILITY. + * + * To make QEMU work with all the versions of TDX module, keep the fixed1 bits + * here if they are ever fixed1 bits in any of the version though not fixed1 in + * the latest version. Otherwise, with the older version of TDX module, QEMU may + * treat the fixed1 bit as unsupported. + * + * For newer TDX module, it does no harm to keep them in tdx_fixed1_bits even + * though they changed to configurable bits. Because tdx_fixed1_bits is used to + * setup the supported bits. + */ +KvmCpuidInfo tdx_fixed1_bits = { + .cpuid.nent = 8, + .entries[0] = { + .function = 0x1, + .index = 0, + .ecx = CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_DTES64 | + CPUID_EXT_DSCPL | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | + CPUID_EXT_PDCM | CPUID_EXT_PCID | CPUID_EXT_SSE41 | + CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE | + CPUID_EXT_POPCNT | CPUID_EXT_AES | CPUID_EXT_XSAVE | + CPUID_EXT_RDRAND | CPUID_EXT_HYPERVISOR, + .edx = CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC | + CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | + CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | + CPUID_PAT | CPUID_CLFLUSH | CPUID_DTS | CPUID_MMX | CPUID_FXSR | + CPUID_SSE | CPUID_SSE2, + }, + .entries[1] = { + .function = 0x6, + .index = 0, + .eax = CPUID_6_EAX_ARAT, + }, + .entries[2] = { + .function = 0x7, + .index = 0, + .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX, + .ebx = CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_FDP_EXCPTN_ONLY | + CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_INVPCID | + CPUID_7_0_EBX_ZERO_FCS_FDS | CPUID_7_0_EBX_RDSEED | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | + CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_SHA_NI, + .ecx = CPUID_7_0_ECX_BUS_LOCK_DETECT | CPUID_7_0_ECX_MOVDIRI | + CPUID_7_0_ECX_MOVDIR64B, + .edx = CPUID_7_0_EDX_MD_CLEAR | CPUID_7_0_EDX_SPEC_CTRL | + CPUID_7_0_EDX_STIBP | CPUID_7_0_EDX_FLUSH_L1D | + CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_CORE_CAPABILITY | + CPUID_7_0_EDX_SPEC_CTRL_SSBD, + }, + .entries[3] = { + .function = 0x7, + .index = 2, + .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX, + .edx = CPUID_7_2_EDX_PSFD | CPUID_7_2_EDX_IPRED_CTRL | + CPUID_7_2_EDX_RRSBA_CTRL | CPUID_7_2_EDX_BHI_CTRL, + }, + .entries[4] = { + .function = 0xD, + .index = 0, + .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX, + .eax = XSTATE_FP_MASK | XSTATE_SSE_MASK, + }, + .entries[5] = { + .function = 0xD, + .index = 1, + .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX, + .eax = CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC| + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, + }, + .entries[6] = { + .function = 0x80000001, + .index = 0, + .ecx = CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH, + /* + * Strictly speaking, SYSCALL is not fixed1 bit since it depends on + * the CPU to be in 64-bit mode. But here fixed1 is used to serve the + * purpose of supported bits for TDX. In this sense, SYACALL is always + * supported. + */ + .edx = CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB | + CPUID_EXT2_RDTSCP | CPUID_EXT2_LM, + }, + .entries[7] = { + .function = 0x80000007, + .index = 0, + .edx = CPUID_APM_INVTSC, + }, +}; + +typedef struct TdxAttrsMap { + uint32_t attr_index; + uint32_t cpuid_leaf; + uint32_t cpuid_subleaf; + int cpuid_reg; + uint32_t feat_mask; +} TdxAttrsMap; + +static TdxAttrsMap tdx_attrs_maps[] = { + {.attr_index = 27, + .cpuid_leaf = 7, + .cpuid_subleaf = 1, + .cpuid_reg = R_EAX, + .feat_mask = CPUID_7_1_EAX_LASS,}, + + {.attr_index = 30, + .cpuid_leaf = 7, + .cpuid_subleaf = 0, + .cpuid_reg = R_ECX, + .feat_mask = CPUID_7_0_ECX_PKS,}, + + {.attr_index = 31, + .cpuid_leaf = 7, + .cpuid_subleaf = 0, + .cpuid_reg = R_ECX, + .feat_mask = CPUID_7_0_ECX_KeyLocker,}, +}; + +typedef struct TdxXFAMDep { + int xfam_bit; + FeatureMask feat_mask; +} TdxXFAMDep; + +/* + * Note, only the CPUID bits whose virtualization type are "XFAM & Native" are + * defiend here. + * + * For those whose virtualization type are "XFAM & Configured & Native", they + * are reported as configurable bits. And they are not supported if not in the + * configureable bits list from KVM even if the corresponding XFAM bit is + * supported. + */ +TdxXFAMDep tdx_xfam_deps[] = { + { XSTATE_YMM_BIT, { FEAT_1_ECX, CPUID_EXT_FMA }}, + { XSTATE_YMM_BIT, { FEAT_7_0_EBX, CPUID_7_0_EBX_AVX2 }}, + { XSTATE_OPMASK_BIT, { FEAT_7_0_ECX, CPUID_7_0_ECX_AVX512_VBMI}}, + { XSTATE_OPMASK_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AVX512_FP16}}, + { XSTATE_PT_BIT, { FEAT_7_0_EBX, CPUID_7_0_EBX_INTEL_PT}}, + { XSTATE_PKRU_BIT, { FEAT_7_0_ECX, CPUID_7_0_ECX_PKU}}, + { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_BF16 }}, + { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_TILE }}, + { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_INT8 }}, +}; + +static struct kvm_cpuid_entry2 *find_in_supported_entry(uint32_t function, + uint32_t index) +{ + struct kvm_cpuid_entry2 *e; + + e = cpuid_find_entry(tdx_supported_cpuid, function, index); + if (!e) { + if (tdx_supported_cpuid->nent >= KVM_MAX_CPUID_ENTRIES) { + error_report("tdx_supported_cpuid requries more space than %d entries", + KVM_MAX_CPUID_ENTRIES); + exit(1); + } + e = &tdx_supported_cpuid->entries[tdx_supported_cpuid->nent++]; + e->function = function; + e->index = index; + } + + return e; +} + +static void tdx_add_supported_cpuid_by_fixed1_bits(void) +{ + struct kvm_cpuid_entry2 *e, *e1; + int i; + + for (i = 0; i < tdx_fixed1_bits.cpuid.nent; i++) { + e = &tdx_fixed1_bits.entries[i]; + + e1 = find_in_supported_entry(e->function, e->index); + e1->eax |= e->eax; + e1->ebx |= e->ebx; + e1->ecx |= e->ecx; + e1->edx |= e->edx; + } +} + +static void tdx_add_supported_cpuid_by_attrs(void) +{ + struct kvm_cpuid_entry2 *e; + TdxAttrsMap *map; + int i; + + for (i = 0; i < ARRAY_SIZE(tdx_attrs_maps); i++) { + map = &tdx_attrs_maps[i]; + if (!((1ULL << map->attr_index) & tdx_caps->supported_attrs)) { + continue; + } + + e = find_in_supported_entry(map->cpuid_leaf, map->cpuid_subleaf); + + switch(map->cpuid_reg) { + case R_EAX: + e->eax |= map->feat_mask; + break; + case R_EBX: + e->ebx |= map->feat_mask; + break; + case R_ECX: + e->ecx |= map->feat_mask; + break; + case R_EDX: + e->edx |= map->feat_mask; + break; + } + } +} + +static void tdx_add_supported_cpuid_by_xfam(void) +{ + struct kvm_cpuid_entry2 *e; + int i; + + const TdxXFAMDep *xfam_dep; + const FeatureWordInfo *f; + for (i = 0; i < ARRAY_SIZE(tdx_xfam_deps); i++) { + xfam_dep = &tdx_xfam_deps[i]; + if (!((1ULL << xfam_dep->xfam_bit) & tdx_caps->supported_xfam)) { + continue; + } + + f = &feature_word_info[xfam_dep->feat_mask.index]; + if (f->type != CPUID_FEATURE_WORD) { + continue; + } + + e = find_in_supported_entry(f->cpuid.eax, f->cpuid.ecx); + switch(f->cpuid.reg) { + case R_EAX: + e->eax |= xfam_dep->feat_mask.mask; + break; + case R_EBX: + e->ebx |= xfam_dep->feat_mask.mask; + break; + case R_ECX: + e->ecx |= xfam_dep->feat_mask.mask; + break; + case R_EDX: + e->edx |= xfam_dep->feat_mask.mask; + break; + } + } + + e = find_in_supported_entry(0xd, 0); + e->eax |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK); + e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK) >> 32; + + e = find_in_supported_entry(0xd, 1); + /* + * Mark XFD always support for TDX, it will be cleared finally in + * tdx_adjust_cpuid_features() if XFD is unavailable on the hardware + * because in this case the original data has it as 0. + */ + e->eax |= CPUID_XSAVE_XFD; + e->ecx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK); + e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK) >> 32; +} + +static void tdx_add_supported_kvm_features(void) +{ + struct kvm_cpuid_entry2 *e; + + e = find_in_supported_entry(0x40000001, 0); + e->eax = TDX_SUPPORTED_KVM_FEATURES; +} + +static void tdx_setup_supported_cpuid(void) +{ + if (tdx_supported_cpuid) { + return; + } + + tdx_supported_cpuid = g_malloc0(sizeof(*tdx_supported_cpuid) + + KVM_MAX_CPUID_ENTRIES * sizeof(struct kvm_cpuid_entry2)); + + memcpy(tdx_supported_cpuid->entries, tdx_caps->cpuid.entries, + tdx_caps->cpuid.nent * sizeof(struct kvm_cpuid_entry2)); + tdx_supported_cpuid->nent = tdx_caps->cpuid.nent; + + tdx_add_supported_cpuid_by_fixed1_bits(); + tdx_add_supported_cpuid_by_attrs(); + tdx_add_supported_cpuid_by_xfam(); + + tdx_add_supported_kvm_features(); +} + +static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + X86MachineState *x86ms = X86_MACHINE(ms); + TdxGuest *tdx = TDX_GUEST(cgs); + int r = 0; + + kvm_mark_guest_state_protected(); + + if (x86ms->smm == ON_OFF_AUTO_AUTO) { + x86ms->smm = ON_OFF_AUTO_OFF; + } else if (x86ms->smm == ON_OFF_AUTO_ON) { + error_setg(errp, "TDX VM doesn't support SMM"); + return -EINVAL; + } + + if (x86ms->pic == ON_OFF_AUTO_AUTO) { + x86ms->pic = ON_OFF_AUTO_OFF; + } else if (x86ms->pic == ON_OFF_AUTO_ON) { + error_setg(errp, "TDX VM doesn't support PIC"); + return -EINVAL; + } + + if (kvm_state->kernel_irqchip_split == ON_OFF_AUTO_AUTO) { + kvm_state->kernel_irqchip_split = ON_OFF_AUTO_ON; + } else if (kvm_state->kernel_irqchip_split != ON_OFF_AUTO_ON) { + error_setg(errp, "TDX VM requires kernel_irqchip to be split"); + return -EINVAL; + } + + if (!tdx_caps) { + r = get_tdx_capabilities(errp); + if (r) { + return r; + } + } + + tdx_setup_supported_cpuid(); + + /* TDX relies on KVM_HC_MAP_GPA_RANGE to handle TDG.VP.VMCALL<MapGPA> */ + if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) { + return -EOPNOTSUPP; + } + + /* + * Set kvm_readonly_mem_allowed to false, because TDX only supports readonly + * memory for shared memory but not for private memory. Besides, whether a + * memslot is private or shared is not determined by QEMU. + * + * Thus, just mark readonly memory not supported for simplicity. + */ + kvm_readonly_mem_allowed = false; + + qemu_add_machine_init_done_notifier(&tdx_machine_done_notify); + + tdx_guest = tdx; + return 0; +} + +static int tdx_kvm_type(X86ConfidentialGuest *cg) +{ + /* Do the object check */ + TDX_GUEST(cg); + + return KVM_X86_TDX_VM; +} + +static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu) +{ + X86CPU *x86cpu = X86_CPU(cpu); + + object_property_set_bool(OBJECT(cpu), "pmu", false, &error_abort); + + /* invtsc is fixed1 for TD guest */ + object_property_set_bool(OBJECT(cpu), "invtsc", true, &error_abort); + + x86cpu->enable_cpuid_0x1f = true; +} + +static uint32_t tdx_adjust_cpuid_features(X86ConfidentialGuest *cg, + uint32_t feature, uint32_t index, + int reg, uint32_t value) +{ + struct kvm_cpuid_entry2 *e; + + e = cpuid_find_entry(&tdx_fixed1_bits.cpuid, feature, index); + if (e) { + value |= cpuid_entry_get_reg(e, reg); + } + + if (is_feature_word_cpuid(feature, index, reg)) { + e = cpuid_find_entry(tdx_supported_cpuid, feature, index); + if (e) { + value &= cpuid_entry_get_reg(e, reg); + } + } + + return value; +} + +static struct kvm_cpuid2 *tdx_fetch_cpuid(CPUState *cpu, int *ret) +{ + struct kvm_cpuid2 *fetch_cpuid; + int size = KVM_MAX_CPUID_ENTRIES; + Error *local_err = NULL; + int r; + + do { + error_free(local_err); + local_err = NULL; + + fetch_cpuid = g_malloc0(sizeof(*fetch_cpuid) + + sizeof(struct kvm_cpuid_entry2) * size); + fetch_cpuid->nent = size; + r = tdx_vcpu_ioctl(cpu, KVM_TDX_GET_CPUID, 0, fetch_cpuid, &local_err); + if (r == -E2BIG) { + g_free(fetch_cpuid); + size = fetch_cpuid->nent; + } + } while (r == -E2BIG); + + if (r < 0) { + error_report_err(local_err); + *ret = r; + return NULL; + } + + return fetch_cpuid; +} + +static int tdx_check_features(X86ConfidentialGuest *cg, CPUState *cs) +{ + uint64_t actual, requested, unavailable, forced_on; + g_autofree struct kvm_cpuid2 *fetch_cpuid; + const char *forced_on_prefix = NULL; + const char *unav_prefix = NULL; + struct kvm_cpuid_entry2 *entry; + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + FeatureWordInfo *wi; + FeatureWord w; + bool mismatch = false; + int r; + + fetch_cpuid = tdx_fetch_cpuid(cs, &r); + if (!fetch_cpuid) { + return r; + } + + if (cpu->check_cpuid || cpu->enforce_cpuid) { + unav_prefix = "TDX doesn't support requested feature"; + forced_on_prefix = "TDX forcibly sets the feature"; + } + + for (w = 0; w < FEATURE_WORDS; w++) { + wi = &feature_word_info[w]; + actual = 0; + + switch (wi->type) { + case CPUID_FEATURE_WORD: + entry = cpuid_find_entry(fetch_cpuid, wi->cpuid.eax, wi->cpuid.ecx); + if (!entry) { + /* + * If KVM doesn't report it means it's totally configurable + * by QEMU + */ + continue; + } + + actual = cpuid_entry_get_reg(entry, wi->cpuid.reg); + break; + case MSR_FEATURE_WORD: + /* + * TODO: + * validate MSR features when KVM has interface report them. + */ + continue; + } + + /* Fixup for special cases */ + switch (w) { + case FEAT_8000_0001_EDX: + /* + * Intel enumerates SYSCALL bit as 1 only when processor in 64-bit + * mode and before vcpu running it's not in 64-bit mode. + */ + actual |= CPUID_EXT2_SYSCALL; + break; + default: + break; + } + + requested = env->features[w]; + unavailable = requested & ~actual; + mark_unavailable_features(cpu, w, unavailable, unav_prefix); + if (unavailable) { + mismatch = true; + } + + forced_on = actual & ~requested; + mark_forced_on_features(cpu, w, forced_on, forced_on_prefix); + if (forced_on) { + mismatch = true; + } + } + + if (cpu->enforce_cpuid && mismatch) { + return -EINVAL; + } + + if (cpu->phys_bits != host_cpu_phys_bits()) { + error_report("TDX requires guest CPU physical bits (%u) " + "to match host CPU physical bits (%u)", + cpu->phys_bits, host_cpu_phys_bits()); + return -EINVAL; + } + + return 0; +} + +static int tdx_validate_attributes(TdxGuest *tdx, Error **errp) +{ + if ((tdx->attributes & ~tdx_caps->supported_attrs)) { + error_setg(errp, "Invalid attributes 0x%lx for TDX VM " + "(KVM supported: 0x%llx)", tdx->attributes, + tdx_caps->supported_attrs); + return -1; + } + + if (tdx->attributes & ~TDX_SUPPORTED_TD_ATTRS) { + error_setg(errp, "Some QEMU unsupported TD attribute bits being " + "requested: 0x%lx (QEMU supported: 0x%llx)", + tdx->attributes, TDX_SUPPORTED_TD_ATTRS); + return -1; + } + + return 0; +} + +static int setup_td_guest_attributes(X86CPU *x86cpu, Error **errp) +{ + CPUX86State *env = &x86cpu->env; + + tdx_guest->attributes |= (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS) ? + TDX_TD_ATTRIBUTES_PKS : 0; + tdx_guest->attributes |= x86cpu->enable_pmu ? TDX_TD_ATTRIBUTES_PERFMON : 0; + + return tdx_validate_attributes(tdx_guest, errp); +} + +static int setup_td_xfam(X86CPU *x86cpu, Error **errp) +{ + CPUX86State *env = &x86cpu->env; + uint64_t xfam; + + xfam = env->features[FEAT_XSAVE_XCR0_LO] | + env->features[FEAT_XSAVE_XCR0_HI] | + env->features[FEAT_XSAVE_XSS_LO] | + env->features[FEAT_XSAVE_XSS_HI]; + + if (xfam & ~tdx_caps->supported_xfam) { + error_setg(errp, "Invalid XFAM 0x%lx for TDX VM (supported: 0x%llx))", + xfam, tdx_caps->supported_xfam); + return -1; + } + + tdx_guest->xfam = xfam; + return 0; +} + +static void tdx_filter_cpuid(struct kvm_cpuid2 *cpuids) +{ + int i, dest_cnt = 0; + struct kvm_cpuid_entry2 *src, *dest, *conf; + + for (i = 0; i < cpuids->nent; i++) { + src = cpuids->entries + i; + conf = cpuid_find_entry(&tdx_caps->cpuid, src->function, src->index); + if (!conf) { + continue; + } + dest = cpuids->entries + dest_cnt; + + dest->function = src->function; + dest->index = src->index; + dest->flags = src->flags; + dest->eax = src->eax & conf->eax; + dest->ebx = src->ebx & conf->ebx; + dest->ecx = src->ecx & conf->ecx; + dest->edx = src->edx & conf->edx; + + dest_cnt++; + } + cpuids->nent = dest_cnt++; +} + +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + X86CPU *x86cpu = X86_CPU(cpu); + CPUX86State *env = &x86cpu->env; + g_autofree struct kvm_tdx_init_vm *init_vm = NULL; + Error *local_err = NULL; + size_t data_len; + int retry = 10000; + int r = 0; + + QEMU_LOCK_GUARD(&tdx_guest->lock); + if (tdx_guest->initialized) { + return r; + } + + init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) + + sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); + + if (!kvm_check_extension(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS)) { + error_setg(errp, "KVM doesn't support KVM_CAP_X86_APIC_BUS_CYCLES_NS"); + return -EOPNOTSUPP; + } + + r = kvm_vm_enable_cap(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS, + 0, TDX_APIC_BUS_CYCLES_NS); + if (r < 0) { + error_setg_errno(errp, -r, + "Unable to set core crystal clock frequency to 25MHz"); + return r; + } + + if (env->tsc_khz && (env->tsc_khz < TDX_MIN_TSC_FREQUENCY_KHZ || + env->tsc_khz > TDX_MAX_TSC_FREQUENCY_KHZ)) { + error_setg(errp, "Invalid TSC %ld KHz, must specify cpu_frequency " + "between [%d, %d] kHz", env->tsc_khz, + TDX_MIN_TSC_FREQUENCY_KHZ, TDX_MAX_TSC_FREQUENCY_KHZ); + return -EINVAL; + } + + if (env->tsc_khz % (25 * 1000)) { + error_setg(errp, "Invalid TSC %ld KHz, it must be multiple of 25MHz", + env->tsc_khz); + return -EINVAL; + } + + /* it's safe even env->tsc_khz is 0. KVM uses host's tsc_khz in this case */ + r = kvm_vm_ioctl(kvm_state, KVM_SET_TSC_KHZ, env->tsc_khz); + if (r < 0) { + error_setg_errno(errp, -r, "Unable to set TSC frequency to %ld kHz", + env->tsc_khz); + return r; + } + + if (tdx_guest->mrconfigid) { + g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrconfigid, + strlen(tdx_guest->mrconfigid), &data_len, errp); + if (!data) { + return -1; + } + if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { + error_setg(errp, "TDX: failed to decode mrconfigid"); + return -1; + } + memcpy(init_vm->mrconfigid, data, data_len); + } + + if (tdx_guest->mrowner) { + g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrowner, + strlen(tdx_guest->mrowner), &data_len, errp); + if (!data) { + return -1; + } + if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { + error_setg(errp, "TDX: failed to decode mrowner"); + return -1; + } + memcpy(init_vm->mrowner, data, data_len); + } + + if (tdx_guest->mrownerconfig) { + g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrownerconfig, + strlen(tdx_guest->mrownerconfig), &data_len, errp); + if (!data) { + return -1; + } + if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { + error_setg(errp, "TDX: failed to decode mrownerconfig"); + return -1; + } + memcpy(init_vm->mrownerconfig, data, data_len); + } + + r = setup_td_guest_attributes(x86cpu, errp); + if (r) { + return r; + } + + r = setup_td_xfam(x86cpu, errp); + if (r) { + return r; + } + + init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0); + tdx_filter_cpuid(&init_vm->cpuid); + + init_vm->attributes = tdx_guest->attributes; + init_vm->xfam = tdx_guest->xfam; + + /* + * KVM_TDX_INIT_VM gets -EAGAIN when KVM side SEAMCALL(TDH_MNG_CREATE) + * gets TDX_RND_NO_ENTROPY due to Random number generation (e.g., RDRAND or + * RDSEED) is busy. + * + * Retry for the case. + */ + do { + error_free(local_err); + local_err = NULL; + r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm, &local_err); + } while (r == -EAGAIN && --retry); + + if (r < 0) { + if (!retry) { + error_append_hint(&local_err, "Hardware RNG (Random Number " + "Generator) is busy occupied by someone (via RDRAND/RDSEED) " + "maliciously, which leads to KVM_TDX_INIT_VM keeping failure " + "due to lack of entropy.\n"); + } + error_propagate(errp, local_err); + return r; + } + + tdx_guest->initialized = true; + + return 0; +} + +int tdx_parse_tdvf(void *flash_ptr, int size) +{ + return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size); +} + +static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code, + char *message, uint64_t gpa) +{ + GuestPanicInformation *panic_info; + + panic_info = g_new0(GuestPanicInformation, 1); + panic_info->type = GUEST_PANIC_INFORMATION_TYPE_TDX; + panic_info->u.tdx.error_code = (uint32_t) error_code; + panic_info->u.tdx.message = message; + panic_info->u.tdx.gpa = gpa; + + qemu_system_guest_panicked(panic_info); +} + +/* + * Only 8 registers can contain valid ASCII byte stream to form the fatal + * message, and their sequence is: R14, R15, RBX, RDI, RSI, R8, R9, RDX + */ +#define TDX_FATAL_MESSAGE_MAX 64 + +#define TDX_REPORT_FATAL_ERROR_GPA_VALID BIT_ULL(63) + +int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run) +{ + uint64_t error_code = run->system_event.data[R_R12]; + uint64_t reg_mask = run->system_event.data[R_ECX]; + char *message = NULL; + uint64_t *tmp; + uint64_t gpa = -1ull; + + if (error_code & 0xffff) { + error_report("TDX: REPORT_FATAL_ERROR: invalid error code: 0x%lx", + error_code); + return -1; + } + + if (reg_mask) { + message = g_malloc0(TDX_FATAL_MESSAGE_MAX + 1); + tmp = (uint64_t *)message; + +#define COPY_REG(REG) \ + do { \ + if (reg_mask & BIT_ULL(REG)) { \ + *(tmp++) = run->system_event.data[REG]; \ + } \ + } while (0) + + COPY_REG(R_R14); + COPY_REG(R_R15); + COPY_REG(R_EBX); + COPY_REG(R_EDI); + COPY_REG(R_ESI); + COPY_REG(R_R8); + COPY_REG(R_R9); + COPY_REG(R_EDX); + *((char *)tmp) = '\0'; + } +#undef COPY_REG + + if (error_code & TDX_REPORT_FATAL_ERROR_GPA_VALID) { + gpa = run->system_event.data[R_R13]; + } + + tdx_panicked_on_fatal_error(cpu, error_code, message, gpa); + + return -1; +} + +static bool tdx_guest_get_sept_ve_disable(Object *obj, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + return !!(tdx->attributes & TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE); +} + +static void tdx_guest_set_sept_ve_disable(Object *obj, bool value, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + if (value) { + tdx->attributes |= TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; + } else { + tdx->attributes &= ~TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; + } +} + +static char *tdx_guest_get_mrconfigid(Object *obj, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + return g_strdup(tdx->mrconfigid); +} + +static void tdx_guest_set_mrconfigid(Object *obj, const char *value, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + g_free(tdx->mrconfigid); + tdx->mrconfigid = g_strdup(value); +} + +static char *tdx_guest_get_mrowner(Object *obj, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + return g_strdup(tdx->mrowner); +} + +static void tdx_guest_set_mrowner(Object *obj, const char *value, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + g_free(tdx->mrowner); + tdx->mrowner = g_strdup(value); +} + +static char *tdx_guest_get_mrownerconfig(Object *obj, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + return g_strdup(tdx->mrownerconfig); +} + +static void tdx_guest_set_mrownerconfig(Object *obj, const char *value, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + g_free(tdx->mrownerconfig); + tdx->mrownerconfig = g_strdup(value); +} + +/* tdx guest */ +OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest, + tdx_guest, + TDX_GUEST, + X86_CONFIDENTIAL_GUEST, + { TYPE_USER_CREATABLE }, + { NULL }) + +static void tdx_guest_init(Object *obj) +{ + ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj); + TdxGuest *tdx = TDX_GUEST(obj); + + qemu_mutex_init(&tdx->lock); + + cgs->require_guest_memfd = true; + tdx->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; + + object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes, + OBJ_PROP_FLAG_READWRITE); + object_property_add_bool(obj, "sept-ve-disable", + tdx_guest_get_sept_ve_disable, + tdx_guest_set_sept_ve_disable); + object_property_add_str(obj, "mrconfigid", + tdx_guest_get_mrconfigid, + tdx_guest_set_mrconfigid); + object_property_add_str(obj, "mrowner", + tdx_guest_get_mrowner, tdx_guest_set_mrowner); + object_property_add_str(obj, "mrownerconfig", + tdx_guest_get_mrownerconfig, + tdx_guest_set_mrownerconfig); +} + +static void tdx_guest_finalize(Object *obj) +{ +} + +static void tdx_guest_class_init(ObjectClass *oc, const void *data) +{ + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->kvm_init = tdx_kvm_init; + x86_klass->kvm_type = tdx_kvm_type; + x86_klass->cpu_instance_init = tdx_cpu_instance_init; + x86_klass->adjust_cpuid_features = tdx_adjust_cpuid_features; + x86_klass->check_features = tdx_check_features; +} diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h new file mode 100644 index 0000000..04b5afe --- /dev/null +++ b/target/i386/kvm/tdx.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef QEMU_I386_TDX_H +#define QEMU_I386_TDX_H + +#ifndef CONFIG_USER_ONLY +#include CONFIG_DEVICES /* CONFIG_TDX */ +#endif + +#include "confidential-guest.h" +#include "cpu.h" +#include "hw/i386/tdvf.h" + +#define TYPE_TDX_GUEST "tdx-guest" +#define TDX_GUEST(obj) OBJECT_CHECK(TdxGuest, (obj), TYPE_TDX_GUEST) + +typedef struct TdxGuestClass { + X86ConfidentialGuestClass parent_class; +} TdxGuestClass; + +/* TDX requires bus frequency 25MHz */ +#define TDX_APIC_BUS_CYCLES_NS 40 + +enum TdxRamType { + TDX_RAM_UNACCEPTED, + TDX_RAM_ADDED, +}; + +typedef struct TdxRamEntry { + uint64_t address; + uint64_t length; + enum TdxRamType type; +} TdxRamEntry; + +typedef struct TdxGuest { + X86ConfidentialGuest parent_obj; + + QemuMutex lock; + + bool initialized; + uint64_t attributes; /* TD attributes */ + uint64_t xfam; + char *mrconfigid; /* base64 encoded sha348 digest */ + char *mrowner; /* base64 encoded sha348 digest */ + char *mrownerconfig; /* base64 encoded sha348 digest */ + + MemoryRegion *tdvf_mr; + TdxFirmware tdvf; + + uint32_t nr_ram_entries; + TdxRamEntry *ram_entries; +} TdxGuest; + +#ifdef CONFIG_TDX +bool is_tdx_vm(void); +#else +#define is_tdx_vm() 0 +#endif /* CONFIG_TDX */ + +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp); +void tdx_set_tdvf_region(MemoryRegion *tdvf_mr); +int tdx_parse_tdvf(void *flash_ptr, int size); +int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run); + +#endif /* QEMU_I386_TDX_H */ diff --git a/target/i386/machine.c b/target/i386/machine.c index 6cb561c..dd2dac1 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -1060,9 +1060,8 @@ static bool tsc_khz_needed(void *opaque) { X86CPU *cpu = opaque; CPUX86State *env = &cpu->env; - MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); - X86MachineClass *x86mc = X86_MACHINE_CLASS(mc); - return env->tsc_khz && x86mc->save_tsc_khz; + + return env->tsc_khz; } static const VMStateDescription vmstate_tsc_khz = { diff --git a/target/i386/sev.c b/target/i386/sev.c index 56dd64e..1a12f06 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -212,14 +212,6 @@ static const char *const sev_fw_errlist[] = { #define SEV_FW_MAX_ERROR ARRAY_SIZE(sev_fw_errlist) -/* <linux/kvm.h> doesn't expose this, so re-use the max from kvm.c */ -#define KVM_MAX_CPUID_ENTRIES 100 - -typedef struct KvmCpuidInfo { - struct kvm_cpuid2 cpuid; - struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; -} KvmCpuidInfo; - #define SNP_CPUID_FUNCTION_MAXCOUNT 64 #define SNP_CPUID_FUNCTION_UNKNOWN 0xFFFFFFFF @@ -947,7 +939,7 @@ out: } static uint32_t -sev_snp_mask_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, +sev_snp_adjust_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, int reg, uint32_t value) { switch (feature) { @@ -2405,7 +2397,7 @@ sev_snp_guest_class_init(ObjectClass *oc, const void *data) klass->launch_finish = sev_snp_launch_finish; klass->launch_update_data = sev_snp_launch_update_data; klass->kvm_init = sev_snp_kvm_init; - x86_klass->mask_cpuid_features = sev_snp_mask_cpuid_features; + x86_klass->adjust_cpuid_features = sev_snp_adjust_cpuid_features; x86_klass->kvm_type = sev_snp_kvm_type; object_class_property_add(oc, "policy", "uint64", diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h index 6b3f198..be011b0 100644 --- a/target/i386/tcg/helper-tcg.h +++ b/target/i386/tcg/helper-tcg.h @@ -97,7 +97,7 @@ static inline unsigned int compute_pf(uint8_t x) /* misc_helper.c */ void cpu_load_eflags(CPUX86State *env, int eflags, int update_mask); -/* sysemu/svm_helper.c */ +/* system/svm_helper.c */ #ifndef CONFIG_USER_ONLY G_NORETURN void cpu_vmexit(CPUX86State *nenv, uint32_t exit_code, uint64_t exit_info_1, uintptr_t retaddr); @@ -115,7 +115,7 @@ int exception_has_error_code(int intno); /* smm_helper.c */ void do_smm_enter(X86CPU *cpu); -/* sysemu/bpt_helper.c */ +/* system/bpt_helper.c */ bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update); /* diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c index 1bda570..c66bdd5 100644 --- a/target/loongarch/kvm/kvm.c +++ b/target/loongarch/kvm/kvm.c @@ -1071,7 +1071,11 @@ static int kvm_cpu_check_pv_features(CPUState *cs, Error **errp) env->pv_features |= BIT(KVM_FEATURE_VIRT_EXTIOI); } } + return 0; +} +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ return 0; } diff --git a/target/mips/kvm.c b/target/mips/kvm.c index d67b7c1..ec53acb 100644 --- a/target/mips/kvm.c +++ b/target/mips/kvm.c @@ -61,6 +61,11 @@ int kvm_arch_irqchip_create(KVMState *s) return 0; } +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { CPUMIPSState *env = cpu_env(cs); diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 8a957c3..0156580 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -479,6 +479,11 @@ static void kvmppc_hw_debug_points_init(CPUPPCState *cenv) } } +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index efb41fa..e1a04be 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -1472,6 +1472,11 @@ static int kvm_vcpu_enable_sbi_dbcn(RISCVCPU *cpu, CPUState *cs) return kvm_set_one_reg(cs, kvm_sbi_dbcn.kvm_reg_id, ®); } +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { int ret = 0; diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c index 6cd2ebc..67d9a19 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c @@ -398,6 +398,11 @@ unsigned long kvm_arch_vcpu_id(CPUState *cpu) return cpu->cpu_index; } +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { unsigned int max_cpus = MACHINE(qdev_get_machine())->smp.max_cpus; diff --git a/tcg/meson.build b/tcg/meson.build index bd2821e..706a6eb 100644 --- a/tcg/meson.build +++ b/tcg/meson.build @@ -27,5 +27,5 @@ if host_os == 'linux' tcg_ss.add(files('perf.c')) endif -libuser_ss.add_all(tcg_ss) -libsystem_ss.add_all(tcg_ss) +user_ss.add_all(tcg_ss) +system_ss.add_all(tcg_ss) diff --git a/tests/data/uefi-boot-images/bios-tables-test.loongarch64.iso.qcow2 b/tests/data/uefi-boot-images/bios-tables-test.loongarch64.iso.qcow2 Binary files differnew file mode 100644 index 0000000..18daee0 --- /dev/null +++ b/tests/data/uefi-boot-images/bios-tables-test.loongarch64.iso.qcow2 diff --git a/tests/functional/test_mem_addr_space.py b/tests/functional/test_mem_addr_space.py index 2d9d31e..61b4a19 100755 --- a/tests/functional/test_mem_addr_space.py +++ b/tests/functional/test_mem_addr_space.py @@ -58,8 +58,8 @@ class MemAddrCheck(QemuSystemTest): should start fine. """ self.ensure_64bit_binary() - self.vm.add_args('-S', '-machine', 'q35', '-m', - '512,slots=1,maxmem=59.6G', + self.set_machine('q35') + self.vm.add_args('-S', '-m', '512,slots=1,maxmem=59.6G', '-cpu', 'pentium,pse36=on', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -76,8 +76,8 @@ class MemAddrCheck(QemuSystemTest): with pse36 above. """ self.ensure_64bit_binary() - self.vm.add_args('-S', '-machine', 'q35', '-m', - '512,slots=1,maxmem=59.6G', + self.set_machine('q35') + self.vm.add_args('-S', '-m', '512,slots=1,maxmem=59.6G', '-cpu', 'pentium,pae=on', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -93,8 +93,8 @@ class MemAddrCheck(QemuSystemTest): same options as the failing case above with pse36 cpu feature. """ self.ensure_64bit_binary() - self.vm.add_args('-machine', 'q35', '-m', - '512,slots=1,maxmem=59.5G', + self.set_machine('q35') + self.vm.add_args('-m', '512,slots=1,maxmem=59.5G', '-cpu', 'pentium,pse36=on', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -111,8 +111,8 @@ class MemAddrCheck(QemuSystemTest): with the same options as the case above. """ self.ensure_64bit_binary() - self.vm.add_args('-machine', 'q35', '-m', - '512,slots=1,maxmem=59.5G', + self.set_machine('q35') + self.vm.add_args('-m', '512,slots=1,maxmem=59.5G', '-cpu', 'pentium,pae=on', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -128,8 +128,8 @@ class MemAddrCheck(QemuSystemTest): with pse36 ON. """ self.ensure_64bit_binary() - self.vm.add_args('-machine', 'q35', '-m', - '512,slots=1,maxmem=59.5G', + self.set_machine('q35') + self.vm.add_args('-m', '512,slots=1,maxmem=59.5G', '-cpu', 'pentium2', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -148,8 +148,8 @@ class MemAddrCheck(QemuSystemTest): above 4 GiB due to the PCI hole and simplicity. """ self.ensure_64bit_binary() - self.vm.add_args('-S', '-machine', 'q35', '-m', - '512,slots=1,maxmem=4G', + self.set_machine('q35') + self.vm.add_args('-S', '-m', '512,slots=1,maxmem=4G', '-cpu', 'pentium', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -176,8 +176,8 @@ class MemAddrCheck(QemuSystemTest): make QEMU fail with the error message. """ self.ensure_64bit_binary() - self.vm.add_args('-S', '-machine', 'pc-q35-7.0', '-m', - '512,slots=1,maxmem=988G', + self.set_machine('pc-q35-7.0') + self.vm.add_args('-S', '-m', '512,slots=1,maxmem=988G', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -197,8 +197,8 @@ class MemAddrCheck(QemuSystemTest): than 988 GiB). """ self.ensure_64bit_binary() - self.vm.add_args('-S', '-machine', 'pc-q35-7.1', '-m', - '512,slots=1,maxmem=976G', + self.set_machine('pc-q35-7.1') + self.vm.add_args('-S', '-m', '512,slots=1,maxmem=976G', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -214,8 +214,8 @@ class MemAddrCheck(QemuSystemTest): successfully start when maxmem is < 988G. """ self.ensure_64bit_binary() - self.vm.add_args('-S', '-machine', 'pc-q35-7.0', '-m', - '512,slots=1,maxmem=987.5G', + self.set_machine('pc-q35-7.0') + self.vm.add_args('-S', '-m', '512,slots=1,maxmem=987.5G', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -231,8 +231,8 @@ class MemAddrCheck(QemuSystemTest): successfully start when maxmem is < 976G. """ self.ensure_64bit_binary() - self.vm.add_args('-S', '-machine', 'pc-q35-7.1', '-m', - '512,slots=1,maxmem=975.5G', + self.set_machine('pc-q35-7.1') + self.vm.add_args('-S', '-m', '512,slots=1,maxmem=975.5G', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -249,9 +249,9 @@ class MemAddrCheck(QemuSystemTest): "above_4G" memory starts at 4G. """ self.ensure_64bit_binary() + self.set_machine('pc-q35-7.1') self.vm.add_args('-S', '-cpu', 'Skylake-Server', - '-machine', 'pc-q35-7.1', '-m', - '512,slots=1,maxmem=976G', + '-m', '512,slots=1,maxmem=976G', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -274,9 +274,9 @@ class MemAddrCheck(QemuSystemTest): fail to start. """ self.ensure_64bit_binary() + self.set_machine('pc-q35-7.1') self.vm.add_args('-S', '-cpu', 'EPYC-v4,phys-bits=41', - '-machine', 'pc-q35-7.1', '-m', - '512,slots=1,maxmem=992G', + '-m', '512,slots=1,maxmem=992G', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -293,9 +293,9 @@ class MemAddrCheck(QemuSystemTest): QEMU should start fine. """ self.ensure_64bit_binary() + self.set_machine('pc-q35-7.1') self.vm.add_args('-S', '-cpu', 'EPYC-v4,phys-bits=41', - '-machine', 'pc-q35-7.1', '-m', - '512,slots=1,maxmem=990G', + '-m', '512,slots=1,maxmem=990G', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -314,12 +314,12 @@ class MemAddrCheck(QemuSystemTest): alignment constraints with 40 bits (1 TiB) of processor physical bits. """ self.ensure_64bit_binary() + self.set_machine('q35') self.vm.add_args('-S', '-cpu', 'Skylake-Server,phys-bits=40', - '-machine', 'q35,cxl=on', '-m', - '512,slots=1,maxmem=987G', + '-m', '512,slots=1,maxmem=987G', '-display', 'none', '-device', 'pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1', - '-M', 'cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=1G') + '-M', 'cxl=on,cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=1G') self.vm.set_qmp_monitor(enabled=False) self.vm.launch() self.vm.wait() @@ -333,9 +333,10 @@ class MemAddrCheck(QemuSystemTest): with cxl enabled. """ self.ensure_64bit_binary() + self.set_machine('q35') self.vm.add_args('-S', '-cpu', 'Skylake-Server,phys-bits=40', - '-machine', 'q35,cxl=on', '-m', - '512,slots=1,maxmem=987G', + '-machine', 'cxl=on', + '-m', '512,slots=1,maxmem=987G', '-display', 'none', '-device', 'pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1') self.vm.set_qmp_monitor(enabled=False) diff --git a/tests/functional/test_microblaze_s3adsp1800.py b/tests/functional/test_microblaze_s3adsp1800.py index c93fa14..f093b16 100755 --- a/tests/functional/test_microblaze_s3adsp1800.py +++ b/tests/functional/test_microblaze_s3adsp1800.py @@ -25,12 +25,14 @@ class MicroblazeMachine(QemuSystemTest): ('http://www.qemu-advent-calendar.org/2023/download/day13.tar.gz'), 'b9b3d43c5dd79db88ada495cc6e0d1f591153fe41355e925d791fbf44de50c22') - def do_ballerina_be_test(self, machine): - self.set_machine(machine) + def do_ballerina_be_test(self, force_endianness=False): + self.set_machine('petalogix-s3adsp1800') self.archive_extract(self.ASSET_IMAGE_BE) self.vm.set_console() self.vm.add_args('-kernel', self.scratch_file('day17', 'ballerina.bin')) + if force_endianness: + self.vm.add_args('-M', 'endianness=big') self.vm.launch() wait_for_console_pattern(self, 'This architecture does not have ' 'kernel memory protection') @@ -39,12 +41,14 @@ class MicroblazeMachine(QemuSystemTest): # message, that's why we don't test for a later string here. This # needs some investigation by a microblaze wizard one day... - def do_xmaton_le_test(self, machine): + def do_xmaton_le_test(self, force_endianness=False): self.require_netdev('user') - self.set_machine(machine) + self.set_machine('petalogix-s3adsp1800') self.archive_extract(self.ASSET_IMAGE_LE) self.vm.set_console() self.vm.add_args('-kernel', self.scratch_file('day13', 'xmaton.bin')) + if force_endianness: + self.vm.add_args('-M', 'endianness=little') tftproot = self.scratch_file('day13') self.vm.add_args('-nic', f'user,tftp={tftproot}') self.vm.launch() @@ -59,9 +63,13 @@ class MicroblazeMachine(QemuSystemTest): class MicroblazeBigEndianMachine(MicroblazeMachine): ASSET_IMAGE_BE = MicroblazeMachine.ASSET_IMAGE_BE + ASSET_IMAGE_LE = MicroblazeMachine.ASSET_IMAGE_LE def test_microblaze_s3adsp1800_legacy_be(self): - self.do_ballerina_be_test('petalogix-s3adsp1800') + self.do_ballerina_be_test() + + def test_microblaze_s3adsp1800_legacy_le(self): + self.do_xmaton_le_test(force_endianness=True) if __name__ == '__main__': diff --git a/tests/functional/test_microblazeel_s3adsp1800.py b/tests/functional/test_microblazeel_s3adsp1800.py index ab59941..915902d 100755 --- a/tests/functional/test_microblazeel_s3adsp1800.py +++ b/tests/functional/test_microblazeel_s3adsp1800.py @@ -13,9 +13,13 @@ from test_microblaze_s3adsp1800 import MicroblazeMachine class MicroblazeLittleEndianMachine(MicroblazeMachine): ASSET_IMAGE_LE = MicroblazeMachine.ASSET_IMAGE_LE + ASSET_IMAGE_BE = MicroblazeMachine.ASSET_IMAGE_BE def test_microblaze_s3adsp1800_legacy_le(self): - self.do_xmaton_le_test('petalogix-s3adsp1800') + self.do_xmaton_le_test() + + def test_microblaze_s3adsp1800_legacy_be(self): + self.do_ballerina_be_test(force_endianness=True) if __name__ == '__main__': diff --git a/tests/functional/test_mips_malta.py b/tests/functional/test_mips_malta.py index 89b9556..30279f0 100755 --- a/tests/functional/test_mips_malta.py +++ b/tests/functional/test_mips_malta.py @@ -80,10 +80,8 @@ def mips_check_wheezy(test, kernel_path, image_path, kernel_command_line, exec_command_and_wait_for_pattern(test, 'cat /proc/devices', 'usb') exec_command_and_wait_for_pattern(test, 'cat /proc/ioports', ' : piix4_smbus') - # lspci for the host bridge does not work on big endian targets: - # https://gitlab.com/qemu-project/qemu/-/issues/2826 - # exec_command_and_wait_for_pattern(test, 'lspci -d 11ab:4620', - # 'GT-64120') + exec_command_and_wait_for_pattern(test, 'lspci -d 11ab:4620', + 'GT-64120') exec_command_and_wait_for_pattern(test, 'cat /sys/bus/i2c/devices/i2c-0/name', 'SMBus PIIX4 adapter') diff --git a/tests/functional/test_sparc64_tuxrun.py b/tests/functional/test_sparc64_tuxrun.py index 3be08d6..0d7b43d 100755 --- a/tests/functional/test_sparc64_tuxrun.py +++ b/tests/functional/test_sparc64_tuxrun.py @@ -24,6 +24,7 @@ class TuxRunSparc64Test(TuxRunBaselineTest): '479c3dc104c82b68be55e2c0c5c38cd473d0b37ad4badccde4775bb88ce34611') def test_sparc64(self): + self.set_machine('sun4u') self.root='sda' self.wait_for_shutdown=False self.common_tuxrun(kernel_asset=self.ASSET_SPARC64_KERNEL, diff --git a/tests/qemu-iotests/106 b/tests/qemu-iotests/106 index ae0fc46..5554843 100755 --- a/tests/qemu-iotests/106 +++ b/tests/qemu-iotests/106 @@ -40,6 +40,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt raw _supported_proto file fuse _supported_os Linux +_require_disk_usage # in kB CREATION_SIZE=128 diff --git a/tests/qemu-iotests/125 b/tests/qemu-iotests/125 index 46279d6..708e7c5 100755 --- a/tests/qemu-iotests/125 +++ b/tests/qemu-iotests/125 @@ -35,7 +35,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 get_image_size_on_host() { - echo $(($(stat -c '%b * %B' "$TEST_IMG_FILE"))) + disk_usage "$TEST_IMG_FILE" } # get standard environment and filters diff --git a/tests/qemu-iotests/175 b/tests/qemu-iotests/175 index f74f053..bbbf550 100755 --- a/tests/qemu-iotests/175 +++ b/tests/qemu-iotests/175 @@ -77,6 +77,7 @@ _supported_os Linux _default_cache_mode none _supported_cache_modes none directsync +_require_disk_usage size=$((1 * 1024 * 1024)) diff --git a/tests/qemu-iotests/221 b/tests/qemu-iotests/221 index c463fd4..eba00b8 100755 --- a/tests/qemu-iotests/221 +++ b/tests/qemu-iotests/221 @@ -41,6 +41,7 @@ _supported_os Linux _default_cache_mode writeback _supported_cache_modes writeback writethrough unsafe +_require_disk_usage echo echo "=== Check mapping of unaligned raw image ===" diff --git a/tests/qemu-iotests/253 b/tests/qemu-iotests/253 index 35039d2..6da85e6 100755 --- a/tests/qemu-iotests/253 +++ b/tests/qemu-iotests/253 @@ -41,6 +41,7 @@ _supported_os Linux _default_cache_mode none _supported_cache_modes none directsync +_require_disk_usage echo echo "=== Check mapping of unaligned raw image ===" diff --git a/tests/qemu-iotests/308 b/tests/qemu-iotests/308 index ea81dc4..6eced3a 100755 --- a/tests/qemu-iotests/308 +++ b/tests/qemu-iotests/308 @@ -51,6 +51,7 @@ _unsupported_fmt vpc _supported_proto file # We create the FUSE export manually _supported_os Linux # We need /dev/urandom +_require_disk_usage # $1: Export ID # $2: Options (beyond the node-name and ID) @@ -290,7 +291,7 @@ echo '--- Try growing non-growable export ---' # Get the current size so we can write beyond the EOF orig_len=$(get_proto_len "$EXT_MP" "$TEST_IMG") -orig_disk_usage=$(stat -c '%b' "$TEST_IMG") +orig_disk_usage=$(disk_usage "$TEST_IMG") # Should fail (exports are non-growable by default) # (Note that qemu-io can never write beyond the EOF, so we have to use @@ -312,7 +313,7 @@ else echo 'OK: Post-truncate image size is as expected' fi -new_disk_usage=$(stat -c '%b' "$TEST_IMG") +new_disk_usage=$(disk_usage "$TEST_IMG") if [ "$new_disk_usage" -gt "$orig_disk_usage" ]; then echo 'OK: Disk usage grew with fallocate' else diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 237f746..e977cb4 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -996,6 +996,36 @@ _require_large_file() rm "$FILENAME" } +# Check whether disk_usage can be reliably used. +_require_disk_usage() +{ + local unusable=false + # ZFS triggers known failures on this front; it does not immediately + # allocate files, and then aggressively compresses writes even when full + # allocation was requested. + if [ -z "$TEST_IMG_FILE" ]; then + FILENAME="$TEST_IMG" + else + FILENAME="$TEST_IMG_FILE" + fi + if [ -e "FILENAME" ]; then + echo "unwilling to overwrite existing file" + exit 1 + fi + $QEMU_IMG create -f raw "$FILENAME" 5M > /dev/null + if [ $(disk_usage "$FILENAME") -gt $((1024*1024)) ]; then + unusable=true + fi + $QEMU_IMG create -f raw -o preallocation=full "$FILENAME" 5M > /dev/null + if [ $(disk_usage "$FILENAME") -lt $((4*1024*1024)) ]; then + unusable=true + fi + rm -f "$FILENAME" + if $unusable; then + _notrun "file system on $TEST_DIR does not handle sparse files nicely" + fi +} + # Check that a set of devices is available in the QEMU binary # _require_devices() diff --git a/tests/qemu-iotests/tests/mirror-sparse b/tests/qemu-iotests/tests/mirror-sparse index 8c52a4e..cfcaa60 100755 --- a/tests/qemu-iotests/tests/mirror-sparse +++ b/tests/qemu-iotests/tests/mirror-sparse @@ -40,6 +40,7 @@ cd .. _supported_fmt qcow2 raw # Format of the source. dst is always raw file _supported_proto file _supported_os Linux +_require_disk_usage echo echo "=== Initial image setup ===" @@ -96,13 +97,15 @@ _send_qemu_cmd $h1 '{"execute": "blockdev-del", "arguments": {"node-name": "dst"}}' 'return' \ | _filter_block_job_offset | _filter_block_job_len $QEMU_IMG compare -U -f $IMGFMT -F raw $TEST_IMG.base $TEST_IMG +# Some filesystems can fudge allocations for various reasons; rather +# than expecting precise 2M and 20M images, it is better to allow for slop. result=$(disk_usage $TEST_IMG) -if test $result -lt $((3*1024*1024)); then +if test $result -lt $((4*1024*1024)); then actual=sparse -elif test $result = $((20*1024*1024)); then +elif test $result -gt $((19*1024*1024)); then actual=full else - actual=unknown + actual="unexpected size ($result)" fi echo "Destination is $actual; expected $expected" } diff --git a/tests/qemu-iotests/tests/write-zeroes-unmap b/tests/qemu-iotests/tests/write-zeroes-unmap index 7cfeeaf..f90fb8e 100755 --- a/tests/qemu-iotests/tests/write-zeroes-unmap +++ b/tests/qemu-iotests/tests/write-zeroes-unmap @@ -32,6 +32,7 @@ cd .. _supported_fmt raw _supported_proto file _supported_os Linux +_require_disk_usage create_test_image() { _make_test_img -f $IMGFMT 1m diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c index 0a333ec..0b2bdf9 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c @@ -1622,7 +1622,7 @@ static void test_acpi_aarch64_virt_tcg_memhp(void) .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", .cd = "tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2", .ram_start = 0x40000000ULL, - .scan_len = 256ULL * 1024 * 1024, + .scan_len = 256ULL * MiB, }; data.variant = ".memhp"; @@ -1717,7 +1717,7 @@ static void test_acpi_riscv64_virt_tcg_numamem(void) .uefi_fl2 = "pc-bios/edk2-riscv-vars.fd", .cd = "tests/data/uefi-boot-images/bios-tables-test.riscv64.iso.qcow2", .ram_start = 0x80000000ULL, - .scan_len = 128ULL * 1024 * 1024, + .scan_len = 128ULL * MiB, }; data.variant = ".numamem"; @@ -1743,7 +1743,7 @@ static void test_acpi_aarch64_virt_tcg_numamem(void) .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", .cd = "tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2", .ram_start = 0x40000000ULL, - .scan_len = 128ULL * 1024 * 1024, + .scan_len = 128ULL * MiB, }; data.variant = ".numamem"; @@ -1765,7 +1765,7 @@ static void test_acpi_aarch64_virt_tcg_pxb(void) .uefi_fl1 = "pc-bios/edk2-aarch64-code.fd", .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", .ram_start = 0x40000000ULL, - .scan_len = 128ULL * 1024 * 1024, + .scan_len = 128ULL * MiB, }; /* * While using -cdrom, the cdrom would auto plugged into pxb-pcie, @@ -1841,7 +1841,7 @@ static void test_acpi_aarch64_virt_tcg_acpi_hmat(void) .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", .cd = "tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2", .ram_start = 0x40000000ULL, - .scan_len = 128ULL * 1024 * 1024, + .scan_len = 128ULL * MiB, }; data.variant = ".acpihmatvirt"; @@ -2095,7 +2095,7 @@ static void test_acpi_riscv64_virt_tcg(void) .uefi_fl2 = "pc-bios/edk2-riscv-vars.fd", .cd = "tests/data/uefi-boot-images/bios-tables-test.riscv64.iso.qcow2", .ram_start = 0x80000000ULL, - .scan_len = 128ULL * 1024 * 1024, + .scan_len = 128ULL * MiB, }; /* @@ -2117,7 +2117,7 @@ static void test_acpi_aarch64_virt_tcg(void) .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", .cd = "tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2", .ram_start = 0x40000000ULL, - .scan_len = 128ULL * 1024 * 1024, + .scan_len = 128ULL * MiB, }; data.smbios_cpu_max_speed = 2900; @@ -2138,7 +2138,7 @@ static void test_acpi_aarch64_virt_tcg_topology(void) .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", .cd = "tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2", .ram_start = 0x40000000ULL, - .scan_len = 128ULL * 1024 * 1024, + .scan_len = 128ULL * MiB, }; test_acpi_one("-cpu cortex-a57 " @@ -2223,7 +2223,7 @@ static void test_acpi_aarch64_virt_viot(void) .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", .cd = "tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2", .ram_start = 0x40000000ULL, - .scan_len = 128ULL * 1024 * 1024, + .scan_len = 128ULL * MiB, }; test_acpi_one("-cpu cortex-a57 " @@ -2407,7 +2407,7 @@ static void test_acpi_aarch64_virt_oem_fields(void) .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", .cd = "tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2", .ram_start = 0x40000000ULL, - .scan_len = 128ULL * 1024 * 1024, + .scan_len = 128ULL * MiB, }; char *args; diff --git a/tests/qtest/test-x86-cpuid-compat.c b/tests/qtest/test-x86-cpuid-compat.c index c9de47b..456e2af 100644 --- a/tests/qtest/test-x86-cpuid-compat.c +++ b/tests/qtest/test-x86-cpuid-compat.c @@ -365,20 +365,6 @@ int main(int argc, char **argv) "level", 10); } - /* - * xlevel doesn't have any feature that triggers auto-level - * code on old machine-types. Just check that the compat code - * is working correctly: - */ - if (qtest_has_machine("pc-i440fx-2.4")) { - add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-off", - "SandyBridge", NULL, "pc-i440fx-2.4", - "xlevel", 0x80000008); - add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-on", - "SandyBridge", "svm=on,npt=on", "pc-i440fx-2.4", - "xlevel", 0x80000008); - } - /* Test feature parsing */ add_feature_test("x86/cpuid/features/plus", "486", "+arat", diff --git a/tests/uefi-test-tools/Makefile b/tests/uefi-test-tools/Makefile index f4eaebd..8ee6fb3 100644 --- a/tests/uefi-test-tools/Makefile +++ b/tests/uefi-test-tools/Makefile @@ -12,7 +12,7 @@ edk2_dir := ../../roms/edk2 images_dir := ../data/uefi-boot-images -emulation_targets := arm aarch64 i386 x86_64 riscv64 +emulation_targets := arm aarch64 i386 x86_64 riscv64 loongarch64 uefi_binaries := bios-tables-test intermediate_suffixes := .efi .fat .iso.raw @@ -56,7 +56,8 @@ Build/%.iso.raw: Build/%.fat # stripped from, the argument. map_arm_to_uefi = $(subst arm,ARM,$(1)) map_aarch64_to_uefi = $(subst aarch64,AA64,$(call map_arm_to_uefi,$(1))) -map_riscv64_to_uefi = $(subst riscv64,RISCV64,$(call map_aarch64_to_uefi,$(1))) +map_loongarch64_to_uefi = $(subst loongarch64,LOONGARCH64,$(call map_aarch64_to_uefi,$(1))) +map_riscv64_to_uefi = $(subst riscv64,RISCV64,$(call map_loongarch64_to_uefi,$(1))) map_i386_to_uefi = $(subst i386,IA32,$(call map_riscv64_to_uefi,$(1))) map_x86_64_to_uefi = $(subst x86_64,X64,$(call map_i386_to_uefi,$(1))) map_to_uefi = $(subst .,,$(call map_x86_64_to_uefi,$(1))) diff --git a/tests/uefi-test-tools/UefiTestToolsPkg/UefiTestToolsPkg.dsc b/tests/uefi-test-tools/UefiTestToolsPkg/UefiTestToolsPkg.dsc index 0902fd3..facf8df 100644 --- a/tests/uefi-test-tools/UefiTestToolsPkg/UefiTestToolsPkg.dsc +++ b/tests/uefi-test-tools/UefiTestToolsPkg/UefiTestToolsPkg.dsc @@ -19,7 +19,7 @@ PLATFORM_VERSION = 0.1 PLATFORM_NAME = UefiTestTools SKUID_IDENTIFIER = DEFAULT - SUPPORTED_ARCHITECTURES = ARM|AARCH64|IA32|X64|RISCV64 + SUPPORTED_ARCHITECTURES = ARM|AARCH64|IA32|X64|RISCV64|LOONGARCH64 BUILD_TARGETS = DEBUG [BuildOptions.IA32] @@ -65,6 +65,10 @@ [LibraryClasses.RISCV64] BaseMemoryLib|MdePkg/Library/BaseMemoryLib/BaseMemoryLib.inf +[LibraryClasses.LOONGARCH64] + BaseMemoryLib|MdePkg/Library/BaseMemoryLib/BaseMemoryLib.inf + StackCheckLib|MdePkg/Library/StackCheckLibNull/StackCheckLibNull.inf + [PcdsFixedAtBuild] gEfiMdePkgTokenSpaceGuid.PcdDebugPrintErrorLevel|0x8040004F gEfiMdePkgTokenSpaceGuid.PcdDebugPropertyMask|0x2F diff --git a/tests/uefi-test-tools/uefi-test-build.config b/tests/uefi-test-tools/uefi-test-build.config index a4c61fc..8bf4826 100644 --- a/tests/uefi-test-tools/uefi-test-build.config +++ b/tests/uefi-test-tools/uefi-test-build.config @@ -22,6 +22,16 @@ arch = AARCH64 cpy1 = AARCH64/BiosTablesTest.efi bios-tables-test.aarch64.efi #################################################################################### +# loongarch64 + +[build.loongarch64] +conf = UefiTestToolsPkg/UefiTestToolsPkg.dsc +plat = UefiTestTools +dest = ./Build +arch = LOONGARCH64 +cpy1 = LOONGARCH64/BiosTablesTest.efi bios-tables-test.loongarch64.efi + +#################################################################################### # riscv64 [build.riscv64] diff --git a/tests/unit/test-util-sockets.c b/tests/unit/test-util-sockets.c index 8492f4d..ee66d72 100644 --- a/tests/unit/test-util-sockets.c +++ b/tests/unit/test-util-sockets.c @@ -341,8 +341,12 @@ static void inet_parse_test_helper(const char *str, int rc = inet_parse(&addr, str, &error); if (success) { + if (error) { + error_report_err(error); + } g_assert_cmpint(rc, ==, 0); } else { + error_free(error); g_assert_cmpint(rc, <, 0); } if (exp_addr != NULL) { |