Diffstat:
-rw-r--r--  .gitlab-ci.d/buildtest-template.yml | 3
-rw-r--r--  .gitlab-ci.d/buildtest.yml | 11
-rw-r--r--  MAINTAINERS | 14
-rw-r--r--  accel/kvm/kvm-all.c | 11
-rw-r--r--  accel/tcg/meson.build | 8
-rw-r--r--  backends/iommufd.c | 58
-rw-r--r--  backends/trace-events | 1
-rw-r--r--  block.c | 235
-rw-r--r--  block/backup.c | 2
-rw-r--r--  block/blklogwrites.c | 4
-rw-r--r--  block/blkverify.c | 2
-rw-r--r--  block/block-backend.c | 10
-rw-r--r--  block/commit.c | 4
-rw-r--r--  block/io.c | 1
-rw-r--r--  block/mirror.c | 5
-rw-r--r--  block/qcow2.c | 4
-rw-r--r--  block/quorum.c | 4
-rw-r--r--  block/replication.c | 7
-rw-r--r--  block/snapshot.c | 28
-rw-r--r--  block/stream.c | 10
-rw-r--r--  block/vmdk.c | 10
-rw-r--r--  blockdev.c | 78
-rw-r--r--  blockjob.c | 12
-rw-r--r--  clippy.toml (renamed from rust/clippy.toml) | 2
-rw-r--r--  configs/devices/i386-softmmu/default.mak | 1
-rwxr-xr-x  configure | 16
-rw-r--r--  docs/about/deprecated.rst | 19
-rw-r--r--  docs/about/removed-features.rst | 9
-rw-r--r--  docs/devel/rust.rst | 12
-rw-r--r--  docs/interop/bitmaps.rst | 2
-rw-r--r--  docs/interop/index.rst | 1
-rw-r--r--  docs/interop/qcow2.rst (renamed from docs/interop/qcow2.txt) | 187
-rw-r--r--  docs/qcow2-cache.txt | 2
-rw-r--r--  docs/system/confidential-guest-support.rst | 1
-rw-r--r--  docs/system/i386/tdx.rst | 161
-rw-r--r--  docs/system/target-i386.rst | 1
-rw-r--r--  gdbstub/meson.build | 4
-rw-r--r--  hw/arm/Kconfig | 1
-rw-r--r--  hw/arm/boot.c | 1
-rw-r--r--  hw/arm/meson.build | 4
-rw-r--r--  hw/arm/npcm8xx.c | 55
-rw-r--r--  hw/block/Kconfig | 3
-rw-r--r--  hw/block/meson.build | 1
-rw-r--r--  hw/block/nand.c | 835
-rw-r--r--  hw/core/loader.c | 14
-rw-r--r--  hw/core/machine.c | 18
-rw-r--r--  hw/core/meson.build | 4
-rw-r--r--  hw/core/qdev-properties-system.c | 1
-rw-r--r--  hw/i386/Kconfig | 6
-rw-r--r--  hw/i386/kvm/apic.c | 5
-rw-r--r--  hw/i386/meson.build | 1
-rw-r--r--  hw/i386/pc.c | 71
-rw-r--r--  hw/i386/pc_piix.c | 26
-rw-r--r--  hw/i386/pc_q35.c | 26
-rw-r--r--  hw/i386/pc_sysfw.c | 7
-rw-r--r--  hw/i386/tdvf-hob.c | 130
-rw-r--r--  hw/i386/tdvf-hob.h | 26
-rw-r--r--  hw/i386/tdvf.c | 189
-rw-r--r--  hw/i386/x86-common.c | 6
-rw-r--r--  hw/i386/x86.c | 1
-rw-r--r--  hw/microblaze/petalogix_ml605_mmu.c | 15
-rw-r--r--  hw/microblaze/petalogix_s3adsp1800_mmu.c | 42
-rw-r--r--  hw/microblaze/xlnx-zynqmp-pmu.c | 7
-rw-r--r--  hw/net/e1000.c | 95
-rw-r--r--  hw/net/rocker/rocker.h | 14
-rw-r--r--  hw/net/rocker/rocker_hw.h | 20
-rw-r--r--  hw/net/rocker/rocker_of_dpa.c | 40
-rw-r--r--  hw/net/vmxnet3.c | 44
-rw-r--r--  hw/nvram/fw_cfg.c | 110
-rw-r--r--  hw/scsi/vmw_pvscsi.c | 67
-rw-r--r--  hw/timer/hpet.c | 166
-rw-r--r--  hw/vfio/container-base.c | 4
-rw-r--r--  hw/vfio/container.c | 5
-rw-r--r--  hw/vfio/cpr.c | 2
-rw-r--r--  hw/vfio/igd.c | 22
-rw-r--r--  hw/vfio/iommufd.c | 45
-rw-r--r--  hw/vfio/listener.c | 74
-rw-r--r--  hw/vfio/pci.c | 89
-rw-r--r--  hw/vfio/vfio-cpr.h | 15
-rw-r--r--  hw/virtio/vhost-vdpa.c | 9
-rw-r--r--  hw/virtio/virtio-pci.c | 11
-rw-r--r--  include/block/block-global-state.h | 19
-rw-r--r--  include/block/block-io.h | 2
-rw-r--r--  include/block/block_int-common.h | 32
-rw-r--r--  include/block/blockjob.h | 2
-rw-r--r--  include/hw/arm/npcm8xx.h | 5
-rw-r--r--  include/hw/block/flash.h | 18
-rw-r--r--  include/hw/boards.h | 9
-rw-r--r--  include/hw/i386/pc.h | 7
-rw-r--r--  include/hw/i386/tdvf.h | 45
-rw-r--r--  include/hw/i386/x86.h | 5
-rw-r--r--  include/hw/loader.h | 2
-rw-r--r--  include/hw/nvram/fw_cfg.h | 10
-rw-r--r--  include/hw/vfio/vfio-container-base.h | 83
-rw-r--r--  include/hw/vfio/vfio-cpr.h | 18
-rw-r--r--  include/hw/virtio/virtio-pci.h | 8
-rw-r--r--  include/io/channel-socket.h | 13
-rw-r--r--  include/standard-headers/uefi/uefi.h | 187
-rw-r--r--  include/system/host_iommu_device.h | 15
-rw-r--r--  include/system/iommufd.h | 54
-rw-r--r--  include/system/kvm.h | 1
-rw-r--r--  include/system/memory.h | 19
-rw-r--r--  io/channel-socket.c | 11
-rw-r--r--  meson.build | 129
-rw-r--r--  nbd/client-connection.c | 3
-rw-r--r--  nbd/common.c | 26
-rw-r--r--  nbd/nbd-internal.h | 5
-rw-r--r--  nbd/server.c | 2
-rw-r--r--  plugins/meson.build | 4
-rwxr-xr-x  python/scripts/vendor.py | 4
-rw-r--r--  python/wheels/meson-1.5.0-py3-none-any.whl | bin 959846 -> 0 bytes
-rw-r--r--  python/wheels/meson-1.8.1-py3-none-any.whl | bin 0 -> 1013001 bytes
-rw-r--r--  pythondeps.toml | 2
-rw-r--r--  qapi/acpi.json | 2
-rw-r--r--  qapi/audio.json | 8
-rw-r--r--  qapi/block-core.json | 184
-rw-r--r--  qapi/block-export.json | 6
-rw-r--r--  qapi/block.json | 2
-rw-r--r--  qapi/char.json | 8
-rw-r--r--  qapi/crypto.json | 21
-rw-r--r--  qapi/cryptodev.json | 2
-rw-r--r--  qapi/cxl.json | 2
-rw-r--r--  qapi/dump.json | 6
-rw-r--r--  qapi/introspect.json | 8
-rw-r--r--  qapi/job.json | 28
-rw-r--r--  qapi/machine.json | 14
-rw-r--r--  qapi/migration.json | 100
-rw-r--r--  qapi/misc-i386.json | 2
-rw-r--r--  qapi/misc.json | 4
-rw-r--r--  qapi/net.json | 18
-rw-r--r--  qapi/qom.json | 37
-rw-r--r--  qapi/run-state.json | 43
-rw-r--r--  qapi/transaction.json | 4
-rw-r--r--  qapi/uefi.json | 2
-rw-r--r--  qapi/ui.json | 8
-rw-r--r--  qemu-img.c | 2
-rw-r--r--  rust/Cargo.lock | 8
-rw-r--r--  rust/Cargo.toml | 2
-rw-r--r--  rust/bits/Cargo.toml | 19
-rw-r--r--  rust/bits/meson.build | 16
-rw-r--r--  rust/bits/src/lib.rs | 443
-rw-r--r--  rust/hw/char/pl011/Cargo.toml | 1
-rw-r--r--  rust/hw/char/pl011/meson.build | 13
-rw-r--r--  rust/hw/char/pl011/src/device.rs | 55
-rw-r--r--  rust/hw/char/pl011/src/registers.rs | 39
-rw-r--r--  rust/hw/timer/hpet/src/device.rs (renamed from rust/hw/timer/hpet/src/hpet.rs) | 2
-rw-r--r--  rust/hw/timer/hpet/src/fw_cfg.rs | 2
-rw-r--r--  rust/hw/timer/hpet/src/lib.rs | 4
-rw-r--r--  rust/meson.build | 31
-rw-r--r--  rust/qemu-api-macros/meson.build | 14
-rw-r--r--  rust/qemu-api-macros/src/bits.rs | 229
-rw-r--r--  rust/qemu-api-macros/src/lib.rs | 56
-rw-r--r--  rust/qemu-api/meson.build | 36
-rw-r--r--  rust/qemu-api/src/bindings.rs | 1
-rw-r--r--  rust/qemu-api/src/bitops.rs | 2
-rw-r--r--  rust/qemu-api/src/cell.rs | 22
-rw-r--r--  rust/qemu-api/src/qom.rs | 4
-rw-r--r--  rust/qemu-api/src/timer.rs | 2
-rw-r--r--  rust/qemu-api/src/vmstate.rs | 2
-rw-r--r--  rust/qemu-api/tests/vmstate_tests.rs | 2
-rw-r--r--  scripts/rust/rustc_args.py | 5
-rw-r--r--  scripts/tracetool/backend/simple.py | 23
-rw-r--r--  system/memory.c | 32
-rw-r--r--  system/meson.build | 2
-rw-r--r--  system/runstate.c | 65
-rw-r--r--  system/vl.c | 5
-rw-r--r--  target/arm/arm-qmp-cmds.c | 1
-rw-r--r--  target/arm/cpregs.h | 1
-rw-r--r--  target/arm/cpu-features.h | 1
-rw-r--r--  target/arm/hvf-stub.c | 20
-rw-r--r--  target/arm/hvf_arm.h | 18
-rw-r--r--  target/arm/kvm.c | 5
-rw-r--r--  target/arm/kvm_arm.h | 1
-rw-r--r--  target/arm/meson.build | 6
-rw-r--r--  target/arm/tcg/meson.build | 2
-rw-r--r--  target/i386/confidential-guest.h | 44
-rw-r--r--  target/i386/cpu.c | 549
-rw-r--r--  target/i386/cpu.h | 49
-rw-r--r--  target/i386/emulate/x86_flags.c | 16
-rw-r--r--  target/i386/host-cpu.c | 2
-rw-r--r--  target/i386/host-cpu.h | 1
-rw-r--r--  target/i386/kvm/kvm.c | 110
-rw-r--r--  target/i386/kvm/kvm_i386.h | 15
-rw-r--r--  target/i386/kvm/meson.build | 2
-rw-r--r--  target/i386/kvm/tdx-stub.c | 20
-rw-r--r--  target/i386/kvm/tdx.c | 1289
-rw-r--r--  target/i386/kvm/tdx.h | 65
-rw-r--r--  target/i386/machine.c | 5
-rw-r--r--  target/i386/sev.c | 12
-rw-r--r--  target/i386/tcg/helper-tcg.h | 4
-rw-r--r--  target/loongarch/kvm/kvm.c | 4
-rw-r--r--  target/mips/kvm.c | 5
-rw-r--r--  target/ppc/kvm.c | 5
-rw-r--r--  target/riscv/kvm/kvm-cpu.c | 5
-rw-r--r--  target/s390x/kvm/kvm.c | 5
-rw-r--r--  tcg/meson.build | 4
-rw-r--r--  tests/docker/dockerfiles/fedora-rust-nightly.docker | 2
-rw-r--r--  tests/docker/dockerfiles/ubuntu2204.docker | 1
-rw-r--r--  tests/functional/meson.build | 1
-rwxr-xr-x  tests/functional/test_arm_stellaris.py | 48
-rwxr-xr-x  tests/functional/test_mem_addr_space.py | 63
-rwxr-xr-x  tests/functional/test_microblaze_s3adsp1800.py | 18
-rwxr-xr-x  tests/functional/test_microblazeel_s3adsp1800.py | 6
-rwxr-xr-x  tests/functional/test_mips_malta.py | 6
-rwxr-xr-x  tests/functional/test_sparc64_tuxrun.py | 1
-rw-r--r--  tests/lcitool/mappings.yml | 6
-rwxr-xr-x  tests/lcitool/refresh | 3
-rwxr-xr-x  tests/qemu-iotests/106 | 1
-rwxr-xr-x  tests/qemu-iotests/125 | 2
-rwxr-xr-x  tests/qemu-iotests/175 | 1
-rwxr-xr-x  tests/qemu-iotests/221 | 1
-rwxr-xr-x  tests/qemu-iotests/240 | 2
-rw-r--r--  tests/qemu-iotests/240.out | 4
-rwxr-xr-x  tests/qemu-iotests/253 | 1
-rwxr-xr-x  tests/qemu-iotests/308 | 5
-rw-r--r--  tests/qemu-iotests/common.rc | 30
-rwxr-xr-x  tests/qemu-iotests/tests/graph-changes-while-io | 102
-rw-r--r--  tests/qemu-iotests/tests/graph-changes-while-io.out | 4
-rwxr-xr-x  tests/qemu-iotests/tests/mirror-sparse | 9
-rwxr-xr-x  tests/qemu-iotests/tests/write-zeroes-unmap | 1
-rw-r--r--  tests/qtest/meson.build | 6
-rw-r--r--  tests/qtest/npcm_gmac-test.c | 85
-rw-r--r--  tests/qtest/test-x86-cpuid-compat.c | 14
-rw-r--r--  tests/unit/test-bdrv-drain.c | 24
-rw-r--r--  tests/unit/test-bdrv-graph-mod.c | 10
-rw-r--r--  tests/unit/test-util-sockets.c | 4
226 files changed, 5891 insertions, 2491 deletions
diff --git a/.gitlab-ci.d/buildtest-template.yml b/.gitlab-ci.d/buildtest-template.yml
index 118371e..fea4e8d 100644
--- a/.gitlab-ci.d/buildtest-template.yml
+++ b/.gitlab-ci.d/buildtest-template.yml
@@ -76,7 +76,8 @@
fi
- section_end buildenv
- section_start test "Running tests"
- - $MAKE NINJA=":" $MAKE_CHECK_ARGS
+ # doctests need all the compilation artifacts
+ - $MAKE NINJA=":" MTESTARGS="--no-suite doc" $MAKE_CHECK_ARGS
- section_end test
.native_test_job_template:
diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml
index ca1a9c6..d888a60 100644
--- a/.gitlab-ci.d/buildtest.yml
+++ b/.gitlab-ci.d/buildtest.yml
@@ -41,7 +41,7 @@ build-system-ubuntu:
IMAGE: ubuntu2204
CONFIGURE_ARGS: --enable-docs --enable-rust
TARGETS: alpha-softmmu microblazeel-softmmu mips64el-softmmu
- MAKE_CHECK_ARGS: check-build
+ MAKE_CHECK_ARGS: check-build check-doc
check-system-ubuntu:
extends: .native_test_job_template
@@ -115,7 +115,7 @@ build-system-fedora:
CONFIGURE_ARGS: --disable-gcrypt --enable-nettle --enable-docs --enable-crypto-afalg --enable-rust
TARGETS: microblaze-softmmu mips-softmmu
xtensa-softmmu m68k-softmmu riscv32-softmmu ppc-softmmu sparc64-softmmu
- MAKE_CHECK_ARGS: check-build
+ MAKE_CHECK_ARGS: check-build check-doc
build-system-fedora-rust-nightly:
extends:
@@ -127,12 +127,7 @@ build-system-fedora-rust-nightly:
IMAGE: fedora-rust-nightly
CONFIGURE_ARGS: --disable-docs --enable-rust --enable-strict-rust-lints
TARGETS: aarch64-softmmu
- MAKE_CHECK_ARGS: check-build
- after_script:
- - source scripts/ci/gitlab-ci-section
- - section_start test "Running Rust doctests"
- - cd build
- - pyvenv/bin/meson devenv -w ../rust ${CARGO-cargo} test --doc -p qemu_api
+ MAKE_CHECK_ARGS: check-build check-doc
allow_failure: true
diff --git a/MAINTAINERS b/MAINTAINERS
index e27d145..aa67630 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -510,6 +510,7 @@ Apple Silicon HVF CPUs
M: Alexander Graf <agraf@csgraf.de>
S: Maintained
F: target/arm/hvf/
+F: target/arm/hvf-stub.c
X86 HVF CPUs
M: Cameron Esfahani <dirty@apple.com>
@@ -1003,6 +1004,7 @@ F: hw/display/ssd03*
F: include/hw/input/gamepad.h
F: include/hw/timer/stellaris-gptm.h
F: docs/system/arm/stellaris.rst
+F: tests/functional/test_arm_stellaris.py
STM32L4x5 SoC Family
M: Samuel Tardieu <sam@rfc1149.net>
@@ -3030,6 +3032,16 @@ F: include/qemu/co-shared-resource.h
T: git https://gitlab.com/jsnow/qemu.git jobs
T: git https://gitlab.com/vsementsov/qemu.git block
+CheckPoint and Restart (CPR)
+R: Steve Sistare <steven.sistare@oracle.com>
+S: Supported
+F: hw/vfio/cpr*
+F: include/hw/vfio/vfio-cpr.h
+F: include/migration/cpr.h
+F: migration/cpr*
+F: tests/qtest/migration/cpr*
+F: docs/devel/migration/CPR.rst
+
Compute Express Link
M: Jonathan Cameron <jonathan.cameron@huawei.com>
R: Fan Ni <fan.ni@samsung.com>
@@ -4130,7 +4142,7 @@ M: Hanna Reitz <hreitz@redhat.com>
L: qemu-block@nongnu.org
S: Supported
F: block/qcow2*
-F: docs/interop/qcow2.txt
+F: docs/interop/qcow2.rst
qcow
M: Kevin Wolf <kwolf@redhat.com>
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 278a506..51526d3 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -471,7 +471,9 @@ int kvm_create_vcpu(CPUState *cpu)
cpu->kvm_fd = kvm_fd;
cpu->kvm_state = s;
- cpu->vcpu_dirty = true;
+ if (!s->guest_state_protected) {
+ cpu->vcpu_dirty = true;
+ }
cpu->dirty_pages = 0;
cpu->throttle_us_per_full = 0;
@@ -545,6 +547,11 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
+ ret = kvm_arch_pre_create_vcpu(cpu, errp);
+ if (ret < 0) {
+ goto err;
+ }
+
ret = kvm_create_vcpu(cpu);
if (ret < 0) {
error_setg_errno(errp, -ret,
@@ -2426,7 +2433,7 @@ static int kvm_recommended_vcpus(KVMState *s)
static int kvm_max_vcpus(KVMState *s)
{
- int ret = kvm_check_extension(s, KVM_CAP_MAX_VCPUS);
+ int ret = kvm_vm_check_extension(s, KVM_CAP_MAX_VCPUS);
return (ret) ? ret : kvm_recommended_vcpus(s);
}
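
Note on the kvm_max_vcpus() hunk: it switches the KVM_CAP_MAX_VCPUS query from the
/dev/kvm system fd to the VM fd, since the limit can differ per VM type (TDX guests
being the motivation elsewhere in this series). A minimal standalone sketch of the
difference, assuming a Linux host with /dev/kvm and a kernel that supports
KVM_CHECK_EXTENSION on VM fds (illustration only, not QEMU code):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/kvm.h>

    int main(void)
    {
        int kvm_fd = open("/dev/kvm", O_RDWR);
        if (kvm_fd < 0) {
            perror("open /dev/kvm");
            return 1;
        }

        /* VM type 0 is the default; a TDX VM would use a different type. */
        int vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);
        if (vm_fd < 0) {
            perror("KVM_CREATE_VM");
            return 1;
        }

        /* System-wide value (what kvm_check_extension() asks for). */
        int sys_max = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
        /* Per-VM value (what kvm_vm_check_extension() asks for). */
        int vm_max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);

        printf("max vcpus: system fd %d, vm fd %d\n", sys_max, vm_max);

        close(vm_fd);
        close(kvm_fd);
        return 0;
    }

For the default VM type the two values usually match; a VM created with a
non-default type may report a smaller per-VM limit.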
diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build
index 97d5e5a..575e92b 100644
--- a/accel/tcg/meson.build
+++ b/accel/tcg/meson.build
@@ -18,15 +18,15 @@ if get_option('plugins')
tcg_ss.add(files('plugin-gen.c'))
endif
-libuser_ss.add_all(tcg_ss)
-libsystem_ss.add_all(tcg_ss)
+user_ss.add_all(tcg_ss)
+system_ss.add_all(tcg_ss)
-libuser_ss.add(files(
+user_ss.add(files(
'user-exec.c',
'user-exec-stub.c',
))
-libsystem_ss.add(files(
+system_ss.add(files(
'cputlb.c',
'icount-common.c',
'monitor.c',
diff --git a/backends/iommufd.c b/backends/iommufd.c
index b73f75c..c2c47ab 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -311,6 +311,62 @@ bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
return true;
}
+bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id,
+ uint32_t data_type, uint32_t entry_len,
+ uint32_t *entry_num, void *data,
+ Error **errp)
+{
+ int ret, fd = be->fd;
+ uint32_t total_entries = *entry_num;
+ struct iommu_hwpt_invalidate cache = {
+ .size = sizeof(cache),
+ .hwpt_id = id,
+ .data_type = data_type,
+ .entry_len = entry_len,
+ .entry_num = total_entries,
+ .data_uptr = (uintptr_t)data,
+ };
+
+ ret = ioctl(fd, IOMMU_HWPT_INVALIDATE, &cache);
+ trace_iommufd_backend_invalidate_cache(fd, id, data_type, entry_len,
+ total_entries, cache.entry_num,
+ (uintptr_t)data, ret ? errno : 0);
+ *entry_num = cache.entry_num;
+
+ if (ret) {
+ error_setg_errno(errp, errno, "IOMMU_HWPT_INVALIDATE failed:"
+ " total %d entries, processed %d entries",
+ total_entries, cache.entry_num);
+ } else if (total_entries != cache.entry_num) {
+ error_setg(errp, "IOMMU_HWPT_INVALIDATE succeed but with unprocessed"
+ " entries: total %d entries, processed %d entries."
+ " Kernel BUG?!", total_entries, cache.entry_num);
+ return false;
+ }
+
+ return !ret;
+}
+
+bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ uint32_t hwpt_id, Error **errp)
+{
+ HostIOMMUDeviceIOMMUFDClass *idevc =
+ HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev);
+
+ g_assert(idevc->attach_hwpt);
+ return idevc->attach_hwpt(idev, hwpt_id, errp);
+}
+
+bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ Error **errp)
+{
+ HostIOMMUDeviceIOMMUFDClass *idevc =
+ HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev);
+
+ g_assert(idevc->detach_hwpt);
+ return idevc->detach_hwpt(idev, errp);
+}
+
static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp)
{
HostIOMMUDeviceCaps *caps = &hiod->caps;
@@ -349,6 +405,8 @@ static const TypeInfo types[] = {
}, {
.name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD,
.parent = TYPE_HOST_IOMMU_DEVICE,
+ .instance_size = sizeof(HostIOMMUDeviceIOMMUFD),
+ .class_size = sizeof(HostIOMMUDeviceIOMMUFDClass),
.class_init = hiod_iommufd_class_init,
.abstract = true,
}
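
A hypothetical caller-side sketch of the new iommufd_backend_invalidate_cache()
helper added above. The entry layout and the data_type value below are placeholders
(the real ones are vendor-specific, e.g. the VT-d stage-1 invalidation formats in the
Linux iommufd uAPI), and example_inv_entry/example_flush are invented names:

    #include "qemu/osdep.h"
    #include "qapi/error.h"
    #include "system/iommufd.h"

    /* Placeholder entry layout; a real caller uses the vendor uAPI struct. */
    struct example_inv_entry {
        uint64_t addr;
        uint64_t npages;
    };

    static bool example_flush(IOMMUFDBackend *be, uint32_t hwpt_id, Error **errp)
    {
        struct example_inv_entry entries[4] = { };
        uint32_t num = ARRAY_SIZE(entries);

        /*
         * On return, num holds how many entries the kernel consumed; the helper
         * sets errp if the kernel processed fewer entries than were submitted.
         */
        return iommufd_backend_invalidate_cache(be, hwpt_id,
                                                0 /* placeholder data_type */,
                                                sizeof(entries[0]), &num,
                                                entries, errp);
    }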
diff --git a/backends/trace-events b/backends/trace-events
index 40811a3..7278214 100644
--- a/backends/trace-events
+++ b/backends/trace-events
@@ -18,3 +18,4 @@ iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, uint32_
iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)"
iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) " iommufd=%d hwpt=%u enable=%d (%d)"
iommufd_backend_get_dirty_bitmap(int iommufd, uint32_t hwpt_id, uint64_t iova, uint64_t size, uint64_t page_size, int ret) " iommufd=%d hwpt=%u iova=0x%"PRIx64" size=0x%"PRIx64" page_size=0x%"PRIx64" (%d)"
+iommufd_backend_invalidate_cache(int iommufd, uint32_t id, uint32_t data_type, uint32_t entry_len, uint32_t entry_num, uint32_t done_num, uint64_t data_ptr, int ret) " iommufd=%d id=%u data_type=%u entry_len=%u entry_num=%u done_num=%u data_ptr=0x%"PRIx64" (%d)"
diff --git a/block.c b/block.c
index f222e1a..bfd4340 100644
--- a/block.c
+++ b/block.c
@@ -106,9 +106,9 @@ static void bdrv_reopen_abort(BDRVReopenState *reopen_state);
static bool bdrv_backing_overridden(BlockDriverState *bs);
-static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx,
- GHashTable *visited, Transaction *tran,
- Error **errp);
+static bool GRAPH_RDLOCK
+bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx,
+ GHashTable *visited, Transaction *tran, Error **errp);
/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
@@ -1226,9 +1226,10 @@ static int bdrv_child_cb_inactivate(BdrvChild *child)
return 0;
}
-static bool bdrv_child_cb_change_aio_ctx(BdrvChild *child, AioContext *ctx,
- GHashTable *visited, Transaction *tran,
- Error **errp)
+static bool GRAPH_RDLOCK
+bdrv_child_cb_change_aio_ctx(BdrvChild *child, AioContext *ctx,
+ GHashTable *visited, Transaction *tran,
+ Error **errp)
{
BlockDriverState *bs = child->opaque;
return bdrv_change_aio_context(bs, ctx, visited, tran, errp);
@@ -1720,12 +1721,14 @@ bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name,
open_failed:
bs->drv = NULL;
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
if (bs->file != NULL) {
bdrv_unref_child(bs, bs->file);
assert(!bs->file);
}
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
g_free(bs->opaque);
bs->opaque = NULL;
@@ -3027,7 +3030,8 @@ static void GRAPH_WRLOCK bdrv_attach_child_common_abort(void *opaque)
bdrv_replace_child_noperm(s->child, NULL);
if (bdrv_get_aio_context(bs) != s->old_child_ctx) {
- bdrv_try_change_aio_context(bs, s->old_child_ctx, NULL, &error_abort);
+ bdrv_try_change_aio_context_locked(bs, s->old_child_ctx, NULL,
+ &error_abort);
}
if (bdrv_child_get_parent_aio_context(s->child) != s->old_parent_ctx) {
@@ -3069,6 +3073,9 @@ static TransactionActionDrv bdrv_attach_child_common_drv = {
*
* Both @parent_bs and @child_bs can move to a different AioContext in this
* function.
+ *
+ * All block nodes must be drained before this function is called until after
+ * the transaction is finalized.
*/
static BdrvChild * GRAPH_WRLOCK
bdrv_attach_child_common(BlockDriverState *child_bs,
@@ -3112,8 +3119,8 @@ bdrv_attach_child_common(BlockDriverState *child_bs,
parent_ctx = bdrv_child_get_parent_aio_context(new_child);
if (child_ctx != parent_ctx) {
Error *local_err = NULL;
- int ret = bdrv_try_change_aio_context(child_bs, parent_ctx, NULL,
- &local_err);
+ int ret = bdrv_try_change_aio_context_locked(child_bs, parent_ctx, NULL,
+ &local_err);
if (ret < 0 && child_class->change_aio_ctx) {
Transaction *aio_ctx_tran = tran_new();
@@ -3179,6 +3186,9 @@ bdrv_attach_child_common(BlockDriverState *child_bs,
*
* After calling this function, the transaction @tran may only be completed
* while holding a writer lock for the graph.
+ *
+ * All block nodes must be drained before this function is called until after
+ * the transaction is finalized.
*/
static BdrvChild * GRAPH_WRLOCK
bdrv_attach_child_noperm(BlockDriverState *parent_bs,
@@ -3220,6 +3230,8 @@ bdrv_attach_child_noperm(BlockDriverState *parent_bs,
*
* On failure NULL is returned, errp is set and the reference to
* child_bs is also dropped.
+ *
+ * All block nodes must be drained.
*/
BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
const char *child_name,
@@ -3259,6 +3271,8 @@ out:
*
* On failure NULL is returned, errp is set and the reference to
* child_bs is also dropped.
+ *
+ * All block nodes must be drained.
*/
BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
BlockDriverState *child_bs,
@@ -3293,7 +3307,11 @@ out:
return ret < 0 ? NULL : child;
}
-/* Callers must ensure that child->frozen is false. */
+/*
+ * Callers must ensure that child->frozen is false.
+ *
+ * All block nodes must be drained.
+ */
void bdrv_root_unref_child(BdrvChild *child)
{
BlockDriverState *child_bs = child->bs;
@@ -3314,8 +3332,8 @@ void bdrv_root_unref_child(BdrvChild *child)
* When the parent requiring a non-default AioContext is removed, the
* node moves back to the main AioContext
*/
- bdrv_try_change_aio_context(child_bs, qemu_get_aio_context(), NULL,
- NULL);
+ bdrv_try_change_aio_context_locked(child_bs, qemu_get_aio_context(),
+ NULL, NULL);
}
bdrv_schedule_unref(child_bs);
@@ -3388,7 +3406,11 @@ bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child,
}
}
-/* Callers must ensure that child->frozen is false. */
+/*
+ * Callers must ensure that child->frozen is false.
+ *
+ * All block nodes must be drained.
+ */
void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
{
GLOBAL_STATE_CODE();
@@ -3453,6 +3475,9 @@ static BdrvChildRole bdrv_backing_role(BlockDriverState *bs)
*
* After calling this function, the transaction @tran may only be completed
* while holding a writer lock for the graph.
+ *
+ * All block nodes must be drained before this function is called until after
+ * the transaction is finalized.
*/
static int GRAPH_WRLOCK
bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs,
@@ -3545,8 +3570,7 @@ out:
* Both @bs and @backing_hd can move to a different AioContext in this
* function.
*
- * If a backing child is already present (i.e. we're detaching a node), that
- * child node must be drained.
+ * All block nodes must be drained.
*/
int bdrv_set_backing_hd_drained(BlockDriverState *bs,
BlockDriverState *backing_hd,
@@ -3575,21 +3599,14 @@ out:
int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
Error **errp)
{
- BlockDriverState *drain_bs;
int ret;
GLOBAL_STATE_CODE();
- bdrv_graph_rdlock_main_loop();
- drain_bs = bs->backing ? bs->backing->bs : bs;
- bdrv_graph_rdunlock_main_loop();
-
- bdrv_ref(drain_bs);
- bdrv_drained_begin(drain_bs);
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp);
bdrv_graph_wrunlock();
- bdrv_drained_end(drain_bs);
- bdrv_unref(drain_bs);
+ bdrv_drain_all_end();
return ret;
}
@@ -3780,10 +3797,12 @@ static BdrvChild *bdrv_open_child_common(const char *filename,
return NULL;
}
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role,
errp);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
return child;
}
@@ -4358,9 +4377,7 @@ bdrv_recurse_has_child(BlockDriverState *bs, BlockDriverState *child)
* returns a pointer to bs_queue, which is either the newly allocated
* bs_queue, or the existing bs_queue being used.
*
- * bs is drained here and undrained by bdrv_reopen_queue_free().
- *
- * To be called with bs->aio_context locked.
+ * bs must be drained.
*/
static BlockReopenQueue * GRAPH_RDLOCK
bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockDriverState *bs,
@@ -4379,12 +4396,7 @@ bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockDriverState *bs,
GLOBAL_STATE_CODE();
- /*
- * Strictly speaking, draining is illegal under GRAPH_RDLOCK. We know that
- * we've been called with bdrv_graph_rdlock_main_loop(), though, so it's ok
- * in practice.
- */
- bdrv_drained_begin(bs);
+ assert(bs->quiesce_counter > 0);
if (bs_queue == NULL) {
bs_queue = g_new0(BlockReopenQueue, 1);
@@ -4519,12 +4531,17 @@ bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockDriverState *bs,
return bs_queue;
}
-/* To be called with bs->aio_context locked */
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
BlockDriverState *bs,
QDict *options, bool keep_old_opts)
{
GLOBAL_STATE_CODE();
+
+ if (bs_queue == NULL) {
+ /* Paired with bdrv_drain_all_end() in bdrv_reopen_queue_free(). */
+ bdrv_drain_all_begin();
+ }
+
GRAPH_RDLOCK_GUARD_MAINLOOP();
return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, 0, false,
@@ -4537,12 +4554,14 @@ void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue)
if (bs_queue) {
BlockReopenQueueEntry *bs_entry, *next;
QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
- bdrv_drained_end(bs_entry->state.bs);
qobject_unref(bs_entry->state.explicit_options);
qobject_unref(bs_entry->state.options);
g_free(bs_entry);
}
g_free(bs_queue);
+
+ /* Paired with bdrv_drain_all_begin() in bdrv_reopen_queue(). */
+ bdrv_drain_all_end();
}
}
@@ -4709,6 +4728,9 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
* Return 0 on success, otherwise return < 0 and set @errp.
*
* @reopen_state->bs can move to a different AioContext in this function.
+ *
+ * All block nodes must be drained before this function is called until after
+ * the transaction is finalized.
*/
static int GRAPH_UNLOCKED
bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
@@ -4802,7 +4824,7 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
if (old_child_bs) {
bdrv_ref(old_child_bs);
- bdrv_drained_begin(old_child_bs);
+ assert(old_child_bs->quiesce_counter > 0);
}
bdrv_graph_rdunlock_main_loop();
@@ -4814,7 +4836,6 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
bdrv_graph_wrunlock();
if (old_child_bs) {
- bdrv_drained_end(old_child_bs);
bdrv_unref(old_child_bs);
}
@@ -4843,6 +4864,9 @@ out_rdlock:
*
* After calling this function, the transaction @change_child_tran may only be
* completed while holding a writer lock for the graph.
+ *
+ * All block nodes must be drained before this function is called until after
+ * the transaction is finalized.
*/
static int GRAPH_UNLOCKED
bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
@@ -5156,6 +5180,7 @@ static void bdrv_close(BlockDriverState *bs)
bs->drv = NULL;
}
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
bdrv_unref_child(bs, child);
@@ -5164,6 +5189,7 @@ static void bdrv_close(BlockDriverState *bs)
assert(!bs->backing);
assert(!bs->file);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
g_free(bs->opaque);
bs->opaque = NULL;
@@ -5489,9 +5515,7 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
assert(!bs_new->backing);
bdrv_graph_rdunlock_main_loop();
- bdrv_drained_begin(bs_top);
- bdrv_drained_begin(bs_new);
-
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
child = bdrv_attach_child_noperm(bs_new, bs_top, "backing",
@@ -5513,9 +5537,7 @@ out:
bdrv_refresh_limits(bs_top, NULL, NULL);
bdrv_graph_wrunlock();
-
- bdrv_drained_end(bs_top);
- bdrv_drained_end(bs_new);
+ bdrv_drain_all_end();
return ret;
}
@@ -6989,6 +7011,8 @@ bdrv_inactivate_recurse(BlockDriverState *bs, bool top_level)
GLOBAL_STATE_CODE();
+ assert(bs->quiesce_counter > 0);
+
if (!bs->drv) {
return -ENOMEDIUM;
}
@@ -7032,9 +7056,7 @@ bdrv_inactivate_recurse(BlockDriverState *bs, bool top_level)
return -EPERM;
}
- bdrv_drained_begin(bs);
bs->open_flags |= BDRV_O_INACTIVE;
- bdrv_drained_end(bs);
/*
* Update permissions, they may differ for inactive nodes.
@@ -7059,20 +7081,26 @@ int bdrv_inactivate(BlockDriverState *bs, Error **errp)
int ret;
GLOBAL_STATE_CODE();
- GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+ bdrv_drain_all_begin();
+ bdrv_graph_rdlock_main_loop();
if (bdrv_has_bds_parent(bs, true)) {
error_setg(errp, "Node has active parent node");
- return -EPERM;
+ ret = -EPERM;
+ goto out;
}
ret = bdrv_inactivate_recurse(bs, true);
if (ret < 0) {
error_setg_errno(errp, -ret, "Failed to inactivate node");
- return ret;
+ goto out;
}
- return 0;
+out:
+ bdrv_graph_rdunlock_main_loop();
+ bdrv_drain_all_end();
+ return ret;
}
int bdrv_inactivate_all(void)
@@ -7082,7 +7110,9 @@ int bdrv_inactivate_all(void)
int ret = 0;
GLOBAL_STATE_CODE();
- GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+ bdrv_drain_all_begin();
+ bdrv_graph_rdlock_main_loop();
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
/* Nodes with BDS parents are covered by recursion from the last
@@ -7098,6 +7128,9 @@ int bdrv_inactivate_all(void)
}
}
+ bdrv_graph_rdunlock_main_loop();
+ bdrv_drain_all_end();
+
return ret;
}
@@ -7278,10 +7311,6 @@ bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
return true;
}
-/*
- * Must not be called while holding the lock of an AioContext other than the
- * current one.
- */
void bdrv_img_create(const char *filename, const char *fmt,
const char *base_filename, const char *base_fmt,
char *options, uint64_t img_size, int flags, bool quiet,
@@ -7568,10 +7597,21 @@ typedef struct BdrvStateSetAioContext {
BlockDriverState *bs;
} BdrvStateSetAioContext;
-static bool bdrv_parent_change_aio_context(BdrvChild *c, AioContext *ctx,
- GHashTable *visited,
- Transaction *tran,
- Error **errp)
+/*
+ * Changes the AioContext of @child to @ctx and recursively for the associated
+ * block nodes and all their children and parents. Returns true if the change is
+ * possible and the transaction @tran can be continued. Returns false and sets
+ * @errp if not and the transaction must be aborted.
+ *
+ * @visited will accumulate all visited BdrvChild objects. The caller is
+ * responsible for freeing the list afterwards.
+ *
+ * Must be called with the affected block nodes drained.
+ */
+static bool GRAPH_RDLOCK
+bdrv_parent_change_aio_context(BdrvChild *c, AioContext *ctx,
+ GHashTable *visited, Transaction *tran,
+ Error **errp)
{
GLOBAL_STATE_CODE();
if (g_hash_table_contains(visited, c)) {
@@ -7596,6 +7636,17 @@ static bool bdrv_parent_change_aio_context(BdrvChild *c, AioContext *ctx,
return true;
}
+/*
+ * Changes the AioContext of @c->bs to @ctx and recursively for all its children
+ * and parents. Returns true if the change is possible and the transaction @tran
+ * can be continued. Returns false and sets @errp if not and the transaction
+ * must be aborted.
+ *
+ * @visited will accumulate all visited BdrvChild objects. The caller is
+ * responsible for freeing the list afterwards.
+ *
+ * Must be called with the affected block nodes drained.
+ */
bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx,
GHashTable *visited, Transaction *tran,
Error **errp)
@@ -7611,10 +7662,6 @@ bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx,
static void bdrv_set_aio_context_clean(void *opaque)
{
BdrvStateSetAioContext *state = (BdrvStateSetAioContext *) opaque;
- BlockDriverState *bs = (BlockDriverState *) state->bs;
-
- /* Paired with bdrv_drained_begin in bdrv_change_aio_context() */
- bdrv_drained_end(bs);
g_free(state);
}
@@ -7642,10 +7689,12 @@ static TransactionActionDrv set_aio_context = {
*
* @visited will accumulate all visited BdrvChild objects. The caller is
* responsible for freeing the list afterwards.
+ *
+ * @bs must be drained.
*/
-static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx,
- GHashTable *visited, Transaction *tran,
- Error **errp)
+static bool GRAPH_RDLOCK
+bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx,
+ GHashTable *visited, Transaction *tran, Error **errp)
{
BdrvChild *c;
BdrvStateSetAioContext *state;
@@ -7656,21 +7705,17 @@ static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx,
return true;
}
- bdrv_graph_rdlock_main_loop();
QLIST_FOREACH(c, &bs->parents, next_parent) {
if (!bdrv_parent_change_aio_context(c, ctx, visited, tran, errp)) {
- bdrv_graph_rdunlock_main_loop();
return false;
}
}
QLIST_FOREACH(c, &bs->children, next) {
if (!bdrv_child_change_aio_context(c, ctx, visited, tran, errp)) {
- bdrv_graph_rdunlock_main_loop();
return false;
}
}
- bdrv_graph_rdunlock_main_loop();
state = g_new(BdrvStateSetAioContext, 1);
*state = (BdrvStateSetAioContext) {
@@ -7678,8 +7723,7 @@ static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx,
.bs = bs,
};
- /* Paired with bdrv_drained_end in bdrv_set_aio_context_clean() */
- bdrv_drained_begin(bs);
+ assert(bs->quiesce_counter > 0);
tran_add(tran, &set_aio_context, state);
@@ -7692,9 +7736,13 @@ static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx,
*
* If ignore_child is not NULL, that child (and its subgraph) will not
* be touched.
+ *
+ * Called with the graph lock held.
+ *
+ * Called while all bs are drained.
*/
-int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
- BdrvChild *ignore_child, Error **errp)
+int bdrv_try_change_aio_context_locked(BlockDriverState *bs, AioContext *ctx,
+ BdrvChild *ignore_child, Error **errp)
{
Transaction *tran;
GHashTable *visited;
@@ -7703,9 +7751,9 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
/*
* Recursion phase: go through all nodes of the graph.
- * Take care of checking that all nodes support changing AioContext
- * and drain them, building a linear list of callbacks to run if everything
- * is successful (the transaction itself).
+ * Take care of checking that all nodes support changing AioContext,
+ * building a linear list of callbacks to run if everything is successful
+ * (the transaction itself).
*/
tran = tran_new();
visited = g_hash_table_new(NULL, NULL);
@@ -7732,6 +7780,29 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
return 0;
}
+/*
+ * Change bs's and recursively all of its parents' and children's AioContext
+ * to the given new context, returning an error if that isn't possible.
+ *
+ * If ignore_child is not NULL, that child (and its subgraph) will not
+ * be touched.
+ */
+int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
+ BdrvChild *ignore_child, Error **errp)
+{
+ int ret;
+
+ GLOBAL_STATE_CODE();
+
+ bdrv_drain_all_begin();
+ bdrv_graph_rdlock_main_loop();
+ ret = bdrv_try_change_aio_context_locked(bs, ctx, ignore_child, errp);
+ bdrv_graph_rdunlock_main_loop();
+ bdrv_drain_all_end();
+
+ return ret;
+}
+
void bdrv_add_aio_context_notifier(BlockDriverState *bs,
void (*attached_aio_context)(AioContext *new_context, void *opaque),
void (*detach_aio_context)(void *opaque), void *opaque)
@@ -8159,8 +8230,10 @@ char *bdrv_dirname(BlockDriverState *bs, Error **errp)
}
/*
- * Hot add/remove a BDS's child. So the user can take a child offline when
- * it is broken and take a new child online
+ * Hot add a BDS's child. Used in combination with bdrv_del_child, so the user
+ * can take a child offline when it is broken and take a new child online.
+ *
+ * All block nodes must be drained.
*/
void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs,
Error **errp)
@@ -8200,6 +8273,12 @@ void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs,
parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp);
}
+/*
+ * Hot remove a BDS's child. Used in combination with bdrv_add_child, so the
+ * user can take a child offline when it is broken and take a new child online.
+ *
+ * All block nodes must be drained.
+ */
void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp)
{
BdrvChild *tmp;
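
The block.c changes above all follow one locking discipline: graph modifications now
require every block node to be drained before the graph writer lock is taken, instead
of draining individual nodes inside the helpers. A QEMU-internal sketch of the
resulting caller pattern (example_detach_child() is an invented helper; it assumes
the usual block-layer headers):

    #include "qemu/osdep.h"
    #include "block/block_int.h"

    /* Detach a child with the locking discipline this series establishes. */
    static void example_detach_child(BlockDriverState *bs, BdrvChild *child)
    {
        /* Quiesce I/O on all nodes; graph changes expect a drained graph. */
        bdrv_drain_all_begin();
        bdrv_graph_wrlock();

        bdrv_unref_child(bs, child);    /* "All block nodes must be drained." */

        bdrv_graph_wrunlock();
        bdrv_drain_all_end();
    }

The same begin/wrlock ... wrunlock/end bracketing is what the hunks below add to
backup, blklogwrites, blkverify, block-backend, commit, mirror, qcow2, quorum,
replication, snapshot, stream, vmdk, blockdev and blockjob.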
diff --git a/block/backup.c b/block/backup.c
index 0151e84..909027c 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -498,10 +498,12 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
block_copy_set_speed(bcs, speed);
/* Required permissions are taken by copy-before-write filter target */
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
&error_abort);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
return &job->common;
diff --git a/block/blklogwrites.c b/block/blklogwrites.c
index b0f78c4..70ac76f 100644
--- a/block/blklogwrites.c
+++ b/block/blklogwrites.c
@@ -281,9 +281,11 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
ret = 0;
fail_log:
if (ret < 0) {
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_unref_child(bs, s->log_file);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
s->log_file = NULL;
qemu_mutex_destroy(&s->mutex);
}
@@ -296,10 +298,12 @@ static void blk_log_writes_close(BlockDriverState *bs)
{
BDRVBlkLogWritesState *s = bs->opaque;
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_unref_child(bs, s->log_file);
s->log_file = NULL;
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
qemu_mutex_destroy(&s->mutex);
}
diff --git a/block/blkverify.c b/block/blkverify.c
index db79a36..3a71f74 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -151,10 +151,12 @@ static void blkverify_close(BlockDriverState *bs)
{
BDRVBlkverifyState *s = bs->opaque;
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_unref_child(bs, s->test_file);
s->test_file = NULL;
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
}
static int64_t coroutine_fn GRAPH_RDLOCK
diff --git a/block/block-backend.c b/block/block-backend.c
index a402db1..68209bb 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -136,9 +136,9 @@ static void blk_root_drained_end(BdrvChild *child);
static void blk_root_change_media(BdrvChild *child, bool load);
static void blk_root_resize(BdrvChild *child);
-static bool blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx,
- GHashTable *visited, Transaction *tran,
- Error **errp);
+static bool GRAPH_RDLOCK
+blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx, GHashTable *visited,
+ Transaction *tran, Error **errp);
static char *blk_root_get_parent_desc(BdrvChild *child)
{
@@ -889,9 +889,11 @@ void blk_remove_bs(BlockBackend *blk)
root = blk->root;
blk->root = NULL;
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_root_unref_child(root);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
}
/*
@@ -904,6 +906,7 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
GLOBAL_STATE_CODE();
bdrv_ref(bs);
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
if ((bs->open_flags & BDRV_O_INACTIVE) && blk_can_inactivate(blk)) {
@@ -919,6 +922,7 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
perm, shared_perm, blk, errp);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
if (blk->root == NULL) {
return -EPERM;
}
diff --git a/block/commit.c b/block/commit.c
index 7cc8c0f..6c4b736 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -392,6 +392,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
* this is the responsibility of the interface (i.e. whoever calls
* commit_start()).
*/
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
s->base_overlay = bdrv_find_overlay(top, base);
assert(s->base_overlay);
@@ -424,18 +425,21 @@ void commit_start(const char *job_id, BlockDriverState *bs,
iter_shared_perms, errp);
if (ret < 0) {
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
goto fail;
}
}
if (bdrv_freeze_backing_chain(commit_top_bs, base, errp) < 0) {
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
goto fail;
}
s->chain_frozen = true;
ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
if (ret < 0) {
goto fail;
diff --git a/block/io.c b/block/io.c
index 4fd7768..ac5c717 100644
--- a/block/io.c
+++ b/block/io.c
@@ -413,7 +413,6 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
/* At this point, we should be always running in the main loop. */
GLOBAL_STATE_CODE();
assert(bs->quiesce_counter > 0);
- GLOBAL_STATE_CODE();
/* Re-enable things in child-to-parent order */
old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
diff --git a/block/mirror.c b/block/mirror.c
index c2c5099..6e8caf4 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -2014,6 +2014,7 @@ static BlockJob *mirror_start_job(
*/
bdrv_disable_dirty_bitmap(s->dirty_bitmap);
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
ret = block_job_add_bdrv(&s->common, "source", bs, 0,
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE |
@@ -2021,6 +2022,7 @@ static BlockJob *mirror_start_job(
errp);
if (ret < 0) {
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
goto fail;
}
@@ -2066,16 +2068,19 @@ static BlockJob *mirror_start_job(
iter_shared_perms, errp);
if (ret < 0) {
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
goto fail;
}
}
if (bdrv_freeze_backing_chain(mirror_top_bs, target, errp) < 0) {
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
goto fail;
}
}
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
QTAILQ_INIT(&s->ops_in_flight);
diff --git a/block/qcow2.c b/block/qcow2.c
index 66fba89..45451a7 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1895,7 +1895,9 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags,
g_free(s->image_data_file);
if (open_data_file && has_data_file(bs)) {
bdrv_graph_co_rdunlock();
+ bdrv_drain_all_begin();
bdrv_co_unref_child(bs, s->data_file);
+ bdrv_drain_all_end();
bdrv_graph_co_rdlock();
s->data_file = NULL;
}
@@ -2821,9 +2823,11 @@ qcow2_do_close(BlockDriverState *bs, bool close_data_file)
if (close_data_file && has_data_file(bs)) {
GLOBAL_STATE_CODE();
bdrv_graph_rdunlock_main_loop();
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_unref_child(bs, s->data_file);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
s->data_file = NULL;
bdrv_graph_rdlock_main_loop();
}
diff --git a/block/quorum.c b/block/quorum.c
index ed8ce80..cc3bc5f 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -1037,6 +1037,7 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
close_exit:
/* cleanup on error */
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
for (i = 0; i < s->num_children; i++) {
if (!opened[i]) {
@@ -1045,6 +1046,7 @@ close_exit:
bdrv_unref_child(bs, s->children[i]);
}
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
g_free(s->children);
g_free(opened);
exit:
@@ -1057,11 +1059,13 @@ static void quorum_close(BlockDriverState *bs)
BDRVQuorumState *s = bs->opaque;
int i;
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
for (i = 0; i < s->num_children; i++) {
bdrv_unref_child(bs, s->children[i]);
}
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
g_free(s->children);
}
diff --git a/block/replication.c b/block/replication.c
index 07f274d..0879718 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -540,6 +540,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
return;
}
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_ref(hidden_disk->bs);
@@ -549,6 +550,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
if (local_err) {
error_propagate(errp, local_err);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
return;
}
@@ -559,6 +561,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
if (local_err) {
error_propagate(errp, local_err);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
return;
}
@@ -571,12 +574,14 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
!check_top_bs(top_bs, bs)) {
error_setg(errp, "No top_bs or it is invalid");
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
reopen_backing_file(bs, false, NULL);
return;
}
bdrv_op_block_all(top_bs, s->blocker);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
s->backup_job = backup_job_create(
NULL, s->secondary_disk->bs, s->hidden_disk->bs,
@@ -651,12 +656,14 @@ static void replication_done(void *opaque, int ret)
if (ret == 0) {
s->stage = BLOCK_REPLICATION_DONE;
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_unref_child(bs, s->secondary_disk);
s->secondary_disk = NULL;
bdrv_unref_child(bs, s->hidden_disk);
s->hidden_disk = NULL;
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
s->error = 0;
} else {
diff --git a/block/snapshot.c b/block/snapshot.c
index 22567f1..28c9c43 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -291,9 +291,11 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
}
/* .bdrv_open() will re-attach it */
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_unref_child(bs, fallback);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
ret = bdrv_snapshot_goto(fallback_bs, snapshot_id, errp);
memset(bs->opaque, 0, drv->instance_size);
@@ -327,7 +329,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
/**
* Delete an internal snapshot by @snapshot_id and @name.
- * @bs: block device used in the operation
+ * @bs: block device used in the operation, must be drained
* @snapshot_id: unique snapshot ID, or NULL
* @name: snapshot name, or NULL
* @errp: location to store error
@@ -358,6 +360,8 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
GLOBAL_STATE_CODE();
+ assert(bs->quiesce_counter > 0);
+
if (!drv) {
error_setg(errp, "Device '%s' has no medium",
bdrv_get_device_name(bs));
@@ -368,9 +372,6 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
return -EINVAL;
}
- /* drain all pending i/o before deleting snapshot */
- bdrv_drained_begin(bs);
-
if (drv->bdrv_snapshot_delete) {
ret = drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp);
} else if (fallback_bs) {
@@ -382,7 +383,6 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
ret = -ENOTSUP;
}
- bdrv_drained_end(bs);
return ret;
}
@@ -571,19 +571,22 @@ int bdrv_all_delete_snapshot(const char *name,
ERRP_GUARD();
g_autoptr(GList) bdrvs = NULL;
GList *iterbdrvs;
+ int ret = 0;
GLOBAL_STATE_CODE();
- GRAPH_RDLOCK_GUARD_MAINLOOP();
- if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
- return -1;
+ bdrv_drain_all_begin();
+ bdrv_graph_rdlock_main_loop();
+
+ ret = bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp);
+ if (ret < 0) {
+ goto out;
}
iterbdrvs = bdrvs;
while (iterbdrvs) {
BlockDriverState *bs = iterbdrvs->data;
QEMUSnapshotInfo sn1, *snapshot = &sn1;
- int ret = 0;
if ((devices || bdrv_all_snapshots_includes_bs(bs)) &&
bdrv_snapshot_find(bs, snapshot, name) >= 0)
@@ -594,13 +597,16 @@ int bdrv_all_delete_snapshot(const char *name,
if (ret < 0) {
error_prepend(errp, "Could not delete snapshot '%s' on '%s': ",
name, bdrv_get_device_or_node_name(bs));
- return -1;
+ goto out;
}
iterbdrvs = iterbdrvs->next;
}
- return 0;
+out:
+ bdrv_graph_rdunlock_main_loop();
+ bdrv_drain_all_end();
+ return ret;
}
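
bdrv_snapshot_delete() no longer drains internally; it now asserts that the caller
has already quiesced the node. A QEMU-internal sketch of the resulting calling
convention (example_delete_snapshot() is an invented helper; bdrv_all_delete_snapshot()
above shows the real in-tree usage):

    #include "qemu/osdep.h"
    #include "qapi/error.h"
    #include "block/block_int.h"
    #include "block/snapshot.h"

    /* Delete one snapshot by name under the contract this patch introduces. */
    static int example_delete_snapshot(BlockDriverState *bs, const char *name,
                                       Error **errp)
    {
        int ret;

        bdrv_drain_all_begin();              /* the caller drains, not the callee */
        bdrv_graph_rdlock_main_loop();

        ret = bdrv_snapshot_delete(bs, NULL, name, errp);

        bdrv_graph_rdunlock_main_loop();
        bdrv_drain_all_end();
        return ret;
    }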
diff --git a/block/stream.c b/block/stream.c
index 999d9e5..f5441f2 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -80,11 +80,10 @@ static int stream_prepare(Job *job)
* may end up working with the wrong base node (or it might even have gone
* away by the time we want to use it).
*/
- bdrv_drained_begin(unfiltered_bs);
if (unfiltered_bs_cow) {
bdrv_ref(unfiltered_bs_cow);
- bdrv_drained_begin(unfiltered_bs_cow);
}
+ bdrv_drain_all_begin();
bdrv_graph_rdlock_main_loop();
base = bdrv_filter_or_cow_bs(s->above_base);
@@ -123,11 +122,10 @@ static int stream_prepare(Job *job)
}
out:
+ bdrv_drain_all_end();
if (unfiltered_bs_cow) {
- bdrv_drained_end(unfiltered_bs_cow);
bdrv_unref(unfiltered_bs_cow);
}
- bdrv_drained_end(unfiltered_bs);
return ret;
}
@@ -373,10 +371,12 @@ void stream_start(const char *job_id, BlockDriverState *bs,
* already have our own plans. Also don't allow resize as the image size is
* queried only at the job start and then cached.
*/
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
if (block_job_add_bdrv(&s->common, "active node", bs, 0,
basic_flags | BLK_PERM_WRITE, errp)) {
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
goto fail;
}
@@ -397,10 +397,12 @@ void stream_start(const char *job_id, BlockDriverState *bs,
basic_flags, errp);
if (ret < 0) {
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
goto fail;
}
}
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
s->base_overlay = base_overlay;
s->above_base = above_base;
diff --git a/block/vmdk.c b/block/vmdk.c
index 9c7ab03..89a7250 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -271,6 +271,7 @@ static void vmdk_free_extents(BlockDriverState *bs)
BDRVVmdkState *s = bs->opaque;
VmdkExtent *e;
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
for (i = 0; i < s->num_extents; i++) {
e = &s->extents[i];
@@ -283,6 +284,7 @@ static void vmdk_free_extents(BlockDriverState *bs)
}
}
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
g_free(s->extents);
}
@@ -1247,9 +1249,11 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options,
0, 0, 0, 0, 0, &extent, errp);
if (ret < 0) {
bdrv_graph_rdunlock_main_loop();
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_unref_child(bs, extent_file);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
bdrv_graph_rdlock_main_loop();
goto out;
}
@@ -1266,9 +1270,11 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options,
g_free(buf);
if (ret) {
bdrv_graph_rdunlock_main_loop();
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_unref_child(bs, extent_file);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
bdrv_graph_rdlock_main_loop();
goto out;
}
@@ -1277,9 +1283,11 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options,
ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp);
if (ret) {
bdrv_graph_rdunlock_main_loop();
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_unref_child(bs, extent_file);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
bdrv_graph_rdlock_main_loop();
goto out;
}
@@ -1287,9 +1295,11 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options,
} else {
error_setg(errp, "Unsupported extent type '%s'", type);
bdrv_graph_rdunlock_main_loop();
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_unref_child(bs, extent_file);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
bdrv_graph_rdlock_main_loop();
ret = -ENOTSUP;
goto out;
diff --git a/blockdev.c b/blockdev.c
index 21443b4..2e7fda6 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1132,39 +1132,41 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device,
int ret;
GLOBAL_STATE_CODE();
- GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+ bdrv_drain_all_begin();
+ bdrv_graph_rdlock_main_loop();
bs = qmp_get_root_bs(device, errp);
if (!bs) {
- return NULL;
+ goto error;
}
if (!id && !name) {
error_setg(errp, "Name or id must be provided");
- return NULL;
+ goto error;
}
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, errp)) {
- return NULL;
+ goto error;
}
ret = bdrv_snapshot_find_by_id_and_name(bs, id, name, &sn, &local_err);
if (local_err) {
error_propagate(errp, local_err);
- return NULL;
+ goto error;
}
if (!ret) {
error_setg(errp,
"Snapshot with id '%s' and name '%s' does not exist on "
"device '%s'",
STR_OR_NULL(id), STR_OR_NULL(name), device);
- return NULL;
+ goto error;
}
bdrv_snapshot_delete(bs, id, name, &local_err);
if (local_err) {
error_propagate(errp, local_err);
- return NULL;
+ goto error;
}
info = g_new0(SnapshotInfo, 1);
@@ -1180,6 +1182,9 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device,
info->has_icount = true;
}
+error:
+ bdrv_graph_rdunlock_main_loop();
+ bdrv_drain_all_end();
return info;
}
@@ -1203,7 +1208,7 @@ static void internal_snapshot_action(BlockdevSnapshotInternal *internal,
Error *local_err = NULL;
const char *device;
const char *name;
- BlockDriverState *bs;
+ BlockDriverState *bs, *check_bs;
QEMUSnapshotInfo old_sn, *sn;
bool ret;
int64_t rt;
@@ -1211,7 +1216,7 @@ static void internal_snapshot_action(BlockdevSnapshotInternal *internal,
int ret1;
GLOBAL_STATE_CODE();
- GRAPH_RDLOCK_GUARD_MAINLOOP();
+ bdrv_graph_rdlock_main_loop();
tran_add(tran, &internal_snapshot_drv, state);
@@ -1220,14 +1225,29 @@ static void internal_snapshot_action(BlockdevSnapshotInternal *internal,
bs = qmp_get_root_bs(device, errp);
if (!bs) {
+ bdrv_graph_rdunlock_main_loop();
return;
}
state->bs = bs;
+ /* Need to drain while unlocked. */
+ bdrv_graph_rdunlock_main_loop();
/* Paired with .clean() */
bdrv_drained_begin(bs);
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+ /* Make sure the root bs did not change with the drain. */
+ check_bs = qmp_get_root_bs(device, errp);
+ if (bs != check_bs) {
+ if (check_bs) {
+ error_setg(errp, "Block node of device '%s' unexpectedly changed",
+ device);
+ } /* else errp is already set */
+ return;
+ }
+
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, errp)) {
return;
}
@@ -1295,12 +1315,14 @@ static void internal_snapshot_abort(void *opaque)
Error *local_error = NULL;
GLOBAL_STATE_CODE();
- GRAPH_RDLOCK_GUARD_MAINLOOP();
if (!state->created) {
return;
}
+ bdrv_drain_all_begin();
+ bdrv_graph_rdlock_main_loop();
+
if (bdrv_snapshot_delete(bs, sn->id_str, sn->name, &local_error) < 0) {
error_reportf_err(local_error,
"Failed to delete snapshot with id '%s' and "
@@ -1308,6 +1330,8 @@ static void internal_snapshot_abort(void *opaque)
sn->id_str, sn->name,
bdrv_get_device_name(bs));
}
+ bdrv_graph_rdunlock_main_loop();
+ bdrv_drain_all_end();
}
static void internal_snapshot_clean(void *opaque)
@@ -1353,9 +1377,10 @@ static void external_snapshot_action(TransactionAction *action,
const char *new_image_file;
ExternalSnapshotState *state = g_new0(ExternalSnapshotState, 1);
uint64_t perm, shared;
+ BlockDriverState *check_bs;
/* TODO We'll eventually have to take a writer lock in this function */
- GRAPH_RDLOCK_GUARD_MAINLOOP();
+ bdrv_graph_rdlock_main_loop();
tran_add(tran, &external_snapshot_drv, state);
@@ -1388,11 +1413,25 @@ static void external_snapshot_action(TransactionAction *action,
state->old_bs = bdrv_lookup_bs(device, node_name, errp);
if (!state->old_bs) {
+ bdrv_graph_rdunlock_main_loop();
return;
}
+ /* Need to drain while unlocked. */
+ bdrv_graph_rdunlock_main_loop();
/* Paired with .clean() */
bdrv_drained_begin(state->old_bs);
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
+ /* Make sure the associated bs did not change with the drain. */
+ check_bs = bdrv_lookup_bs(device, node_name, errp);
+ if (state->old_bs != check_bs) {
+ if (check_bs) {
+ error_setg(errp, "Block node of device '%s' unexpectedly changed",
+ device);
+ } /* else errp is already set */
+ return;
+ }
if (!bdrv_is_inserted(state->old_bs)) {
error_setg(errp, "Device '%s' has no medium",
@@ -3522,6 +3561,7 @@ void qmp_x_blockdev_change(const char *parent, const char *child,
BlockDriverState *parent_bs, *new_bs = NULL;
BdrvChild *p_child;
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
parent_bs = bdrv_lookup_bs(parent, parent, errp);
@@ -3559,6 +3599,7 @@ void qmp_x_blockdev_change(const char *parent, const char *child,
out:
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
}
BlockJobInfoList *qmp_query_block_jobs(Error **errp)
@@ -3592,12 +3633,13 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread,
AioContext *new_context;
BlockDriverState *bs;
- GRAPH_RDLOCK_GUARD_MAINLOOP();
+ bdrv_drain_all_begin();
+ bdrv_graph_rdlock_main_loop();
bs = bdrv_find_node(node_name);
if (!bs) {
error_setg(errp, "Failed to find node with node-name='%s'", node_name);
- return;
+ goto out;
}
/* Protects against accidents. */
@@ -3605,14 +3647,14 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread,
error_setg(errp, "Node %s is associated with a BlockBackend and could "
"be in use (use force=true to override this check)",
node_name);
- return;
+ goto out;
}
if (iothread->type == QTYPE_QSTRING) {
IOThread *obj = iothread_by_id(iothread->u.s);
if (!obj) {
error_setg(errp, "Cannot find iothread %s", iothread->u.s);
- return;
+ goto out;
}
new_context = iothread_get_aio_context(obj);
@@ -3620,7 +3662,11 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread,
new_context = qemu_get_aio_context();
}
- bdrv_try_change_aio_context(bs, new_context, NULL, errp);
+ bdrv_try_change_aio_context_locked(bs, new_context, NULL, errp);
+
+out:
+ bdrv_graph_rdunlock_main_loop();
+ bdrv_drain_all_end();
}
QemuOptsList qemu_common_drive_opts = {
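
The internal_snapshot_action() and external_snapshot_action() hunks above re-resolve
the node after draining it, because bdrv_drained_begin() polls the main loop and the
graph may change underneath. A QEMU-internal sketch of that check
(example_get_drained_root() is an invented helper):

    #include "qemu/osdep.h"
    #include "qapi/error.h"
    #include "block/block_int.h"

    /*
     * Resolve a node by name, drain it, and verify that the drain did not
     * replace it with a different node.
     */
    static BlockDriverState *example_get_drained_root(const char *device,
                                                      Error **errp)
    {
        BlockDriverState *bs, *check_bs;

        bdrv_graph_rdlock_main_loop();
        bs = bdrv_lookup_bs(device, device, errp);
        bdrv_graph_rdunlock_main_loop();
        if (!bs) {
            return NULL;
        }

        bdrv_drained_begin(bs);     /* must not hold the graph lock here */

        bdrv_graph_rdlock_main_loop();
        check_bs = bdrv_lookup_bs(device, device, errp);
        bdrv_graph_rdunlock_main_loop();

        if (bs != check_bs) {
            if (check_bs) {
                error_setg(errp, "Block node of device '%s' unexpectedly changed",
                           device);
            } /* else errp is already set by the failed lookup */
            bdrv_drained_end(bs);
            return NULL;
        }

        return bs;                  /* caller pairs this with bdrv_drained_end(bs) */
    }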
diff --git a/blockjob.c b/blockjob.c
index 32007f3..e68181a 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -144,9 +144,9 @@ static TransactionActionDrv change_child_job_context = {
.clean = g_free,
};
-static bool child_job_change_aio_ctx(BdrvChild *c, AioContext *ctx,
- GHashTable *visited, Transaction *tran,
- Error **errp)
+static bool GRAPH_RDLOCK
+child_job_change_aio_ctx(BdrvChild *c, AioContext *ctx, GHashTable *visited,
+ Transaction *tran, Error **errp)
{
BlockJob *job = c->opaque;
BdrvStateChildJobContext *s;
@@ -198,6 +198,7 @@ void block_job_remove_all_bdrv(BlockJob *job)
* one to make sure that such a concurrent access does not attempt
* to process an already freed BdrvChild.
*/
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
while (job->nodes) {
GSList *l = job->nodes;
@@ -211,6 +212,7 @@ void block_job_remove_all_bdrv(BlockJob *job)
g_slist_free_1(l);
}
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
}
bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs)
@@ -496,6 +498,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
int ret;
GLOBAL_STATE_CODE();
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
if (job_id == NULL && !(flags & JOB_INTERNAL)) {
@@ -506,6 +509,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
flags, cb, opaque, errp);
if (job == NULL) {
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
return NULL;
}
@@ -544,10 +548,12 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
}
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
return job;
fail:
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
job_early_fail(&job->job);
return NULL;
}
diff --git a/rust/clippy.toml b/clippy.toml
index 58a62c0..9016172 100644
--- a/rust/clippy.toml
+++ b/clippy.toml
@@ -1,3 +1,3 @@
-doc-valid-idents = ["PrimeCell", ".."]
+doc-valid-idents = ["IrDA", "PrimeCell", ".."]
allow-mixed-uninlined-format-args = false
msrv = "1.77.0"
diff --git a/configs/devices/i386-softmmu/default.mak b/configs/devices/i386-softmmu/default.mak
index 4faf2f0..bc0479a 100644
--- a/configs/devices/i386-softmmu/default.mak
+++ b/configs/devices/i386-softmmu/default.mak
@@ -18,6 +18,7 @@
#CONFIG_QXL=n
#CONFIG_SEV=n
#CONFIG_SGA=n
+#CONFIG_TDX=n
#CONFIG_TEST_DEVICES=n
#CONFIG_TPM_CRB=n
#CONFIG_TPM_TIS_ISA=n
diff --git a/configure b/configure
index 2ce8d29..2b2b3d6 100755
--- a/configure
+++ b/configure
@@ -209,6 +209,8 @@ for opt do
;;
--rustc=*) RUSTC="$optarg"
;;
+ --rustdoc=*) RUSTDOC="$optarg"
+ ;;
--cpu=*) cpu="$optarg"
;;
--extra-cflags=*)
@@ -323,6 +325,7 @@ pkg_config="${PKG_CONFIG-${cross_prefix}pkg-config}"
sdl2_config="${SDL2_CONFIG-${cross_prefix}sdl2-config}"
rustc="${RUSTC-rustc}"
+rustdoc="${RUSTDOC-rustdoc}"
check_define() {
cat > $TMPC <<EOF
@@ -660,6 +663,8 @@ for opt do
;;
--rustc=*)
;;
+ --rustdoc=*)
+ ;;
--make=*)
;;
--install=*)
@@ -890,6 +895,7 @@ Advanced options (experts only):
--cxx=CXX use C++ compiler CXX [$cxx]
--objcc=OBJCC use Objective-C compiler OBJCC [$objcc]
--rustc=RUSTC use Rust compiler RUSTC [$rustc]
+ --rustdoc=RUSTDOC use rustdoc binary RUSTDOC [$rustdoc]
--extra-cflags=CFLAGS append extra C compiler flags CFLAGS
--extra-cxxflags=CXXFLAGS append extra C++ compiler flags CXXFLAGS
--extra-objcflags=OBJCFLAGS append extra Objective C compiler flags OBJCFLAGS
@@ -1178,6 +1184,14 @@ fi
##########################################
# detect rust triple
+meson_version=$($meson --version)
+if test "$rust" != disabled && ! version_ge "$meson_version" 1.8.1; then
+ if test "$rust" = enabled; then
+ error_exit "Rust support needs Meson 1.8.1 or newer"
+ fi
+  echo "Rust needs Meson 1.8.1 or newer, disabling" >&2
+ rust=disabled
+fi
if test "$rust" != disabled && has "$rustc" && $rustc -vV > "${TMPDIR1}/${TMPB}.out"; then
rust_host_triple=$(sed -n 's/^host: //p' "${TMPDIR1}/${TMPB}.out")
else
@@ -1893,8 +1907,10 @@ if test "$skip_meson" = no; then
if test "$rust" != disabled; then
if test "$rust_host_triple" != "$rust_target_triple"; then
echo "rust = [$(meson_quote $rustc --target "$rust_target_triple")]" >> $cross
+ echo "rustdoc = [$(meson_quote $rustdoc --target "$rust_target_triple")]" >> $cross
else
echo "rust = [$(meson_quote $rustc)]" >> $cross
+ echo "rustdoc = [$(meson_quote $rustdoc)]" >> $cross
fi
fi
echo "ar = [$(meson_quote $ar)]" >> $cross
diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
index 4715d1e..4203713 100644
--- a/docs/about/deprecated.rst
+++ b/docs/about/deprecated.rst
@@ -315,12 +315,6 @@ deprecated; use the new name ``dtb-randomness`` instead. The new name
better reflects the way this property affects all random data within
the device tree blob, not just the ``kaslr-seed`` node.
-Big-Endian variants of MicroBlaze ``petalogix-ml605`` and ``xlnx-zynqmp-pmu`` machines (since 9.2)
-''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
-
-Both ``petalogix-ml605`` and ``xlnx-zynqmp-pmu`` were added for little endian
-CPUs. Big endian support is not tested.
-
Mips ``mipssim`` machine (since 10.0)
'''''''''''''''''''''''''''''''''''''
@@ -351,6 +345,19 @@ machine must ensure that they're setting the ``spike`` machine in the
command line (``-M spike``).
+System emulator binaries
+------------------------
+
+``qemu-system-microblazeel`` (since 10.1)
+'''''''''''''''''''''''''''''''''''''''''
+
+The ``qemu-system-microblaze`` binary can emulate little-endian machines
+now, too, so the separate binary ``qemu-system-microblazeel`` (with the
+``el`` suffix) for little-endian targets is not required anymore. The
+``petalogix-s3adsp1800`` machine can now be switched to little endian by
+setting its ``endianness`` property to ``little``.
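+
+For example (illustrative only, using the property described above)::
+
+  qemu-system-microblaze -M petalogix-s3adsp1800,endianness=little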
+
+
Backend options
---------------
diff --git a/docs/about/removed-features.rst b/docs/about/removed-features.rst
index 4819cb4..d7c2113 100644
--- a/docs/about/removed-features.rst
+++ b/docs/about/removed-features.rst
@@ -1091,6 +1091,15 @@ This machine was removed because PPC 405 CPU have no known users,
firmware images are not available, OpenWRT dropped support in 2019,
U-Boot in 2017, and Linux in 2024.
+Big-Endian variants of ``petalogix-ml605`` and ``xlnx-zynqmp-pmu`` machines (removed in 10.1)
+'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+Both the MicroBlaze ``petalogix-ml605`` and ``xlnx-zynqmp-pmu`` machines
+were added for little endian CPUs. Big endian support was never tested
+and likely never worked. Starting with QEMU v10.1, the machines are now
+only available as little-endian machines.
+
+
linux-user mode CPUs
--------------------
diff --git a/docs/devel/rust.rst b/docs/devel/rust.rst
index 171d908..34d9c79 100644
--- a/docs/devel/rust.rst
+++ b/docs/devel/rust.rst
@@ -37,12 +37,16 @@ output directory (typically ``rust/target/``). A vanilla invocation
of Cargo will complain that it cannot find the generated sources,
which can be fixed in different ways:
-* by using special shorthand targets in the QEMU build directory::
+* by using Makefile targets, provided by Meson, that run ``clippy`` or
+  ``rustdoc``::
make clippy
- make rustfmt
make rustdoc
+A target for ``rustfmt`` is also declared in ``rust/meson.build``::
+
+ make rustfmt
+
* by invoking ``cargo`` through the Meson `development environment`__
feature::
@@ -50,7 +54,7 @@ which can be fixed in different ways:
pyvenv/bin/meson devenv -w ../rust cargo fmt
If you are going to use ``cargo`` repeatedly, ``pyvenv/bin/meson devenv``
- will enter a shell where commands like ``cargo clippy`` just work.
+ will enter a shell where commands like ``cargo fmt`` just work.
__ https://mesonbuild.com/Commands.html#devenv
@@ -66,7 +70,7 @@ be run via ``meson test`` or ``make``::
make check-rust
-Building Rust code with ``--enable-modules`` is not supported yet.
+Note that doctests require all ``.o`` files from the build to be available.
Supported tools
'''''''''''''''
diff --git a/docs/interop/bitmaps.rst b/docs/interop/bitmaps.rst
index ddf8947..7536f0b 100644
--- a/docs/interop/bitmaps.rst
+++ b/docs/interop/bitmaps.rst
@@ -97,7 +97,7 @@ time.
- Persistent storage formats may impose their own requirements on bitmap names
and namespaces. Presently, only qcow2 supports persistent bitmaps. See
- docs/interop/qcow2.txt for more details on restrictions. Notably:
+ :doc:`qcow2` for more details on restrictions. Notably:
- qcow2 bitmap names are limited to between 1 and 1023 bytes long.
diff --git a/docs/interop/index.rst b/docs/interop/index.rst
index 999e44e..4b951ae 100644
--- a/docs/interop/index.rst
+++ b/docs/interop/index.rst
@@ -17,6 +17,7 @@ are useful for making QEMU interoperate with other software.
nbd
parallels
prl-xml
+ qcow2
pr-helper
qmp-spec
qemu-ga
diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.rst
index 2c46183..5948591 100644
--- a/docs/interop/qcow2.txt
+++ b/docs/interop/qcow2.rst
@@ -1,6 +1,8 @@
-== General ==
+=======================
+Qcow2 Image File Format
+=======================
-A qcow2 image file is organized in units of constant size, which are called
+A ``qcow2`` image file is organized in units of constant size, which are called
(host) clusters. A cluster is the unit in which all allocations are done,
both for actual guest data and for image metadata.
@@ -9,10 +11,10 @@ clusters of the same size.
All numbers in qcow2 are stored in Big Endian byte order.
+Header
+------
-== Header ==
-
-The first cluster of a qcow2 image contains the file header:
+The first cluster of a qcow2 image contains the file header::
Byte 0 - 3: magic
QCOW magic string ("QFI\xfb")
@@ -38,7 +40,7 @@ The first cluster of a qcow2 image contains the file header:
within a cluster (1 << cluster_bits is the cluster size).
Must not be less than 9 (i.e. 512 byte clusters).
- Note: qemu as of today has an implementation limit of 2 MB
+ Note: QEMU as of today has an implementation limit of 2 MB
as the maximum cluster size and won't be able to open images
with larger cluster sizes.
@@ -48,7 +50,7 @@ The first cluster of a qcow2 image contains the file header:
24 - 31: size
Virtual disk size in bytes.
- Note: qemu has an implementation limit of 32 MB as
+ Note: QEMU has an implementation limit of 32 MB as
the maximum L1 table size. With a 2 MB cluster
size, it is unable to populate a virtual cluster
beyond 2 EB (61 bits); with a 512 byte cluster
@@ -87,7 +89,8 @@ The first cluster of a qcow2 image contains the file header:
For version 2, the header is exactly 72 bytes in length, and finishes here.
For version 3 or higher, the header length is at least 104 bytes, including
-the next fields through header_length.
+the next fields through ``header_length``.
+::
72 - 79: incompatible_features
Bitmask of incompatible features. An implementation must
@@ -185,7 +188,8 @@ the next fields through header_length.
of 8.
-=== Additional fields (version 3 and higher) ===
+Additional fields (version 3 and higher)
+----------------------------------------
In general, these fields are optional and may be safely ignored by the software,
as well as filled by zeros (which is equal to field absence), if software needs
@@ -193,21 +197,25 @@ to set field B, but does not care about field A which precedes B. More
formally, additional fields have the following compatibility rules:
1. If the value of the additional field must not be ignored for correct
-handling of the file, it will be accompanied by a corresponding incompatible
-feature bit.
+ handling of the file, it will be accompanied by a corresponding incompatible
+ feature bit.
2. If there are no unrecognized incompatible feature bits set, an unknown
-additional field may be safely ignored other than preserving its value when
-rewriting the image header.
+ additional field may be safely ignored other than preserving its value when
+ rewriting the image header.
+
+.. _ref_rules_3:
3. An explicit value of 0 will have the same behavior as when the field is not
-present*, if not altered by a specific incompatible bit.
+ present*, if not altered by a specific incompatible bit.
-*. A field is considered not present when header_length is less than or equal
+(*) A field is considered not present when ``header_length`` is less than or equal
to the field's offset. Also, all additional fields are not present for
version 2.
- 104: compression_type
+::
+
+ 104: compression_type
Defines the compression method used for compressed clusters.
All compressed clusters in an image use the same compression
@@ -219,8 +227,8 @@ version 2.
or must be zero (which means deflate).
Available compression type values:
- 0: deflate <https://www.ietf.org/rfc/rfc1951.txt>
- 1: zstd <http://github.com/facebook/zstd>
+ - 0: deflate <https://www.ietf.org/rfc/rfc1951.txt>
+ - 1: zstd <http://github.com/facebook/zstd>
The deflate compression type is called "zlib"
<https://www.zlib.net/> in QEMU. However, clusters with the
@@ -228,19 +236,21 @@ version 2.
105 - 111: Padding, contents defined below.
-=== Header padding ===
+Header padding
+--------------
-@header_length must be a multiple of 8, which means that if the end of the last
+``header_length`` must be a multiple of 8, which means that if the end of the last
additional field is not aligned, some padding is needed. This padding must be
zeroed, so that if some existing (or future) additional field will fall into
-the padding, it will be interpreted accordingly to point [3.] of the previous
+the padding, it will be interpreted according to point `[3.] <#ref_rules_3>`_ of the previous
paragraph, i.e. in the same manner as when this field is not present.
-=== Header extensions ===
+Header extensions
+-----------------
Directly after the image header, optional sections called header extensions can
-be stored. Each extension has a structure like the following:
+be stored. Each extension has a structure like the following::
Byte 0 - 3: Header extension type:
0x00000000 - End of the header extension area
@@ -270,17 +280,19 @@ data of compatible features that it doesn't support. Compatible features that
need space for additional data can use a header extension.
-== String header extensions ==
+String header extensions
+------------------------
Some header extensions (such as the backing file format name and the external
data file name) are just a single string. In this case, the header extension
-length is the string length and the string is not '\0' terminated. (The header
-extension padding can make it look like a string is '\0' terminated, but
+length is the string length and the string is not ``\0`` terminated. (The header
+extension padding can make it look like a string is ``\0`` terminated, but
neither is padding always necessary nor is there a guarantee that zero bytes
are used for padding.)
-== Feature name table ==
+Feature name table
+------------------
The feature name table is an optional header extension that contains the name
for features used by the image. It can be used by applications that don't know
@@ -288,7 +300,7 @@ the respective feature (e.g. because the feature was introduced only later) to
display a useful error message.
The number of entries in the feature name table is determined by the length of
-the header extension data. Each entry look like this:
+the header extension data. Each entry looks like this::
Byte 0: Type of feature (select feature bitmap)
0: Incompatible feature
@@ -302,7 +314,8 @@ the header extension data. Each entry look like this:
terminated if it has full length)
-== Bitmaps extension ==
+Bitmaps extension
+-----------------
The bitmaps extension is an optional header extension. It provides the ability
to store bitmaps related to a virtual disk. For now, there is only one bitmap
@@ -310,9 +323,9 @@ type: the dirty tracking bitmap, which tracks virtual disk changes from some
point in time.
The data of the extension should be considered consistent only if the
-corresponding auto-clear feature bit is set, see autoclear_features above.
+corresponding auto-clear feature bit is set, see ``autoclear_features`` above.
-The fields of the bitmaps extension are:
+The fields of the bitmaps extension are::
Byte 0 - 3: nb_bitmaps
The number of bitmaps contained in the image. Must be
@@ -331,15 +344,17 @@ The fields of the bitmaps extension are:
Offset into the image file at which the bitmap directory
starts. Must be aligned to a cluster boundary.
-== Full disk encryption header pointer ==
+Full disk encryption header pointer
+-----------------------------------
The full disk encryption header must be present if, and only if, the
-'crypt_method' header requires metadata. Currently this is only true
-of the 'LUKS' crypt method. The header extension must be absent for
+``crypt_method`` header requires metadata. Currently this is only true
+of the ``LUKS`` crypt method. The header extension must be absent for
other methods.
This header provides the offset at which the crypt method can store
its additional data, as well as the length of such data.
+::
Byte 0 - 7: Offset into the image file at which the encryption
header starts in bytes. Must be aligned to a cluster
@@ -357,10 +372,10 @@ The first 592 bytes of the header clusters will contain the LUKS
partition header. This is then followed by the key material data areas.
The size of the key material data areas is determined by the number of
stripes in the key slot and key size. Refer to the LUKS format
-specification ('docs/on-disk-format.pdf' in the cryptsetup source
+specification (``docs/on-disk-format.pdf`` in the cryptsetup source
package) for details of the LUKS partition header format.
-In the LUKS partition header, the "payload-offset" field will be
+In the LUKS partition header, the ``payload-offset`` field will be
calculated as normal for the LUKS spec. ie the size of the LUKS
header, plus key material regions, plus padding, relative to the
start of the LUKS header. This offset value is not required to be
@@ -369,11 +384,12 @@ context of qcow2, since the qcow2 file format itself defines where
the real payload offset is, but none the less a valid payload offset
should always be present.
-In the LUKS key slots header, the "key-material-offset" is relative
+In the LUKS key slots header, the ``key-material-offset`` is relative
to the start of the LUKS header clusters in the qcow2 container,
not the start of the qcow2 file.
Logically the layout looks like
+::
+-----------------------------+
| QCow2 header |
@@ -405,7 +421,8 @@ Logically the layout looks like
| |
+-----------------------------+
-== Data encryption ==
+Data encryption
+---------------
When an encryption method is requested in the header, the image payload
data must be encrypted/decrypted on every write/read. The image headers
@@ -413,7 +430,7 @@ and metadata are never encrypted.
The algorithms used for encryption vary depending on the method
- - AES:
+ - ``AES``:
The AES cipher, in CBC mode, with 256 bit keys.
@@ -425,7 +442,7 @@ The algorithms used for encryption vary depending on the method
supported in the command line tools for the sake of back compatibility
and data liberation.
- - LUKS:
+ - ``LUKS``:
The algorithms are specified in the LUKS header.
@@ -433,7 +450,8 @@ The algorithms used for encryption vary depending on the method
in the LUKS header, with the physical disk sector as the
input tweak.
-== Host cluster management ==
+Host cluster management
+-----------------------
qcow2 manages the allocation of host clusters by maintaining a reference count
for each host cluster. A refcount of 0 means that the cluster is free, 1 means
@@ -453,14 +471,15 @@ Although a large enough refcount table can reserve clusters past 64 PB
large), note that some qcow2 metadata such as L1/L2 tables must point
to clusters prior to that point.
-Note: qemu has an implementation limit of 8 MB as the maximum refcount
-table size. With a 2 MB cluster size and a default refcount_order of
-4, it is unable to reference host resources beyond 2 EB (61 bits); in
-the worst case, with a 512 cluster size and refcount_order of 6, it is
-unable to access beyond 32 GB (35 bits).
+.. note::
+ QEMU has an implementation limit of 8 MB as the maximum refcount
+ table size. With a 2 MB cluster size and a default refcount_order of
+ 4, it is unable to reference host resources beyond 2 EB (61 bits); in
+ the worst case, with a 512 cluster size and refcount_order of 6, it is
+ unable to access beyond 32 GB (35 bits).
Given an offset into the image file, the refcount of its cluster can be
-obtained as follows:
+obtained as follows::
refcount_block_entries = (cluster_size * 8 / refcount_bits)
@@ -470,7 +489,7 @@ obtained as follows:
refcount_block = load_cluster(refcount_table[refcount_table_index]);
return refcount_block[refcount_block_index];
-Refcount table entry:
+Refcount table entry::
Bit 0 - 8: Reserved (set to 0)
@@ -482,14 +501,15 @@ Refcount table entry:
been allocated. All refcounts managed by this refcount block
are 0.
-Refcount block entry (x = refcount_bits - 1):
+Refcount block entry ``(x = refcount_bits - 1)``::
Bit 0 - x: Reference count of the cluster. If refcount_bits implies a
sub-byte width, note that bit 0 means the least significant
bit in this context.
-== Cluster mapping ==
+Cluster mapping
+---------------
Just as for refcounts, qcow2 uses a two-level structure for the mapping of
guest clusters to host clusters. They are called L1 and L2 table.
@@ -509,7 +529,7 @@ compressed clusters to reside below 512 TB (49 bits), and this limit
cannot be relaxed without an incompatible layout change).
Given an offset into the virtual disk, the offset into the image file can be
-obtained as follows:
+obtained as follows::
l2_entries = (cluster_size / sizeof(uint64_t)) [*]
@@ -523,7 +543,7 @@ obtained as follows:
[*] this changes if Extended L2 Entries are enabled, see next section
-L1 table entry:
+L1 table entry::
Bit 0 - 8: Reserved (set to 0)
@@ -538,7 +558,7 @@ L1 table entry:
refcount is exactly one. This information is only accurate
in the active L1 table.
-L2 table entry:
+L2 table entry::
Bit 0 - 61: Cluster descriptor
@@ -555,7 +575,7 @@ L2 table entry:
mapping for guest cluster offsets), so this bit should be 1
for all allocated clusters.
-Standard Cluster Descriptor:
+Standard Cluster Descriptor::
Bit 0: If set to 1, the cluster reads as all zeros. The host
cluster offset can be used to describe a preallocation,
@@ -577,7 +597,7 @@ Standard Cluster Descriptor:
56 - 61: Reserved (set to 0)
-Compressed Clusters Descriptor (x = 62 - (cluster_bits - 8)):
+Compressed Clusters Descriptor ``(x = 62 - (cluster_bits - 8))``::
Bit 0 - x-1: Host cluster offset. This is usually _not_ aligned to a
cluster or sector boundary! If cluster_bits is
@@ -601,7 +621,8 @@ file (except if bit 0 in the Standard Cluster Descriptor is set). If there is
no backing file or the backing file is smaller than the image, they shall read
zeros for all parts that are not covered by the backing file.
-== Extended L2 Entries ==
+Extended L2 Entries
+-------------------
An image uses Extended L2 Entries if bit 4 is set on the incompatible_features
field of the header.
@@ -615,6 +636,8 @@ subclusters so they are treated the same as in images without this feature.
The size of an extended L2 entry is 128 bits so the number of entries per table
is calculated using this formula:
+.. code::
+
l2_entries = (cluster_size / (2 * sizeof(uint64_t)))
The first 64 bits have the same format as the standard L2 table entry described
@@ -623,7 +646,7 @@ descriptor.
The last 64 bits contain a subcluster allocation bitmap with this format:
-Subcluster Allocation Bitmap (for standard clusters):
+Subcluster Allocation Bitmap (for standard clusters)::
Bit 0 - 31: Allocation status (one bit per subcluster)
@@ -647,13 +670,14 @@ Subcluster Allocation Bitmap (for standard clusters):
Bits are assigned starting from the least significant
one (i.e. bit x is used for subcluster x - 32).
-Subcluster Allocation Bitmap (for compressed clusters):
+Subcluster Allocation Bitmap (for compressed clusters)::
Bit 0 - 63: Reserved (set to 0)
Compressed clusters don't have subclusters,
so this field is not used.
-== Snapshots ==
+Snapshots
+---------
qcow2 supports internal snapshots. Their basic principle of operation is to
switch the active L1 table, so that a different set of host clusters are
@@ -672,7 +696,7 @@ in the image file, whose starting offset and length are given by the header
fields snapshots_offset and nb_snapshots. The entries of the snapshot table
have variable length, depending on the length of ID, name and extra data.
-Snapshot table entry:
+Snapshot table entry::
Byte 0 - 7: Offset into the image file at which the L1 table for the
snapshot starts. Must be aligned to a cluster boundary.
@@ -728,7 +752,8 @@ Snapshot table entry:
next multiple of 8.
-== Bitmaps ==
+Bitmaps
+-------
As mentioned above, the bitmaps extension provides the ability to store bitmaps
related to a virtual disk. This section describes how these bitmaps are stored.
@@ -739,20 +764,23 @@ each bitmap size is equal to the virtual disk size.
Each bit of the bitmap is responsible for strictly defined range of the virtual
disk. For bit number bit_nr the corresponding range (in bytes) will be:
+.. code::
+
[bit_nr * bitmap_granularity .. (bit_nr + 1) * bitmap_granularity - 1]
Granularity is a property of the concrete bitmap, see below.
-=== Bitmap directory ===
+Bitmap directory
+----------------
Each bitmap saved in the image is described in a bitmap directory entry. The
bitmap directory is a contiguous area in the image file, whose starting offset
-and length are given by the header extension fields bitmap_directory_offset and
-bitmap_directory_size. The entries of the bitmap directory have variable
+and length are given by the header extension fields ``bitmap_directory_offset`` and
+``bitmap_directory_size``. The entries of the bitmap directory have variable
length, depending on the lengths of the bitmap name and extra data.
-Structure of a bitmap directory entry:
+Structure of a bitmap directory entry::
Byte 0 - 7: bitmap_table_offset
Offset into the image file at which the bitmap table
@@ -833,7 +861,8 @@ Structure of a bitmap directory entry:
next multiple of 8. All bytes of the padding must be zero.
-=== Bitmap table ===
+Bitmap table
+------------
Each bitmap is stored using a one-level structure (as opposed to two-level
structures like for refcounts and guest clusters mapping) for the mapping of
@@ -843,7 +872,7 @@ Each bitmap table has a variable size (stored in the bitmap directory entry)
and may use multiple clusters, however, it must be contiguous in the image
file.
-Structure of a bitmap table entry:
+Structure of a bitmap table entry::
Bit 0: Reserved and must be zero if bits 9 - 55 are non-zero.
If bits 9 - 55 are zero:
@@ -860,11 +889,12 @@ Structure of a bitmap table entry:
56 - 63: Reserved and must be zero.
-=== Bitmap data ===
+Bitmap data
+-----------
As noted above, bitmap data is stored in separate clusters, described by the
bitmap table. Given an offset (in bytes) into the bitmap data, the offset into
-the image file can be obtained as follows:
+the image file can be obtained as follows::
image_offset(bitmap_data_offset) =
bitmap_table[bitmap_data_offset / cluster_size] +
@@ -875,7 +905,7 @@ above).
Given an offset byte_nr into the virtual disk and the bitmap's granularity, the
bit offset into the image file to the corresponding bit of the bitmap can be
-calculated like this:
+calculated like this::
bit_offset(byte_nr) =
image_offset(byte_nr / granularity / 8) * 8 +
@@ -886,21 +916,22 @@ last cluster of the bitmap data contains some unused tail bits. These bits must
be zero.
-=== Dirty tracking bitmaps ===
+Dirty tracking bitmaps
+----------------------
-Bitmaps with 'type' field equal to one are dirty tracking bitmaps.
+Bitmaps with ``type`` field equal to one are dirty tracking bitmaps.
-When the virtual disk is in use dirty tracking bitmap may be 'enabled' or
-'disabled'. While the bitmap is 'enabled', all writes to the virtual disk
+When the virtual disk is in use, a dirty tracking bitmap may be ``enabled`` or
+``disabled``. While the bitmap is ``enabled``, all writes to the virtual disk
should be reflected in the bitmap. A set bit in the bitmap means that the
corresponding range of the virtual disk (see above) was written to while the
-bitmap was 'enabled'. An unset bit means that this range was not written to.
+bitmap was ``enabled``. An unset bit means that this range was not written to.
The software doesn't have to sync the bitmap in the image file with its
-representation in RAM after each write or metadata change. Flag 'in_use'
+representation in RAM after each write or metadata change. Flag ``in_use``
should be set while the bitmap is not synced.
-In the image file the 'enabled' state is reflected by the 'auto' flag. If this
-flag is set, the software must consider the bitmap as 'enabled' and start
+In the image file the ``enabled`` state is reflected by the ``auto`` flag. If this
+flag is set, the software must consider the bitmap as ``enabled`` and start
tracking virtual disk changes to this bitmap from the first write to the
virtual disk. If this flag is not set then the bitmap is disabled.
diff --git a/docs/qcow2-cache.txt b/docs/qcow2-cache.txt
index 5f763aa..204a574 100644
--- a/docs/qcow2-cache.txt
+++ b/docs/qcow2-cache.txt
@@ -15,7 +15,7 @@ not a straightforward operation.
This document attempts to give an overview of the L2 and refcount
caches, and how to configure them.
-Please refer to the docs/interop/qcow2.txt file for an in-depth
+Please refer to the docs/interop/qcow2.rst file for an in-depth
technical description of the qcow2 file format.
diff --git a/docs/system/confidential-guest-support.rst b/docs/system/confidential-guest-support.rst
index 0c490db..66129fb 100644
--- a/docs/system/confidential-guest-support.rst
+++ b/docs/system/confidential-guest-support.rst
@@ -38,6 +38,7 @@ Supported mechanisms
Currently supported confidential guest mechanisms are:
* AMD Secure Encrypted Virtualization (SEV) (see :doc:`i386/amd-memory-encryption`)
+* Intel Trust Domain Extensions (TDX) (see :doc:`i386/tdx`)
* POWER Protected Execution Facility (PEF) (see :ref:`power-papr-protected-execution-facility-pef`)
* s390x Protected Virtualization (PV) (see :doc:`s390x/protvirt`)
diff --git a/docs/system/i386/tdx.rst b/docs/system/i386/tdx.rst
new file mode 100644
index 0000000..8131750
--- /dev/null
+++ b/docs/system/i386/tdx.rst
@@ -0,0 +1,161 @@
+Intel Trust Domain Extensions (TDX)
+====================================
+
+Intel Trust Domain Extensions (TDX) refers to an Intel technology that extends
+Virtual Machine Extensions (VMX) and Multi-Key Total Memory Encryption (MKTME)
+with a new kind of virtual machine guest called a Trust Domain (TD). A TD runs
+in a CPU mode that is designed to protect the confidentiality of its memory
+contents and its CPU state from any other software, including the hosting
+Virtual Machine Monitor (VMM), unless explicitly shared by the TD itself.
+
+Prerequisites
+-------------
+
+To run a TD, the physical machine needs to have the TDX module loaded and
+initialized, and the KVM hypervisor needs to have TDX support and have it
+enabled. If those requirements are met, ``KVM_CAP_VM_TYPES`` will report
+support for ``KVM_X86_TDX_VM``.
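+
+As a minimal illustration only (not part of this patch; it assumes a host
+kernel and ``<linux/kvm.h>`` new enough to define ``KVM_CAP_VM_TYPES`` and
+``KVM_X86_TDX_VM``), the capability can be probed from userspace roughly like
+this:
+
+.. code-block:: c
+
+   #include <fcntl.h>
+   #include <stdio.h>
+   #include <sys/ioctl.h>
+   #include <linux/kvm.h>
+
+   int main(void)
+   {
+       int kvm = open("/dev/kvm", O_RDWR);
+       if (kvm < 0) {
+           perror("open /dev/kvm");
+           return 1;
+       }
+       /* KVM_CAP_VM_TYPES returns a bitmap: bit n set means VM type n. */
+       int types = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_VM_TYPES);
+       if (types > 0 && (types & (1u << KVM_X86_TDX_VM))) {
+           printf("KVM reports KVM_X86_TDX_VM support\n");
+       } else {
+           printf("no TDX VM support reported by KVM\n");
+       }
+       return 0;
+   }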
+
+Trust Domain Virtual Firmware (TDVF)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Trust Domain Virtual Firmware (TDVF) is required to provide TD services in
+order to boot the TD guest OS. TDVF needs to be copied to guest private memory
+and measured before the TD boots.
+
+The KVM vCPU ioctl ``KVM_TDX_INIT_MEM_REGION`` can be used to populate the
+TDVF content into the TD's private memory.
+
+Since TDX doesn't support read-only memslots, TDVF cannot be mapped as a
+pflash device; it actually works as RAM. The ``-bios`` option is used to load
+TDVF.
+
+OVMF is the open source firmware that implements TDVF support. Thus the
+command line option to specify and load TDVF is ``-bios OVMF.fd``.
+
+Feature Configuration
+---------------------
+
+Unlike a non-TDX VM, the CPU features (enumerated via CPUID or MSRs) of a TD
+are not under full control of the VMM. The VMM can only configure a subset of
+a TD's features, via the ``KVM_TDX_INIT_VM`` command of the VM scope
+``MEMORY_ENCRYPT_OP`` ioctl.
+
+The configurable features fall into three categories:
+
+- Attributes:
+
+  - PKS (bit 30) controls whether Supervisor Protection Keys are exposed to
+    the TD, which determines the related CPUID bit and CR4 bit;
+  - PERFMON (bit 63) controls whether the PMU is exposed to the TD.
+
+- XSAVE related features (XFAM):
+  XFAM is a 64-bit mask that has the same format as the XCR0 or IA32_XSS MSR.
+  It determines the set of extended features available for use by the guest TD.
+
+- CPUID features:
+  Only some bits of some CPUID leaves are directly configurable by the VMM.
+
+Which features can be configured is reported via the TDX capabilities.
+
+TDX capabilities
+~~~~~~~~~~~~~~~~
+
+The VM scope ``MEMORY_ENCRYPT_OP`` ioctl provides the ``KVM_TDX_CAPABILITIES``
+command to get the TDX capabilities from KVM. It returns a
+``struct kvm_tdx_capabilities`` data structure, which describes the supported
+configuration of attributes, XFAM and CPUID bits.
+
+TD attributes
+~~~~~~~~~~~~~
+
+QEMU supports configuring the raw 64-bit TD attributes directly via the
+``attributes`` property of the ``tdx-guest`` object. Note that it is the
+user's responsibility to provide a valid value, because some bits may not be
+supported by the current QEMU or KVM yet.
+
+QEMU also supports configuring the individual attribute bits that it knows
+about via properties of the ``tdx-guest`` object, e.g. ``sept-ve-disable``
+(bit 28).
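+
+For illustration only (``0x10000000`` is bit 28, i.e. the same bit that
+``sept-ve-disable`` controls), either form could be used::
+
+  -object tdx-guest,id=tdx0,sept-ve-disable=on
+  -object tdx-guest,id=tdx0,attributes=0x10000000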
+
+MSR based features
+~~~~~~~~~~~~~~~~~~
+
+KVM does not currently support MSR based feature configuration (e.g.,
+``MSR_IA32_ARCH_CAPABILITIES``) for TDX; enabling it in QEMU is future work,
+once KVM adds support for it.
+
+Feature check
+~~~~~~~~~~~~~
+
+QEMU checks whether the final (CPU) features, determined by the given CPU model
+and explicit feature adjustments such as ``+featureA/-featureB``, can be
+supported. It can produce a "feature not supported" warning like
+
+  "warning: host doesn't support requested feature: CPUID.07H:EBX.intel-pt [bit 25]"
+
+It can also produce a warning like
+
+  "warning: TDX forcibly sets the feature: CPUID.80000007H:EDX.invtsc [bit 8]"
+
+if a fixed-1 feature is requested to be disabled explicitly. This warning is
+new in QEMU for TDX, because TDX has fixed-1 features that are forcibly enabled
+by the TDX module and cannot be disabled by the VMM.
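+
+For illustration only, the second warning above is what would be produced if
+such a fixed-1 bit were explicitly masked on the command line, e.g.::
+
+  -cpu host,-invtsc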
+
+Launching a TD (TDX VM)
+-----------------------
+
+To launch a TD, the necessary command line options are a ``tdx-guest`` object
+and a split kernel-irqchip, as below:
+
+.. parsed-literal::
+
+ |qemu_system_x86| \\
+ -accel kvm \\
+ -cpu host \\
+ -object tdx-guest,id=tdx0 \\
+ -machine ...,confidential-guest-support=tdx0 \\
+ -bios OVMF.fd \\
+
+Restrictions
+------------
+
+ - kernel-irqchip must be split;
+
+ This is set by default for TDX guest if kernel-irqchip is left on its default
+ 'auto' setting.
+
+ - No readonly support for private memory;
+
+ - No SMM support: SMM support requires manipulating the guest register states
+ which is not allowed;
+
+Debugging
+---------
+
+Bit 0 of the TD attributes is the DEBUG bit, which decides whether the TD runs
+in off-TD debug mode. When in off-TD debug mode, the TD's VCPU state and
+private memory are accessible via certain SEAMCALLs. This requires KVM to
+expose APIs to invoke those SEAMCALLs and corresponding QEMU changes.
+
+This is targeted as future work.
+
+TD attestation
+--------------
+
+In a TD guest, the attestation process is used to verify the TDX guest's
+trustworthiness to other entities before provisioning secrets to the guest.
+
+TD attestation is initiated by calling TDG.MR.REPORT inside the TD to get the
+REPORT. The REPORT data then needs to be converted into a remotely verifiable
+Quote by the SGX Quoting Enclave (QE).
+
+Adding TD attestation support to QEMU is future work, since it lacks the
+required support in current KVM.
+
+Live Migration
+--------------
+
+Future work.
+
+References
+----------
+
+- `TDX Homepage <https://www.intel.com/content/www/us/en/developer/articles/technical/intel-trust-domain-extensions.html>`__
+
+- `SGX QE <https://github.com/intel/SGXDataCenterAttestationPrimitives/tree/master/QuoteGeneration>`__
diff --git a/docs/system/target-i386.rst b/docs/system/target-i386.rst
index ab7af1a..43b09c7 100644
--- a/docs/system/target-i386.rst
+++ b/docs/system/target-i386.rst
@@ -31,6 +31,7 @@ Architectural features
i386/kvm-pv
i386/sgx
i386/amd-memory-encryption
+ i386/tdx
OS requirements
~~~~~~~~~~~~~~~
diff --git a/gdbstub/meson.build b/gdbstub/meson.build
index b25db86..15c666f 100644
--- a/gdbstub/meson.build
+++ b/gdbstub/meson.build
@@ -5,13 +5,13 @@
#
# We build two versions of gdbstub, one for each mode
-libuser_ss.add(files(
+user_ss.add(files(
'gdbstub.c',
'syscalls.c',
'user.c'
))
-libsystem_ss.add(files(
+system_ss.add(files(
'gdbstub.c',
'syscalls.c',
'system.c'
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index a55b44d..f543d94 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -147,7 +147,6 @@ config OMAP
bool
select FRAMEBUFFER
select I2C
- select NAND
select PFLASH_CFI01
select SD
select SERIAL_MM
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index f94b940..79afb51 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -19,6 +19,7 @@
#include "system/kvm.h"
#include "system/tcg.h"
#include "system/system.h"
+#include "system/memory.h"
#include "system/numa.h"
#include "hw/boards.h"
#include "system/reset.h"
diff --git a/hw/arm/meson.build b/hw/arm/meson.build
index 5098795..d90be8f 100644
--- a/hw/arm/meson.build
+++ b/hw/arm/meson.build
@@ -8,7 +8,7 @@ arm_common_ss.add(when: 'CONFIG_HIGHBANK', if_true: files('highbank.c'))
arm_common_ss.add(when: 'CONFIG_INTEGRATOR', if_true: files('integratorcp.c'))
arm_common_ss.add(when: 'CONFIG_MICROBIT', if_true: files('microbit.c'))
arm_common_ss.add(when: 'CONFIG_MPS3R', if_true: files('mps3r.c'))
-arm_common_ss.add(when: 'CONFIG_MUSICPAL', if_true: [pixman, files('musicpal.c')])
+arm_common_ss.add(when: 'CONFIG_MUSICPAL', if_true: [files('musicpal.c')])
arm_common_ss.add(when: 'CONFIG_NETDUINOPLUS2', if_true: files('netduinoplus2.c'))
arm_common_ss.add(when: 'CONFIG_OLIMEX_STM32_H405', if_true: files('olimex-stm32-h405.c'))
arm_common_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx.c', 'npcm7xx_boards.c'))
@@ -79,7 +79,7 @@ arm_common_ss.add(when: 'CONFIG_SX1', if_true: files('omap_sx1.c'))
arm_common_ss.add(when: 'CONFIG_VERSATILE', if_true: files('versatilepb.c'))
arm_common_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c'))
-arm_common_ss.add(fdt, files('boot.c'))
+arm_common_ss.add(files('boot.c'))
hw_arch += {'arm': arm_ss}
hw_common_arch += {'arm': arm_common_ss}
diff --git a/hw/arm/npcm8xx.c b/hw/arm/npcm8xx.c
index d7ee306..a276fea 100644
--- a/hw/arm/npcm8xx.c
+++ b/hw/arm/npcm8xx.c
@@ -67,6 +67,9 @@
/* SDHCI Modules */
#define NPCM8XX_MMC_BA 0xf0842000
+/* PCS Module */
+#define NPCM8XX_PCS_BA 0xf0780000
+
/* PSPI Modules */
#define NPCM8XX_PSPI_BA 0xf0201000
@@ -85,6 +88,10 @@ enum NPCM8xxInterrupt {
NPCM8XX_ADC_IRQ = 0,
NPCM8XX_PECI_IRQ = 6,
NPCM8XX_KCS_HIB_IRQ = 9,
+ NPCM8XX_GMAC1_IRQ = 14,
+ NPCM8XX_GMAC2_IRQ,
+ NPCM8XX_GMAC3_IRQ,
+ NPCM8XX_GMAC4_IRQ,
NPCM8XX_MMC_IRQ = 26,
NPCM8XX_PSPI_IRQ = 28,
NPCM8XX_TIMER0_IRQ = 32, /* Timer Module 0 */
@@ -260,6 +267,14 @@ static const hwaddr npcm8xx_smbus_addr[] = {
0xfff0a000,
};
+/* Register base address for each GMAC Module */
+static const hwaddr npcm8xx_gmac_addr[] = {
+ 0xf0802000,
+ 0xf0804000,
+ 0xf0806000,
+ 0xf0808000,
+};
+
/* Register base address for each USB host EHCI registers */
static const hwaddr npcm8xx_ehci_addr[] = {
0xf0828100,
@@ -350,6 +365,7 @@ static struct arm_boot_info npcm8xx_binfo = {
.secure_boot = false,
.board_id = -1,
.board_setup_addr = NPCM8XX_BOARD_SETUP_ADDR,
+ .psci_conduit = QEMU_PSCI_CONDUIT_SMC,
};
void npcm8xx_load_kernel(MachineState *machine, NPCM8xxState *soc)
@@ -444,6 +460,11 @@ static void npcm8xx_init(Object *obj)
object_initialize_child(obj, "mft[*]", &s->mft[i], TYPE_NPCM7XX_MFT);
}
+ for (i = 0; i < ARRAY_SIZE(s->gmac); i++) {
+ object_initialize_child(obj, "gmac[*]", &s->gmac[i], TYPE_NPCM_GMAC);
+ }
+ object_initialize_child(obj, "pcs", &s->pcs, TYPE_NPCM_PCS);
+
object_initialize_child(obj, "mmc", &s->mmc, TYPE_NPCM7XX_SDHCI);
object_initialize_child(obj, "pspi", &s->pspi, TYPE_NPCM_PSPI);
}
@@ -669,6 +690,35 @@ static void npcm8xx_realize(DeviceState *dev, Error **errp)
}
/*
+ * GMAC Modules. Cannot fail.
+ */
+ QEMU_BUILD_BUG_ON(ARRAY_SIZE(npcm8xx_gmac_addr) != ARRAY_SIZE(s->gmac));
+ for (i = 0; i < ARRAY_SIZE(s->gmac); i++) {
+ SysBusDevice *sbd = SYS_BUS_DEVICE(&s->gmac[i]);
+
+ /* This is used to make sure that the NIC can create the device */
+ qemu_configure_nic_device(DEVICE(sbd), false, NULL);
+
+ /*
+ * The device exists regardless of whether it's connected to a QEMU
+ * netdev backend. So always instantiate it even if there is no
+ * backend.
+ */
+ sysbus_realize(sbd, &error_abort);
+ sysbus_mmio_map(sbd, 0, npcm8xx_gmac_addr[i]);
+ /*
+ * N.B. The values for the second argument sysbus_connect_irq are
+ * chosen to match the registration order in npcm7xx_emc_realize.
+ */
+ sysbus_connect_irq(sbd, 0, npcm8xx_irq(s, NPCM8XX_GMAC1_IRQ + i));
+ }
+ /*
+ * GMAC Physical Coding Sublayer(PCS) Module. Cannot fail.
+ */
+ sysbus_realize(SYS_BUS_DEVICE(&s->pcs), &error_abort);
+ sysbus_mmio_map(SYS_BUS_DEVICE(&s->pcs), 0, NPCM8XX_PCS_BA);
+
+ /*
* Flash Interface Unit (FIU). Can fail if incorrect number of chip selects
* specified, but this is a programming error.
*/
@@ -741,12 +791,7 @@ static void npcm8xx_realize(DeviceState *dev, Error **errp)
create_unimplemented_device("npcm8xx.ahbpci", 0xf0400000, 1 * MiB);
create_unimplemented_device("npcm8xx.dap", 0xf0500000, 960 * KiB);
create_unimplemented_device("npcm8xx.mcphy", 0xf05f0000, 64 * KiB);
- create_unimplemented_device("npcm8xx.pcs", 0xf0780000, 256 * KiB);
create_unimplemented_device("npcm8xx.tsgen", 0xf07fc000, 8 * KiB);
- create_unimplemented_device("npcm8xx.gmac1", 0xf0802000, 8 * KiB);
- create_unimplemented_device("npcm8xx.gmac2", 0xf0804000, 8 * KiB);
- create_unimplemented_device("npcm8xx.gmac3", 0xf0806000, 8 * KiB);
- create_unimplemented_device("npcm8xx.gmac4", 0xf0808000, 8 * KiB);
create_unimplemented_device("npcm8xx.copctl", 0xf080c000, 4 * KiB);
create_unimplemented_device("npcm8xx.tipctl", 0xf080d000, 4 * KiB);
create_unimplemented_device("npcm8xx.rst", 0xf080e000, 4 * KiB);
diff --git a/hw/block/Kconfig b/hw/block/Kconfig
index a898e04..737dbcd 100644
--- a/hw/block/Kconfig
+++ b/hw/block/Kconfig
@@ -13,9 +13,6 @@ config FDC_SYSBUS
config SSI_M25P80
bool
-config NAND
- bool
-
config PFLASH_CFI01
bool
diff --git a/hw/block/meson.build b/hw/block/meson.build
index 16a51bf..6557044 100644
--- a/hw/block/meson.build
+++ b/hw/block/meson.build
@@ -6,7 +6,6 @@ system_ss.add(files(
system_ss.add(when: 'CONFIG_FDC', if_true: files('fdc.c'))
system_ss.add(when: 'CONFIG_FDC_ISA', if_true: files('fdc-isa.c'))
system_ss.add(when: 'CONFIG_FDC_SYSBUS', if_true: files('fdc-sysbus.c'))
-system_ss.add(when: 'CONFIG_NAND', if_true: files('nand.c'))
system_ss.add(when: 'CONFIG_PFLASH_CFI01', if_true: files('pflash_cfi01.c'))
system_ss.add(when: 'CONFIG_PFLASH_CFI02', if_true: files('pflash_cfi02.c'))
system_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80.c'))
diff --git a/hw/block/nand.c b/hw/block/nand.c
deleted file mode 100644
index c80bf78..0000000
--- a/hw/block/nand.c
+++ /dev/null
@@ -1,835 +0,0 @@
-/*
- * Flash NAND memory emulation. Based on "16M x 8 Bit NAND Flash
- * Memory" datasheet for the KM29U128AT / K9F2808U0A chips from
- * Samsung Electronic.
- *
- * Copyright (c) 2006 Openedhand Ltd.
- * Written by Andrzej Zaborowski <balrog@zabor.org>
- *
- * Support for additional features based on "MT29F2G16ABCWP 2Gx16"
- * datasheet from Micron Technology and "NAND02G-B2C" datasheet
- * from ST Microelectronics.
- *
- * This code is licensed under the GNU GPL v2.
- *
- * Contributions after 2012-01-13 are licensed under the terms of the
- * GNU GPL, version 2 or (at your option) any later version.
- */
-
-#ifndef NAND_IO
-
-#include "qemu/osdep.h"
-#include "hw/hw.h"
-#include "hw/qdev-properties.h"
-#include "hw/qdev-properties-system.h"
-#include "hw/block/flash.h"
-#include "system/block-backend.h"
-#include "migration/vmstate.h"
-#include "qapi/error.h"
-#include "qemu/error-report.h"
-#include "qemu/module.h"
-#include "qom/object.h"
-
-# define NAND_CMD_READ0 0x00
-# define NAND_CMD_READ1 0x01
-# define NAND_CMD_READ2 0x50
-# define NAND_CMD_LPREAD2 0x30
-# define NAND_CMD_NOSERIALREAD2 0x35
-# define NAND_CMD_RANDOMREAD1 0x05
-# define NAND_CMD_RANDOMREAD2 0xe0
-# define NAND_CMD_READID 0x90
-# define NAND_CMD_RESET 0xff
-# define NAND_CMD_PAGEPROGRAM1 0x80
-# define NAND_CMD_PAGEPROGRAM2 0x10
-# define NAND_CMD_CACHEPROGRAM2 0x15
-# define NAND_CMD_BLOCKERASE1 0x60
-# define NAND_CMD_BLOCKERASE2 0xd0
-# define NAND_CMD_READSTATUS 0x70
-# define NAND_CMD_COPYBACKPRG1 0x85
-
-# define NAND_IOSTATUS_ERROR (1 << 0)
-# define NAND_IOSTATUS_PLANE0 (1 << 1)
-# define NAND_IOSTATUS_PLANE1 (1 << 2)
-# define NAND_IOSTATUS_PLANE2 (1 << 3)
-# define NAND_IOSTATUS_PLANE3 (1 << 4)
-# define NAND_IOSTATUS_READY (1 << 6)
-# define NAND_IOSTATUS_UNPROTCT (1 << 7)
-
-# define MAX_PAGE 0x800
-# define MAX_OOB 0x40
-
-typedef struct NANDFlashState NANDFlashState;
-struct NANDFlashState {
- DeviceState parent_obj;
-
- uint8_t manf_id, chip_id;
- uint8_t buswidth; /* in BYTES */
- int size, pages;
- int page_shift, oob_shift, erase_shift, addr_shift;
- uint8_t *storage;
- BlockBackend *blk;
- int mem_oob;
-
- uint8_t cle, ale, ce, wp, gnd;
-
- uint8_t io[MAX_PAGE + MAX_OOB + 0x400];
- uint8_t *ioaddr;
- int iolen;
-
- uint32_t cmd;
- uint64_t addr;
- int addrlen;
- int status;
- int offset;
-
- void (*blk_write)(NANDFlashState *s);
- void (*blk_erase)(NANDFlashState *s);
- /*
- * Returns %true when block containing (@addr + @offset) is
- * successfully loaded, otherwise %false.
- */
- bool (*blk_load)(NANDFlashState *s, uint64_t addr, unsigned offset);
-
- uint32_t ioaddr_vmstate;
-};
-
-#define TYPE_NAND "nand"
-
-OBJECT_DECLARE_SIMPLE_TYPE(NANDFlashState, NAND)
-
-static void mem_and(uint8_t *dest, const uint8_t *src, size_t n)
-{
- /* Like memcpy() but we logical-AND the data into the destination */
- int i;
- for (i = 0; i < n; i++) {
- dest[i] &= src[i];
- }
-}
-
-# define NAND_NO_AUTOINCR 0x00000001
-# define NAND_BUSWIDTH_16 0x00000002
-# define NAND_NO_PADDING 0x00000004
-# define NAND_CACHEPRG 0x00000008
-# define NAND_COPYBACK 0x00000010
-# define NAND_IS_AND 0x00000020
-# define NAND_4PAGE_ARRAY 0x00000040
-# define NAND_NO_READRDY 0x00000100
-# define NAND_SAMSUNG_LP (NAND_NO_PADDING | NAND_COPYBACK)
-
-# define NAND_IO
-
-# define PAGE(addr) ((addr) >> ADDR_SHIFT)
-# define PAGE_START(page) (PAGE(page) * (NAND_PAGE_SIZE + OOB_SIZE))
-# define PAGE_MASK ((1 << ADDR_SHIFT) - 1)
-# define OOB_SHIFT (PAGE_SHIFT - 5)
-# define OOB_SIZE (1 << OOB_SHIFT)
-# define SECTOR(addr) ((addr) >> (9 + ADDR_SHIFT - PAGE_SHIFT))
-# define SECTOR_OFFSET(addr) ((addr) & ((511 >> PAGE_SHIFT) << 8))
-
-# define NAND_PAGE_SIZE 256
-# define PAGE_SHIFT 8
-# define PAGE_SECTORS 1
-# define ADDR_SHIFT 8
-# include "nand.c"
-# define NAND_PAGE_SIZE 512
-# define PAGE_SHIFT 9
-# define PAGE_SECTORS 1
-# define ADDR_SHIFT 8
-# include "nand.c"
-# define NAND_PAGE_SIZE 2048
-# define PAGE_SHIFT 11
-# define PAGE_SECTORS 4
-# define ADDR_SHIFT 16
-# include "nand.c"
-
-/* Information based on Linux drivers/mtd/nand/raw/nand_ids.c */
-static const struct {
- int size;
- int width;
- int page_shift;
- int erase_shift;
- uint32_t options;
-} nand_flash_ids[0x100] = {
- [0 ... 0xff] = { 0 },
-
- [0x6b] = { 4, 8, 9, 4, 0 },
- [0xe3] = { 4, 8, 9, 4, 0 },
- [0xe5] = { 4, 8, 9, 4, 0 },
- [0xd6] = { 8, 8, 9, 4, 0 },
- [0xe6] = { 8, 8, 9, 4, 0 },
-
- [0x33] = { 16, 8, 9, 5, 0 },
- [0x73] = { 16, 8, 9, 5, 0 },
- [0x43] = { 16, 16, 9, 5, NAND_BUSWIDTH_16 },
- [0x53] = { 16, 16, 9, 5, NAND_BUSWIDTH_16 },
-
- [0x35] = { 32, 8, 9, 5, 0 },
- [0x75] = { 32, 8, 9, 5, 0 },
- [0x45] = { 32, 16, 9, 5, NAND_BUSWIDTH_16 },
- [0x55] = { 32, 16, 9, 5, NAND_BUSWIDTH_16 },
-
- [0x36] = { 64, 8, 9, 5, 0 },
- [0x76] = { 64, 8, 9, 5, 0 },
- [0x46] = { 64, 16, 9, 5, NAND_BUSWIDTH_16 },
- [0x56] = { 64, 16, 9, 5, NAND_BUSWIDTH_16 },
-
- [0x78] = { 128, 8, 9, 5, 0 },
- [0x39] = { 128, 8, 9, 5, 0 },
- [0x79] = { 128, 8, 9, 5, 0 },
- [0x72] = { 128, 16, 9, 5, NAND_BUSWIDTH_16 },
- [0x49] = { 128, 16, 9, 5, NAND_BUSWIDTH_16 },
- [0x74] = { 128, 16, 9, 5, NAND_BUSWIDTH_16 },
- [0x59] = { 128, 16, 9, 5, NAND_BUSWIDTH_16 },
-
- [0x71] = { 256, 8, 9, 5, 0 },
-
- /*
- * These are the new chips with large page size. The pagesize and the
- * erasesize is determined from the extended id bytes
- */
-# define LP_OPTIONS (NAND_SAMSUNG_LP | NAND_NO_READRDY | NAND_NO_AUTOINCR)
-# define LP_OPTIONS16 (LP_OPTIONS | NAND_BUSWIDTH_16)
-
- /* 512 Megabit */
- [0xa2] = { 64, 8, 0, 0, LP_OPTIONS },
- [0xf2] = { 64, 8, 0, 0, LP_OPTIONS },
- [0xb2] = { 64, 16, 0, 0, LP_OPTIONS16 },
- [0xc2] = { 64, 16, 0, 0, LP_OPTIONS16 },
-
- /* 1 Gigabit */
- [0xa1] = { 128, 8, 0, 0, LP_OPTIONS },
- [0xf1] = { 128, 8, 0, 0, LP_OPTIONS },
- [0xb1] = { 128, 16, 0, 0, LP_OPTIONS16 },
- [0xc1] = { 128, 16, 0, 0, LP_OPTIONS16 },
-
- /* 2 Gigabit */
- [0xaa] = { 256, 8, 0, 0, LP_OPTIONS },
- [0xda] = { 256, 8, 0, 0, LP_OPTIONS },
- [0xba] = { 256, 16, 0, 0, LP_OPTIONS16 },
- [0xca] = { 256, 16, 0, 0, LP_OPTIONS16 },
-
- /* 4 Gigabit */
- [0xac] = { 512, 8, 0, 0, LP_OPTIONS },
- [0xdc] = { 512, 8, 0, 0, LP_OPTIONS },
- [0xbc] = { 512, 16, 0, 0, LP_OPTIONS16 },
- [0xcc] = { 512, 16, 0, 0, LP_OPTIONS16 },
-
- /* 8 Gigabit */
- [0xa3] = { 1024, 8, 0, 0, LP_OPTIONS },
- [0xd3] = { 1024, 8, 0, 0, LP_OPTIONS },
- [0xb3] = { 1024, 16, 0, 0, LP_OPTIONS16 },
- [0xc3] = { 1024, 16, 0, 0, LP_OPTIONS16 },
-
- /* 16 Gigabit */
- [0xa5] = { 2048, 8, 0, 0, LP_OPTIONS },
- [0xd5] = { 2048, 8, 0, 0, LP_OPTIONS },
- [0xb5] = { 2048, 16, 0, 0, LP_OPTIONS16 },
- [0xc5] = { 2048, 16, 0, 0, LP_OPTIONS16 },
-};
-
-static void nand_reset(DeviceState *dev)
-{
- NANDFlashState *s = NAND(dev);
- s->cmd = NAND_CMD_READ0;
- s->addr = 0;
- s->addrlen = 0;
- s->iolen = 0;
- s->offset = 0;
- s->status &= NAND_IOSTATUS_UNPROTCT;
- s->status |= NAND_IOSTATUS_READY;
-}
-
-static inline void nand_pushio_byte(NANDFlashState *s, uint8_t value)
-{
- s->ioaddr[s->iolen++] = value;
- for (value = s->buswidth; --value;) {
- s->ioaddr[s->iolen++] = 0;
- }
-}
-
-/*
- * nand_load_block: Load block containing (s->addr + @offset).
- * Returns length of data available at @offset in this block.
- */
-static unsigned nand_load_block(NANDFlashState *s, unsigned offset)
-{
- unsigned iolen;
-
- if (!s->blk_load(s, s->addr, offset)) {
- return 0;
- }
-
- iolen = (1 << s->page_shift);
- if (s->gnd) {
- iolen += 1 << s->oob_shift;
- }
- assert(offset <= iolen);
- iolen -= offset;
-
- return iolen;
-}
-
-static void nand_command(NANDFlashState *s)
-{
- switch (s->cmd) {
- case NAND_CMD_READ0:
- s->iolen = 0;
- break;
-
- case NAND_CMD_READID:
- s->ioaddr = s->io;
- s->iolen = 0;
- nand_pushio_byte(s, s->manf_id);
- nand_pushio_byte(s, s->chip_id);
- nand_pushio_byte(s, 'Q'); /* Don't-care byte (often 0xa5) */
- if (nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) {
- /* Page Size, Block Size, Spare Size; bit 6 indicates
- * 8 vs 16 bit width NAND.
- */
- nand_pushio_byte(s, (s->buswidth == 2) ? 0x55 : 0x15);
- } else {
- nand_pushio_byte(s, 0xc0); /* Multi-plane */
- }
- break;
-
- case NAND_CMD_RANDOMREAD2:
- case NAND_CMD_NOSERIALREAD2:
- if (!(nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP))
- break;
- s->iolen = nand_load_block(s, s->addr & ((1 << s->addr_shift) - 1));
- break;
-
- case NAND_CMD_RESET:
- nand_reset(DEVICE(s));
- break;
-
- case NAND_CMD_PAGEPROGRAM1:
- s->ioaddr = s->io;
- s->iolen = 0;
- break;
-
- case NAND_CMD_PAGEPROGRAM2:
- if (s->wp) {
- s->blk_write(s);
- }
- break;
-
- case NAND_CMD_BLOCKERASE1:
- break;
-
- case NAND_CMD_BLOCKERASE2:
- s->addr &= (1ull << s->addrlen * 8) - 1;
- s->addr <<= nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP ?
- 16 : 8;
-
- if (s->wp) {
- s->blk_erase(s);
- }
- break;
-
- case NAND_CMD_READSTATUS:
- s->ioaddr = s->io;
- s->iolen = 0;
- nand_pushio_byte(s, s->status);
- break;
-
- default:
- printf("%s: Unknown NAND command 0x%02x\n", __func__, s->cmd);
- }
-}
-
-static int nand_pre_save(void *opaque)
-{
- NANDFlashState *s = NAND(opaque);
-
- s->ioaddr_vmstate = s->ioaddr - s->io;
-
- return 0;
-}
-
-static int nand_post_load(void *opaque, int version_id)
-{
- NANDFlashState *s = NAND(opaque);
-
- if (s->ioaddr_vmstate > sizeof(s->io)) {
- return -EINVAL;
- }
- s->ioaddr = s->io + s->ioaddr_vmstate;
-
- return 0;
-}
-
-static const VMStateDescription vmstate_nand = {
- .name = "nand",
- .version_id = 1,
- .minimum_version_id = 1,
- .pre_save = nand_pre_save,
- .post_load = nand_post_load,
- .fields = (const VMStateField[]) {
- VMSTATE_UINT8(cle, NANDFlashState),
- VMSTATE_UINT8(ale, NANDFlashState),
- VMSTATE_UINT8(ce, NANDFlashState),
- VMSTATE_UINT8(wp, NANDFlashState),
- VMSTATE_UINT8(gnd, NANDFlashState),
- VMSTATE_BUFFER(io, NANDFlashState),
- VMSTATE_UINT32(ioaddr_vmstate, NANDFlashState),
- VMSTATE_INT32(iolen, NANDFlashState),
- VMSTATE_UINT32(cmd, NANDFlashState),
- VMSTATE_UINT64(addr, NANDFlashState),
- VMSTATE_INT32(addrlen, NANDFlashState),
- VMSTATE_INT32(status, NANDFlashState),
- VMSTATE_INT32(offset, NANDFlashState),
- /* XXX: do we want to save s->storage too? */
- VMSTATE_END_OF_LIST()
- }
-};
-
-static void nand_realize(DeviceState *dev, Error **errp)
-{
- int pagesize;
- NANDFlashState *s = NAND(dev);
- int ret;
-
-
- s->buswidth = nand_flash_ids[s->chip_id].width >> 3;
- s->size = nand_flash_ids[s->chip_id].size << 20;
- if (nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) {
- s->page_shift = 11;
- s->erase_shift = 6;
- } else {
- s->page_shift = nand_flash_ids[s->chip_id].page_shift;
- s->erase_shift = nand_flash_ids[s->chip_id].erase_shift;
- }
-
- switch (1 << s->page_shift) {
- case 256:
- nand_init_256(s);
- break;
- case 512:
- nand_init_512(s);
- break;
- case 2048:
- nand_init_2048(s);
- break;
- default:
- error_setg(errp, "Unsupported NAND block size %#x",
- 1 << s->page_shift);
- return;
- }
-
- pagesize = 1 << s->oob_shift;
- s->mem_oob = 1;
- if (s->blk) {
- if (!blk_supports_write_perm(s->blk)) {
- error_setg(errp, "Can't use a read-only drive");
- return;
- }
- ret = blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
- BLK_PERM_ALL, errp);
- if (ret < 0) {
- return;
- }
- if (blk_getlength(s->blk) >=
- (s->pages << s->page_shift) + (s->pages << s->oob_shift)) {
- pagesize = 0;
- s->mem_oob = 0;
- }
- } else {
- pagesize += 1 << s->page_shift;
- }
- if (pagesize) {
- s->storage = (uint8_t *) memset(g_malloc(s->pages * pagesize),
- 0xff, s->pages * pagesize);
- }
- /* Give s->ioaddr a sane value in case we save state before it is used. */
- s->ioaddr = s->io;
-}
-
-static const Property nand_properties[] = {
- DEFINE_PROP_UINT8("manufacturer_id", NANDFlashState, manf_id, 0),
- DEFINE_PROP_UINT8("chip_id", NANDFlashState, chip_id, 0),
- DEFINE_PROP_DRIVE("drive", NANDFlashState, blk),
-};
-
-static void nand_class_init(ObjectClass *klass, const void *data)
-{
- DeviceClass *dc = DEVICE_CLASS(klass);
-
- dc->realize = nand_realize;
- device_class_set_legacy_reset(dc, nand_reset);
- dc->vmsd = &vmstate_nand;
- device_class_set_props(dc, nand_properties);
- set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
-}
-
-static const TypeInfo nand_info = {
- .name = TYPE_NAND,
- .parent = TYPE_DEVICE,
- .instance_size = sizeof(NANDFlashState),
- .class_init = nand_class_init,
-};
-
-static void nand_register_types(void)
-{
- type_register_static(&nand_info);
-}
-
-/*
- * Chip inputs are CLE, ALE, CE, WP, GND and eight I/O pins. Chip
- * outputs are R/B and eight I/O pins.
- *
- * CE, WP and R/B are active low.
- */
-void nand_setpins(DeviceState *dev, uint8_t cle, uint8_t ale,
- uint8_t ce, uint8_t wp, uint8_t gnd)
-{
- NANDFlashState *s = NAND(dev);
-
- s->cle = cle;
- s->ale = ale;
- s->ce = ce;
- s->wp = wp;
- s->gnd = gnd;
- if (wp) {
- s->status |= NAND_IOSTATUS_UNPROTCT;
- } else {
- s->status &= ~NAND_IOSTATUS_UNPROTCT;
- }
-}
-
-void nand_getpins(DeviceState *dev, int *rb)
-{
- *rb = 1;
-}
-
-void nand_setio(DeviceState *dev, uint32_t value)
-{
- int i;
- NANDFlashState *s = NAND(dev);
-
- if (!s->ce && s->cle) {
- if (nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) {
- if (s->cmd == NAND_CMD_READ0 && value == NAND_CMD_LPREAD2)
- return;
- if (value == NAND_CMD_RANDOMREAD1) {
- s->addr &= ~((1 << s->addr_shift) - 1);
- s->addrlen = 0;
- return;
- }
- }
- if (value == NAND_CMD_READ0) {
- s->offset = 0;
- } else if (value == NAND_CMD_READ1) {
- s->offset = 0x100;
- value = NAND_CMD_READ0;
- } else if (value == NAND_CMD_READ2) {
- s->offset = 1 << s->page_shift;
- value = NAND_CMD_READ0;
- }
-
- s->cmd = value;
-
- if (s->cmd == NAND_CMD_READSTATUS ||
- s->cmd == NAND_CMD_PAGEPROGRAM2 ||
- s->cmd == NAND_CMD_BLOCKERASE1 ||
- s->cmd == NAND_CMD_BLOCKERASE2 ||
- s->cmd == NAND_CMD_NOSERIALREAD2 ||
- s->cmd == NAND_CMD_RANDOMREAD2 ||
- s->cmd == NAND_CMD_RESET) {
- nand_command(s);
- }
-
- if (s->cmd != NAND_CMD_RANDOMREAD2) {
- s->addrlen = 0;
- }
- }
-
- if (s->ale) {
- unsigned int shift = s->addrlen * 8;
- uint64_t mask = ~(0xffull << shift);
- uint64_t v = (uint64_t)value << shift;
-
- s->addr = (s->addr & mask) | v;
- s->addrlen ++;
-
- switch (s->addrlen) {
- case 1:
- if (s->cmd == NAND_CMD_READID) {
- nand_command(s);
- }
- break;
- case 2: /* fix cache address as a byte address */
- s->addr <<= (s->buswidth - 1);
- break;
- case 3:
- if (!(nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) &&
- (s->cmd == NAND_CMD_READ0 ||
- s->cmd == NAND_CMD_PAGEPROGRAM1)) {
- nand_command(s);
- }
- break;
- case 4:
- if ((nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) &&
- nand_flash_ids[s->chip_id].size < 256 && /* 1Gb or less */
- (s->cmd == NAND_CMD_READ0 ||
- s->cmd == NAND_CMD_PAGEPROGRAM1)) {
- nand_command(s);
- }
- break;
- case 5:
- if ((nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) &&
- nand_flash_ids[s->chip_id].size >= 256 && /* 2Gb or more */
- (s->cmd == NAND_CMD_READ0 ||
- s->cmd == NAND_CMD_PAGEPROGRAM1)) {
- nand_command(s);
- }
- break;
- default:
- break;
- }
- }
-
- if (!s->cle && !s->ale && s->cmd == NAND_CMD_PAGEPROGRAM1) {
- if (s->iolen < (1 << s->page_shift) + (1 << s->oob_shift)) {
- for (i = s->buswidth; i--; value >>= 8) {
- s->io[s->iolen ++] = (uint8_t) (value & 0xff);
- }
- }
- } else if (!s->cle && !s->ale && s->cmd == NAND_CMD_COPYBACKPRG1) {
- if ((s->addr & ((1 << s->addr_shift) - 1)) <
- (1 << s->page_shift) + (1 << s->oob_shift)) {
- for (i = s->buswidth; i--; s->addr++, value >>= 8) {
- s->io[s->iolen + (s->addr & ((1 << s->addr_shift) - 1))] =
- (uint8_t) (value & 0xff);
- }
- }
- }
-}
-
-uint32_t nand_getio(DeviceState *dev)
-{
- int offset;
- uint32_t x = 0;
- NANDFlashState *s = NAND(dev);
-
- /* Allow sequential reading */
- if (!s->iolen && s->cmd == NAND_CMD_READ0) {
- offset = (int) (s->addr & ((1 << s->addr_shift) - 1)) + s->offset;
- s->offset = 0;
- s->iolen = nand_load_block(s, offset);
- }
-
- if (s->ce || s->iolen <= 0) {
- return 0;
- }
-
- for (offset = s->buswidth; offset--;) {
- x |= s->ioaddr[offset] << (offset << 3);
- }
- /* after receiving READ STATUS command all subsequent reads will
- * return the status register value until another command is issued
- */
- if (s->cmd != NAND_CMD_READSTATUS) {
- s->addr += s->buswidth;
- s->ioaddr += s->buswidth;
- s->iolen -= s->buswidth;
- }
- return x;
-}
-
-uint32_t nand_getbuswidth(DeviceState *dev)
-{
- NANDFlashState *s = (NANDFlashState *) dev;
- return s->buswidth << 3;
-}
-
-DeviceState *nand_init(BlockBackend *blk, int manf_id, int chip_id)
-{
- DeviceState *dev;
-
- if (nand_flash_ids[chip_id].size == 0) {
- hw_error("%s: Unsupported NAND chip ID.\n", __func__);
- }
- dev = qdev_new(TYPE_NAND);
- qdev_prop_set_uint8(dev, "manufacturer_id", manf_id);
- qdev_prop_set_uint8(dev, "chip_id", chip_id);
- if (blk) {
- qdev_prop_set_drive_err(dev, "drive", blk, &error_fatal);
- }
-
- qdev_realize(dev, NULL, &error_fatal);
- return dev;
-}
-
-type_init(nand_register_types)
-
-#else
-
-/* Program a single page */
-static void glue(nand_blk_write_, NAND_PAGE_SIZE)(NANDFlashState *s)
-{
- uint64_t off, page, sector, soff;
- uint8_t iobuf[(PAGE_SECTORS + 2) * 0x200];
- if (PAGE(s->addr) >= s->pages)
- return;
-
- if (!s->blk) {
- mem_and(s->storage + PAGE_START(s->addr) + (s->addr & PAGE_MASK) +
- s->offset, s->io, s->iolen);
- } else if (s->mem_oob) {
- sector = SECTOR(s->addr);
- off = (s->addr & PAGE_MASK) + s->offset;
- soff = SECTOR_OFFSET(s->addr);
- if (blk_pread(s->blk, sector << BDRV_SECTOR_BITS,
- PAGE_SECTORS << BDRV_SECTOR_BITS, iobuf, 0) < 0) {
- printf("%s: read error in sector %" PRIu64 "\n", __func__, sector);
- return;
- }
-
- mem_and(iobuf + (soff | off), s->io, MIN(s->iolen, NAND_PAGE_SIZE - off));
- if (off + s->iolen > NAND_PAGE_SIZE) {
- page = PAGE(s->addr);
- mem_and(s->storage + (page << OOB_SHIFT), s->io + NAND_PAGE_SIZE - off,
- MIN(OOB_SIZE, off + s->iolen - NAND_PAGE_SIZE));
- }
-
- if (blk_pwrite(s->blk, sector << BDRV_SECTOR_BITS,
- PAGE_SECTORS << BDRV_SECTOR_BITS, iobuf, 0) < 0) {
- printf("%s: write error in sector %" PRIu64 "\n", __func__, sector);
- }
- } else {
- off = PAGE_START(s->addr) + (s->addr & PAGE_MASK) + s->offset;
- sector = off >> 9;
- soff = off & 0x1ff;
- if (blk_pread(s->blk, sector << BDRV_SECTOR_BITS,
- (PAGE_SECTORS + 2) << BDRV_SECTOR_BITS, iobuf, 0) < 0) {
- printf("%s: read error in sector %" PRIu64 "\n", __func__, sector);
- return;
- }
-
- mem_and(iobuf + soff, s->io, s->iolen);
-
- if (blk_pwrite(s->blk, sector << BDRV_SECTOR_BITS,
- (PAGE_SECTORS + 2) << BDRV_SECTOR_BITS, iobuf, 0) < 0) {
- printf("%s: write error in sector %" PRIu64 "\n", __func__, sector);
- }
- }
- s->offset = 0;
-}
-
-/* Erase a single block */
-static void glue(nand_blk_erase_, NAND_PAGE_SIZE)(NANDFlashState *s)
-{
- uint64_t i, page, addr;
- uint8_t iobuf[0x200] = { [0 ... 0x1ff] = 0xff, };
- addr = s->addr & ~((1 << (ADDR_SHIFT + s->erase_shift)) - 1);
-
- if (PAGE(addr) >= s->pages) {
- return;
- }
-
- if (!s->blk) {
- memset(s->storage + PAGE_START(addr),
- 0xff, (NAND_PAGE_SIZE + OOB_SIZE) << s->erase_shift);
- } else if (s->mem_oob) {
- memset(s->storage + (PAGE(addr) << OOB_SHIFT),
- 0xff, OOB_SIZE << s->erase_shift);
- i = SECTOR(addr);
- page = SECTOR(addr + (1 << (ADDR_SHIFT + s->erase_shift)));
- for (; i < page; i ++)
- if (blk_pwrite(s->blk, i << BDRV_SECTOR_BITS,
- BDRV_SECTOR_SIZE, iobuf, 0) < 0) {
- printf("%s: write error in sector %" PRIu64 "\n", __func__, i);
- }
- } else {
- addr = PAGE_START(addr);
- page = addr >> 9;
- if (blk_pread(s->blk, page << BDRV_SECTOR_BITS,
- BDRV_SECTOR_SIZE, iobuf, 0) < 0) {
- printf("%s: read error in sector %" PRIu64 "\n", __func__, page);
- }
- memset(iobuf + (addr & 0x1ff), 0xff, (~addr & 0x1ff) + 1);
- if (blk_pwrite(s->blk, page << BDRV_SECTOR_BITS,
- BDRV_SECTOR_SIZE, iobuf, 0) < 0) {
- printf("%s: write error in sector %" PRIu64 "\n", __func__, page);
- }
-
- memset(iobuf, 0xff, 0x200);
- i = (addr & ~0x1ff) + 0x200;
- for (addr += ((NAND_PAGE_SIZE + OOB_SIZE) << s->erase_shift) - 0x200;
- i < addr; i += 0x200) {
- if (blk_pwrite(s->blk, i, BDRV_SECTOR_SIZE, iobuf, 0) < 0) {
- printf("%s: write error in sector %" PRIu64 "\n",
- __func__, i >> 9);
- }
- }
-
- page = i >> 9;
- if (blk_pread(s->blk, page << BDRV_SECTOR_BITS,
- BDRV_SECTOR_SIZE, iobuf, 0) < 0) {
- printf("%s: read error in sector %" PRIu64 "\n", __func__, page);
- }
- memset(iobuf, 0xff, ((addr - 1) & 0x1ff) + 1);
- if (blk_pwrite(s->blk, page << BDRV_SECTOR_BITS,
- BDRV_SECTOR_SIZE, iobuf, 0) < 0) {
- printf("%s: write error in sector %" PRIu64 "\n", __func__, page);
- }
- }
-}
-
-static bool glue(nand_blk_load_, NAND_PAGE_SIZE)(NANDFlashState *s,
- uint64_t addr, unsigned offset)
-{
- if (PAGE(addr) >= s->pages) {
- return false;
- }
-
- if (offset > NAND_PAGE_SIZE + OOB_SIZE) {
- return false;
- }
-
- if (s->blk) {
- if (s->mem_oob) {
- if (blk_pread(s->blk, SECTOR(addr) << BDRV_SECTOR_BITS,
- PAGE_SECTORS << BDRV_SECTOR_BITS, s->io, 0) < 0) {
- printf("%s: read error in sector %" PRIu64 "\n",
- __func__, SECTOR(addr));
- }
- memcpy(s->io + SECTOR_OFFSET(s->addr) + NAND_PAGE_SIZE,
- s->storage + (PAGE(s->addr) << OOB_SHIFT),
- OOB_SIZE);
- s->ioaddr = s->io + SECTOR_OFFSET(s->addr) + offset;
- } else {
- if (blk_pread(s->blk, PAGE_START(addr),
- (PAGE_SECTORS + 2) << BDRV_SECTOR_BITS, s->io, 0)
- < 0) {
- printf("%s: read error in sector %" PRIu64 "\n",
- __func__, PAGE_START(addr) >> 9);
- }
- s->ioaddr = s->io + (PAGE_START(addr) & 0x1ff) + offset;
- }
- } else {
- memcpy(s->io, s->storage + PAGE_START(s->addr) +
- offset, NAND_PAGE_SIZE + OOB_SIZE - offset);
- s->ioaddr = s->io;
- }
-
- return true;
-}
-
-static void glue(nand_init_, NAND_PAGE_SIZE)(NANDFlashState *s)
-{
- s->oob_shift = PAGE_SHIFT - 5;
- s->pages = s->size >> PAGE_SHIFT;
- s->addr_shift = ADDR_SHIFT;
-
- s->blk_erase = glue(nand_blk_erase_, NAND_PAGE_SIZE);
- s->blk_write = glue(nand_blk_write_, NAND_PAGE_SIZE);
- s->blk_load = glue(nand_blk_load_, NAND_PAGE_SIZE);
-}
-
-# undef NAND_PAGE_SIZE
-# undef PAGE_SHIFT
-# undef PAGE_SECTORS
-# undef ADDR_SHIFT
-#endif /* NAND_IO */
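For context only (editorial sketch, not part of the patch): the board-facing pin interface deleted above was driven by machine code roughly as below. The prototypes were declared in "hw/block/flash.h" and the command constants are the NAND_CMD_* values used in the removed functions; treat the exact cycle sequence as illustrative, not normative.

/*
 * Illustrative sketch, assuming the nand_* API removed above.
 * CE and WP are active low, so ce=0 selects the chip and wp=1
 * leaves it writable.
 */
static uint32_t board_nand_read_word(DeviceState *nand, uint32_t addr)
{
    int rb;

    nand_setpins(nand, 1, 0, 0, 1, 0);      /* CLE high: command cycle */
    nand_setio(nand, NAND_CMD_READ0);
    nand_setpins(nand, 0, 1, 0, 1, 0);      /* ALE high: address cycles */
    nand_setio(nand, addr & 0xff);
    nand_setio(nand, (addr >> 8) & 0xff);
    nand_setio(nand, (addr >> 16) & 0xff);
    nand_setpins(nand, 0, 0, 0, 1, 0);      /* back to data phase */
    nand_getpins(nand, &rb);                /* R/B#: 1 means ready */
    return nand_getio(nand);
}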
diff --git a/hw/core/loader.c b/hw/core/loader.c
index b792a54..e7056ba 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -1333,20 +1333,6 @@ void rom_set_fw(FWCfgState *f)
fw_cfg = f;
}
-void rom_set_order_override(int order)
-{
- if (!fw_cfg)
- return;
- fw_cfg_set_order_override(fw_cfg, order);
-}
-
-void rom_reset_order_override(void)
-{
- if (!fw_cfg)
- return;
- fw_cfg_reset_order_override(fw_cfg);
-}
-
void rom_transaction_begin(void)
{
Rom *rom;
diff --git a/hw/core/machine.c b/hw/core/machine.c
index c3f3a50..e869821 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -285,24 +285,6 @@ GlobalProperty hw_compat_2_6[] = {
};
const size_t hw_compat_2_6_len = G_N_ELEMENTS(hw_compat_2_6);
-GlobalProperty hw_compat_2_5[] = {
- { "isa-fdc", "fallback", "144" },
- { "pvscsi", "x-old-pci-configuration", "on" },
- { "pvscsi", "x-disable-pcie", "on" },
- { "vmxnet3", "x-old-msi-offsets", "on" },
- { "vmxnet3", "x-disable-pcie", "on" },
-};
-const size_t hw_compat_2_5_len = G_N_ELEMENTS(hw_compat_2_5);
-
-GlobalProperty hw_compat_2_4[] = {
- { "e1000", "extra_mac_registers", "off" },
- { "virtio-pci", "x-disable-pcie", "on" },
- { "virtio-pci", "migrate-extra", "off" },
- { "fw_cfg_mem", "dma_enabled", "off" },
- { "fw_cfg_io", "dma_enabled", "off" }
-};
-const size_t hw_compat_2_4_len = G_N_ELEMENTS(hw_compat_2_4);
-
MachineState *current_machine;
static char *machine_get_kernel(Object *obj, Error **errp)
diff --git a/hw/core/meson.build b/hw/core/meson.build
index 547de65..b5a545a 100644
--- a/hw/core/meson.build
+++ b/hw/core/meson.build
@@ -26,7 +26,7 @@ system_ss.add(when: 'CONFIG_XILINX_AXI', if_true: files('stream.c'))
system_ss.add(when: 'CONFIG_PLATFORM_BUS', if_true: files('sysbus-fdt.c'))
system_ss.add(when: 'CONFIG_EIF', if_true: [files('eif.c'), zlib, libcbor, gnutls])
-libsystem_ss.add(files(
+system_ss.add(files(
'cpu-system.c',
'fw-path-provider.c',
'gpio.c',
@@ -46,7 +46,7 @@ libsystem_ss.add(files(
'vm-change-state-handler.c',
'clock-vmstate.c',
))
-libuser_ss.add(files(
+user_ss.add(files(
'cpu-user.c',
'qdev-user.c',
))
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 8e11e63..24e145d 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -145,6 +145,7 @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name,
if (ctx != bdrv_get_aio_context(bs)) {
error_setg(errp, "Different aio context is not supported for new "
"node");
+ return;
}
blk_replace_bs(blk, bs, errp);
diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
index d34ce07..eb65bda 100644
--- a/hw/i386/Kconfig
+++ b/hw/i386/Kconfig
@@ -10,6 +10,11 @@ config SGX
bool
depends on KVM
+config TDX
+ bool
+ select X86_FW_OVMF
+ depends on KVM
+
config PC
bool
imply APPLESMC
@@ -26,6 +31,7 @@ config PC
imply QXL
imply SEV
imply SGX
+ imply TDX
imply TEST_DEVICES
imply TPM_CRB
imply TPM_TIS_ISA
diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c
index 39035db..1be9bfe 100644
--- a/hw/i386/kvm/apic.c
+++ b/hw/i386/kvm/apic.c
@@ -17,6 +17,7 @@
#include "system/hw_accel.h"
#include "system/kvm.h"
#include "kvm/kvm_i386.h"
+#include "kvm/tdx.h"
static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic,
int reg_id, uint32_t val)
@@ -141,6 +142,10 @@ static void kvm_apic_put(CPUState *cs, run_on_cpu_data data)
struct kvm_lapic_state kapic;
int ret;
+ if (is_tdx_vm()) {
+ return;
+ }
+
kvm_put_apicbase(s->cpu, s->apicbase);
kvm_put_apic_state(s, &kapic);
diff --git a/hw/i386/meson.build b/hw/i386/meson.build
index 10bdfde..7896f34 100644
--- a/hw/i386/meson.build
+++ b/hw/i386/meson.build
@@ -32,6 +32,7 @@ i386_ss.add(when: 'CONFIG_PC', if_true: files(
'port92.c'))
i386_ss.add(when: 'CONFIG_X86_FW_OVMF', if_true: files('pc_sysfw_ovmf.c'),
if_false: files('pc_sysfw_ovmf-stubs.c'))
+i386_ss.add(when: 'CONFIG_TDX', if_true: files('tdvf.c', 'tdvf-hob.c'))
subdir('kvm')
subdir('xen')
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 7065615..b211633 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -44,6 +44,7 @@
#include "system/xen.h"
#include "system/reset.h"
#include "kvm/kvm_i386.h"
+#include "kvm/tdx.h"
#include "hw/xen/xen.h"
#include "qobject/qlist.h"
#include "qemu/error-report.h"
@@ -259,28 +260,6 @@ GlobalProperty pc_compat_2_6[] = {
};
const size_t pc_compat_2_6_len = G_N_ELEMENTS(pc_compat_2_6);
-GlobalProperty pc_compat_2_5[] = {};
-const size_t pc_compat_2_5_len = G_N_ELEMENTS(pc_compat_2_5);
-
-GlobalProperty pc_compat_2_4[] = {
- PC_CPU_MODEL_IDS("2.4.0")
- { "Haswell-" TYPE_X86_CPU, "abm", "off" },
- { "Haswell-noTSX-" TYPE_X86_CPU, "abm", "off" },
- { "Broadwell-" TYPE_X86_CPU, "abm", "off" },
- { "Broadwell-noTSX-" TYPE_X86_CPU, "abm", "off" },
- { "host" "-" TYPE_X86_CPU, "host-cache-info", "on" },
- { TYPE_X86_CPU, "check", "off" },
- { "qemu64" "-" TYPE_X86_CPU, "sse4a", "on" },
- { "qemu64" "-" TYPE_X86_CPU, "abm", "on" },
- { "qemu64" "-" TYPE_X86_CPU, "popcnt", "on" },
- { "qemu32" "-" TYPE_X86_CPU, "popcnt", "on" },
- { "Opteron_G2" "-" TYPE_X86_CPU, "rdtscp", "on" },
- { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "on" },
- { "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "on" },
- { "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "on", }
-};
-const size_t pc_compat_2_4_len = G_N_ELEMENTS(pc_compat_2_4);
-
/*
* @PC_FW_DATA:
* Size of the chunk of memory at the top of RAM for the BIOS ACPI tables
@@ -976,21 +955,23 @@ void pc_memory_init(PCMachineState *pcms,
/* Initialize PC system firmware */
pc_system_firmware_init(pcms, rom_memory);
- option_rom_mr = g_malloc(sizeof(*option_rom_mr));
- if (machine_require_guest_memfd(machine)) {
- memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom",
- PC_ROM_SIZE, &error_fatal);
- } else {
- memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
- &error_fatal);
- if (pcmc->pci_enabled) {
- memory_region_set_readonly(option_rom_mr, true);
+ if (!is_tdx_vm()) {
+ option_rom_mr = g_malloc(sizeof(*option_rom_mr));
+ if (machine_require_guest_memfd(machine)) {
+ memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom",
+ PC_ROM_SIZE, &error_fatal);
+ } else {
+ memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
+ &error_fatal);
+ if (pcmc->pci_enabled) {
+ memory_region_set_readonly(option_rom_mr, true);
+ }
}
+ memory_region_add_subregion_overlap(rom_memory,
+ PC_ROM_MIN_VGA,
+ option_rom_mr,
+ 1);
}
- memory_region_add_subregion_overlap(rom_memory,
- PC_ROM_MIN_VGA,
- option_rom_mr,
- 1);
fw_cfg = fw_cfg_arch_create(machine,
x86ms->boot_cpus, x86ms->apic_id_limit);
@@ -999,14 +980,13 @@ void pc_memory_init(PCMachineState *pcms,
if (machine->device_memory) {
uint64_t *val = g_malloc(sizeof(*val));
- uint64_t res_mem_end = machine->device_memory->base;
-
- if (!pcmc->broken_reserved_end) {
- res_mem_end += memory_region_size(&machine->device_memory->mr);
- }
+ uint64_t res_mem_end;
if (pcms->cxl_devices_state.is_enabled) {
res_mem_end = cxl_resv_end;
+ } else {
+ res_mem_end = machine->device_memory->base
+ + memory_region_size(&machine->device_memory->mr);
}
*val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB));
fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val));
@@ -1044,9 +1024,7 @@ uint64_t pc_pci_hole64_start(void)
hole64_start = pc_get_cxl_range_end(pcms);
} else if (pcmc->has_reserved_memory && (ms->ram_size < ms->maxram_size)) {
pc_get_device_memory_range(pcms, &hole64_start, &size);
- if (!pcmc->broken_reserved_end) {
- hole64_start += size;
- }
+ hole64_start += size;
} else {
hole64_start = pc_above_4g_end(pcms);
}
@@ -1058,7 +1036,6 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
{
DeviceState *dev = NULL;
- rom_set_order_override(FW_CFG_ORDER_OVERRIDE_VGA);
if (pci_bus) {
PCIDevice *pcidev = pci_vga_init(pci_bus);
dev = pcidev ? &pcidev->qdev : NULL;
@@ -1066,7 +1043,7 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
ISADevice *isadev = isa_vga_init(isa_bus);
dev = isadev ? DEVICE(isadev) : NULL;
}
- rom_reset_order_override();
+
return dev;
}
@@ -1256,8 +1233,6 @@ void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus)
bool default_is_ne2k = g_str_equal(mc->default_nic, TYPE_ISA_NE2000);
NICInfo *nd;
- rom_set_order_override(FW_CFG_ORDER_OVERRIDE_NIC);
-
while ((nd = qemu_find_nic_info(TYPE_ISA_NE2000, default_is_ne2k, NULL))) {
pc_init_ne2k_isa(isa_bus, nd, &error_fatal);
}
@@ -1266,8 +1241,6 @@ void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus)
if (pci_bus) {
pci_init_nic_devices(pci_bus, mc->default_nic);
}
-
- rom_reset_order_override();
}
void pc_i8259_create(ISABus *isa_bus, qemu_irq *i8259_irqs)
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 6b6359e..ea7572e 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -783,32 +783,6 @@ static void pc_i440fx_machine_2_6_options(MachineClass *m)
DEFINE_I440FX_MACHINE(2, 6);
-static void pc_i440fx_machine_2_5_options(MachineClass *m)
-{
- X86MachineClass *x86mc = X86_MACHINE_CLASS(m);
-
- pc_i440fx_machine_2_6_options(m);
- x86mc->save_tsc_khz = false;
- m->legacy_fw_cfg_order = 1;
- compat_props_add(m->compat_props, hw_compat_2_5, hw_compat_2_5_len);
- compat_props_add(m->compat_props, pc_compat_2_5, pc_compat_2_5_len);
-}
-
-DEFINE_I440FX_MACHINE(2, 5);
-
-static void pc_i440fx_machine_2_4_options(MachineClass *m)
-{
- PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
-
- pc_i440fx_machine_2_5_options(m);
- m->hw_version = "2.4.0";
- pcmc->broken_reserved_end = true;
- compat_props_add(m->compat_props, hw_compat_2_4, hw_compat_2_4_len);
- compat_props_add(m->compat_props, pc_compat_2_4, pc_compat_2_4_len);
-}
-
-DEFINE_I440FX_MACHINE(2, 4);
-
#ifdef CONFIG_ISAPC
static void isapc_machine_options(MachineClass *m)
{
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index c538b3d..33211b1 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -672,29 +672,3 @@ static void pc_q35_machine_2_6_options(MachineClass *m)
}
DEFINE_Q35_MACHINE(2, 6);
-
-static void pc_q35_machine_2_5_options(MachineClass *m)
-{
- X86MachineClass *x86mc = X86_MACHINE_CLASS(m);
-
- pc_q35_machine_2_6_options(m);
- x86mc->save_tsc_khz = false;
- m->legacy_fw_cfg_order = 1;
- compat_props_add(m->compat_props, hw_compat_2_5, hw_compat_2_5_len);
- compat_props_add(m->compat_props, pc_compat_2_5, pc_compat_2_5_len);
-}
-
-DEFINE_Q35_MACHINE(2, 5);
-
-static void pc_q35_machine_2_4_options(MachineClass *m)
-{
- PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
-
- pc_q35_machine_2_5_options(m);
- m->hw_version = "2.4.0";
- pcmc->broken_reserved_end = true;
- compat_props_add(m->compat_props, hw_compat_2_4, hw_compat_2_4_len);
- compat_props_add(m->compat_props, pc_compat_2_4, pc_compat_2_4_len);
-}
-
-DEFINE_Q35_MACHINE(2, 4);
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index 1eeb58a..821396c 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -37,6 +37,7 @@
#include "hw/block/flash.h"
#include "system/kvm.h"
#include "target/i386/sev.h"
+#include "kvm/tdx.h"
#define FLASH_SECTOR_SIZE 4096
@@ -280,5 +281,11 @@ void x86_firmware_configure(hwaddr gpa, void *ptr, int size)
}
sev_encrypt_flash(gpa, ptr, size, &error_fatal);
+ } else if (is_tdx_vm()) {
+ ret = tdx_parse_tdvf(ptr, size);
+ if (ret) {
+ error_report("failed to parse TDVF for TDX VM");
+ exit(1);
+ }
}
}
diff --git a/hw/i386/tdvf-hob.c b/hw/i386/tdvf-hob.c
new file mode 100644
index 0000000..782b3d1
--- /dev/null
+++ b/hw/i386/tdvf-hob.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2025 Intel Corporation
+ * Author: Isaku Yamahata <isaku.yamahata at gmail.com>
+ * <isaku.yamahata at intel.com>
+ * Xiaoyao Li <xiaoyao.li@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "standard-headers/uefi/uefi.h"
+#include "hw/pci/pcie_host.h"
+#include "tdvf-hob.h"
+
+typedef struct TdvfHob {
+ hwaddr hob_addr;
+ void *ptr;
+ int size;
+
+ /* working area */
+ void *current;
+ void *end;
+} TdvfHob;
+
+static uint64_t tdvf_current_guest_addr(const TdvfHob *hob)
+{
+ return hob->hob_addr + (hob->current - hob->ptr);
+}
+
+static void tdvf_align(TdvfHob *hob, size_t align)
+{
+ hob->current = QEMU_ALIGN_PTR_UP(hob->current, align);
+}
+
+static void *tdvf_get_area(TdvfHob *hob, uint64_t size)
+{
+ void *ret;
+
+ if (hob->current + size > hob->end) {
+ error_report("TD_HOB overrun, size = 0x%" PRIx64, size);
+ exit(1);
+ }
+
+ ret = hob->current;
+ hob->current += size;
+ tdvf_align(hob, 8);
+ return ret;
+}
+
+static void tdvf_hob_add_memory_resources(TdxGuest *tdx, TdvfHob *hob)
+{
+ EFI_HOB_RESOURCE_DESCRIPTOR *region;
+ EFI_RESOURCE_ATTRIBUTE_TYPE attr;
+ EFI_RESOURCE_TYPE resource_type;
+
+ TdxRamEntry *e;
+ int i;
+
+ for (i = 0; i < tdx->nr_ram_entries; i++) {
+ e = &tdx->ram_entries[i];
+
+ if (e->type == TDX_RAM_UNACCEPTED) {
+ resource_type = EFI_RESOURCE_MEMORY_UNACCEPTED;
+ attr = EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED;
+ } else if (e->type == TDX_RAM_ADDED) {
+ resource_type = EFI_RESOURCE_SYSTEM_MEMORY;
+ attr = EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE;
+ } else {
+ error_report("unknown TDX_RAM_ENTRY type %d", e->type);
+ exit(1);
+ }
+
+ region = tdvf_get_area(hob, sizeof(*region));
+ *region = (EFI_HOB_RESOURCE_DESCRIPTOR) {
+ .Header = {
+ .HobType = EFI_HOB_TYPE_RESOURCE_DESCRIPTOR,
+ .HobLength = cpu_to_le16(sizeof(*region)),
+ .Reserved = cpu_to_le32(0),
+ },
+ .Owner = EFI_HOB_OWNER_ZERO,
+ .ResourceType = cpu_to_le32(resource_type),
+ .ResourceAttribute = cpu_to_le32(attr),
+ .PhysicalStart = cpu_to_le64(e->address),
+ .ResourceLength = cpu_to_le64(e->length),
+ };
+ }
+}
+
+void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob)
+{
+ TdvfHob hob = {
+ .hob_addr = td_hob->address,
+ .size = td_hob->size,
+ .ptr = td_hob->mem_ptr,
+
+ .current = td_hob->mem_ptr,
+ .end = td_hob->mem_ptr + td_hob->size,
+ };
+
+ EFI_HOB_GENERIC_HEADER *last_hob;
+ EFI_HOB_HANDOFF_INFO_TABLE *hit;
+
+ /* Note, Efi{Free}Memory{Bottom,Top} are ignored, leave 'em zeroed. */
+ hit = tdvf_get_area(&hob, sizeof(*hit));
+ *hit = (EFI_HOB_HANDOFF_INFO_TABLE) {
+ .Header = {
+ .HobType = EFI_HOB_TYPE_HANDOFF,
+ .HobLength = cpu_to_le16(sizeof(*hit)),
+ .Reserved = cpu_to_le32(0),
+ },
+ .Version = cpu_to_le32(EFI_HOB_HANDOFF_TABLE_VERSION),
+ .BootMode = cpu_to_le32(0),
+ .EfiMemoryTop = cpu_to_le64(0),
+ .EfiMemoryBottom = cpu_to_le64(0),
+ .EfiFreeMemoryTop = cpu_to_le64(0),
+ .EfiFreeMemoryBottom = cpu_to_le64(0),
+ .EfiEndOfHobList = cpu_to_le64(0), /* initialized later */
+ };
+
+ tdvf_hob_add_memory_resources(tdx, &hob);
+
+ last_hob = tdvf_get_area(&hob, sizeof(*last_hob));
+ *last_hob = (EFI_HOB_GENERIC_HEADER) {
+ .HobType = EFI_HOB_TYPE_END_OF_HOB_LIST,
+ .HobLength = cpu_to_le16(sizeof(*last_hob)),
+ .Reserved = cpu_to_le32(0),
+ };
+ hit->EfiEndOfHobList = tdvf_current_guest_addr(&hob);
+}
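A minimal sketch (editorial, not part of the patch) of how the HOB builder above is fed: the TDVF parser marks one section as TDVF_SECTION_TYPE_TD_HOB, and its mapped buffer plus guest address go straight into tdvf_hob_create(). Field names follow the structures used above; error handling is omitted.

/*
 * Sketch: locate the TD_HOB section and build the HOB list in it.
 * entry->mem_ptr is assumed to already point at the host mapping of
 * the entry->size bytes at guest address entry->address.
 */
static void build_td_hob(TdxGuest *tdx, TdxFirmware *fw)
{
    int i;

    for (i = 0; i < fw->nr_entries; i++) {
        TdxFirmwareEntry *entry = &fw->entries[i];

        if (entry->type == TDVF_SECTION_TYPE_TD_HOB) {
            tdvf_hob_create(tdx, entry);
        }
    }
}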
diff --git a/hw/i386/tdvf-hob.h b/hw/i386/tdvf-hob.h
new file mode 100644
index 0000000..4fc6a37
--- /dev/null
+++ b/hw/i386/tdvf-hob.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef HW_I386_TD_HOB_H
+#define HW_I386_TD_HOB_H
+
+#include "hw/i386/tdvf.h"
+#include "target/i386/kvm/tdx.h"
+
+void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob);
+
+#define EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE \
+ (EFI_RESOURCE_ATTRIBUTE_PRESENT | \
+ EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
+ EFI_RESOURCE_ATTRIBUTE_TESTED)
+
+#define EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED \
+ (EFI_RESOURCE_ATTRIBUTE_PRESENT | \
+ EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
+ EFI_RESOURCE_ATTRIBUTE_TESTED)
+
+#define EFI_RESOURCE_ATTRIBUTE_TDVF_MMIO \
+ (EFI_RESOURCE_ATTRIBUTE_PRESENT | \
+ EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
+ EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE)
+
+#endif
diff --git a/hw/i386/tdvf.c b/hw/i386/tdvf.c
new file mode 100644
index 0000000..645d9d1
--- /dev/null
+++ b/hw/i386/tdvf.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2025 Intel Corporation
+ * Author: Isaku Yamahata <isaku.yamahata at gmail.com>
+ * <isaku.yamahata at intel.com>
+ * Xiaoyao Li <xiaoyao.li@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+
+#include "hw/i386/pc.h"
+#include "hw/i386/tdvf.h"
+#include "system/kvm.h"
+
+#define TDX_METADATA_OFFSET_GUID "e47a6535-984a-4798-865e-4685a7bf8ec2"
+#define TDX_METADATA_VERSION 1
+#define TDVF_SIGNATURE 0x46564454 /* TDVF as little endian */
+#define TDVF_ALIGNMENT 4096
+
+/*
+ * The raw structs read from TDVF keep the naming convention of the
+ * TDVF Design Guide spec.
+ */
+typedef struct {
+ uint32_t DataOffset;
+ uint32_t RawDataSize;
+ uint64_t MemoryAddress;
+ uint64_t MemoryDataSize;
+ uint32_t Type;
+ uint32_t Attributes;
+} TdvfSectionEntry;
+
+typedef struct {
+ uint32_t Signature;
+ uint32_t Length;
+ uint32_t Version;
+ uint32_t NumberOfSectionEntries;
+ TdvfSectionEntry SectionEntries[];
+} TdvfMetadata;
+
+struct tdx_metadata_offset {
+ uint32_t offset;
+};
+
+static TdvfMetadata *tdvf_get_metadata(void *flash_ptr, int size)
+{
+ TdvfMetadata *metadata;
+ uint32_t offset = 0;
+ uint8_t *data;
+
+ if ((uint32_t) size != size) {
+ return NULL;
+ }
+
+ if (pc_system_ovmf_table_find(TDX_METADATA_OFFSET_GUID, &data, NULL)) {
+ offset = size - le32_to_cpu(((struct tdx_metadata_offset *)data)->offset);
+
+ if (offset + sizeof(*metadata) > size) {
+ return NULL;
+ }
+ } else {
+ error_report("Cannot find TDX_METADATA_OFFSET_GUID");
+ return NULL;
+ }
+
+ metadata = flash_ptr + offset;
+
+ /* Finally, verify the signature to determine if this is a TDVF image. */
+ metadata->Signature = le32_to_cpu(metadata->Signature);
+ if (metadata->Signature != TDVF_SIGNATURE) {
+ error_report("Invalid TDVF signature in metadata!");
+ return NULL;
+ }
+
+ /* Sanity check that the TDVF doesn't overlap its own metadata. */
+ metadata->Length = le32_to_cpu(metadata->Length);
+ if (offset + metadata->Length > size) {
+ return NULL;
+ }
+
+ /* Only version 1 is supported/defined. */
+ metadata->Version = le32_to_cpu(metadata->Version);
+ if (metadata->Version != TDX_METADATA_VERSION) {
+ return NULL;
+ }
+
+ return metadata;
+}
+
+static int tdvf_parse_and_check_section_entry(const TdvfSectionEntry *src,
+ TdxFirmwareEntry *entry)
+{
+ entry->data_offset = le32_to_cpu(src->DataOffset);
+ entry->data_len = le32_to_cpu(src->RawDataSize);
+ entry->address = le64_to_cpu(src->MemoryAddress);
+ entry->size = le64_to_cpu(src->MemoryDataSize);
+ entry->type = le32_to_cpu(src->Type);
+ entry->attributes = le32_to_cpu(src->Attributes);
+
+ /* sanity check */
+ if (entry->size < entry->data_len) {
+ error_report("Broken metadata RawDataSize 0x%x MemoryDataSize 0x%"PRIx64,
+ entry->data_len, entry->size);
+ return -1;
+ }
+ if (!QEMU_IS_ALIGNED(entry->address, TDVF_ALIGNMENT)) {
+ error_report("MemoryAddress 0x%"PRIx64" not page aligned", entry->address);
+ return -1;
+ }
+ if (!QEMU_IS_ALIGNED(entry->size, TDVF_ALIGNMENT)) {
+ error_report("MemoryDataSize 0x%"PRIx64" not page aligned", entry->size);
+ return -1;
+ }
+
+ switch (entry->type) {
+ case TDVF_SECTION_TYPE_BFV:
+ case TDVF_SECTION_TYPE_CFV:
+ /* The sections that must be copied from firmware image to TD memory */
+ if (entry->data_len == 0) {
+ error_report("%d section with RawDataSize == 0", entry->type);
+ return -1;
+ }
+ break;
+ case TDVF_SECTION_TYPE_TD_HOB:
+ case TDVF_SECTION_TYPE_TEMP_MEM:
+ /* The sections that need not be copied from the firmware image */
+ if (entry->data_len != 0) {
+ error_report("%d section with RawDataSize 0x%x != 0",
+ entry->type, entry->data_len);
+ return -1;
+ }
+ break;
+ default:
+ error_report("TDVF contains unsupported section type %d", entry->type);
+ return -1;
+ }
+
+ return 0;
+}
+
+int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size)
+{
+ g_autofree TdvfSectionEntry *sections = NULL;
+ TdvfMetadata *metadata;
+ ssize_t entries_size;
+ int i;
+
+ metadata = tdvf_get_metadata(flash_ptr, size);
+ if (!metadata) {
+ return -EINVAL;
+ }
+
+ /* load and parse metadata entries */
+ fw->nr_entries = le32_to_cpu(metadata->NumberOfSectionEntries);
+ if (fw->nr_entries < 2) {
+ error_report("Invalid number of fw entries (%u) in TDVF Metadata",
+ fw->nr_entries);
+ return -EINVAL;
+ }
+
+ entries_size = fw->nr_entries * sizeof(TdvfSectionEntry);
+ if (metadata->Length != sizeof(*metadata) + entries_size) {
+ error_report("TDVF metadata len (0x%x) mismatch, expected (0x%x)",
+ metadata->Length,
+ (uint32_t)(sizeof(*metadata) + entries_size));
+ return -EINVAL;
+ }
+
+ fw->entries = g_new(TdxFirmwareEntry, fw->nr_entries);
+ sections = g_new(TdvfSectionEntry, fw->nr_entries);
+
+ memcpy(sections, (void *)metadata + sizeof(*metadata), entries_size);
+
+ for (i = 0; i < fw->nr_entries; i++) {
+ if (tdvf_parse_and_check_section_entry(&sections[i], &fw->entries[i])) {
+ goto err;
+ }
+ }
+
+ fw->mem_ptr = flash_ptr;
+ return 0;
+
+err:
+ g_free(fw->entries);
+ fw->entries = NULL;
+ return -EINVAL;
+}
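The metadata block is located by walking the OVMF GUIDed table for TDX_METADATA_OFFSET_GUID, whose payload is the block's distance from the end of the image, i.e. the metadata sits at flash_ptr + (size - offset). Once tdvf_parse_metadata() succeeds, a caller typically copies BFV/CFV data into the guest and treats TD_HOB/TEMP_MEM as plain reserved RAM. The sketch below (editorial, not part of the patch) is illustrative only; copy_to_guest() is a placeholder, not a real QEMU helper.

/*
 * Sketch: consume the parsed section entries.
 * copy_to_guest() stands in for whatever mapping helper the caller uses.
 */
static void load_tdvf_sections(TdxFirmware *fw)
{
    int i;

    for (i = 0; i < fw->nr_entries; i++) {
        TdxFirmwareEntry *entry = &fw->entries[i];

        switch (entry->type) {
        case TDVF_SECTION_TYPE_BFV:
        case TDVF_SECTION_TYPE_CFV:
            copy_to_guest(entry->address,
                          (uint8_t *)fw->mem_ptr + entry->data_offset,
                          entry->data_len);
            break;
        case TDVF_SECTION_TYPE_TD_HOB:
        case TDVF_SECTION_TYPE_TEMP_MEM:
            /* No image data; the region only needs to exist as RAM. */
            break;
        }
    }
}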
diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c
index 1b0671c..b1b5f11 100644
--- a/hw/i386/x86-common.c
+++ b/hw/i386/x86-common.c
@@ -44,6 +44,7 @@
#include "standard-headers/asm-x86/bootparam.h"
#include CONFIG_DEVICES
#include "kvm/kvm_i386.h"
+#include "kvm/tdx.h"
#ifdef CONFIG_XEN_EMU
#include "hw/xen/xen.h"
@@ -1035,11 +1036,14 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
if (machine_require_guest_memfd(MACHINE(x86ms))) {
memory_region_init_ram_guest_memfd(&x86ms->bios, NULL, "pc.bios",
bios_size, &error_fatal);
+ if (is_tdx_vm()) {
+ tdx_set_tdvf_region(&x86ms->bios);
+ }
} else {
memory_region_init_ram(&x86ms->bios, NULL, "pc.bios",
bios_size, &error_fatal);
}
- if (sev_enabled()) {
+ if (sev_enabled() || is_tdx_vm()) {
/*
* The concept of a "reset" simply doesn't exist for
* confidential computing guests, we have to destroy and
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index e2d0409..f80533d 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -382,7 +382,6 @@ static void x86_machine_class_init(ObjectClass *oc, const void *data)
mc->get_default_cpu_node_id = x86_get_default_cpu_node_id;
mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids;
mc->kvm_type = x86_kvm_type;
- x86mc->save_tsc_khz = true;
x86mc->fwcfg_dma_enabled = true;
nc->nmi_monitor_handler = x86_nmi;
diff --git a/hw/microblaze/petalogix_ml605_mmu.c b/hw/microblaze/petalogix_ml605_mmu.c
index bea6b68..6e923c4 100644
--- a/hw/microblaze/petalogix_ml605_mmu.c
+++ b/hw/microblaze/petalogix_ml605_mmu.c
@@ -80,8 +80,6 @@ petalogix_ml605_init(MachineState *machine)
MemoryRegion *phys_lmb_bram = g_new(MemoryRegion, 1);
MemoryRegion *phys_ram = g_new(MemoryRegion, 1);
qemu_irq irq[32];
- EndianMode endianness = TARGET_BIG_ENDIAN ? ENDIAN_MODE_BIG
- : ENDIAN_MODE_LITTLE;
/* init CPUs */
cpu = MICROBLAZE_CPU(object_new(TYPE_MICROBLAZE_CPU));
@@ -113,7 +111,7 @@ petalogix_ml605_init(MachineState *machine)
dev = qdev_new("xlnx.xps-intc");
- qdev_prop_set_enum(dev, "endianness", endianness);
+ qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE);
qdev_prop_set_uint32(dev, "kind-of-intr", 1 << TIMER_IRQ);
sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, INTC_BASEADDR);
@@ -129,7 +127,7 @@ petalogix_ml605_init(MachineState *machine)
/* 2 timers at irq 2 @ 100 Mhz. */
dev = qdev_new("xlnx.xps-timer");
- qdev_prop_set_enum(dev, "endianness", endianness);
+ qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE);
qdev_prop_set_uint32(dev, "one-timer-only", 0);
qdev_prop_set_uint32(dev, "clock-frequency", 100 * 1000000);
sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
@@ -177,7 +175,7 @@ petalogix_ml605_init(MachineState *machine)
SSIBus *spi;
dev = qdev_new("xlnx.xps-spi");
- qdev_prop_set_enum(dev, "endianness", endianness);
+ qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE);
qdev_prop_set_uint8(dev, "num-ss-bits", NUM_SPI_FLASHES);
busdev = SYS_BUS_DEVICE(dev);
sysbus_realize_and_unref(busdev, &error_fatal);
@@ -218,12 +216,7 @@ petalogix_ml605_init(MachineState *machine)
static void petalogix_ml605_machine_init(MachineClass *mc)
{
- if (TARGET_BIG_ENDIAN) {
- mc->desc = "PetaLogix linux refdesign for xilinx ml605 (big endian)";
- mc->deprecation_reason = "big endian support is not tested";
- } else {
- mc->desc = "PetaLogix linux refdesign for xilinx ml605 (little endian)";
- }
+ mc->desc = "PetaLogix linux refdesign for xilinx ml605 (little endian)";
mc->init = petalogix_ml605_init;
}
diff --git a/hw/microblaze/petalogix_s3adsp1800_mmu.c b/hw/microblaze/petalogix_s3adsp1800_mmu.c
index 032f6f7..e8d0ddf 100644
--- a/hw/microblaze/petalogix_s3adsp1800_mmu.c
+++ b/hw/microblaze/petalogix_s3adsp1800_mmu.c
@@ -58,9 +58,20 @@
#define TYPE_PETALOGIX_S3ADSP1800_MACHINE \
MACHINE_TYPE_NAME("petalogix-s3adsp1800")
+struct S3Adsp1800MachineState {
+ MachineState parent_class;
+
+ EndianMode endianness;
+};
+
+OBJECT_DECLARE_TYPE(S3Adsp1800MachineState, MachineClass,
+ PETALOGIX_S3ADSP1800_MACHINE)
+
+
static void
petalogix_s3adsp1800_init(MachineState *machine)
{
+ S3Adsp1800MachineState *psms = PETALOGIX_S3ADSP1800_MACHINE(machine);
ram_addr_t ram_size = machine->ram_size;
DeviceState *dev;
MicroBlazeCPU *cpu;
@@ -71,13 +82,12 @@ petalogix_s3adsp1800_init(MachineState *machine)
MemoryRegion *phys_ram = g_new(MemoryRegion, 1);
qemu_irq irq[32];
MemoryRegion *sysmem = get_system_memory();
- EndianMode endianness = TARGET_BIG_ENDIAN ? ENDIAN_MODE_BIG
- : ENDIAN_MODE_LITTLE;
+ EndianMode endianness = psms->endianness;
cpu = MICROBLAZE_CPU(object_new(TYPE_MICROBLAZE_CPU));
object_property_set_str(OBJECT(cpu), "version", "7.10.d", &error_abort);
object_property_set_bool(OBJECT(cpu), "little-endian",
- !TARGET_BIG_ENDIAN, &error_abort);
+ endianness == ENDIAN_MODE_LITTLE, &error_abort);
qdev_realize(DEVICE(cpu), NULL, &error_abort);
/* Attach emulated BRAM through the LMB. */
@@ -135,20 +145,41 @@ petalogix_s3adsp1800_init(MachineState *machine)
create_unimplemented_device("xps_gpio", GPIO_BASEADDR, 0x10000);
- microblaze_load_kernel(cpu, !TARGET_BIG_ENDIAN, ddr_base, ram_size,
- machine->initrd_filename,
+ microblaze_load_kernel(cpu, endianness == ENDIAN_MODE_LITTLE, ddr_base,
+ ram_size, machine->initrd_filename,
BINARY_DEVICE_TREE_FILE,
NULL);
}
+static int machine_get_endianness(Object *obj, Error **errp G_GNUC_UNUSED)
+{
+ S3Adsp1800MachineState *ms = PETALOGIX_S3ADSP1800_MACHINE(obj);
+ return ms->endianness;
+}
+
+static void machine_set_endianness(Object *obj, int endianness, Error **errp)
+{
+ S3Adsp1800MachineState *ms = PETALOGIX_S3ADSP1800_MACHINE(obj);
+ ms->endianness = endianness;
+}
+
static void petalogix_s3adsp1800_machine_class_init(ObjectClass *oc,
const void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
+ ObjectProperty *prop;
mc->desc = "PetaLogix linux refdesign for xilinx Spartan 3ADSP1800";
mc->init = petalogix_s3adsp1800_init;
mc->is_default = true;
+
+ prop = object_class_property_add_enum(oc, "endianness", "EndianMode",
+ &EndianMode_lookup,
+ machine_get_endianness,
+ machine_set_endianness);
+ object_property_set_default_str(prop, TARGET_BIG_ENDIAN ? "big" : "little");
+ object_class_property_set_description(oc, "endianness",
+ "Defines whether the machine runs in big or little endian mode");
}
static const TypeInfo petalogix_s3adsp1800_machine_types[] = {
@@ -156,6 +187,7 @@ static const TypeInfo petalogix_s3adsp1800_machine_types[] = {
.name = TYPE_PETALOGIX_S3ADSP1800_MACHINE,
.parent = TYPE_MACHINE,
.class_init = petalogix_s3adsp1800_machine_class_init,
+ .instance_size = sizeof(S3Adsp1800MachineState),
},
};
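Usage note (editorial, not part of the patch): the endianness of this board is now a machine property rather than a build-time choice, so a command line along the lines of "-M petalogix-s3adsp1800,endianness=big" selects big-endian operation, while the default tracks the target's build-time endianness via object_property_set_default_str() above.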
diff --git a/hw/microblaze/xlnx-zynqmp-pmu.c b/hw/microblaze/xlnx-zynqmp-pmu.c
index ed40b5f..e909802 100644
--- a/hw/microblaze/xlnx-zynqmp-pmu.c
+++ b/hw/microblaze/xlnx-zynqmp-pmu.c
@@ -181,12 +181,7 @@ static void xlnx_zynqmp_pmu_init(MachineState *machine)
static void xlnx_zynqmp_pmu_machine_init(MachineClass *mc)
{
- if (TARGET_BIG_ENDIAN) {
- mc->desc = "Xilinx ZynqMP PMU machine (big endian)";
- mc->deprecation_reason = "big endian support is not tested";
- } else {
- mc->desc = "Xilinx ZynqMP PMU machine (little endian)";
- }
+ mc->desc = "Xilinx ZynqMP PMU machine (little endian)";
mc->init = xlnx_zynqmp_pmu_init;
}
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index cba4999..a80a7b0 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -127,10 +127,8 @@ struct E1000State_st {
QEMUTimer *flush_queue_timer;
/* Compatibility flags for migration to/from qemu 1.3.0 and older */
-#define E1000_FLAG_MAC_BIT 2
#define E1000_FLAG_TSO_BIT 3
#define E1000_FLAG_VET_BIT 4
-#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
#define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
#define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT)
@@ -1212,52 +1210,51 @@ enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };
-#define markflag(x) ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
* f - flag bits (up to 6 possible flags)
* n - flag needed
- * p - partially implenented */
+ * p - partially implemented */
static const uint8_t mac_reg_access[0x8000] = {
- [IPAV] = markflag(MAC), [WUC] = markflag(MAC),
- [IP6AT] = markflag(MAC), [IP4AT] = markflag(MAC),
- [FFVT] = markflag(MAC), [WUPM] = markflag(MAC),
- [ECOL] = markflag(MAC), [MCC] = markflag(MAC),
- [DC] = markflag(MAC), [TNCRS] = markflag(MAC),
- [RLEC] = markflag(MAC), [XONRXC] = markflag(MAC),
- [XOFFTXC] = markflag(MAC), [RFC] = markflag(MAC),
- [TSCTFC] = markflag(MAC), [MGTPRC] = markflag(MAC),
- [WUS] = markflag(MAC), [AIT] = markflag(MAC),
- [FFLT] = markflag(MAC), [FFMT] = markflag(MAC),
- [SCC] = markflag(MAC), [FCRUC] = markflag(MAC),
- [LATECOL] = markflag(MAC), [COLC] = markflag(MAC),
- [SEQEC] = markflag(MAC), [CEXTERR] = markflag(MAC),
- [XONTXC] = markflag(MAC), [XOFFRXC] = markflag(MAC),
- [RJC] = markflag(MAC), [RNBC] = markflag(MAC),
- [MGTPDC] = markflag(MAC), [MGTPTC] = markflag(MAC),
- [RUC] = markflag(MAC), [ROC] = markflag(MAC),
- [GORCL] = markflag(MAC), [GORCH] = markflag(MAC),
- [GOTCL] = markflag(MAC), [GOTCH] = markflag(MAC),
- [BPRC] = markflag(MAC), [MPRC] = markflag(MAC),
- [TSCTC] = markflag(MAC), [PRC64] = markflag(MAC),
- [PRC127] = markflag(MAC), [PRC255] = markflag(MAC),
- [PRC511] = markflag(MAC), [PRC1023] = markflag(MAC),
- [PRC1522] = markflag(MAC), [PTC64] = markflag(MAC),
- [PTC127] = markflag(MAC), [PTC255] = markflag(MAC),
- [PTC511] = markflag(MAC), [PTC1023] = markflag(MAC),
- [PTC1522] = markflag(MAC), [MPTC] = markflag(MAC),
- [BPTC] = markflag(MAC),
-
- [TDFH] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [TDFT] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [RDFH] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [RDFT] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [PBM] = markflag(MAC) | MAC_ACCESS_PARTIAL,
+ [IPAV] = MAC_ACCESS_FLAG_NEEDED, [WUC] = MAC_ACCESS_FLAG_NEEDED,
+ [IP6AT] = MAC_ACCESS_FLAG_NEEDED, [IP4AT] = MAC_ACCESS_FLAG_NEEDED,
+ [FFVT] = MAC_ACCESS_FLAG_NEEDED, [WUPM] = MAC_ACCESS_FLAG_NEEDED,
+ [ECOL] = MAC_ACCESS_FLAG_NEEDED, [MCC] = MAC_ACCESS_FLAG_NEEDED,
+ [DC] = MAC_ACCESS_FLAG_NEEDED, [TNCRS] = MAC_ACCESS_FLAG_NEEDED,
+ [RLEC] = MAC_ACCESS_FLAG_NEEDED, [XONRXC] = MAC_ACCESS_FLAG_NEEDED,
+ [XOFFTXC] = MAC_ACCESS_FLAG_NEEDED, [RFC] = MAC_ACCESS_FLAG_NEEDED,
+ [TSCTFC] = MAC_ACCESS_FLAG_NEEDED, [MGTPRC] = MAC_ACCESS_FLAG_NEEDED,
+ [WUS] = MAC_ACCESS_FLAG_NEEDED, [AIT] = MAC_ACCESS_FLAG_NEEDED,
+ [FFLT] = MAC_ACCESS_FLAG_NEEDED, [FFMT] = MAC_ACCESS_FLAG_NEEDED,
+ [SCC] = MAC_ACCESS_FLAG_NEEDED, [FCRUC] = MAC_ACCESS_FLAG_NEEDED,
+ [LATECOL] = MAC_ACCESS_FLAG_NEEDED, [COLC] = MAC_ACCESS_FLAG_NEEDED,
+ [SEQEC] = MAC_ACCESS_FLAG_NEEDED, [CEXTERR] = MAC_ACCESS_FLAG_NEEDED,
+ [XONTXC] = MAC_ACCESS_FLAG_NEEDED, [XOFFRXC] = MAC_ACCESS_FLAG_NEEDED,
+ [RJC] = MAC_ACCESS_FLAG_NEEDED, [RNBC] = MAC_ACCESS_FLAG_NEEDED,
+ [MGTPDC] = MAC_ACCESS_FLAG_NEEDED, [MGTPTC] = MAC_ACCESS_FLAG_NEEDED,
+ [RUC] = MAC_ACCESS_FLAG_NEEDED, [ROC] = MAC_ACCESS_FLAG_NEEDED,
+ [GORCL] = MAC_ACCESS_FLAG_NEEDED, [GORCH] = MAC_ACCESS_FLAG_NEEDED,
+ [GOTCL] = MAC_ACCESS_FLAG_NEEDED, [GOTCH] = MAC_ACCESS_FLAG_NEEDED,
+ [BPRC] = MAC_ACCESS_FLAG_NEEDED, [MPRC] = MAC_ACCESS_FLAG_NEEDED,
+ [TSCTC] = MAC_ACCESS_FLAG_NEEDED, [PRC64] = MAC_ACCESS_FLAG_NEEDED,
+ [PRC127] = MAC_ACCESS_FLAG_NEEDED, [PRC255] = MAC_ACCESS_FLAG_NEEDED,
+ [PRC511] = MAC_ACCESS_FLAG_NEEDED, [PRC1023] = MAC_ACCESS_FLAG_NEEDED,
+ [PRC1522] = MAC_ACCESS_FLAG_NEEDED, [PTC64] = MAC_ACCESS_FLAG_NEEDED,
+ [PTC127] = MAC_ACCESS_FLAG_NEEDED, [PTC255] = MAC_ACCESS_FLAG_NEEDED,
+ [PTC511] = MAC_ACCESS_FLAG_NEEDED, [PTC1023] = MAC_ACCESS_FLAG_NEEDED,
+ [PTC1522] = MAC_ACCESS_FLAG_NEEDED, [MPTC] = MAC_ACCESS_FLAG_NEEDED,
+ [BPTC] = MAC_ACCESS_FLAG_NEEDED,
+
+ [TDFH] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [TDFT] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [TDFHS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [TDFTS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [TDFPC] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [RDFH] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [RDFT] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [RDFHS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [RDFTS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [RDFPC] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [PBM] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
};
static void
@@ -1419,13 +1416,6 @@ static int e1000_tx_tso_post_load(void *opaque, int version_id)
return 0;
}
-static bool e1000_full_mac_needed(void *opaque)
-{
- E1000State *s = opaque;
-
- return chkflag(MAC);
-}
-
static bool e1000_tso_state_needed(void *opaque)
{
E1000State *s = opaque;
@@ -1451,7 +1441,6 @@ static const VMStateDescription vmstate_e1000_full_mac_state = {
.name = "e1000/full_mac_state",
.version_id = 1,
.minimum_version_id = 1,
- .needed = e1000_full_mac_needed,
.fields = (const VMStateField[]) {
VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
VMSTATE_END_OF_LIST()
@@ -1679,8 +1668,6 @@ static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
static const Property e1000_properties[] = {
DEFINE_NIC_PROPERTIES(E1000State, conf),
- DEFINE_PROP_BIT("extra_mac_registers", E1000State,
- compat_flags, E1000_FLAG_MAC_BIT, true),
DEFINE_PROP_BIT("migrate_tso_props", E1000State,
compat_flags, E1000_FLAG_TSO_BIT, true),
DEFINE_PROP_BIT("init-vet", E1000State,
diff --git a/hw/net/rocker/rocker.h b/hw/net/rocker/rocker.h
index 6e0962f..ae06c1c 100644
--- a/hw/net/rocker/rocker.h
+++ b/hw/net/rocker/rocker.h
@@ -36,15 +36,7 @@ static inline G_GNUC_PRINTF(1, 2) int DPRINTF(const char *fmt, ...)
}
#endif
-#define __le16 uint16_t
-#define __le32 uint32_t
-#define __le64 uint64_t
-
-#define __be16 uint16_t
-#define __be32 uint32_t
-#define __be64 uint64_t
-
-static inline bool ipv4_addr_is_multicast(__be32 addr)
+static inline bool ipv4_addr_is_multicast(uint32_t addr)
{
return (addr & htonl(0xf0000000)) == htonl(0xe0000000);
}
@@ -52,8 +44,8 @@ static inline bool ipv4_addr_is_multicast(__be32 addr)
typedef struct ipv6_addr {
union {
uint8_t addr8[16];
- __be16 addr16[8];
- __be32 addr32[4];
+ uint16_t addr16[8];
+ uint32_t addr32[4];
};
} Ipv6Addr;
diff --git a/hw/net/rocker/rocker_hw.h b/hw/net/rocker/rocker_hw.h
index 1786323..7ec6bfb 100644
--- a/hw/net/rocker/rocker_hw.h
+++ b/hw/net/rocker/rocker_hw.h
@@ -9,10 +9,6 @@
#ifndef ROCKER_HW_H
#define ROCKER_HW_H
-#define __le16 uint16_t
-#define __le32 uint32_t
-#define __le64 uint64_t
-
/*
* Return codes
*/
@@ -124,12 +120,12 @@ enum {
*/
typedef struct rocker_desc {
- __le64 buf_addr;
+ uint64_t buf_addr;
uint64_t cookie;
- __le16 buf_size;
- __le16 tlv_size;
- __le16 rsvd[5]; /* pad to 32 bytes */
- __le16 comp_err;
+ uint16_t buf_size;
+ uint16_t tlv_size;
+ uint16_t rsvd[5]; /* pad to 32 bytes */
+ uint16_t comp_err;
} __attribute__((packed, aligned(8))) RockerDesc;
/*
@@ -137,9 +133,9 @@ typedef struct rocker_desc {
*/
typedef struct rocker_tlv {
- __le32 type;
- __le16 len;
- __le16 rsvd;
+ uint32_t type;
+ uint16_t len;
+ uint16_t rsvd;
} __attribute__((packed, aligned(8))) RockerTlv;
/* cmd msg */
diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c
index 3378f63..4aed178 100644
--- a/hw/net/rocker/rocker_of_dpa.c
+++ b/hw/net/rocker/rocker_of_dpa.c
@@ -52,10 +52,10 @@ typedef struct of_dpa_flow_key {
uint32_t tunnel_id; /* overlay tunnel id */
uint32_t tbl_id; /* table id */
struct {
- __be16 vlan_id; /* 0 if no VLAN */
+ uint16_t vlan_id; /* 0 if no VLAN */
MACAddr src; /* ethernet source address */
MACAddr dst; /* ethernet destination address */
- __be16 type; /* ethernet frame type */
+ uint16_t type; /* ethernet frame type */
} eth;
struct {
uint8_t proto; /* IP protocol or ARP opcode */
@@ -66,14 +66,14 @@ typedef struct of_dpa_flow_key {
union {
struct {
struct {
- __be32 src; /* IP source address */
- __be32 dst; /* IP destination address */
+ uint32_t src; /* IP source address */
+ uint32_t dst; /* IP destination address */
} addr;
union {
struct {
- __be16 src; /* TCP/UDP/SCTP source port */
- __be16 dst; /* TCP/UDP/SCTP destination port */
- __be16 flags; /* TCP flags */
+ uint16_t src; /* TCP/UDP/SCTP source port */
+ uint16_t dst; /* TCP/UDP/SCTP destination port */
+ uint16_t flags; /* TCP flags */
} tp;
struct {
MACAddr sha; /* ARP source hardware address */
@@ -86,11 +86,11 @@ typedef struct of_dpa_flow_key {
Ipv6Addr src; /* IPv6 source address */
Ipv6Addr dst; /* IPv6 destination address */
} addr;
- __be32 label; /* IPv6 flow label */
+ uint32_t label; /* IPv6 flow label */
struct {
- __be16 src; /* TCP/UDP/SCTP source port */
- __be16 dst; /* TCP/UDP/SCTP destination port */
- __be16 flags; /* TCP flags */
+ uint16_t src; /* TCP/UDP/SCTP source port */
+ uint16_t dst; /* TCP/UDP/SCTP destination port */
+ uint16_t flags; /* TCP flags */
} tp;
struct {
Ipv6Addr target; /* ND target address */
@@ -112,13 +112,13 @@ typedef struct of_dpa_flow_action {
struct {
uint32_t group_id;
uint32_t tun_log_lport;
- __be16 vlan_id;
+ uint16_t vlan_id;
} write;
struct {
- __be16 new_vlan_id;
+ uint16_t new_vlan_id;
uint32_t out_pport;
uint8_t copy_to_cpu;
- __be16 vlan_id;
+ uint16_t vlan_id;
} apply;
} OfDpaFlowAction;
@@ -143,7 +143,7 @@ typedef struct of_dpa_flow {
typedef struct of_dpa_flow_pkt_fields {
uint32_t tunnel_id;
struct eth_header *ethhdr;
- __be16 *h_proto;
+ uint16_t *h_proto;
struct vlan_header *vlanhdr;
struct ip_header *ipv4hdr;
struct ip6_header *ipv6hdr;
@@ -180,7 +180,7 @@ typedef struct of_dpa_group {
uint32_t group_id;
MACAddr src_mac;
MACAddr dst_mac;
- __be16 vlan_id;
+ uint16_t vlan_id;
} l2_rewrite;
struct {
uint16_t group_count;
@@ -190,13 +190,13 @@ typedef struct of_dpa_group {
uint32_t group_id;
MACAddr src_mac;
MACAddr dst_mac;
- __be16 vlan_id;
+ uint16_t vlan_id;
uint8_t ttl_check;
} l3_unicast;
};
} OfDpaGroup;
-static int of_dpa_mask2prefix(__be32 mask)
+static int of_dpa_mask2prefix(uint32_t mask)
{
int i;
int count = 32;
@@ -451,7 +451,7 @@ static void of_dpa_flow_pkt_parse(OfDpaFlowContext *fc,
fc->iovcnt = iovcnt + 2;
}
-static void of_dpa_flow_pkt_insert_vlan(OfDpaFlowContext *fc, __be16 vlan_id)
+static void of_dpa_flow_pkt_insert_vlan(OfDpaFlowContext *fc, uint16_t vlan_id)
{
OfDpaFlowPktFields *fields = &fc->fields;
uint16_t h_proto = fields->ethhdr->h_proto;
@@ -486,7 +486,7 @@ static void of_dpa_flow_pkt_strip_vlan(OfDpaFlowContext *fc)
static void of_dpa_flow_pkt_hdr_rewrite(OfDpaFlowContext *fc,
uint8_t *src_mac, uint8_t *dst_mac,
- __be16 vlan_id)
+ uint16_t vlan_id)
{
OfDpaFlowPktFields *fields = &fc->fields;
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 83d942a..7c0ca56 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -41,19 +41,9 @@
#define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1
#define VMXNET3_MSIX_BAR_SIZE 0x2000
-/* Compatibility flags for migration */
-#define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT 0
-#define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS \
- (1 << VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT)
-#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT 1
-#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE \
- (1 << VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT)
-
#define VMXNET3_EXP_EP_OFFSET (0x48)
-#define VMXNET3_MSI_OFFSET(s) \
- ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x50 : 0x84)
-#define VMXNET3_MSIX_OFFSET(s) \
- ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0 : 0x9c)
+#define VMXNET3_MSI_OFFSET (0x84)
+#define VMXNET3_MSIX_OFFSET (0x9c)
#define VMXNET3_DSN_OFFSET (0x100)
#define VMXNET3_BAR0_IDX (0)
@@ -61,8 +51,7 @@
#define VMXNET3_MSIX_BAR_IDX (2)
#define VMXNET3_OFF_MSIX_TABLE (0x000)
-#define VMXNET3_OFF_MSIX_PBA(s) \
- ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x800 : 0x1000)
+#define VMXNET3_OFF_MSIX_PBA (0x1000)
/* Link speed in Mbps should be shifted by 16 */
#define VMXNET3_LINK_SPEED (1000 << 16)
@@ -2122,8 +2111,8 @@ vmxnet3_init_msix(VMXNET3State *s)
&s->msix_bar,
VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_TABLE,
&s->msix_bar,
- VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA(s),
- VMXNET3_MSIX_OFFSET(s), NULL);
+ VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA,
+ VMXNET3_MSIX_OFFSET, NULL);
if (0 > res) {
VMW_WRPRN("Failed to initialize MSI-X, error %d", res);
@@ -2221,7 +2210,7 @@ static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp)
/* Interrupt pin A */
pci_dev->config[PCI_INTERRUPT_PIN] = 0x01;
- ret = msi_init(pci_dev, VMXNET3_MSI_OFFSET(s), VMXNET3_MAX_NMSIX_INTRS,
+ ret = msi_init(pci_dev, VMXNET3_MSI_OFFSET, VMXNET3_MAX_NMSIX_INTRS,
VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK, NULL);
/* Any error other than -ENOTSUP(board's MSI support is broken)
* is a programming error. Fall back to INTx silently on -ENOTSUP */
@@ -2249,6 +2238,7 @@ static void vmxnet3_instance_init(Object *obj)
device_add_bootindex_property(obj, &s->conf.bootindex,
"bootindex", "/ethernet-phy@0",
DEVICE(obj));
+ PCI_DEVICE(obj)->cap_present |= QEMU_PCI_CAP_EXPRESS;
}
static void vmxnet3_pci_uninit(PCIDevice *pci_dev)
@@ -2472,30 +2462,12 @@ static const VMStateDescription vmstate_vmxnet3 = {
static const Property vmxnet3_properties[] = {
DEFINE_NIC_PROPERTIES(VMXNET3State, conf),
- DEFINE_PROP_BIT("x-old-msi-offsets", VMXNET3State, compat_flags,
- VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT, false),
- DEFINE_PROP_BIT("x-disable-pcie", VMXNET3State, compat_flags,
- VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT, false),
};
-static void vmxnet3_realize(DeviceState *qdev, Error **errp)
-{
- VMXNET3Class *vc = VMXNET3_DEVICE_GET_CLASS(qdev);
- PCIDevice *pci_dev = PCI_DEVICE(qdev);
- VMXNET3State *s = VMXNET3(qdev);
-
- if (!(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE)) {
- pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
- }
-
- vc->parent_dc_realize(qdev, errp);
-}
-
static void vmxnet3_class_init(ObjectClass *class, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(class);
PCIDeviceClass *c = PCI_DEVICE_CLASS(class);
- VMXNET3Class *vc = VMXNET3_DEVICE_CLASS(class);
c->realize = vmxnet3_pci_realize;
c->exit = vmxnet3_pci_uninit;
@@ -2506,8 +2478,6 @@ static void vmxnet3_class_init(ObjectClass *class, const void *data)
c->class_id = PCI_CLASS_NETWORK_ETHERNET;
c->subsystem_vendor_id = PCI_VENDOR_ID_VMWARE;
c->subsystem_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
- device_class_set_parent_realize(dc, vmxnet3_realize,
- &vc->parent_dc_realize);
dc->desc = "VMWare Paravirtualized Ethernet v3";
device_class_set_legacy_reset(dc, vmxnet3_qdev_reset);
dc->vmsd = &vmstate_vmxnet3;
diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
index 237b9f7..aa24050 100644
--- a/hw/nvram/fw_cfg.c
+++ b/hw/nvram/fw_cfg.c
@@ -817,62 +817,6 @@ void fw_cfg_modify_i64(FWCfgState *s, uint16_t key, uint64_t value)
g_free(old);
}
-void fw_cfg_set_order_override(FWCfgState *s, int order)
-{
- assert(s->fw_cfg_order_override == 0);
- s->fw_cfg_order_override = order;
-}
-
-void fw_cfg_reset_order_override(FWCfgState *s)
-{
- assert(s->fw_cfg_order_override != 0);
- s->fw_cfg_order_override = 0;
-}
-
-/*
- * This is the legacy order list. For legacy systems, files are in
- * the fw_cfg in the order defined below, by the "order" value. Note
- * that some entries (VGA ROMs, NIC option ROMS, etc.) go into a
- * specific area, but there may be more than one and they occur in the
- * order that the user specifies them on the command line. Those are
- * handled in a special manner, using the order override above.
- *
- * For non-legacy, the files are sorted by filename to avoid this kind
- * of complexity in the future.
- *
- * This is only for x86, other arches don't implement versioning so
- * they won't set legacy mode.
- */
-static struct {
- const char *name;
- int order;
-} fw_cfg_order[] = {
- { "etc/boot-menu-wait", 10 },
- { "bootsplash.jpg", 11 },
- { "bootsplash.bmp", 12 },
- { "etc/boot-fail-wait", 15 },
- { "etc/smbios/smbios-tables", 20 },
- { "etc/smbios/smbios-anchor", 30 },
- { "etc/e820", 40 },
- { "etc/reserved-memory-end", 50 },
- { "genroms/kvmvapic.bin", 55 },
- { "genroms/linuxboot.bin", 60 },
- { }, /* VGA ROMs from pc_vga_init come here, 70. */
- { }, /* NIC option ROMs from pc_nic_init come here, 80. */
- { "etc/system-states", 90 },
- { }, /* User ROMs come here, 100. */
- { }, /* Device FW comes here, 110. */
- { "etc/extra-pci-roots", 120 },
- { "etc/acpi/tables", 130 },
- { "etc/table-loader", 140 },
- { "etc/tpm/log", 150 },
- { "etc/acpi/rsdp", 160 },
- { "bootorder", 170 },
- { "etc/msr_feature_control", 180 },
-
-#define FW_CFG_ORDER_OVERRIDE_LAST 200
-};
-
/*
* Any sub-page size update to these table MRs will be lost during migration,
* as we use aligned size in ram_load_precopy() -> qemu_ram_resize() path.
@@ -890,29 +834,6 @@ static void fw_cfg_acpi_mr_save(FWCfgState *s, const char *filename, size_t len)
}
}
-static int get_fw_cfg_order(FWCfgState *s, const char *name)
-{
- int i;
-
- if (s->fw_cfg_order_override > 0) {
- return s->fw_cfg_order_override;
- }
-
- for (i = 0; i < ARRAY_SIZE(fw_cfg_order); i++) {
- if (fw_cfg_order[i].name == NULL) {
- continue;
- }
-
- if (strcmp(name, fw_cfg_order[i].name) == 0) {
- return fw_cfg_order[i].order;
- }
- }
-
- /* Stick unknown stuff at the end. */
- warn_report("Unknown firmware file in legacy mode: %s", name);
- return FW_CFG_ORDER_OVERRIDE_LAST;
-}
-
void fw_cfg_add_file_callback(FWCfgState *s, const char *filename,
FWCfgCallback select_cb,
FWCfgWriteCallback write_cb,
@@ -921,7 +842,6 @@ void fw_cfg_add_file_callback(FWCfgState *s, const char *filename,
{
int i, index, count;
size_t dsize;
- MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
int order = 0;
if (!s->files) {
@@ -933,22 +853,11 @@ void fw_cfg_add_file_callback(FWCfgState *s, const char *filename,
count = be32_to_cpu(s->files->count);
assert(count < fw_cfg_file_slots(s));
- /* Find the insertion point. */
- if (mc->legacy_fw_cfg_order) {
- /*
- * Sort by order. For files with the same order, we keep them
- * in the sequence in which they were added.
- */
- order = get_fw_cfg_order(s, filename);
- for (index = count;
- index > 0 && order < s->entry_order[index - 1];
- index--);
- } else {
- /* Sort by file name. */
- for (index = count;
- index > 0 && strcmp(filename, s->files->f[index - 1].name) < 0;
- index--);
- }
+ /* Find the insertion point, sorting by file name. */
+ for (index = count;
+ index > 0 && strcmp(filename, s->files->f[index - 1].name) < 0;
+ index--)
+ ;
/*
* Move all the entries from the index point and after down one
@@ -1058,7 +967,6 @@ bool fw_cfg_add_file_from_generator(FWCfgState *s,
static void fw_cfg_machine_reset(void *opaque)
{
- MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
FWCfgState *s = opaque;
void *ptr;
size_t len;
@@ -1068,11 +976,9 @@ static void fw_cfg_machine_reset(void *opaque)
ptr = fw_cfg_modify_file(s, "bootorder", (uint8_t *)buf, len);
g_free(ptr);
- if (!mc->legacy_fw_cfg_order) {
- buf = get_boot_devices_lchs_list(&len);
- ptr = fw_cfg_modify_file(s, "bios-geometry", (uint8_t *)buf, len);
- g_free(ptr);
- }
+ buf = get_boot_devices_lchs_list(&len);
+ ptr = fw_cfg_modify_file(s, "bios-geometry", (uint8_t *)buf, len);
+ g_free(ptr);
}
static void fw_cfg_machine_ready(struct Notifier *n, void *data)
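With the legacy order table gone, a fw_cfg file's position in the directory depends only on its name, never on when it was added. A small sketch (editorial, not part of the patch), mirroring the fw_cfg_add_file() call already visible in the pc.c hunk above:

/*
 * Sketch: the API is unchanged; entries are simply kept name-sorted.
 */
static void example_add_file(FWCfgState *s)
{
    uint64_t *val = g_new(uint64_t, 1);

    *val = cpu_to_le64(4 * GiB);
    fw_cfg_add_file(s, "etc/reserved-memory-end", val, sizeof(*val));
    /* A later add of "bootorder" still sorts before "etc/..." entries. */
}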
diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c
index d5825b6..7c98b1b 100644
--- a/hw/scsi/vmw_pvscsi.c
+++ b/hw/scsi/vmw_pvscsi.c
@@ -68,18 +68,7 @@ struct PVSCSIClass {
OBJECT_DECLARE_TYPE(PVSCSIState, PVSCSIClass, PVSCSI)
-/* Compatibility flags for migration */
-#define PVSCSI_COMPAT_OLD_PCI_CONFIGURATION_BIT 0
-#define PVSCSI_COMPAT_OLD_PCI_CONFIGURATION \
- (1 << PVSCSI_COMPAT_OLD_PCI_CONFIGURATION_BIT)
-#define PVSCSI_COMPAT_DISABLE_PCIE_BIT 1
-#define PVSCSI_COMPAT_DISABLE_PCIE \
- (1 << PVSCSI_COMPAT_DISABLE_PCIE_BIT)
-
-#define PVSCSI_USE_OLD_PCI_CONFIGURATION(s) \
- ((s)->compat_flags & PVSCSI_COMPAT_OLD_PCI_CONFIGURATION)
-#define PVSCSI_MSI_OFFSET(s) \
- (PVSCSI_USE_OLD_PCI_CONFIGURATION(s) ? 0x50 : 0x7c)
+#define PVSCSI_MSI_OFFSET (0x7c)
#define PVSCSI_EXP_EP_OFFSET (0x40)
typedef struct PVSCSIRingInfo {
@@ -129,8 +118,6 @@ struct PVSCSIState {
uint8_t msi_used; /* For migration compatibility */
PVSCSIRingInfo rings; /* Data transfer rings manager */
uint32_t resetting; /* Reset in progress */
-
- uint32_t compat_flags;
};
typedef struct PVSCSIRequest {
@@ -1110,7 +1097,7 @@ pvscsi_init_msi(PVSCSIState *s)
int res;
PCIDevice *d = PCI_DEVICE(s);
- res = msi_init(d, PVSCSI_MSI_OFFSET(s), PVSCSI_MSIX_NUM_VECTORS,
+ res = msi_init(d, PVSCSI_MSI_OFFSET, PVSCSI_MSIX_NUM_VECTORS,
PVSCSI_USE_64BIT, PVSCSI_PER_VECTOR_MASK, NULL);
if (res < 0) {
trace_pvscsi_init_msi_fail(res);
@@ -1158,15 +1145,11 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp)
trace_pvscsi_state("init");
/* PCI subsystem ID, subsystem vendor ID, revision */
- if (PVSCSI_USE_OLD_PCI_CONFIGURATION(s)) {
- pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID, 0x1000);
- } else {
- pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID,
- PCI_VENDOR_ID_VMWARE);
- pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID,
- PCI_DEVICE_ID_VMWARE_PVSCSI);
- pci_config_set_revision(pci_dev->config, 0x2);
- }
+ pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID,
+ PCI_VENDOR_ID_VMWARE);
+ pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID,
+ PCI_DEVICE_ID_VMWARE_PVSCSI);
+ pci_config_set_revision(pci_dev->config, 0x2);
/* PCI latency timer = 255 */
pci_dev->config[PCI_LATENCY_TIMER] = 0xff;
@@ -1234,21 +1217,8 @@ pvscsi_post_load(void *opaque, int version_id)
return 0;
}
-static bool pvscsi_vmstate_need_pcie_device(void *opaque)
-{
- PVSCSIState *s = PVSCSI(opaque);
-
- return !(s->compat_flags & PVSCSI_COMPAT_DISABLE_PCIE);
-}
-
-static bool pvscsi_vmstate_test_pci_device(void *opaque, int version_id)
-{
- return !pvscsi_vmstate_need_pcie_device(opaque);
-}
-
static const VMStateDescription vmstate_pvscsi_pcie_device = {
.name = "pvscsi/pcie",
- .needed = pvscsi_vmstate_need_pcie_device,
.fields = (const VMStateField[]) {
VMSTATE_PCI_DEVICE(parent_obj, PVSCSIState),
VMSTATE_END_OF_LIST()
@@ -1262,9 +1232,6 @@ static const VMStateDescription vmstate_pvscsi = {
.pre_save = pvscsi_pre_save,
.post_load = pvscsi_post_load,
.fields = (const VMStateField[]) {
- VMSTATE_STRUCT_TEST(parent_obj, PVSCSIState,
- pvscsi_vmstate_test_pci_device, 0,
- vmstate_pci_device, PCIDevice),
VMSTATE_UINT8(msi_used, PVSCSIState),
VMSTATE_UINT32(resetting, PVSCSIState),
VMSTATE_UINT64(reg_interrupt_status, PVSCSIState),
@@ -1298,30 +1265,17 @@ static const VMStateDescription vmstate_pvscsi = {
static const Property pvscsi_properties[] = {
DEFINE_PROP_UINT8("use_msg", PVSCSIState, use_msg, 1),
- DEFINE_PROP_BIT("x-old-pci-configuration", PVSCSIState, compat_flags,
- PVSCSI_COMPAT_OLD_PCI_CONFIGURATION_BIT, false),
- DEFINE_PROP_BIT("x-disable-pcie", PVSCSIState, compat_flags,
- PVSCSI_COMPAT_DISABLE_PCIE_BIT, false),
};
-static void pvscsi_realize(DeviceState *qdev, Error **errp)
+static void pvscsi_instance_init(Object *obj)
{
- PVSCSIClass *pvs_c = PVSCSI_GET_CLASS(qdev);
- PCIDevice *pci_dev = PCI_DEVICE(qdev);
- PVSCSIState *s = PVSCSI(qdev);
-
- if (!(s->compat_flags & PVSCSI_COMPAT_DISABLE_PCIE)) {
- pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
- }
-
- pvs_c->parent_dc_realize(qdev, errp);
+ PCI_DEVICE(obj)->cap_present |= QEMU_PCI_CAP_EXPRESS;
}
static void pvscsi_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
- PVSCSIClass *pvs_k = PVSCSI_CLASS(klass);
HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
k->realize = pvscsi_realizefn;
@@ -1330,8 +1284,6 @@ static void pvscsi_class_init(ObjectClass *klass, const void *data)
k->device_id = PCI_DEVICE_ID_VMWARE_PVSCSI;
k->class_id = PCI_CLASS_STORAGE_SCSI;
k->subsystem_id = 0x1000;
- device_class_set_parent_realize(dc, pvscsi_realize,
- &pvs_k->parent_dc_realize);
device_class_set_legacy_reset(dc, pvscsi_reset);
dc->vmsd = &vmstate_pvscsi;
device_class_set_props(dc, pvscsi_properties);
@@ -1346,6 +1298,7 @@ static const TypeInfo pvscsi_info = {
.class_size = sizeof(PVSCSIClass),
.instance_size = sizeof(PVSCSIState),
.class_init = pvscsi_class_init,
+ .instance_init = pvscsi_instance_init,
.interfaces = (const InterfaceInfo[]) {
{ TYPE_HOTPLUG_HANDLER },
{ INTERFACE_PCIE_DEVICE },
diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index d1b7bc5..0fd1337 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -426,30 +426,11 @@ static uint64_t hpet_ram_read(void *opaque, hwaddr addr,
uint64_t cur_tick;
trace_hpet_ram_read(addr);
+ addr &= ~4;
- /*address range of all TN regs*/
- if (addr >= 0x100 && addr <= 0x3ff) {
- uint8_t timer_id = (addr - 0x100) / 0x20;
- HPETTimer *timer = &s->timer[timer_id];
-
- if (timer_id > s->num_timers) {
- trace_hpet_timer_id_out_of_range(timer_id);
- return 0;
- }
-
- switch (addr & 0x18) {
- case HPET_TN_CFG: // including interrupt capabilities
- return timer->config >> shift;
- case HPET_TN_CMP: // comparator register
- return timer->cmp >> shift;
- case HPET_TN_ROUTE:
- return timer->fsb >> shift;
- default:
- trace_hpet_ram_read_invalid();
- break;
- }
- } else {
- switch (addr & ~4) {
+ /* address range of all global regs */
+ if (addr <= 0xff) {
+ switch (addr) {
case HPET_ID: // including HPET_PERIOD
return s->capability >> shift;
case HPET_CFG:
@@ -468,6 +449,26 @@ static uint64_t hpet_ram_read(void *opaque, hwaddr addr,
trace_hpet_ram_read_invalid();
break;
}
+ } else {
+ uint8_t timer_id = (addr - 0x100) / 0x20;
+ HPETTimer *timer = &s->timer[timer_id];
+
+ if (timer_id > s->num_timers) {
+ trace_hpet_timer_id_out_of_range(timer_id);
+ return 0;
+ }
+
+ switch (addr & 0x1f) {
+ case HPET_TN_CFG: // including interrupt capabilities
+ return timer->config >> shift;
+ case HPET_TN_CMP: // comparator register
+ return timer->cmp >> shift;
+ case HPET_TN_ROUTE:
+ return timer->fsb >> shift;
+ default:
+ trace_hpet_ram_read_invalid();
+ break;
+ }
}
return 0;
}
@@ -482,9 +483,67 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
uint64_t old_val, new_val, cleared;
trace_hpet_ram_write(addr, value);
+ addr &= ~4;
- /*address range of all TN regs*/
- if (addr >= 0x100 && addr <= 0x3ff) {
+ /* address range of all global regs */
+ if (addr <= 0xff) {
+ switch (addr) {
+ case HPET_ID:
+ return;
+ case HPET_CFG:
+ old_val = s->config;
+ new_val = deposit64(old_val, shift, len, value);
+ new_val = hpet_fixup_reg(new_val, old_val, HPET_CFG_WRITE_MASK);
+ s->config = new_val;
+ if (activating_bit(old_val, new_val, HPET_CFG_ENABLE)) {
+ /* Enable main counter and interrupt generation. */
+ s->hpet_offset =
+ ticks_to_ns(s->hpet_counter) - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+ for (i = 0; i < s->num_timers; i++) {
+ if (timer_enabled(&s->timer[i]) && (s->isr & (1 << i))) {
+ update_irq(&s->timer[i], 1);
+ }
+ hpet_set_timer(&s->timer[i]);
+ }
+ } else if (deactivating_bit(old_val, new_val, HPET_CFG_ENABLE)) {
+ /* Halt main counter and disable interrupt generation. */
+ s->hpet_counter = hpet_get_ticks(s);
+ for (i = 0; i < s->num_timers; i++) {
+ hpet_del_timer(&s->timer[i]);
+ }
+ }
+ /* i8254 and RTC output pins are disabled
+ * when HPET is in legacy mode */
+ if (activating_bit(old_val, new_val, HPET_CFG_LEGACY)) {
+ qemu_set_irq(s->pit_enabled, 0);
+ qemu_irq_lower(s->irqs[0]);
+ qemu_irq_lower(s->irqs[RTC_ISA_IRQ]);
+ } else if (deactivating_bit(old_val, new_val, HPET_CFG_LEGACY)) {
+ qemu_irq_lower(s->irqs[0]);
+ qemu_set_irq(s->pit_enabled, 1);
+ qemu_set_irq(s->irqs[RTC_ISA_IRQ], s->rtc_irq_level);
+ }
+ break;
+ case HPET_STATUS:
+ new_val = value << shift;
+ cleared = new_val & s->isr;
+ for (i = 0; i < s->num_timers; i++) {
+ if (cleared & (1 << i)) {
+ update_irq(&s->timer[i], 0);
+ }
+ }
+ break;
+ case HPET_COUNTER:
+ if (hpet_enabled(s)) {
+ trace_hpet_ram_write_counter_write_while_enabled();
+ }
+ s->hpet_counter = deposit64(s->hpet_counter, shift, len, value);
+ break;
+ default:
+ trace_hpet_ram_write_invalid();
+ break;
+ }
+ } else {
uint8_t timer_id = (addr - 0x100) / 0x20;
HPETTimer *timer = &s->timer[timer_id];
@@ -550,63 +609,6 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
break;
}
return;
- } else {
- switch (addr & ~4) {
- case HPET_ID:
- return;
- case HPET_CFG:
- old_val = s->config;
- new_val = deposit64(old_val, shift, len, value);
- new_val = hpet_fixup_reg(new_val, old_val, HPET_CFG_WRITE_MASK);
- s->config = new_val;
- if (activating_bit(old_val, new_val, HPET_CFG_ENABLE)) {
- /* Enable main counter and interrupt generation. */
- s->hpet_offset =
- ticks_to_ns(s->hpet_counter) - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
- for (i = 0; i < s->num_timers; i++) {
- if (timer_enabled(&s->timer[i]) && (s->isr & (1 << i))) {
- update_irq(&s->timer[i], 1);
- }
- hpet_set_timer(&s->timer[i]);
- }
- } else if (deactivating_bit(old_val, new_val, HPET_CFG_ENABLE)) {
- /* Halt main counter and disable interrupt generation. */
- s->hpet_counter = hpet_get_ticks(s);
- for (i = 0; i < s->num_timers; i++) {
- hpet_del_timer(&s->timer[i]);
- }
- }
- /* i8254 and RTC output pins are disabled
- * when HPET is in legacy mode */
- if (activating_bit(old_val, new_val, HPET_CFG_LEGACY)) {
- qemu_set_irq(s->pit_enabled, 0);
- qemu_irq_lower(s->irqs[0]);
- qemu_irq_lower(s->irqs[RTC_ISA_IRQ]);
- } else if (deactivating_bit(old_val, new_val, HPET_CFG_LEGACY)) {
- qemu_irq_lower(s->irqs[0]);
- qemu_set_irq(s->pit_enabled, 1);
- qemu_set_irq(s->irqs[RTC_ISA_IRQ], s->rtc_irq_level);
- }
- break;
- case HPET_STATUS:
- new_val = value << shift;
- cleared = new_val & s->isr;
- for (i = 0; i < s->num_timers; i++) {
- if (cleared & (1 << i)) {
- update_irq(&s->timer[i], 0);
- }
- }
- break;
- case HPET_COUNTER:
- if (hpet_enabled(s)) {
- trace_hpet_ram_write_counter_write_while_enabled();
- }
- s->hpet_counter = deposit64(s->hpet_counter, shift, len, value);
- break;
- default:
- trace_hpet_ram_write_invalid();
- break;
- }
}
}
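For context on the hpet.c rework above: both the read and write paths now clear bit 2 (the dword selector already handled by shift) up front, treat offsets up to 0xff as global registers, and decode everything above as per-timer blocks of 0x20 bytes starting at 0x100. A standalone sketch of just the address math, for illustration only (not part of the patch):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Illustrates the new HPET register decode order. */
    int main(void)
    {
        uint64_t addrs[] = { 0x004, 0x010, 0x0f0, 0x100, 0x124, 0x148 };

        for (size_t i = 0; i < sizeof(addrs) / sizeof(addrs[0]); i++) {
            uint64_t addr = addrs[i] & ~4ULL;   /* bit 2 selects the high dword */

            if (addr <= 0xff) {
                printf("0x%03" PRIx64 ": global register at 0x%02" PRIx64 "\n",
                       addrs[i], addr);
            } else {
                unsigned timer_id = (addr - 0x100) / 0x20;
                unsigned reg = addr & 0x1f;     /* offset within the timer block */
                printf("0x%03" PRIx64 ": timer %u, register offset 0x%02x\n",
                       addrs[i], timer_id, reg);
            }
        }
        return 0;
    }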
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
index 1c6ca94..d834bd4 100644
--- a/hw/vfio/container-base.c
+++ b/hw/vfio/container-base.c
@@ -75,12 +75,12 @@ void vfio_address_space_insert(VFIOAddressSpace *space,
int vfio_container_dma_map(VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
- void *vaddr, bool readonly)
+ void *vaddr, bool readonly, MemoryRegion *mr)
{
VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
g_assert(vioc->dma_map);
- return vioc->dma_map(bcontainer, iova, size, vaddr, readonly);
+ return vioc->dma_map(bcontainer, iova, size, vaddr, readonly, mr);
}
int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index a9f0dba..0f948d0 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -33,8 +33,8 @@
#include "qapi/error.h"
#include "pci.h"
#include "hw/vfio/vfio-container.h"
+#include "hw/vfio/vfio-cpr.h"
#include "vfio-helpers.h"
-#include "vfio-cpr.h"
#include "vfio-listener.h"
#define TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE "-legacy-vfio"
@@ -207,7 +207,8 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
}
static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
- ram_addr_t size, void *vaddr, bool readonly)
+ ram_addr_t size, void *vaddr, bool readonly,
+ MemoryRegion *mr)
{
const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
bcontainer);
diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c
index 3214184..0210e76 100644
--- a/hw/vfio/cpr.c
+++ b/hw/vfio/cpr.c
@@ -8,9 +8,9 @@
#include "qemu/osdep.h"
#include "hw/vfio/vfio-device.h"
#include "migration/misc.h"
+#include "hw/vfio/vfio-cpr.h"
#include "qapi/error.h"
#include "system/runstate.h"
-#include "vfio-cpr.h"
static int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier,
MigrationEvent *e, Error **errp)
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
index e7952d1..e7a9d1f 100644
--- a/hw/vfio/igd.c
+++ b/hw/vfio/igd.c
@@ -187,23 +187,21 @@ static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
}
static bool vfio_pci_igd_opregion_detect(VFIOPCIDevice *vdev,
- struct vfio_region_info **opregion,
- Error **errp)
+ struct vfio_region_info **opregion)
{
int ret;
- /* Hotplugging is not supported for opregion access */
- if (vdev->pdev.qdev.hotplugged) {
- error_setg(errp, "IGD OpRegion is not supported on hotplugged device");
- return false;
- }
-
ret = vfio_device_get_region_info_type(&vdev->vbasedev,
VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, opregion);
if (ret) {
- error_setg_errno(errp, -ret,
- "Device does not supports IGD OpRegion feature");
+ return false;
+ }
+
+ /* Hotplugging is not supported for opregion access */
+ if (vdev->pdev.qdev.hotplugged) {
+ warn_report("IGD device detected, but OpRegion is not supported "
+ "on hotplugged device.");
return false;
}
@@ -524,7 +522,7 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
}
/* IGD device always comes with OpRegion */
- if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) {
+ if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) {
return true;
}
info_report("OpRegion detected on Intel display %x.", vdev->device_id);
@@ -695,7 +693,7 @@ static bool vfio_pci_kvmgt_config_quirk(VFIOPCIDevice *vdev, Error **errp)
return true;
}
- if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) {
+ if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) {
/* Should never reach here, KVMGT always emulates OpRegion */
return false;
}
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index af1c7ab..d3efef7 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -21,20 +21,21 @@
#include "qapi/error.h"
#include "system/iommufd.h"
#include "hw/qdev-core.h"
+#include "hw/vfio/vfio-cpr.h"
#include "system/reset.h"
#include "qemu/cutils.h"
#include "qemu/chardev_open.h"
#include "pci.h"
#include "vfio-iommufd.h"
#include "vfio-helpers.h"
-#include "vfio-cpr.h"
#include "vfio-listener.h"
#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO \
TYPE_HOST_IOMMU_DEVICE_IOMMUFD "-vfio"
static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
- ram_addr_t size, void *vaddr, bool readonly)
+ ram_addr_t size, void *vaddr, bool readonly,
+ MemoryRegion *mr)
{
const VFIOIOMMUFDContainer *container =
container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
@@ -592,6 +593,10 @@ found_container:
goto err_listener_register;
}
+ /*
+ * Do not move this code before attachment! The nested IOMMU support
+ * needs the device and hwpt IDs, which are generated only after attachment.
+ */
if (!vfio_device_hiod_create_and_realize(vbasedev,
TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO, errp)) {
goto err_listener_register;
@@ -810,21 +815,38 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data)
vioc->query_dirty_bitmap = iommufd_query_dirty_bitmap;
};
+static bool
+host_iommu_device_iommufd_vfio_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ uint32_t hwpt_id, Error **errp)
+{
+ VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;
+
+ return !iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp);
+}
+
+static bool
+host_iommu_device_iommufd_vfio_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ Error **errp)
+{
+ VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;
+
+ return iommufd_cdev_detach_ioas_hwpt(vbasedev, errp);
+}
+
static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
Error **errp)
{
VFIODevice *vdev = opaque;
+ HostIOMMUDeviceIOMMUFD *idev;
HostIOMMUDeviceCaps *caps = &hiod->caps;
+ VendorCaps *vendor_caps = &caps->vendor_caps;
enum iommu_hw_info_type type;
- union {
- struct iommu_hw_info_vtd vtd;
- } data;
uint64_t hw_caps;
hiod->agent = opaque;
- if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid,
- &type, &data, sizeof(data),
+ if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, &type,
+ vendor_caps, sizeof(*vendor_caps),
&hw_caps, errp)) {
return false;
}
@@ -833,6 +855,11 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
caps->type = type;
caps->hw_caps = hw_caps;
+ idev = HOST_IOMMU_DEVICE_IOMMUFD(hiod);
+ idev->iommufd = vdev->iommufd;
+ idev->devid = vdev->devid;
+ idev->hwpt_id = vdev->hwpt->hwpt_id;
+
return true;
}
@@ -858,10 +885,14 @@ hiod_iommufd_vfio_get_page_size_mask(HostIOMMUDevice *hiod)
static void hiod_iommufd_vfio_class_init(ObjectClass *oc, const void *data)
{
HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_CLASS(oc);
+ HostIOMMUDeviceIOMMUFDClass *idevc = HOST_IOMMU_DEVICE_IOMMUFD_CLASS(oc);
hiodc->realize = hiod_iommufd_vfio_realize;
hiodc->get_iova_ranges = hiod_iommufd_vfio_get_iova_ranges;
hiodc->get_page_size_mask = hiod_iommufd_vfio_get_page_size_mask;
+
+ idevc->attach_hwpt = host_iommu_device_iommufd_vfio_attach_hwpt;
+ idevc->detach_hwpt = host_iommu_device_iommufd_vfio_detach_hwpt;
};
static const TypeInfo types[] = {
diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c
index bfacb3d..203ed03 100644
--- a/hw/vfio/listener.c
+++ b/hw/vfio/listener.c
@@ -90,16 +90,17 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
section->offset_within_address_space & (1ULL << 63);
}
-/* Called with rcu_read_lock held. */
-static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
- ram_addr_t *ram_addr, bool *read_only,
- Error **errp)
+/*
+ * Called with rcu_read_lock held.
+ * The returned MemoryRegion must not be accessed after calling rcu_read_unlock.
+ */
+static MemoryRegion *vfio_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p,
+ Error **errp)
{
- bool ret, mr_has_discard_manager;
+ MemoryRegion *mr;
- ret = memory_get_xlat_addr(iotlb, vaddr, ram_addr, read_only,
- &mr_has_discard_manager, errp);
- if (ret && mr_has_discard_manager) {
+ mr = memory_translate_iotlb(iotlb, xlat_p, errp);
+ if (mr && memory_region_has_ram_discard_manager(mr)) {
/*
* Malicious VMs might trigger discarding of IOMMU-mapped memory. The
* pages will remain pinned inside vfio until unmapped, resulting in a
@@ -118,7 +119,7 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
" intended via an IOMMU. It's possible to mitigate "
" by setting/adjusting RLIMIT_MEMLOCK.");
}
- return ret;
+ return mr;
}
static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
@@ -126,6 +127,8 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
VFIOContainerBase *bcontainer = giommu->bcontainer;
hwaddr iova = iotlb->iova + giommu->iommu_offset;
+ MemoryRegion *mr;
+ hwaddr xlat;
void *vaddr;
int ret;
Error *local_err = NULL;
@@ -150,10 +153,14 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
bool read_only;
- if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, &local_err)) {
+ mr = vfio_translate_iotlb(iotlb, &xlat, &local_err);
+ if (!mr) {
error_report_err(local_err);
goto out;
}
+ vaddr = memory_region_get_ram_ptr(mr) + xlat;
+ read_only = !(iotlb->perm & IOMMU_WO) || mr->readonly;
+
/*
* vaddr is only valid until rcu_read_unlock(). But after
* vfio_dma_map has set up the mapping the pages will be
@@ -163,7 +170,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
*/
ret = vfio_container_dma_map(bcontainer, iova,
iotlb->addr_mask + 1, vaddr,
- read_only);
+ read_only, mr);
if (ret) {
error_report("vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", "
"0x%"HWADDR_PRIx", %p) = %d (%s)",
@@ -233,7 +240,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
vaddr = memory_region_get_ram_ptr(section->mr) + start;
ret = vfio_container_dma_map(bcontainer, iova, next - start,
- vaddr, section->readonly);
+ vaddr, section->readonly, section->mr);
if (ret) {
/* Rollback */
vfio_ram_discard_notify_discard(rdl, section);
@@ -449,6 +456,26 @@ static void vfio_device_error_append(VFIODevice *vbasedev, Error **errp)
}
}
+VFIORamDiscardListener *vfio_find_ram_discard_listener(
+ VFIOContainerBase *bcontainer, MemoryRegionSection *section)
+{
+ VFIORamDiscardListener *vrdl = NULL;
+
+ QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) {
+ if (vrdl->mr == section->mr &&
+ vrdl->offset_within_address_space ==
+ section->offset_within_address_space) {
+ break;
+ }
+ }
+
+ if (!vrdl) {
+ hw_error("vfio: Trying to sync missing RAM discard listener");
+ /* does not return */
+ }
+ return vrdl;
+}
+
static void vfio_listener_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
@@ -557,7 +584,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
}
ret = vfio_container_dma_map(bcontainer, iova, int128_get64(llsize),
- vaddr, section->readonly);
+ vaddr, section->readonly, section->mr);
if (ret) {
error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", "
"0x%"HWADDR_PRIx", %p) = %d (%s)",
@@ -1010,6 +1037,8 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
ram_addr_t translated_addr;
Error *local_err = NULL;
int ret = -EINVAL;
+ MemoryRegion *mr;
+ hwaddr xlat;
trace_vfio_iommu_map_dirty_notify(iova, iova + iotlb->addr_mask);
@@ -1021,9 +1050,11 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
}
rcu_read_lock();
- if (!vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL, &local_err)) {
+ mr = vfio_translate_iotlb(iotlb, &xlat, &local_err);
+ if (!mr) {
goto out_unlock;
}
+ translated_addr = memory_region_get_ram_addr(mr) + xlat;
ret = vfio_container_query_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1,
translated_addr, &local_err);
@@ -1075,19 +1106,8 @@ vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer,
MemoryRegionSection *section)
{
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
- VFIORamDiscardListener *vrdl = NULL;
-
- QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) {
- if (vrdl->mr == section->mr &&
- vrdl->offset_within_address_space ==
- section->offset_within_address_space) {
- break;
- }
- }
-
- if (!vrdl) {
- hw_error("vfio: Trying to sync missing RAM discard listener");
- }
+ VFIORamDiscardListener *vrdl =
+ vfio_find_ram_discard_listener(bcontainer, section);
/*
* We only want/can synchronize the bitmap for actually mapped parts -
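Pulling the listener.c changes above together: the map notifier no longer extracts vaddr/read_only through memory_get_xlat_addr(); it asks for the MemoryRegion plus offset, derives both itself, and passes the region down so the container backend can reference it. A condensed sketch of the flow inside the RCU read section (it mirrors the hunks above rather than adding new behaviour; error handling is omitted):

    hwaddr xlat;
    MemoryRegion *mr = vfio_translate_iotlb(iotlb, &xlat, &local_err);

    if (mr) {
        void *vaddr = memory_region_get_ram_ptr(mr) + xlat;
        bool read_only = !(iotlb->perm & IOMMU_WO) || mr->readonly;

        /* mr is the new trailing argument of the dma_map path. */
        vfio_container_dma_map(bcontainer, iova, iotlb->addr_mask + 1,
                               vaddr, read_only, mr);
    }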
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index a1bfdfe..b1250d8 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -511,6 +511,25 @@ static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg,
kvm_irqchip_commit_routes(kvm_state);
}
+static void set_irq_signalling(VFIODevice *vbasedev, VFIOMSIVector *vector,
+ unsigned int nr)
+{
+ Error *err = NULL;
+ int32_t fd;
+
+ if (vector->virq >= 0) {
+ fd = event_notifier_get_fd(&vector->kvm_interrupt);
+ } else {
+ fd = event_notifier_get_fd(&vector->interrupt);
+ }
+
+ if (!vfio_device_irq_set_signaling(vbasedev, VFIO_PCI_MSIX_IRQ_INDEX, nr,
+ VFIO_IRQ_SET_ACTION_TRIGGER,
+ fd, &err)) {
+ error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name);
+ }
+}
+
static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
MSIMessage *msg, IOHandler *handler)
{
@@ -583,21 +602,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
strerror(-ret));
}
} else {
- Error *err = NULL;
- int32_t fd;
-
- if (vector->virq >= 0) {
- fd = event_notifier_get_fd(&vector->kvm_interrupt);
- } else {
- fd = event_notifier_get_fd(&vector->interrupt);
- }
-
- if (!vfio_device_irq_set_signaling(&vdev->vbasedev,
- VFIO_PCI_MSIX_IRQ_INDEX, nr,
- VFIO_IRQ_SET_ACTION_TRIGGER, fd,
- &err)) {
- error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
- }
+ set_irq_signalling(&vdev->vbasedev, vector, nr);
}
}
@@ -2854,6 +2859,18 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
static void vfio_pci_put_device(VFIOPCIDevice *vdev)
{
+ vfio_display_finalize(vdev);
+ vfio_bars_finalize(vdev);
+ g_free(vdev->emulated_config_bits);
+ g_free(vdev->rom);
+ /*
+ * XXX Leaking igd_opregion is not an oversight; we can't remove the
+ * fw_cfg entry, therefore leaking this allocation seems like the safest
+ * option.
+ *
+ * g_free(vdev->igd_opregion);
+ */
+
vfio_device_detach(&vdev->vbasedev);
g_free(vdev->vbasedev.name);
@@ -3005,6 +3022,19 @@ static bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp)
{
PCIDevice *pdev = &vdev->pdev;
VFIODevice *vbasedev = &vdev->vbasedev;
+ uint32_t config_space_size;
+ int ret;
+
+ config_space_size = MIN(pci_config_size(&vdev->pdev), vdev->config_size);
+
+ /* Get a copy of config space */
+ ret = vfio_pci_config_space_read(vdev, 0, config_space_size,
+ vdev->pdev.config);
+ if (ret < (int)config_space_size) {
+ ret = ret < 0 ? -ret : EFAULT;
+ error_setg_errno(errp, ret, "failed to read device config space");
+ return false;
+ }
/* vfio emulates a lot for us, but some bits need extra love */
vdev->emulated_config_bits = g_malloc0(vdev->config_size);
@@ -3126,15 +3156,14 @@ static bool vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp)
return true;
}
-static void vfio_realize(PCIDevice *pdev, Error **errp)
+static void vfio_pci_realize(PCIDevice *pdev, Error **errp)
{
ERRP_GUARD();
VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
VFIODevice *vbasedev = &vdev->vbasedev;
- int i, ret;
+ int i;
char uuid[UUID_STR_LEN];
g_autofree char *name = NULL;
- uint32_t config_space_size;
if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
if (!(~vdev->host.domain || ~vdev->host.bus ||
@@ -3189,17 +3218,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
goto error;
}
- config_space_size = MIN(pci_config_size(&vdev->pdev), vdev->config_size);
-
- /* Get a copy of config space */
- ret = vfio_pci_config_space_read(vdev, 0, config_space_size,
- vdev->pdev.config);
- if (ret < (int)config_space_size) {
- ret = ret < 0 ? -ret : EFAULT;
- error_setg_errno(errp, ret, "failed to read device config space");
- goto error;
- }
-
if (!vfio_pci_config_setup(vdev, errp)) {
goto error;
}
@@ -3302,17 +3320,6 @@ static void vfio_instance_finalize(Object *obj)
{
VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
- vfio_display_finalize(vdev);
- vfio_bars_finalize(vdev);
- g_free(vdev->emulated_config_bits);
- g_free(vdev->rom);
- /*
- * XXX Leaking igd_opregion is not an oversight, we can't remove the
- * fw_cfg entry therefore leaking this allocation seems like the safest
- * option.
- *
- * g_free(vdev->igd_opregion);
- */
vfio_pci_put_device(vdev);
}
@@ -3514,7 +3521,7 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data)
object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd);
#endif
dc->desc = "VFIO-based PCI device assignment";
- pdc->realize = vfio_realize;
+ pdc->realize = vfio_pci_realize;
object_class_property_set_description(klass, /* 1.3 */
"host",
diff --git a/hw/vfio/vfio-cpr.h b/hw/vfio/vfio-cpr.h
deleted file mode 100644
index 134b83a..0000000
--- a/hw/vfio/vfio-cpr.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * VFIO CPR
- *
- * Copyright (c) 2025 Oracle and/or its affiliates.
- *
- * SPDX-License-Identifier: GPL-2.0-or-later
- */
-
-#ifndef HW_VFIO_CPR_H
-#define HW_VFIO_CPR_H
-
-bool vfio_cpr_register_container(VFIOContainerBase *bcontainer, Error **errp);
-void vfio_cpr_unregister_container(VFIOContainerBase *bcontainer);
-
-#endif /* HW_VFIO_CPR_H */
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index e20da95..7061b6e 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -209,6 +209,8 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
int ret;
Int128 llend;
Error *local_err = NULL;
+ MemoryRegion *mr;
+ hwaddr xlat;
if (iotlb->target_as != &address_space_memory) {
error_report("Wrong target AS \"%s\", only system memory is allowed",
@@ -228,11 +230,14 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
bool read_only;
- if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL,
- &local_err)) {
+ mr = memory_translate_iotlb(iotlb, &xlat, &local_err);
+ if (!mr) {
error_report_err(local_err);
return;
}
+ vaddr = memory_region_get_ram_ptr(mr) + xlat;
+ read_only = !(iotlb->perm & IOMMU_WO) || mr->readonly;
+
ret = vhost_vdpa_dma_map(s, VHOST_VDPA_GUEST_PA_ASID, iova,
iotlb->addr_mask + 1, vaddr, read_only);
if (ret) {
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 7e309d1..fba2372 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -146,9 +146,7 @@ static const VMStateDescription vmstate_virtio_pci = {
static bool virtio_pci_has_extra_state(DeviceState *d)
{
- VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
-
- return proxy->flags & VIRTIO_PCI_FLAG_MIGRATE_EXTRA;
+ return true;
}
static void virtio_pci_save_extra_state(DeviceState *d, QEMUFile *f)
@@ -2368,12 +2366,8 @@ static void virtio_pci_bus_reset_hold(Object *obj, ResetType type)
static const Property virtio_pci_properties[] = {
DEFINE_PROP_BIT("virtio-pci-bus-master-bug-migration", VirtIOPCIProxy, flags,
VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, false),
- DEFINE_PROP_BIT("migrate-extra", VirtIOPCIProxy, flags,
- VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, true),
DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags,
VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, false),
- DEFINE_PROP_BIT("x-disable-pcie", VirtIOPCIProxy, flags,
- VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
DEFINE_PROP_BOOL("x-ignore-backend-features", VirtIOPCIProxy,
@@ -2402,8 +2396,7 @@ static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp)
VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
PCIDevice *pci_dev = &proxy->pci_dev;
- if (!(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_PCIE) &&
- virtio_pci_modern(proxy)) {
+ if (virtio_pci_modern(proxy)) {
pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
}
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
index 9be34b3..84a2a4e 100644
--- a/include/block/block-global-state.h
+++ b/include/block/block-global-state.h
@@ -192,10 +192,10 @@ int bdrv_inactivate_all(void);
int bdrv_flush_all(void);
void bdrv_close_all(void);
-void bdrv_drain_all_begin(void);
+void GRAPH_UNLOCKED bdrv_drain_all_begin(void);
void bdrv_drain_all_begin_nopoll(void);
void bdrv_drain_all_end(void);
-void bdrv_drain_all(void);
+void GRAPH_UNLOCKED bdrv_drain_all(void);
void bdrv_aio_cancel(BlockAIOCB *acb);
@@ -274,11 +274,16 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag);
int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
-bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx,
- GHashTable *visited, Transaction *tran,
- Error **errp);
-int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
- BdrvChild *ignore_child, Error **errp);
+bool GRAPH_RDLOCK
+bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx,
+ GHashTable *visited, Transaction *tran,
+ Error **errp);
+int GRAPH_UNLOCKED
+bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
+ BdrvChild *ignore_child, Error **errp);
+int GRAPH_RDLOCK
+bdrv_try_change_aio_context_locked(BlockDriverState *bs, AioContext *ctx,
+ BdrvChild *ignore_child, Error **errp);
int GRAPH_RDLOCK bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz);
int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo);
diff --git a/include/block/block-io.h b/include/block/block-io.h
index b99cc98..4cf83fb 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -431,7 +431,7 @@ bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
*
* This function can be recursive.
*/
-void bdrv_drained_begin(BlockDriverState *bs);
+void GRAPH_UNLOCKED bdrv_drained_begin(BlockDriverState *bs);
/**
* bdrv_do_drained_begin_quiesce:
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 2982dd3..925a3e7 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -396,9 +396,23 @@ struct BlockDriver {
int GRAPH_RDLOCK_PTR (*bdrv_probe_geometry)(
BlockDriverState *bs, HDGeometry *geo);
+ /**
+ * Hot add a BDS's child. Used in combination with bdrv_del_child, so the
+ * user can take a child offline when it is broken and take a new child
+ * online.
+ *
+ * All block nodes must be drained.
+ */
void GRAPH_WRLOCK_PTR (*bdrv_add_child)(
BlockDriverState *parent, BlockDriverState *child, Error **errp);
+ /**
+ * Hot remove a BDS's child. Used in combination with bdrv_add_child, so the
+ * user can take a child offline when it is broken and take a new child
+ * online.
+ *
+ * All block nodes must be drained.
+ */
void GRAPH_WRLOCK_PTR (*bdrv_del_child)(
BlockDriverState *parent, BdrvChild *child, Error **errp);
@@ -983,9 +997,21 @@ struct BdrvChildClass {
bool backing_mask_protocol,
Error **errp);
- bool (*change_aio_ctx)(BdrvChild *child, AioContext *ctx,
- GHashTable *visited, Transaction *tran,
- Error **errp);
+ /*
+ * Notifies the parent that the child is trying to change its AioContext.
+ * The parent may in turn change the AioContext of other nodes in the same
+ * transaction. Returns true if the change is possible and the transaction
+ * can be continued. Returns false and sets @errp if not, in which case the
+ * transaction must be aborted.
+ *
+ * @visited will accumulate all visited BdrvChild objects. The caller is
+ * responsible for freeing the list afterwards.
+ *
+ * Must be called with the affected block nodes drained.
+ */
+ bool GRAPH_RDLOCK_PTR (*change_aio_ctx)(BdrvChild *child, AioContext *ctx,
+ GHashTable *visited,
+ Transaction *tran, Error **errp);
/*
* I/O API functions. These functions are thread-safe.
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 7061ab7..990f3e1 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -137,6 +137,8 @@ BlockJob *block_job_get_locked(const char *id);
* Add @bs to the list of BlockDriverState that are involved in
* @job. This means that all operations will be blocked on @bs while
* @job exists.
+ *
+ * All block nodes must be drained.
*/
int GRAPH_WRLOCK
block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
diff --git a/include/hw/arm/npcm8xx.h b/include/hw/arm/npcm8xx.h
index 3436abf..a8377db 100644
--- a/include/hw/arm/npcm8xx.h
+++ b/include/hw/arm/npcm8xx.h
@@ -28,7 +28,8 @@
#include "hw/misc/npcm7xx_mft.h"
#include "hw/misc/npcm7xx_pwm.h"
#include "hw/misc/npcm7xx_rng.h"
-#include "hw/net/npcm7xx_emc.h"
+#include "hw/net/npcm_gmac.h"
+#include "hw/net/npcm_pcs.h"
#include "hw/nvram/npcm7xx_otp.h"
#include "hw/sd/npcm7xx_sdhci.h"
#include "hw/timer/npcm7xx_timer.h"
@@ -99,6 +100,8 @@ struct NPCM8xxState {
EHCISysBusState ehci[2];
OHCISysBusState ohci[2];
NPCM7xxFIUState fiu[3];
+ NPCMGMACState gmac[4];
+ NPCMPCSState pcs;
NPCM7xxSDHCIState mmc;
NPCMPSPIState pspi;
};
diff --git a/include/hw/block/flash.h b/include/hw/block/flash.h
index 5fd67f5..3671f01 100644
--- a/include/hw/block/flash.h
+++ b/include/hw/block/flash.h
@@ -44,24 +44,6 @@ PFlashCFI02 *pflash_cfi02_register(hwaddr base,
uint16_t unlock_addr1,
int be);
-/* nand.c */
-DeviceState *nand_init(BlockBackend *blk, int manf_id, int chip_id);
-void nand_setpins(DeviceState *dev, uint8_t cle, uint8_t ale,
- uint8_t ce, uint8_t wp, uint8_t gnd);
-void nand_getpins(DeviceState *dev, int *rb);
-void nand_setio(DeviceState *dev, uint32_t value);
-uint32_t nand_getio(DeviceState *dev);
-uint32_t nand_getbuswidth(DeviceState *dev);
-
-#define NAND_MFR_TOSHIBA 0x98
-#define NAND_MFR_SAMSUNG 0xec
-#define NAND_MFR_FUJITSU 0x04
-#define NAND_MFR_NATIONAL 0x8f
-#define NAND_MFR_RENESAS 0x07
-#define NAND_MFR_STMICRO 0x20
-#define NAND_MFR_HYNIX 0xad
-#define NAND_MFR_MICRON 0x2c
-
/* m25p80.c */
#define TYPE_M25P80 "m25p80-generic"
diff --git a/include/hw/boards.h b/include/hw/boards.h
index a7b1fcf..f424b2b 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -286,8 +286,7 @@ struct MachineClass {
no_parallel:1,
no_floppy:1,
no_cdrom:1,
- pci_allow_0_address:1,
- legacy_fw_cfg_order:1;
+ pci_allow_0_address:1;
bool auto_create_sdcard;
bool is_default;
const char *default_machine_opts;
@@ -863,10 +862,4 @@ extern const size_t hw_compat_2_7_len;
extern GlobalProperty hw_compat_2_6[];
extern const size_t hw_compat_2_6_len;
-extern GlobalProperty hw_compat_2_5[];
-extern const size_t hw_compat_2_5_len;
-
-extern GlobalProperty hw_compat_2_4[];
-extern const size_t hw_compat_2_4_len;
-
#endif
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 9563674..79b72c5 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -107,7 +107,6 @@ struct PCMachineClass {
/* RAM / address space compat: */
bool gigabyte_align;
bool has_reserved_memory;
- bool broken_reserved_end;
bool enforce_amd_1tb_hole;
bool isa_bios_alias;
@@ -299,12 +298,6 @@ extern const size_t pc_compat_2_7_len;
extern GlobalProperty pc_compat_2_6[];
extern const size_t pc_compat_2_6_len;
-extern GlobalProperty pc_compat_2_5[];
-extern const size_t pc_compat_2_5_len;
-
-extern GlobalProperty pc_compat_2_4[];
-extern const size_t pc_compat_2_4_len;
-
#define DEFINE_PC_MACHINE(suffix, namestr, initfn, optsfn) \
static void pc_machine_##suffix##_class_init(ObjectClass *oc, \
const void *data) \
diff --git a/include/hw/i386/tdvf.h b/include/hw/i386/tdvf.h
new file mode 100644
index 0000000..e75c8d1
--- /dev/null
+++ b/include/hw/i386/tdvf.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2025 Intel Corporation
+ * Author: Isaku Yamahata <isaku.yamahata at gmail.com>
+ * <isaku.yamahata at intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_I386_TDVF_H
+#define HW_I386_TDVF_H
+
+#include "qemu/osdep.h"
+
+#define TDVF_SECTION_TYPE_BFV 0
+#define TDVF_SECTION_TYPE_CFV 1
+#define TDVF_SECTION_TYPE_TD_HOB 2
+#define TDVF_SECTION_TYPE_TEMP_MEM 3
+
+#define TDVF_SECTION_ATTRIBUTES_MR_EXTEND (1U << 0)
+#define TDVF_SECTION_ATTRIBUTES_PAGE_AUG (1U << 1)
+
+typedef struct TdxFirmwareEntry {
+ uint32_t data_offset;
+ uint32_t data_len;
+ uint64_t address;
+ uint64_t size;
+ uint32_t type;
+ uint32_t attributes;
+
+ void *mem_ptr;
+} TdxFirmwareEntry;
+
+typedef struct TdxFirmware {
+ void *mem_ptr;
+
+ uint32_t nr_entries;
+ TdxFirmwareEntry *entries;
+} TdxFirmware;
+
+#define for_each_tdx_fw_entry(fw, e) \
+ for (e = (fw)->entries; e != (fw)->entries + (fw)->nr_entries; e++)
+
+int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size);
+
+#endif /* HW_I386_TDVF_H */
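A hedged usage sketch of the iteration macro above, assuming tdvf_parse_metadata() has already populated the TdxFirmware and the usual qemu/osdep.h environment is available; the dump helper itself is hypothetical:

    static void dump_tdvf_sections(const TdxFirmware *fw)
    {
        TdxFirmwareEntry *e;

        for_each_tdx_fw_entry(fw, e) {
            printf("section type %" PRIu32 ": gpa 0x%" PRIx64 " size 0x%" PRIx64
                   " (file offset 0x%" PRIx32 ", len 0x%" PRIx32 ")\n",
                   e->type, e->address, e->size, e->data_offset, e->data_len);
        }
    }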
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index 258b134..fc460b8 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -27,13 +27,8 @@
#include "qom/object.h"
struct X86MachineClass {
- /*< private >*/
MachineClass parent;
- /*< public >*/
-
- /* TSC rate migration: */
- bool save_tsc_khz;
/* use DMA capable linuxboot option rom */
bool fwcfg_dma_enabled;
/* CPU and apic information: */
diff --git a/include/hw/loader.h b/include/hw/loader.h
index d280dc3..c96b5e1 100644
--- a/include/hw/loader.h
+++ b/include/hw/loader.h
@@ -270,8 +270,6 @@ int rom_add_elf_program(const char *name, GMappedFile *mapped_file, void *data,
AddressSpace *as);
int rom_check_and_register_reset(void);
void rom_set_fw(FWCfgState *f);
-void rom_set_order_override(int order);
-void rom_reset_order_override(void);
/**
* rom_transaction_begin:
diff --git a/include/hw/nvram/fw_cfg.h b/include/hw/nvram/fw_cfg.h
index 47578cc..d41b932 100644
--- a/include/hw/nvram/fw_cfg.h
+++ b/include/hw/nvram/fw_cfg.h
@@ -42,14 +42,6 @@ struct FWCfgDataGeneratorClass {
typedef struct fw_cfg_file FWCfgFile;
-#define FW_CFG_ORDER_OVERRIDE_VGA 70
-#define FW_CFG_ORDER_OVERRIDE_NIC 80
-#define FW_CFG_ORDER_OVERRIDE_USER 100
-#define FW_CFG_ORDER_OVERRIDE_DEVICE 110
-
-void fw_cfg_set_order_override(FWCfgState *fw_cfg, int order);
-void fw_cfg_reset_order_override(FWCfgState *fw_cfg);
-
typedef struct FWCfgFiles {
uint32_t count;
FWCfgFile f[];
@@ -75,8 +67,6 @@ struct FWCfgState {
uint32_t cur_offset;
Notifier machine_ready;
- int fw_cfg_order_override;
-
bool dma_enabled;
dma_addr_t dma_addr;
AddressSpace *dma_as;
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
index 3d392b0..9d37f86 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -78,7 +78,7 @@ void vfio_address_space_insert(VFIOAddressSpace *space,
int vfio_container_dma_map(VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
- void *vaddr, bool readonly);
+ void *vaddr, bool readonly, MemoryRegion *mr);
int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
IOMMUTLBEntry *iotlb, bool unmap_all);
@@ -115,13 +115,57 @@ OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU)
struct VFIOIOMMUClass {
ObjectClass parent_class;
- /* basic feature */
+ /**
+ * @setup
+ *
+ * Perform basic setup of the container, including configuring IOMMU
+ * capabilities, IOVA ranges, supported page sizes, etc.
+ *
+ * @bcontainer: #VFIOContainerBase
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Returns true to indicate success and false for error.
+ */
bool (*setup)(VFIOContainerBase *bcontainer, Error **errp);
+
+ /**
+ * @listener_begin
+ *
+ * Called at the beginning of an address space update transaction.
+ * See #MemoryListener.
+ *
+ * @bcontainer: #VFIOContainerBase
+ */
void (*listener_begin)(VFIOContainerBase *bcontainer);
+
+ /**
+ * @listener_commit
+ *
+ * Called at the end of an address space update transaction.
+ * See #MemoryListener.
+ *
+ * @bcontainer: #VFIOContainerBase
+ */
void (*listener_commit)(VFIOContainerBase *bcontainer);
+
+ /**
+ * @dma_map
+ *
+ * Map an address range into the container. Note that the memory region is
+ * referenced within an RCU read lock region across this call.
+ *
+ * @bcontainer: #VFIOContainerBase to use
+ * @iova: start address to map
+ * @size: size of the range to map
+ * @vaddr: process virtual address of mapping
+ * @readonly: true if mapping should be readonly
+ * @mr: the memory region for this mapping
+ *
+ * Returns 0 to indicate success and -errno otherwise.
+ */
int (*dma_map)(const VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
- void *vaddr, bool readonly);
+ void *vaddr, bool readonly, MemoryRegion *mr);
/**
* @dma_unmap
*
@@ -132,12 +176,38 @@ struct VFIOIOMMUClass {
* @size: size of the range to unmap
* @iotlb: The IOMMU TLB mapping entry (or NULL)
* @unmap_all: if set, unmap the entire address space
+ *
+ * Returns 0 to indicate success and -errno otherwise.
*/
int (*dma_unmap)(const VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
IOMMUTLBEntry *iotlb, bool unmap_all);
+
+ /**
+ * @attach_device
+ *
+ * Associate the given device with a container and do some related
+ * initialization of the device context.
+ *
+ * @name: name of the device
+ * @vbasedev: the device
+ * @as: address space to use
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Returns true to indicate success and false for error.
+ */
bool (*attach_device)(const char *name, VFIODevice *vbasedev,
AddressSpace *as, Error **errp);
+
+ /**
+ * @detach_device
+ *
+ * Detach the given device from its container and clean up any necessary
+ * state.
+ *
+ * @vbasedev: the device to disassociate
+ */
void (*detach_device)(VFIODevice *vbasedev);
/* migration feature */
@@ -152,7 +222,7 @@ struct VFIOIOMMUClass {
* @start: indicates whether to start or stop dirty pages tracking
* @errp: pointer to Error*, to store an error if it happens.
*
- * Returns zero to indicate success and negative for error
+ * Returns zero to indicate success and negative for error.
*/
int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer,
bool start, Error **errp);
@@ -167,7 +237,7 @@ struct VFIOIOMMUClass {
* @size: size of iova range
* @errp: pointer to Error*, to store an error if it happens.
*
- * Returns zero to indicate success and negative for error
+ * Returns zero to indicate success and negative for error.
*/
int (*query_dirty_bitmap)(const VFIOContainerBase *bcontainer,
VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp);
@@ -183,4 +253,7 @@ struct VFIOIOMMUClass {
void (*release)(VFIOContainerBase *bcontainer);
};
+VFIORamDiscardListener *vfio_find_ram_discard_listener(
+ VFIOContainerBase *bcontainer, MemoryRegionSection *section);
+
#endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */
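Given the documented contract above, a backend's dma_map hook now receives the MemoryRegion alongside the mapping parameters. A minimal sketch of a conforming implementation (the backend name is hypothetical; the real implementations are in hw/vfio/container.c and hw/vfio/iommufd.c):

    static int mybackend_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
                                 ram_addr_t size, void *vaddr, bool readonly,
                                 MemoryRegion *mr)
    {
        /*
         * Per the class documentation, mr is only referenced within the
         * caller's RCU read lock region across this call; a backend that
         * needs it afterwards must take its own reference.
         */
        return 0;
    }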
diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h
new file mode 100644
index 0000000..750ea5b
--- /dev/null
+++ b/include/hw/vfio/vfio-cpr.h
@@ -0,0 +1,18 @@
+/*
+ * VFIO CPR
+ *
+ * Copyright (c) 2025 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_VFIO_VFIO_CPR_H
+#define HW_VFIO_VFIO_CPR_H
+
+struct VFIOContainerBase;
+
+bool vfio_cpr_register_container(struct VFIOContainerBase *bcontainer,
+ Error **errp);
+void vfio_cpr_unregister_container(struct VFIOContainerBase *bcontainer);
+
+#endif /* HW_VFIO_VFIO_CPR_H */
diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h
index 1dbc385..eab5394 100644
--- a/include/hw/virtio/virtio-pci.h
+++ b/include/hw/virtio/virtio-pci.h
@@ -32,9 +32,7 @@ DECLARE_OBJ_CHECKERS(VirtioPCIBusState, VirtioPCIBusClass,
enum {
VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT,
VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT,
- VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT,
VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT,
- VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT,
VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT,
VIRTIO_PCI_FLAG_ATS_BIT,
VIRTIO_PCI_FLAG_INIT_DEVERR_BIT,
@@ -54,12 +52,6 @@ enum {
* vcpu thread using ioeventfd for some devices. */
#define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT)
-/* virtio version flags */
-#define VIRTIO_PCI_FLAG_DISABLE_PCIE (1 << VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT)
-
-/* migrate extra state */
-#define VIRTIO_PCI_FLAG_MIGRATE_EXTRA (1 << VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT)
-
/* have pio notification for modern device ? */
#define VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY \
(1 << VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT)
diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h
index ab15577..a88cf8b 100644
--- a/include/io/channel-socket.h
+++ b/include/io/channel-socket.h
@@ -261,5 +261,18 @@ QIOChannelSocket *
qio_channel_socket_accept(QIOChannelSocket *ioc,
Error **errp);
+/**
+ * qio_channel_socket_set_send_buffer:
+ * @ioc: the socket channel object
+ * @size: buffer size
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Set the underlying socket send buffer size.
+ *
+ * Returns: 0 on success, or -1 on error.
+ */
+int qio_channel_socket_set_send_buffer(QIOChannelSocket *ioc,
+ size_t size,
+ Error **errp);
#endif /* QIO_CHANNEL_SOCKET_H */
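A short usage sketch for the new helper (the sioc channel variable and the 1 MiB size are placeholders; error handling follows the usual QIOChannelSocket pattern):

    Error *local_err = NULL;

    if (qio_channel_socket_set_send_buffer(sioc, 1 * MiB, &local_err) < 0) {
        error_report_err(local_err);    /* SO_SNDBUF could not be applied */
    }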
diff --git a/include/standard-headers/uefi/uefi.h b/include/standard-headers/uefi/uefi.h
new file mode 100644
index 0000000..5256349
--- /dev/null
+++ b/include/standard-headers/uefi/uefi.h
@@ -0,0 +1,187 @@
+/*
+ * Copyright (C) 2025 Intel Corporation
+ *
+ * Author: Isaku Yamahata <isaku.yamahata at gmail.com>
+ * <isaku.yamahata at intel.com>
+ * Xiaoyao Li <xiaoyao.li@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_I386_UEFI_H
+#define HW_I386_UEFI_H
+
+/***************************************************************************/
+/*
+ * basic EFI definitions
+ * supplemented with UEFI Specification Version 2.8 (Errata A)
+ * released February 2020
+ */
+/* UEFI integer is little endian */
+
+typedef struct {
+ uint32_t Data1;
+ uint16_t Data2;
+ uint16_t Data3;
+ uint8_t Data4[8];
+} EFI_GUID;
+
+typedef enum {
+ EfiReservedMemoryType,
+ EfiLoaderCode,
+ EfiLoaderData,
+ EfiBootServicesCode,
+ EfiBootServicesData,
+ EfiRuntimeServicesCode,
+ EfiRuntimeServicesData,
+ EfiConventionalMemory,
+ EfiUnusableMemory,
+ EfiACPIReclaimMemory,
+ EfiACPIMemoryNVS,
+ EfiMemoryMappedIO,
+ EfiMemoryMappedIOPortSpace,
+ EfiPalCode,
+ EfiPersistentMemory,
+ EfiUnacceptedMemoryType,
+ EfiMaxMemoryType
+} EFI_MEMORY_TYPE;
+
+#define EFI_HOB_HANDOFF_TABLE_VERSION 0x0009
+
+#define EFI_HOB_TYPE_HANDOFF 0x0001
+#define EFI_HOB_TYPE_MEMORY_ALLOCATION 0x0002
+#define EFI_HOB_TYPE_RESOURCE_DESCRIPTOR 0x0003
+#define EFI_HOB_TYPE_GUID_EXTENSION 0x0004
+#define EFI_HOB_TYPE_FV 0x0005
+#define EFI_HOB_TYPE_CPU 0x0006
+#define EFI_HOB_TYPE_MEMORY_POOL 0x0007
+#define EFI_HOB_TYPE_FV2 0x0009
+#define EFI_HOB_TYPE_LOAD_PEIM_UNUSED 0x000A
+#define EFI_HOB_TYPE_UEFI_CAPSULE 0x000B
+#define EFI_HOB_TYPE_FV3 0x000C
+#define EFI_HOB_TYPE_UNUSED 0xFFFE
+#define EFI_HOB_TYPE_END_OF_HOB_LIST 0xFFFF
+
+typedef struct {
+ uint16_t HobType;
+ uint16_t HobLength;
+ uint32_t Reserved;
+} EFI_HOB_GENERIC_HEADER;
+
+typedef uint64_t EFI_PHYSICAL_ADDRESS;
+typedef uint32_t EFI_BOOT_MODE;
+
+typedef struct {
+ EFI_HOB_GENERIC_HEADER Header;
+ uint32_t Version;
+ EFI_BOOT_MODE BootMode;
+ EFI_PHYSICAL_ADDRESS EfiMemoryTop;
+ EFI_PHYSICAL_ADDRESS EfiMemoryBottom;
+ EFI_PHYSICAL_ADDRESS EfiFreeMemoryTop;
+ EFI_PHYSICAL_ADDRESS EfiFreeMemoryBottom;
+ EFI_PHYSICAL_ADDRESS EfiEndOfHobList;
+} EFI_HOB_HANDOFF_INFO_TABLE;
+
+#define EFI_RESOURCE_SYSTEM_MEMORY 0x00000000
+#define EFI_RESOURCE_MEMORY_MAPPED_IO 0x00000001
+#define EFI_RESOURCE_IO 0x00000002
+#define EFI_RESOURCE_FIRMWARE_DEVICE 0x00000003
+#define EFI_RESOURCE_MEMORY_MAPPED_IO_PORT 0x00000004
+#define EFI_RESOURCE_MEMORY_RESERVED 0x00000005
+#define EFI_RESOURCE_IO_RESERVED 0x00000006
+#define EFI_RESOURCE_MEMORY_UNACCEPTED 0x00000007
+#define EFI_RESOURCE_MAX_MEMORY_TYPE 0x00000008
+
+#define EFI_RESOURCE_ATTRIBUTE_PRESENT 0x00000001
+#define EFI_RESOURCE_ATTRIBUTE_INITIALIZED 0x00000002
+#define EFI_RESOURCE_ATTRIBUTE_TESTED 0x00000004
+#define EFI_RESOURCE_ATTRIBUTE_SINGLE_BIT_ECC 0x00000008
+#define EFI_RESOURCE_ATTRIBUTE_MULTIPLE_BIT_ECC 0x00000010
+#define EFI_RESOURCE_ATTRIBUTE_ECC_RESERVED_1 0x00000020
+#define EFI_RESOURCE_ATTRIBUTE_ECC_RESERVED_2 0x00000040
+#define EFI_RESOURCE_ATTRIBUTE_READ_PROTECTED 0x00000080
+#define EFI_RESOURCE_ATTRIBUTE_WRITE_PROTECTED 0x00000100
+#define EFI_RESOURCE_ATTRIBUTE_EXECUTION_PROTECTED 0x00000200
+#define EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE 0x00000400
+#define EFI_RESOURCE_ATTRIBUTE_WRITE_COMBINEABLE 0x00000800
+#define EFI_RESOURCE_ATTRIBUTE_WRITE_THROUGH_CACHEABLE 0x00001000
+#define EFI_RESOURCE_ATTRIBUTE_WRITE_BACK_CACHEABLE 0x00002000
+#define EFI_RESOURCE_ATTRIBUTE_16_BIT_IO 0x00004000
+#define EFI_RESOURCE_ATTRIBUTE_32_BIT_IO 0x00008000
+#define EFI_RESOURCE_ATTRIBUTE_64_BIT_IO 0x00010000
+#define EFI_RESOURCE_ATTRIBUTE_UNCACHED_EXPORTED 0x00020000
+#define EFI_RESOURCE_ATTRIBUTE_READ_ONLY_PROTECTED 0x00040000
+#define EFI_RESOURCE_ATTRIBUTE_READ_ONLY_PROTECTABLE 0x00080000
+#define EFI_RESOURCE_ATTRIBUTE_READ_PROTECTABLE 0x00100000
+#define EFI_RESOURCE_ATTRIBUTE_WRITE_PROTECTABLE 0x00200000
+#define EFI_RESOURCE_ATTRIBUTE_EXECUTION_PROTECTABLE 0x00400000
+#define EFI_RESOURCE_ATTRIBUTE_PERSISTENT 0x00800000
+#define EFI_RESOURCE_ATTRIBUTE_PERSISTABLE 0x01000000
+#define EFI_RESOURCE_ATTRIBUTE_MORE_RELIABLE 0x02000000
+
+typedef uint32_t EFI_RESOURCE_TYPE;
+typedef uint32_t EFI_RESOURCE_ATTRIBUTE_TYPE;
+
+typedef struct {
+ EFI_HOB_GENERIC_HEADER Header;
+ EFI_GUID Owner;
+ EFI_RESOURCE_TYPE ResourceType;
+ EFI_RESOURCE_ATTRIBUTE_TYPE ResourceAttribute;
+ EFI_PHYSICAL_ADDRESS PhysicalStart;
+ uint64_t ResourceLength;
+} EFI_HOB_RESOURCE_DESCRIPTOR;
+
+typedef struct {
+ EFI_HOB_GENERIC_HEADER Header;
+ EFI_GUID Name;
+
+ /* guid specific data follows */
+} EFI_HOB_GUID_TYPE;
+
+typedef struct {
+ EFI_HOB_GENERIC_HEADER Header;
+ EFI_PHYSICAL_ADDRESS BaseAddress;
+ uint64_t Length;
+} EFI_HOB_FIRMWARE_VOLUME;
+
+typedef struct {
+ EFI_HOB_GENERIC_HEADER Header;
+ EFI_PHYSICAL_ADDRESS BaseAddress;
+ uint64_t Length;
+ EFI_GUID FvName;
+ EFI_GUID FileName;
+} EFI_HOB_FIRMWARE_VOLUME2;
+
+typedef struct {
+ EFI_HOB_GENERIC_HEADER Header;
+ EFI_PHYSICAL_ADDRESS BaseAddress;
+ uint64_t Length;
+ uint32_t AuthenticationStatus;
+ bool ExtractedFv;
+ EFI_GUID FvName;
+ EFI_GUID FileName;
+} EFI_HOB_FIRMWARE_VOLUME3;
+
+typedef struct {
+ EFI_HOB_GENERIC_HEADER Header;
+ uint8_t SizeOfMemorySpace;
+ uint8_t SizeOfIoSpace;
+ uint8_t Reserved[6];
+} EFI_HOB_CPU;
+
+typedef struct {
+ EFI_HOB_GENERIC_HEADER Header;
+} EFI_HOB_MEMORY_POOL;
+
+typedef struct {
+ EFI_HOB_GENERIC_HEADER Header;
+
+ EFI_PHYSICAL_ADDRESS BaseAddress;
+ uint64_t Length;
+} EFI_HOB_UEFI_CAPSULE;
+
+#define EFI_HOB_OWNER_ZERO \
+ ((EFI_GUID){ 0x00000000, 0x0000, 0x0000, \
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } })
+
+#endif
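As a quick illustration of the structures above, a hedged sketch of how a HOB producer (such as the TD HOB setup elsewhere in this series) might fill one resource descriptor in a local variable; the address range and attribute set are placeholders:

    EFI_HOB_RESOURCE_DESCRIPTOR hob = {
        .Header = {
            .HobType   = EFI_HOB_TYPE_RESOURCE_DESCRIPTOR,
            .HobLength = sizeof(EFI_HOB_RESOURCE_DESCRIPTOR),
        },
        .Owner             = EFI_HOB_OWNER_ZERO,
        .ResourceType      = EFI_RESOURCE_SYSTEM_MEMORY,
        .ResourceAttribute = EFI_RESOURCE_ATTRIBUTE_PRESENT |
                             EFI_RESOURCE_ATTRIBUTE_INITIALIZED |
                             EFI_RESOURCE_ATTRIBUTE_TESTED,
        .PhysicalStart     = 0x0,          /* placeholder base */
        .ResourceLength    = 0x8000000,    /* placeholder: 128 MiB */
    };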
diff --git a/include/system/host_iommu_device.h b/include/system/host_iommu_device.h
index 809cced..ab849a4 100644
--- a/include/system/host_iommu_device.h
+++ b/include/system/host_iommu_device.h
@@ -14,6 +14,13 @@
#include "qom/object.h"
#include "qapi/error.h"
+#ifdef CONFIG_LINUX
+#include "linux/iommufd.h"
+
+typedef union VendorCaps {
+ struct iommu_hw_info_vtd vtd;
+ struct iommu_hw_info_arm_smmuv3 smmuv3;
+} VendorCaps;
/**
* struct HostIOMMUDeviceCaps - Define host IOMMU device capabilities.
@@ -22,11 +29,17 @@
*
* @hw_caps: host platform IOMMU capabilities (e.g. on IOMMUFD this represents
* the @out_capabilities value returned from IOMMU_GET_HW_INFO ioctl)
+ *
+ * @vendor_caps: host platform IOMMU vendor-specific capabilities (e.g. on
+ * IOMMUFD this represents a user-space buffer filled by the kernel
+ * with the host IOMMU @type specific hardware information)
*/
typedef struct HostIOMMUDeviceCaps {
uint32_t type;
uint64_t hw_caps;
+ VendorCaps vendor_caps;
} HostIOMMUDeviceCaps;
+#endif
#define TYPE_HOST_IOMMU_DEVICE "host-iommu-device"
OBJECT_DECLARE_TYPE(HostIOMMUDevice, HostIOMMUDeviceClass, HOST_IOMMU_DEVICE)
@@ -38,7 +51,9 @@ struct HostIOMMUDevice {
void *agent; /* pointer to agent device, ie. VFIO or VDPA device */
PCIBus *aliased_bus;
int aliased_devfn;
+#ifdef CONFIG_LINUX
HostIOMMUDeviceCaps caps;
+#endif
};
/**
diff --git a/include/system/iommufd.h b/include/system/iommufd.h
index cbab75b..283861b 100644
--- a/include/system/iommufd.h
+++ b/include/system/iommufd.h
@@ -61,6 +61,60 @@ bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id,
uint64_t iova, ram_addr_t size,
uint64_t page_size, uint64_t *data,
Error **errp);
+bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id,
+ uint32_t data_type, uint32_t entry_len,
+ uint32_t *entry_num, void *data,
+ Error **errp);
#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
+OBJECT_DECLARE_TYPE(HostIOMMUDeviceIOMMUFD, HostIOMMUDeviceIOMMUFDClass,
+ HOST_IOMMU_DEVICE_IOMMUFD)
+
+/* Overload of the host IOMMU device for the iommufd backend */
+struct HostIOMMUDeviceIOMMUFD {
+ HostIOMMUDevice parent_obj;
+
+ IOMMUFDBackend *iommufd;
+ uint32_t devid;
+ uint32_t hwpt_id;
+};
+
+struct HostIOMMUDeviceIOMMUFDClass {
+ HostIOMMUDeviceClass parent_class;
+
+ /**
+ * @attach_hwpt: attach host IOMMU device to IOMMUFD hardware page table.
+ * VFIO and VDPA devices can have different implementations.
+ *
+ * Mandatory callback.
+ *
+ * @idev: host IOMMU device backed by IOMMUFD backend.
+ *
+ * @hwpt_id: ID of IOMMUFD hardware page table.
+ *
+ * @errp: pass an Error out when attachment fails.
+ *
+ * Returns: true on success, false on failure.
+ */
+ bool (*attach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, uint32_t hwpt_id,
+ Error **errp);
+ /**
+ * @detach_hwpt: detach host IOMMU device from IOMMUFD hardware page table.
+ * VFIO and VDPA devices can have different implementations.
+ *
+ * Mandatory callback.
+ *
+ * @idev: host IOMMU device backed by IOMMUFD backend.
+ *
+ * @errp: pass an Error out when detachment fails.
+ *
+ * Returns: true on success, false on failure.
+ */
+ bool (*detach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, Error **errp);
+};
+
+bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ uint32_t hwpt_id, Error **errp);
+bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ Error **errp);
#endif
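The hw/vfio/iommufd.c hunk earlier in this series supplies the attach_hwpt/detach_hwpt hooks; consumers are expected to go through the wrappers declared here. A hedged sketch of a caller switching a device onto a newly allocated hardware page table (the function and its hwpt_id bookkeeping are hypothetical):

    static bool switch_to_hwpt(HostIOMMUDeviceIOMMUFD *idev, uint32_t new_hwpt_id,
                               Error **errp)
    {
        /* Presumably dispatches to the class hook installed by the backend. */
        if (!host_iommu_device_iommufd_attach_hwpt(idev, new_hwpt_id, errp)) {
            return false;
        }
        idev->hwpt_id = new_hwpt_id;    /* track the currently attached hwpt */
        return true;
    }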
diff --git a/include/system/kvm.h b/include/system/kvm.h
index b690dda..62ec131 100644
--- a/include/system/kvm.h
+++ b/include/system/kvm.h
@@ -376,6 +376,7 @@ int kvm_arch_get_default_type(MachineState *ms);
int kvm_arch_init(MachineState *ms, KVMState *s);
+int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp);
int kvm_arch_init_vcpu(CPUState *cpu);
int kvm_arch_destroy_vcpu(CPUState *cpu);
diff --git a/include/system/memory.h b/include/system/memory.h
index fc35a0d..0848690 100644
--- a/include/system/memory.h
+++ b/include/system/memory.h
@@ -739,21 +739,20 @@ void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
RamDiscardListener *rdl);
/**
- * memory_get_xlat_addr: Extract addresses from a TLB entry
+ * memory_translate_iotlb: Extract addresses from a TLB entry.
+ * Called with rcu_read_lock held.
*
* @iotlb: pointer to an #IOMMUTLBEntry
- * @vaddr: virtual address
- * @ram_addr: RAM address
- * @read_only: indicates if writes are allowed
- * @mr_has_discard_manager: indicates memory is controlled by a
- * RamDiscardManager
+ * @xlat_p: return the offset of the entry from the start of the returned
+ * MemoryRegion.
* @errp: pointer to Error*, to store an error if it happens.
*
- * Return: true on success, else false setting @errp with error.
+ * Return: On success, return the MemoryRegion containing the @iotlb translated
+ * addr. The MemoryRegion must not be accessed after rcu_read_unlock.
+ * On failure, return NULL, setting @errp with error.
*/
-bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
- ram_addr_t *ram_addr, bool *read_only,
- bool *mr_has_discard_manager, Error **errp);
+MemoryRegion *memory_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p,
+ Error **errp);
typedef struct CoalescedMemoryRange CoalescedMemoryRange;
typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd;
diff --git a/io/channel-socket.c b/io/channel-socket.c
index 088b49f..3b7ca92 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -78,6 +78,17 @@ qio_channel_socket_new(void)
return sioc;
}
+int qio_channel_socket_set_send_buffer(QIOChannelSocket *ioc,
+ size_t size,
+ Error **errp)
+{
+ if (setsockopt(ioc->fd, SOL_SOCKET, SO_SNDBUF, &size, sizeof(size)) < 0) {
+ error_setg_errno(errp, errno, "Unable to set socket send buffer size");
+ return -1;
+ }
+
+ return 0;
+}
static int
qio_channel_socket_set_fd(QIOChannelSocket *sioc,
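
The new helper simply forwards to setsockopt(). A hedged example of calling it from new code, where example_tune_channel and the 1 * MiB size are made up for illustration and a rejected request is treated as non-fatal:

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qemu/error-report.h"
#include "io/channel-socket.h"

static void example_tune_channel(QIOChannelSocket *sioc)
{
    Error *local_err = NULL;

    if (qio_channel_socket_set_send_buffer(sioc, 1 * MiB, &local_err) < 0) {
        /* Keep the kernel default if the request is rejected. */
        warn_report_err(local_err);
    }
}
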
diff --git a/meson.build b/meson.build
index fdad3fb..967a10e 100644
--- a/meson.build
+++ b/meson.build
@@ -106,6 +106,7 @@ if have_rust
endif
if have_rust
+ rustdoc = find_program('rustdoc', required: get_option('rust'))
bindgen = find_program('bindgen', required: get_option('rust'))
if not bindgen.found() or bindgen.version().version_compare('<0.60.0')
if get_option('rust').enabled()
@@ -3272,6 +3273,7 @@ config_devices_mak_list = []
config_devices_h = {}
config_target_h = {}
config_target_mak = {}
+config_base_arch_mak = {}
disassemblers = {
'alpha' : ['CONFIG_ALPHA_DIS'],
@@ -3463,6 +3465,11 @@ foreach target : target_dirs
config_all_devices += config_devices
endif
config_target_mak += {target: config_target}
+
+ # build a merged config for all targets with the same TARGET_BASE_ARCH
+ target_base_arch = config_target['TARGET_BASE_ARCH']
+ config_base_arch = config_base_arch_mak.get(target_base_arch, {}) + config_target
+ config_base_arch_mak += {target_base_arch: config_base_arch}
endforeach
target_dirs = actual_target_dirs
@@ -3718,14 +3725,12 @@ io_ss = ss.source_set()
qmp_ss = ss.source_set()
qom_ss = ss.source_set()
system_ss = ss.source_set()
-libsystem_ss = ss.source_set()
specific_fuzz_ss = ss.source_set()
specific_ss = ss.source_set()
rust_devices_ss = ss.source_set()
stub_ss = ss.source_set()
trace_ss = ss.source_set()
user_ss = ss.source_set()
-libuser_ss = ss.source_set()
util_ss = ss.source_set()
# accel modules
@@ -4102,30 +4107,20 @@ common_ss.add(hwcore)
system_ss.add(authz, blockdev, chardev, crypto, io, qmp)
common_ss.add(qom, qemuutil)
-common_ss.add_all(when: 'CONFIG_SYSTEM_ONLY', if_true: [system_ss])
-common_ss.add_all(when: 'CONFIG_USER_ONLY', if_true: user_ss)
-
-libuser_ss = libuser_ss.apply({})
libuser = static_library('user',
- libuser_ss.sources() + genh,
+ user_ss.all_sources() + genh,
c_args: ['-DCONFIG_USER_ONLY',
'-DCOMPILING_SYSTEM_VS_USER'],
- dependencies: libuser_ss.dependencies(),
+ include_directories: common_user_inc,
+ dependencies: user_ss.all_dependencies(),
build_by_default: false)
-libuser = declare_dependency(objects: libuser.extract_all_objects(recursive: false),
- dependencies: libuser_ss.dependencies())
-common_ss.add(when: 'CONFIG_USER_ONLY', if_true: libuser)
-libsystem_ss = libsystem_ss.apply({})
libsystem = static_library('system',
- libsystem_ss.sources() + genh,
+ system_ss.all_sources() + genh,
c_args: ['-DCONFIG_SOFTMMU',
'-DCOMPILING_SYSTEM_VS_USER'],
- dependencies: libsystem_ss.dependencies(),
+ dependencies: system_ss.all_dependencies(),
build_by_default: false)
-libsystem = declare_dependency(objects: libsystem.extract_all_objects(recursive: false),
- dependencies: libsystem_ss.dependencies())
-common_ss.add(when: 'CONFIG_SYSTEM_ONLY', if_true: libsystem)
# Note that this library is never used directly (only through extract_objects)
# and is not built by default; therefore, source files not used by the build
@@ -4133,65 +4128,70 @@ common_ss.add(when: 'CONFIG_SYSTEM_ONLY', if_true: libsystem)
common_all = static_library('common',
build_by_default: false,
sources: common_ss.all_sources() + genh,
- include_directories: common_user_inc,
implicit_include_directories: false,
dependencies: common_ss.all_dependencies())
# construct common libraries per base architecture
-hw_common_arch_libs = {}
target_common_arch_libs = {}
target_common_system_arch_libs = {}
-foreach target : target_dirs
- config_target = config_target_mak[target]
- target_base_arch = config_target['TARGET_BASE_ARCH']
+foreach target_base_arch, config_base_arch : config_base_arch_mak
target_inc = [include_directories('target' / target_base_arch)]
inc = [common_user_inc + target_inc]
+ target_common = common_ss.apply(config_base_arch, strict: false)
+ target_system = system_ss.apply(config_base_arch, strict: false)
+ target_user = user_ss.apply(config_base_arch, strict: false)
+ common_deps = []
+ system_deps = []
+ user_deps = []
+ foreach dep: target_common.dependencies()
+ common_deps += dep.partial_dependency(compile_args: true, includes: true)
+ endforeach
+ foreach dep: target_system.dependencies()
+ system_deps += dep.partial_dependency(compile_args: true, includes: true)
+ endforeach
+ foreach dep: target_user.dependencies()
+ user_deps += dep.partial_dependency(compile_args: true, includes: true)
+ endforeach
+
# prevent common code to access cpu compile time definition,
# but still allow access to cpu.h
target_c_args = ['-DCPU_DEFS_H']
target_system_c_args = target_c_args + ['-DCOMPILING_SYSTEM_VS_USER', '-DCONFIG_SOFTMMU']
- if target_base_arch in hw_common_arch
- if target_base_arch not in hw_common_arch_libs
- src = hw_common_arch[target_base_arch]
- lib = static_library(
- 'hw_' + target_base_arch,
- build_by_default: false,
- sources: src.all_sources() + genh,
- include_directories: inc,
- c_args: target_system_c_args,
- dependencies: src.all_dependencies())
- hw_common_arch_libs += {target_base_arch: lib}
- endif
- endif
-
if target_base_arch in target_common_arch
- if target_base_arch not in target_common_arch_libs
- src = target_common_arch[target_base_arch]
- lib = static_library(
- 'target_' + target_base_arch,
- build_by_default: false,
- sources: src.all_sources() + genh,
- include_directories: inc,
- c_args: target_c_args,
- dependencies: src.all_dependencies())
- target_common_arch_libs += {target_base_arch: lib}
+ src = target_common_arch[target_base_arch]
+ lib = static_library(
+ 'common_' + target_base_arch,
+ build_by_default: false,
+ sources: src.all_sources() + genh,
+ include_directories: inc,
+ c_args: target_c_args,
+ dependencies: src.all_dependencies() + common_deps +
+ system_deps + user_deps)
+ target_common_arch_libs += {target_base_arch: lib}
+ endif
+
+ # merge hw_common_arch in target_common_system_arch
+ if target_base_arch in hw_common_arch
+ hw_src = hw_common_arch[target_base_arch]
+ if target_base_arch in target_common_system_arch
+ target_common_system_arch[target_base_arch].add_all(hw_src)
+ else
+ target_common_system_arch += {target_base_arch: hw_src}
endif
endif
if target_base_arch in target_common_system_arch
- if target_base_arch not in target_common_system_arch_libs
- src = target_common_system_arch[target_base_arch]
- lib = static_library(
- 'target_system_' + target_base_arch,
- build_by_default: false,
- sources: src.all_sources() + genh,
- include_directories: inc,
- c_args: target_system_c_args,
- dependencies: src.all_dependencies())
- target_common_system_arch_libs += {target_base_arch: lib}
- endif
+ src = target_common_system_arch[target_base_arch]
+ lib = static_library(
+ 'system_' + target_base_arch,
+ build_by_default: false,
+ sources: src.all_sources() + genh,
+ include_directories: inc,
+ c_args: target_system_c_args,
+ dependencies: src.all_dependencies() + common_deps + system_deps)
+ target_common_system_arch_libs += {target_base_arch: lib}
endif
endforeach
@@ -4368,10 +4368,14 @@ foreach target : target_dirs
objects += lib.extract_objects(src.sources())
arch_deps += src.dependencies()
endif
- if target_type == 'system' and target_base_arch in hw_common_arch_libs
- src = hw_common_arch[target_base_arch].apply(config_target, strict: false)
- lib = hw_common_arch_libs[target_base_arch]
- objects += lib.extract_objects(src.sources())
+ if target_type == 'system'
+ src = system_ss.apply(config_target, strict: false)
+ objects += libsystem.extract_objects(src.sources())
+ arch_deps += src.dependencies()
+ endif
+ if target_type == 'user'
+ src = user_ss.apply(config_target, strict: false)
+ objects += libuser.extract_objects(src.sources())
arch_deps += src.dependencies()
endif
if target_type == 'system' and target_base_arch in target_common_system_arch_libs
@@ -4399,7 +4403,7 @@ foreach target : target_dirs
build_by_default: true,
build_always_stale: true)
rlib = static_library('rust_' + target.underscorify(),
- rlib_rs,
+ structured_sources([], {'.': rlib_rs}),
dependencies: target_rust.dependencies(),
override_options: ['rust_std=2021', 'build.rust_std=2021'],
rust_abi: 'c')
@@ -4753,6 +4757,7 @@ if have_rust
summary_info += {'Rust target': config_host['RUST_TARGET_TRIPLE']}
summary_info += {'rustc': ' '.join(rustc.cmd_array())}
summary_info += {'rustc version': rustc.version()}
+ summary_info += {'rustdoc': rustdoc}
summary_info += {'bindgen': bindgen.full_path()}
summary_info += {'bindgen version': bindgen.version()}
endif
diff --git a/nbd/client-connection.c b/nbd/client-connection.c
index b11e266..79ea97e 100644
--- a/nbd/client-connection.c
+++ b/nbd/client-connection.c
@@ -31,6 +31,8 @@
#include "qapi/clone-visitor.h"
#include "qemu/coroutine.h"
+#include "nbd/nbd-internal.h"
+
struct NBDClientConnection {
/* Initialization constants, never change */
SocketAddress *saddr; /* address to connect to */
@@ -140,6 +142,7 @@ static int nbd_connect(QIOChannelSocket *sioc, SocketAddress *addr,
return ret;
}
+ nbd_set_socket_send_buffer(sioc);
qio_channel_set_delay(QIO_CHANNEL(sioc), false);
if (!info) {
diff --git a/nbd/common.c b/nbd/common.c
index 589a748..2a133a6 100644
--- a/nbd/common.c
+++ b/nbd/common.c
@@ -18,6 +18,9 @@
#include "qemu/osdep.h"
#include "trace.h"
+#include "io/channel-socket.h"
+#include "qapi/error.h"
+#include "qemu/units.h"
#include "nbd-internal.h"
/* Discard length bytes from channel. Return -errno on failure and 0 on
@@ -264,3 +267,26 @@ const char *nbd_mode_lookup(NBDMode mode)
return "<unknown>";
}
}
+
+/*
+ * Testing shows that a 2 MiB send buffer is optimal. Changing the receive buffer
+ * size has no effect on performance.
+ * On Linux we need to increase net.core.wmem_max to make this effective.
+ */
+#if defined(__APPLE__) || defined(__linux__)
+#define UNIX_STREAM_SOCKET_SEND_BUFFER_SIZE (2 * MiB)
+#endif
+
+void nbd_set_socket_send_buffer(QIOChannelSocket *sioc)
+{
+#ifdef UNIX_STREAM_SOCKET_SEND_BUFFER_SIZE
+ if (sioc->localAddr.ss_family == AF_UNIX) {
+ size_t size = UNIX_STREAM_SOCKET_SEND_BUFFER_SIZE;
+ Error *errp = NULL;
+
+ if (qio_channel_socket_set_send_buffer(sioc, size, &errp) < 0) {
+ warn_report_err(errp);
+ }
+ }
+#endif /* UNIX_STREAM_SOCKET_SEND_BUFFER_SIZE */
+}
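
Because the kernel may clamp the request (on Linux, to net.core.wmem_max) and adjusts the stored value, a hedged way to see what was actually granted is to read SO_SNDBUF back; example_get_send_buffer is illustrative only:

#include "qemu/osdep.h"
#include "io/channel-socket.h"

static int example_get_send_buffer(QIOChannelSocket *sioc)
{
    int granted = 0;
    socklen_t len = sizeof(granted);

    if (getsockopt(sioc->fd, SOL_SOCKET, SO_SNDBUF, &granted, &len) < 0) {
        return -errno;
    }
    /* On Linux the kernel typically reports roughly double the requested size. */
    return granted;
}
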
diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index 715d92d..6bafeef 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -74,4 +74,9 @@ static inline int nbd_write(QIOChannel *ioc, const void *buffer, size_t size,
int nbd_drop(QIOChannel *ioc, size_t size, Error **errp);
+/* nbd_set_socket_send_buffer
+ * Set the socket send buffer size for optimal performance.
+ */
+void nbd_set_socket_send_buffer(QIOChannelSocket *sioc);
+
#endif
diff --git a/nbd/server.c b/nbd/server.c
index 2076fb2..d242be9 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -3291,6 +3291,8 @@ void nbd_client_new(QIOChannelSocket *sioc,
client->close_fn = close_fn;
client->owner = owner;
+ nbd_set_socket_send_buffer(sioc);
+
co = qemu_coroutine_create(nbd_co_client_start, client);
qemu_coroutine_enter(co);
}
diff --git a/plugins/meson.build b/plugins/meson.build
index 5383c7b..b20edfb 100644
--- a/plugins/meson.build
+++ b/plugins/meson.build
@@ -61,8 +61,8 @@ endif
user_ss.add(files('user.c', 'api-user.c'))
system_ss.add(files('system.c', 'api-system.c'))
-libuser_ss.add(files('api.c', 'core.c'))
-libsystem_ss.add(files('api.c', 'core.c'))
+user_ss.add(files('api.c', 'core.c'))
+system_ss.add(files('api.c', 'core.c'))
common_ss.add(files('loader.c'))
diff --git a/python/scripts/vendor.py b/python/scripts/vendor.py
index 0405e91..b47db00 100755
--- a/python/scripts/vendor.py
+++ b/python/scripts/vendor.py
@@ -41,8 +41,8 @@ def main() -> int:
parser.parse_args()
packages = {
- "meson==1.5.0":
- "52b34f4903b882df52ad0d533146d4b992c018ea77399f825579737672ae7b20",
+ "meson==1.8.1":
+ "374bbf71247e629475fc10b0bd2ef66fc418c2d8f4890572f74de0f97d0d42da",
}
vendor_dir = Path(__file__, "..", "..", "wheels").resolve()
diff --git a/python/wheels/meson-1.5.0-py3-none-any.whl b/python/wheels/meson-1.5.0-py3-none-any.whl
deleted file mode 100644
index c7edeb3..0000000
--- a/python/wheels/meson-1.5.0-py3-none-any.whl
+++ /dev/null
Binary files differ
diff --git a/python/wheels/meson-1.8.1-py3-none-any.whl b/python/wheels/meson-1.8.1-py3-none-any.whl
new file mode 100644
index 0000000..a885f0e
--- /dev/null
+++ b/python/wheels/meson-1.8.1-py3-none-any.whl
Binary files differ
diff --git a/pythondeps.toml b/pythondeps.toml
index 7eaaa0f..7884ab5 100644
--- a/pythondeps.toml
+++ b/pythondeps.toml
@@ -19,7 +19,7 @@
[meson]
# The install key should match the version in python/wheels/
-meson = { accepted = ">=1.5.0", installed = "1.5.0", canary = "meson" }
+meson = { accepted = ">=1.5.0", installed = "1.8.1", canary = "meson" }
pycotap = { accepted = ">=1.1.0", installed = "1.3.1" }
[docs]
diff --git a/qapi/acpi.json b/qapi/acpi.json
index 045dab6..2d53b82 100644
--- a/qapi/acpi.json
+++ b/qapi/acpi.json
@@ -80,7 +80,7 @@
##
# @ACPIOSTInfo:
#
-# OSPM Status Indication for a device For description of possible
+# OSPM Status Indication for a device. For description of possible
# values of @source and @status fields see "_OST (OSPM Status
# Indication)" chapter of ACPI5.0 spec.
#
diff --git a/qapi/audio.json b/qapi/audio.json
index 8de4430..16de231 100644
--- a/qapi/audio.json
+++ b/qapi/audio.json
@@ -309,9 +309,9 @@
#
# @name: name of the sink/source to use
#
-# @stream-name: name of the PulseAudio stream created by qemu. Can be
+# @stream-name: name of the PulseAudio stream created by QEMU. Can be
# used to identify the stream in PulseAudio when you create
-# multiple PulseAudio devices or run multiple qemu instances
+# multiple PulseAudio devices or run multiple QEMU instances
# (default: audiodev's id, since 4.2)
#
# @latency: latency you want PulseAudio to achieve in microseconds
@@ -353,9 +353,9 @@
#
# @name: name of the sink/source to use
#
-# @stream-name: name of the PipeWire stream created by qemu. Can be
+# @stream-name: name of the PipeWire stream created by QEMU. Can be
# used to identify the stream in PipeWire when you create multiple
-# PipeWire devices or run multiple qemu instances (default:
+# PipeWire devices or run multiple QEMU instances (default:
# audiodev's id)
#
# @latency: latency you want PipeWire to achieve in microseconds
diff --git a/qapi/block-core.json b/qapi/block-core.json
index b411511..1df6644 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -31,8 +31,8 @@
# @icount: Current instruction count. Appears when execution
# record/replay is enabled. Used for "time-traveling" to match
# the moment in the recorded execution with the snapshots. This
-# counter may be obtained through @query-replay command (since
-# 5.2)
+# counter may be obtained through @query-replay command
+# (since 5.2)
#
# Since: 1.3
##
@@ -488,7 +488,7 @@
#
# @active: true if the backend is active; typical cases for inactive backends
# are on the migration source instance after migration completes and on the
-# destination before it completes. (since: 10.0)
+# destination before it completes. (since: 10.0)
#
# @encrypted: true if the backing device is encrypted
#
@@ -510,11 +510,11 @@
#
# @bps_max: total throughput limit during bursts, in bytes (Since 1.7)
#
-# @bps_rd_max: read throughput limit during bursts, in bytes (Since
-# 1.7)
+# @bps_rd_max: read throughput limit during bursts, in bytes
+# (Since 1.7)
#
-# @bps_wr_max: write throughput limit during bursts, in bytes (Since
-# 1.7)
+# @bps_wr_max: write throughput limit during bursts, in bytes
+# (Since 1.7)
#
# @iops_max: total I/O operations per second during bursts, in bytes
# (Since 1.7)
@@ -951,11 +951,11 @@
# @unmap_operations: The number of unmap operations performed by the
# device (Since 4.2)
#
-# @rd_total_time_ns: Total time spent on reads in nanoseconds (since
-# 0.15).
+# @rd_total_time_ns: Total time spent on reads in nanoseconds
+# (since 0.15)
#
-# @wr_total_time_ns: Total time spent on writes in nanoseconds (since
-# 0.15).
+# @wr_total_time_ns: Total time spent on writes in nanoseconds
+# (since 0.15)
#
# @zone_append_total_time_ns: Total time spent on zone append writes
# in nanoseconds (since 8.1)
@@ -1322,8 +1322,8 @@
# @incremental: only copy data described by the dirty bitmap.
# (since: 2.4)
#
-# @bitmap: only copy data described by the dirty bitmap. (since: 4.2)
-# Behavior on completion is determined by the BitmapSyncMode.
+# @bitmap: only copy data described by the dirty bitmap. Behavior on
+# completion is determined by the BitmapSyncMode. (since: 4.2)
#
# Since: 1.3
##
@@ -1337,7 +1337,7 @@
# bitmap when used for data copy operations.
#
# @on-success: The bitmap is only synced when the operation is
-# successful. This is the behavior always used for 'INCREMENTAL'
+# successful. This is the behavior always used for incremental
# backups.
#
# @never: The bitmap is never synchronized with the operation, and is
@@ -1417,8 +1417,8 @@
# @auto-finalize: Job will finalize itself when PENDING, moving to the
# CONCLUDED state. (since 2.12)
#
-# @auto-dismiss: Job will dismiss itself when CONCLUDED, moving to the
-# NULL state and disappearing from the query list. (since 2.12)
+# @auto-dismiss: Job will dismiss itself when CONCLUDED, and
+# disappear. (since 2.12)
#
# @error: Error information if the job did not complete successfully.
# Not set if the job completed successfully. (since 2.12.1)
@@ -1502,15 +1502,15 @@
#
# @device: the name of the device to take a snapshot of.
#
-# @node-name: graph node name to generate the snapshot from (Since
-# 2.0)
+# @node-name: graph node name to generate the snapshot from
+# (Since 2.0)
#
# @snapshot-file: the target of the new overlay image. If the file
# exists, or if it is a device, the overlay will be created in the
# existing file/device. Otherwise, a new file will be created.
#
-# @snapshot-node-name: the graph node name of the new image (Since
-# 2.0)
+# @snapshot-node-name: the graph node name of the new image
+# (Since 2.0)
#
# @format: the format of the overlay image, default is 'qcow2'.
#
@@ -1589,7 +1589,7 @@
#
# @bitmap-mode: Specifies the type of data the bitmap should contain
# after the operation concludes. Must be present if a bitmap was
-# provided, Must NOT be present otherwise. (Since 4.2)
+# provided, must **not** be present otherwise. (Since 4.2)
#
# @compress: true to compress data, if the target format supports it.
# (default: false) (since 2.8)
@@ -1606,16 +1606,15 @@
# copy-before-write jobs; defaults to break-guest-write. (Since 10.1)
#
# @auto-finalize: When false, this job will wait in a PENDING state
-# after it has finished its work, waiting for @block-job-finalize
-# before making any block graph changes. When true, this job will
+# after it has finished its work, waiting for @job-finalize before
+# making any block graph changes. When true, this job will
# automatically perform its abort or commit actions. Defaults to
# true. (Since 2.12)
#
# @auto-dismiss: When false, this job will wait in a CONCLUDED state
# after it has completely ceased all work, and awaits
-# @block-job-dismiss. When true, this job will automatically
-# disappear from the query list without user intervention.
-# Defaults to true. (Since 2.12)
+# @job-dismiss. When true, this job will automatically disappear
+# without user intervention. Defaults to true. (Since 2.12)
#
# @filter-node-name: the node name that should be assigned to the
# filter driver that the backup job inserts into the graph above
@@ -1785,8 +1784,7 @@
# If top == base, that is an error. If top has no overlays on top of
# it, or if it is in use by a writer, the job will not be completed by
# itself. The user needs to complete the job with the
-# block-job-complete command after getting the ready event. (Since
-# 2.0)
+# job-complete command after getting the ready event. (Since 2.0)
#
# If the base image is smaller than top, then the base image will be
# resized to be the same size as top. If top is smaller than the base
@@ -1840,7 +1838,7 @@
# @speed: the maximum speed, in bytes per second
#
# @on-error: the action to take on an error. 'ignore' means that the
-# request should be retried. (default: report; Since: 5.0)
+# request should be retried. (default: report; since: 5.0)
#
# @filter-node-name: the node name that should be assigned to the
# filter driver that the commit job inserts into the graph above
@@ -1848,16 +1846,15 @@
# autogenerated. (Since: 2.9)
#
# @auto-finalize: When false, this job will wait in a PENDING state
-# after it has finished its work, waiting for @block-job-finalize
-# before making any block graph changes. When true, this job will
+# after it has finished its work, waiting for @job-finalize before
+# making any block graph changes. When true, this job will
# automatically perform its abort or commit actions. Defaults to
# true. (Since 3.1)
#
# @auto-dismiss: When false, this job will wait in a CONCLUDED state
# after it has completely ceased all work, and awaits
-# @block-job-dismiss. When true, this job will automatically
-# disappear from the query list without user intervention.
-# Defaults to true. (Since 3.1)
+# @job-dismiss. When true, this job will automatically disappear
+# without user intervention. Defaults to true. (Since 3.1)
#
# Features:
#
@@ -1895,7 +1892,7 @@
# The status of ongoing drive-backup operations can be checked with
# query-block-jobs where the BlockJobInfo.type field has the value
# 'backup'. The operation can be stopped before it has completed
-# using the block-job-cancel command.
+# using the job-cancel or block-job-cancel command.
#
# Features:
#
@@ -1926,7 +1923,7 @@
# The status of ongoing blockdev-backup operations can be checked with
# query-block-jobs where the BlockJobInfo.type field has the value
# 'backup'. The operation can be stopped before it has completed
-# using the block-job-cancel command.
+# using the job-cancel or block-job-cancel command.
#
# Errors:
# - If @device is not a valid block device, DeviceNotFound
@@ -2030,7 +2027,7 @@
#
# @id: Block graph node identifier. This @id is generated only for
# x-debug-query-block-graph and does not relate to any other
-# identifiers in Qemu.
+# identifiers in QEMU.
#
# @type: Type of graph node. Can be one of block-backend, block-job
# or block-driver-state.
@@ -2169,8 +2166,8 @@
# @format: the format of the new destination, default is to probe if
# @mode is 'existing', else the format of the source
#
-# @node-name: the new block driver state node name in the graph (Since
-# 2.1)
+# @node-name: the new block driver state node name in the graph
+# (Since 2.1)
#
# @replaces: with sync=full graph node name to be replaced by the new
# image when a whole image copy is done. This can be used to
@@ -2212,16 +2209,15 @@
# 'background' (Since: 3.0)
#
# @auto-finalize: When false, this job will wait in a PENDING state
-# after it has finished its work, waiting for @block-job-finalize
-# before making any block graph changes. When true, this job will
+# after it has finished its work, waiting for @job-finalize before
+# making any block graph changes. When true, this job will
# automatically perform its abort or commit actions. Defaults to
# true. (Since 3.1)
#
# @auto-dismiss: When false, this job will wait in a CONCLUDED state
# after it has completely ceased all work, and awaits
-# @block-job-dismiss. When true, this job will automatically
-# disappear from the query list without user intervention.
-# Defaults to true. (Since 3.1)
+# @job-dismiss. When true, this job will automatically disappear
+# without user intervention. Defaults to true. (Since 3.1)
#
# Since: 1.3
##
@@ -2531,16 +2527,15 @@
# 'background' (Since: 3.0)
#
# @auto-finalize: When false, this job will wait in a PENDING state
-# after it has finished its work, waiting for @block-job-finalize
-# before making any block graph changes. When true, this job will
+# after it has finished its work, waiting for @job-finalize before
+# making any block graph changes. When true, this job will
# automatically perform its abort or commit actions. Defaults to
# true. (Since 3.1)
#
# @auto-dismiss: When false, this job will wait in a CONCLUDED state
# after it has completely ceased all work, and awaits
-# @block-job-dismiss. When true, this job will automatically
-# disappear from the query list without user intervention.
-# Defaults to true. (Since 3.1)
+# @job-dismiss. When true, this job will automatically disappear
+# without user intervention. Defaults to true. (Since 3.1)
#
# @target-is-zero: Assume the destination reads as all zeroes before
# the mirror started. Setting this to true can speed up the
@@ -2593,11 +2588,11 @@
#
# @bps_max: total throughput limit during bursts, in bytes (Since 1.7)
#
-# @bps_rd_max: read throughput limit during bursts, in bytes (Since
-# 1.7)
+# @bps_rd_max: read throughput limit during bursts, in bytes
+# (Since 1.7)
#
-# @bps_wr_max: write throughput limit during bursts, in bytes (Since
-# 1.7)
+# @bps_wr_max: write throughput limit during bursts, in bytes
+# (Since 1.7)
#
# @iops_max: total I/O operations per second during bursts, in bytes
# (Since 1.7)
@@ -2667,7 +2662,7 @@
# @iops-total-max: I/O operations burst
#
# @iops-total-max-length: length of the iops-total-max burst period,
-# in seconds It must only be set if @iops-total-max is set as
+# in seconds. It must only be set if @iops-total-max is set as
# well.
#
# @iops-read: limit read operations per second
@@ -2675,14 +2670,14 @@
# @iops-read-max: I/O operations read burst
#
# @iops-read-max-length: length of the iops-read-max burst period, in
-# seconds It must only be set if @iops-read-max is set as well.
+# seconds. It must only be set if @iops-read-max is set as well.
#
# @iops-write: limit write operations per second
#
# @iops-write-max: I/O operations write burst
#
# @iops-write-max-length: length of the iops-write-max burst period,
-# in seconds It must only be set if @iops-write-max is set as
+# in seconds. It must only be set if @iops-write-max is set as
# well.
#
# @bps-total: limit total bytes per second
@@ -2697,14 +2692,14 @@
# @bps-read-max: total bytes read burst
#
# @bps-read-max-length: length of the bps-read-max burst period, in
-# seconds It must only be set if @bps-read-max is set as well.
+# seconds. It must only be set if @bps-read-max is set as well.
#
# @bps-write: limit write bytes per second
#
# @bps-write-max: total bytes write burst
#
# @bps-write-max-length: length of the bps-write-max burst period, in
-# seconds It must only be set if @bps-write-max is set as well.
+# seconds. It must only be set if @bps-write-max is set as well.
#
# @iops-size: when limiting by iops max size of an I/O in bytes
#
@@ -2789,12 +2784,12 @@
# immediately once streaming has started. The status of ongoing block
# streaming operations can be checked with query-block-jobs. The
# operation can be stopped before it has completed using the
-# block-job-cancel command.
+# job-cancel or block-job-cancel command.
#
# The node that receives the data is called the top image, can be
# located in any part of the chain (but always above the base image;
# see below) and can be specified using its device or node name.
-# Earlier qemu versions only allowed 'device' to name the top level
+# Earlier QEMU versions only allowed 'device' to name the top level
# node; presence of the 'base-node' parameter during introspection can
# be used as a witness of the enhanced semantics of 'device'.
#
@@ -2859,16 +2854,15 @@
# autogenerated. (Since: 6.0)
#
# @auto-finalize: When false, this job will wait in a PENDING state
-# after it has finished its work, waiting for @block-job-finalize
-# before making any block graph changes. When true, this job will
+# after it has finished its work, waiting for @job-finalize before
+# making any block graph changes. When true, this job will
# automatically perform its abort or commit actions. Defaults to
# true. (Since 3.1)
#
# @auto-dismiss: When false, this job will wait in a CONCLUDED state
# after it has completely ceased all work, and awaits
-# @block-job-dismiss. When true, this job will automatically
-# disappear from the query list without user intervention.
-# Defaults to true. (Since 3.1)
+# @job-dismiss. When true, this job will automatically disappear
+# without user intervention. Defaults to true. (Since 3.1)
#
# Errors:
# - If @device does not exist, DeviceNotFound.
@@ -3030,10 +3024,10 @@
# state. Completing the job in any other state is an error.
#
# This is supported only for drive mirroring, where it also switches
-# the device to write to the target path only. Note that drive
+# the device to write to the target path only. Note that drive
# mirroring includes drive-mirror, blockdev-mirror and block-commit
# job (only in case of "active commit", when the node being commited
-# is used by the guest). The ability to complete is signaled with a
+# is used by the guest). The ability to complete is signaled with a
# BLOCK_JOB_READY event.
#
# This command completes an active background block operation
@@ -3068,16 +3062,16 @@
#
# Deletes a job that is in the CONCLUDED state. This command only
# needs to be run explicitly for jobs that don't have automatic
-# dismiss enabled. In turn, automatic dismiss may be enabled only
+# dismiss enabled. In turn, automatic dismiss may be enabled only
# for jobs that have @auto-dismiss option, which are drive-backup,
# blockdev-backup, drive-mirror, blockdev-mirror, block-commit and
-# block-stream. @auto-dismiss is enabled by default for these
+# block-stream. @auto-dismiss is enabled by default for these
# jobs.
#
# This command will refuse to operate on any job that has not yet
-# reached its terminal state, JOB_STATUS_CONCLUDED. For jobs that
-# make use of the BLOCK_JOB_READY event, block-job-cancel or
-# block-job-complete will still need to be used as appropriate.
+# reached its terminal state, CONCLUDED. For jobs that make use of
+# the BLOCK_JOB_READY event, job-cancel, block-job-cancel or
+# job-complete will still need to be used as appropriate.
#
# @id: The job identifier.
#
@@ -3196,7 +3190,7 @@
#
# Selects the AIO backend to handle I/O requests
#
-# @threads: Use qemu's thread pool
+# @threads: Use QEMU's thread pool
#
# @native: Use native AIO backend (only Linux and Windows)
#
@@ -3415,8 +3409,8 @@
# Driver specific block device options for LUKS.
#
# @key-secret: the ID of a QCryptoSecret object providing the
-# decryption key (since 2.6). Mandatory except when doing a
-# metadata-only probe of the image.
+# decryption key. Mandatory except when doing a metadata-only
+# probe of the image. (since 2.6)
#
# @header: block device holding a detached LUKS header. (since 9.0)
#
@@ -3655,8 +3649,8 @@
# this feature. (since 2.5)
#
# @encrypt: Image decryption options. Mandatory for encrypted images,
-# except when doing a metadata-only probe of the image. (since
-# 2.10)
+# except when doing a metadata-only probe of the image.
+# (since 2.10)
#
# @data-file: reference to or definition of the external data file.
# This may only be specified for images that require an external
@@ -4326,8 +4320,8 @@
# @user: Ceph id name.
#
# @auth-client-required: Acceptable authentication modes. This maps
-# to Ceph configuration option "auth_client_required". (Since
-# 3.0)
+# to Ceph configuration option "auth_client_required".
+# (Since 3.0)
#
# @key-secret: ID of a QCryptoSecret object providing a key for cephx
# authentication. This maps to Ceph configuration option "key".
@@ -4581,8 +4575,8 @@
# error. During the first @reconnect-delay seconds, all requests
# are paused and will be rerun on a successful reconnect. After
# that time, any delayed requests and all future requests before a
-# successful reconnect will immediately fail. Default 0 (Since
-# 4.2)
+# successful reconnect will immediately fail. Default 0
+# (Since 4.2)
#
# @open-timeout: In seconds. If zero, the nbd driver tries the
# connection only once, and fails to open if the connection fails.
@@ -4724,11 +4718,11 @@
#
# @driver: block driver name
#
-# @node-name: the node name of the new node (Since 2.0). This option
-# is required on the top level of blockdev-add. Valid node names
-# start with an alphabetic character and may contain only
-# alphanumeric characters, '-', '.' and '_'. Their maximum length
-# is 31 characters.
+# @node-name: the node name of the new node. This option is required
+# on the top level of blockdev-add. Valid node names start with
+# an alphabetic character and may contain only alphanumeric
+# characters, '-', '.' and '_'. Their maximum length is 31
+# characters. (Since 2.0)
#
# @discard: discard-related options (default: ignore)
#
@@ -4737,7 +4731,7 @@
# @active: whether the block node should be activated (default: true).
# Having inactive block nodes is useful primarily for migration because it
# allows opening an image on the destination while the source is still
-# holding locks for it. (Since 10.0)
+# holding locks for it. (Since 10.0)
#
# @read-only: whether the block device should be read-only (default:
# false). Note that some block drivers support only read-only
@@ -4947,7 +4941,7 @@
# 3) A reference to a different node: the current child is replaced
# with the specified one.
#
-# 4) NULL: the current child (if any) is detached.
+# 4) null: the current child (if any) is detached.
#
# Options (1) and (2) are supported in all cases. Option (3) is
# supported for @file and @backing, and option (4) for @backing only.
@@ -4999,14 +4993,14 @@
##
# @blockdev-set-active:
#
-# Activate or inactivate a block device. Use this to manage the handover of
+# Activate or inactivate a block device. Use this to manage the handover of
# block devices on migration with qemu-storage-daemon.
#
# Activating a node automatically activates all of its child nodes first.
# Inactivating a node automatically inactivates any of its child nodes that are
# not in use by a still active node.
#
-# @node-name: Name of the graph node to activate or inactivate. By default, all
+# @node-name: Name of the graph node to activate or inactivate. By default, all
# nodes are affected by the operation.
#
# @active: true if the nodes should be active when the command returns success,
@@ -5157,10 +5151,10 @@
##
# @BlockdevQcow2Version:
#
-# @v2: The original QCOW2 format as introduced in qemu 0.10 (version
+# @v2: The original QCOW2 format as introduced in QEMU 0.10 (version
# 2)
#
-# @v3: The extended QCOW2 format as introduced in qemu 1.1 (version 3)
+# @v3: The extended QCOW2 format as introduced in QEMU 1.1 (version 3)
#
# Since: 2.12
##
@@ -5580,7 +5574,7 @@
# @x-blockdev-amend:
#
# Starts a job to amend format specific options of an existing open
-# block device The job is automatically finalized, but a manual
+# block device. The job is automatically finalized, but a manual
# job-dismiss is required.
#
# @job-id: Identifier for the newly created job.
@@ -5589,7 +5583,7 @@
#
# @options: Options (driver specific)
#
-# @force: Allow unsafe operations, format specific For luks that
+# @force: Allow unsafe operations, format specific. For luks that
# allows erase of the last active keyslot (permanent loss of
# data), and replacement of an active keyslot (possible loss of
# data if IO error happens)
@@ -5866,7 +5860,7 @@
# @BLOCK_JOB_PENDING:
#
# Emitted when a block job is awaiting explicit authorization to
-# finalize graph changes via @block-job-finalize. If this job is part
+# finalize graph changes via @job-finalize. If this job is part
# of a transaction, it will not emit this event until the transaction
# has converged first.
#
diff --git a/qapi/block-export.json b/qapi/block-export.json
index c783e01..ed4deb5 100644
--- a/qapi/block-export.json
+++ b/qapi/block-export.json
@@ -169,7 +169,7 @@
# @growable: Whether writes beyond the EOF should grow the block node
# accordingly. (default: false)
#
-# @allow-other: If this is off, only qemu's user is allowed access to
+# @allow-other: If this is off, only QEMU's user is allowed access to
# this export. That cannot be changed even with chmod or chown.
# Enabling this option will allow other users access to the export
# with the FUSE mount option "allow_other". Note that using
@@ -373,9 +373,9 @@
# (since: 5.2)
#
# @allow-inactive: If true, the export allows the exported node to be inactive.
-# If it is created for an inactive block node, the node remains inactive. If
+# If it is created for an inactive block node, the node remains inactive. If
# the export type doesn't support running on an inactive node, an error is
-# returned. If false, inactive block nodes are automatically activated before
+# returned. If false, inactive block nodes are automatically activated before
# creating the export and trying to inactivate them later fails.
# (since: 10.0; default: false)
#
diff --git a/qapi/block.json b/qapi/block.json
index f5374bd..1490a1a 100644
--- a/qapi/block.json
+++ b/qapi/block.json
@@ -48,7 +48,7 @@
##
# @FloppyDriveType:
#
-# Type of Floppy drive to be emulated by the Floppy Disk Controller.
+# Type of floppy drive to be emulated by the Floppy Disk Controller.
#
# @144: 1.44MB 3.5" drive
#
diff --git a/qapi/char.json b/qapi/char.json
index 447c10b..df6e325 100644
--- a/qapi/char.json
+++ b/qapi/char.json
@@ -274,7 +274,7 @@
# @reconnect: For a client socket, if a socket is disconnected, then
# attempt a reconnect after the given number of seconds. Setting
# this to zero disables this function. The use of this member is
-# deprecated, use @reconnect-ms instead. (default: 0) (Since: 2.2)
+# deprecated, use @reconnect-ms instead. (default: 0) (Since: 2.2)
#
# @reconnect-ms: For a client socket, if a socket is disconnected,
# then attempt a reconnect after the given number of milliseconds.
@@ -351,7 +351,7 @@
# Configuration info for stdio chardevs.
#
# @signal: Allow signals (such as SIGINT triggered by ^C) be delivered
-# to qemu. Default: true.
+# to QEMU. Default: true.
#
# Since: 1.5
##
@@ -443,7 +443,7 @@
##
# @ChardevQemuVDAgent:
#
-# Configuration info for qemu vdagent implementation.
+# Configuration info for QEMU vdagent implementation.
#
# @mouse: enable/disable mouse, default is enabled.
#
@@ -656,7 +656,7 @@
##
# @ChardevQemuVDAgentWrapper:
#
-# @data: Configuration info for qemu vdagent implementation
+# @data: Configuration info for QEMU vdagent implementation
#
# Since: 6.1
##
diff --git a/qapi/crypto.json b/qapi/crypto.json
index c9d967d..9ec6301 100644
--- a/qapi/crypto.json
+++ b/qapi/crypto.json
@@ -55,7 +55,8 @@
# @sha512: SHA-512. (since 2.7)
#
# @ripemd160: RIPEMD-160. (since 2.7)
-# @sm3: SM3. (since 9.2.0)
+#
+# @sm3: SM3. (since 9.2.0)
#
# Since: 2.6
##
@@ -202,19 +203,19 @@
#
# The options that apply to LUKS encryption format initialization
#
-# @cipher-alg: the cipher algorithm for data encryption Currently
+# @cipher-alg: the cipher algorithm for data encryption. Currently
# defaults to 'aes-256'.
#
-# @cipher-mode: the cipher mode for data encryption Currently defaults
-# to 'xts'
+# @cipher-mode: the cipher mode for data encryption. Currently
+# defaults to 'xts'
#
-# @ivgen-alg: the initialization vector generator Currently defaults
+# @ivgen-alg: the initialization vector generator. Currently defaults
# to 'plain64'
#
-# @ivgen-hash-alg: the initialization vector generator hash Currently
-# defaults to 'sha256'
+# @ivgen-hash-alg: the initialization vector generator hash.
+# Currently defaults to 'sha256'
#
-# @hash-alg: the master key hash algorithm Currently defaults to
+# @hash-alg: the master key hash algorithm. Currently defaults to
# 'sha256'
#
# @iter-time: number of milliseconds to spend in PBKDF passphrase
@@ -370,11 +371,11 @@
# @new-secret: The ID of a QCryptoSecret object providing the password
# to be written into added active keyslots
#
-# @old-secret: Optional (for deactivation only) If given will
+# @old-secret: Optional (for deactivation only). If given will
# deactivate all keyslots that match password located in
# QCryptoSecret with this ID
#
-# @iter-time: Optional (for activation only) Number of milliseconds to
+# @iter-time: Optional (for activation only). Number of milliseconds to
# spend in PBKDF passphrase processing for the newly activated
# keyslot. Currently defaults to 2000.
#
diff --git a/qapi/cryptodev.json b/qapi/cryptodev.json
index 28b97eb..b13db26 100644
--- a/qapi/cryptodev.json
+++ b/qapi/cryptodev.json
@@ -15,7 +15,7 @@
#
# @sym: symmetric encryption
#
-# @asym: asymmetric Encryption
+# @asym: asymmetric encryption
#
# Since: 8.0
##
diff --git a/qapi/cxl.json b/qapi/cxl.json
index dd947d3..8f2e923 100644
--- a/qapi/cxl.json
+++ b/qapi/cxl.json
@@ -117,7 +117,7 @@
# @nibble-mask: Identifies one or more nibbles that the error affects
#
# @bank-group: Bank group of the memory event location, incorporating
-# a number of Banks.
+# a number of banks.
#
# @bank: Bank of the memory event location. A single bank is accessed
# per read or write of the memory.
diff --git a/qapi/dump.json b/qapi/dump.json
index f2835c0..d0ba1f0 100644
--- a/qapi/dump.json
+++ b/qapi/dump.json
@@ -54,9 +54,9 @@
# @paging: if true, do paging to get guest's memory mapping. This
# allows using gdb to process the core file.
#
-# IMPORTANT: this option can make QEMU allocate several gigabytes
-# of RAM. This can happen for a large guest, or a malicious guest
-# pretending to be large.
+# **Important**: this option can make QEMU allocate several
+# gigabytes of RAM. This can happen for a large guest, or a
+# malicious guest pretending to be large.
#
# Also, paging=true has the following limitations:
#
diff --git a/qapi/introspect.json b/qapi/introspect.json
index 01bb242..e9e0297 100644
--- a/qapi/introspect.json
+++ b/qapi/introspect.json
@@ -26,9 +26,9 @@
# the QAPI schema.
#
# Furthermore, while we strive to keep the QMP wire format
-# backwards-compatible across qemu versions, the introspection output
+# backwards-compatible across QEMU versions, the introspection output
# is not guaranteed to have the same stability. For example, one
-# version of qemu may list an object member as an optional
+# version of QEMU may list an object member as an optional
# non-variant, while another lists the same member only through the
# object's variants; or the type of a member may change from a generic
# string into a specific enum or from one specific type into an
@@ -154,8 +154,8 @@
#
# Additional SchemaInfo members for meta-type 'enum'.
#
-# @members: the enum type's members, in no particular order (since
-# 6.2).
+# @members: the enum type's members, in no particular order.
+# (since 6.2)
#
# @values: the enumeration type's member names, in no particular
# order. Redundant with @members. Just for backward
diff --git a/qapi/job.json b/qapi/job.json
index b03f80b..126fa5c 100644
--- a/qapi/job.json
+++ b/qapi/job.json
@@ -20,14 +20,14 @@
#
# @create: image creation job type, see "blockdev-create" (since 3.0)
#
-# @amend: image options amend job type, see "x-blockdev-amend" (since
-# 5.1)
+# @amend: image options amend job type, see "x-blockdev-amend"
+# (since 5.1)
#
-# @snapshot-load: snapshot load job type, see "snapshot-load" (since
-# 6.0)
+# @snapshot-load: snapshot load job type, see "snapshot-load"
+# (since 6.0)
#
-# @snapshot-save: snapshot save job type, see "snapshot-save" (since
-# 6.0)
+# @snapshot-save: snapshot save job type, see "snapshot-save"
+# (since 6.0)
#
# @snapshot-delete: snapshot delete job type, see "snapshot-delete"
# (since 6.0)
@@ -74,7 +74,7 @@
# process.
#
# @concluded: The job has finished all work. If auto-dismiss was set
-# to false, the job will remain in the query list until it is
+# to false, the job will remain in this state until it is
# dismissed via @job-dismiss.
#
# @null: The job is in the process of being dismantled. This state
@@ -191,10 +191,10 @@
# state. Completing the job in any other state is an error.
#
# This is supported only for drive mirroring, where it also switches
-# the device to write to the target path only. Note that drive
+# the device to write to the target path only. Note that drive
# mirroring includes drive-mirror, blockdev-mirror and block-commit
# job (only in case of "active commit", when the node being commited
-# is used by the guest). The ability to complete is signaled with a
+# is used by the guest). The ability to complete is signaled with a
# BLOCK_JOB_READY event.
#
# This command completes an active background block operation
@@ -216,16 +216,16 @@
#
# Deletes a job that is in the CONCLUDED state. This command only
# needs to be run explicitly for jobs that don't have automatic
-# dismiss enabled. In turn, automatic dismiss may be enabled only
+# dismiss enabled. In turn, automatic dismiss may be enabled only
# for jobs that have @auto-dismiss option, which are drive-backup,
# blockdev-backup, drive-mirror, blockdev-mirror, block-commit and
-# block-stream. @auto-dismiss is enabled by default for these
+# block-stream. @auto-dismiss is enabled by default for these
# jobs.
#
# This command will refuse to operate on any job that has not yet
-# reached its terminal state, JOB_STATUS_CONCLUDED. For jobs that
-# make use of JOB_READY event, job-cancel or job-complete will still
-# need to be used as appropriate.
+# reached its terminal state, CONCLUDED. For jobs that make use of
+# the JOB_READY event, job-cancel or job-complete will still need to
+# be used as appropriate.
#
# @id: The job identifier.
#
diff --git a/qapi/machine.json b/qapi/machine.json
index 5373e13..0650b8d 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -182,8 +182,8 @@
# @default-cpu-type: default CPU model typename if none is requested
# via the -cpu argument. (since 4.2)
#
-# @default-ram-id: the default ID of initial RAM memory backend (since
-# 5.2)
+# @default-ram-id: the default ID of initial RAM memory backend
+# (since 5.2)
#
# @acpi: machine type supports ACPI (since 8.0)
#
@@ -694,7 +694,7 @@
# Structure of HMAT (Heterogeneous Memory Attribute Table)
#
# For more information about @HmatLBDataType, see chapter 5.2.27.4:
-# Table 5-146: Field "Data Type" of ACPI 6.3 spec.
+# Table 5-146: Field "Data Type" of ACPI 6.3 spec.
#
# @access-latency: access latency (nanoseconds)
#
@@ -811,7 +811,7 @@
#
# @policy: the write policy, none/write-back/write-through.
#
-# @line: the cache Line size in bytes.
+# @line: the cache line size in bytes.
#
# Since: 5.0
##
@@ -1089,7 +1089,7 @@
# :annotated:
#
# For s390x-virtio-ccw machine type started with
-# ``-smp 1,maxcpus=2 -cpu qemu`` (Since: 2.11)::
+# ``-smp 1,maxcpus=2 -cpu qemu``::
#
# -> { "execute": "query-hotpluggable-cpus" }
# <- {"return": [
@@ -1160,7 +1160,7 @@
#
# Information about the guest balloon device.
#
-# @actual: the logical size of the VM in bytes Formula used:
+# @actual: the logical size of the VM in bytes. Formula used:
# logical_vm_size = vm_ram_size - balloon_size
#
# Since: 0.14
@@ -1199,7 +1199,7 @@
# is equivalent to the @actual field return by the 'query-balloon'
# command
#
-# @actual: the logical size of the VM in bytes Formula used:
+# @actual: the logical size of the VM in bytes. Formula used:
# logical_vm_size = vm_ram_size - balloon_size
#
# .. note:: This event is rate-limited.
diff --git a/qapi/migration.json b/qapi/migration.json
index 41826bd..4963f6c 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -57,8 +57,8 @@
#
# @dirty-sync-missed-zero-copy: Number of times dirty RAM
# synchronization could not avoid copying dirty pages. This is
-# between 0 and @dirty-sync-count * @multifd-channels. (since
-# 7.1)
+# between 0 and @dirty-sync-count * @multifd-channels.
+# (since 7.1)
#
# Since: 0.14
##
@@ -137,16 +137,16 @@
#
# @active: in the process of doing migration.
#
-# @postcopy-active: like active, but now in postcopy mode. (since
-# 2.5)
+# @postcopy-active: like active, but now in postcopy mode.
+# (since 2.5)
#
# @postcopy-paused: during postcopy but paused. (since 3.0)
#
# @postcopy-recover-setup: setup phase for a postcopy recovery
# process, preparing for a recovery phase to start. (since 9.1)
#
-# @postcopy-recover: trying to recover from a paused postcopy. (since
-# 3.0)
+# @postcopy-recover: trying to recover from a paused postcopy.
+# (since 3.0)
#
# @completed: migration is finished.
#
@@ -407,7 +407,7 @@
# @postcopy-ram: Start executing on the migration target before all of
# RAM has been migrated, pulling the remaining pages along as
# needed. The capacity must have the same setting on both source
-# and target or migration will not even start. NOTE: If the
+# and target or migration will not even start. **Note:** if the
# migration fails during postcopy the VM will fail. (since 2.6)
#
# @x-colo: If enabled, migration will never end, and the state of the
@@ -415,15 +415,15 @@
# on secondary side, this process is called COarse-Grain LOck
# Stepping (COLO) for Non-stop Service. (since 2.8)
#
-# @release-ram: if enabled, qemu will free the migrated ram pages on
+# @release-ram: if enabled, QEMU will free the migrated ram pages on
# the source during postcopy-ram migration. (since 2.9)
#
# @return-path: If enabled, migration will use the return path even
# for precopy. (since 2.10)
#
# @pause-before-switchover: Pause outgoing migration before
-# serialising device state and before disabling block IO (since
-# 2.11)
+# serialising device state and before disabling block IO
+# (since 2.11)
#
# @multifd: Use more than one fd for migration (since 4.0)
#
@@ -697,8 +697,8 @@
# @alias: An alias name for migration (for example the bitmap name on
# the opposite site).
#
-# @transform: Allows the modification of the migrated bitmap. (since
-# 6.0)
+# @transform: Allows the modification of the migrated bitmap.
+# (since 6.0)
#
# Since: 5.2
##
@@ -760,9 +760,9 @@
# auto-converge detects that migration is not making progress.
# The default value is 10. (Since 2.7)
#
-# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage At
-# the tail stage of throttling, the Guest is very sensitive to CPU
-# percentage while the @cpu-throttle -increment is excessive
+# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage.
+# At the tail stage of throttling, the Guest is very sensitive to
+# CPU percentage while the @cpu-throttle -increment is excessive
# usually at tail stage. If this parameter is true, we will
# compute the ideal CPU percentage used by the Guest, which may
# exactly make the dirty rate match the dirty rate threshold.
@@ -770,8 +770,8 @@
# specified by @cpu-throttle-increment and the one generated by
# ideal CPU percentage. Therefore, it is compatible to
# traditional throttling, meanwhile the throttle increment won't
-# be excessive at tail stage. The default value is false. (Since
-# 5.1)
+# be excessive at tail stage. The default value is false.
+# (Since 5.1)
#
# @tls-creds: ID of the 'tls-creds' object that provides credentials
# for establishing a TLS connection over the migration data
@@ -801,10 +801,10 @@
# (Since 2.8)
#
# @avail-switchover-bandwidth: to set the available bandwidth that
-# migration can use during switchover phase. NOTE! This does not
-# limit the bandwidth during switchover, but only for calculations
-# when making decisions to switchover. By default, this value is
-# zero, which means QEMU will estimate the bandwidth
+# migration can use during switchover phase. **Note:** this does
+# not limit the bandwidth during switchover, but only for
+# calculations when making decisions to switchover. By default,
+# this value is zero, which means QEMU will estimate the bandwidth
# automatically. This can be set when the estimated value is not
# accurate, while the user is able to guarantee such bandwidth is
# available when switching over. When specified correctly, this
@@ -842,9 +842,9 @@
# more CPU. Defaults to 1. (Since 5.0)
#
# @multifd-qatzip-level: Set the compression level to be used in live
-# migration. The level is an integer between 1 and 9, where 1 means
+# migration. The level is an integer between 1 and 9, where 1 means
# the best compression speed, and 9 means the best compression
-# ratio which will consume more CPU. Defaults to 1. (Since 9.2)
+# ratio which will consume more CPU. Defaults to 1. (Since 9.2)
#
# @multifd-zstd-level: Set the compression level to be used in live
# migration, the compression level is an integer between 0 and 20,
@@ -941,9 +941,9 @@
# auto-converge detects that migration is not making progress.
# The default value is 10. (Since 2.7)
#
-# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage At
-# the tail stage of throttling, the Guest is very sensitive to CPU
-# percentage while the @cpu-throttle -increment is excessive
+# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage.
+# At the tail stage of throttling, the Guest is very sensitive to
+# CPU percentage while the @cpu-throttle -increment is excessive
# usually at tail stage. If this parameter is true, we will
# compute the ideal CPU percentage used by the Guest, which may
# exactly make the dirty rate match the dirty rate threshold.
@@ -951,8 +951,8 @@
# specified by @cpu-throttle-increment and the one generated by
# ideal CPU percentage. Therefore, it is compatible to
# traditional throttling, meanwhile the throttle increment won't
-# be excessive at tail stage. The default value is false. (Since
-# 5.1)
+# be excessive at tail stage. The default value is false.
+# (Since 5.1)
#
# @tls-creds: ID of the 'tls-creds' object that provides credentials
# for establishing a TLS connection over the migration data
@@ -982,10 +982,10 @@
# (Since 2.8)
#
# @avail-switchover-bandwidth: to set the available bandwidth that
-# migration can use during switchover phase. NOTE! This does not
-# limit the bandwidth during switchover, but only for calculations
-# when making decisions to switchover. By default, this value is
-# zero, which means QEMU will estimate the bandwidth
+# migration can use during switchover phase. **Note:** this does
+# not limit the bandwidth during switchover, but only for
+# calculations when making decisions to switchover. By default,
+# this value is zero, which means QEMU will estimate the bandwidth
# automatically. This can be set when the estimated value is not
# accurate, while the user is able to guarantee such bandwidth is
# available when switching over. When specified correctly, this
@@ -1023,9 +1023,9 @@
# more CPU. Defaults to 1. (Since 5.0)
#
# @multifd-qatzip-level: Set the compression level to be used in live
-# migration. The level is an integer between 1 and 9, where 1 means
+# migration. The level is an integer between 1 and 9, where 1 means
# the best compression speed, and 9 means the best compression
-# ratio which will consume more CPU. Defaults to 1. (Since 9.2)
+# ratio which will consume more CPU. Defaults to 1. (Since 9.2)
#
# @multifd-zstd-level: Set the compression level to be used in live
# migration, the compression level is an integer between 0 and 20,
@@ -1148,16 +1148,16 @@
# percentage. The default value is 50. (Since 5.0)
#
# @cpu-throttle-initial: Initial percentage of time guest cpus are
-# throttled when migration auto-converge is activated. (Since
-# 2.7)
+# throttled when migration auto-converge is activated.
+# (Since 2.7)
#
# @cpu-throttle-increment: throttle percentage increase each time
# auto-converge detects that migration is not making progress.
# (Since 2.7)
#
-# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage At
-# the tail stage of throttling, the Guest is very sensitive to CPU
-# percentage while the @cpu-throttle -increment is excessive
+# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage.
+# At the tail stage of throttling, the Guest is very sensitive to
+# CPU percentage while the @cpu-throttle -increment is excessive
# usually at tail stage. If this parameter is true, we will
# compute the ideal CPU percentage used by the Guest, which may
# exactly make the dirty rate match the dirty rate threshold.
@@ -1165,8 +1165,8 @@
# specified by @cpu-throttle-increment and the one generated by
# ideal CPU percentage. Therefore, it is compatible to
# traditional throttling, meanwhile the throttle increment won't
-# be excessive at tail stage. The default value is false. (Since
-# 5.1)
+# be excessive at tail stage. The default value is false.
+# (Since 5.1)
#
# @tls-creds: ID of the 'tls-creds' object that provides credentials
# for establishing a TLS connection over the migration data
@@ -1192,10 +1192,10 @@
# (Since 2.8)
#
# @avail-switchover-bandwidth: to set the available bandwidth that
-# migration can use during switchover phase. NOTE! This does not
-# limit the bandwidth during switchover, but only for calculations
-# when making decisions to switchover. By default, this value is
-# zero, which means QEMU will estimate the bandwidth
+# migration can use during switchover phase. **Note:** this does
+# not limit the bandwidth during switchover, but only for
+# calculations when making decisions to switchover. By default,
+# this value is zero, which means QEMU will estimate the bandwidth
# automatically. This can be set when the estimated value is not
# accurate, while the user is able to guarantee such bandwidth is
# available when switching over. When specified correctly, this
@@ -1233,9 +1233,9 @@
# more CPU. Defaults to 1. (Since 5.0)
#
# @multifd-qatzip-level: Set the compression level to be used in live
-# migration. The level is an integer between 1 and 9, where 1 means
+# migration. The level is an integer between 1 and 9, where 1 means
# the best compression speed, and 9 means the best compression
-# ratio which will consume more CPU. Defaults to 1. (Since 9.2)
+# ratio which will consume more CPU. Defaults to 1. (Since 9.2)
#
# @multifd-zstd-level: Set the compression level to be used in live
# migration, the compression level is an integer between 0 and 20,
@@ -1500,7 +1500,7 @@
##
# @x-colo-lost-heartbeat:
#
-# Tell qemu that heartbeat is lost, request it to do takeover
+# Tell QEMU that heartbeat is lost, request it to do takeover
# procedures. If this command is sent to the PVM, the Primary side
# will exit COLO mode. If sent to the Secondary, the Secondary side
# will run failover work, then takes over server operation to become
@@ -1729,8 +1729,8 @@
##
# @migrate-incoming:
#
-# Start an incoming migration, the qemu must have been started with
-# -incoming defer
+# Start an incoming migration. QEMU must have been started with
+# -incoming defer.
#
# @uri: The Uniform Resource Identifier identifying the source or
# address to listen on
diff --git a/qapi/misc-i386.json b/qapi/misc-i386.json
index 3b53464..5fefa0a 100644
--- a/qapi/misc-i386.json
+++ b/qapi/misc-i386.json
@@ -195,7 +195,7 @@
#
# @cbitpos: C-bit location in page table entry
#
-# @reduced-phys-bits: Number of physical Address bit reduction when
+# @reduced-phys-bits: Number of physical address bit reduction when
# SEV is enabled
#
# Since: 2.12
diff --git a/qapi/misc.json b/qapi/misc.json
index dcf9f7d..4b9e601 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -222,8 +222,8 @@
# .. note:: This command only exists as a stop-gap. Its use is highly
# discouraged. The semantics of this command are not guaranteed:
# this means that command names, arguments and responses can change
-# or be removed at ANY time. Applications that rely on long term
-# stability guarantees should NOT use this command.
+# or be removed at **any** time. Applications that rely on long
+# term stability guarantees should **not** use this command.
#
# Known limitations:
#
diff --git a/qapi/net.json b/qapi/net.json
index 310cc4f..97ea183 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -150,12 +150,12 @@
# @domainname: guest-visible domain name of the virtual nameserver
# (since 3.0)
#
-# @ipv6-prefix: IPv6 network prefix (default is fec0::) (since 2.6).
-# The network prefix is given in the usual hexadecimal IPv6
-# address notation.
+# @ipv6-prefix: IPv6 network prefix (default is fec0::). The network
+# prefix is given in the usual hexadecimal IPv6 address notation.
+# (since 2.6)
#
-# @ipv6-prefixlen: IPv6 network prefix length (default is 64) (since
-# 2.6)
+# @ipv6-prefixlen: IPv6 network prefix length (default is 64)
+# (since 2.6)
#
# @ipv6-host: guest-visible IPv6 address of the host (since 2.6)
#
@@ -387,8 +387,8 @@
#
# @hubid: hub identifier number
#
-# @netdev: used to connect hub to a netdev instead of a device (since
-# 2.12)
+# @netdev: used to connect hub to a netdev instead of a device
+# (since 2.12)
#
# Since: 1.2
##
@@ -510,8 +510,8 @@
# @queues: number of queues to be created for multiqueue vhost-vdpa
# (default: 1)
#
-# @x-svq: Start device with (experimental) shadow virtqueue. (Since
-# 7.1) (default: false)
+# @x-svq: Start device with (experimental) shadow virtqueue.
+# (Since 7.1) (default: false)
#
# Features:
#
diff --git a/qapi/qom.json b/qapi/qom.json
index 04c118e..3e8debf 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -870,7 +870,7 @@
# information read from devices and switches in conjunction with
# link characteristics read from PCIe Configuration space.
# To get the full path latency from CPU to CXL attached DRAM
-# CXL device: Add the latency from CPU to Generic Port (from
+# CXL device: Add the latency from CPU to Generic Port (from
# HMAT indexed via the node ID in this SRAT structure) to
# that for CXL bus links, the latency across intermediate switches
# and from the EP port to the actual memory. Bandwidth is more
@@ -1048,6 +1048,39 @@
'*vcek-disabled': 'bool' } }
##
+# @TdxGuestProperties:
+#
+# Properties for tdx-guest objects.
+#
+# @attributes: The 'attributes' of a TD guest that is passed to
+# KVM_TDX_INIT_VM
+#
+# @sept-ve-disable: toggle bit 28 of TD attributes to control disabling
+# of EPT violation conversion to #VE on guest TD access of PENDING
+# pages. Some guest OSes (e.g., Linux TD guests) may require this to
+# be set; otherwise they refuse to boot.
+#
+# @mrconfigid: ID for non-owner-defined configuration of the guest TD,
+# e.g., run-time or OS configuration (base64 encoded SHA384 digest).
+# Defaults to all zeros.
+#
+# @mrowner: ID for the guest TD’s owner (base64 encoded SHA384 digest).
+# Defaults to all zeros.
+#
+# @mrownerconfig: ID for owner-defined configuration of the guest TD,
+# e.g., specific to the workload rather than the run-time or OS
+# (base64 encoded SHA384 digest). Defaults to all zeros.
+#
+# Since: 10.1
+##
+{ 'struct': 'TdxGuestProperties',
+ 'data': { '*attributes': 'uint64',
+ '*sept-ve-disable': 'bool',
+ '*mrconfigid': 'str',
+ '*mrowner': 'str',
+ '*mrownerconfig': 'str' } }
+
+##
# @ThreadContextProperties:
#
# Properties for thread context objects.
@@ -1132,6 +1165,7 @@
'sev-snp-guest',
'thread-context',
's390-pv-guest',
+ 'tdx-guest',
'throttle-group',
'tls-creds-anon',
'tls-creds-psk',
@@ -1204,6 +1238,7 @@
'if': 'CONFIG_SECRET_KEYRING' },
'sev-guest': 'SevGuestProperties',
'sev-snp-guest': 'SevSnpGuestProperties',
+ 'tdx-guest': 'TdxGuestProperties',
'thread-context': 'ThreadContextProperties',
'throttle-group': 'ThrottleGroupProperties',
'tls-creds-anon': 'TlsCredsAnonProperties',
diff --git a/qapi/run-state.json b/qapi/run-state.json
index ce95cfa..fd09beb 100644
--- a/qapi/run-state.json
+++ b/qapi/run-state.json
@@ -62,7 +62,7 @@
##
# @ShutdownCause:
#
-# An enumeration of reasons for a Shutdown.
+# An enumeration of reasons for a shutdown.
#
# @none: No shutdown request pending
#
@@ -135,19 +135,19 @@
##
# @SHUTDOWN:
#
-# Emitted when the virtual machine has shut down, indicating that qemu
+# Emitted when the virtual machine has shut down, indicating that QEMU
# is about to exit.
#
# @guest: If true, the shutdown was triggered by a guest request (such
# as a guest-initiated ACPI shutdown request or other
# hardware-specific action) rather than a host request (such as
-# sending qemu a SIGINT). (since 2.10)
+# sending QEMU a SIGINT). (since 2.10)
#
# @reason: The @ShutdownCause which resulted in the SHUTDOWN.
# (since 4.0)
#
# .. note:: If the command-line option ``-no-shutdown`` has been
-# specified, qemu will not exit, and a STOP event will eventually
+# specified, QEMU will not exit, and a STOP event will eventually
# follow the SHUTDOWN event.
#
# Since: 0.12
@@ -365,8 +365,8 @@
# @shutdown: Shutdown the VM and exit, according to the shutdown
# action
#
-# @exit-failure: Shutdown the VM and exit with nonzero status (since
-# 7.1)
+# @exit-failure: Shutdown the VM and exit with nonzero status
+# (since 7.1)
#
# Since: 6.0
##
@@ -501,10 +501,12 @@
#
# @s390: s390 guest panic information type (Since: 2.12)
#
+# @tdx: tdx guest panic information type (Since: 10.1)
+#
# Since: 2.9
##
{ 'enum': 'GuestPanicInformationType',
- 'data': [ 'hyper-v', 's390' ] }
+ 'data': [ 'hyper-v', 's390', 'tdx' ] }
##
# @GuestPanicInformation:
@@ -519,7 +521,8 @@
'base': {'type': 'GuestPanicInformationType'},
'discriminator': 'type',
'data': {'hyper-v': 'GuestPanicInformationHyperV',
- 's390': 'GuestPanicInformationS390'}}
+ 's390': 'GuestPanicInformationS390',
+ 'tdx' : 'GuestPanicInformationTdx'}}
##
# @GuestPanicInformationHyperV:
@@ -599,6 +602,30 @@
'reason': 'S390CrashReason'}}
##
+# @GuestPanicInformationTdx:
+#
+# Guest panic information specific to TDX, as specified in the
+# "Guest-Hypervisor Communication Interface (GHCI) Specification",
+# section TDG.VP.VMCALL<ReportFatalError>.
+#
+# @error-code: TD-specific error code
+#
+# @message: Human-readable error message provided by the guest. Not
+# to be trusted.
+#
+# @gpa: guest-physical address of a page that contains more verbose
+# error information, as a zero-terminated string. Present when the
+# "GPA valid" bit (bit 63) is set in @error-code.
+#
+# Since: 10.1
+##
+{'struct': 'GuestPanicInformationTdx',
+ 'data': {'error-code': 'uint32',
+ 'message': 'str',
+ '*gpa': 'uint64'}}
+
+##
# @MEMORY_FAILURE:
#
# Emitted when a memory failure occurs on host side.
diff --git a/qapi/transaction.json b/qapi/transaction.json
index 021e383..9d9e7af 100644
--- a/qapi/transaction.json
+++ b/qapi/transaction.json
@@ -21,7 +21,7 @@
##
# @ActionCompletionMode:
#
-# An enumeration of Transactional completion modes.
+# An enumeration of transactional completion modes.
#
# @individual: Do not attempt to cancel any other Actions if any
# Actions fail after the Transaction request succeeds. All
@@ -223,7 +223,7 @@
# exists, the request will be rejected. Only some image formats
# support it, for example, qcow2, and rbd,
#
-# On failure, qemu will try delete the newly created internal snapshot
+# On failure, QEMU will try to delete the newly created internal snapshot
# in the transaction. When an I/O error occurs during deletion, the
# user needs to fix it later with qemu-img or other command.
#
diff --git a/qapi/uefi.json b/qapi/uefi.json
index bdfcabe..6592183 100644
--- a/qapi/uefi.json
+++ b/qapi/uefi.json
@@ -5,7 +5,7 @@
##
# = UEFI Variable Store
#
-# The qemu efi variable store implementation (hw/uefi/) uses this to
+# The QEMU efi variable store implementation (hw/uefi/) uses this to
# store non-volatile variables in json format on disk.
#
# This is an existing format already supported by (at least) two other
diff --git a/qapi/ui.json b/qapi/ui.json
index 3d0c853..514fa15 100644
--- a/qapi/ui.json
+++ b/qapi/ui.json
@@ -175,8 +175,8 @@
# @filename: the path of a new file to store the image
#
# @device: ID of the display device that should be dumped. If this
-# parameter is missing, the primary display will be used. (Since
-# 2.12)
+# parameter is missing, the primary display will be used.
+# (Since 2.12)
#
# @head: head to use in case the device supports multiple heads. If
# this parameter is missing, head #0 will be used. Also note that
@@ -1526,12 +1526,12 @@
#
# Display (user interface) options.
#
-# @type: Which DisplayType qemu should use.
+# @type: Which DisplayType QEMU should use.
#
# @full-screen: Start user interface in fullscreen mode
# (default: off).
#
-# @window-close: Allow to quit qemu with window close button
+# @window-close: Allow quitting QEMU with the window close button
# (default: on).
#
# @show-cursor: Force showing the mouse cursor (default: off).
diff --git a/qemu-img.c b/qemu-img.c
index 139eeb5..e757071 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -3505,6 +3505,7 @@ static int img_snapshot(int argc, char **argv)
break;
case SNAPSHOT_DELETE:
+ bdrv_drain_all_begin();
bdrv_graph_rdlock_main_loop();
ret = bdrv_snapshot_find(bs, &sn, snapshot_name);
if (ret < 0) {
@@ -3520,6 +3521,7 @@ static int img_snapshot(int argc, char **argv)
}
}
bdrv_graph_rdunlock_main_loop();
+ bdrv_drain_all_end();
break;
}
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
index 13d580c..bccfe85 100644
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -32,6 +32,13 @@ dependencies = [
]
[[package]]
+name = "bits"
+version = "0.1.0"
+dependencies = [
+ "qemu_api_macros",
+]
+
+[[package]]
name = "either"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -66,6 +73,7 @@ version = "0.1.0"
dependencies = [
"bilge",
"bilge-impl",
+ "bits",
"qemu_api",
"qemu_api_macros",
]
diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index d9faeec..fd4c2fb 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -1,6 +1,7 @@
[workspace]
resolver = "2"
members = [
+ "bits",
"qemu-api-macros",
"qemu-api",
"hw/char/pl011",
@@ -63,7 +64,6 @@ ignored_unit_patterns = "deny"
implicit_clone = "deny"
macro_use_imports = "deny"
missing_safety_doc = "deny"
-multiple_crate_versions = "deny"
mut_mut = "deny"
needless_bitwise_bool = "deny"
needless_pass_by_ref_mut = "deny"
diff --git a/rust/bits/Cargo.toml b/rust/bits/Cargo.toml
new file mode 100644
index 0000000..1ff38a4
--- /dev/null
+++ b/rust/bits/Cargo.toml
@@ -0,0 +1,19 @@
+[package]
+name = "bits"
+version = "0.1.0"
+authors = ["Paolo Bonzini <pbonzini@redhat.com>"]
+description = "const-friendly bit flags"
+resolver = "2"
+publish = false
+
+edition.workspace = true
+homepage.workspace = true
+license.workspace = true
+repository.workspace = true
+rust-version.workspace = true
+
+[dependencies]
+qemu_api_macros = { path = "../qemu-api-macros" }
+
+[lints]
+workspace = true
diff --git a/rust/bits/meson.build b/rust/bits/meson.build
new file mode 100644
index 0000000..2a41e13
--- /dev/null
+++ b/rust/bits/meson.build
@@ -0,0 +1,16 @@
+_bits_rs = static_library(
+ 'bits',
+ 'src/lib.rs',
+ override_options: ['rust_std=2021', 'build.rust_std=2021'],
+ rust_abi: 'rust',
+ dependencies: [qemu_api_macros],
+)
+
+bits_rs = declare_dependency(link_with: _bits_rs)
+
+rust.test('rust-bits-tests', _bits_rs,
+ suite: ['unit', 'rust'])
+
+rust.doctest('rust-bits-doctests', _bits_rs,
+ dependencies: bits_rs,
+ suite: ['doc', 'rust'])
diff --git a/rust/bits/src/lib.rs b/rust/bits/src/lib.rs
new file mode 100644
index 0000000..d485d6b
--- /dev/null
+++ b/rust/bits/src/lib.rs
@@ -0,0 +1,443 @@
+// SPDX-License-Identifier: MIT or Apache-2.0 or GPL-2.0-or-later
+
+/// # Definition entry point
+///
+/// Define a struct with a single field of type $type. Include public constants
+/// for each element listed in braces.
+///
+/// The unnamed element at the end, if present, can be used to enlarge the set
+/// of valid bits. Bits that are valid but not listed are treated normally for
+/// the purpose of arithmetic operations, and are printed with their hexadecimal
+/// value.
+///
+/// The struct implements the following traits: [`BitAnd`](std::ops::BitAnd),
+/// [`BitOr`](std::ops::BitOr), [`BitXor`](std::ops::BitXor),
+/// [`Not`](std::ops::Not), [`Sub`](std::ops::Sub); [`Debug`](std::fmt::Debug),
+/// [`Display`](std::fmt::Display), [`Binary`](std::fmt::Binary),
+/// [`Octal`](std::fmt::Octal), [`LowerHex`](std::fmt::LowerHex),
+/// [`UpperHex`](std::fmt::UpperHex); [`From`]`<type>`/[`Into`]`<type>` where
+/// type is the type specified in the definition.
+///
+/// ## Example
+///
+/// ```
+/// # use bits::bits;
+/// bits! {
+/// pub struct Colors(u8) {
+/// BLACK = 0,
+/// RED = 1,
+/// GREEN = 1 << 1,
+/// BLUE = 1 << 2,
+/// WHITE = (1 << 0) | (1 << 1) | (1 << 2),
+/// }
+/// }
+/// ```
+///
+/// ```
+/// # use bits::bits;
+/// # bits! { pub struct Colors(u8) { BLACK = 0, RED = 1, GREEN = 1 << 1, BLUE = 1 << 2, } }
+///
+/// bits! {
+/// pub struct Colors8(u8) {
+/// BLACK = 0,
+/// RED = 1,
+/// GREEN = 1 << 1,
+/// BLUE = 1 << 2,
+/// WHITE = (1 << 0) | (1 << 1) | (1 << 2),
+///
+/// _ = 255,
+/// }
+/// }
+///
+/// // The previously defined struct ignores bits not explicitly defined.
+/// assert_eq!(
+/// Colors::from(255).into_bits(),
+/// (Colors::RED | Colors::GREEN | Colors::BLUE).into_bits()
+/// );
+///
+/// // Adding "_ = 255" makes it retain other bits as well.
+/// assert_eq!(Colors8::from(255).into_bits(), 255);
+///
+/// // all() does not include the additional bits, valid_bits() does
+/// assert_eq!(Colors8::all().into_bits(), Colors::all().into_bits());
+/// assert_eq!(Colors8::valid_bits().into_bits(), 255);
+/// ```
+///
+/// # Evaluation entry point
+///
+/// Return a constant corresponding to the boolean expression `$expr`.
+/// Identifiers in the expression correspond to values defined for the
+/// type `$type`. Supported operators are `!` (unary), `-`, `&`, `^`, `|`.
+///
+/// ## Examples
+///
+/// ```
+/// # use bits::bits;
+/// bits! {
+/// pub struct Colors(u8) {
+/// BLACK = 0,
+/// RED = 1,
+/// GREEN = 1 << 1,
+/// BLUE = 1 << 2,
+/// // same as "WHITE = 7",
+/// WHITE = bits!(Self as u8: RED | GREEN | BLUE),
+/// }
+/// }
+///
+/// let rgb = bits! { Colors: RED | GREEN | BLUE };
+/// assert_eq!(rgb, Colors::WHITE);
+/// ```
+#[macro_export]
+macro_rules! bits {
+ {
+ $(#[$struct_meta:meta])*
+ $struct_vis:vis struct $struct_name:ident($field_vis:vis $type:ty) {
+ $($(#[$const_meta:meta])* $const:ident = $val:expr),+
+ $(,_ = $mask:expr)?
+ $(,)?
+ }
+ } => {
+ $(#[$struct_meta])*
+ #[derive(Clone, Copy, PartialEq, Eq)]
+ #[repr(transparent)]
+ $struct_vis struct $struct_name($field_vis $type);
+
+ impl $struct_name {
+ $( #[allow(dead_code)] $(#[$const_meta])*
+ pub const $const: $struct_name = $struct_name($val); )+
+
+ #[doc(hidden)]
+ const VALID__: $type = $( Self::$const.0 )|+ $(|$mask)?;
+
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub const fn empty() -> Self {
+ Self(0)
+ }
+
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub const fn all() -> Self {
+ Self($( Self::$const.0 )|+)
+ }
+
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub const fn valid_bits() -> Self {
+ Self(Self::VALID__)
+ }
+
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub const fn valid(val: $type) -> bool {
+ (val & !Self::VALID__) == 0
+ }
+
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub const fn any_set(self, mask: Self) -> bool {
+ (self.0 & mask.0) != 0
+ }
+
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub const fn all_set(self, mask: Self) -> bool {
+ (self.0 & mask.0) == mask.0
+ }
+
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub const fn none_set(self, mask: Self) -> bool {
+ (self.0 & mask.0) == 0
+ }
+
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub const fn from_bits(value: $type) -> Self {
+ $struct_name(value)
+ }
+
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub const fn into_bits(self) -> $type {
+ self.0
+ }
+
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub fn set(&mut self, rhs: Self) {
+ self.0 |= rhs.0;
+ }
+
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub fn clear(&mut self, rhs: Self) {
+ self.0 &= !rhs.0;
+ }
+
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub fn toggle(&mut self, rhs: Self) {
+ self.0 ^= rhs.0;
+ }
+
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub const fn intersection(self, rhs: Self) -> Self {
+ $struct_name(self.0 & rhs.0)
+ }
+
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub const fn difference(self, rhs: Self) -> Self {
+ $struct_name(self.0 & !rhs.0)
+ }
+
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub const fn symmetric_difference(self, rhs: Self) -> Self {
+ $struct_name(self.0 ^ rhs.0)
+ }
+
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub const fn union(self, rhs: Self) -> Self {
+ $struct_name(self.0 | rhs.0)
+ }
+
+ #[allow(dead_code)]
+ #[inline(always)]
+ pub const fn invert(self) -> Self {
+ $struct_name(self.0 ^ Self::VALID__)
+ }
+ }
+
+ impl ::std::fmt::Binary for $struct_name {
+ fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
+ // If no width, use the highest valid bit
+ let width = f.width().unwrap_or((Self::VALID__.ilog2() + 1) as usize);
+ write!(f, "{:0>width$.precision$b}", self.0,
+ width = width,
+ precision = f.precision().unwrap_or(width))
+ }
+ }
+
+ impl ::std::fmt::LowerHex for $struct_name {
+ fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
+ <$type as ::std::fmt::LowerHex>::fmt(&self.0, f)
+ }
+ }
+
+ impl ::std::fmt::Octal for $struct_name {
+ fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
+ <$type as ::std::fmt::Octal>::fmt(&self.0, f)
+ }
+ }
+
+ impl ::std::fmt::UpperHex for $struct_name {
+ fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
+ <$type as ::std::fmt::UpperHex>::fmt(&self.0, f)
+ }
+ }
+
+ impl ::std::fmt::Debug for $struct_name {
+ fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
+ write!(f, "{}({})", stringify!($struct_name), self)
+ }
+ }
+
+ impl ::std::fmt::Display for $struct_name {
+ fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
+ use ::std::fmt::Display;
+ let mut first = true;
+ let mut left = self.0;
+ $(if Self::$const.0.is_power_of_two() && (self & Self::$const).0 != 0 {
+ if first { first = false } else { Display::fmt(&'|', f)?; }
+ Display::fmt(stringify!($const), f)?;
+ left -= Self::$const.0;
+ })+
+ if first {
+ Display::fmt(&'0', f)
+ } else if left != 0 {
+ write!(f, "|{left:#x}")
+ } else {
+ Ok(())
+ }
+ }
+ }
+
+ impl ::std::cmp::PartialEq<$type> for $struct_name {
+ fn eq(&self, rhs: &$type) -> bool {
+ self.0 == *rhs
+ }
+ }
+
+ impl ::std::ops::BitAnd<$struct_name> for &$struct_name {
+ type Output = $struct_name;
+ fn bitand(self, rhs: $struct_name) -> Self::Output {
+ $struct_name(self.0 & rhs.0)
+ }
+ }
+
+ impl ::std::ops::BitAndAssign<$struct_name> for $struct_name {
+ fn bitand_assign(&mut self, rhs: $struct_name) {
+ self.0 = self.0 & rhs.0
+ }
+ }
+
+ impl ::std::ops::BitXor<$struct_name> for &$struct_name {
+ type Output = $struct_name;
+ fn bitxor(self, rhs: $struct_name) -> Self::Output {
+ $struct_name(self.0 ^ rhs.0)
+ }
+ }
+
+ impl ::std::ops::BitXorAssign<$struct_name> for $struct_name {
+ fn bitxor_assign(&mut self, rhs: $struct_name) {
+ self.0 = self.0 ^ rhs.0
+ }
+ }
+
+ impl ::std::ops::BitOr<$struct_name> for &$struct_name {
+ type Output = $struct_name;
+ fn bitor(self, rhs: $struct_name) -> Self::Output {
+ $struct_name(self.0 | rhs.0)
+ }
+ }
+
+ impl ::std::ops::BitOrAssign<$struct_name> for $struct_name {
+ fn bitor_assign(&mut self, rhs: $struct_name) {
+ self.0 = self.0 | rhs.0
+ }
+ }
+
+ impl ::std::ops::Sub<$struct_name> for &$struct_name {
+ type Output = $struct_name;
+ fn sub(self, rhs: $struct_name) -> Self::Output {
+ $struct_name(self.0 & !rhs.0)
+ }
+ }
+
+ impl ::std::ops::SubAssign<$struct_name> for $struct_name {
+ fn sub_assign(&mut self, rhs: $struct_name) {
+            self.0 = self.0 & !rhs.0
+ }
+ }
+
+ impl ::std::ops::Not for &$struct_name {
+ type Output = $struct_name;
+ fn not(self) -> Self::Output {
+ $struct_name(self.0 ^ $struct_name::VALID__)
+ }
+ }
+
+ impl ::std::ops::BitAnd<$struct_name> for $struct_name {
+ type Output = Self;
+ fn bitand(self, rhs: Self) -> Self::Output {
+ $struct_name(self.0 & rhs.0)
+ }
+ }
+
+ impl ::std::ops::BitXor<$struct_name> for $struct_name {
+ type Output = Self;
+ fn bitxor(self, rhs: Self) -> Self::Output {
+ $struct_name(self.0 ^ rhs.0)
+ }
+ }
+
+ impl ::std::ops::BitOr<$struct_name> for $struct_name {
+ type Output = Self;
+ fn bitor(self, rhs: Self) -> Self::Output {
+ $struct_name(self.0 | rhs.0)
+ }
+ }
+
+ impl ::std::ops::Sub<$struct_name> for $struct_name {
+ type Output = Self;
+ fn sub(self, rhs: Self) -> Self::Output {
+ $struct_name(self.0 & !rhs.0)
+ }
+ }
+
+ impl ::std::ops::Not for $struct_name {
+ type Output = Self;
+ fn not(self) -> Self::Output {
+ $struct_name(self.0 ^ Self::VALID__)
+ }
+ }
+
+ impl From<$struct_name> for $type {
+ fn from(x: $struct_name) -> $type {
+ x.0
+ }
+ }
+
+ impl From<$type> for $struct_name {
+ fn from(x: $type) -> Self {
+ $struct_name(x & Self::VALID__)
+ }
+ }
+ };
+
+ { $type:ty: $expr:expr } => {
+ ::qemu_api_macros::bits_const_internal! { $type @ ($expr) }
+ };
+
+ { $type:ty as $int_type:ty: $expr:expr } => {
+ (::qemu_api_macros::bits_const_internal! { $type @ ($expr) }.into_bits()) as $int_type
+ };
+}
+
+#[cfg(test)]
+mod test {
+ bits! {
+ pub struct InterruptMask(u32) {
+ OE = 1 << 10,
+ BE = 1 << 9,
+ PE = 1 << 8,
+ FE = 1 << 7,
+ RT = 1 << 6,
+ TX = 1 << 5,
+ RX = 1 << 4,
+ DSR = 1 << 3,
+ DCD = 1 << 2,
+ CTS = 1 << 1,
+ RI = 1 << 0,
+
+ E = bits!(Self as u32: OE | BE | PE | FE),
+ MS = bits!(Self as u32: RI | DSR | DCD | CTS),
+ }
+ }
+
+ #[test]
+ pub fn test_not() {
+ assert_eq!(
+ !InterruptMask::from(InterruptMask::RT.0),
+ InterruptMask::E | InterruptMask::MS | InterruptMask::TX | InterruptMask::RX
+ );
+ }
+
+ #[test]
+ pub fn test_and() {
+ assert_eq!(
+ InterruptMask::from(0),
+ InterruptMask::MS & InterruptMask::OE
+ )
+ }
+
+ #[test]
+ pub fn test_or() {
+ assert_eq!(
+ InterruptMask::E,
+ InterruptMask::OE | InterruptMask::BE | InterruptMask::PE | InterruptMask::FE
+ );
+ }
+
+ #[test]
+ pub fn test_xor() {
+ assert_eq!(
+ InterruptMask::E ^ InterruptMask::BE,
+ InterruptMask::OE | InterruptMask::PE | InterruptMask::FE
+ );
+ }
+}
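
The unit tests above only exercise the Boolean operators. The formatting traits and the query/update helpers generated by the macro matter just as much for callers such as the PL011 conversion later in this series, so here is a standalone sketch (not part of the patch) that reuses the Colors type from the macro's doc comment; it assumes the in-tree bits crate is available as a dependency:

    use bits::bits;

    bits! {
        /// Mirrors the Colors example from the bits! doc comment.
        pub struct Colors(u8) {
            BLACK = 0,
            RED = 1,
            GREEN = 1 << 1,
            BLUE = 1 << 2,
            WHITE = (1 << 0) | (1 << 1) | (1 << 2),
        }
    }

    fn main() {
        // Formatting goes through the generated Display/Debug impls.
        let mut c = Colors::RED | Colors::GREEN;
        assert_eq!(format!("{c}"), "RED|GREEN");
        assert_eq!(format!("{c:?}"), "Colors(RED|GREEN)");

        // Query and in-place update helpers generated by the macro.
        assert!(c.any_set(Colors::WHITE));
        assert!(!c.all_set(Colors::WHITE));
        c.set(Colors::BLUE);
        assert!(c.all_set(Colors::WHITE));
        c.clear(Colors::GREEN);
        assert_eq!(u8::from(c), 0b101);

        // Conversions from the raw integer mask out undeclared bits.
        assert_eq!(Colors::from(0xff), Colors::WHITE);
    }

The query helpers and the named operations (union, intersection, difference, invert) are const fns, which is the point of the crate: derived masks can be built from other constants at compile time.
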
diff --git a/rust/hw/char/pl011/Cargo.toml b/rust/hw/char/pl011/Cargo.toml
index a1f431a..003ef96 100644
--- a/rust/hw/char/pl011/Cargo.toml
+++ b/rust/hw/char/pl011/Cargo.toml
@@ -18,6 +18,7 @@ crate-type = ["staticlib"]
[dependencies]
bilge = { version = "0.2.0" }
bilge-impl = { version = "0.2.0" }
+bits = { path = "../../../bits" }
qemu_api = { path = "../../../qemu-api" }
qemu_api_macros = { path = "../../../qemu-api-macros" }
diff --git a/rust/hw/char/pl011/meson.build b/rust/hw/char/pl011/meson.build
index 547cca5..2a1be32 100644
--- a/rust/hw/char/pl011/meson.build
+++ b/rust/hw/char/pl011/meson.build
@@ -1,17 +1,12 @@
-subproject('bilge-0.2-rs', required: true)
-subproject('bilge-impl-0.2-rs', required: true)
-
-bilge_dep = dependency('bilge-0.2-rs')
-bilge_impl_dep = dependency('bilge-impl-0.2-rs')
-
_libpl011_rs = static_library(
'pl011',
files('src/lib.rs'),
override_options: ['rust_std=2021', 'build.rust_std=2021'],
rust_abi: 'rust',
dependencies: [
- bilge_dep,
- bilge_impl_dep,
+ bilge_rs,
+ bilge_impl_rs,
+ bits_rs,
qemu_api,
qemu_api_macros,
],
@@ -21,6 +16,6 @@ rust_devices_ss.add(when: 'CONFIG_X_PL011_RUST', if_true: [declare_dependency(
link_whole: [_libpl011_rs],
# Putting proc macro crates in `dependencies` is necessary for Meson to find
# them when compiling the root per-target static rust lib.
- dependencies: [bilge_impl_dep, qemu_api_macros],
+ dependencies: [bilge_impl_rs, qemu_api_macros],
variables: {'crate': 'pl011'},
)])
diff --git a/rust/hw/char/pl011/src/device.rs b/rust/hw/char/pl011/src/device.rs
index bde3be6..0501fa5 100644
--- a/rust/hw/char/pl011/src/device.rs
+++ b/rust/hw/char/pl011/src/device.rs
@@ -85,8 +85,8 @@ pub struct PL011Registers {
#[doc(alias = "cr")]
pub control: registers::Control,
pub dmacr: u32,
- pub int_enabled: u32,
- pub int_level: u32,
+ pub int_enabled: Interrupt,
+ pub int_level: Interrupt,
pub read_fifo: Fifo,
pub ilpr: u32,
pub ibrd: u32,
@@ -199,9 +199,9 @@ impl PL011Registers {
LCR_H => u32::from(self.line_control),
CR => u32::from(self.control),
FLS => self.ifl,
- IMSC => self.int_enabled,
- RIS => self.int_level,
- MIS => self.int_level & self.int_enabled,
+ IMSC => u32::from(self.int_enabled),
+ RIS => u32::from(self.int_level),
+ MIS => u32::from(self.int_level & self.int_enabled),
ICR => {
// "The UARTICR Register is the interrupt clear register and is write-only"
// Source: ARM DDI 0183G 3.3.13 Interrupt Clear Register, UARTICR
@@ -263,13 +263,13 @@ impl PL011Registers {
self.set_read_trigger();
}
IMSC => {
- self.int_enabled = value;
+ self.int_enabled = Interrupt::from(value);
return true;
}
RIS => {}
MIS => {}
ICR => {
- self.int_level &= !value;
+ self.int_level &= !Interrupt::from(value);
return true;
}
DMACR => {
@@ -295,7 +295,7 @@ impl PL011Registers {
self.flags.set_receive_fifo_empty(true);
}
if self.read_count + 1 == self.read_trigger {
- self.int_level &= !Interrupt::RX.0;
+ self.int_level &= !Interrupt::RX;
}
self.receive_status_error_clear.set_from_data(c);
*update = true;
@@ -305,7 +305,7 @@ impl PL011Registers {
fn write_data_register(&mut self, value: u32) -> bool {
// interrupts always checked
let _ = self.loopback_tx(value.into());
- self.int_level |= Interrupt::TX.0;
+ self.int_level |= Interrupt::TX;
true
}
@@ -361,19 +361,19 @@ impl PL011Registers {
// Change interrupts based on updated FR
let mut il = self.int_level;
- il &= !Interrupt::MS.0;
+ il &= !Interrupt::MS;
if self.flags.data_set_ready() {
- il |= Interrupt::DSR.0;
+ il |= Interrupt::DSR;
}
if self.flags.data_carrier_detect() {
- il |= Interrupt::DCD.0;
+ il |= Interrupt::DCD;
}
if self.flags.clear_to_send() {
- il |= Interrupt::CTS.0;
+ il |= Interrupt::CTS;
}
if self.flags.ring_indicator() {
- il |= Interrupt::RI.0;
+ il |= Interrupt::RI;
}
self.int_level = il;
true
@@ -391,8 +391,8 @@ impl PL011Registers {
self.line_control.reset();
self.receive_status_error_clear.reset();
self.dmacr = 0;
- self.int_enabled = 0;
- self.int_level = 0;
+ self.int_enabled = 0.into();
+ self.int_level = 0.into();
self.ilpr = 0;
self.ibrd = 0;
self.fbrd = 0;
@@ -451,7 +451,7 @@ impl PL011Registers {
}
if self.read_count == self.read_trigger {
- self.int_level |= Interrupt::RX.0;
+ self.int_level |= Interrupt::RX;
return true;
}
false
@@ -480,13 +480,13 @@ impl PL011Registers {
}
impl PL011State {
- /// Initializes a pre-allocated, unitialized instance of `PL011State`.
+ /// Initializes a pre-allocated, uninitialized instance of `PL011State`.
///
/// # Safety
///
/// `self` must point to a correctly sized and aligned location for the
/// `PL011State` type. It must not be called more than once on the same
- /// location/instance. All its fields are expected to hold unitialized
+ /// location/instance. All its fields are expected to hold uninitialized
/// values with the sole exception of `parent_obj`.
unsafe fn init(&mut self) {
static PL011_OPS: MemoryRegionOps<PL011State> = MemoryRegionOpsBuilder::<PL011State>::new()
@@ -632,7 +632,7 @@ impl PL011State {
let regs = self.regs.borrow();
let flags = regs.int_level & regs.int_enabled;
for (irq, i) in self.interrupts.iter().zip(IRQMASK) {
- irq.set(flags & i != 0);
+ irq.set(flags.any_set(i));
}
}
@@ -642,14 +642,13 @@ impl PL011State {
}
/// Which bits in the interrupt status matter for each outbound IRQ line ?
-const IRQMASK: [u32; 6] = [
- /* combined IRQ */
- Interrupt::E.0 | Interrupt::MS.0 | Interrupt::RT.0 | Interrupt::TX.0 | Interrupt::RX.0,
- Interrupt::RX.0,
- Interrupt::TX.0,
- Interrupt::RT.0,
- Interrupt::MS.0,
- Interrupt::E.0,
+const IRQMASK: [Interrupt; 6] = [
+ Interrupt::all(),
+ Interrupt::RX,
+ Interrupt::TX,
+ Interrupt::RT,
+ Interrupt::MS,
+ Interrupt::E,
];
/// # Safety
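
One non-obvious part of the IRQMASK rewrite above is that the combined-IRQ entry changes from the explicit E | MS | RT | TX | RX mask to Interrupt::all(). Because all() ORs every declared constant, and E and MS are themselves ORs of the individual error and modem-status bits, the numeric value is unchanged. An illustrative const check (not part of the patch), assuming the Interrupt type from registers.rs below and the bits! macro are in scope:

    use bits::bits;
    use crate::registers::Interrupt; // module path assumed

    // all() includes the composite E and MS constants, so it equals the old
    // hand-written combined mask: all eleven interrupt bits, i.e. 0x7ff.
    const NEW_COMBINED: u32 = Interrupt::all().into_bits();
    const OLD_COMBINED: u32 = bits!(Interrupt as u32: E | MS | RT | TX | RX);
    const _: () = assert!(NEW_COMBINED == OLD_COMBINED && NEW_COMBINED == 0x7ff);
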
diff --git a/rust/hw/char/pl011/src/registers.rs b/rust/hw/char/pl011/src/registers.rs
index 690feb6..7ececd3 100644
--- a/rust/hw/char/pl011/src/registers.rs
+++ b/rust/hw/char/pl011/src/registers.rs
@@ -9,7 +9,8 @@
// https://developer.arm.com/documentation/ddi0183/latest/
use bilge::prelude::*;
-use qemu_api::impl_vmstate_bitsized;
+use bits::bits;
+use qemu_api::{impl_vmstate_bitsized, impl_vmstate_forward};
/// Offset of each register from the base memory address of the device.
#[doc(alias = "offset")]
@@ -326,22 +327,24 @@ impl Default for Control {
}
}
-/// Interrupt status bits in UARTRIS, UARTMIS, UARTIMSC
-pub struct Interrupt(pub u32);
+bits! {
+ /// Interrupt status bits in UARTRIS, UARTMIS, UARTIMSC
+ #[derive(Default)]
+ pub struct Interrupt(u32) {
+ OE = 1 << 10,
+ BE = 1 << 9,
+ PE = 1 << 8,
+ FE = 1 << 7,
+ RT = 1 << 6,
+ TX = 1 << 5,
+ RX = 1 << 4,
+ DSR = 1 << 3,
+ DCD = 1 << 2,
+ CTS = 1 << 1,
+ RI = 1 << 0,
-impl Interrupt {
- pub const OE: Self = Self(1 << 10);
- pub const BE: Self = Self(1 << 9);
- pub const PE: Self = Self(1 << 8);
- pub const FE: Self = Self(1 << 7);
- pub const RT: Self = Self(1 << 6);
- pub const TX: Self = Self(1 << 5);
- pub const RX: Self = Self(1 << 4);
- pub const DSR: Self = Self(1 << 3);
- pub const DCD: Self = Self(1 << 2);
- pub const CTS: Self = Self(1 << 1);
- pub const RI: Self = Self(1 << 0);
-
- pub const E: Self = Self(Self::OE.0 | Self::BE.0 | Self::PE.0 | Self::FE.0);
- pub const MS: Self = Self(Self::RI.0 | Self::DSR.0 | Self::DCD.0 | Self::CTS.0);
+ E = bits!(Self as u32: OE | BE | PE | FE),
+ MS = bits!(Self as u32: RI | DSR | DCD | CTS),
+ }
}
+impl_vmstate_forward!(Interrupt);
diff --git a/rust/hw/timer/hpet/src/hpet.rs b/rust/hw/timer/hpet/src/device.rs
index 779681d..e3ba62b 100644
--- a/rust/hw/timer/hpet/src/hpet.rs
+++ b/rust/hw/timer/hpet/src/device.rs
@@ -1,5 +1,5 @@
// Copyright (C) 2024 Intel Corporation.
-// Author(s): Zhao Liu <zhai1.liu@intel.com>
+// Author(s): Zhao Liu <zhao1.liu@intel.com>
// SPDX-License-Identifier: GPL-2.0-or-later
use std::{
diff --git a/rust/hw/timer/hpet/src/fw_cfg.rs b/rust/hw/timer/hpet/src/fw_cfg.rs
index aa08d28..6c10316 100644
--- a/rust/hw/timer/hpet/src/fw_cfg.rs
+++ b/rust/hw/timer/hpet/src/fw_cfg.rs
@@ -1,5 +1,5 @@
// Copyright (C) 2024 Intel Corporation.
-// Author(s): Zhao Liu <zhai1.liu@intel.com>
+// Author(s): Zhao Liu <zhao1.liu@intel.com>
// SPDX-License-Identifier: GPL-2.0-or-later
use std::ptr::addr_of_mut;
diff --git a/rust/hw/timer/hpet/src/lib.rs b/rust/hw/timer/hpet/src/lib.rs
index 1954584..a95cf14 100644
--- a/rust/hw/timer/hpet/src/lib.rs
+++ b/rust/hw/timer/hpet/src/lib.rs
@@ -1,5 +1,5 @@
// Copyright (C) 2024 Intel Corporation.
-// Author(s): Zhao Liu <zhai1.liu@intel.com>
+// Author(s): Zhao Liu <zhao1.liu@intel.com>
// SPDX-License-Identifier: GPL-2.0-or-later
//! # HPET QEMU Device Model
@@ -7,7 +7,7 @@
//! This library implements a device model for the IA-PC HPET (High
//! Precision Event Timers) device in QEMU.
+pub mod device;
pub mod fw_cfg;
-pub mod hpet;
pub const TYPE_HPET: &::std::ffi::CStr = c"hpet";
diff --git a/rust/meson.build b/rust/meson.build
index 91e52b8..b1b3315 100644
--- a/rust/meson.build
+++ b/rust/meson.build
@@ -1,4 +1,23 @@
+subproject('bilge-0.2-rs', required: true)
+subproject('bilge-impl-0.2-rs', required: true)
+subproject('libc-0.2-rs', required: true)
+
+bilge_rs = dependency('bilge-0.2-rs')
+bilge_impl_rs = dependency('bilge-impl-0.2-rs')
+libc_rs = dependency('libc-0.2-rs')
+
+subproject('proc-macro2-1-rs', required: true)
+subproject('quote-1-rs', required: true)
+subproject('syn-2-rs', required: true)
+
+quote_rs_native = dependency('quote-1-rs', native: true)
+syn_rs_native = dependency('syn-2-rs', native: true)
+proc_macro2_rs_native = dependency('proc-macro2-1-rs', native: true)
+
+qemuutil_rs = qemuutil.partial_dependency(link_args: true, links: true)
+
subdir('qemu-api-macros')
+subdir('bits')
subdir('qemu-api')
subdir('hw')
@@ -6,21 +25,9 @@ subdir('hw')
cargo = find_program('cargo', required: false)
if cargo.found()
- run_target('clippy',
- command: [config_host['MESON'], 'devenv',
- '--workdir', '@CURRENT_SOURCE_DIR@',
- cargo, 'clippy', '--tests'],
- depends: bindings_rs)
-
run_target('rustfmt',
command: [config_host['MESON'], 'devenv',
'--workdir', '@CURRENT_SOURCE_DIR@',
cargo, 'fmt'],
depends: bindings_rs)
-
- run_target('rustdoc',
- command: [config_host['MESON'], 'devenv',
- '--workdir', '@CURRENT_SOURCE_DIR@',
- cargo, 'doc', '--no-deps', '--document-private-items'],
- depends: bindings_rs)
endif
diff --git a/rust/qemu-api-macros/meson.build b/rust/qemu-api-macros/meson.build
index 6f94a4b..8610ce1 100644
--- a/rust/qemu-api-macros/meson.build
+++ b/rust/qemu-api-macros/meson.build
@@ -1,11 +1,3 @@
-subproject('proc-macro2-1-rs', required: true)
-subproject('quote-1-rs', required: true)
-subproject('syn-2-rs', required: true)
-
-quote_dep = dependency('quote-1-rs', native: true)
-syn_dep = dependency('syn-2-rs', native: true)
-proc_macro2_dep = dependency('proc-macro2-1-rs', native: true)
-
_qemu_api_macros_rs = rust.proc_macro(
'qemu_api_macros',
files('src/lib.rs'),
@@ -16,9 +8,9 @@ _qemu_api_macros_rs = rust.proc_macro(
'--cfg', 'feature="proc-macro"',
],
dependencies: [
- proc_macro2_dep,
- quote_dep,
- syn_dep,
+ proc_macro2_rs_native,
+ quote_rs_native,
+ syn_rs_native,
],
)
diff --git a/rust/qemu-api-macros/src/bits.rs b/rust/qemu-api-macros/src/bits.rs
new file mode 100644
index 0000000..5ba8475
--- /dev/null
+++ b/rust/qemu-api-macros/src/bits.rs
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: MIT or Apache-2.0 or GPL-2.0-or-later
+
+// shadowing is useful together with "if let"
+#![allow(clippy::shadow_unrelated)]
+
+use proc_macro2::{
+ Delimiter, Group, Ident, Punct, Spacing, Span, TokenStream, TokenTree, TokenTree as TT,
+};
+
+use crate::utils::MacroError;
+
+pub struct BitsConstInternal {
+ typ: TokenTree,
+}
+
+fn paren(ts: TokenStream) -> TokenTree {
+ TT::Group(Group::new(Delimiter::Parenthesis, ts))
+}
+
+fn ident(s: &'static str) -> TokenTree {
+ TT::Ident(Ident::new(s, Span::call_site()))
+}
+
+fn punct(ch: char) -> TokenTree {
+ TT::Punct(Punct::new(ch, Spacing::Alone))
+}
+
+/// Implements a recursive-descent parser that translates Boolean expressions on
+/// bitmasks to invocations of `const` functions defined by the `bits!` macro.
+impl BitsConstInternal {
+ // primary ::= '(' or ')'
+ // | ident
+    //           | '!' primary
+ fn parse_primary(
+ &self,
+ tok: TokenTree,
+ it: &mut dyn Iterator<Item = TokenTree>,
+ out: &mut TokenStream,
+ ) -> Result<Option<TokenTree>, MacroError> {
+ let next = match tok {
+ TT::Group(ref g) => {
+ if g.delimiter() != Delimiter::Parenthesis && g.delimiter() != Delimiter::None {
+ return Err(MacroError::Message("expected parenthesis".into(), g.span()));
+ }
+ let mut stream = g.stream().into_iter();
+ let Some(first_tok) = stream.next() else {
+ return Err(MacroError::Message(
+ "expected operand, found ')'".into(),
+ g.span(),
+ ));
+ };
+ let mut output = TokenStream::new();
+ // start from the lowest precedence
+ let next = self.parse_or(first_tok, &mut stream, &mut output)?;
+ if let Some(tok) = next {
+ return Err(MacroError::Message(
+ format!("unexpected token {tok}"),
+ tok.span(),
+ ));
+ }
+ out.extend(Some(paren(output)));
+ it.next()
+ }
+ TT::Ident(_) => {
+ let mut output = TokenStream::new();
+ output.extend([
+ self.typ.clone(),
+ TT::Punct(Punct::new(':', Spacing::Joint)),
+ TT::Punct(Punct::new(':', Spacing::Joint)),
+ tok,
+ ]);
+ out.extend(Some(paren(output)));
+ it.next()
+ }
+ TT::Punct(ref p) => {
+ if p.as_char() != '!' {
+ return Err(MacroError::Message("expected operand".into(), p.span()));
+ }
+ let Some(rhs_tok) = it.next() else {
+ return Err(MacroError::Message(
+ "expected operand at end of input".into(),
+ p.span(),
+ ));
+ };
+ let next = self.parse_primary(rhs_tok, it, out)?;
+ out.extend([punct('.'), ident("invert"), paren(TokenStream::new())]);
+ next
+ }
+ _ => {
+ return Err(MacroError::Message("unexpected literal".into(), tok.span()));
+ }
+ };
+ Ok(next)
+ }
+
+ fn parse_binop<
+ F: Fn(
+ &Self,
+ TokenTree,
+ &mut dyn Iterator<Item = TokenTree>,
+ &mut TokenStream,
+ ) -> Result<Option<TokenTree>, MacroError>,
+ >(
+ &self,
+ tok: TokenTree,
+ it: &mut dyn Iterator<Item = TokenTree>,
+ out: &mut TokenStream,
+ ch: char,
+ f: F,
+ method: &'static str,
+ ) -> Result<Option<TokenTree>, MacroError> {
+ let mut next = f(self, tok, it, out)?;
+ while next.is_some() {
+ let op = next.as_ref().unwrap();
+ let TT::Punct(ref p) = op else { break };
+ if p.as_char() != ch {
+ break;
+ }
+
+ let Some(rhs_tok) = it.next() else {
+ return Err(MacroError::Message(
+ "expected operand at end of input".into(),
+ p.span(),
+ ));
+ };
+ let mut rhs = TokenStream::new();
+ next = f(self, rhs_tok, it, &mut rhs)?;
+ out.extend([punct('.'), ident(method), paren(rhs)]);
+ }
+ Ok(next)
+ }
+
+ // sub ::= primary ('-' primary)*
+ pub fn parse_sub(
+ &self,
+ tok: TokenTree,
+ it: &mut dyn Iterator<Item = TokenTree>,
+ out: &mut TokenStream,
+ ) -> Result<Option<TokenTree>, MacroError> {
+ self.parse_binop(tok, it, out, '-', Self::parse_primary, "difference")
+ }
+
+ // and ::= sub ('&' sub)*
+ fn parse_and(
+ &self,
+ tok: TokenTree,
+ it: &mut dyn Iterator<Item = TokenTree>,
+ out: &mut TokenStream,
+ ) -> Result<Option<TokenTree>, MacroError> {
+ self.parse_binop(tok, it, out, '&', Self::parse_sub, "intersection")
+ }
+
+    // xor ::= and ('^' and)*
+ fn parse_xor(
+ &self,
+ tok: TokenTree,
+ it: &mut dyn Iterator<Item = TokenTree>,
+ out: &mut TokenStream,
+ ) -> Result<Option<TokenTree>, MacroError> {
+ self.parse_binop(tok, it, out, '^', Self::parse_and, "symmetric_difference")
+ }
+
+ // or ::= xor ('|' xor)*
+ pub fn parse_or(
+ &self,
+ tok: TokenTree,
+ it: &mut dyn Iterator<Item = TokenTree>,
+ out: &mut TokenStream,
+ ) -> Result<Option<TokenTree>, MacroError> {
+ self.parse_binop(tok, it, out, '|', Self::parse_xor, "union")
+ }
+
+ pub fn parse(
+ it: &mut dyn Iterator<Item = TokenTree>,
+ ) -> Result<proc_macro2::TokenStream, MacroError> {
+ let mut pos = Span::call_site();
+ let mut typ = proc_macro2::TokenStream::new();
+
+ // Gobble everything up to an `@` sign, which is followed by a
+ // parenthesized expression; that is, all token trees except the
+ // last two form the type.
+ let next = loop {
+ let tok = it.next();
+ if let Some(ref t) = tok {
+ pos = t.span();
+ }
+ match tok {
+ None => break None,
+ Some(TT::Punct(ref p)) if p.as_char() == '@' => {
+ let tok = it.next();
+ if let Some(ref t) = tok {
+ pos = t.span();
+ }
+ break tok;
+ }
+ Some(x) => typ.extend(Some(x)),
+ }
+ };
+
+ let Some(tok) = next else {
+ return Err(MacroError::Message(
+ "expected expression, do not call this macro directly".into(),
+ pos,
+ ));
+ };
+ let TT::Group(ref _group) = tok else {
+ return Err(MacroError::Message(
+ "expected parenthesis, do not call this macro directly".into(),
+ tok.span(),
+ ));
+ };
+ let mut out = TokenStream::new();
+ let state = Self {
+ typ: TT::Group(Group::new(Delimiter::None, typ)),
+ };
+
+ let next = state.parse_primary(tok, it, &mut out)?;
+
+ // A parenthesized expression is a single production of the grammar,
+ // so the input must have reached the last token.
+ if let Some(tok) = next {
+ return Err(MacroError::Message(
+ format!("unexpected token {tok}"),
+ tok.span(),
+ ));
+ }
+ Ok(out)
+ }
+}
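
To make the translation concrete, here is a hedged sketch (not generated output) of what the evaluation arm produces for a hypothetical Colors type. The exact token stream has a few extra parentheses and None-delimited groups, but every operator maps onto one of the const fns listed above:

    use bits::bits;

    bits! {
        pub struct Colors(u8) {
            RED = 1,
            GREEN = 1 << 1,
            BLUE = 1 << 2,
        }
    }

    // '-' binds tighter than '|' in this grammar, hence the parentheses.
    // The whole expression stays const-evaluable because every operator is
    // rewritten into a const fn generated by the bits! declaration.
    const NOT_BLUE: Colors = bits! { Colors: (RED | GREEN) - BLUE };
    // ... which bits_const_internal! expands to, approximately:
    //   ((Colors::RED).union((Colors::GREEN))).difference((Colors::BLUE))
    const _: () = assert!(NOT_BLUE.into_bits() == 0b011);
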
diff --git a/rust/qemu-api-macros/src/lib.rs b/rust/qemu-api-macros/src/lib.rs
index f97449b..1034707 100644
--- a/rust/qemu-api-macros/src/lib.rs
+++ b/rust/qemu-api-macros/src/lib.rs
@@ -12,6 +12,9 @@ use syn::{
mod utils;
use utils::MacroError;
+mod bits;
+use bits::BitsConstInternal;
+
fn get_fields<'a>(
input: &'a DeriveInput,
msg: &str,
@@ -190,23 +193,51 @@ fn get_variants(input: &DeriveInput) -> Result<&Punctuated<Variant, Comma>, Macr
}
#[rustfmt::skip::macros(quote)]
+fn derive_tryinto_body(
+ name: &Ident,
+ variants: &Punctuated<Variant, Comma>,
+ repr: &Path,
+) -> Result<proc_macro2::TokenStream, MacroError> {
+ let discriminants: Vec<&Ident> = variants.iter().map(|f| &f.ident).collect();
+
+ Ok(quote! {
+ #(const #discriminants: #repr = #name::#discriminants as #repr;)*;
+ match value {
+ #(#discriminants => Ok(#name::#discriminants),)*
+ _ => Err(value),
+ }
+ })
+}
+
+#[rustfmt::skip::macros(quote)]
fn derive_tryinto_or_error(input: DeriveInput) -> Result<proc_macro2::TokenStream, MacroError> {
let repr = get_repr_uN(&input, "#[derive(TryInto)]")?;
-
let name = &input.ident;
- let variants = get_variants(&input)?;
- let discriminants: Vec<&Ident> = variants.iter().map(|f| &f.ident).collect();
+ let body = derive_tryinto_body(name, get_variants(&input)?, &repr)?;
+ let errmsg = format!("invalid value for {name}");
Ok(quote! {
+ impl #name {
+ #[allow(dead_code)]
+ pub const fn into_bits(self) -> #repr {
+ self as #repr
+ }
+
+ #[allow(dead_code)]
+ pub const fn from_bits(value: #repr) -> Self {
+ match ({
+ #body
+ }) {
+ Ok(x) => x,
+ Err(_) => panic!(#errmsg)
+ }
+ }
+ }
impl core::convert::TryFrom<#repr> for #name {
type Error = #repr;
fn try_from(value: #repr) -> Result<Self, Self::Error> {
- #(const #discriminants: #repr = #name::#discriminants as #repr;)*;
- match value {
- #(#discriminants => Ok(Self::#discriminants),)*
- _ => Err(value),
- }
+ #body
}
}
})
@@ -219,3 +250,12 @@ pub fn derive_tryinto(input: TokenStream) -> TokenStream {
TokenStream::from(expanded)
}
+
+#[proc_macro]
+pub fn bits_const_internal(ts: TokenStream) -> TokenStream {
+ let ts = proc_macro2::TokenStream::from(ts);
+ let mut it = ts.into_iter();
+
+ let expanded = BitsConstInternal::parse(&mut it).unwrap_or_else(Into::into);
+ TokenStream::from(expanded)
+}
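
The reworked derive above now emits const into_bits()/from_bits() helpers next to the pre-existing TryFrom impl, with both sharing the same generated match body. A hedged sketch of the resulting surface, using a hypothetical Mode enum (names invented for illustration):

    #[derive(Debug, PartialEq, qemu_api_macros::TryInto)]
    #[repr(u8)]
    enum Mode {
        Off = 0,
        On = 1,
    }

    fn main() {
        // New const helpers generated by this patch:
        const RAW_ON: u8 = Mode::On.into_bits();
        assert_eq!(RAW_ON, 1);
        assert!(matches!(Mode::from_bits(0), Mode::Off)); // panics on invalid input
        // Pre-existing fallible conversion, unchanged for callers:
        assert_eq!(Mode::try_from(7u8), Err(7u8));
    }
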
diff --git a/rust/qemu-api/meson.build b/rust/qemu-api/meson.build
index 1696df7..b532281 100644
--- a/rust/qemu-api/meson.build
+++ b/rust/qemu-api/meson.build
@@ -2,8 +2,6 @@ _qemu_api_cfg = run_command(rustc_args,
'--config-headers', config_host_h, '--features', files('Cargo.toml'),
capture: true, check: true).stdout().strip().splitlines()
-libc_dep = dependency('libc-0.2-rs')
-
# _qemu_api_cfg += ['--cfg', 'feature="allocator"']
if get_option('debug_mutex')
_qemu_api_cfg += ['--cfg', 'feature="debug_cell"']
@@ -37,32 +35,24 @@ _qemu_api_rs = static_library(
override_options: ['rust_std=2021', 'build.rust_std=2021'],
rust_abi: 'rust',
rust_args: _qemu_api_cfg,
- dependencies: [libc_dep, qemu_api_macros],
+ dependencies: [libc_rs, qemu_api_macros, qemuutil_rs,
+ qom, hwcore, chardev, migration],
)
rust.test('rust-qemu-api-tests', _qemu_api_rs,
suite: ['unit', 'rust'])
-qemu_api = declare_dependency(link_with: _qemu_api_rs)
+qemu_api = declare_dependency(link_with: [_qemu_api_rs],
+ dependencies: [qemu_api_macros, qom, hwcore, chardev, migration])
-# Rust executables do not support objects, so add an intermediate step.
-rust_qemu_api_objs = static_library(
- 'rust_qemu_api_objs',
- objects: [libqom.extract_all_objects(recursive: false),
- libhwcore.extract_all_objects(recursive: false),
- libchardev.extract_all_objects(recursive: false),
- libcrypto.extract_all_objects(recursive: false),
- libauthz.extract_all_objects(recursive: false),
- libio.extract_all_objects(recursive: false),
- libmigration.extract_all_objects(recursive: false)])
-rust_qemu_api_deps = declare_dependency(
- dependencies: [
- qom_ss.dependencies(),
- chardev_ss.dependencies(),
- crypto_ss.dependencies(),
- authz_ss.dependencies(),
- io_ss.dependencies()],
- link_whole: [rust_qemu_api_objs, libqemuutil])
+# Doctests are essentially integration tests, so they need the same dependencies.
+# Note that running them requires the object files for C code, so place them
+# in a separate suite that is run by the "build" CI jobs rather than "check".
+rust.doctest('rust-qemu-api-doctests',
+ _qemu_api_rs,
+ protocol: 'rust',
+ dependencies: qemu_api,
+ suite: ['doc', 'rust'])
test('rust-qemu-api-integration',
executable(
@@ -71,7 +61,7 @@ test('rust-qemu-api-integration',
override_options: ['rust_std=2021', 'build.rust_std=2021'],
rust_args: ['--test'],
install: false,
- dependencies: [qemu_api, qemu_api_macros, rust_qemu_api_deps]),
+ dependencies: [qemu_api]),
args: [
'--test', '--test-threads', '1',
'--format', 'pretty',
diff --git a/rust/qemu-api/src/bindings.rs b/rust/qemu-api/src/bindings.rs
index 3c1d297..057de4b 100644
--- a/rust/qemu-api/src/bindings.rs
+++ b/rust/qemu-api/src/bindings.rs
@@ -11,6 +11,7 @@
clippy::restriction,
clippy::style,
clippy::missing_const_for_fn,
+ clippy::ptr_offset_with_cast,
clippy::useless_transmute,
clippy::missing_safety_doc
)]
diff --git a/rust/qemu-api/src/bitops.rs b/rust/qemu-api/src/bitops.rs
index 023ec1a..b1e3a53 100644
--- a/rust/qemu-api/src/bitops.rs
+++ b/rust/qemu-api/src/bitops.rs
@@ -1,5 +1,5 @@
// Copyright (C) 2024 Intel Corporation.
-// Author(s): Zhao Liu <zhai1.liu@intel.com>
+// Author(s): Zhao Liu <zhao1.liu@intel.com>
// SPDX-License-Identifier: GPL-2.0-or-later
//! This module provides bit operation extensions to integer types.
diff --git a/rust/qemu-api/src/cell.rs b/rust/qemu-api/src/cell.rs
index 05ce09f..27063b0 100644
--- a/rust/qemu-api/src/cell.rs
+++ b/rust/qemu-api/src/cell.rs
@@ -225,27 +225,23 @@ use crate::bindings;
/// An internal function that is used by doctests.
pub fn bql_start_test() {
- if cfg!(MESON) {
- // SAFETY: integration tests are run with --test-threads=1, while
- // unit tests and doctests are not multithreaded and do not have
- // any BQL-protected data. Just set bql_locked to true.
- unsafe {
- bindings::rust_bql_mock_lock();
- }
+ // SAFETY: integration tests are run with --test-threads=1, while
+ // unit tests and doctests are not multithreaded and do not have
+ // any BQL-protected data. Just set bql_locked to true.
+ unsafe {
+ bindings::rust_bql_mock_lock();
}
}
pub fn bql_locked() -> bool {
// SAFETY: the function does nothing but return a thread-local bool
- !cfg!(MESON) || unsafe { bindings::bql_locked() }
+ unsafe { bindings::bql_locked() }
}
fn bql_block_unlock(increase: bool) {
- if cfg!(MESON) {
- // SAFETY: this only adjusts a counter
- unsafe {
- bindings::bql_block_unlock(increase);
- }
+ // SAFETY: this only adjusts a counter
+ unsafe {
+ bindings::bql_block_unlock(increase);
}
}
diff --git a/rust/qemu-api/src/qom.rs b/rust/qemu-api/src/qom.rs
index 41e5a5e..14f98fe 100644
--- a/rust/qemu-api/src/qom.rs
+++ b/rust/qemu-api/src/qom.rs
@@ -291,7 +291,7 @@ pub unsafe trait ObjectType: Sized {
}
/// Return the receiver as a const raw pointer to Object.
- /// This is preferrable to `as_object_mut_ptr()` if a C
+ /// This is preferable to `as_object_mut_ptr()` if a C
/// function only needs a `const Object *`.
fn as_object_ptr(&self) -> *const bindings::Object {
self.as_object().as_ptr()
@@ -485,7 +485,7 @@ pub trait ObjectImpl: ObjectType + IsA<Object> {
/// `INSTANCE_INIT` functions have been called.
const INSTANCE_POST_INIT: Option<fn(&Self)> = None;
- /// Called on descendent classes after all parent class initialization
+ /// Called on descendant classes after all parent class initialization
/// has occurred, but before the class itself is initialized. This
/// is only useful if a class is not a leaf, and can be used to undo
/// the effects of copying the contents of the parent's class struct
diff --git a/rust/qemu-api/src/timer.rs b/rust/qemu-api/src/timer.rs
index 868bd88..0a2d111 100644
--- a/rust/qemu-api/src/timer.rs
+++ b/rust/qemu-api/src/timer.rs
@@ -1,5 +1,5 @@
// Copyright (C) 2024 Intel Corporation.
-// Author(s): Zhao Liu <zhai1.liu@intel.com>
+// Author(s): Zhao Liu <zhao1.liu@intel.com>
// SPDX-License-Identifier: GPL-2.0-or-later
use std::{
diff --git a/rust/qemu-api/src/vmstate.rs b/rust/qemu-api/src/vmstate.rs
index 9c8b239..812f390 100644
--- a/rust/qemu-api/src/vmstate.rs
+++ b/rust/qemu-api/src/vmstate.rs
@@ -9,7 +9,7 @@
//! * [`vmstate_unused!`](crate::vmstate_unused) and
//! [`vmstate_of!`](crate::vmstate_of), which are used to express the
//! migration format for a struct. This is based on the [`VMState`] trait,
-//! which is defined by all migrateable types.
+//! which is defined by all migratable types.
//!
//! * [`impl_vmstate_forward`](crate::impl_vmstate_forward) and
//! [`impl_vmstate_bitsized`](crate::impl_vmstate_bitsized), which help with
diff --git a/rust/qemu-api/tests/vmstate_tests.rs b/rust/qemu-api/tests/vmstate_tests.rs
index ad0fc5c..bded836 100644
--- a/rust/qemu-api/tests/vmstate_tests.rs
+++ b/rust/qemu-api/tests/vmstate_tests.rs
@@ -1,5 +1,5 @@
// Copyright (C) 2025 Intel Corporation.
-// Author(s): Zhao Liu <zhai1.liu@intel.com>
+// Author(s): Zhao Liu <zhao1.liu@intel.com>
// SPDX-License-Identifier: GPL-2.0-or-later
use std::{
diff --git a/scripts/rust/rustc_args.py b/scripts/rust/rustc_args.py
index 2633157..63b0748 100644
--- a/scripts/rust/rustc_args.py
+++ b/scripts/rust/rustc_args.py
@@ -104,10 +104,7 @@ def generate_lint_flags(cargo_toml: CargoTOML, strict_lints: bool) -> Iterable[s
else:
raise Exception(f"invalid level {level} for {prefix}{lint}")
- # This may change if QEMU ever invokes clippy-driver or rustdoc by
- # hand. For now, check the syntax but do not add non-rustc lints to
- # the command line.
- if k == "rust" and not (strict_lints and lint in STRICT_LINTS):
+ if not (strict_lints and lint in STRICT_LINTS):
lint_list.append(LintFlag(flags=[flag, prefix + lint], priority=priority))
if strict_lints:
diff --git a/scripts/tracetool/backend/simple.py b/scripts/tracetool/backend/simple.py
index a74d61f..2688d4b 100644
--- a/scripts/tracetool/backend/simple.py
+++ b/scripts/tracetool/backend/simple.py
@@ -36,8 +36,17 @@ def generate_h_begin(events, group):
def generate_h(event, group):
- out(' _simple_%(api)s(%(args)s);',
+ event_id = 'TRACE_' + event.name.upper()
+ if "vcpu" in event.properties:
+ # already checked on the generic format code
+ cond = "true"
+ else:
+ cond = "trace_event_get_state(%s)" % event_id
+ out(' if (%(cond)s) {',
+ ' _simple_%(api)s(%(args)s);',
+ ' }',
api=event.api(),
+ cond=cond,
args=", ".join(event.args.names()))
@@ -72,22 +81,10 @@ def generate_c(event, group):
if len(event.args) == 0:
sizestr = '0'
- event_id = 'TRACE_' + event.name.upper()
- if "vcpu" in event.properties:
- # already checked on the generic format code
- cond = "true"
- else:
- cond = "trace_event_get_state(%s)" % event_id
-
out('',
- ' if (!%(cond)s) {',
- ' return;',
- ' }',
- '',
' if (trace_record_start(&rec, %(event_obj)s.id, %(size_str)s)) {',
' return; /* Trace Buffer Full, Event Dropped ! */',
' }',
- cond=cond,
event_obj=event.api(event.QEMU_EVENT),
size_str=sizestr)
diff --git a/system/memory.c b/system/memory.c
index 63b983e..306e9ff 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -2174,18 +2174,14 @@ void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
}
/* Called with rcu_read_lock held. */
-bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
- ram_addr_t *ram_addr, bool *read_only,
- bool *mr_has_discard_manager, Error **errp)
+MemoryRegion *memory_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p,
+ Error **errp)
{
MemoryRegion *mr;
hwaddr xlat;
hwaddr len = iotlb->addr_mask + 1;
bool writable = iotlb->perm & IOMMU_WO;
- if (mr_has_discard_manager) {
- *mr_has_discard_manager = false;
- }
/*
* The IOMMU TLB entry we have just covers translation through
* this IOMMU to its immediate target. We need to translate
@@ -2195,7 +2191,7 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
&xlat, &len, writable, MEMTXATTRS_UNSPECIFIED);
if (!memory_region_is_ram(mr)) {
error_setg(errp, "iommu map to non memory area %" HWADDR_PRIx "", xlat);
- return false;
+ return NULL;
} else if (memory_region_has_ram_discard_manager(mr)) {
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr);
MemoryRegionSection tmp = {
@@ -2203,9 +2199,6 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
.offset_within_region = xlat,
.size = int128_make64(len),
};
- if (mr_has_discard_manager) {
- *mr_has_discard_manager = true;
- }
/*
* Malicious VMs can map memory into the IOMMU, which is expected
* to remain discarded. vfio will pin all pages, populating memory.
@@ -2216,7 +2209,7 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
error_setg(errp, "iommu map to discarded memory (e.g., unplugged"
" via virtio-mem): %" HWADDR_PRIx "",
iotlb->translated_addr);
- return false;
+ return NULL;
}
}
@@ -2226,22 +2219,11 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
*/
if (len & iotlb->addr_mask) {
error_setg(errp, "iommu has granularity incompatible with target AS");
- return false;
- }
-
- if (vaddr) {
- *vaddr = memory_region_get_ram_ptr(mr) + xlat;
- }
-
- if (ram_addr) {
- *ram_addr = memory_region_get_ram_addr(mr) + xlat;
- }
-
- if (read_only) {
- *read_only = !writable || mr->readonly;
+ return NULL;
}
- return true;
+ *xlat_p = xlat;
+ return mr;
}
void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
diff --git a/system/meson.build b/system/meson.build
index c2f0082..7514bf3 100644
--- a/system/meson.build
+++ b/system/meson.build
@@ -7,7 +7,7 @@ system_ss.add(files(
'vl.c',
), sdl, libpmem, libdaxctl)
-libsystem_ss.add(files(
+system_ss.add(files(
'balloon.c',
'bootdevice.c',
'cpus.c',
diff --git a/system/runstate.c b/system/runstate.c
index de74d96..38900c9 100644
--- a/system/runstate.c
+++ b/system/runstate.c
@@ -590,6 +590,58 @@ static void qemu_system_wakeup(void)
}
}
+static char *tdx_parse_panic_message(char *message)
+{
+ bool printable = false;
+ char *buf = NULL;
+ int len = 0, i;
+
+ /*
+ * Although message is defined as a json string, we shouldn't
+     * Although message is defined as a JSON string, we shouldn't
+     * unconditionally treat it as such, because the guest generated it
+     * and it's not necessarily trustworthy.
+ if (message) {
+ /* The caller guarantees the NULL-terminated string. */
+ len = strlen(message);
+
+ printable = len > 0;
+ for (i = 0; i < len; i++) {
+ if (!(0x20 <= message[i] && message[i] <= 0x7e)) {
+ printable = false;
+ break;
+ }
+ }
+ }
+
+ if (len == 0) {
+ buf = g_malloc(1);
+ buf[0] = '\0';
+ } else {
+ if (!printable) {
+            /* 3 = length of "%02x "; +1 for sprintf's trailing NUL */
+            buf = g_malloc(len * 3 + 1);
+ for (i = 0; i < len; i++) {
+ if (message[i] == '\0') {
+ break;
+ } else {
+                    sprintf(buf + 3 * i, "%02x ", (uint8_t)message[i]);
+ }
+ }
+ if (i > 0) {
+                /* replace the trailing space with a NUL terminator */
+ buf[i * 3 - 1] = '\0';
+ } else {
+ buf[0] = '\0';
+ }
+ } else {
+ buf = g_strdup(message);
+ }
+ }
+
+ return buf;
+}
+
void qemu_system_guest_panicked(GuestPanicInformation *info)
{
qemu_log_mask(LOG_GUEST_ERROR, "Guest crashed");
@@ -631,7 +683,20 @@ void qemu_system_guest_panicked(GuestPanicInformation *info)
S390CrashReason_str(info->u.s390.reason),
info->u.s390.psw_mask,
info->u.s390.psw_addr);
+ } else if (info->type == GUEST_PANIC_INFORMATION_TYPE_TDX) {
+ char *message = tdx_parse_panic_message(info->u.tdx.message);
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "\nTDX guest reports fatal error."
+ " error code: 0x%" PRIx32 " error message:\"%s\"\n",
+ info->u.tdx.error_code, message);
+ g_free(message);
+ if (info->u.tdx.gpa != -1ull) {
+ qemu_log_mask(LOG_GUEST_ERROR, "Additional error information "
+ "can be found at gpa page: 0x%" PRIx64 "\n",
+ info->u.tdx.gpa);
+ }
}
+
qapi_free_GuestPanicInformation(info);
}
}
diff --git a/system/vl.c b/system/vl.c
index fd402b8..3b7057e 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -1192,10 +1192,7 @@ static int parse_fw_cfg(void *opaque, QemuOpts *opts, Error **errp)
return -1;
}
}
- /* For legacy, keep user files in a specific global order. */
- fw_cfg_set_order_override(fw_cfg, FW_CFG_ORDER_OVERRIDE_USER);
fw_cfg_add_file(fw_cfg, name, buf, size);
- fw_cfg_reset_order_override(fw_cfg);
return 0;
}
@@ -2745,7 +2742,6 @@ static void qemu_create_cli_devices(void)
}
/* init generic devices */
- rom_set_order_override(FW_CFG_ORDER_OVERRIDE_DEVICE);
qemu_opts_foreach(qemu_find_opts("device"),
device_init_func, NULL, &error_fatal);
QTAILQ_FOREACH(opt, &device_opts, next) {
@@ -2756,7 +2752,6 @@ static void qemu_create_cli_devices(void)
assert(ret_data == NULL); /* error_fatal aborts */
loc_pop(&opt->loc);
}
- rom_reset_order_override();
}
static bool qemu_machine_creation_done(Error **errp)
diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c
index cca6b97..cefd235 100644
--- a/target/arm/arm-qmp-cmds.c
+++ b/target/arm/arm-qmp-cmds.c
@@ -30,6 +30,7 @@
#include "qapi/qapi-commands-misc-arm.h"
#include "qobject/qdict.h"
#include "qom/qom-qobject.h"
+#include "cpu.h"
static GICCapability *gic_cap_new(int version)
{
diff --git a/target/arm/cpregs.h b/target/arm/cpregs.h
index 2183de8..c1a7ae3 100644
--- a/target/arm/cpregs.h
+++ b/target/arm/cpregs.h
@@ -23,6 +23,7 @@
#include "hw/registerfields.h"
#include "target/arm/kvm-consts.h"
+#include "cpu.h"
/*
* ARMCPRegInfo type field bits:
diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h
index 525e4ce..4452e7c 100644
--- a/target/arm/cpu-features.h
+++ b/target/arm/cpu-features.h
@@ -22,6 +22,7 @@
#include "hw/registerfields.h"
#include "qemu/host-utils.h"
+#include "cpu.h"
/*
* Naming convention for isar_feature functions:
diff --git a/target/arm/hvf-stub.c b/target/arm/hvf-stub.c
new file mode 100644
index 0000000..ff13726
--- /dev/null
+++ b/target/arm/hvf-stub.c
@@ -0,0 +1,20 @@
+/*
+ * QEMU Hypervisor.framework (HVF) stubs for ARM
+ *
+ * Copyright (c) Linaro
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hvf_arm.h"
+
+uint32_t hvf_arm_get_default_ipa_bit_size(void)
+{
+ g_assert_not_reached();
+}
+
+uint32_t hvf_arm_get_max_ipa_bit_size(void)
+{
+ g_assert_not_reached();
+}
diff --git a/target/arm/hvf_arm.h b/target/arm/hvf_arm.h
index 26c717b..ea82f26 100644
--- a/target/arm/hvf_arm.h
+++ b/target/arm/hvf_arm.h
@@ -11,7 +11,7 @@
#ifndef QEMU_HVF_ARM_H
#define QEMU_HVF_ARM_H
-#include "cpu.h"
+#include "target/arm/cpu-qom.h"
/**
* hvf_arm_init_debug() - initialize guest debug capabilities
@@ -22,23 +22,7 @@ void hvf_arm_init_debug(void);
void hvf_arm_set_cpu_features_from_host(ARMCPU *cpu);
-#ifdef CONFIG_HVF
-
uint32_t hvf_arm_get_default_ipa_bit_size(void);
uint32_t hvf_arm_get_max_ipa_bit_size(void);
-#else
-
-static inline uint32_t hvf_arm_get_default_ipa_bit_size(void)
-{
- return 0;
-}
-
-static inline uint32_t hvf_arm_get_max_ipa_bit_size(void)
-{
- return 0;
-}
-
-#endif
-
#endif
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index a2791aa..74fda8b 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -1846,6 +1846,11 @@ static int kvm_arm_sve_set_vls(ARMCPU *cpu)
#define ARM_CPU_ID_MPIDR 3, 0, 0, 0, 5
+int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+ return 0;
+}
+
int kvm_arch_init_vcpu(CPUState *cs)
{
int ret;
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index c4178d1..7dc83ca 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -12,6 +12,7 @@
#define QEMU_KVM_ARM_H
#include "system/kvm.h"
+#include "target/arm/cpu-qom.h"
#define KVM_ARM_VGIC_V2 (1 << 0)
#define KVM_ARM_VGIC_V3 (1 << 1)
diff --git a/target/arm/meson.build b/target/arm/meson.build
index b404fa5..7aa81e3 100644
--- a/target/arm/meson.build
+++ b/target/arm/meson.build
@@ -3,7 +3,6 @@ arm_common_ss = ss.source_set()
arm_ss.add(files(
'gdbstub.c',
))
-arm_ss.add(zlib)
arm_ss.add(when: 'TARGET_AARCH64', if_true: files(
'cpu64.c',
@@ -28,10 +27,11 @@ arm_user_ss.add(files(
'vfp_fpscr.c',
))
-arm_common_system_ss.add(files('cpu.c'), capstone)
+arm_common_system_ss.add(files('cpu.c'))
arm_common_system_ss.add(when: 'TARGET_AARCH64', if_false: files(
'cpu32-stubs.c'))
arm_common_system_ss.add(when: 'CONFIG_KVM', if_false: files('kvm-stub.c'))
+arm_common_system_ss.add(when: 'CONFIG_HVF', if_false: files('hvf-stub.c'))
arm_common_system_ss.add(files(
'arch_dump.c',
'arm-powerctl.c',
@@ -48,7 +48,7 @@ subdir('hvf')
if 'CONFIG_TCG' in config_all_accel
subdir('tcg')
else
- arm_ss.add(files('tcg-stubs.c'))
+ arm_common_system_ss.add(files('tcg-stubs.c'))
endif
target_arch += {'arm': arm_ss}
diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build
index 2d1502b..c59f0f0 100644
--- a/target/arm/tcg/meson.build
+++ b/target/arm/tcg/meson.build
@@ -56,6 +56,8 @@ arm_system_ss.add(files(
arm_system_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('cpu-v7m.c'))
arm_user_ss.add(when: 'TARGET_AARCH64', if_false: files('cpu-v7m.c'))
+arm_common_ss.add(zlib)
+
arm_common_ss.add(files(
'arith_helper.c',
'crypto_helper.c',
diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h
index 164be76..48b88db 100644
--- a/target/i386/confidential-guest.h
+++ b/target/i386/confidential-guest.h
@@ -39,8 +39,10 @@ struct X86ConfidentialGuestClass {
/* <public> */
int (*kvm_type)(X86ConfidentialGuest *cg);
- uint32_t (*mask_cpuid_features)(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index,
- int reg, uint32_t value);
+ void (*cpu_instance_init)(X86ConfidentialGuest *cg, CPUState *cpu);
+ uint32_t (*adjust_cpuid_features)(X86ConfidentialGuest *cg, uint32_t feature,
+ uint32_t index, int reg, uint32_t value);
+ int (*check_features)(X86ConfidentialGuest *cg, CPUState *cs);
};
/**
@@ -59,25 +61,47 @@ static inline int x86_confidential_guest_kvm_type(X86ConfidentialGuest *cg)
}
}
+static inline void x86_confidential_guest_cpu_instance_init(X86ConfidentialGuest *cg,
+ CPUState *cpu)
+{
+ X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg);
+
+ if (klass->cpu_instance_init) {
+ klass->cpu_instance_init(cg, cpu);
+ }
+}
+
/**
- * x86_confidential_guest_mask_cpuid_features:
+ * x86_confidential_guest_adjust_cpuid_features:
*
- * Removes unsupported features from a confidential guest's CPUID values, returns
- * the value with the bits removed. The bits removed should be those that KVM
- * provides independent of host-supported CPUID features, but are not supported by
- * the confidential computing firmware.
+ * Adjust the supported features in a confidential guest's CPUID values and
+ * return the adjusted value. Bits may be removed because they are not
+ * supported by the confidential computing firmware, or added because the
+ * firmware forcibly exposes them to the guest.
*/
-static inline int x86_confidential_guest_mask_cpuid_features(X86ConfidentialGuest *cg,
+static inline int x86_confidential_guest_adjust_cpuid_features(X86ConfidentialGuest *cg,
uint32_t feature, uint32_t index,
int reg, uint32_t value)
{
X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg);
- if (klass->mask_cpuid_features) {
- return klass->mask_cpuid_features(cg, feature, index, reg, value);
+ if (klass->adjust_cpuid_features) {
+ return klass->adjust_cpuid_features(cg, feature, index, reg, value);
} else {
return value;
}
}
+static inline int x86_confidential_guest_check_features(X86ConfidentialGuest *cg,
+ CPUState *cs)
+{
+ X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg);
+
+ if (klass->check_features) {
+ return klass->check_features(cg, cs);
+ }
+
+ return 0;
+}
+
#endif
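A minimal sketch of how a confidential-guest backend might install the three hooks above; the my_guest_* names are hypothetical, and the class cast macro and class_init signature are assumed to follow the usual QOM pattern for this type:

static void my_guest_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu)
{
    /* per-vCPU adjustments would go here */
}

static uint32_t my_guest_adjust_cpuid_features(X86ConfidentialGuest *cg,
                                               uint32_t feature, uint32_t index,
                                               int reg, uint32_t value)
{
    return value;   /* nothing removed or forced on in this sketch */
}

static int my_guest_check_features(X86ConfidentialGuest *cg, CPUState *cs)
{
    return 0;       /* accept the configured features */
}

static void my_guest_class_init(ObjectClass *oc, void *data)
{
    X86ConfidentialGuestClass *k = X86_CONFIDENTIAL_GUEST_CLASS(oc);

    k->cpu_instance_init = my_guest_cpu_instance_init;
    k->adjust_cpuid_features = my_guest_adjust_cpuid_features;
    k->check_features = my_guest_check_features;
}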
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 33afc3e..40aefb3 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -37,6 +37,7 @@
#include "hw/i386/topology.h"
#include "exec/watchpoint.h"
#ifndef CONFIG_USER_ONLY
+#include "confidential-guest.h"
#include "system/reset.h"
#include "qapi/qapi-commands-machine.h"
#include "system/address-spaces.h"
@@ -899,6 +900,7 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
#define TCG_7_1_EAX_FEATURES (CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS | \
CPUID_7_1_EAX_FSRC | CPUID_7_1_EAX_CMPCCXADD)
+#define TCG_7_1_ECX_FEATURES 0
#define TCG_7_1_EDX_FEATURES 0
#define TCG_7_2_EDX_FEATURES 0
#define TCG_APM_FEATURES 0
@@ -1149,6 +1151,25 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
},
.tcg_features = TCG_7_1_EAX_FEATURES,
},
+ [FEAT_7_1_ECX] = {
+ .type = CPUID_FEATURE_WORD,
+ .feat_names = {
+ NULL, NULL, NULL, NULL,
+ NULL, "msr-imm", NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ },
+ .cpuid = {
+ .eax = 7,
+ .needs_ecx = true, .ecx = 1,
+ .reg = R_ECX,
+ },
+ .tcg_features = TCG_7_1_ECX_FEATURES,
+ },
[FEAT_7_1_EDX] = {
.type = CPUID_FEATURE_WORD,
.feat_names = {
@@ -1252,12 +1273,12 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
[FEAT_8000_0021_EAX] = {
.type = CPUID_FEATURE_WORD,
.feat_names = {
- "no-nested-data-bp", NULL, "lfence-always-serializing", NULL,
+ "no-nested-data-bp", "fs-gs-base-ns", "lfence-always-serializing", NULL,
NULL, NULL, "null-sel-clr-base", NULL,
"auto-ibrs", NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
- NULL, NULL, NULL, NULL,
+ "prefetchi", NULL, NULL, NULL,
"eraps", NULL, NULL, "sbpb",
"ibpb-brtype", "srso-no", "srso-user-kernel-no", NULL,
},
@@ -1677,14 +1698,21 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
},
};
-typedef struct FeatureMask {
- FeatureWord index;
- uint64_t mask;
-} FeatureMask;
+bool is_feature_word_cpuid(uint32_t feature, uint32_t index, int reg)
+{
+ FeatureWordInfo *wi;
+ FeatureWord w;
-typedef struct FeatureDep {
- FeatureMask from, to;
-} FeatureDep;
+ for (w = 0; w < FEATURE_WORDS; w++) {
+ wi = &feature_word_info[w];
+ if (wi->type == CPUID_FEATURE_WORD && wi->cpuid.eax == feature &&
+ (!wi->cpuid.needs_ecx || wi->cpuid.ecx == index) &&
+ wi->cpuid.reg == reg) {
+ return true;
+ }
+ }
+ return false;
+}
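For example, is_feature_word_cpuid(7, 1, R_ECX) returns true once FEAT_7_1_ECX above is registered, while a query for a sub-leaf QEMU does not track as a feature word (say leaf 7, sub-leaf 3, R_EAX) returns false.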
static FeatureDep feature_dependencies[] = {
{
@@ -1796,10 +1824,6 @@ static FeatureDep feature_dependencies[] = {
.to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED },
},
{
- .from = { FEAT_7_1_EAX, CPUID_7_1_EAX_WRMSRNS },
- .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED },
- },
- {
.from = { FEAT_7_0_EBX, CPUID_7_0_EBX_SGX },
.to = { FEAT_7_0_ECX, CPUID_7_0_ECX_SGX_LC },
},
@@ -1854,9 +1878,6 @@ static const X86RegisterInfo32 x86_reg_info_32[CPU_NB_REGS32] = {
};
#undef REGISTER
-/* CPUID feature bits available in XSS */
-#define CPUID_XSTATE_XSS_MASK (XSTATE_ARCH_LBR_MASK)
-
ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = {
[XSTATE_FP_BIT] = {
/* x87 FP state component is always enabled if XSAVE is supported */
@@ -2206,6 +2227,60 @@ static CPUCaches epyc_v4_cache_info = {
},
};
+static CPUCaches epyc_v5_cache_info = {
+ .l1d_cache = &(CPUCacheInfo) {
+ .type = DATA_CACHE,
+ .level = 1,
+ .size = 32 * KiB,
+ .line_size = 64,
+ .associativity = 8,
+ .partitions = 1,
+ .sets = 64,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l1i_cache = &(CPUCacheInfo) {
+ .type = INSTRUCTION_CACHE,
+ .level = 1,
+ .size = 64 * KiB,
+ .line_size = 64,
+ .associativity = 4,
+ .partitions = 1,
+ .sets = 256,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l2_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 2,
+ .size = 512 * KiB,
+ .line_size = 64,
+ .associativity = 8,
+ .partitions = 1,
+ .sets = 1024,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .inclusive = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l3_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 3,
+ .size = 8 * MiB,
+ .line_size = 64,
+ .associativity = 16,
+ .partitions = 1,
+ .sets = 8192,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .no_invd_sharing = true,
+ .complex_indexing = false,
+ .share_level = CPU_TOPOLOGY_LEVEL_DIE,
+ },
+};
+
static const CPUCaches epyc_rome_cache_info = {
.l1d_cache = &(CPUCacheInfo) {
.type = DATA_CACHE,
@@ -2314,6 +2389,60 @@ static const CPUCaches epyc_rome_v3_cache_info = {
},
};
+static const CPUCaches epyc_rome_v5_cache_info = {
+ .l1d_cache = &(CPUCacheInfo) {
+ .type = DATA_CACHE,
+ .level = 1,
+ .size = 32 * KiB,
+ .line_size = 64,
+ .associativity = 8,
+ .partitions = 1,
+ .sets = 64,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l1i_cache = &(CPUCacheInfo) {
+ .type = INSTRUCTION_CACHE,
+ .level = 1,
+ .size = 32 * KiB,
+ .line_size = 64,
+ .associativity = 8,
+ .partitions = 1,
+ .sets = 64,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l2_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 2,
+ .size = 512 * KiB,
+ .line_size = 64,
+ .associativity = 8,
+ .partitions = 1,
+ .sets = 1024,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .inclusive = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l3_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 3,
+ .size = 16 * MiB,
+ .line_size = 64,
+ .associativity = 16,
+ .partitions = 1,
+ .sets = 16384,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .no_invd_sharing = true,
+ .complex_indexing = false,
+ .share_level = CPU_TOPOLOGY_LEVEL_DIE,
+ },
+};
+
static const CPUCaches epyc_milan_cache_info = {
.l1d_cache = &(CPUCacheInfo) {
.type = DATA_CACHE,
@@ -2422,6 +2551,60 @@ static const CPUCaches epyc_milan_v2_cache_info = {
},
};
+static const CPUCaches epyc_milan_v3_cache_info = {
+ .l1d_cache = &(CPUCacheInfo) {
+ .type = DATA_CACHE,
+ .level = 1,
+ .size = 32 * KiB,
+ .line_size = 64,
+ .associativity = 8,
+ .partitions = 1,
+ .sets = 64,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l1i_cache = &(CPUCacheInfo) {
+ .type = INSTRUCTION_CACHE,
+ .level = 1,
+ .size = 32 * KiB,
+ .line_size = 64,
+ .associativity = 8,
+ .partitions = 1,
+ .sets = 64,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l2_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 2,
+ .size = 512 * KiB,
+ .line_size = 64,
+ .associativity = 8,
+ .partitions = 1,
+ .sets = 1024,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .inclusive = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l3_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 3,
+ .size = 32 * MiB,
+ .line_size = 64,
+ .associativity = 16,
+ .partitions = 1,
+ .sets = 32768,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .no_invd_sharing = true,
+ .complex_indexing = false,
+ .share_level = CPU_TOPOLOGY_LEVEL_DIE,
+ },
+};
+
static const CPUCaches epyc_genoa_cache_info = {
.l1d_cache = &(CPUCacheInfo) {
.type = DATA_CACHE,
@@ -2476,6 +2659,114 @@ static const CPUCaches epyc_genoa_cache_info = {
},
};
+static const CPUCaches epyc_genoa_v2_cache_info = {
+ .l1d_cache = &(CPUCacheInfo) {
+ .type = DATA_CACHE,
+ .level = 1,
+ .size = 32 * KiB,
+ .line_size = 64,
+ .associativity = 8,
+ .partitions = 1,
+ .sets = 64,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l1i_cache = &(CPUCacheInfo) {
+ .type = INSTRUCTION_CACHE,
+ .level = 1,
+ .size = 32 * KiB,
+ .line_size = 64,
+ .associativity = 8,
+ .partitions = 1,
+ .sets = 64,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l2_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 2,
+ .size = 1 * MiB,
+ .line_size = 64,
+ .associativity = 8,
+ .partitions = 1,
+ .sets = 2048,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .inclusive = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l3_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 3,
+ .size = 32 * MiB,
+ .line_size = 64,
+ .associativity = 16,
+ .partitions = 1,
+ .sets = 32768,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .no_invd_sharing = true,
+ .complex_indexing = false,
+ .share_level = CPU_TOPOLOGY_LEVEL_DIE,
+ },
+};
+
+static const CPUCaches epyc_turin_cache_info = {
+ .l1d_cache = &(CPUCacheInfo) {
+ .type = DATA_CACHE,
+ .level = 1,
+ .size = 48 * KiB,
+ .line_size = 64,
+ .associativity = 12,
+ .partitions = 1,
+ .sets = 64,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l1i_cache = &(CPUCacheInfo) {
+ .type = INSTRUCTION_CACHE,
+ .level = 1,
+ .size = 32 * KiB,
+ .line_size = 64,
+ .associativity = 8,
+ .partitions = 1,
+ .sets = 64,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l2_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 2,
+ .size = 1 * MiB,
+ .line_size = 64,
+ .associativity = 16,
+ .partitions = 1,
+ .sets = 1024,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .inclusive = true,
+ .share_level = CPU_TOPOLOGY_LEVEL_CORE,
+ },
+ .l3_cache = &(CPUCacheInfo) {
+ .type = UNIFIED_CACHE,
+ .level = 3,
+ .size = 32 * MiB,
+ .line_size = 64,
+ .associativity = 16,
+ .partitions = 1,
+ .sets = 32768,
+ .lines_per_tag = 1,
+ .self_init = true,
+ .no_invd_sharing = true,
+ .complex_indexing = false,
+ .share_level = CPU_TOPOLOGY_LEVEL_DIE,
+ },
+};
+
/* The following VMX features are not supported by KVM and are left out in the
* CPU definitions:
*
@@ -5233,6 +5524,25 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
.cache_info = &epyc_v4_cache_info
},
+ {
+ .version = 5,
+ .props = (PropValue[]) {
+ { "overflow-recov", "on" },
+ { "succor", "on" },
+ { "lbrv", "on" },
+ { "tsc-scale", "on" },
+ { "vmcb-clean", "on" },
+ { "flushbyasid", "on" },
+ { "pause-filter", "on" },
+ { "pfthreshold", "on" },
+ { "v-vmsave-vmload", "on" },
+ { "vgif", "on" },
+ { "model-id",
+ "AMD EPYC-v5 Processor" },
+ { /* end of list */ }
+ },
+ .cache_info = &epyc_v5_cache_info
+ },
{ /* end of list */ }
}
},
@@ -5371,6 +5681,25 @@ static const X86CPUDefinition builtin_x86_defs[] = {
{ /* end of list */ }
},
},
+ {
+ .version = 5,
+ .props = (PropValue[]) {
+ { "overflow-recov", "on" },
+ { "succor", "on" },
+ { "lbrv", "on" },
+ { "tsc-scale", "on" },
+ { "vmcb-clean", "on" },
+ { "flushbyasid", "on" },
+ { "pause-filter", "on" },
+ { "pfthreshold", "on" },
+ { "v-vmsave-vmload", "on" },
+ { "vgif", "on" },
+ { "model-id",
+ "AMD EPYC-Rome-v5 Processor" },
+ { /* end of list */ }
+ },
+ .cache_info = &epyc_rome_v5_cache_info
+ },
{ /* end of list */ }
}
},
@@ -5446,6 +5775,25 @@ static const X86CPUDefinition builtin_x86_defs[] = {
},
.cache_info = &epyc_milan_v2_cache_info
},
+ {
+ .version = 3,
+ .props = (PropValue[]) {
+ { "overflow-recov", "on" },
+ { "succor", "on" },
+ { "lbrv", "on" },
+ { "tsc-scale", "on" },
+ { "vmcb-clean", "on" },
+ { "flushbyasid", "on" },
+ { "pause-filter", "on" },
+ { "pfthreshold", "on" },
+ { "v-vmsave-vmload", "on" },
+ { "vgif", "on" },
+ { "model-id",
+ "AMD EPYC-Milan-v3 Processor" },
+ { /* end of list */ }
+ },
+ .cache_info = &epyc_milan_v3_cache_info
+ },
{ /* end of list */ }
}
},
@@ -5520,6 +5868,31 @@ static const X86CPUDefinition builtin_x86_defs[] = {
.xlevel = 0x80000022,
.model_id = "AMD EPYC-Genoa Processor",
.cache_info = &epyc_genoa_cache_info,
+ .versions = (X86CPUVersionDefinition[]) {
+ { .version = 1 },
+ {
+ .version = 2,
+ .props = (PropValue[]) {
+ { "overflow-recov", "on" },
+ { "succor", "on" },
+ { "lbrv", "on" },
+ { "tsc-scale", "on" },
+ { "vmcb-clean", "on" },
+ { "flushbyasid", "on" },
+ { "pause-filter", "on" },
+ { "pfthreshold", "on" },
+ { "v-vmsave-vmload", "on" },
+ { "vgif", "on" },
+ { "fs-gs-base-ns", "on" },
+ { "perfmon-v2", "on" },
+ { "model-id",
+ "AMD EPYC-Genoa-v2 Processor" },
+ { /* end of list */ }
+ },
+ .cache_info = &epyc_genoa_v2_cache_info
+ },
+ { /* end of list */ }
+ }
},
{
.name = "YongFeng",
@@ -5657,6 +6030,89 @@ static const X86CPUDefinition builtin_x86_defs[] = {
{ /* end of list */ }
}
},
+ {
+ .name = "EPYC-Turin",
+ .level = 0xd,
+ .vendor = CPUID_VENDOR_AMD,
+ .family = 26,
+ .model = 0,
+ .stepping = 0,
+ .features[FEAT_1_ECX] =
+ CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX |
+ CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT |
+ CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 |
+ CPUID_EXT_PCID | CPUID_EXT_CX16 | CPUID_EXT_FMA |
+ CPUID_EXT_SSSE3 | CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ |
+ CPUID_EXT_SSE3,
+ .features[FEAT_1_EDX] =
+ CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH |
+ CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE |
+ CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE |
+ CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE |
+ CPUID_VME | CPUID_FP87,
+ .features[FEAT_6_EAX] =
+ CPUID_6_EAX_ARAT,
+ .features[FEAT_7_0_EBX] =
+ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 |
+ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS |
+ CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_AVX512F |
+ CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX |
+ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_AVX512IFMA |
+ CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB |
+ CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI |
+ CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL,
+ .features[FEAT_7_0_ECX] =
+ CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU |
+ CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI |
+ CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ |
+ CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG |
+ CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 |
+ CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_MOVDIRI |
+ CPUID_7_0_ECX_MOVDIR64B,
+ .features[FEAT_7_0_EDX] =
+ CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_AVX512_VP2INTERSECT,
+ .features[FEAT_7_1_EAX] =
+ CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_AVX512_BF16,
+ .features[FEAT_8000_0001_ECX] =
+ CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH |
+ CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM |
+ CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM |
+ CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE,
+ .features[FEAT_8000_0001_EDX] =
+ CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB |
+ CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX |
+ CPUID_EXT2_SYSCALL,
+ .features[FEAT_8000_0007_EBX] =
+ CPUID_8000_0007_EBX_OVERFLOW_RECOV | CPUID_8000_0007_EBX_SUCCOR,
+ .features[FEAT_8000_0008_EBX] =
+ CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR |
+ CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB |
+ CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP |
+ CPUID_8000_0008_EBX_STIBP_ALWAYS_ON |
+ CPUID_8000_0008_EBX_AMD_SSBD | CPUID_8000_0008_EBX_AMD_PSFD,
+ .features[FEAT_8000_0021_EAX] =
+ CPUID_8000_0021_EAX_NO_NESTED_DATA_BP |
+ CPUID_8000_0021_EAX_FS_GS_BASE_NS |
+ CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING |
+ CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE |
+ CPUID_8000_0021_EAX_AUTO_IBRS | CPUID_8000_0021_EAX_PREFETCHI |
+ CPUID_8000_0021_EAX_SBPB | CPUID_8000_0021_EAX_IBPB_BRTYPE |
+ CPUID_8000_0021_EAX_SRSO_USER_KERNEL_NO,
+ .features[FEAT_8000_0022_EAX] =
+ CPUID_8000_0022_EAX_PERFMON_V2,
+ .features[FEAT_XSAVE] =
+ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC |
+ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES,
+ .features[FEAT_SVM] =
+ CPUID_SVM_NPT | CPUID_SVM_LBRV | CPUID_SVM_NRIPSAVE |
+ CPUID_SVM_TSCSCALE | CPUID_SVM_VMCBCLEAN | CPUID_SVM_FLUSHASID |
+ CPUID_SVM_PAUSEFILTER | CPUID_SVM_PFTHRESHOLD |
+ CPUID_SVM_V_VMSAVE_VMLOAD | CPUID_SVM_VGIF |
+ CPUID_SVM_VNMI | CPUID_SVM_SVME_ADDR_CHK,
+ .xlevel = 0x80000022,
+ .model_id = "AMD EPYC-Turin Processor",
+ .cache_info = &epyc_turin_cache_info,
+ },
};
/*
@@ -5766,7 +6222,7 @@ static const TypeInfo max_x86_cpu_type_info = {
.class_init = max_x86_cpu_class_init,
};
-static char *feature_word_description(FeatureWordInfo *f, uint32_t bit)
+static char *feature_word_description(FeatureWordInfo *f)
{
assert(f->type == CPUID_FEATURE_WORD || f->type == MSR_FEATURE_WORD);
@@ -5775,11 +6231,15 @@ static char *feature_word_description(FeatureWordInfo *f, uint32_t bit)
{
const char *reg = get_register_name_32(f->cpuid.reg);
assert(reg);
- return g_strdup_printf("CPUID.%02XH:%s",
- f->cpuid.eax, reg);
+ if (!f->cpuid.needs_ecx) {
+ return g_strdup_printf("CPUID[eax=%02Xh].%s", f->cpuid.eax, reg);
+ } else {
+ return g_strdup_printf("CPUID[eax=%02Xh,ecx=%02Xh].%s",
+ f->cpuid.eax, f->cpuid.ecx, reg);
+ }
}
case MSR_FEATURE_WORD:
- return g_strdup_printf("MSR(%02XH)",
+ return g_strdup_printf("MSR(%02Xh)",
f->msr.index);
}
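With the new format, FEAT_7_1_ECX is described as "CPUID[eax=07h,ecx=01h].ECX", a leaf without a sub-leaf such as FEAT_1_EDX as "CPUID[eax=01h].EDX", and an MSR-based feature word as "MSR(10Ah)" (the MSR index shown is only illustrative).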
@@ -5799,12 +6259,13 @@ static bool x86_cpu_have_filtered_features(X86CPU *cpu)
return false;
}
-static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
- const char *verbose_prefix)
+void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
+ const char *verbose_prefix)
{
CPUX86State *env = &cpu->env;
FeatureWordInfo *f = &feature_word_info[w];
int i;
+ g_autofree char *feat_word_str = feature_word_description(f);
if (!cpu->force_features) {
env->features[w] &= ~mask;
@@ -5817,7 +6278,35 @@ static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
for (i = 0; i < 64; ++i) {
if ((1ULL << i) & mask) {
- g_autofree char *feat_word_str = feature_word_description(f, i);
+ warn_report("%s: %s%s%s [bit %d]",
+ verbose_prefix,
+ feat_word_str,
+ f->feat_names[i] ? "." : "",
+ f->feat_names[i] ? f->feat_names[i] : "", i);
+ }
+ }
+}
+
+void mark_forced_on_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
+ const char *verbose_prefix)
+{
+ CPUX86State *env = &cpu->env;
+ FeatureWordInfo *f = &feature_word_info[w];
+ int i;
+
+ if (!cpu->force_features) {
+ env->features[w] |= mask;
+ }
+
+ cpu->forced_on_features[w] |= mask;
+
+ if (!verbose_prefix) {
+ return;
+ }
+
+ for (i = 0; i < 64; ++i) {
+ if ((1ULL << i) & mask) {
+ g_autofree char *feat_word_str = feature_word_description(f);
warn_report("%s: %s%s%s [bit %d]",
verbose_prefix,
feat_word_str,
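A hypothetical caller sketch for the new helper: a confidential-guest backend could force on bits its firmware always exposes, symmetric with how mark_unavailable_features() reports filtered bits (the function name and the chosen bit are illustrative only):

static void example_force_fixed_bits(X86CPU *cpu)
{
    /* CPUID_SSE2 in FEAT_1_EDX is just an illustrative bit */
    mark_forced_on_features(cpu, FEAT_1_EDX, CPUID_SSE2,
                            "forced on by confidential-guest firmware");
}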
@@ -6973,9 +7462,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*edx = env->features[FEAT_7_0_EDX]; /* Feature flags */
} else if (count == 1) {
*eax = env->features[FEAT_7_1_EAX];
+ *ecx = env->features[FEAT_7_1_ECX];
*edx = env->features[FEAT_7_1_EDX];
*ebx = 0;
- *ecx = 0;
} else if (count == 2) {
*edx = env->features[FEAT_7_2_EDX];
*eax = 0;
@@ -7044,7 +7533,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
break;
case 0x1F:
/* V2 Extended Topology Enumeration Leaf */
- if (!x86_has_extended_topo(env->avail_cpu_topo)) {
+ if (!x86_has_cpuid_0x1f(cpu)) {
*eax = *ebx = *ecx = *edx = 0;
break;
}
@@ -7880,6 +8369,7 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
x86_cpu_adjust_feat_level(cpu, FEAT_6_EAX);
x86_cpu_adjust_feat_level(cpu, FEAT_7_0_ECX);
x86_cpu_adjust_feat_level(cpu, FEAT_7_1_EAX);
+ x86_cpu_adjust_feat_level(cpu, FEAT_7_1_ECX);
x86_cpu_adjust_feat_level(cpu, FEAT_7_1_EDX);
x86_cpu_adjust_feat_level(cpu, FEAT_7_2_EDX);
x86_cpu_adjust_feat_level(cpu, FEAT_8000_0001_EDX);
@@ -7908,7 +8398,7 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp)
* cpu->vendor_cpuid_only has been unset for compatibility with older
* machine types.
*/
- if (x86_has_extended_topo(env->avail_cpu_topo) &&
+ if (x86_has_cpuid_0x1f(cpu) &&
(IS_INTEL_CPU(env) || !cpu->vendor_cpuid_only)) {
x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x1F);
}
@@ -8543,6 +9033,13 @@ static void x86_cpu_post_initfn(Object *obj)
}
accel_cpu_instance_init(CPU(obj));
+
+#ifndef CONFIG_USER_ONLY
+ if (current_machine && current_machine->cgs) {
+ x86_confidential_guest_cpu_instance_init(
+ X86_CONFIDENTIAL_GUEST(current_machine->cgs), (CPU(obj)));
+ }
+#endif
}
static void x86_cpu_init_default_topo(X86CPU *cpu)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index c51e0a4..545851c 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -584,6 +584,7 @@ typedef enum X86Seg {
#define XSTATE_OPMASK_BIT 5
#define XSTATE_ZMM_Hi256_BIT 6
#define XSTATE_Hi16_ZMM_BIT 7
+#define XSTATE_PT_BIT 8
#define XSTATE_PKRU_BIT 9
#define XSTATE_ARCH_LBR_BIT 15
#define XSTATE_XTILE_CFG_BIT 17
@@ -597,6 +598,7 @@ typedef enum X86Seg {
#define XSTATE_OPMASK_MASK (1ULL << XSTATE_OPMASK_BIT)
#define XSTATE_ZMM_Hi256_MASK (1ULL << XSTATE_ZMM_Hi256_BIT)
#define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT)
+#define XSTATE_PT_MASK (1ULL << XSTATE_PT_BIT)
#define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT)
#define XSTATE_ARCH_LBR_MASK (1ULL << XSTATE_ARCH_LBR_BIT)
#define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT)
@@ -619,6 +621,11 @@ typedef enum X86Seg {
XSTATE_Hi16_ZMM_MASK | XSTATE_PKRU_MASK | \
XSTATE_XTILE_CFG_MASK | XSTATE_XTILE_DATA_MASK)
+/* CPUID feature bits available in XSS */
+#define CPUID_XSTATE_XSS_MASK (XSTATE_ARCH_LBR_MASK)
+
+#define CPUID_XSTATE_MASK (CPUID_XSTATE_XCR0_MASK | CPUID_XSTATE_XSS_MASK)
+
/* CPUID feature words */
typedef enum FeatureWord {
FEAT_1_EDX, /* CPUID[1].EDX */
@@ -661,12 +668,22 @@ typedef enum FeatureWord {
FEAT_SGX_12_1_EAX, /* CPUID[EAX=0x12,ECX=1].EAX (SGX ATTRIBUTES[31:0]) */
FEAT_XSAVE_XSS_LO, /* CPUID[EAX=0xd,ECX=1].ECX */
FEAT_XSAVE_XSS_HI, /* CPUID[EAX=0xd,ECX=1].EDX */
+ FEAT_7_1_ECX, /* CPUID[EAX=7,ECX=1].ECX */
FEAT_7_1_EDX, /* CPUID[EAX=7,ECX=1].EDX */
FEAT_7_2_EDX, /* CPUID[EAX=7,ECX=2].EDX */
FEAT_24_0_EBX, /* CPUID[EAX=0x24,ECX=0].EBX */
FEATURE_WORDS,
} FeatureWord;
+typedef struct FeatureMask {
+ FeatureWord index;
+ uint64_t mask;
+} FeatureMask;
+
+typedef struct FeatureDep {
+ FeatureMask from, to;
+} FeatureDep;
+
typedef uint64_t FeatureWordArray[FEATURE_WORDS];
uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
@@ -899,6 +916,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
#define CPUID_7_0_ECX_LA57 (1U << 16)
/* Read Processor ID */
#define CPUID_7_0_ECX_RDPID (1U << 22)
+/* KeyLocker */
+#define CPUID_7_0_ECX_KeyLocker (1U << 23)
/* Bus Lock Debug Exception */
#define CPUID_7_0_ECX_BUS_LOCK_DETECT (1U << 24)
/* Cache Line Demote Instruction */
@@ -920,6 +939,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
#define CPUID_7_0_EDX_FSRM (1U << 4)
/* AVX512 Vector Pair Intersection to a Pair of Mask Registers */
#define CPUID_7_0_EDX_AVX512_VP2INTERSECT (1U << 8)
+ /* "md_clear" VERW clears CPU buffers */
+#define CPUID_7_0_EDX_MD_CLEAR (1U << 10)
/* SERIALIZE instruction */
#define CPUID_7_0_EDX_SERIALIZE (1U << 14)
/* TSX Suspend Load Address Tracking instruction */
@@ -957,6 +978,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
#define CPUID_7_1_EAX_AVX_VNNI (1U << 4)
/* AVX512 BFloat16 Instruction */
#define CPUID_7_1_EAX_AVX512_BF16 (1U << 5)
+/* Linear address space separation */
+#define CPUID_7_1_EAX_LASS (1U << 6)
/* CMPCCXADD Instructions */
#define CPUID_7_1_EAX_CMPCCXADD (1U << 7)
/* Fast Zero REP MOVS */
@@ -978,6 +1001,9 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
/* Linear Address Masking */
#define CPUID_7_1_EAX_LAM (1U << 26)
+/* The immediate form of MSR access instructions */
+#define CPUID_7_1_ECX_MSR_IMM (1U << 5)
+
/* Support for VPDPB[SU,UU,SS]D[,S] */
#define CPUID_7_1_EDX_AVX_VNNI_INT8 (1U << 4)
/* AVX NE CONVERT Instructions */
@@ -1001,6 +1027,7 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
#define CPUID_7_2_EDX_DDPD_U (1U << 3)
/* Indicate bit 10 of the IA32_SPEC_CTRL MSR is supported */
#define CPUID_7_2_EDX_BHI_CTRL (1U << 4)
+
/* Do not exhibit MXCSR Configuration Dependent Timing (MCDT) behavior */
#define CPUID_7_2_EDX_MCDT_NO (1U << 5)
@@ -1070,12 +1097,16 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
/* Processor ignores nested data breakpoints */
#define CPUID_8000_0021_EAX_NO_NESTED_DATA_BP (1U << 0)
+/* WRMSR to FS_BASE, GS_BASE, or KERNEL_GS_BASE is non-serializing */
+#define CPUID_8000_0021_EAX_FS_GS_BASE_NS (1U << 1)
/* LFENCE is always serializing */
#define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2)
/* Null Selector Clears Base */
#define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6)
/* Automatic IBRS */
#define CPUID_8000_0021_EAX_AUTO_IBRS (1U << 8)
+/* Indicates support for IC prefetch */
+#define CPUID_8000_0021_EAX_PREFETCHI (1U << 20)
/* Enhanced Return Address Predictor Security */
#define CPUID_8000_0021_EAX_ERAPS (1U << 24)
/* Selective Branch Predictor Barrier */
@@ -1100,6 +1131,7 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w);
#define CPUID_XSAVE_XSAVEC (1U << 1)
#define CPUID_XSAVE_XGETBV1 (1U << 2)
#define CPUID_XSAVE_XSAVES (1U << 3)
+#define CPUID_XSAVE_XFD (1U << 4)
#define CPUID_6_EAX_ARAT (1U << 2)
@@ -2192,6 +2224,9 @@ struct ArchCPU {
/* Features that were filtered out because of missing host capabilities */
FeatureWordArray filtered_features;
+ /* Features that are forcibly enabled by the underlying hypervisor, e.g., TDX */
+ FeatureWordArray forced_on_features;
+
/* Enable PMU CPUID bits. This can't be enabled by default yet because
* it doesn't have ABI stability guarantees, as it passes all PMU CPUID
* bits returned by GET_SUPPORTED_CPUID (that depend on host CPU and kernel
@@ -2239,6 +2274,9 @@ struct ArchCPU {
/* Compatibility bits for old machine types: */
bool enable_cpuid_0xb;
+ /* Force-enable CPUID leaf 0x1f */
+ bool enable_cpuid_0x1f;
+
/* Enable auto level-increase for all CPUID leaves */
bool full_cpuid_auto_level;
@@ -2499,6 +2537,17 @@ void cpu_set_apic_feature(CPUX86State *env);
void host_cpuid(uint32_t function, uint32_t count,
uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx);
bool cpu_has_x2apic_feature(CPUX86State *env);
+bool is_feature_word_cpuid(uint32_t feature, uint32_t index, int reg);
+void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
+ const char *verbose_prefix);
+void mark_forced_on_features(X86CPU *cpu, FeatureWord w, uint64_t mask,
+ const char *verbose_prefix);
+
+static inline bool x86_has_cpuid_0x1f(X86CPU *cpu)
+{
+ return cpu->enable_cpuid_0x1f ||
+ x86_has_extended_topo(cpu->env.avail_cpu_topo);
+}
/* helper.c */
void x86_cpu_set_a20(X86CPU *cpu, int a20_state);
diff --git a/target/i386/emulate/x86_flags.c b/target/i386/emulate/x86_flags.c
index 47bc197..cc138c7 100644
--- a/target/i386/emulate/x86_flags.c
+++ b/target/i386/emulate/x86_flags.c
@@ -255,19 +255,19 @@ void lflags_to_rflags(CPUX86State *env)
void rflags_to_lflags(CPUX86State *env)
{
- target_ulong cf_xor_of;
+ target_ulong cf_af, cf_xor_of;
+ /* Leave the low byte zero so that parity is always even... */
+ env->cc_dst = !(env->eflags & CC_Z) << 8;
+
+ /* ... and therefore cc_src always uses opposite polarity. */
env->cc_src = CC_P;
env->cc_src ^= env->eflags & (CC_S | CC_P);
/* rotate right by one to move CF and AF into the carry-out positions */
- env->cc_src |= (
- (env->eflags >> 1) |
- (env->eflags << (TARGET_LONG_BITS - 1))) & (CC_C | CC_A);
+ cf_af = env->eflags & (CC_C | CC_A);
+ env->cc_src |= ((cf_af >> 1) | (cf_af << (TARGET_LONG_BITS - 1)));
- cf_xor_of = (env->eflags & (CC_C | CC_O)) + (CC_O - CC_C);
+ cf_xor_of = ((env->eflags & (CC_C | CC_O)) + (CC_O - CC_C)) & CC_O;
env->cc_src |= -cf_xor_of & LF_MASK_PO;
-
- /* Leave the low byte zero so that parity is not affected. */
- env->cc_dst = !(env->eflags & CC_Z) << 8;
}
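A note on the carry/overflow fold above, assuming the usual EFLAGS encodings CC_C == 0x001 and CC_O == 0x800: adding (CC_O - CC_C) == 0x7ff to a value that holds CF in bit 0 and OF in bit 11 makes bit 11 of the sum equal CF XOR OF, so masking with CC_O yields CC_O exactly when CF != OF, and -cf_xor_of & LF_MASK_PO then sets the PO flag bits only in that case.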
diff --git a/target/i386/host-cpu.c b/target/i386/host-cpu.c
index a2d3830..7512567 100644
--- a/target/i386/host-cpu.c
+++ b/target/i386/host-cpu.c
@@ -15,7 +15,7 @@
#include "system/system.h"
/* Note: Only safe for use on x86(-64) hosts */
-static uint32_t host_cpu_phys_bits(void)
+uint32_t host_cpu_phys_bits(void)
{
uint32_t eax;
uint32_t host_phys_bits;
diff --git a/target/i386/host-cpu.h b/target/i386/host-cpu.h
index 6a9bc91..b97ec01 100644
--- a/target/i386/host-cpu.h
+++ b/target/i386/host-cpu.h
@@ -10,6 +10,7 @@
#ifndef HOST_CPU_H
#define HOST_CPU_H
+uint32_t host_cpu_phys_bits(void);
void host_cpu_instance_init(X86CPU *cpu);
void host_cpu_max_instance_init(X86CPU *cpu);
bool host_cpu_realizefn(CPUState *cs, Error **errp);
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index c9a3c02..a6bc089 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -38,6 +38,7 @@
#include "kvm_i386.h"
#include "../confidential-guest.h"
#include "sev.h"
+#include "tdx.h"
#include "xen-emu.h"
#include "hyperv.h"
#include "hyperv-proto.h"
@@ -192,6 +193,7 @@ static const char *vm_type_name[] = {
[KVM_X86_SEV_VM] = "SEV",
[KVM_X86_SEV_ES_VM] = "SEV-ES",
[KVM_X86_SNP_VM] = "SEV-SNP",
+ [KVM_X86_TDX_VM] = "TDX",
};
bool kvm_is_vm_type_supported(int type)
@@ -326,7 +328,7 @@ void kvm_synchronize_all_tsc(void)
{
CPUState *cpu;
- if (kvm_enabled()) {
+ if (kvm_enabled() && !is_tdx_vm()) {
CPU_FOREACH(cpu) {
run_on_cpu(cpu, do_kvm_synchronize_tsc, RUN_ON_CPU_NULL);
}
@@ -392,7 +394,7 @@ static bool host_tsx_broken(void)
/* Returns the value for a specific register on the cpuid entry
*/
-static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg)
+uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg)
{
uint32_t ret = 0;
switch (reg) {
@@ -414,9 +416,9 @@ static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg)
/* Find matching entry for function/index on kvm_cpuid2 struct
*/
-static struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid,
- uint32_t function,
- uint32_t index)
+struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid,
+ uint32_t function,
+ uint32_t index)
{
int i;
for (i = 0; i < cpuid->nent; ++i) {
@@ -572,7 +574,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
}
if (current_machine->cgs) {
- ret = x86_confidential_guest_mask_cpuid_features(
+ ret = x86_confidential_guest_adjust_cpuid_features(
X86_CONFIDENTIAL_GUEST(current_machine->cgs),
function, index, reg, ret);
}
@@ -868,6 +870,15 @@ static int kvm_arch_set_tsc_khz(CPUState *cs)
int r, cur_freq;
bool set_ioctl = false;
+ /*
+ * The TSC of a TD vcpu is immutable: it cannot be set or changed via the
+ * vcpu-scope VM_SET_TSC_KHZ; it can only be initialized via the VM-scope
+ * VM_SET_TSC_KHZ before the KVM_TDX_INIT_VM ioctl in tdx_pre_create_vcpu().
+ */
+ if (is_tdx_vm()) {
+ return 0;
+ }
+
if (!env->tsc_khz) {
return 0;
}
@@ -1779,8 +1790,6 @@ static int hyperv_init_vcpu(X86CPU *cpu)
static Error *invtsc_mig_blocker;
-#define KVM_MAX_CPUID_ENTRIES 100
-
static void kvm_init_xsave(CPUX86State *env)
{
if (has_xsave2) {
@@ -1823,9 +1832,8 @@ static void kvm_init_nested_state(CPUX86State *env)
}
}
-static uint32_t kvm_x86_build_cpuid(CPUX86State *env,
- struct kvm_cpuid_entry2 *entries,
- uint32_t cpuid_i)
+uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries,
+ uint32_t cpuid_i)
{
uint32_t limit, i, j;
uint32_t unused;
@@ -1864,7 +1872,7 @@ static uint32_t kvm_x86_build_cpuid(CPUX86State *env,
break;
}
case 0x1f:
- if (!x86_has_extended_topo(env->avail_cpu_topo)) {
+ if (!x86_has_cpuid_0x1f(env_archcpu(env))) {
cpuid_i--;
break;
}
@@ -2052,6 +2060,15 @@ full:
abort();
}
+int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+ if (is_tdx_vm()) {
+ return tdx_pre_create_vcpu(cpu, errp);
+ }
+
+ return 0;
+}
+
int kvm_arch_init_vcpu(CPUState *cs)
{
struct {
@@ -2076,6 +2093,14 @@ int kvm_arch_init_vcpu(CPUState *cs)
int r;
Error *local_err = NULL;
+ if (current_machine->cgs) {
+ r = x86_confidential_guest_check_features(
+ X86_CONFIDENTIAL_GUEST(current_machine->cgs), cs);
+ if (r < 0) {
+ return r;
+ }
+ }
+
memset(&cpuid_data, 0, sizeof(cpuid_data));
cpuid_i = 0;
@@ -3206,16 +3231,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
Error *local_err = NULL;
/*
- * Initialize SEV context, if required
- *
- * If no memory encryption is requested (ms->cgs == NULL) this is
- * a no-op.
- *
- * It's also a no-op if a non-SEV confidential guest support
- * mechanism is selected. SEV is the only mechanism available to
- * select on x86 at present, so this doesn't arise, but if new
- * mechanisms are supported in future (e.g. TDX), they'll need
- * their own initialization either here or elsewhere.
+ * Initialize confidential guest (SEV/TDX) context, if required
*/
if (ms->cgs) {
ret = confidential_guest_kvm_init(ms->cgs, &local_err);
@@ -3856,32 +3872,34 @@ static void kvm_init_msrs(X86CPU *cpu)
CPUX86State *env = &cpu->env;
kvm_msr_buf_reset(cpu);
- if (has_msr_arch_capabs) {
- kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES,
- env->features[FEAT_ARCH_CAPABILITIES]);
- }
- if (has_msr_core_capabs) {
- kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY,
- env->features[FEAT_CORE_CAPABILITY]);
- }
+ if (!is_tdx_vm()) {
+ if (has_msr_arch_capabs) {
+ kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES,
+ env->features[FEAT_ARCH_CAPABILITIES]);
+ }
+
+ if (has_msr_core_capabs) {
+ kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY,
+ env->features[FEAT_CORE_CAPABILITY]);
+ }
+
+ if (has_msr_perf_capabs && cpu->enable_pmu) {
+ kvm_msr_entry_add_perf(cpu, env->features);
+ }
- if (has_msr_perf_capabs && cpu->enable_pmu) {
- kvm_msr_entry_add_perf(cpu, env->features);
+ /*
+ * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but
+ * all kernels with MSR features should have them.
+ */
+ if (kvm_feature_msrs && cpu_has_vmx(env)) {
+ kvm_msr_entry_add_vmx(cpu, env->features);
+ }
}
if (has_msr_ucode_rev) {
kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev);
}
-
- /*
- * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but
- * all kernels with MSR features should have them.
- */
- if (kvm_feature_msrs && cpu_has_vmx(env)) {
- kvm_msr_entry_add_vmx(cpu, env->features);
- }
-
assert(kvm_buf_set_msrs(cpu) == 0);
}
@@ -6121,6 +6139,16 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
case KVM_EXIT_HYPERCALL:
ret = kvm_handle_hypercall(run);
break;
+ case KVM_EXIT_SYSTEM_EVENT:
+ switch (run->system_event.type) {
+ case KVM_SYSTEM_EVENT_TDX_FATAL:
+ ret = tdx_handle_report_fatal_error(cpu, run);
+ break;
+ default:
+ ret = -1;
+ break;
+ }
+ break;
default:
fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
ret = -1;
diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
index 88565e8..5f83e88 100644
--- a/target/i386/kvm/kvm_i386.h
+++ b/target/i386/kvm/kvm_i386.h
@@ -13,6 +13,8 @@
#include "system/kvm.h"
+#define KVM_MAX_CPUID_ENTRIES 100
+
/* always false if !CONFIG_KVM */
#define kvm_pit_in_kernel() \
(kvm_irqchip_in_kernel() && !kvm_irqchip_is_split())
@@ -42,6 +44,13 @@ void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask);
#ifdef CONFIG_KVM
+#include <linux/kvm.h>
+
+typedef struct KvmCpuidInfo {
+ struct kvm_cpuid2 cpuid;
+ struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES];
+} KvmCpuidInfo;
+
bool kvm_is_vm_type_supported(int type);
bool kvm_has_adjust_clock_stable(void);
bool kvm_has_exception_payload(void);
@@ -57,6 +66,12 @@ uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address);
void kvm_update_msi_routes_all(void *private, bool global,
uint32_t index, uint32_t mask);
+struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid,
+ uint32_t function,
+ uint32_t index);
+uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg);
+uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries,
+ uint32_t cpuid_i);
#endif /* CONFIG_KVM */
void kvm_pc_setup_irq_routing(bool pci_enabled);
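A usage sketch for the newly exported helpers and the KvmCpuidInfo container; the function name is illustrative and assumes a KVM-only caller:

static uint32_t example_leaf7_ebx(CPUX86State *env)
{
    KvmCpuidInfo info = {};
    struct kvm_cpuid_entry2 *e;

    /* build the guest CPUID table into the fixed-size buffer */
    info.cpuid.nent = kvm_x86_build_cpuid(env, info.entries, 0);

    e = cpuid_find_entry(&info.cpuid, 0x7, 0);
    return e ? cpuid_entry_get_reg(e, R_EBX) : 0;
}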
diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build
index 3996caf..3f44cde 100644
--- a/target/i386/kvm/meson.build
+++ b/target/i386/kvm/meson.build
@@ -8,6 +8,8 @@ i386_kvm_ss.add(files(
i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c'))
+i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c'), if_false: files('tdx-stub.c'))
+
i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c'))
i386_system_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss)
diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c
new file mode 100644
index 0000000..720a4ff
--- /dev/null
+++ b/target/i386/kvm/tdx-stub.c
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "qemu/osdep.h"
+
+#include "tdx.h"
+
+int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+ return -EINVAL;
+}
+
+int tdx_parse_tdvf(void *flash_ptr, int size)
+{
+ return -EINVAL;
+}
+
+int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run)
+{
+ return -EINVAL;
+}
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
new file mode 100644
index 0000000..820ca36
--- /dev/null
+++ b/target/i386/kvm/tdx.c
@@ -0,0 +1,1289 @@
+/*
+ * QEMU TDX support
+ *
+ * Copyright (c) 2025 Intel Corporation
+ *
+ * Author:
+ * Xiaoyao Li <xiaoyao.li@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qemu/base64.h"
+#include "qemu/mmap-alloc.h"
+#include "qapi/error.h"
+#include "qom/object_interfaces.h"
+#include "crypto/hash.h"
+#include "system/kvm_int.h"
+#include "system/runstate.h"
+#include "system/system.h"
+#include "system/ramblock.h"
+
+#include <linux/kvm_para.h>
+
+#include "cpu.h"
+#include "cpu-internal.h"
+#include "host-cpu.h"
+#include "hw/i386/e820_memory_layout.h"
+#include "hw/i386/tdvf.h"
+#include "hw/i386/x86.h"
+#include "hw/i386/tdvf-hob.h"
+#include "kvm_i386.h"
+#include "tdx.h"
+
+#include "standard-headers/asm-x86/kvm_para.h"
+
+#define TDX_MIN_TSC_FREQUENCY_KHZ (100 * 1000)
+#define TDX_MAX_TSC_FREQUENCY_KHZ (10 * 1000 * 1000)
+
+#define TDX_TD_ATTRIBUTES_DEBUG BIT_ULL(0)
+#define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE BIT_ULL(28)
+#define TDX_TD_ATTRIBUTES_PKS BIT_ULL(30)
+#define TDX_TD_ATTRIBUTES_PERFMON BIT_ULL(63)
+
+#define TDX_SUPPORTED_TD_ATTRS (TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE |\
+ TDX_TD_ATTRIBUTES_PKS | \
+ TDX_TD_ATTRIBUTES_PERFMON)
+
+#define TDX_SUPPORTED_KVM_FEATURES ((1U << KVM_FEATURE_NOP_IO_DELAY) | \
+ (1U << KVM_FEATURE_PV_UNHALT) | \
+ (1U << KVM_FEATURE_PV_TLB_FLUSH) | \
+ (1U << KVM_FEATURE_PV_SEND_IPI) | \
+ (1U << KVM_FEATURE_POLL_CONTROL) | \
+ (1U << KVM_FEATURE_PV_SCHED_YIELD) | \
+ (1U << KVM_FEATURE_MSI_EXT_DEST_ID))
+
+static TdxGuest *tdx_guest;
+
+static struct kvm_tdx_capabilities *tdx_caps;
+static struct kvm_cpuid2 *tdx_supported_cpuid;
+
+/* Valid after kvm_arch_init()->confidential_guest_kvm_init()->tdx_kvm_init() */
+bool is_tdx_vm(void)
+{
+ return !!tdx_guest;
+}
+
+enum tdx_ioctl_level {
+ TDX_VM_IOCTL,
+ TDX_VCPU_IOCTL,
+};
+
+static int tdx_ioctl_internal(enum tdx_ioctl_level level, void *state,
+ int cmd_id, __u32 flags, void *data,
+ Error **errp)
+{
+ struct kvm_tdx_cmd tdx_cmd = {};
+ int r;
+
+ const char *tdx_ioctl_name[] = {
+ [KVM_TDX_CAPABILITIES] = "KVM_TDX_CAPABILITIES",
+ [KVM_TDX_INIT_VM] = "KVM_TDX_INIT_VM",
+ [KVM_TDX_INIT_VCPU] = "KVM_TDX_INIT_VCPU",
+ [KVM_TDX_INIT_MEM_REGION] = "KVM_TDX_INIT_MEM_REGION",
+ [KVM_TDX_FINALIZE_VM] = "KVM_TDX_FINALIZE_VM",
+ [KVM_TDX_GET_CPUID] = "KVM_TDX_GET_CPUID",
+ };
+
+ tdx_cmd.id = cmd_id;
+ tdx_cmd.flags = flags;
+ tdx_cmd.data = (__u64)(unsigned long)data;
+
+ switch (level) {
+ case TDX_VM_IOCTL:
+ r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
+ break;
+ case TDX_VCPU_IOCTL:
+ r = kvm_vcpu_ioctl(state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
+ break;
+ default:
+ error_setg(errp, "Invalid tdx_ioctl_level %d", level);
+ return -EINVAL;
+ }
+
+ if (r < 0) {
+ error_setg_errno(errp, -r, "TDX ioctl %s failed, hw_errors: 0x%llx",
+ tdx_ioctl_name[cmd_id], tdx_cmd.hw_error);
+ }
+ return r;
+}
+
+static inline int tdx_vm_ioctl(int cmd_id, __u32 flags, void *data,
+ Error **errp)
+{
+ return tdx_ioctl_internal(TDX_VM_IOCTL, NULL, cmd_id, flags, data, errp);
+}
+
+static inline int tdx_vcpu_ioctl(CPUState *cpu, int cmd_id, __u32 flags,
+ void *data, Error **errp)
+{
+ return tdx_ioctl_internal(TDX_VCPU_IOCTL, cpu, cmd_id, flags, data, errp);
+}
+
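A brief usage sketch of the wrappers above; the function name is illustrative, and proper command ordering is handled elsewhere in this file:

static void example_issue_finalize(void)
{
    Error *local_err = NULL;

    /* VM-scope command with no flags and no payload */
    if (tdx_vm_ioctl(KVM_TDX_FINALIZE_VM, 0, NULL, &local_err) < 0) {
        error_report_err(local_err);
    }
}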
+static int get_tdx_capabilities(Error **errp)
+{
+ struct kvm_tdx_capabilities *caps;
+ /* 1st generation of TDX reports 6 cpuid configs */
+ int nr_cpuid_configs = 6;
+ size_t size;
+ int r;
+
+ do {
+ Error *local_err = NULL;
+ size = sizeof(struct kvm_tdx_capabilities) +
+ nr_cpuid_configs * sizeof(struct kvm_cpuid_entry2);
+ caps = g_malloc0(size);
+ caps->cpuid.nent = nr_cpuid_configs;
+
+ r = tdx_vm_ioctl(KVM_TDX_CAPABILITIES, 0, caps, &local_err);
+ if (r == -E2BIG) {
+ g_free(caps);
+ nr_cpuid_configs *= 2;
+ if (nr_cpuid_configs > KVM_MAX_CPUID_ENTRIES) {
+ error_report("KVM TDX seems broken that number of CPUID entries"
+ " in kvm_tdx_capabilities exceeds limit: %d",
+ KVM_MAX_CPUID_ENTRIES);
+ error_propagate(errp, local_err);
+ return r;
+ }
+ error_free(local_err);
+ } else if (r < 0) {
+ g_free(caps);
+ error_propagate(errp, local_err);
+ return r;
+ }
+ } while (r == -E2BIG);
+
+ tdx_caps = caps;
+
+ return 0;
+}
+
+void tdx_set_tdvf_region(MemoryRegion *tdvf_mr)
+{
+ assert(!tdx_guest->tdvf_mr);
+ tdx_guest->tdvf_mr = tdvf_mr;
+}
+
+static TdxFirmwareEntry *tdx_get_hob_entry(TdxGuest *tdx)
+{
+ TdxFirmwareEntry *entry;
+
+ for_each_tdx_fw_entry(&tdx->tdvf, entry) {
+ if (entry->type == TDVF_SECTION_TYPE_TD_HOB) {
+ return entry;
+ }
+ }
+ error_report("TDVF metadata doesn't specify TD_HOB location.");
+ exit(1);
+}
+
+static void tdx_add_ram_entry(uint64_t address, uint64_t length,
+ enum TdxRamType type)
+{
+ uint32_t nr_entries = tdx_guest->nr_ram_entries;
+ tdx_guest->ram_entries = g_renew(TdxRamEntry, tdx_guest->ram_entries,
+ nr_entries + 1);
+
+ tdx_guest->ram_entries[nr_entries].address = address;
+ tdx_guest->ram_entries[nr_entries].length = length;
+ tdx_guest->ram_entries[nr_entries].type = type;
+ tdx_guest->nr_ram_entries++;
+}
+
+static int tdx_accept_ram_range(uint64_t address, uint64_t length)
+{
+ uint64_t head_start, tail_start, head_length, tail_length;
+ uint64_t tmp_address, tmp_length;
+ TdxRamEntry *e;
+ int i = 0;
+
+ do {
+ if (i == tdx_guest->nr_ram_entries) {
+ return -1;
+ }
+
+ e = &tdx_guest->ram_entries[i++];
+ } while (address + length <= e->address || address >= e->address + e->length);
+
+ /*
+ * The to-be-accepted ram range must be fully contained by one
+ * RAM entry.
+ */
+ if (e->address > address ||
+ e->address + e->length < address + length) {
+ return -1;
+ }
+
+ if (e->type == TDX_RAM_ADDED) {
+ return 0;
+ }
+
+ tmp_address = e->address;
+ tmp_length = e->length;
+
+ e->address = address;
+ e->length = length;
+ e->type = TDX_RAM_ADDED;
+
+ head_length = address - tmp_address;
+ if (head_length > 0) {
+ head_start = tmp_address;
+ tdx_add_ram_entry(head_start, head_length, TDX_RAM_UNACCEPTED);
+ }
+
+ tail_start = address + length;
+ if (tail_start < tmp_address + tmp_length) {
+ tail_length = tmp_address + tmp_length - tail_start;
+ tdx_add_ram_entry(tail_start, tail_length, TDX_RAM_UNACCEPTED);
+ }
+
+ return 0;
+}
+
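Worked example for tdx_accept_ram_range() (addresses are illustrative): with a single TDX_RAM_UNACCEPTED entry covering [0x100000, +0x800000), accepting [0x300000, +0x100000) rewrites that entry to the accepted range and appends two new TDX_RAM_UNACCEPTED entries for the head [0x100000, +0x200000) and the tail [0x400000, +0x500000).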
+static int tdx_ram_entry_compare(const void *lhs_, const void* rhs_)
+{
+ const TdxRamEntry *lhs = lhs_;
+ const TdxRamEntry *rhs = rhs_;
+
+ if (lhs->address == rhs->address) {
+ return 0;
+ }
+ if (le64_to_cpu(lhs->address) > le64_to_cpu(rhs->address)) {
+ return 1;
+ }
+ return -1;
+}
+
+static void tdx_init_ram_entries(void)
+{
+ unsigned i, j, nr_e820_entries;
+
+ nr_e820_entries = e820_get_table(NULL);
+ tdx_guest->ram_entries = g_new(TdxRamEntry, nr_e820_entries);
+
+ for (i = 0, j = 0; i < nr_e820_entries; i++) {
+ uint64_t addr, len;
+
+ if (e820_get_entry(i, E820_RAM, &addr, &len)) {
+ tdx_guest->ram_entries[j].address = addr;
+ tdx_guest->ram_entries[j].length = len;
+ tdx_guest->ram_entries[j].type = TDX_RAM_UNACCEPTED;
+ j++;
+ }
+ }
+ tdx_guest->nr_ram_entries = j;
+}
+
+static void tdx_post_init_vcpus(void)
+{
+ TdxFirmwareEntry *hob;
+ CPUState *cpu;
+
+ hob = tdx_get_hob_entry(tdx_guest);
+ CPU_FOREACH(cpu) {
+ tdx_vcpu_ioctl(cpu, KVM_TDX_INIT_VCPU, 0, (void *)(uintptr_t)hob->address,
+ &error_fatal);
+ }
+}
+
+static void tdx_finalize_vm(Notifier *notifier, void *unused)
+{
+ TdxFirmware *tdvf = &tdx_guest->tdvf;
+ TdxFirmwareEntry *entry;
+ RAMBlock *ram_block;
+ Error *local_err = NULL;
+ int r;
+
+ tdx_init_ram_entries();
+
+ for_each_tdx_fw_entry(tdvf, entry) {
+ switch (entry->type) {
+ case TDVF_SECTION_TYPE_BFV:
+ case TDVF_SECTION_TYPE_CFV:
+ entry->mem_ptr = tdvf->mem_ptr + entry->data_offset;
+ break;
+ case TDVF_SECTION_TYPE_TD_HOB:
+ case TDVF_SECTION_TYPE_TEMP_MEM:
+ entry->mem_ptr = qemu_ram_mmap(-1, entry->size,
+ qemu_real_host_page_size(), 0, 0);
+ if (entry->mem_ptr == MAP_FAILED) {
+ error_report("Failed to mmap memory for TDVF section %d",
+ entry->type);
+ exit(1);
+ }
+ if (tdx_accept_ram_range(entry->address, entry->size)) {
+ error_report("Failed to accept memory for TDVF section %d",
+ entry->type);
+ qemu_ram_munmap(-1, entry->mem_ptr, entry->size);
+ exit(1);
+ }
+ break;
+ default:
+ error_report("Unsupported TDVF section %d", entry->type);
+ exit(1);
+ }
+ }
+
+ qsort(tdx_guest->ram_entries, tdx_guest->nr_ram_entries,
+ sizeof(TdxRamEntry), &tdx_ram_entry_compare);
+
+ tdvf_hob_create(tdx_guest, tdx_get_hob_entry(tdx_guest));
+
+ tdx_post_init_vcpus();
+
+ for_each_tdx_fw_entry(tdvf, entry) {
+ struct kvm_tdx_init_mem_region region;
+ uint32_t flags;
+
+ region = (struct kvm_tdx_init_mem_region) {
+ .source_addr = (uintptr_t)entry->mem_ptr,
+ .gpa = entry->address,
+ .nr_pages = entry->size >> 12,
+ };
+
+ flags = entry->attributes & TDVF_SECTION_ATTRIBUTES_MR_EXTEND ?
+ KVM_TDX_MEASURE_MEMORY_REGION : 0;
+
+ do {
+ error_free(local_err);
+ local_err = NULL;
+ r = tdx_vcpu_ioctl(first_cpu, KVM_TDX_INIT_MEM_REGION, flags,
+ &region, &local_err);
+ } while (r == -EAGAIN || r == -EINTR);
+ if (r < 0) {
+ error_report_err(local_err);
+ exit(1);
+ }
+
+ if (entry->type == TDVF_SECTION_TYPE_TD_HOB ||
+ entry->type == TDVF_SECTION_TYPE_TEMP_MEM) {
+ qemu_ram_munmap(-1, entry->mem_ptr, entry->size);
+ entry->mem_ptr = NULL;
+ }
+ }
+
+ /*
+ * The TDVF image has been copied into the private region above via
+ * KVM_MEMORY_MAPPING, so the original copy is no longer needed.
+ */
+ ram_block = tdx_guest->tdvf_mr->ram_block;
+ ram_block_discard_range(ram_block, 0, ram_block->max_length);
+
+ tdx_vm_ioctl(KVM_TDX_FINALIZE_VM, 0, NULL, &error_fatal);
+ CONFIDENTIAL_GUEST_SUPPORT(tdx_guest)->ready = true;
+}
+
+static Notifier tdx_machine_done_notify = {
+ .notify = tdx_finalize_vm,
+};
+
+/*
+ * Some CPUID bits change from fixed1 to configurable bits when the TDX module
+ * supports TDX_FEATURES0.VE_REDUCTION, e.g. MCA/MCE/MTRR/CORE_CAPABILITY.
+ *
+ * To make QEMU work with all versions of the TDX module, keep a bit in
+ * tdx_fixed1_bits if it has ever been fixed1 in any version, even if it is
+ * not fixed1 in the latest one. Otherwise, with an older TDX module, QEMU may
+ * treat the fixed1 bit as unsupported.
+ *
+ * With a newer TDX module it does no harm to keep such bits in
+ * tdx_fixed1_bits even though they have become configurable, because
+ * tdx_fixed1_bits is only used to set up the supported bits.
+ */
+KvmCpuidInfo tdx_fixed1_bits = {
+ .cpuid.nent = 8,
+ .entries[0] = {
+ .function = 0x1,
+ .index = 0,
+ .ecx = CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_DTES64 |
+ CPUID_EXT_DSCPL | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 |
+ CPUID_EXT_PDCM | CPUID_EXT_PCID | CPUID_EXT_SSE41 |
+ CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE |
+ CPUID_EXT_POPCNT | CPUID_EXT_AES | CPUID_EXT_XSAVE |
+ CPUID_EXT_RDRAND | CPUID_EXT_HYPERVISOR,
+ .edx = CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC |
+ CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC |
+ CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV |
+ CPUID_PAT | CPUID_CLFLUSH | CPUID_DTS | CPUID_MMX | CPUID_FXSR |
+ CPUID_SSE | CPUID_SSE2,
+ },
+ .entries[1] = {
+ .function = 0x6,
+ .index = 0,
+ .eax = CPUID_6_EAX_ARAT,
+ },
+ .entries[2] = {
+ .function = 0x7,
+ .index = 0,
+ .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
+ .ebx = CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_FDP_EXCPTN_ONLY |
+ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_INVPCID |
+ CPUID_7_0_EBX_ZERO_FCS_FDS | CPUID_7_0_EBX_RDSEED |
+ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT |
+ CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_SHA_NI,
+ .ecx = CPUID_7_0_ECX_BUS_LOCK_DETECT | CPUID_7_0_ECX_MOVDIRI |
+ CPUID_7_0_ECX_MOVDIR64B,
+ .edx = CPUID_7_0_EDX_MD_CLEAR | CPUID_7_0_EDX_SPEC_CTRL |
+ CPUID_7_0_EDX_STIBP | CPUID_7_0_EDX_FLUSH_L1D |
+ CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_CORE_CAPABILITY |
+ CPUID_7_0_EDX_SPEC_CTRL_SSBD,
+ },
+ .entries[3] = {
+ .function = 0x7,
+ .index = 2,
+ .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
+ .edx = CPUID_7_2_EDX_PSFD | CPUID_7_2_EDX_IPRED_CTRL |
+ CPUID_7_2_EDX_RRSBA_CTRL | CPUID_7_2_EDX_BHI_CTRL,
+ },
+ .entries[4] = {
+ .function = 0xD,
+ .index = 0,
+ .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
+ .eax = XSTATE_FP_MASK | XSTATE_SSE_MASK,
+ },
+ .entries[5] = {
+ .function = 0xD,
+ .index = 1,
+ .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX,
+ .eax = CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC |
+ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES,
+ },
+ .entries[6] = {
+ .function = 0x80000001,
+ .index = 0,
+ .ecx = CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH,
+ /*
+ * Strictly speaking, SYSCALL is not a fixed1 bit since it depends on
+ * the CPU being in 64-bit mode. But here fixed1 serves as the set of
+ * supported bits for TDX, and in that sense SYSCALL is always
+ * supported.
+ */
+ .edx = CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB |
+ CPUID_EXT2_RDTSCP | CPUID_EXT2_LM,
+ },
+ .entries[7] = {
+ .function = 0x80000007,
+ .index = 0,
+ .edx = CPUID_APM_INVTSC,
+ },
+};
+
+typedef struct TdxAttrsMap {
+ uint32_t attr_index;
+ uint32_t cpuid_leaf;
+ uint32_t cpuid_subleaf;
+ int cpuid_reg;
+ uint32_t feat_mask;
+} TdxAttrsMap;
+
+static TdxAttrsMap tdx_attrs_maps[] = {
+ {.attr_index = 27,
+ .cpuid_leaf = 7,
+ .cpuid_subleaf = 1,
+ .cpuid_reg = R_EAX,
+ .feat_mask = CPUID_7_1_EAX_LASS,},
+
+ {.attr_index = 30,
+ .cpuid_leaf = 7,
+ .cpuid_subleaf = 0,
+ .cpuid_reg = R_ECX,
+ .feat_mask = CPUID_7_0_ECX_PKS,},
+
+ {.attr_index = 31,
+ .cpuid_leaf = 7,
+ .cpuid_subleaf = 0,
+ .cpuid_reg = R_ECX,
+ .feat_mask = CPUID_7_0_ECX_KeyLocker,},
+};
+
+typedef struct TdxXFAMDep {
+ int xfam_bit;
+ FeatureMask feat_mask;
+} TdxXFAMDep;
+
+/*
+ * Note, only the CPUID bits whose virtualization type is "XFAM & Native" are
+ * defined here.
+ *
+ * Bits whose virtualization type is "XFAM & Configured & Native" are reported
+ * as configurable bits instead, and they are not supported if they are not in
+ * the configurable bits list from KVM, even if the corresponding XFAM bit is
+ * supported.
+ */
+TdxXFAMDep tdx_xfam_deps[] = {
+ { XSTATE_YMM_BIT, { FEAT_1_ECX, CPUID_EXT_FMA }},
+ { XSTATE_YMM_BIT, { FEAT_7_0_EBX, CPUID_7_0_EBX_AVX2 }},
+ { XSTATE_OPMASK_BIT, { FEAT_7_0_ECX, CPUID_7_0_ECX_AVX512_VBMI}},
+ { XSTATE_OPMASK_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AVX512_FP16}},
+ { XSTATE_PT_BIT, { FEAT_7_0_EBX, CPUID_7_0_EBX_INTEL_PT}},
+ { XSTATE_PKRU_BIT, { FEAT_7_0_ECX, CPUID_7_0_ECX_PKU}},
+ { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_BF16 }},
+ { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_TILE }},
+ { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_INT8 }},
+};
+
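+/*
+ * Look up (function, index) in tdx_supported_cpuid, appending a new
+ * zero-initialized entry if it is not present yet.
+ */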
+static struct kvm_cpuid_entry2 *find_in_supported_entry(uint32_t function,
+ uint32_t index)
+{
+ struct kvm_cpuid_entry2 *e;
+
+ e = cpuid_find_entry(tdx_supported_cpuid, function, index);
+ if (!e) {
+ if (tdx_supported_cpuid->nent >= KVM_MAX_CPUID_ENTRIES) {
+ error_report("tdx_supported_cpuid requries more space than %d entries",
+ KVM_MAX_CPUID_ENTRIES);
+ exit(1);
+ }
+ e = &tdx_supported_cpuid->entries[tdx_supported_cpuid->nent++];
+ e->function = function;
+ e->index = index;
+ }
+
+ return e;
+}
+
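+/*
+ * Fixed1 bits are by definition always supported; fold them into
+ * tdx_supported_cpuid.
+ */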
+static void tdx_add_supported_cpuid_by_fixed1_bits(void)
+{
+ struct kvm_cpuid_entry2 *e, *e1;
+ int i;
+
+ for (i = 0; i < tdx_fixed1_bits.cpuid.nent; i++) {
+ e = &tdx_fixed1_bits.entries[i];
+
+ e1 = find_in_supported_entry(e->function, e->index);
+ e1->eax |= e->eax;
+ e1->ebx |= e->ebx;
+ e1->ecx |= e->ecx;
+ e1->edx |= e->edx;
+ }
+}
+
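+/*
+ * Advertise the CPUID feature bits controlled by TD attributes that the TDX
+ * module supports (see tdx_attrs_maps).
+ */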
+static void tdx_add_supported_cpuid_by_attrs(void)
+{
+ struct kvm_cpuid_entry2 *e;
+ TdxAttrsMap *map;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(tdx_attrs_maps); i++) {
+ map = &tdx_attrs_maps[i];
+ if (!((1ULL << map->attr_index) & tdx_caps->supported_attrs)) {
+ continue;
+ }
+
+ e = find_in_supported_entry(map->cpuid_leaf, map->cpuid_subleaf);
+
+ switch (map->cpuid_reg) {
+ case R_EAX:
+ e->eax |= map->feat_mask;
+ break;
+ case R_EBX:
+ e->ebx |= map->feat_mask;
+ break;
+ case R_ECX:
+ e->ecx |= map->feat_mask;
+ break;
+ case R_EDX:
+ e->edx |= map->feat_mask;
+ break;
+ }
+ }
+}
+
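+/*
+ * Advertise the CPUID feature bits implied by the XFAM bits the TDX module
+ * supports, plus the supported XCR0/XSS masks themselves in leaf 0xD.
+ */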
+static void tdx_add_supported_cpuid_by_xfam(void)
+{
+ struct kvm_cpuid_entry2 *e;
+ const TdxXFAMDep *xfam_dep;
+ const FeatureWordInfo *f;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(tdx_xfam_deps); i++) {
+ xfam_dep = &tdx_xfam_deps[i];
+ if (!((1ULL << xfam_dep->xfam_bit) & tdx_caps->supported_xfam)) {
+ continue;
+ }
+
+ f = &feature_word_info[xfam_dep->feat_mask.index];
+ if (f->type != CPUID_FEATURE_WORD) {
+ continue;
+ }
+
+ e = find_in_supported_entry(f->cpuid.eax, f->cpuid.ecx);
+ switch (f->cpuid.reg) {
+ case R_EAX:
+ e->eax |= xfam_dep->feat_mask.mask;
+ break;
+ case R_EBX:
+ e->ebx |= xfam_dep->feat_mask.mask;
+ break;
+ case R_ECX:
+ e->ecx |= xfam_dep->feat_mask.mask;
+ break;
+ case R_EDX:
+ e->edx |= xfam_dep->feat_mask.mask;
+ break;
+ }
+ }
+
+ e = find_in_supported_entry(0xd, 0);
+ e->eax |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK);
+ e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK) >> 32;
+
+ e = find_in_supported_entry(0xd, 1);
+ /*
+ * Always mark XFD as supported for TDX; it is cleared later in
+ * tdx_adjust_cpuid_features() if XFD is unavailable on the hardware,
+ * because in that case the original data has it as 0.
+ */
+ e->eax |= CPUID_XSAVE_XFD;
+ e->ecx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK);
+ e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK) >> 32;
+}
+
+static void tdx_add_supported_kvm_features(void)
+{
+ struct kvm_cpuid_entry2 *e;
+
+ e = find_in_supported_entry(0x40000001, 0);
+ e->eax = TDX_SUPPORTED_KVM_FEATURES;
+}
+
+static void tdx_setup_supported_cpuid(void)
+{
+ if (tdx_supported_cpuid) {
+ return;
+ }
+
+ tdx_supported_cpuid = g_malloc0(sizeof(*tdx_supported_cpuid) +
+ KVM_MAX_CPUID_ENTRIES * sizeof(struct kvm_cpuid_entry2));
+
+ memcpy(tdx_supported_cpuid->entries, tdx_caps->cpuid.entries,
+ tdx_caps->cpuid.nent * sizeof(struct kvm_cpuid_entry2));
+ tdx_supported_cpuid->nent = tdx_caps->cpuid.nent;
+
+ tdx_add_supported_cpuid_by_fixed1_bits();
+ tdx_add_supported_cpuid_by_attrs();
+ tdx_add_supported_cpuid_by_xfam();
+
+ tdx_add_supported_kvm_features();
+}
+
+static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
+{
+ MachineState *ms = MACHINE(qdev_get_machine());
+ X86MachineState *x86ms = X86_MACHINE(ms);
+ TdxGuest *tdx = TDX_GUEST(cgs);
+ int r = 0;
+
+ kvm_mark_guest_state_protected();
+
+ if (x86ms->smm == ON_OFF_AUTO_AUTO) {
+ x86ms->smm = ON_OFF_AUTO_OFF;
+ } else if (x86ms->smm == ON_OFF_AUTO_ON) {
+ error_setg(errp, "TDX VM doesn't support SMM");
+ return -EINVAL;
+ }
+
+ if (x86ms->pic == ON_OFF_AUTO_AUTO) {
+ x86ms->pic = ON_OFF_AUTO_OFF;
+ } else if (x86ms->pic == ON_OFF_AUTO_ON) {
+ error_setg(errp, "TDX VM doesn't support PIC");
+ return -EINVAL;
+ }
+
+ if (kvm_state->kernel_irqchip_split == ON_OFF_AUTO_AUTO) {
+ kvm_state->kernel_irqchip_split = ON_OFF_AUTO_ON;
+ } else if (kvm_state->kernel_irqchip_split != ON_OFF_AUTO_ON) {
+ error_setg(errp, "TDX VM requires kernel_irqchip to be split");
+ return -EINVAL;
+ }
+
+ if (!tdx_caps) {
+ r = get_tdx_capabilities(errp);
+ if (r) {
+ return r;
+ }
+ }
+
+ tdx_setup_supported_cpuid();
+
+ /* TDX relies on KVM_HC_MAP_GPA_RANGE to handle TDG.VP.VMCALL<MapGPA> */
+ if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) {
+ return -EOPNOTSUPP;
+ }
+
+ /*
+ * Set kvm_readonly_mem_allowed to false, because TDX only supports readonly
+ * memory for shared memory but not for private memory. Besides, whether a
+ * memslot is private or shared is not determined by QEMU.
+ *
+ * Thus, just mark readonly memory as not supported for simplicity.
+ */
+ kvm_readonly_mem_allowed = false;
+
+ qemu_add_machine_init_done_notifier(&tdx_machine_done_notify);
+
+ tdx_guest = tdx;
+ return 0;
+}
+
+static int tdx_kvm_type(X86ConfidentialGuest *cg)
+{
+ /* Do the object check */
+ TDX_GUEST(cg);
+
+ return KVM_X86_TDX_VM;
+}
+
+static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu)
+{
+ X86CPU *x86cpu = X86_CPU(cpu);
+
+ object_property_set_bool(OBJECT(cpu), "pmu", false, &error_abort);
+
+ /* invtsc is fixed1 for TD guest */
+ object_property_set_bool(OBJECT(cpu), "invtsc", true, &error_abort);
+
+ x86cpu->enable_cpuid_0x1f = true;
+}
+
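+/*
+ * Force on the bits that are fixed1 for TDX and, for feature word leaves,
+ * mask the result by what is reported as supported for TDX.
+ */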
+static uint32_t tdx_adjust_cpuid_features(X86ConfidentialGuest *cg,
+ uint32_t feature, uint32_t index,
+ int reg, uint32_t value)
+{
+ struct kvm_cpuid_entry2 *e;
+
+ e = cpuid_find_entry(&tdx_fixed1_bits.cpuid, feature, index);
+ if (e) {
+ value |= cpuid_entry_get_reg(e, reg);
+ }
+
+ if (is_feature_word_cpuid(feature, index, reg)) {
+ e = cpuid_find_entry(tdx_supported_cpuid, feature, index);
+ if (e) {
+ value &= cpuid_entry_get_reg(e, reg);
+ }
+ }
+
+ return value;
+}
+
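+/*
+ * Fetch the CPUID configuration KVM computed for this TD vCPU, retrying with
+ * the entry count KVM reports back when the initial buffer is too small.
+ */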
+static struct kvm_cpuid2 *tdx_fetch_cpuid(CPUState *cpu, int *ret)
+{
+ struct kvm_cpuid2 *fetch_cpuid;
+ int size = KVM_MAX_CPUID_ENTRIES;
+ Error *local_err = NULL;
+ int r;
+
+ do {
+ error_free(local_err);
+ local_err = NULL;
+
+ fetch_cpuid = g_malloc0(sizeof(*fetch_cpuid) +
+ sizeof(struct kvm_cpuid_entry2) * size);
+ fetch_cpuid->nent = size;
+ r = tdx_vcpu_ioctl(cpu, KVM_TDX_GET_CPUID, 0, fetch_cpuid, &local_err);
+ if (r == -E2BIG) {
+ size = fetch_cpuid->nent;
+ g_free(fetch_cpuid);
+ }
+ } while (r == -E2BIG);
+
+ if (r < 0) {
+ error_report_err(local_err);
+ *ret = r;
+ return NULL;
+ }
+
+ return fetch_cpuid;
+}
+
+static int tdx_check_features(X86ConfidentialGuest *cg, CPUState *cs)
+{
+ uint64_t actual, requested, unavailable, forced_on;
+ g_autofree struct kvm_cpuid2 *fetch_cpuid;
+ const char *forced_on_prefix = NULL;
+ const char *unav_prefix = NULL;
+ struct kvm_cpuid_entry2 *entry;
+ X86CPU *cpu = X86_CPU(cs);
+ CPUX86State *env = &cpu->env;
+ FeatureWordInfo *wi;
+ FeatureWord w;
+ bool mismatch = false;
+ int r;
+
+ fetch_cpuid = tdx_fetch_cpuid(cs, &r);
+ if (!fetch_cpuid) {
+ return r;
+ }
+
+ if (cpu->check_cpuid || cpu->enforce_cpuid) {
+ unav_prefix = "TDX doesn't support requested feature";
+ forced_on_prefix = "TDX forcibly sets the feature";
+ }
+
+ for (w = 0; w < FEATURE_WORDS; w++) {
+ wi = &feature_word_info[w];
+ actual = 0;
+
+ switch (wi->type) {
+ case CPUID_FEATURE_WORD:
+ entry = cpuid_find_entry(fetch_cpuid, wi->cpuid.eax, wi->cpuid.ecx);
+ if (!entry) {
+ /*
+ * If KVM doesn't report it, it is fully configurable
+ * by QEMU
+ */
+ continue;
+ }
+
+ actual = cpuid_entry_get_reg(entry, wi->cpuid.reg);
+ break;
+ case MSR_FEATURE_WORD:
+ /*
+ * TODO:
+ * validate MSR features when KVM has an interface to report them.
+ */
+ continue;
+ }
+
+ /* Fixup for special cases */
+ switch (w) {
+ case FEAT_8000_0001_EDX:
+ /*
+ * Intel enumerates the SYSCALL bit as 1 only when the processor is in
+ * 64-bit mode, and before the vCPU runs it is not in 64-bit mode.
+ */
+ actual |= CPUID_EXT2_SYSCALL;
+ break;
+ default:
+ break;
+ }
+
+ requested = env->features[w];
+ unavailable = requested & ~actual;
+ mark_unavailable_features(cpu, w, unavailable, unav_prefix);
+ if (unavailable) {
+ mismatch = true;
+ }
+
+ forced_on = actual & ~requested;
+ mark_forced_on_features(cpu, w, forced_on, forced_on_prefix);
+ if (forced_on) {
+ mismatch = true;
+ }
+ }
+
+ if (cpu->enforce_cpuid && mismatch) {
+ return -EINVAL;
+ }
+
+ if (cpu->phys_bits != host_cpu_phys_bits()) {
+ error_report("TDX requires guest CPU physical bits (%u) "
+ "to match host CPU physical bits (%u)",
+ cpu->phys_bits, host_cpu_phys_bits());
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int tdx_validate_attributes(TdxGuest *tdx, Error **errp)
+{
+ if ((tdx->attributes & ~tdx_caps->supported_attrs)) {
+ error_setg(errp, "Invalid attributes 0x%"PRIx64" for TDX VM "
+ "(KVM supported: 0x%"PRIx64")", tdx->attributes,
+ (uint64_t)tdx_caps->supported_attrs);
+ return -1;
+ }
+
+ if (tdx->attributes & ~TDX_SUPPORTED_TD_ATTRS) {
+ error_setg(errp, "Some QEMU unsupported TD attribute bits being "
+ "requested: 0x%"PRIx64" (QEMU supported: 0x%"PRIx64")",
+ tdx->attributes, (uint64_t)TDX_SUPPORTED_TD_ATTRS);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int setup_td_guest_attributes(X86CPU *x86cpu, Error **errp)
+{
+ CPUX86State *env = &x86cpu->env;
+
+ tdx_guest->attributes |= (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS) ?
+ TDX_TD_ATTRIBUTES_PKS : 0;
+ tdx_guest->attributes |= x86cpu->enable_pmu ? TDX_TD_ATTRIBUTES_PERFMON : 0;
+
+ return tdx_validate_attributes(tdx_guest, errp);
+}
+
+static int setup_td_xfam(X86CPU *x86cpu, Error **errp)
+{
+ CPUX86State *env = &x86cpu->env;
+ uint64_t xfam;
+
+ xfam = env->features[FEAT_XSAVE_XCR0_LO] |
+ env->features[FEAT_XSAVE_XCR0_HI] |
+ env->features[FEAT_XSAVE_XSS_LO] |
+ env->features[FEAT_XSAVE_XSS_HI];
+
+ if (xfam & ~tdx_caps->supported_xfam) {
+ error_setg(errp, "Invalid XFAM 0x%"PRIx64" for TDX VM (supported: 0x%"PRIx64"))",
+ xfam, (uint64_t)tdx_caps->supported_xfam);
+ return -1;
+ }
+
+ tdx_guest->xfam = xfam;
+ return 0;
+}
+
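+/*
+ * Drop CPUID leaves that are not directly configurable for a TD and mask the
+ * remaining ones by the configurable bits reported in tdx_caps->cpuid.
+ */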
+static void tdx_filter_cpuid(struct kvm_cpuid2 *cpuids)
+{
+ int i, dest_cnt = 0;
+ struct kvm_cpuid_entry2 *src, *dest, *conf;
+
+ for (i = 0; i < cpuids->nent; i++) {
+ src = cpuids->entries + i;
+ conf = cpuid_find_entry(&tdx_caps->cpuid, src->function, src->index);
+ if (!conf) {
+ continue;
+ }
+ dest = cpuids->entries + dest_cnt;
+
+ dest->function = src->function;
+ dest->index = src->index;
+ dest->flags = src->flags;
+ dest->eax = src->eax & conf->eax;
+ dest->ebx = src->ebx & conf->ebx;
+ dest->ecx = src->ecx & conf->ecx;
+ dest->edx = src->edx & conf->edx;
+
+ dest_cnt++;
+ }
+ cpuids->nent = dest_cnt;
+}
+
+int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+ X86CPU *x86cpu = X86_CPU(cpu);
+ CPUX86State *env = &x86cpu->env;
+ g_autofree struct kvm_tdx_init_vm *init_vm = NULL;
+ Error *local_err = NULL;
+ size_t data_len;
+ int retry = 10000;
+ int r = 0;
+
+ QEMU_LOCK_GUARD(&tdx_guest->lock);
+ if (tdx_guest->initialized) {
+ return r;
+ }
+
+ init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
+ sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
+
+ if (!kvm_check_extension(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS)) {
+ error_setg(errp, "KVM doesn't support KVM_CAP_X86_APIC_BUS_CYCLES_NS");
+ return -EOPNOTSUPP;
+ }
+
+ r = kvm_vm_enable_cap(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
+ 0, TDX_APIC_BUS_CYCLES_NS);
+ if (r < 0) {
+ error_setg_errno(errp, -r,
+ "Unable to set core crystal clock frequency to 25MHz");
+ return r;
+ }
+
+ if (env->tsc_khz && (env->tsc_khz < TDX_MIN_TSC_FREQUENCY_KHZ ||
+ env->tsc_khz > TDX_MAX_TSC_FREQUENCY_KHZ)) {
+ error_setg(errp, "Invalid TSC %"PRId64" KHz, must specify cpu_frequency "
+ "between [%d, %d] kHz", env->tsc_khz,
+ TDX_MIN_TSC_FREQUENCY_KHZ, TDX_MAX_TSC_FREQUENCY_KHZ);
+ return -EINVAL;
+ }
+
+ if (env->tsc_khz % (25 * 1000)) {
+ error_setg(errp, "Invalid TSC %"PRId64" KHz, it must be multiple of 25MHz",
+ env->tsc_khz);
+ return -EINVAL;
+ }
+
+ /* It's safe even if env->tsc_khz is 0; KVM uses the host's tsc_khz in this case. */
+ r = kvm_vm_ioctl(kvm_state, KVM_SET_TSC_KHZ, env->tsc_khz);
+ if (r < 0) {
+ error_setg_errno(errp, -r, "Unable to set TSC frequency to %"PRId64" kHz",
+ env->tsc_khz);
+ return r;
+ }
+
+ if (tdx_guest->mrconfigid) {
+ g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrconfigid,
+ strlen(tdx_guest->mrconfigid), &data_len, errp);
+ if (!data) {
+ return -1;
+ }
+ if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
+ error_setg(errp, "TDX: failed to decode mrconfigid");
+ return -1;
+ }
+ memcpy(init_vm->mrconfigid, data, data_len);
+ }
+
+ if (tdx_guest->mrowner) {
+ g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrowner,
+ strlen(tdx_guest->mrowner), &data_len, errp);
+ if (!data) {
+ return -1;
+ }
+ if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
+ error_setg(errp, "TDX: failed to decode mrowner");
+ return -1;
+ }
+ memcpy(init_vm->mrowner, data, data_len);
+ }
+
+ if (tdx_guest->mrownerconfig) {
+ g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrownerconfig,
+ strlen(tdx_guest->mrownerconfig), &data_len, errp);
+ if (!data) {
+ return -1;
+ }
+ if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) {
+ error_setg(errp, "TDX: failed to decode mrownerconfig");
+ return -1;
+ }
+ memcpy(init_vm->mrownerconfig, data, data_len);
+ }
+
+ r = setup_td_guest_attributes(x86cpu, errp);
+ if (r) {
+ return r;
+ }
+
+ r = setup_td_xfam(x86cpu, errp);
+ if (r) {
+ return r;
+ }
+
+ init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0);
+ tdx_filter_cpuid(&init_vm->cpuid);
+
+ init_vm->attributes = tdx_guest->attributes;
+ init_vm->xfam = tdx_guest->xfam;
+
+ /*
+ * KVM_TDX_INIT_VM returns -EAGAIN when the KVM-side SEAMCALL(TDH_MNG_CREATE)
+ * gets TDX_RND_NO_ENTROPY because the random number generator (e.g., RDRAND
+ * or RDSEED) is busy.
+ *
+ * Retry in that case.
+ */
+ do {
+ error_free(local_err);
+ local_err = NULL;
+ r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm, &local_err);
+ } while (r == -EAGAIN && --retry);
+
+ if (r < 0) {
+ if (!retry) {
+ error_append_hint(&local_err, "The hardware RNG (Random Number "
+ "Generator) is kept busy by some other party (via RDRAND/RDSEED), "
+ "possibly maliciously, so KVM_TDX_INIT_VM keeps failing due to "
+ "lack of entropy.\n");
+ }
+ error_propagate(errp, local_err);
+ return r;
+ }
+
+ tdx_guest->initialized = true;
+
+ return 0;
+}
+
+int tdx_parse_tdvf(void *flash_ptr, int size)
+{
+ return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size);
+}
+
+static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code,
+ char *message, uint64_t gpa)
+{
+ GuestPanicInformation *panic_info;
+
+ panic_info = g_new0(GuestPanicInformation, 1);
+ panic_info->type = GUEST_PANIC_INFORMATION_TYPE_TDX;
+ panic_info->u.tdx.error_code = (uint32_t) error_code;
+ panic_info->u.tdx.message = message;
+ panic_info->u.tdx.gpa = gpa;
+
+ qemu_system_guest_panicked(panic_info);
+}
+
+/*
+ * Only 8 registers can carry a valid ASCII byte stream forming the fatal
+ * message (8 x 8 bytes = 64 bytes at most), and their order is: R14, R15,
+ * RBX, RDI, RSI, R8, R9, RDX.
+ */
+#define TDX_FATAL_MESSAGE_MAX 64
+
+#define TDX_REPORT_FATAL_ERROR_GPA_VALID BIT_ULL(63)
+
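+/*
+ * Layout of run->system_event.data[] as consumed below: R12 holds the error
+ * code, RCX the mask of registers carrying message bytes, R13 an optional
+ * GPA (valid only when bit 63 of the error code is set), and the registers
+ * listed above carry up to 64 bytes of ASCII message.
+ */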
+int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run)
+{
+ uint64_t error_code = run->system_event.data[R_R12];
+ uint64_t reg_mask = run->system_event.data[R_ECX];
+ char *message = NULL;
+ uint64_t *tmp;
+ uint64_t gpa = -1ull;
+
+ if (error_code & 0xffff) {
+ error_report("TDX: REPORT_FATAL_ERROR: invalid error code: 0x%"PRIx64,
+ error_code);
+ return -1;
+ }
+
+ if (reg_mask) {
+ message = g_malloc0(TDX_FATAL_MESSAGE_MAX + 1);
+ tmp = (uint64_t *)message;
+
+#define COPY_REG(REG) \
+ do { \
+ if (reg_mask & BIT_ULL(REG)) { \
+ *(tmp++) = run->system_event.data[REG]; \
+ } \
+ } while (0)
+
+ COPY_REG(R_R14);
+ COPY_REG(R_R15);
+ COPY_REG(R_EBX);
+ COPY_REG(R_EDI);
+ COPY_REG(R_ESI);
+ COPY_REG(R_R8);
+ COPY_REG(R_R9);
+ COPY_REG(R_EDX);
+ *((char *)tmp) = '\0';
+ }
+#undef COPY_REG
+
+ if (error_code & TDX_REPORT_FATAL_ERROR_GPA_VALID) {
+ gpa = run->system_event.data[R_R13];
+ }
+
+ tdx_panicked_on_fatal_error(cpu, error_code, message, gpa);
+
+ return -1;
+}
+
+static bool tdx_guest_get_sept_ve_disable(Object *obj, Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ return !!(tdx->attributes & TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE);
+}
+
+static void tdx_guest_set_sept_ve_disable(Object *obj, bool value, Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ if (value) {
+ tdx->attributes |= TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
+ } else {
+ tdx->attributes &= ~TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
+ }
+}
+
+static char *tdx_guest_get_mrconfigid(Object *obj, Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ return g_strdup(tdx->mrconfigid);
+}
+
+static void tdx_guest_set_mrconfigid(Object *obj, const char *value, Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ g_free(tdx->mrconfigid);
+ tdx->mrconfigid = g_strdup(value);
+}
+
+static char *tdx_guest_get_mrowner(Object *obj, Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ return g_strdup(tdx->mrowner);
+}
+
+static void tdx_guest_set_mrowner(Object *obj, const char *value, Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ g_free(tdx->mrowner);
+ tdx->mrowner = g_strdup(value);
+}
+
+static char *tdx_guest_get_mrownerconfig(Object *obj, Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ return g_strdup(tdx->mrownerconfig);
+}
+
+static void tdx_guest_set_mrownerconfig(Object *obj, const char *value, Error **errp)
+{
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ g_free(tdx->mrownerconfig);
+ tdx->mrownerconfig = g_strdup(value);
+}
+
+/* tdx guest */
+OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest,
+ tdx_guest,
+ TDX_GUEST,
+ X86_CONFIDENTIAL_GUEST,
+ { TYPE_USER_CREATABLE },
+ { NULL })
+
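+/*
+ * Illustrative usage (option spelling may vary between QEMU versions): the
+ * tdx-guest object is typically created on the command line and wired to the
+ * machine via confidential-guest-support, e.g.
+ *
+ *   -object tdx-guest,id=tdx0,sept-ve-disable=on \
+ *   -machine q35,kernel-irqchip=split,confidential-guest-support=tdx0
+ */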
+static void tdx_guest_init(Object *obj)
+{
+ ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj);
+ TdxGuest *tdx = TDX_GUEST(obj);
+
+ qemu_mutex_init(&tdx->lock);
+
+ cgs->require_guest_memfd = true;
+ tdx->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
+
+ object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes,
+ OBJ_PROP_FLAG_READWRITE);
+ object_property_add_bool(obj, "sept-ve-disable",
+ tdx_guest_get_sept_ve_disable,
+ tdx_guest_set_sept_ve_disable);
+ object_property_add_str(obj, "mrconfigid",
+ tdx_guest_get_mrconfigid,
+ tdx_guest_set_mrconfigid);
+ object_property_add_str(obj, "mrowner",
+ tdx_guest_get_mrowner, tdx_guest_set_mrowner);
+ object_property_add_str(obj, "mrownerconfig",
+ tdx_guest_get_mrownerconfig,
+ tdx_guest_set_mrownerconfig);
+}
+
+static void tdx_guest_finalize(Object *obj)
+{
+}
+
+static void tdx_guest_class_init(ObjectClass *oc, const void *data)
+{
+ ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc);
+ X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc);
+
+ klass->kvm_init = tdx_kvm_init;
+ x86_klass->kvm_type = tdx_kvm_type;
+ x86_klass->cpu_instance_init = tdx_cpu_instance_init;
+ x86_klass->adjust_cpuid_features = tdx_adjust_cpuid_features;
+ x86_klass->check_features = tdx_check_features;
+}
diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h
new file mode 100644
index 0000000..04b5afe
--- /dev/null
+++ b/target/i386/kvm/tdx.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef QEMU_I386_TDX_H
+#define QEMU_I386_TDX_H
+
+#ifndef CONFIG_USER_ONLY
+#include CONFIG_DEVICES /* CONFIG_TDX */
+#endif
+
+#include "confidential-guest.h"
+#include "cpu.h"
+#include "hw/i386/tdvf.h"
+
+#define TYPE_TDX_GUEST "tdx-guest"
+#define TDX_GUEST(obj) OBJECT_CHECK(TdxGuest, (obj), TYPE_TDX_GUEST)
+
+typedef struct TdxGuestClass {
+ X86ConfidentialGuestClass parent_class;
+} TdxGuestClass;
+
+/* TDX requires the APIC bus frequency to be 25 MHz, i.e. 40 ns per cycle */
+#define TDX_APIC_BUS_CYCLES_NS 40
+
+enum TdxRamType {
+ TDX_RAM_UNACCEPTED,
+ TDX_RAM_ADDED,
+};
+
+typedef struct TdxRamEntry {
+ uint64_t address;
+ uint64_t length;
+ enum TdxRamType type;
+} TdxRamEntry;
+
+typedef struct TdxGuest {
+ X86ConfidentialGuest parent_obj;
+
+ QemuMutex lock;
+
+ bool initialized;
+ uint64_t attributes; /* TD attributes */
+ uint64_t xfam;
+ char *mrconfigid; /* base64 encoded sha384 digest */
+ char *mrowner; /* base64 encoded sha384 digest */
+ char *mrownerconfig; /* base64 encoded sha384 digest */
+
+ MemoryRegion *tdvf_mr;
+ TdxFirmware tdvf;
+
+ uint32_t nr_ram_entries;
+ TdxRamEntry *ram_entries;
+} TdxGuest;
+
+#ifdef CONFIG_TDX
+bool is_tdx_vm(void);
+#else
+#define is_tdx_vm() 0
+#endif /* CONFIG_TDX */
+
+int tdx_pre_create_vcpu(CPUState *cpu, Error **errp);
+void tdx_set_tdvf_region(MemoryRegion *tdvf_mr);
+int tdx_parse_tdvf(void *flash_ptr, int size);
+int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run);
+
+#endif /* QEMU_I386_TDX_H */
diff --git a/target/i386/machine.c b/target/i386/machine.c
index 6cb561c..dd2dac1 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -1060,9 +1060,8 @@ static bool tsc_khz_needed(void *opaque)
{
X86CPU *cpu = opaque;
CPUX86State *env = &cpu->env;
- MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
- X86MachineClass *x86mc = X86_MACHINE_CLASS(mc);
- return env->tsc_khz && x86mc->save_tsc_khz;
+
+ return env->tsc_khz;
}
static const VMStateDescription vmstate_tsc_khz = {
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 56dd64e..1a12f06 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -212,14 +212,6 @@ static const char *const sev_fw_errlist[] = {
#define SEV_FW_MAX_ERROR ARRAY_SIZE(sev_fw_errlist)
-/* <linux/kvm.h> doesn't expose this, so re-use the max from kvm.c */
-#define KVM_MAX_CPUID_ENTRIES 100
-
-typedef struct KvmCpuidInfo {
- struct kvm_cpuid2 cpuid;
- struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES];
-} KvmCpuidInfo;
-
#define SNP_CPUID_FUNCTION_MAXCOUNT 64
#define SNP_CPUID_FUNCTION_UNKNOWN 0xFFFFFFFF
@@ -947,7 +939,7 @@ out:
}
static uint32_t
-sev_snp_mask_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index,
+sev_snp_adjust_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index,
int reg, uint32_t value)
{
switch (feature) {
@@ -2405,7 +2397,7 @@ sev_snp_guest_class_init(ObjectClass *oc, const void *data)
klass->launch_finish = sev_snp_launch_finish;
klass->launch_update_data = sev_snp_launch_update_data;
klass->kvm_init = sev_snp_kvm_init;
- x86_klass->mask_cpuid_features = sev_snp_mask_cpuid_features;
+ x86_klass->adjust_cpuid_features = sev_snp_adjust_cpuid_features;
x86_klass->kvm_type = sev_snp_kvm_type;
object_class_property_add(oc, "policy", "uint64",
diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h
index 6b3f198..be011b0 100644
--- a/target/i386/tcg/helper-tcg.h
+++ b/target/i386/tcg/helper-tcg.h
@@ -97,7 +97,7 @@ static inline unsigned int compute_pf(uint8_t x)
/* misc_helper.c */
void cpu_load_eflags(CPUX86State *env, int eflags, int update_mask);
-/* sysemu/svm_helper.c */
+/* system/svm_helper.c */
#ifndef CONFIG_USER_ONLY
G_NORETURN void cpu_vmexit(CPUX86State *nenv, uint32_t exit_code,
uint64_t exit_info_1, uintptr_t retaddr);
@@ -115,7 +115,7 @@ int exception_has_error_code(int intno);
/* smm_helper.c */
void do_smm_enter(X86CPU *cpu);
-/* sysemu/bpt_helper.c */
+/* system/bpt_helper.c */
bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update);
/*
diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c
index 1bda570..c66bdd5 100644
--- a/target/loongarch/kvm/kvm.c
+++ b/target/loongarch/kvm/kvm.c
@@ -1071,7 +1071,11 @@ static int kvm_cpu_check_pv_features(CPUState *cs, Error **errp)
env->pv_features |= BIT(KVM_FEATURE_VIRT_EXTIOI);
}
}
+ return 0;
+}
+int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
return 0;
}
diff --git a/target/mips/kvm.c b/target/mips/kvm.c
index d67b7c1..ec53acb 100644
--- a/target/mips/kvm.c
+++ b/target/mips/kvm.c
@@ -61,6 +61,11 @@ int kvm_arch_irqchip_create(KVMState *s)
return 0;
}
+int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+ return 0;
+}
+
int kvm_arch_init_vcpu(CPUState *cs)
{
CPUMIPSState *env = cpu_env(cs);
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 8a957c3..0156580 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -479,6 +479,11 @@ static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
}
}
+int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+ return 0;
+}
+
int kvm_arch_init_vcpu(CPUState *cs)
{
PowerPCCPU *cpu = POWERPC_CPU(cs);
diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c
index efb41fa..e1a04be 100644
--- a/target/riscv/kvm/kvm-cpu.c
+++ b/target/riscv/kvm/kvm-cpu.c
@@ -1472,6 +1472,11 @@ static int kvm_vcpu_enable_sbi_dbcn(RISCVCPU *cpu, CPUState *cs)
return kvm_set_one_reg(cs, kvm_sbi_dbcn.kvm_reg_id, &reg);
}
+int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+ return 0;
+}
+
int kvm_arch_init_vcpu(CPUState *cs)
{
int ret = 0;
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index 6cd2ebc..67d9a19 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -398,6 +398,11 @@ unsigned long kvm_arch_vcpu_id(CPUState *cpu)
return cpu->cpu_index;
}
+int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+ return 0;
+}
+
int kvm_arch_init_vcpu(CPUState *cs)
{
unsigned int max_cpus = MACHINE(qdev_get_machine())->smp.max_cpus;
diff --git a/tcg/meson.build b/tcg/meson.build
index bd2821e..706a6eb 100644
--- a/tcg/meson.build
+++ b/tcg/meson.build
@@ -27,5 +27,5 @@ if host_os == 'linux'
tcg_ss.add(files('perf.c'))
endif
-libuser_ss.add_all(tcg_ss)
-libsystem_ss.add_all(tcg_ss)
+user_ss.add_all(tcg_ss)
+system_ss.add_all(tcg_ss)
diff --git a/tests/docker/dockerfiles/fedora-rust-nightly.docker b/tests/docker/dockerfiles/fedora-rust-nightly.docker
index fe4a6ed..4a03330 100644
--- a/tests/docker/dockerfiles/fedora-rust-nightly.docker
+++ b/tests/docker/dockerfiles/fedora-rust-nightly.docker
@@ -156,6 +156,7 @@ ENV PYTHON "/usr/bin/python3"
RUN dnf install -y wget
ENV RUSTUP_HOME=/usr/local/rustup CARGO_HOME=/usr/local/cargo
ENV RUSTC=/usr/local/rustup/toolchains/nightly-x86_64-unknown-linux-gnu/bin/rustc
+ENV RUSTDOC=/usr/local/rustup/toolchains/nightly-x86_64-unknown-linux-gnu/bin/rustdoc
ENV CARGO=/usr/local/rustup/toolchains/nightly-x86_64-unknown-linux-gnu/bin/cargo
RUN set -eux && \
rustArch='x86_64-unknown-linux-gnu' && \
@@ -170,6 +171,7 @@ RUN set -eux && \
/usr/local/cargo/bin/rustup run nightly cargo --version && \
/usr/local/cargo/bin/rustup run nightly rustc --version && \
test "$CARGO" = "$(/usr/local/cargo/bin/rustup +nightly which cargo)" && \
+ test "$RUSTDOC" = "$(/usr/local/cargo/bin/rustup +nightly which rustdoc)" && \
test "$RUSTC" = "$(/usr/local/cargo/bin/rustup +nightly which rustc)"
ENV PATH=$CARGO_HOME/bin:$PATH
RUN /usr/local/cargo/bin/rustup run nightly cargo install bindgen-cli
diff --git a/tests/docker/dockerfiles/ubuntu2204.docker b/tests/docker/dockerfiles/ubuntu2204.docker
index 4a1cf2b..28a6f93 100644
--- a/tests/docker/dockerfiles/ubuntu2204.docker
+++ b/tests/docker/dockerfiles/ubuntu2204.docker
@@ -151,6 +151,7 @@ ENV MAKE "/usr/bin/make"
ENV NINJA "/usr/bin/ninja"
ENV PYTHON "/usr/bin/python3"
ENV RUSTC=/usr/bin/rustc-1.77
+ENV RUSTDOC=/usr/bin/rustdoc-1.77
ENV CARGO_HOME=/usr/local/cargo
ENV PATH=$CARGO_HOME/bin:$PATH
RUN DEBIAN_FRONTEND=noninteractive eatmydata \
diff --git a/tests/functional/meson.build b/tests/functional/meson.build
index 52b4706..557d59d 100644
--- a/tests/functional/meson.build
+++ b/tests/functional/meson.build
@@ -137,6 +137,7 @@ tests_arm_system_thorough = [
'arm_raspi2',
'arm_replay',
'arm_smdkc210',
+ 'arm_stellaris',
'arm_sx1',
'arm_vexpress',
'arm_virt',
diff --git a/tests/functional/test_arm_stellaris.py b/tests/functional/test_arm_stellaris.py
new file mode 100755
index 0000000..cbd21cb
--- /dev/null
+++ b/tests/functional/test_arm_stellaris.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+#
+# Functional test that checks the serial console of the stellaris machines
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+from qemu_test import QemuSystemTest, Asset, exec_command_and_wait_for_pattern
+from qemu_test import wait_for_console_pattern
+
+
+class StellarisMachine(QemuSystemTest):
+
+ ASSET_DAY22 = Asset(
+ 'https://www.qemu-advent-calendar.org/2023/download/day22.tar.gz',
+ 'ae3a63ef4b7a22c21bfc7fc0d85e402fe95e223308ed23ac854405016431ff51')
+
+ def test_lm3s6965evb(self):
+ self.set_machine('lm3s6965evb')
+ kernel_path = self.archive_extract(self.ASSET_DAY22,
+ member='day22/day22.bin')
+ self.vm.set_console()
+ self.vm.add_args('-kernel', kernel_path)
+ self.vm.launch()
+
+ wait_for_console_pattern(self, 'In a one horse open')
+
+ ASSET_NOTMAIN = Asset(
+ 'https://github.com/Ahelion/QemuArmM4FDemoSw/raw/master/build/notmain.bin',
+ '6ceda031aa081a420fca2fca9e137fa681d6e3820d820ad1917736cb265e611a')
+
+ def test_lm3s811evb(self):
+ self.set_machine('lm3s811evb')
+ kernel_path = self.ASSET_NOTMAIN.fetch()
+
+ self.vm.set_console()
+ self.vm.add_args('-cpu', 'cortex-m4')
+ self.vm.add_args('-kernel', kernel_path)
+ self.vm.launch()
+
+ # The test kernel emits an initial '!' and then waits for input.
+ # For each character that we send it responds with a certain
+ # other ASCII character.
+ wait_for_console_pattern(self, '!')
+ exec_command_and_wait_for_pattern(self, '789', 'cdf')
+
+
+if __name__ == '__main__':
+ QemuSystemTest.main()
diff --git a/tests/functional/test_mem_addr_space.py b/tests/functional/test_mem_addr_space.py
index 2d9d31e..61b4a19 100755
--- a/tests/functional/test_mem_addr_space.py
+++ b/tests/functional/test_mem_addr_space.py
@@ -58,8 +58,8 @@ class MemAddrCheck(QemuSystemTest):
should start fine.
"""
self.ensure_64bit_binary()
- self.vm.add_args('-S', '-machine', 'q35', '-m',
- '512,slots=1,maxmem=59.6G',
+ self.set_machine('q35')
+ self.vm.add_args('-S', '-m', '512,slots=1,maxmem=59.6G',
'-cpu', 'pentium,pse36=on', '-display', 'none',
'-object', 'memory-backend-ram,id=mem1,size=1G',
'-device', 'pc-dimm,id=vm0,memdev=mem1')
@@ -76,8 +76,8 @@ class MemAddrCheck(QemuSystemTest):
with pse36 above.
"""
self.ensure_64bit_binary()
- self.vm.add_args('-S', '-machine', 'q35', '-m',
- '512,slots=1,maxmem=59.6G',
+ self.set_machine('q35')
+ self.vm.add_args('-S', '-m', '512,slots=1,maxmem=59.6G',
'-cpu', 'pentium,pae=on', '-display', 'none',
'-object', 'memory-backend-ram,id=mem1,size=1G',
'-device', 'pc-dimm,id=vm0,memdev=mem1')
@@ -93,8 +93,8 @@ class MemAddrCheck(QemuSystemTest):
same options as the failing case above with pse36 cpu feature.
"""
self.ensure_64bit_binary()
- self.vm.add_args('-machine', 'q35', '-m',
- '512,slots=1,maxmem=59.5G',
+ self.set_machine('q35')
+ self.vm.add_args('-m', '512,slots=1,maxmem=59.5G',
'-cpu', 'pentium,pse36=on', '-display', 'none',
'-object', 'memory-backend-ram,id=mem1,size=1G',
'-device', 'pc-dimm,id=vm0,memdev=mem1')
@@ -111,8 +111,8 @@ class MemAddrCheck(QemuSystemTest):
with the same options as the case above.
"""
self.ensure_64bit_binary()
- self.vm.add_args('-machine', 'q35', '-m',
- '512,slots=1,maxmem=59.5G',
+ self.set_machine('q35')
+ self.vm.add_args('-m', '512,slots=1,maxmem=59.5G',
'-cpu', 'pentium,pae=on', '-display', 'none',
'-object', 'memory-backend-ram,id=mem1,size=1G',
'-device', 'pc-dimm,id=vm0,memdev=mem1')
@@ -128,8 +128,8 @@ class MemAddrCheck(QemuSystemTest):
with pse36 ON.
"""
self.ensure_64bit_binary()
- self.vm.add_args('-machine', 'q35', '-m',
- '512,slots=1,maxmem=59.5G',
+ self.set_machine('q35')
+ self.vm.add_args('-m', '512,slots=1,maxmem=59.5G',
'-cpu', 'pentium2', '-display', 'none',
'-object', 'memory-backend-ram,id=mem1,size=1G',
'-device', 'pc-dimm,id=vm0,memdev=mem1')
@@ -148,8 +148,8 @@ class MemAddrCheck(QemuSystemTest):
above 4 GiB due to the PCI hole and simplicity.
"""
self.ensure_64bit_binary()
- self.vm.add_args('-S', '-machine', 'q35', '-m',
- '512,slots=1,maxmem=4G',
+ self.set_machine('q35')
+ self.vm.add_args('-S', '-m', '512,slots=1,maxmem=4G',
'-cpu', 'pentium', '-display', 'none',
'-object', 'memory-backend-ram,id=mem1,size=1G',
'-device', 'pc-dimm,id=vm0,memdev=mem1')
@@ -176,8 +176,8 @@ class MemAddrCheck(QemuSystemTest):
make QEMU fail with the error message.
"""
self.ensure_64bit_binary()
- self.vm.add_args('-S', '-machine', 'pc-q35-7.0', '-m',
- '512,slots=1,maxmem=988G',
+ self.set_machine('pc-q35-7.0')
+ self.vm.add_args('-S', '-m', '512,slots=1,maxmem=988G',
'-display', 'none',
'-object', 'memory-backend-ram,id=mem1,size=1G',
'-device', 'pc-dimm,id=vm0,memdev=mem1')
@@ -197,8 +197,8 @@ class MemAddrCheck(QemuSystemTest):
than 988 GiB).
"""
self.ensure_64bit_binary()
- self.vm.add_args('-S', '-machine', 'pc-q35-7.1', '-m',
- '512,slots=1,maxmem=976G',
+ self.set_machine('pc-q35-7.1')
+ self.vm.add_args('-S', '-m', '512,slots=1,maxmem=976G',
'-display', 'none',
'-object', 'memory-backend-ram,id=mem1,size=1G',
'-device', 'pc-dimm,id=vm0,memdev=mem1')
@@ -214,8 +214,8 @@ class MemAddrCheck(QemuSystemTest):
successfully start when maxmem is < 988G.
"""
self.ensure_64bit_binary()
- self.vm.add_args('-S', '-machine', 'pc-q35-7.0', '-m',
- '512,slots=1,maxmem=987.5G',
+ self.set_machine('pc-q35-7.0')
+ self.vm.add_args('-S', '-m', '512,slots=1,maxmem=987.5G',
'-display', 'none',
'-object', 'memory-backend-ram,id=mem1,size=1G',
'-device', 'pc-dimm,id=vm0,memdev=mem1')
@@ -231,8 +231,8 @@ class MemAddrCheck(QemuSystemTest):
successfully start when maxmem is < 976G.
"""
self.ensure_64bit_binary()
- self.vm.add_args('-S', '-machine', 'pc-q35-7.1', '-m',
- '512,slots=1,maxmem=975.5G',
+ self.set_machine('pc-q35-7.1')
+ self.vm.add_args('-S', '-m', '512,slots=1,maxmem=975.5G',
'-display', 'none',
'-object', 'memory-backend-ram,id=mem1,size=1G',
'-device', 'pc-dimm,id=vm0,memdev=mem1')
@@ -249,9 +249,9 @@ class MemAddrCheck(QemuSystemTest):
"above_4G" memory starts at 4G.
"""
self.ensure_64bit_binary()
+ self.set_machine('pc-q35-7.1')
self.vm.add_args('-S', '-cpu', 'Skylake-Server',
- '-machine', 'pc-q35-7.1', '-m',
- '512,slots=1,maxmem=976G',
+ '-m', '512,slots=1,maxmem=976G',
'-display', 'none',
'-object', 'memory-backend-ram,id=mem1,size=1G',
'-device', 'pc-dimm,id=vm0,memdev=mem1')
@@ -274,9 +274,9 @@ class MemAddrCheck(QemuSystemTest):
fail to start.
"""
self.ensure_64bit_binary()
+ self.set_machine('pc-q35-7.1')
self.vm.add_args('-S', '-cpu', 'EPYC-v4,phys-bits=41',
- '-machine', 'pc-q35-7.1', '-m',
- '512,slots=1,maxmem=992G',
+ '-m', '512,slots=1,maxmem=992G',
'-display', 'none',
'-object', 'memory-backend-ram,id=mem1,size=1G',
'-device', 'pc-dimm,id=vm0,memdev=mem1')
@@ -293,9 +293,9 @@ class MemAddrCheck(QemuSystemTest):
QEMU should start fine.
"""
self.ensure_64bit_binary()
+ self.set_machine('pc-q35-7.1')
self.vm.add_args('-S', '-cpu', 'EPYC-v4,phys-bits=41',
- '-machine', 'pc-q35-7.1', '-m',
- '512,slots=1,maxmem=990G',
+ '-m', '512,slots=1,maxmem=990G',
'-display', 'none',
'-object', 'memory-backend-ram,id=mem1,size=1G',
'-device', 'pc-dimm,id=vm0,memdev=mem1')
@@ -314,12 +314,12 @@ class MemAddrCheck(QemuSystemTest):
alignment constraints with 40 bits (1 TiB) of processor physical bits.
"""
self.ensure_64bit_binary()
+ self.set_machine('q35')
self.vm.add_args('-S', '-cpu', 'Skylake-Server,phys-bits=40',
- '-machine', 'q35,cxl=on', '-m',
- '512,slots=1,maxmem=987G',
+ '-m', '512,slots=1,maxmem=987G',
'-display', 'none',
'-device', 'pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1',
- '-M', 'cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=1G')
+ '-M', 'cxl=on,cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=1G')
self.vm.set_qmp_monitor(enabled=False)
self.vm.launch()
self.vm.wait()
@@ -333,9 +333,10 @@ class MemAddrCheck(QemuSystemTest):
with cxl enabled.
"""
self.ensure_64bit_binary()
+ self.set_machine('q35')
self.vm.add_args('-S', '-cpu', 'Skylake-Server,phys-bits=40',
- '-machine', 'q35,cxl=on', '-m',
- '512,slots=1,maxmem=987G',
+ '-machine', 'cxl=on',
+ '-m', '512,slots=1,maxmem=987G',
'-display', 'none',
'-device', 'pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1')
self.vm.set_qmp_monitor(enabled=False)
diff --git a/tests/functional/test_microblaze_s3adsp1800.py b/tests/functional/test_microblaze_s3adsp1800.py
index c93fa14..f093b16 100755
--- a/tests/functional/test_microblaze_s3adsp1800.py
+++ b/tests/functional/test_microblaze_s3adsp1800.py
@@ -25,12 +25,14 @@ class MicroblazeMachine(QemuSystemTest):
('http://www.qemu-advent-calendar.org/2023/download/day13.tar.gz'),
'b9b3d43c5dd79db88ada495cc6e0d1f591153fe41355e925d791fbf44de50c22')
- def do_ballerina_be_test(self, machine):
- self.set_machine(machine)
+ def do_ballerina_be_test(self, force_endianness=False):
+ self.set_machine('petalogix-s3adsp1800')
self.archive_extract(self.ASSET_IMAGE_BE)
self.vm.set_console()
self.vm.add_args('-kernel',
self.scratch_file('day17', 'ballerina.bin'))
+ if force_endianness:
+ self.vm.add_args('-M', 'endianness=big')
self.vm.launch()
wait_for_console_pattern(self, 'This architecture does not have '
'kernel memory protection')
@@ -39,12 +41,14 @@ class MicroblazeMachine(QemuSystemTest):
# message, that's why we don't test for a later string here. This
# needs some investigation by a microblaze wizard one day...
- def do_xmaton_le_test(self, machine):
+ def do_xmaton_le_test(self, force_endianness=False):
self.require_netdev('user')
- self.set_machine(machine)
+ self.set_machine('petalogix-s3adsp1800')
self.archive_extract(self.ASSET_IMAGE_LE)
self.vm.set_console()
self.vm.add_args('-kernel', self.scratch_file('day13', 'xmaton.bin'))
+ if force_endianness:
+ self.vm.add_args('-M', 'endianness=little')
tftproot = self.scratch_file('day13')
self.vm.add_args('-nic', f'user,tftp={tftproot}')
self.vm.launch()
@@ -59,9 +63,13 @@ class MicroblazeMachine(QemuSystemTest):
class MicroblazeBigEndianMachine(MicroblazeMachine):
ASSET_IMAGE_BE = MicroblazeMachine.ASSET_IMAGE_BE
+ ASSET_IMAGE_LE = MicroblazeMachine.ASSET_IMAGE_LE
def test_microblaze_s3adsp1800_legacy_be(self):
- self.do_ballerina_be_test('petalogix-s3adsp1800')
+ self.do_ballerina_be_test()
+
+ def test_microblaze_s3adsp1800_legacy_le(self):
+ self.do_xmaton_le_test(force_endianness=True)
if __name__ == '__main__':
diff --git a/tests/functional/test_microblazeel_s3adsp1800.py b/tests/functional/test_microblazeel_s3adsp1800.py
index ab59941..915902d 100755
--- a/tests/functional/test_microblazeel_s3adsp1800.py
+++ b/tests/functional/test_microblazeel_s3adsp1800.py
@@ -13,9 +13,13 @@ from test_microblaze_s3adsp1800 import MicroblazeMachine
class MicroblazeLittleEndianMachine(MicroblazeMachine):
ASSET_IMAGE_LE = MicroblazeMachine.ASSET_IMAGE_LE
+ ASSET_IMAGE_BE = MicroblazeMachine.ASSET_IMAGE_BE
def test_microblaze_s3adsp1800_legacy_le(self):
- self.do_xmaton_le_test('petalogix-s3adsp1800')
+ self.do_xmaton_le_test()
+
+ def test_microblaze_s3adsp1800_legacy_be(self):
+ self.do_ballerina_be_test(force_endianness=True)
if __name__ == '__main__':
diff --git a/tests/functional/test_mips_malta.py b/tests/functional/test_mips_malta.py
index 89b9556..30279f0 100755
--- a/tests/functional/test_mips_malta.py
+++ b/tests/functional/test_mips_malta.py
@@ -80,10 +80,8 @@ def mips_check_wheezy(test, kernel_path, image_path, kernel_command_line,
exec_command_and_wait_for_pattern(test, 'cat /proc/devices', 'usb')
exec_command_and_wait_for_pattern(test, 'cat /proc/ioports',
' : piix4_smbus')
- # lspci for the host bridge does not work on big endian targets:
- # https://gitlab.com/qemu-project/qemu/-/issues/2826
- # exec_command_and_wait_for_pattern(test, 'lspci -d 11ab:4620',
- # 'GT-64120')
+ exec_command_and_wait_for_pattern(test, 'lspci -d 11ab:4620',
+ 'GT-64120')
exec_command_and_wait_for_pattern(test,
'cat /sys/bus/i2c/devices/i2c-0/name',
'SMBus PIIX4 adapter')
diff --git a/tests/functional/test_sparc64_tuxrun.py b/tests/functional/test_sparc64_tuxrun.py
index 3be08d6..0d7b43d 100755
--- a/tests/functional/test_sparc64_tuxrun.py
+++ b/tests/functional/test_sparc64_tuxrun.py
@@ -24,6 +24,7 @@ class TuxRunSparc64Test(TuxRunBaselineTest):
'479c3dc104c82b68be55e2c0c5c38cd473d0b37ad4badccde4775bb88ce34611')
def test_sparc64(self):
+ self.set_machine('sun4u')
self.root='sda'
self.wait_for_shutdown=False
self.common_tuxrun(kernel_asset=self.ASSET_SPARC64_KERNEL,
diff --git a/tests/lcitool/mappings.yml b/tests/lcitool/mappings.yml
index 673baf3..8f0e95e 100644
--- a/tests/lcitool/mappings.yml
+++ b/tests/lcitool/mappings.yml
@@ -8,6 +8,10 @@ mappings:
meson:
OpenSUSELeap15:
+ # Use Meson from PyPI wherever Rust is enabled
+ Debian:
+ Fedora:
+ Ubuntu:
python3:
OpenSUSELeap15: python311-base
@@ -72,7 +76,7 @@ mappings:
pypi_mappings:
# Request more recent version
meson:
- default: meson==1.5.0
+ default: meson==1.8.1
# Drop packages that need devel headers
python3-numpy:
diff --git a/tests/lcitool/refresh b/tests/lcitool/refresh
index 8474ea8..d3488b2 100755
--- a/tests/lcitool/refresh
+++ b/tests/lcitool/refresh
@@ -121,6 +121,7 @@ fedora_rustup_nightly_extras = [
"RUN dnf install -y wget\n",
"ENV RUSTUP_HOME=/usr/local/rustup CARGO_HOME=/usr/local/cargo\n",
"ENV RUSTC=/usr/local/rustup/toolchains/nightly-x86_64-unknown-linux-gnu/bin/rustc\n",
+ "ENV RUSTDOC=/usr/local/rustup/toolchains/nightly-x86_64-unknown-linux-gnu/bin/rustdoc\n",
"ENV CARGO=/usr/local/rustup/toolchains/nightly-x86_64-unknown-linux-gnu/bin/cargo\n",
"RUN set -eux && \\\n",
" rustArch='x86_64-unknown-linux-gnu' && \\\n",
@@ -135,6 +136,7 @@ fedora_rustup_nightly_extras = [
" /usr/local/cargo/bin/rustup run nightly cargo --version && \\\n",
" /usr/local/cargo/bin/rustup run nightly rustc --version && \\\n",
' test "$CARGO" = "$(/usr/local/cargo/bin/rustup +nightly which cargo)" && \\\n',
+ ' test "$RUSTDOC" = "$(/usr/local/cargo/bin/rustup +nightly which rustdoc)" && \\\n',
' test "$RUSTC" = "$(/usr/local/cargo/bin/rustup +nightly which rustc)"\n',
'ENV PATH=$CARGO_HOME/bin:$PATH\n',
'RUN /usr/local/cargo/bin/rustup run nightly cargo install bindgen-cli\n',
@@ -143,6 +145,7 @@ fedora_rustup_nightly_extras = [
ubuntu2204_rust_extras = [
"ENV RUSTC=/usr/bin/rustc-1.77\n",
+ "ENV RUSTDOC=/usr/bin/rustdoc-1.77\n",
"ENV CARGO_HOME=/usr/local/cargo\n",
'ENV PATH=$CARGO_HOME/bin:$PATH\n',
"RUN DEBIAN_FRONTEND=noninteractive eatmydata \\\n",
diff --git a/tests/qemu-iotests/106 b/tests/qemu-iotests/106
index ae0fc46..5554843 100755
--- a/tests/qemu-iotests/106
+++ b/tests/qemu-iotests/106
@@ -40,6 +40,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
_supported_fmt raw
_supported_proto file fuse
_supported_os Linux
+_require_disk_usage
# in kB
CREATION_SIZE=128
diff --git a/tests/qemu-iotests/125 b/tests/qemu-iotests/125
index 46279d6..708e7c5 100755
--- a/tests/qemu-iotests/125
+++ b/tests/qemu-iotests/125
@@ -35,7 +35,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15
get_image_size_on_host()
{
- echo $(($(stat -c '%b * %B' "$TEST_IMG_FILE")))
+ disk_usage "$TEST_IMG_FILE"
}
# get standard environment and filters
diff --git a/tests/qemu-iotests/175 b/tests/qemu-iotests/175
index f74f053..bbbf550 100755
--- a/tests/qemu-iotests/175
+++ b/tests/qemu-iotests/175
@@ -77,6 +77,7 @@ _supported_os Linux
_default_cache_mode none
_supported_cache_modes none directsync
+_require_disk_usage
size=$((1 * 1024 * 1024))
diff --git a/tests/qemu-iotests/221 b/tests/qemu-iotests/221
index c463fd4..eba00b8 100755
--- a/tests/qemu-iotests/221
+++ b/tests/qemu-iotests/221
@@ -41,6 +41,7 @@ _supported_os Linux
_default_cache_mode writeback
_supported_cache_modes writeback writethrough unsafe
+_require_disk_usage
echo
echo "=== Check mapping of unaligned raw image ==="
diff --git a/tests/qemu-iotests/240 b/tests/qemu-iotests/240
index 9b281e1..f8af9ff 100755
--- a/tests/qemu-iotests/240
+++ b/tests/qemu-iotests/240
@@ -81,8 +81,6 @@ class TestCase(iotests.QMPTestCase):
self.vm.qmp_log('device_del', id='scsi-hd0')
self.vm.event_wait('DEVICE_DELETED')
- self.vm.qmp_log('device_add', id='scsi-hd1', driver='scsi-hd', drive='hd0', bus="scsi1.0")
-
self.vm.qmp_log('device_del', id='scsi-hd1')
self.vm.event_wait('DEVICE_DELETED')
self.vm.qmp_log('blockdev-del', node_name='hd0')
diff --git a/tests/qemu-iotests/240.out b/tests/qemu-iotests/240.out
index 89ed25e..10dcc42 100644
--- a/tests/qemu-iotests/240.out
+++ b/tests/qemu-iotests/240.out
@@ -46,10 +46,8 @@
{"execute": "device_add", "arguments": {"bus": "scsi0.0", "drive": "hd0", "driver": "scsi-hd", "id": "scsi-hd0"}}
{"return": {}}
{"execute": "device_add", "arguments": {"bus": "scsi1.0", "drive": "hd0", "driver": "scsi-hd", "id": "scsi-hd1"}}
-{"error": {"class": "GenericError", "desc": "Cannot change iothread of active block backend"}}
-{"execute": "device_del", "arguments": {"id": "scsi-hd0"}}
{"return": {}}
-{"execute": "device_add", "arguments": {"bus": "scsi1.0", "drive": "hd0", "driver": "scsi-hd", "id": "scsi-hd1"}}
+{"execute": "device_del", "arguments": {"id": "scsi-hd0"}}
{"return": {}}
{"execute": "device_del", "arguments": {"id": "scsi-hd1"}}
{"return": {}}
diff --git a/tests/qemu-iotests/253 b/tests/qemu-iotests/253
index 35039d2..6da85e6 100755
--- a/tests/qemu-iotests/253
+++ b/tests/qemu-iotests/253
@@ -41,6 +41,7 @@ _supported_os Linux
_default_cache_mode none
_supported_cache_modes none directsync
+_require_disk_usage
echo
echo "=== Check mapping of unaligned raw image ==="
diff --git a/tests/qemu-iotests/308 b/tests/qemu-iotests/308
index ea81dc4..6eced3a 100755
--- a/tests/qemu-iotests/308
+++ b/tests/qemu-iotests/308
@@ -51,6 +51,7 @@ _unsupported_fmt vpc
_supported_proto file # We create the FUSE export manually
_supported_os Linux # We need /dev/urandom
+_require_disk_usage
# $1: Export ID
# $2: Options (beyond the node-name and ID)
@@ -290,7 +291,7 @@ echo '--- Try growing non-growable export ---'
# Get the current size so we can write beyond the EOF
orig_len=$(get_proto_len "$EXT_MP" "$TEST_IMG")
-orig_disk_usage=$(stat -c '%b' "$TEST_IMG")
+orig_disk_usage=$(disk_usage "$TEST_IMG")
# Should fail (exports are non-growable by default)
# (Note that qemu-io can never write beyond the EOF, so we have to use
@@ -312,7 +313,7 @@ else
echo 'OK: Post-truncate image size is as expected'
fi
-new_disk_usage=$(stat -c '%b' "$TEST_IMG")
+new_disk_usage=$(disk_usage "$TEST_IMG")
if [ "$new_disk_usage" -gt "$orig_disk_usage" ]; then
echo 'OK: Disk usage grew with fallocate'
else
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index 237f746..e977cb4 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -996,6 +996,36 @@ _require_large_file()
rm "$FILENAME"
}
+# Check whether disk_usage can be reliably used.
+_require_disk_usage()
+{
+ local unusable=false
+ # ZFS triggers known failures on this front; it does not immediately
+ # allocate files, and then aggressively compresses writes even when full
+ # allocation was requested.
+ if [ -z "$TEST_IMG_FILE" ]; then
+ FILENAME="$TEST_IMG"
+ else
+ FILENAME="$TEST_IMG_FILE"
+ fi
+ if [ -e "FILENAME" ]; then
+ echo "unwilling to overwrite existing file"
+ exit 1
+ fi
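+ # A sparse 5M raw image should use well under 1M on disk, and a fully
+ # preallocated one should use at least 4M; otherwise disk_usage results
+ # cannot be trusted on this filesystem.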
+ $QEMU_IMG create -f raw "$FILENAME" 5M > /dev/null
+ if [ $(disk_usage "$FILENAME") -gt $((1024*1024)) ]; then
+ unusable=true
+ fi
+ $QEMU_IMG create -f raw -o preallocation=full "$FILENAME" 5M > /dev/null
+ if [ $(disk_usage "$FILENAME") -lt $((4*1024*1024)) ]; then
+ unusable=true
+ fi
+ rm -f "$FILENAME"
+ if $unusable; then
+ _notrun "file system on $TEST_DIR does not handle sparse files nicely"
+ fi
+}
+
# Check that a set of devices is available in the QEMU binary
#
_require_devices()
diff --git a/tests/qemu-iotests/tests/graph-changes-while-io b/tests/qemu-iotests/tests/graph-changes-while-io
index 194fda5..dca1167 100755
--- a/tests/qemu-iotests/tests/graph-changes-while-io
+++ b/tests/qemu-iotests/tests/graph-changes-while-io
@@ -27,6 +27,7 @@ from iotests import imgfmt, qemu_img, qemu_img_create, qemu_io, \
top = os.path.join(iotests.test_dir, 'top.img')
+mid = os.path.join(iotests.test_dir, 'mid.img')
nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock')
@@ -57,6 +58,16 @@ class TestGraphChangesWhileIO(QMPTestCase):
def tearDown(self) -> None:
self.qsd.stop()
+ os.remove(top)
+
+ def _wait_for_blockjob(self, status: str) -> None:
+ done = False
+ while not done:
+ for event in self.qsd.get_qmp().get_events(wait=10.0):
+ if event['event'] != 'JOB_STATUS_CHANGE':
+ continue
+ if event['data']['status'] == status:
+ done = True
def test_blockdev_add_while_io(self) -> None:
# Run qemu-img bench in the background
@@ -116,15 +127,92 @@ class TestGraphChangesWhileIO(QMPTestCase):
'device': 'job0',
})
- cancelled = False
- while not cancelled:
- for event in self.qsd.get_qmp().get_events(wait=10.0):
- if event['event'] != 'JOB_STATUS_CHANGE':
- continue
- if event['data']['status'] == 'null':
- cancelled = True
+ self._wait_for_blockjob('null')
+
+ bench_thr.join()
+
+ def test_remove_lower_snapshot_while_io(self) -> None:
+ # Run qemu-img bench in the background
+ bench_thr = Thread(target=do_qemu_img_bench, args=(100000, ))
+ bench_thr.start()
+
+ # While I/O is performed on the 'node0' node, repeatedly add two snapshots
+ # on top of it, then remove (commit) them starting from the lower one.
+ while bench_thr.is_alive():
+ # Recreate snapshot images on every iteration
+ qemu_img_create('-f', imgfmt, mid, '1G')
+ qemu_img_create('-f', imgfmt, top, '1G')
+
+ self.qsd.cmd('blockdev-add', {
+ 'driver': imgfmt,
+ 'node-name': 'mid',
+ 'file': {
+ 'driver': 'file',
+ 'filename': mid
+ }
+ })
+
+ self.qsd.cmd('blockdev-snapshot', {
+ 'node': 'node0',
+ 'overlay': 'mid',
+ })
+
+ self.qsd.cmd('blockdev-add', {
+ 'driver': imgfmt,
+ 'node-name': 'top',
+ 'file': {
+ 'driver': 'file',
+ 'filename': top
+ }
+ })
+
+ self.qsd.cmd('blockdev-snapshot', {
+ 'node': 'mid',
+ 'overlay': 'top',
+ })
+
+ self.qsd.cmd('block-commit', {
+ 'job-id': 'commit-mid',
+ 'device': 'top',
+ 'top-node': 'mid',
+ 'base-node': 'node0',
+ 'auto-finalize': True,
+ 'auto-dismiss': False,
+ })
+
+ self._wait_for_blockjob('concluded')
+ self.qsd.cmd('job-dismiss', {
+ 'id': 'commit-mid',
+ })
+
+ self.qsd.cmd('block-commit', {
+ 'job-id': 'commit-top',
+ 'device': 'top',
+ 'top-node': 'top',
+ 'base-node': 'node0',
+ 'auto-finalize': True,
+ 'auto-dismiss': False,
+ })
+
+ self._wait_for_blockjob('ready')
+ self.qsd.cmd('job-complete', {
+ 'id': 'commit-top',
+ })
+
+ self._wait_for_blockjob('concluded')
+ self.qsd.cmd('job-dismiss', {
+ 'id': 'commit-top',
+ })
+
+ self.qsd.cmd('blockdev-del', {
+ 'node-name': 'mid'
+ })
+ self.qsd.cmd('blockdev-del', {
+ 'node-name': 'top'
+ })
bench_thr.join()
+ os.remove(mid)
if __name__ == '__main__':
# Format must support raw backing files
diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out b/tests/qemu-iotests/tests/graph-changes-while-io.out
index fbc63e6..8d7e9967 100644
--- a/tests/qemu-iotests/tests/graph-changes-while-io.out
+++ b/tests/qemu-iotests/tests/graph-changes-while-io.out
@@ -1,5 +1,5 @@
-..
+...
----------------------------------------------------------------------
-Ran 2 tests
+Ran 3 tests
OK
diff --git a/tests/qemu-iotests/tests/mirror-sparse b/tests/qemu-iotests/tests/mirror-sparse
index 8c52a4e..cfcaa60 100755
--- a/tests/qemu-iotests/tests/mirror-sparse
+++ b/tests/qemu-iotests/tests/mirror-sparse
@@ -40,6 +40,7 @@ cd ..
_supported_fmt qcow2 raw # Format of the source. dst is always raw file
_supported_proto file
_supported_os Linux
+_require_disk_usage
echo
echo "=== Initial image setup ==="
@@ -96,13 +97,15 @@ _send_qemu_cmd $h1 '{"execute": "blockdev-del", "arguments":
{"node-name": "dst"}}' 'return' \
| _filter_block_job_offset | _filter_block_job_len
$QEMU_IMG compare -U -f $IMGFMT -F raw $TEST_IMG.base $TEST_IMG
+# Some filesystems can fudge allocations for various reasons; rather
+# than expecting precise 2M and 20M images, it is better to allow for slop.
result=$(disk_usage $TEST_IMG)
-if test $result -lt $((3*1024*1024)); then
+if test $result -lt $((4*1024*1024)); then
actual=sparse
-elif test $result = $((20*1024*1024)); then
+elif test $result -gt $((19*1024*1024)); then
actual=full
else
- actual=unknown
+ actual="unexpected size ($result)"
fi
echo "Destination is $actual; expected $expected"
}
diff --git a/tests/qemu-iotests/tests/write-zeroes-unmap b/tests/qemu-iotests/tests/write-zeroes-unmap
index 7cfeeaf..f90fb8e 100755
--- a/tests/qemu-iotests/tests/write-zeroes-unmap
+++ b/tests/qemu-iotests/tests/write-zeroes-unmap
@@ -32,6 +32,7 @@ cd ..
_supported_fmt raw
_supported_proto file
_supported_os Linux
+_require_disk_usage
create_test_image() {
_make_test_img -f $IMGFMT 1m
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index 43e5a86..8ad8490 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -208,9 +208,10 @@ qtests_npcm7xx = \
'npcm7xx_sdhci-test',
'npcm7xx_smbus-test',
'npcm7xx_timer-test',
- 'npcm7xx_watchdog_timer-test',
- 'npcm_gmac-test'] + \
+ 'npcm7xx_watchdog_timer-test'] + \
(slirp.found() ? ['npcm7xx_emc-test'] : [])
+qtests_npcm8xx = \
+ ['npcm_gmac-test']
qtests_aspeed = \
['aspeed_gpio-test',
'aspeed_hace-test',
@@ -259,6 +260,7 @@ qtests_aarch64 = \
(config_all_accel.has_key('CONFIG_TCG') and \
config_all_devices.has_key('CONFIG_TPM_TIS_I2C') ? ['tpm-tis-i2c-test'] : []) + \
(config_all_devices.has_key('CONFIG_ASPEED_SOC') ? qtests_aspeed64 : []) + \
+ (config_all_devices.has_key('CONFIG_NPCM8XX') ? qtests_npcm8xx : []) + \
['arm-cpu-features',
'numa-test',
'boot-serial-test',
diff --git a/tests/qtest/npcm_gmac-test.c b/tests/qtest/npcm_gmac-test.c
index c28b471..1317da2 100644
--- a/tests/qtest/npcm_gmac-test.c
+++ b/tests/qtest/npcm_gmac-test.c
@@ -36,7 +36,7 @@ typedef struct TestData {
const GMACModule *module;
} TestData;
-/* Values extracted from hw/arm/npcm7xx.c */
+/* Values extracted from hw/arm/npcm8xx.c */
static const GMACModule gmac_module_list[] = {
{
.irq = 14,
@@ -46,6 +46,14 @@ static const GMACModule gmac_module_list[] = {
.irq = 15,
.base_addr = 0xf0804000
},
+ {
+ .irq = 16,
+ .base_addr = 0xf0806000
+ },
+ {
+ .irq = 17,
+ .base_addr = 0xf0808000
+ }
};
/* Returns the index of the GMAC module. */
@@ -174,18 +182,32 @@ static uint32_t gmac_read(QTestState *qts, const GMACModule *mod,
return qtest_readl(qts, mod->base_addr + regno);
}
+static uint16_t pcs_read(QTestState *qts, const GMACModule *mod,
+ NPCMRegister regno)
+{
+ uint32_t write_value = (regno & 0x3ffe00) >> 9;
+ qtest_writel(qts, PCS_BASE_ADDRESS + NPCM_PCS_IND_AC_BA, write_value);
+ uint32_t read_offset = regno & 0x1ff;
+ return qtest_readl(qts, PCS_BASE_ADDRESS + read_offset);
+}
+
/* Check that GMAC registers are reset to default value */
static void test_init(gconstpointer test_data)
{
const TestData *td = test_data;
const GMACModule *mod = td->module;
- QTestState *qts = qtest_init("-machine npcm750-evb");
+ QTestState *qts = qtest_init("-machine npcm845-evb");
#define CHECK_REG32(regno, value) \
do { \
g_assert_cmphex(gmac_read(qts, mod, (regno)), ==, (value)); \
} while (0)
+#define CHECK_REG_PCS(regno, value) \
+ do { \
+ g_assert_cmphex(pcs_read(qts, mod, (regno)), ==, (value)); \
+ } while (0)
+
CHECK_REG32(NPCM_DMA_BUS_MODE, 0x00020100);
CHECK_REG32(NPCM_DMA_XMT_POLL_DEMAND, 0);
CHECK_REG32(NPCM_DMA_RCV_POLL_DEMAND, 0);
@@ -235,6 +257,63 @@ static void test_init(gconstpointer test_data)
CHECK_REG32(NPCM_GMAC_PTP_TAR, 0);
CHECK_REG32(NPCM_GMAC_PTP_TTSR, 0);
+ if (mod->base_addr == 0xf0802000) {
+ CHECK_REG_PCS(NPCM_PCS_SR_CTL_ID1, 0x699e);
+ CHECK_REG_PCS(NPCM_PCS_SR_CTL_ID2, 0);
+ CHECK_REG_PCS(NPCM_PCS_SR_CTL_STS, 0x8000);
+
+ CHECK_REG_PCS(NPCM_PCS_SR_MII_CTRL, 0x1140);
+ CHECK_REG_PCS(NPCM_PCS_SR_MII_STS, 0x0109);
+ CHECK_REG_PCS(NPCM_PCS_SR_MII_DEV_ID1, 0x699e);
+ CHECK_REG_PCS(NPCM_PCS_SR_MII_DEV_ID2, 0x0ced0);
+ CHECK_REG_PCS(NPCM_PCS_SR_MII_AN_ADV, 0x0020);
+ CHECK_REG_PCS(NPCM_PCS_SR_MII_LP_BABL, 0);
+ CHECK_REG_PCS(NPCM_PCS_SR_MII_AN_EXPN, 0);
+ CHECK_REG_PCS(NPCM_PCS_SR_MII_EXT_STS, 0xc000);
+
+ CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_ABL, 0x0003);
+ CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_TX_MAX_DLY_LWR, 0x0038);
+ CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_TX_MAX_DLY_UPR, 0);
+ CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_TX_MIN_DLY_LWR, 0x0038);
+ CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_TX_MIN_DLY_UPR, 0);
+ CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_RX_MAX_DLY_LWR, 0x0058);
+ CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_RX_MAX_DLY_UPR, 0);
+ CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_RX_MIN_DLY_LWR, 0x0048);
+ CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_RX_MIN_DLY_UPR, 0);
+
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_MMD_DIG_CTRL1, 0x2400);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_AN_CTRL, 0);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_AN_INTR_STS, 0x000a);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_TC, 0);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_DBG_CTRL, 0);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_EEE_MCTRL0, 0x899c);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_EEE_TXTIMER, 0);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_EEE_RXTIMER, 0);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_LINK_TIMER_CTRL, 0);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_EEE_MCTRL1, 0);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_DIG_STS, 0x0010);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_ICG_ERRCNT1, 0);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_MISC_STS, 0);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_RX_LSTS, 0);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_TX_BSTCTRL0, 0x00a);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_TX_LVLCTRL0, 0x007f);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_TX_GENCTRL0, 0x0001);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_TX_GENCTRL1, 0);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_TX_STS, 0);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_RX_GENCTRL0, 0x0100);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_RX_GENCTRL1, 0x1100);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_RX_LOS_CTRL0, 0x000e);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_MPLL_CTRL0, 0x0100);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_MPLL_CTRL1, 0x0032);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_MPLL_STS, 0x0001);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_MISC_CTRL2, 0);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_LVL_CTRL, 0x0019);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_MISC_CTRL0, 0);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_MISC_CTRL1, 0);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_DIG_CTRL2, 0);
+ CHECK_REG_PCS(NPCM_PCS_VR_MII_DIG_ERRCNT_SEL, 0);
+ }
+
qtest_quit(qts);
}
@@ -242,7 +321,7 @@ static void gmac_add_test(const char *name, const TestData* td,
GTestDataFunc fn)
{
g_autofree char *full_name = g_strdup_printf(
- "npcm7xx_gmac/gmac[%d]/%s", gmac_module_index(td->module), name);
+ "npcm8xx_gmac/gmac[%d]/%s", gmac_module_index(td->module), name);
qtest_add_data_func(full_name, td, fn);
}
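
The pcs_read() helper added above goes through the GMAC's PCS indirect-access window: bits [21:9] of the register number are written to the indirect-access base register to select a 512-byte bank, and bits [8:0] are then used as the offset into that window. The following is a minimal sketch of that access pattern only; PCS_BASE_ADDRESS, NPCM_PCS_IND_AC_BA and NPCMRegister come from the test file itself (their definitions are outside this hunk), while pcs_select_bank() and pcs_write_sketch() are hypothetical helpers written here purely for illustration and are not part of the patch.

/*
 * Sketch of the indirect PCS access used by pcs_read() above.
 * pcs_write_sketch() is a hypothetical counterpart shown only to
 * illustrate the banked window.
 */
static void pcs_select_bank(QTestState *qts, NPCMRegister regno)
{
    /* Bits [21:9] of the register number select the 512-byte bank. */
    qtest_writel(qts, PCS_BASE_ADDRESS + NPCM_PCS_IND_AC_BA,
                 (regno & 0x3ffe00) >> 9);
}

static uint16_t pcs_read_sketch(QTestState *qts, NPCMRegister regno)
{
    pcs_select_bank(qts, regno);
    /* Bits [8:0] are the offset inside the selected bank. */
    return qtest_readl(qts, PCS_BASE_ADDRESS + (regno & 0x1ff));
}

static void pcs_write_sketch(QTestState *qts, NPCMRegister regno,
                             uint16_t value)
{
    pcs_select_bank(qts, regno);
    qtest_writel(qts, PCS_BASE_ADDRESS + (regno & 0x1ff), value);
}
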
diff --git a/tests/qtest/test-x86-cpuid-compat.c b/tests/qtest/test-x86-cpuid-compat.c
index c9de47b..456e2af 100644
--- a/tests/qtest/test-x86-cpuid-compat.c
+++ b/tests/qtest/test-x86-cpuid-compat.c
@@ -365,20 +365,6 @@ int main(int argc, char **argv)
"level", 10);
}
- /*
- * xlevel doesn't have any feature that triggers auto-level
- * code on old machine-types. Just check that the compat code
- * is working correctly:
- */
- if (qtest_has_machine("pc-i440fx-2.4")) {
- add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-off",
- "SandyBridge", NULL, "pc-i440fx-2.4",
- "xlevel", 0x80000008);
- add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-on",
- "SandyBridge", "svm=on,npt=on", "pc-i440fx-2.4",
- "xlevel", 0x80000008);
- }
-
/* Test feature parsing */
add_feature_test("x86/cpuid/features/plus",
"486", "+arat",
diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 290cd2a..59c2793 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -772,9 +772,11 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type,
tjob->bs = src;
job = &tjob->common;
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
switch (result) {
case TEST_JOB_SUCCESS:
@@ -953,11 +955,13 @@ static void bdrv_test_top_close(BlockDriverState *bs)
{
BdrvChild *c, *next_c;
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) {
bdrv_unref_child(bs, c);
}
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
}
static int coroutine_fn GRAPH_RDLOCK
@@ -1014,7 +1018,9 @@ static void coroutine_fn test_co_delete_by_drain(void *opaque)
bdrv_graph_co_rdlock();
QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) {
bdrv_graph_co_rdunlock();
+ bdrv_drain_all_begin();
bdrv_co_unref_child(bs, c);
+ bdrv_drain_all_end();
bdrv_graph_co_rdlock();
}
bdrv_graph_co_rdunlock();
@@ -1047,10 +1053,12 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete,
null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
&error_abort);
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_attach_child(bs, null_bs, "null-child", &child_of_bds,
BDRV_CHILD_DATA, &error_abort);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
/* This child will be the one to pass to requests through to, and
* it will stall until a drain occurs */
@@ -1058,21 +1066,25 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete,
&error_abort);
child_bs->total_sectors = 65536 >> BDRV_SECTOR_BITS;
/* Takes our reference to child_bs */
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
tts->wait_child = bdrv_attach_child(bs, child_bs, "wait-child",
&child_of_bds,
BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY,
&error_abort);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
/* This child is just there to be deleted
* (for detach_instead_of_delete == true) */
null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
&error_abort);
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_attach_child(bs, null_bs, "null-child", &child_of_bds, BDRV_CHILD_DATA,
&error_abort);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
blk_insert_bs(blk, bs, &error_abort);
@@ -1155,6 +1167,7 @@ static void no_coroutine_fn detach_indirect_bh(void *opaque)
bdrv_dec_in_flight(data->child_b->bs);
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_unref_child(data->parent_b, data->child_b);
@@ -1163,6 +1176,7 @@ static void no_coroutine_fn detach_indirect_bh(void *opaque)
&child_of_bds, BDRV_CHILD_DATA,
&error_abort);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
}
static void coroutine_mixed_fn detach_by_parent_aio_cb(void *opaque, int ret)
@@ -1260,6 +1274,7 @@ static void TSA_NO_TSA test_detach_indirect(bool by_parent_cb)
/* Set child relationships */
bdrv_ref(b);
bdrv_ref(a);
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
child_b = bdrv_attach_child(parent_b, b, "PB-B", &child_of_bds,
BDRV_CHILD_DATA, &error_abort);
@@ -1271,6 +1286,7 @@ static void TSA_NO_TSA test_detach_indirect(bool by_parent_cb)
by_parent_cb ? &child_of_bds : &detach_by_driver_cb_class,
BDRV_CHILD_DATA, &error_abort);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
g_assert_cmpint(parent_a->refcnt, ==, 1);
g_assert_cmpint(parent_b->refcnt, ==, 1);
@@ -1396,14 +1412,10 @@ static void test_set_aio_context(void)
bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
&error_abort);
- bdrv_drained_begin(bs);
bdrv_try_change_aio_context(bs, ctx_a, NULL, &error_abort);
- bdrv_drained_end(bs);
- bdrv_drained_begin(bs);
bdrv_try_change_aio_context(bs, ctx_b, NULL, &error_abort);
bdrv_try_change_aio_context(bs, qemu_get_aio_context(), NULL, &error_abort);
- bdrv_drained_end(bs);
bdrv_unref(bs);
iothread_join(a);
@@ -1687,6 +1699,7 @@ static void test_drop_intermediate_poll(void)
* Establish the chain last, so the chain links are the first
* elements in the BDS.parents lists
*/
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
for (i = 0; i < 3; i++) {
if (i) {
@@ -1696,6 +1709,7 @@ static void test_drop_intermediate_poll(void)
}
}
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
job = block_job_create("job", &test_simple_job_driver, NULL, job_node,
0, BLK_PERM_ALL, 0, 0, NULL, NULL, &error_abort);
@@ -1942,10 +1956,12 @@ static void do_test_replace_child_mid_drain(int old_drain_count,
new_child_bs->total_sectors = 1;
bdrv_ref(old_child_bs);
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds,
BDRV_CHILD_COW, &error_abort);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
parent_s->setup_completed = true;
for (i = 0; i < old_drain_count; i++) {
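
Every graph manipulation in this unit test follows the same discipline: quiesce in-flight I/O with bdrv_drain_all_begin(), take the graph writer lock, change the parent/child edges, then release the lock and end the drained section. A minimal sketch of that sequence is below; 'bs', 'child_bs' and 'child_of_bds' stand in for the nodes and child class set up by the surrounding test code, and attach_child_drained() is only an illustrative wrapper, not a helper from the patch.

/*
 * Sketch only: the drained graph-update sequence applied throughout
 * this test.
 */
static void attach_child_drained(BlockDriverState *bs,
                                 BlockDriverState *child_bs)
{
    bdrv_drain_all_begin();      /* quiesce in-flight I/O first      */
    bdrv_graph_wrlock();         /* take exclusive graph access      */
    bdrv_attach_child(bs, child_bs, "child", &child_of_bds,
                      BDRV_CHILD_DATA, &error_abort);
    bdrv_graph_wrunlock();       /* let readers back in ...          */
    bdrv_drain_all_end();        /* ... then resume I/O              */
}
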
diff --git a/tests/unit/test-bdrv-graph-mod.c b/tests/unit/test-bdrv-graph-mod.c
index d743abb..7b03ebe 100644
--- a/tests/unit/test-bdrv-graph-mod.c
+++ b/tests/unit/test-bdrv-graph-mod.c
@@ -137,10 +137,12 @@ static void test_update_perm_tree(void)
blk_insert_bs(root, bs, &error_abort);
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_attach_child(filter, bs, "child", &child_of_bds,
BDRV_CHILD_DATA, &error_abort);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
ret = bdrv_append(filter, bs, NULL);
g_assert_cmpint(ret, <, 0);
@@ -204,11 +206,13 @@ static void test_should_update_child(void)
bdrv_set_backing_hd(target, bs, &error_abort);
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
g_assert(target->backing->bs == bs);
bdrv_attach_child(filter, target, "target", &child_of_bds,
BDRV_CHILD_DATA, &error_abort);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
bdrv_append(filter, bs, &error_abort);
bdrv_graph_rdlock_main_loop();
@@ -244,6 +248,7 @@ static void test_parallel_exclusive_write(void)
bdrv_ref(base);
bdrv_ref(fl1);
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_attach_child(top, fl1, "backing", &child_of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
@@ -257,6 +262,7 @@ static void test_parallel_exclusive_write(void)
bdrv_replace_node(fl1, fl2, &error_abort);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
bdrv_drained_end(fl2);
bdrv_drained_end(fl1);
@@ -363,6 +369,7 @@ static void test_parallel_perm_update(void)
*/
bdrv_ref(base);
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_attach_child(top, ws, "file", &child_of_bds, BDRV_CHILD_DATA,
&error_abort);
@@ -377,6 +384,7 @@ static void test_parallel_perm_update(void)
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
&error_abort);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
/* Select fl1 as first child to be active */
s->selected = c_fl1;
@@ -430,11 +438,13 @@ static void test_append_greedy_filter(void)
BlockDriverState *base = no_perm_node("base");
BlockDriverState *fl = exclusive_writer_node("fl1");
+ bdrv_drain_all_begin();
bdrv_graph_wrlock();
bdrv_attach_child(top, base, "backing", &child_of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
&error_abort);
bdrv_graph_wrunlock();
+ bdrv_drain_all_end();
bdrv_append(fl, base, &error_abort);
bdrv_unref(fl);
diff --git a/tests/unit/test-util-sockets.c b/tests/unit/test-util-sockets.c
index 8492f4d..ee66d72 100644
--- a/tests/unit/test-util-sockets.c
+++ b/tests/unit/test-util-sockets.c
@@ -341,8 +341,12 @@ static void inet_parse_test_helper(const char *str,
int rc = inet_parse(&addr, str, &error);
if (success) {
+ if (error) {
+ error_report_err(error);
+ }
g_assert_cmpint(rc, ==, 0);
} else {
+ error_free(error);
g_assert_cmpint(rc, <, 0);
}
if (exp_addr != NULL) {
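
The hunk above makes the helper handle its local Error on both branches: an unexpected failure is printed (and freed) by error_report_err() before the success assertion fires, and an expected failure is released with error_free() so the negative cases do not leak it. A minimal sketch of the idiom, assuming the inet_parse() call from the test; check_parse() is a hypothetical stand-in for the helper, not code from the patch.

/*
 * Sketch of the Error-handling idiom used above: report-and-free an
 * unexpected error, silently free an expected one.
 */
static void check_parse(const char *str, bool success)
{
    InetSocketAddress addr = {};
    Error *error = NULL;
    int rc = inet_parse(&addr, str, &error);

    if (success) {
        if (error) {
            error_report_err(error);   /* prints and frees 'error'      */
        }
        g_assert_cmpint(rc, ==, 0);
    } else {
        error_free(error);             /* expected failure: just drop it */
        g_assert_cmpint(rc, <, 0);
    }
}
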