diff options
589 files changed, 15139 insertions, 5993 deletions
diff --git a/.gitlab-ci.d/buildtest-template.yml b/.gitlab-ci.d/buildtest-template.yml index 118371e..038c3c9 100644 --- a/.gitlab-ci.d/buildtest-template.yml +++ b/.gitlab-ci.d/buildtest-template.yml @@ -24,6 +24,7 @@ - ccache --zero-stats - section_start configure "Running configure" - ../configure --enable-werror --disable-docs --enable-fdt=system + --disable-debug-info ${TARGETS:+--target-list="$TARGETS"} $CONFIGURE_ARGS || { cat config.log meson-logs/meson-log.txt && exit 1; } @@ -76,7 +77,8 @@ fi - section_end buildenv - section_start test "Running tests" - - $MAKE NINJA=":" $MAKE_CHECK_ARGS + # doctests need all the compilation artifacts + - $MAKE NINJA=":" MTESTARGS="--no-suite doc" $MAKE_CHECK_ARGS - section_end test .native_test_job_template: diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml index 248aaed..d888a60 100644 --- a/.gitlab-ci.d/buildtest.yml +++ b/.gitlab-ci.d/buildtest.yml @@ -41,7 +41,7 @@ build-system-ubuntu: IMAGE: ubuntu2204 CONFIGURE_ARGS: --enable-docs --enable-rust TARGETS: alpha-softmmu microblazeel-softmmu mips64el-softmmu - MAKE_CHECK_ARGS: check-build + MAKE_CHECK_ARGS: check-build check-doc check-system-ubuntu: extends: .native_test_job_template @@ -115,7 +115,7 @@ build-system-fedora: CONFIGURE_ARGS: --disable-gcrypt --enable-nettle --enable-docs --enable-crypto-afalg --enable-rust TARGETS: microblaze-softmmu mips-softmmu xtensa-softmmu m68k-softmmu riscv32-softmmu ppc-softmmu sparc64-softmmu - MAKE_CHECK_ARGS: check-build + MAKE_CHECK_ARGS: check-build check-doc build-system-fedora-rust-nightly: extends: @@ -127,12 +127,7 @@ build-system-fedora-rust-nightly: IMAGE: fedora-rust-nightly CONFIGURE_ARGS: --disable-docs --enable-rust --enable-strict-rust-lints TARGETS: aarch64-softmmu - MAKE_CHECK_ARGS: check-build - after_script: - - source scripts/ci/gitlab-ci-section - - section_start test "Running Rust doctests" - - cd build - - pyvenv/bin/meson devenv -w ../rust ${CARGO-cargo} test --doc -p qemu_api + MAKE_CHECK_ARGS: check-build check-doc allow_failure: true @@ -188,12 +183,11 @@ build-previous-qemu: when: on_success expire_in: 2 days paths: - - build-previous - exclude: - - build-previous/**/*.p - - build-previous/**/*.a.p - - build-previous/**/*.c.o - - build-previous/**/*.c.o.d + - build-previous/qemu-bundle + - build-previous/qemu-system-aarch64 + - build-previous/qemu-system-x86_64 + - build-previous/tests/qtest/migration-test + - build-previous/scripts needs: job: amd64-opensuse-leap-container variables: @@ -203,6 +197,11 @@ build-previous-qemu: GIT_FETCH_EXTRA_FLAGS: --prune --quiet before_script: - source scripts/ci/gitlab-ci-section + # Skip if this series contains the release bump commit. During the + # release process there might be a window of commits when the + # version tag is not yet present in the remote and git fetch would + # fail. + - if grep -q "\.0$" VERSION; then exit 0; fi - export QEMU_PREV_VERSION="$(sed 's/\([0-9.]*\)\.[0-9]*/v\1.0/' VERSION)" - git remote add upstream https://gitlab.com/qemu-project/qemu - git fetch upstream refs/tags/$QEMU_PREV_VERSION:refs/tags/$QEMU_PREV_VERSION @@ -223,18 +222,13 @@ build-previous-qemu: IMAGE: opensuse-leap MAKE_CHECK_ARGS: check-build script: + # Skip for round release numbers, this job is only relevant for + # testing a development tree. + - if grep -q "\.0$" VERSION; then exit 0; fi # Use the migration-tests from the older QEMU tree. This avoids # testing an old QEMU against new features/tests that it is not # compatible with. - cd build-previous - # Don't allow python-based tests to run. The - # vmstate-checker-script test has a race that causes it to fail - # sometimes. It cannot be fixed it because this job runs the test - # from the old QEMU version. The test will be removed on master, - # but this job will only see the change in the next release. - # - # TODO: remove this line after 9.2 release - - unset PYTHON # old to new - QTEST_QEMU_BINARY_SRC=./qemu-system-${TARGET} QTEST_QEMU_BINARY=../build/qemu-system-${TARGET} ./tests/qtest/migration-test @@ -67,7 +67,8 @@ Andrey Drobyshev <andrey.drobyshev@virtuozzo.com> Andrey Drobyshev via <qemu-blo BALATON Zoltan <balaton@eik.bme.hu> BALATON Zoltan via <qemu-ppc@nongnu.org> # Next, replace old addresses by a more recent one. -Akihiko Odaki <akihiko.odaki@daynix.com> <akihiko.odaki@gmail.com> +Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp> <akihiko.odaki@daynix.com> +Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp> <akihiko.odaki@gmail.com> Aleksandar Markovic <aleksandar.qemu.devel@gmail.com> <aleksandar.markovic@mips.com> Aleksandar Markovic <aleksandar.qemu.devel@gmail.com> <aleksandar.markovic@imgtec.com> Aleksandar Markovic <aleksandar.qemu.devel@gmail.com> <amarkovic@wavecomp.com> diff --git a/.travis.yml b/.travis.yml index 8fc1ae0..0a634d7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -79,41 +79,6 @@ after_script: jobs: include: - - name: "[aarch64] GCC check-tcg" - arch: arm64 - addons: - apt_packages: - - libaio-dev - - libattr1-dev - - libbrlapi-dev - - libcacard-dev - - libcap-ng-dev - - libfdt-dev - - libgcrypt20-dev - - libgnutls28-dev - - libgtk-3-dev - - libiscsi-dev - - liblttng-ust-dev - - libncurses5-dev - - libnfs-dev - - libpixman-1-dev - - libpng-dev - - librados-dev - - libsdl2-dev - - libseccomp-dev - - liburcu-dev - - libusb-1.0-0-dev - - libvdeplug-dev - - libvte-2.91-dev - - ninja-build - - python3-tomli - # Tests dependencies - - genisoimage - env: - - TEST_CMD="make check check-tcg V=1" - - CONFIG="--disable-containers --enable-fdt=system - --target-list=${MAIN_SYSTEM_TARGETS} --cxx=/bin/false" - - name: "[ppc64] Clang check-tcg" arch: ppc64le compiler: clang diff --git a/MAINTAINERS b/MAINTAINERS index 7060cf4..94c4076 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -112,6 +112,7 @@ F: hw/intc/s390_flic.c F: hw/intc/s390_flic_kvm.c F: hw/s390x/ F: hw/vfio/ap.c +F: hw/s390x/ap-stub.c F: hw/vfio/ccw.c F: hw/watchdog/wdt_diag288.c F: include/hw/s390x/ @@ -219,7 +220,7 @@ S: Maintained F: docs/system/target-avr.rst F: gdb-xml/avr-cpu.xml F: target/avr/ -F: tests/functional/test_avr_mega2560.py +F: tests/functional/test_avr_*.py Hexagon TCG CPUs M: Brian Cain <brian.cain@oss.qualcomm.com> @@ -510,6 +511,7 @@ Apple Silicon HVF CPUs M: Alexander Graf <agraf@csgraf.de> S: Maintained F: target/arm/hvf/ +F: target/arm/hvf-stub.c X86 HVF CPUs M: Cameron Esfahani <dirty@apple.com> @@ -953,6 +955,7 @@ F: hw/cpu/realview_mpcore.c F: hw/intc/realview_gic.c F: include/hw/intc/realview_gic.h F: docs/system/arm/realview.rst +F: tests/functional/test_arm_realview.py SABRELITE / i.MX6 M: Peter Maydell <peter.maydell@linaro.org> @@ -1003,6 +1006,7 @@ F: hw/display/ssd03* F: include/hw/input/gamepad.h F: include/hw/timer/stellaris-gptm.h F: docs/system/arm/stellaris.rst +F: tests/functional/test_arm_stellaris.py STM32L4x5 SoC Family M: Samuel Tardieu <sam@rfc1149.net> @@ -1234,6 +1238,7 @@ Arduino M: Philippe Mathieu-Daudé <philmd@linaro.org> S: Maintained F: hw/avr/arduino.c +F: tests/functional/test_avr_uno.py HP-PARISC Machines ------------------ @@ -1938,7 +1943,6 @@ F: hw/core/numa.c F: hw/cpu/cluster.c F: qapi/machine.json F: qapi/machine-common.json -F: qapi/machine-target.json F: include/hw/boards.h F: include/hw/core/cpu.h F: include/hw/cpu/cluster.h @@ -2123,7 +2127,7 @@ M: Michael S. Tsirkin <mst@redhat.com> S: Supported F: tests/functional/acpi-bits/* F: tests/functional/test_acpi_bits.py -F: docs/devel/acpi-bits.rst +F: docs/devel/testing/acpi-bits.rst ACPI/HEST/GHES R: Dongjiu Geng <gengdongjiu1@gmail.com> @@ -2518,7 +2522,7 @@ F: tests/qtest/fuzz-megasas-test.c Network packet abstractions M: Dmitry Fleytman <dmitry.fleytman@gmail.com> -R: Akihiko Odaki <akihiko.odaki@daynix.com> +R: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp> S: Maintained F: include/net/eth.h F: net/eth.c @@ -2548,13 +2552,13 @@ F: docs/specs/rocker.rst e1000x M: Dmitry Fleytman <dmitry.fleytman@gmail.com> -R: Akihiko Odaki <akihiko.odaki@daynix.com> +R: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp> S: Maintained F: hw/net/e1000x* e1000e M: Dmitry Fleytman <dmitry.fleytman@gmail.com> -R: Akihiko Odaki <akihiko.odaki@daynix.com> +R: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp> S: Maintained F: hw/net/e1000e* F: tests/qtest/fuzz-e1000e-test.c @@ -2562,9 +2566,9 @@ F: tests/qtest/e1000e-test.c F: tests/qtest/libqos/e1000e.* igb -M: Akihiko Odaki <akihiko.odaki@daynix.com> +M: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp> R: Sriram Yagnaraman <sriram.yagnaraman@ericsson.com> -S: Maintained +S: Odd Fixes F: docs/system/devices/igb.rst F: hw/net/igb* F: tests/functional/test_netdev_ethtool.py @@ -2909,7 +2913,7 @@ Core Audio framework backend M: Gerd Hoffmann <kraxel@redhat.com> M: Philippe Mathieu-Daudé <philmd@linaro.org> R: Christian Schoenebeck <qemu_oss@crudebyte.com> -R: Akihiko Odaki <akihiko.odaki@daynix.com> +R: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp> S: Odd Fixes F: audio/coreaudio.m @@ -3031,6 +3035,16 @@ F: include/qemu/co-shared-resource.h T: git https://gitlab.com/jsnow/qemu.git jobs T: git https://gitlab.com/vsementsov/qemu.git block +CheckPoint and Restart (CPR) +R: Steve Sistare <steven.sistare@oracle.com> +S: Supported +F: hw/vfio/cpr* +F: include/hw/vfio/vfio-cpr.h +F: include/migration/cpr.h +F: migration/cpr* +F: tests/qtest/migration/cpr* +F: docs/devel/migration/CPR.rst + Compute Express Link M: Jonathan Cameron <jonathan.cameron@huawei.com> R: Fan Ni <fan.ni@samsung.com> @@ -3200,7 +3214,7 @@ F: tests/functional/test_vnc.py Cocoa graphics M: Peter Maydell <peter.maydell@linaro.org> M: Philippe Mathieu-Daudé <philmd@linaro.org> -R: Akihiko Odaki <akihiko.odaki@daynix.com> +R: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp> S: Odd Fixes F: ui/cocoa.m @@ -3429,8 +3443,8 @@ F: system/qtest.c F: include/system/qtest.h F: accel/qtest/ F: tests/qtest/ -F: docs/devel/qgraph.rst -F: docs/devel/qtest.rst +F: docs/devel/testing/qgraph.rst +F: docs/devel/testing/qtest.rst X: tests/qtest/bios-tables-test* X: tests/qtest/migration-* @@ -3448,7 +3462,7 @@ F: tests/qtest/fuzz-*test.c F: tests/docker/test-fuzz F: scripts/oss-fuzz/ F: hw/mem/sparse-mem.c -F: docs/devel/fuzzing.rst +F: docs/devel/testing/fuzzing.rst Register API M: Alistair Francis <alistair@alistair23.me> @@ -3727,7 +3741,7 @@ F: util/iova-tree.c elf2dmp M: Viktor Prutyanov <viktor.prutyanov@phystech.edu> -R: Akihiko Odaki <akihiko.odaki@daynix.com> +R: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp> S: Maintained F: contrib/elf2dmp/ @@ -4067,7 +4081,7 @@ M: Stefan Hajnoczi <stefanha@redhat.com> L: qemu-block@nongnu.org S: Supported F: block/blkverify.c -F: docs/devel/blkverify.rst +F: docs/devel/testing/blkverify.rst bochs M: Stefan Hajnoczi <stefanha@redhat.com> @@ -4103,6 +4117,7 @@ M: Stefan Hajnoczi <stefanha@redhat.com> L: qemu-block@nongnu.org S: Supported F: block/qed.c +F: docs/interop/qed_spec.rst raw M: Kevin Wolf <kwolf@redhat.com> @@ -4131,7 +4146,7 @@ M: Hanna Reitz <hreitz@redhat.com> L: qemu-block@nongnu.org S: Supported F: block/qcow2* -F: docs/interop/qcow2.txt +F: docs/interop/qcow2.rst qcow M: Kevin Wolf <kwolf@redhat.com> @@ -4145,7 +4160,7 @@ M: Hanna Reitz <hreitz@redhat.com> L: qemu-block@nongnu.org S: Supported F: block/blkdebug.c -F: docs/devel/blkdebug.rst +F: docs/devel/testing/blkdebug.rst vpc M: Kevin Wolf <kwolf@redhat.com> @@ -4265,7 +4280,8 @@ F: tests/vm/ F: tests/lcitool/ F: tests/functional/test_*_tuxrun.py F: scripts/archive-source.sh -F: docs/devel/testing.rst +F: docs/devel/testing/ci* +F: docs/devel/testing/main.rst W: https://gitlab.com/qemu-project/qemu/pipelines W: https://travis-ci.org/qemu/qemu diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c index b8b6116..d60446b 100644 --- a/accel/hvf/hvf-accel-ops.c +++ b/accel/hvf/hvf-accel-ops.c @@ -366,6 +366,7 @@ static void hvf_accel_class_init(ObjectClass *oc, const void *data) static const TypeInfo hvf_accel_type = { .name = TYPE_HVF_ACCEL, .parent = TYPE_ACCEL, + .instance_size = sizeof(HVFState), .class_init = hvf_accel_class_init, }; diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 278a506..a317783 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -99,6 +99,7 @@ bool kvm_allowed; bool kvm_readonly_mem_allowed; bool kvm_vm_attributes_allowed; bool kvm_msi_use_devid; +bool kvm_pre_fault_memory_supported; static bool kvm_has_guest_debug; static int kvm_sstep_flags; static bool kvm_immediate_exit; @@ -471,7 +472,9 @@ int kvm_create_vcpu(CPUState *cpu) cpu->kvm_fd = kvm_fd; cpu->kvm_state = s; - cpu->vcpu_dirty = true; + if (!s->guest_state_protected) { + cpu->vcpu_dirty = true; + } cpu->dirty_pages = 0; cpu->throttle_us_per_full = 0; @@ -545,6 +548,11 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); + ret = kvm_arch_pre_create_vcpu(cpu, errp); + if (ret < 0) { + goto err; + } + ret = kvm_create_vcpu(cpu); if (ret < 0) { error_setg_errno(errp, -ret, @@ -2426,7 +2434,7 @@ static int kvm_recommended_vcpus(KVMState *s) static int kvm_max_vcpus(KVMState *s) { - int ret = kvm_check_extension(s, KVM_CAP_MAX_VCPUS); + int ret = kvm_vm_check_extension(s, KVM_CAP_MAX_VCPUS); return (ret) ? ret : kvm_recommended_vcpus(s); } @@ -2738,6 +2746,7 @@ static int kvm_init(MachineState *ms) kvm_check_extension(s, KVM_CAP_GUEST_MEMFD) && kvm_check_extension(s, KVM_CAP_USER_MEMORY2) && (kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE); + kvm_pre_fault_memory_supported = kvm_vm_check_extension(s, KVM_CAP_PRE_FAULT_MEMORY); if (s->kernel_irqchip_split == ON_OFF_AUTO_AUTO) { s->kernel_irqchip_split = mc->default_kernel_irqchip_split ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index cc5f362..713bdb2 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -1039,6 +1039,7 @@ bool tcg_exec_realizefn(CPUState *cpu, Error **errp) assert(tcg_ops->cpu_exec_halt); assert(tcg_ops->cpu_exec_interrupt); assert(tcg_ops->cpu_exec_reset); + assert(tcg_ops->pointer_wrap); #endif /* !CONFIG_USER_ONLY */ assert(tcg_ops->translate_code); assert(tcg_ops->get_tb_cpu_state); diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index 5f6d7c6..87e14bd 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -1773,6 +1773,9 @@ static bool mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi, l->page[1].size = l->page[0].size - size0; l->page[0].size = size0; + l->page[1].addr = cpu->cc->tcg_ops->pointer_wrap(cpu, l->mmu_idx, + l->page[1].addr, addr); + /* * Lookup both pages, recognizing exceptions from either. If the * second lookup potentially resized, refresh first CPUTLBEntryFull. @@ -1871,8 +1874,12 @@ static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi, goto stop_the_world; } - /* Collect tlb flags for read. */ + /* Finish collecting tlb flags for both read and write. */ + full = &cpu->neg.tlb.d[mmu_idx].fulltlb[index]; tlb_addr |= tlbe->addr_read; + tlb_addr &= TLB_FLAGS_MASK & ~TLB_FORCE_SLOW; + tlb_addr |= full->slow_flags[MMU_DATA_STORE]; + tlb_addr |= full->slow_flags[MMU_DATA_LOAD]; /* Notice an IO access or a needs-MMU-lookup access */ if (unlikely(tlb_addr & (TLB_MMIO | TLB_DISCARD_WRITE))) { @@ -1882,13 +1889,12 @@ static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi, } hostaddr = (void *)((uintptr_t)addr + tlbe->addend); - full = &cpu->neg.tlb.d[mmu_idx].fulltlb[index]; if (unlikely(tlb_addr & TLB_NOTDIRTY)) { notdirty_write(cpu, addr, size, full, retaddr); } - if (unlikely(tlb_addr & TLB_FORCE_SLOW)) { + if (unlikely(tlb_addr & TLB_WATCHPOINT)) { int wp_flags = 0; if (full->slow_flags[MMU_DATA_STORE] & TLB_WATCHPOINT) { @@ -1897,10 +1903,8 @@ static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi, if (full->slow_flags[MMU_DATA_LOAD] & TLB_WATCHPOINT) { wp_flags |= BP_MEM_READ; } - if (wp_flags) { - cpu_check_watchpoint(cpu, addr, size, - full->attrs, wp_flags, retaddr); - } + cpu_check_watchpoint(cpu, addr, size, + full->attrs, wp_flags, retaddr); } return hostaddr; @@ -2926,3 +2930,22 @@ uint64_t cpu_ldq_code_mmu(CPUArchState *env, vaddr addr, { return do_ld8_mmu(env_cpu(env), addr, oi, retaddr, MMU_INST_FETCH); } + +/* + * Common pointer_wrap implementations. + */ + +/* + * To be used for strict alignment targets. + * Because no accesses are unaligned, no accesses wrap either. + */ +vaddr cpu_pointer_wrap_notreached(CPUState *cs, int idx, vaddr res, vaddr base) +{ + g_assert_not_reached(); +} + +/* To be used for strict 32-bit targets. */ +vaddr cpu_pointer_wrap_uint32(CPUState *cs, int idx, vaddr res, vaddr base) +{ + return (uint32_t)res; +} diff --git a/accel/tcg/meson.build b/accel/tcg/meson.build index 97d5e5a..575e92b 100644 --- a/accel/tcg/meson.build +++ b/accel/tcg/meson.build @@ -18,15 +18,15 @@ if get_option('plugins') tcg_ss.add(files('plugin-gen.c')) endif -libuser_ss.add_all(tcg_ss) -libsystem_ss.add_all(tcg_ss) +user_ss.add_all(tcg_ss) +system_ss.add_all(tcg_ss) -libuser_ss.add(files( +user_ss.add(files( 'user-exec.c', 'user-exec-stub.c', )) -libsystem_ss.add(files( +system_ss.add(files( 'cputlb.c', 'icount-common.c', 'monitor.c', diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 451b383..d468667 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -24,7 +24,6 @@ #include "tcg/tcg.h" #include "exec/mmap-lock.h" #include "tb-internal.h" -#include "tlb-bounds.h" #include "exec/tb-flush.h" #include "qemu/cacheinfo.h" #include "qemu/target-info.h" @@ -313,11 +312,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu, TCGTBCPUState s) tcg_ctx->gen_tb = tb; tcg_ctx->addr_type = target_long_bits() == 32 ? TCG_TYPE_I32 : TCG_TYPE_I64; -#ifdef CONFIG_SOFTMMU - tcg_ctx->page_bits = TARGET_PAGE_BITS; - tcg_ctx->page_mask = TARGET_PAGE_MASK; - tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS; -#endif tcg_ctx->guest_mo = cpu->cc->tcg_ops->guest_default_memory_order; restart_translate: diff --git a/audio/alsaaudio.c b/audio/alsaaudio.c index cacae1e..9b6c01c 100644 --- a/audio/alsaaudio.c +++ b/audio/alsaaudio.c @@ -899,7 +899,7 @@ static void alsa_enable_in(HWVoiceIn *hw, bool enable) static void alsa_init_per_direction(AudiodevAlsaPerDirectionOptions *apdo) { if (!apdo->has_try_poll) { - apdo->try_poll = true; + apdo->try_poll = false; apdo->has_try_poll = true; } } diff --git a/audio/audio.c b/audio/audio.c index 41ee11a..89f091b 100644 --- a/audio/audio.c +++ b/audio/audio.c @@ -905,6 +905,14 @@ size_t AUD_read(SWVoiceIn *sw, void *buf, size_t size) int AUD_get_buffer_size_out(SWVoiceOut *sw) { + if (!sw) { + return 0; + } + + if (audio_get_pdo_out(sw->s->dev)->mixing_engine) { + return sw->resample_buf.size * sw->info.bytes_per_frame; + } + return sw->hw->samples * sw->hw->info.bytes_per_frame; } @@ -1884,7 +1892,8 @@ CaptureVoiceOut *AUD_add_capture( cap->buf = g_malloc0_n(hw->mix_buf.size, hw->info.bytes_per_frame); if (hw->info.is_float) { - hw->clip = mixeng_clip_float[hw->info.nchannels == 2]; + hw->clip = mixeng_clip_float[hw->info.nchannels == 2] + [hw->info.swap_endianness]; } else { hw->clip = mixeng_clip [hw->info.nchannels == 2] @@ -2274,17 +2283,19 @@ size_t audio_rate_peek_bytes(RateCtl *rate, struct audio_pcm_info *info) ticks = now - rate->start_ticks; bytes = muldiv64(ticks, info->bytes_per_second, NANOSECONDS_PER_SECOND); frames = (bytes - rate->bytes_sent) / info->bytes_per_frame; - if (frames < 0 || frames > 65536) { - AUD_log(NULL, "Resetting rate control (%" PRId64 " frames)\n", frames); - audio_rate_start(rate); - frames = 0; - } + rate->peeked_frames = frames; - return frames * info->bytes_per_frame; + return frames < 0 ? 0 : frames * info->bytes_per_frame; } void audio_rate_add_bytes(RateCtl *rate, size_t bytes_used) { + if (rate->peeked_frames < 0 || rate->peeked_frames > 65536) { + AUD_log(NULL, "Resetting rate control (%" PRId64 " frames)\n", + rate->peeked_frames); + audio_rate_start(rate); + } + rate->bytes_sent += bytes_used; } diff --git a/audio/audio_int.h b/audio/audio_int.h index 2d079d0..f78ca05 100644 --- a/audio/audio_int.h +++ b/audio/audio_int.h @@ -255,6 +255,7 @@ const char *audio_application_name(void); typedef struct RateCtl { int64_t start_ticks; int64_t bytes_sent; + int64_t peeked_frames; } RateCtl; void audio_rate_start(RateCtl *rate); diff --git a/audio/audio_template.h b/audio/audio_template.h index 7ccfec0..c29d79c 100644 --- a/audio/audio_template.h +++ b/audio/audio_template.h @@ -174,9 +174,11 @@ static int glue (audio_pcm_sw_init_, TYPE) ( if (sw->info.is_float) { #ifdef DAC - sw->conv = mixeng_conv_float[sw->info.nchannels == 2]; + sw->conv = mixeng_conv_float[sw->info.nchannels == 2] + [sw->info.swap_endianness]; #else - sw->clip = mixeng_clip_float[sw->info.nchannels == 2]; + sw->clip = mixeng_clip_float[sw->info.nchannels == 2] + [sw->info.swap_endianness]; #endif } else { #ifdef DAC @@ -303,9 +305,11 @@ static HW *glue(audio_pcm_hw_add_new_, TYPE)(AudioState *s, if (hw->info.is_float) { #ifdef DAC - hw->clip = mixeng_clip_float[hw->info.nchannels == 2]; + hw->clip = mixeng_clip_float[hw->info.nchannels == 2] + [hw->info.swap_endianness]; #else - hw->conv = mixeng_conv_float[hw->info.nchannels == 2]; + hw->conv = mixeng_conv_float[hw->info.nchannels == 2] + [hw->info.swap_endianness]; #endif } else { #ifdef DAC diff --git a/audio/mixeng.c b/audio/mixeng.c index 69f6549..703ee54 100644 --- a/audio/mixeng.c +++ b/audio/mixeng.c @@ -283,10 +283,15 @@ static const float float_scale_reciprocal = 1.f / ((int64_t)INT32_MAX + 1); #endif #endif +#define F32_TO_F32S(v) \ + bswap32((union { uint32_t i; float f; }){ .f = (v) }.i) +#define F32S_TO_F32(v) \ + ((union { uint32_t i; float f; }){ .i = bswap32(v) }.f) + static void conv_natural_float_to_mono(struct st_sample *dst, const void *src, int samples) { - float *in = (float *)src; + const float *in = src; while (samples--) { dst->r = dst->l = CONV_NATURAL_FLOAT(*in++); @@ -294,10 +299,21 @@ static void conv_natural_float_to_mono(struct st_sample *dst, const void *src, } } +static void conv_swap_float_to_mono(struct st_sample *dst, const void *src, + int samples) +{ + const uint32_t *in_f32s = src; + + while (samples--) { + dst->r = dst->l = CONV_NATURAL_FLOAT(F32S_TO_F32(*in_f32s++)); + dst++; + } +} + static void conv_natural_float_to_stereo(struct st_sample *dst, const void *src, int samples) { - float *in = (float *)src; + const float *in = src; while (samples--) { dst->l = CONV_NATURAL_FLOAT(*in++); @@ -306,15 +322,33 @@ static void conv_natural_float_to_stereo(struct st_sample *dst, const void *src, } } -t_sample *mixeng_conv_float[2] = { - conv_natural_float_to_mono, - conv_natural_float_to_stereo, +static void conv_swap_float_to_stereo(struct st_sample *dst, const void *src, + int samples) +{ + const uint32_t *in_f32s = src; + + while (samples--) { + dst->l = CONV_NATURAL_FLOAT(F32S_TO_F32(*in_f32s++)); + dst->r = CONV_NATURAL_FLOAT(F32S_TO_F32(*in_f32s++)); + dst++; + } +} + +t_sample *mixeng_conv_float[2][2] = { + { + conv_natural_float_to_mono, + conv_swap_float_to_mono, + }, + { + conv_natural_float_to_stereo, + conv_swap_float_to_stereo, + } }; static void clip_natural_float_from_mono(void *dst, const struct st_sample *src, int samples) { - float *out = (float *)dst; + float *out = dst; while (samples--) { *out++ = CLIP_NATURAL_FLOAT(src->l + src->r); @@ -322,10 +356,21 @@ static void clip_natural_float_from_mono(void *dst, const struct st_sample *src, } } +static void clip_swap_float_from_mono(void *dst, const struct st_sample *src, + int samples) +{ + uint32_t *out_f32s = dst; + + while (samples--) { + *out_f32s++ = F32_TO_F32S(CLIP_NATURAL_FLOAT(src->l + src->r)); + src++; + } +} + static void clip_natural_float_from_stereo( void *dst, const struct st_sample *src, int samples) { - float *out = (float *)dst; + float *out = dst; while (samples--) { *out++ = CLIP_NATURAL_FLOAT(src->l); @@ -334,9 +379,27 @@ static void clip_natural_float_from_stereo( } } -f_sample *mixeng_clip_float[2] = { - clip_natural_float_from_mono, - clip_natural_float_from_stereo, +static void clip_swap_float_from_stereo( + void *dst, const struct st_sample *src, int samples) +{ + uint32_t *out_f32s = dst; + + while (samples--) { + *out_f32s++ = F32_TO_F32S(CLIP_NATURAL_FLOAT(src->l)); + *out_f32s++ = F32_TO_F32S(CLIP_NATURAL_FLOAT(src->r)); + src++; + } +} + +f_sample *mixeng_clip_float[2][2] = { + { + clip_natural_float_from_mono, + clip_swap_float_from_mono, + }, + { + clip_natural_float_from_stereo, + clip_swap_float_from_stereo, + } }; void audio_sample_to_uint64(const void *samples, int pos, diff --git a/audio/mixeng.h b/audio/mixeng.h index a5f56d2..ead93ac 100644 --- a/audio/mixeng.h +++ b/audio/mixeng.h @@ -42,9 +42,9 @@ typedef void (f_sample) (void *dst, const struct st_sample *src, int samples); extern t_sample *mixeng_conv[2][2][2][3]; extern f_sample *mixeng_clip[2][2][2][3]; -/* indices: [stereo] */ -extern t_sample *mixeng_conv_float[2]; -extern f_sample *mixeng_clip_float[2]; +/* indices: [stereo][swap endianness] */ +extern t_sample *mixeng_conv_float[2][2]; +extern f_sample *mixeng_clip_float[2][2]; void *st_rate_start (int inrate, int outrate); void st_rate_flow(void *opaque, st_sample *ibuf, st_sample *obuf, diff --git a/backends/iommufd.c b/backends/iommufd.c index b73f75c..c2c47ab 100644 --- a/backends/iommufd.c +++ b/backends/iommufd.c @@ -311,6 +311,62 @@ bool iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid, return true; } +bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id, + uint32_t data_type, uint32_t entry_len, + uint32_t *entry_num, void *data, + Error **errp) +{ + int ret, fd = be->fd; + uint32_t total_entries = *entry_num; + struct iommu_hwpt_invalidate cache = { + .size = sizeof(cache), + .hwpt_id = id, + .data_type = data_type, + .entry_len = entry_len, + .entry_num = total_entries, + .data_uptr = (uintptr_t)data, + }; + + ret = ioctl(fd, IOMMU_HWPT_INVALIDATE, &cache); + trace_iommufd_backend_invalidate_cache(fd, id, data_type, entry_len, + total_entries, cache.entry_num, + (uintptr_t)data, ret ? errno : 0); + *entry_num = cache.entry_num; + + if (ret) { + error_setg_errno(errp, errno, "IOMMU_HWPT_INVALIDATE failed:" + " total %d entries, processed %d entries", + total_entries, cache.entry_num); + } else if (total_entries != cache.entry_num) { + error_setg(errp, "IOMMU_HWPT_INVALIDATE succeed but with unprocessed" + " entries: total %d entries, processed %d entries." + " Kernel BUG?!", total_entries, cache.entry_num); + return false; + } + + return !ret; +} + +bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + uint32_t hwpt_id, Error **errp) +{ + HostIOMMUDeviceIOMMUFDClass *idevc = + HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev); + + g_assert(idevc->attach_hwpt); + return idevc->attach_hwpt(idev, hwpt_id, errp); +} + +bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + Error **errp) +{ + HostIOMMUDeviceIOMMUFDClass *idevc = + HOST_IOMMU_DEVICE_IOMMUFD_GET_CLASS(idev); + + g_assert(idevc->detach_hwpt); + return idevc->detach_hwpt(idev, errp); +} + static int hiod_iommufd_get_cap(HostIOMMUDevice *hiod, int cap, Error **errp) { HostIOMMUDeviceCaps *caps = &hiod->caps; @@ -349,6 +405,8 @@ static const TypeInfo types[] = { }, { .name = TYPE_HOST_IOMMU_DEVICE_IOMMUFD, .parent = TYPE_HOST_IOMMU_DEVICE, + .instance_size = sizeof(HostIOMMUDeviceIOMMUFD), + .class_size = sizeof(HostIOMMUDeviceIOMMUFDClass), .class_init = hiod_iommufd_class_init, .abstract = true, } diff --git a/backends/tpm/tpm_emulator.c b/backends/tpm/tpm_emulator.c index 43d350e..4a234ab 100644 --- a/backends/tpm/tpm_emulator.c +++ b/backends/tpm/tpm_emulator.c @@ -129,11 +129,11 @@ static int tpm_emulator_ctrlcmd(TPMEmulator *tpm, unsigned long cmd, void *msg, CharBackend *dev = &tpm->ctrl_chr; uint32_t cmd_no = cpu_to_be32(cmd); ssize_t n = sizeof(uint32_t) + msg_len_in; - uint8_t *buf = NULL; ptm_res res; WITH_QEMU_LOCK_GUARD(&tpm->mutex) { - buf = g_alloca(n); + g_autofree uint8_t *buf = g_malloc(n); + memcpy(buf, &cmd_no, sizeof(cmd_no)); memcpy(buf + sizeof(cmd_no), msg, msg_len_in); diff --git a/backends/trace-events b/backends/trace-events index 40811a3..7278214 100644 --- a/backends/trace-events +++ b/backends/trace-events @@ -18,3 +18,4 @@ iommufd_backend_alloc_hwpt(int iommufd, uint32_t dev_id, uint32_t pt_id, uint32_ iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)" iommufd_backend_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) " iommufd=%d hwpt=%u enable=%d (%d)" iommufd_backend_get_dirty_bitmap(int iommufd, uint32_t hwpt_id, uint64_t iova, uint64_t size, uint64_t page_size, int ret) " iommufd=%d hwpt=%u iova=0x%"PRIx64" size=0x%"PRIx64" page_size=0x%"PRIx64" (%d)" +iommufd_backend_invalidate_cache(int iommufd, uint32_t id, uint32_t data_type, uint32_t entry_len, uint32_t entry_num, uint32_t done_num, uint64_t data_ptr, int ret) " iommufd=%d id=%u data_type=%u entry_len=%u entry_num=%u done_num=%u data_ptr=0x%"PRIx64" (%d)" @@ -106,9 +106,9 @@ static void bdrv_reopen_abort(BDRVReopenState *reopen_state); static bool bdrv_backing_overridden(BlockDriverState *bs); -static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, - GHashTable *visited, Transaction *tran, - Error **errp); +static bool GRAPH_RDLOCK +bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, + GHashTable *visited, Transaction *tran, Error **errp); /* If non-zero, use only whitelisted block drivers */ static int use_bdrv_whitelist; @@ -1226,9 +1226,10 @@ static int bdrv_child_cb_inactivate(BdrvChild *child) return 0; } -static bool bdrv_child_cb_change_aio_ctx(BdrvChild *child, AioContext *ctx, - GHashTable *visited, Transaction *tran, - Error **errp) +static bool GRAPH_RDLOCK +bdrv_child_cb_change_aio_ctx(BdrvChild *child, AioContext *ctx, + GHashTable *visited, Transaction *tran, + Error **errp) { BlockDriverState *bs = child->opaque; return bdrv_change_aio_context(bs, ctx, visited, tran, errp); @@ -1720,12 +1721,14 @@ bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name, open_failed: bs->drv = NULL; + bdrv_drain_all_begin(); bdrv_graph_wrlock(); if (bs->file != NULL) { bdrv_unref_child(bs, bs->file); assert(!bs->file); } bdrv_graph_wrunlock(); + bdrv_drain_all_end(); g_free(bs->opaque); bs->opaque = NULL; @@ -3027,7 +3030,8 @@ static void GRAPH_WRLOCK bdrv_attach_child_common_abort(void *opaque) bdrv_replace_child_noperm(s->child, NULL); if (bdrv_get_aio_context(bs) != s->old_child_ctx) { - bdrv_try_change_aio_context(bs, s->old_child_ctx, NULL, &error_abort); + bdrv_try_change_aio_context_locked(bs, s->old_child_ctx, NULL, + &error_abort); } if (bdrv_child_get_parent_aio_context(s->child) != s->old_parent_ctx) { @@ -3069,6 +3073,9 @@ static TransactionActionDrv bdrv_attach_child_common_drv = { * * Both @parent_bs and @child_bs can move to a different AioContext in this * function. + * + * All block nodes must be drained before this function is called until after + * the transaction is finalized. */ static BdrvChild * GRAPH_WRLOCK bdrv_attach_child_common(BlockDriverState *child_bs, @@ -3112,8 +3119,8 @@ bdrv_attach_child_common(BlockDriverState *child_bs, parent_ctx = bdrv_child_get_parent_aio_context(new_child); if (child_ctx != parent_ctx) { Error *local_err = NULL; - int ret = bdrv_try_change_aio_context(child_bs, parent_ctx, NULL, - &local_err); + int ret = bdrv_try_change_aio_context_locked(child_bs, parent_ctx, NULL, + &local_err); if (ret < 0 && child_class->change_aio_ctx) { Transaction *aio_ctx_tran = tran_new(); @@ -3179,6 +3186,9 @@ bdrv_attach_child_common(BlockDriverState *child_bs, * * After calling this function, the transaction @tran may only be completed * while holding a writer lock for the graph. + * + * All block nodes must be drained before this function is called until after + * the transaction is finalized. */ static BdrvChild * GRAPH_WRLOCK bdrv_attach_child_noperm(BlockDriverState *parent_bs, @@ -3220,6 +3230,8 @@ bdrv_attach_child_noperm(BlockDriverState *parent_bs, * * On failure NULL is returned, errp is set and the reference to * child_bs is also dropped. + * + * All block nodes must be drained. */ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, const char *child_name, @@ -3259,6 +3271,8 @@ out: * * On failure NULL is returned, errp is set and the reference to * child_bs is also dropped. + * + * All block nodes must be drained. */ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, @@ -3293,7 +3307,11 @@ out: return ret < 0 ? NULL : child; } -/* Callers must ensure that child->frozen is false. */ +/* + * Callers must ensure that child->frozen is false. + * + * All block nodes must be drained. + */ void bdrv_root_unref_child(BdrvChild *child) { BlockDriverState *child_bs = child->bs; @@ -3314,8 +3332,8 @@ void bdrv_root_unref_child(BdrvChild *child) * When the parent requiring a non-default AioContext is removed, the * node moves back to the main AioContext */ - bdrv_try_change_aio_context(child_bs, qemu_get_aio_context(), NULL, - NULL); + bdrv_try_change_aio_context_locked(child_bs, qemu_get_aio_context(), + NULL, NULL); } bdrv_schedule_unref(child_bs); @@ -3388,7 +3406,11 @@ bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child, } } -/* Callers must ensure that child->frozen is false. */ +/* + * Callers must ensure that child->frozen is false. + * + * All block nodes must be drained. + */ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) { GLOBAL_STATE_CODE(); @@ -3453,6 +3475,9 @@ static BdrvChildRole bdrv_backing_role(BlockDriverState *bs) * * After calling this function, the transaction @tran may only be completed * while holding a writer lock for the graph. + * + * All block nodes must be drained before this function is called until after + * the transaction is finalized. */ static int GRAPH_WRLOCK bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs, @@ -3545,8 +3570,7 @@ out: * Both @bs and @backing_hd can move to a different AioContext in this * function. * - * If a backing child is already present (i.e. we're detaching a node), that - * child node must be drained. + * All block nodes must be drained. */ int bdrv_set_backing_hd_drained(BlockDriverState *bs, BlockDriverState *backing_hd, @@ -3575,21 +3599,14 @@ out: int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, Error **errp) { - BlockDriverState *drain_bs; int ret; GLOBAL_STATE_CODE(); - bdrv_graph_rdlock_main_loop(); - drain_bs = bs->backing ? bs->backing->bs : bs; - bdrv_graph_rdunlock_main_loop(); - - bdrv_ref(drain_bs); - bdrv_drained_begin(drain_bs); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp); bdrv_graph_wrunlock(); - bdrv_drained_end(drain_bs); - bdrv_unref(drain_bs); + bdrv_drain_all_end(); return ret; } @@ -3780,10 +3797,12 @@ static BdrvChild *bdrv_open_child_common(const char *filename, return NULL; } + bdrv_drain_all_begin(); bdrv_graph_wrlock(); child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role, errp); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); return child; } @@ -4358,9 +4377,7 @@ bdrv_recurse_has_child(BlockDriverState *bs, BlockDriverState *child) * returns a pointer to bs_queue, which is either the newly allocated * bs_queue, or the existing bs_queue being used. * - * bs is drained here and undrained by bdrv_reopen_queue_free(). - * - * To be called with bs->aio_context locked. + * bs must be drained. */ static BlockReopenQueue * GRAPH_RDLOCK bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockDriverState *bs, @@ -4379,12 +4396,7 @@ bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockDriverState *bs, GLOBAL_STATE_CODE(); - /* - * Strictly speaking, draining is illegal under GRAPH_RDLOCK. We know that - * we've been called with bdrv_graph_rdlock_main_loop(), though, so it's ok - * in practice. - */ - bdrv_drained_begin(bs); + assert(bs->quiesce_counter > 0); if (bs_queue == NULL) { bs_queue = g_new0(BlockReopenQueue, 1); @@ -4519,12 +4531,17 @@ bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, BlockDriverState *bs, return bs_queue; } -/* To be called with bs->aio_context locked */ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, BlockDriverState *bs, QDict *options, bool keep_old_opts) { GLOBAL_STATE_CODE(); + + if (bs_queue == NULL) { + /* Paired with bdrv_drain_all_end() in bdrv_reopen_queue_free(). */ + bdrv_drain_all_begin(); + } + GRAPH_RDLOCK_GUARD_MAINLOOP(); return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, 0, false, @@ -4537,12 +4554,14 @@ void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue) if (bs_queue) { BlockReopenQueueEntry *bs_entry, *next; QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { - bdrv_drained_end(bs_entry->state.bs); qobject_unref(bs_entry->state.explicit_options); qobject_unref(bs_entry->state.options); g_free(bs_entry); } g_free(bs_queue); + + /* Paired with bdrv_drain_all_begin() in bdrv_reopen_queue(). */ + bdrv_drain_all_end(); } } @@ -4709,6 +4728,9 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only, * Return 0 on success, otherwise return < 0 and set @errp. * * @reopen_state->bs can move to a different AioContext in this function. + * + * All block nodes must be drained before this function is called until after + * the transaction is finalized. */ static int GRAPH_UNLOCKED bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, @@ -4802,7 +4824,7 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, if (old_child_bs) { bdrv_ref(old_child_bs); - bdrv_drained_begin(old_child_bs); + assert(old_child_bs->quiesce_counter > 0); } bdrv_graph_rdunlock_main_loop(); @@ -4814,7 +4836,6 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state, bdrv_graph_wrunlock(); if (old_child_bs) { - bdrv_drained_end(old_child_bs); bdrv_unref(old_child_bs); } @@ -4843,6 +4864,9 @@ out_rdlock: * * After calling this function, the transaction @change_child_tran may only be * completed while holding a writer lock for the graph. + * + * All block nodes must be drained before this function is called until after + * the transaction is finalized. */ static int GRAPH_UNLOCKED bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, @@ -5156,6 +5180,7 @@ static void bdrv_close(BlockDriverState *bs) bs->drv = NULL; } + bdrv_drain_all_begin(); bdrv_graph_wrlock(); QLIST_FOREACH_SAFE(child, &bs->children, next, next) { bdrv_unref_child(bs, child); @@ -5164,6 +5189,7 @@ static void bdrv_close(BlockDriverState *bs) assert(!bs->backing); assert(!bs->file); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); g_free(bs->opaque); bs->opaque = NULL; @@ -5489,9 +5515,7 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, assert(!bs_new->backing); bdrv_graph_rdunlock_main_loop(); - bdrv_drained_begin(bs_top); - bdrv_drained_begin(bs_new); - + bdrv_drain_all_begin(); bdrv_graph_wrlock(); child = bdrv_attach_child_noperm(bs_new, bs_top, "backing", @@ -5513,9 +5537,7 @@ out: bdrv_refresh_limits(bs_top, NULL, NULL); bdrv_graph_wrunlock(); - - bdrv_drained_end(bs_top); - bdrv_drained_end(bs_new); + bdrv_drain_all_end(); return ret; } @@ -6989,6 +7011,8 @@ bdrv_inactivate_recurse(BlockDriverState *bs, bool top_level) GLOBAL_STATE_CODE(); + assert(bs->quiesce_counter > 0); + if (!bs->drv) { return -ENOMEDIUM; } @@ -7032,9 +7056,7 @@ bdrv_inactivate_recurse(BlockDriverState *bs, bool top_level) return -EPERM; } - bdrv_drained_begin(bs); bs->open_flags |= BDRV_O_INACTIVE; - bdrv_drained_end(bs); /* * Update permissions, they may differ for inactive nodes. @@ -7059,20 +7081,26 @@ int bdrv_inactivate(BlockDriverState *bs, Error **errp) int ret; GLOBAL_STATE_CODE(); - GRAPH_RDLOCK_GUARD_MAINLOOP(); + + bdrv_drain_all_begin(); + bdrv_graph_rdlock_main_loop(); if (bdrv_has_bds_parent(bs, true)) { error_setg(errp, "Node has active parent node"); - return -EPERM; + ret = -EPERM; + goto out; } ret = bdrv_inactivate_recurse(bs, true); if (ret < 0) { error_setg_errno(errp, -ret, "Failed to inactivate node"); - return ret; + goto out; } - return 0; +out: + bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_end(); + return ret; } int bdrv_inactivate_all(void) @@ -7082,7 +7110,9 @@ int bdrv_inactivate_all(void) int ret = 0; GLOBAL_STATE_CODE(); - GRAPH_RDLOCK_GUARD_MAINLOOP(); + + bdrv_drain_all_begin(); + bdrv_graph_rdlock_main_loop(); for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { /* Nodes with BDS parents are covered by recursion from the last @@ -7098,6 +7128,9 @@ int bdrv_inactivate_all(void) } } + bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_end(); + return ret; } @@ -7278,10 +7311,6 @@ bool bdrv_op_blocker_is_empty(BlockDriverState *bs) return true; } -/* - * Must not be called while holding the lock of an AioContext other than the - * current one. - */ void bdrv_img_create(const char *filename, const char *fmt, const char *base_filename, const char *base_fmt, char *options, uint64_t img_size, int flags, bool quiet, @@ -7568,10 +7597,21 @@ typedef struct BdrvStateSetAioContext { BlockDriverState *bs; } BdrvStateSetAioContext; -static bool bdrv_parent_change_aio_context(BdrvChild *c, AioContext *ctx, - GHashTable *visited, - Transaction *tran, - Error **errp) +/* + * Changes the AioContext of @child to @ctx and recursively for the associated + * block nodes and all their children and parents. Returns true if the change is + * possible and the transaction @tran can be continued. Returns false and sets + * @errp if not and the transaction must be aborted. + * + * @visited will accumulate all visited BdrvChild objects. The caller is + * responsible for freeing the list afterwards. + * + * Must be called with the affected block nodes drained. + */ +static bool GRAPH_RDLOCK +bdrv_parent_change_aio_context(BdrvChild *c, AioContext *ctx, + GHashTable *visited, Transaction *tran, + Error **errp) { GLOBAL_STATE_CODE(); if (g_hash_table_contains(visited, c)) { @@ -7596,6 +7636,17 @@ static bool bdrv_parent_change_aio_context(BdrvChild *c, AioContext *ctx, return true; } +/* + * Changes the AioContext of @c->bs to @ctx and recursively for all its children + * and parents. Returns true if the change is possible and the transaction @tran + * can be continued. Returns false and sets @errp if not and the transaction + * must be aborted. + * + * @visited will accumulate all visited BdrvChild objects. The caller is + * responsible for freeing the list afterwards. + * + * Must be called with the affected block nodes drained. + */ bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx, GHashTable *visited, Transaction *tran, Error **errp) @@ -7611,10 +7662,6 @@ bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx, static void bdrv_set_aio_context_clean(void *opaque) { BdrvStateSetAioContext *state = (BdrvStateSetAioContext *) opaque; - BlockDriverState *bs = (BlockDriverState *) state->bs; - - /* Paired with bdrv_drained_begin in bdrv_change_aio_context() */ - bdrv_drained_end(bs); g_free(state); } @@ -7642,10 +7689,12 @@ static TransactionActionDrv set_aio_context = { * * @visited will accumulate all visited BdrvChild objects. The caller is * responsible for freeing the list afterwards. + * + * @bs must be drained. */ -static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, - GHashTable *visited, Transaction *tran, - Error **errp) +static bool GRAPH_RDLOCK +bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, + GHashTable *visited, Transaction *tran, Error **errp) { BdrvChild *c; BdrvStateSetAioContext *state; @@ -7656,21 +7705,17 @@ static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, return true; } - bdrv_graph_rdlock_main_loop(); QLIST_FOREACH(c, &bs->parents, next_parent) { if (!bdrv_parent_change_aio_context(c, ctx, visited, tran, errp)) { - bdrv_graph_rdunlock_main_loop(); return false; } } QLIST_FOREACH(c, &bs->children, next) { if (!bdrv_child_change_aio_context(c, ctx, visited, tran, errp)) { - bdrv_graph_rdunlock_main_loop(); return false; } } - bdrv_graph_rdunlock_main_loop(); state = g_new(BdrvStateSetAioContext, 1); *state = (BdrvStateSetAioContext) { @@ -7678,8 +7723,7 @@ static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, .bs = bs, }; - /* Paired with bdrv_drained_end in bdrv_set_aio_context_clean() */ - bdrv_drained_begin(bs); + assert(bs->quiesce_counter > 0); tran_add(tran, &set_aio_context, state); @@ -7692,9 +7736,13 @@ static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx, * * If ignore_child is not NULL, that child (and its subgraph) will not * be touched. + * + * Called with the graph lock held. + * + * Called while all bs are drained. */ -int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, - BdrvChild *ignore_child, Error **errp) +int bdrv_try_change_aio_context_locked(BlockDriverState *bs, AioContext *ctx, + BdrvChild *ignore_child, Error **errp) { Transaction *tran; GHashTable *visited; @@ -7703,9 +7751,9 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, /* * Recursion phase: go through all nodes of the graph. - * Take care of checking that all nodes support changing AioContext - * and drain them, building a linear list of callbacks to run if everything - * is successful (the transaction itself). + * Take care of checking that all nodes support changing AioContext, + * building a linear list of callbacks to run if everything is successful + * (the transaction itself). */ tran = tran_new(); visited = g_hash_table_new(NULL, NULL); @@ -7732,6 +7780,29 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, return 0; } +/* + * Change bs's and recursively all of its parents' and children's AioContext + * to the given new context, returning an error if that isn't possible. + * + * If ignore_child is not NULL, that child (and its subgraph) will not + * be touched. + */ +int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, + BdrvChild *ignore_child, Error **errp) +{ + int ret; + + GLOBAL_STATE_CODE(); + + bdrv_drain_all_begin(); + bdrv_graph_rdlock_main_loop(); + ret = bdrv_try_change_aio_context_locked(bs, ctx, ignore_child, errp); + bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_end(); + + return ret; +} + void bdrv_add_aio_context_notifier(BlockDriverState *bs, void (*attached_aio_context)(AioContext *new_context, void *opaque), void (*detach_aio_context)(void *opaque), void *opaque) @@ -8159,8 +8230,10 @@ char *bdrv_dirname(BlockDriverState *bs, Error **errp) } /* - * Hot add/remove a BDS's child. So the user can take a child offline when - * it is broken and take a new child online + * Hot add a BDS's child. Used in combination with bdrv_del_child, so the user + * can take a child offline when it is broken and take a new child online. + * + * All block nodes must be drained. */ void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, Error **errp) @@ -8200,6 +8273,12 @@ void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs, parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp); } +/* + * Hot remove a BDS's child. Used in combination with bdrv_add_child, so the + * user can take a child offline when it is broken and take a new child online. + * + * All block nodes must be drained. + */ void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp) { BdrvChild *tmp; diff --git a/block/backup.c b/block/backup.c index 0151e84..909027c 100644 --- a/block/backup.c +++ b/block/backup.c @@ -498,10 +498,12 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, block_copy_set_speed(bcs, speed); /* Required permissions are taken by copy-before-write filter target */ + bdrv_drain_all_begin(); bdrv_graph_wrlock(); block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL, &error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); return &job->common; diff --git a/block/blklogwrites.c b/block/blklogwrites.c index b0f78c4..70ac76f 100644 --- a/block/blklogwrites.c +++ b/block/blklogwrites.c @@ -281,9 +281,11 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, ret = 0; fail_log: if (ret < 0) { + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_unref_child(bs, s->log_file); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); s->log_file = NULL; qemu_mutex_destroy(&s->mutex); } @@ -296,10 +298,12 @@ static void blk_log_writes_close(BlockDriverState *bs) { BDRVBlkLogWritesState *s = bs->opaque; + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_unref_child(bs, s->log_file); s->log_file = NULL; bdrv_graph_wrunlock(); + bdrv_drain_all_end(); qemu_mutex_destroy(&s->mutex); } diff --git a/block/blkverify.c b/block/blkverify.c index db79a36..3a71f74 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -151,10 +151,12 @@ static void blkverify_close(BlockDriverState *bs) { BDRVBlkverifyState *s = bs->opaque; + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_unref_child(bs, s->test_file); s->test_file = NULL; bdrv_graph_wrunlock(); + bdrv_drain_all_end(); } static int64_t coroutine_fn GRAPH_RDLOCK diff --git a/block/block-backend.c b/block/block-backend.c index a402db1..68209bb 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -136,9 +136,9 @@ static void blk_root_drained_end(BdrvChild *child); static void blk_root_change_media(BdrvChild *child, bool load); static void blk_root_resize(BdrvChild *child); -static bool blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx, - GHashTable *visited, Transaction *tran, - Error **errp); +static bool GRAPH_RDLOCK +blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx, GHashTable *visited, + Transaction *tran, Error **errp); static char *blk_root_get_parent_desc(BdrvChild *child) { @@ -889,9 +889,11 @@ void blk_remove_bs(BlockBackend *blk) root = blk->root; blk->root = NULL; + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_root_unref_child(root); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); } /* @@ -904,6 +906,7 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) GLOBAL_STATE_CODE(); bdrv_ref(bs); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); if ((bs->open_flags & BDRV_O_INACTIVE) && blk_can_inactivate(blk)) { @@ -919,6 +922,7 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, perm, shared_perm, blk, errp); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); if (blk->root == NULL) { return -EPERM; } diff --git a/block/commit.c b/block/commit.c index 7cc8c0f..6c4b736 100644 --- a/block/commit.c +++ b/block/commit.c @@ -392,6 +392,7 @@ void commit_start(const char *job_id, BlockDriverState *bs, * this is the responsibility of the interface (i.e. whoever calls * commit_start()). */ + bdrv_drain_all_begin(); bdrv_graph_wrlock(); s->base_overlay = bdrv_find_overlay(top, base); assert(s->base_overlay); @@ -424,18 +425,21 @@ void commit_start(const char *job_id, BlockDriverState *bs, iter_shared_perms, errp); if (ret < 0) { bdrv_graph_wrunlock(); + bdrv_drain_all_end(); goto fail; } } if (bdrv_freeze_backing_chain(commit_top_bs, base, errp) < 0) { bdrv_graph_wrunlock(); + bdrv_drain_all_end(); goto fail; } s->chain_frozen = true; ret = block_job_add_bdrv(&s->common, "base", base, 0, BLK_PERM_ALL, errp); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); if (ret < 0) { goto fail; diff --git a/block/file-posix.c b/block/file-posix.c index ec95b74..9b5f08c 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -41,6 +41,7 @@ #include "scsi/pr-manager.h" #include "scsi/constants.h" +#include "scsi/utils.h" #if defined(__APPLE__) && (__MACH__) #include <sys/ioctl.h> @@ -72,6 +73,7 @@ #include <linux/blkzoned.h> #endif #include <linux/cdrom.h> +#include <linux/dm-ioctl.h> #include <linux/fd.h> #include <linux/fs.h> #include <linux/hdreg.h> @@ -138,6 +140,22 @@ #define RAW_LOCK_PERM_BASE 100 #define RAW_LOCK_SHARED_BASE 200 +/* + * Multiple retries are mostly meant for two separate scenarios: + * + * - DM_MPATH_PROBE_PATHS returns success, but before SG_IO completes, another + * path goes down. + * + * - DM_MPATH_PROBE_PATHS failed all paths in the current path group, so we have + * to send another SG_IO to switch to another path group to probe the paths in + * it. + * + * Even if each path is in a separate path group (path_grouping_policy set to + * failover), it's rare to have more than eight path groups - and even then + * pretty unlikely that only bad path groups would be chosen in eight retries. + */ +#define SG_IO_MAX_RETRIES 8 + typedef struct BDRVRawState { int fd; bool use_lock; @@ -165,6 +183,7 @@ typedef struct BDRVRawState { bool use_linux_aio:1; bool has_laio_fdsync:1; bool use_linux_io_uring:1; + bool use_mpath:1; int page_cache_inconsistent; /* errno from fdatasync failure */ bool has_fallocate; bool needs_alignment; @@ -785,17 +804,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, } #endif - if (S_ISBLK(st.st_mode)) { -#ifdef __linux__ - /* On Linux 3.10, BLKDISCARD leaves stale data in the page cache. Do - * not rely on the contents of discarded blocks unless using O_DIRECT. - * Same for BLKZEROOUT. - */ - if (!(bs->open_flags & BDRV_O_NOCACHE)) { - s->has_write_zeroes = false; - } -#endif - } #ifdef __FreeBSD__ if (S_ISCHR(st.st_mode)) { /* @@ -4264,15 +4272,105 @@ hdev_open_Mac_error: /* Since this does ioctl the device must be already opened */ bs->sg = hdev_is_sg(bs); + /* sg devices aren't even block devices and can't use dm-mpath */ + s->use_mpath = !bs->sg; + return ret; } #if defined(__linux__) +#if defined(DM_MPATH_PROBE_PATHS) +static bool coroutine_fn sgio_path_error(int ret, sg_io_hdr_t *io_hdr) +{ + if (ret < 0) { + switch (ret) { + case -ENODEV: + return true; + case -EAGAIN: + /* + * The device is probably suspended. This happens while the dm table + * is reloaded, e.g. because a path is added or removed. This is an + * operation that should complete within 1ms, so just wait a bit and + * retry. + * + * If the device was suspended for another reason, we'll wait and + * retry SG_IO_MAX_RETRIES times. This is a tolerable delay before + * we return an error and potentially stop the VM. + */ + qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000); + return true; + default: + return false; + } + } + + if (io_hdr->host_status != SCSI_HOST_OK) { + return true; + } + + switch (io_hdr->status) { + case GOOD: + case CONDITION_GOOD: + case INTERMEDIATE_GOOD: + case INTERMEDIATE_C_GOOD: + case RESERVATION_CONFLICT: + case COMMAND_TERMINATED: + return false; + case CHECK_CONDITION: + return !scsi_sense_buf_is_guest_recoverable(io_hdr->sbp, + io_hdr->mx_sb_len); + default: + return true; + } +} + +static bool coroutine_fn hdev_co_ioctl_sgio_retry(RawPosixAIOData *acb, int ret) +{ + BDRVRawState *s = acb->bs->opaque; + RawPosixAIOData probe_acb; + + if (!s->use_mpath) { + return false; + } + + if (!sgio_path_error(ret, acb->ioctl.buf)) { + return false; + } + + probe_acb = (RawPosixAIOData) { + .bs = acb->bs, + .aio_type = QEMU_AIO_IOCTL, + .aio_fildes = s->fd, + .aio_offset = 0, + .ioctl = { + .buf = NULL, + .cmd = DM_MPATH_PROBE_PATHS, + }, + }; + + ret = raw_thread_pool_submit(handle_aiocb_ioctl, &probe_acb); + if (ret == -ENOTTY) { + s->use_mpath = false; + } else if (ret == -EAGAIN) { + /* The device might be suspended for a table reload, worth retrying */ + return true; + } + + return ret == 0; +} +#else +static bool coroutine_fn hdev_co_ioctl_sgio_retry(RawPosixAIOData *acb, int ret) +{ + return false; +} +#endif /* DM_MPATH_PROBE_PATHS */ + static int coroutine_fn hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) { BDRVRawState *s = bs->opaque; RawPosixAIOData acb; + int retries = SG_IO_MAX_RETRIES; int ret; ret = fd_open(bs); @@ -4300,7 +4398,11 @@ hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) }, }; - return raw_thread_pool_submit(handle_aiocb_ioctl, &acb); + do { + ret = raw_thread_pool_submit(handle_aiocb_ioctl, &acb); + } while (req == SG_IO && retries-- && hdev_co_ioctl_sgio_retry(&acb, ret)); + + return ret; } #endif /* linux */ @@ -413,7 +413,6 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) /* At this point, we should be always running in the main loop. */ GLOBAL_STATE_CODE(); assert(bs->quiesce_counter > 0); - GLOBAL_STATE_CODE(); /* Re-enable things in child-to-parent order */ old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); diff --git a/block/linux-aio.c b/block/linux-aio.c index 407369f..c200e7a 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c @@ -291,7 +291,7 @@ static void ioq_submit(LinuxAioState *s) { int ret, len; struct qemu_laiocb *aiocb; - struct iocb *iocbs[MAX_EVENTS]; + QEMU_UNINITIALIZED struct iocb *iocbs[MAX_EVENTS]; QSIMPLEQ_HEAD(, qemu_laiocb) completed; do { diff --git a/block/mirror.c b/block/mirror.c index c2c5099..6e8caf4 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -2014,6 +2014,7 @@ static BlockJob *mirror_start_job( */ bdrv_disable_dirty_bitmap(s->dirty_bitmap); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); ret = block_job_add_bdrv(&s->common, "source", bs, 0, BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE | @@ -2021,6 +2022,7 @@ static BlockJob *mirror_start_job( errp); if (ret < 0) { bdrv_graph_wrunlock(); + bdrv_drain_all_end(); goto fail; } @@ -2066,16 +2068,19 @@ static BlockJob *mirror_start_job( iter_shared_perms, errp); if (ret < 0) { bdrv_graph_wrunlock(); + bdrv_drain_all_end(); goto fail; } } if (bdrv_freeze_backing_chain(mirror_top_bs, target, errp) < 0) { bdrv_graph_wrunlock(); + bdrv_drain_all_end(); goto fail; } } bdrv_graph_wrunlock(); + bdrv_drain_all_end(); QTAILQ_INIT(&s->ops_in_flight); diff --git a/block/qcow2.c b/block/qcow2.c index 66fba89..45451a7 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1895,7 +1895,9 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, g_free(s->image_data_file); if (open_data_file && has_data_file(bs)) { bdrv_graph_co_rdunlock(); + bdrv_drain_all_begin(); bdrv_co_unref_child(bs, s->data_file); + bdrv_drain_all_end(); bdrv_graph_co_rdlock(); s->data_file = NULL; } @@ -2821,9 +2823,11 @@ qcow2_do_close(BlockDriverState *bs, bool close_data_file) if (close_data_file && has_data_file(bs)) { GLOBAL_STATE_CODE(); bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_unref_child(bs, s->data_file); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); s->data_file = NULL; bdrv_graph_rdlock_main_loop(); } diff --git a/block/quorum.c b/block/quorum.c index ed8ce80..cc3bc5f 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -1037,6 +1037,7 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, close_exit: /* cleanup on error */ + bdrv_drain_all_begin(); bdrv_graph_wrlock(); for (i = 0; i < s->num_children; i++) { if (!opened[i]) { @@ -1045,6 +1046,7 @@ close_exit: bdrv_unref_child(bs, s->children[i]); } bdrv_graph_wrunlock(); + bdrv_drain_all_end(); g_free(s->children); g_free(opened); exit: @@ -1057,11 +1059,13 @@ static void quorum_close(BlockDriverState *bs) BDRVQuorumState *s = bs->opaque; int i; + bdrv_drain_all_begin(); bdrv_graph_wrlock(); for (i = 0; i < s->num_children; i++) { bdrv_unref_child(bs, s->children[i]); } bdrv_graph_wrunlock(); + bdrv_drain_all_end(); g_free(s->children); } diff --git a/block/replication.c b/block/replication.c index 07f274d..0879718 100644 --- a/block/replication.c +++ b/block/replication.c @@ -540,6 +540,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, return; } + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_ref(hidden_disk->bs); @@ -549,6 +550,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, if (local_err) { error_propagate(errp, local_err); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); return; } @@ -559,6 +561,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, if (local_err) { error_propagate(errp, local_err); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); return; } @@ -571,12 +574,14 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode, !check_top_bs(top_bs, bs)) { error_setg(errp, "No top_bs or it is invalid"); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); reopen_backing_file(bs, false, NULL); return; } bdrv_op_block_all(top_bs, s->blocker); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); s->backup_job = backup_job_create( NULL, s->secondary_disk->bs, s->hidden_disk->bs, @@ -651,12 +656,14 @@ static void replication_done(void *opaque, int ret) if (ret == 0) { s->stage = BLOCK_REPLICATION_DONE; + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_unref_child(bs, s->secondary_disk); s->secondary_disk = NULL; bdrv_unref_child(bs, s->hidden_disk); s->hidden_disk = NULL; bdrv_graph_wrunlock(); + bdrv_drain_all_end(); s->error = 0; } else { diff --git a/block/snapshot.c b/block/snapshot.c index 22567f1..28c9c43 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -291,9 +291,11 @@ int bdrv_snapshot_goto(BlockDriverState *bs, } /* .bdrv_open() will re-attach it */ + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_unref_child(bs, fallback); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); ret = bdrv_snapshot_goto(fallback_bs, snapshot_id, errp); memset(bs->opaque, 0, drv->instance_size); @@ -327,7 +329,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs, /** * Delete an internal snapshot by @snapshot_id and @name. - * @bs: block device used in the operation + * @bs: block device used in the operation, must be drained * @snapshot_id: unique snapshot ID, or NULL * @name: snapshot name, or NULL * @errp: location to store error @@ -358,6 +360,8 @@ int bdrv_snapshot_delete(BlockDriverState *bs, GLOBAL_STATE_CODE(); + assert(bs->quiesce_counter > 0); + if (!drv) { error_setg(errp, "Device '%s' has no medium", bdrv_get_device_name(bs)); @@ -368,9 +372,6 @@ int bdrv_snapshot_delete(BlockDriverState *bs, return -EINVAL; } - /* drain all pending i/o before deleting snapshot */ - bdrv_drained_begin(bs); - if (drv->bdrv_snapshot_delete) { ret = drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp); } else if (fallback_bs) { @@ -382,7 +383,6 @@ int bdrv_snapshot_delete(BlockDriverState *bs, ret = -ENOTSUP; } - bdrv_drained_end(bs); return ret; } @@ -571,19 +571,22 @@ int bdrv_all_delete_snapshot(const char *name, ERRP_GUARD(); g_autoptr(GList) bdrvs = NULL; GList *iterbdrvs; + int ret = 0; GLOBAL_STATE_CODE(); - GRAPH_RDLOCK_GUARD_MAINLOOP(); - if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) { - return -1; + bdrv_drain_all_begin(); + bdrv_graph_rdlock_main_loop(); + + ret = bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp); + if (ret < 0) { + goto out; } iterbdrvs = bdrvs; while (iterbdrvs) { BlockDriverState *bs = iterbdrvs->data; QEMUSnapshotInfo sn1, *snapshot = &sn1; - int ret = 0; if ((devices || bdrv_all_snapshots_includes_bs(bs)) && bdrv_snapshot_find(bs, snapshot, name) >= 0) @@ -594,13 +597,16 @@ int bdrv_all_delete_snapshot(const char *name, if (ret < 0) { error_prepend(errp, "Could not delete snapshot '%s' on '%s': ", name, bdrv_get_device_or_node_name(bs)); - return -1; + goto out; } iterbdrvs = iterbdrvs->next; } - return 0; +out: + bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_end(); + return ret; } diff --git a/block/stream.c b/block/stream.c index 999d9e5..f5441f2 100644 --- a/block/stream.c +++ b/block/stream.c @@ -80,11 +80,10 @@ static int stream_prepare(Job *job) * may end up working with the wrong base node (or it might even have gone * away by the time we want to use it). */ - bdrv_drained_begin(unfiltered_bs); if (unfiltered_bs_cow) { bdrv_ref(unfiltered_bs_cow); - bdrv_drained_begin(unfiltered_bs_cow); } + bdrv_drain_all_begin(); bdrv_graph_rdlock_main_loop(); base = bdrv_filter_or_cow_bs(s->above_base); @@ -123,11 +122,10 @@ static int stream_prepare(Job *job) } out: + bdrv_drain_all_end(); if (unfiltered_bs_cow) { - bdrv_drained_end(unfiltered_bs_cow); bdrv_unref(unfiltered_bs_cow); } - bdrv_drained_end(unfiltered_bs); return ret; } @@ -373,10 +371,12 @@ void stream_start(const char *job_id, BlockDriverState *bs, * already have our own plans. Also don't allow resize as the image size is * queried only at the job start and then cached. */ + bdrv_drain_all_begin(); bdrv_graph_wrlock(); if (block_job_add_bdrv(&s->common, "active node", bs, 0, basic_flags | BLK_PERM_WRITE, errp)) { bdrv_graph_wrunlock(); + bdrv_drain_all_end(); goto fail; } @@ -397,10 +397,12 @@ void stream_start(const char *job_id, BlockDriverState *bs, basic_flags, errp); if (ret < 0) { bdrv_graph_wrunlock(); + bdrv_drain_all_end(); goto fail; } } bdrv_graph_wrunlock(); + bdrv_drain_all_end(); s->base_overlay = base_overlay; s->above_base = above_base; diff --git a/block/vmdk.c b/block/vmdk.c index 9c7ab03..89a7250 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -271,6 +271,7 @@ static void vmdk_free_extents(BlockDriverState *bs) BDRVVmdkState *s = bs->opaque; VmdkExtent *e; + bdrv_drain_all_begin(); bdrv_graph_wrlock(); for (i = 0; i < s->num_extents; i++) { e = &s->extents[i]; @@ -283,6 +284,7 @@ static void vmdk_free_extents(BlockDriverState *bs) } } bdrv_graph_wrunlock(); + bdrv_drain_all_end(); g_free(s->extents); } @@ -1247,9 +1249,11 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, 0, 0, 0, 0, 0, &extent, errp); if (ret < 0) { bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_unref_child(bs, extent_file); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); bdrv_graph_rdlock_main_loop(); goto out; } @@ -1266,9 +1270,11 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, g_free(buf); if (ret) { bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_unref_child(bs, extent_file); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); bdrv_graph_rdlock_main_loop(); goto out; } @@ -1277,9 +1283,11 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp); if (ret) { bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_unref_child(bs, extent_file); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); bdrv_graph_rdlock_main_loop(); goto out; } @@ -1287,9 +1295,11 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options, } else { error_setg(errp, "Unsupported extent type '%s'", type); bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_unref_child(bs, extent_file); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); bdrv_graph_rdlock_main_loop(); ret = -ENOTSUP; goto out; @@ -1132,39 +1132,41 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device, int ret; GLOBAL_STATE_CODE(); - GRAPH_RDLOCK_GUARD_MAINLOOP(); + + bdrv_drain_all_begin(); + bdrv_graph_rdlock_main_loop(); bs = qmp_get_root_bs(device, errp); if (!bs) { - return NULL; + goto error; } if (!id && !name) { error_setg(errp, "Name or id must be provided"); - return NULL; + goto error; } if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, errp)) { - return NULL; + goto error; } ret = bdrv_snapshot_find_by_id_and_name(bs, id, name, &sn, &local_err); if (local_err) { error_propagate(errp, local_err); - return NULL; + goto error; } if (!ret) { error_setg(errp, "Snapshot with id '%s' and name '%s' does not exist on " "device '%s'", STR_OR_NULL(id), STR_OR_NULL(name), device); - return NULL; + goto error; } bdrv_snapshot_delete(bs, id, name, &local_err); if (local_err) { error_propagate(errp, local_err); - return NULL; + goto error; } info = g_new0(SnapshotInfo, 1); @@ -1180,6 +1182,9 @@ SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device, info->has_icount = true; } +error: + bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_end(); return info; } @@ -1203,7 +1208,7 @@ static void internal_snapshot_action(BlockdevSnapshotInternal *internal, Error *local_err = NULL; const char *device; const char *name; - BlockDriverState *bs; + BlockDriverState *bs, *check_bs; QEMUSnapshotInfo old_sn, *sn; bool ret; int64_t rt; @@ -1211,7 +1216,7 @@ static void internal_snapshot_action(BlockdevSnapshotInternal *internal, int ret1; GLOBAL_STATE_CODE(); - GRAPH_RDLOCK_GUARD_MAINLOOP(); + bdrv_graph_rdlock_main_loop(); tran_add(tran, &internal_snapshot_drv, state); @@ -1220,14 +1225,29 @@ static void internal_snapshot_action(BlockdevSnapshotInternal *internal, bs = qmp_get_root_bs(device, errp); if (!bs) { + bdrv_graph_rdunlock_main_loop(); return; } state->bs = bs; + /* Need to drain while unlocked. */ + bdrv_graph_rdunlock_main_loop(); /* Paired with .clean() */ bdrv_drained_begin(bs); + GRAPH_RDLOCK_GUARD_MAINLOOP(); + + /* Make sure the root bs did not change with the drain. */ + check_bs = qmp_get_root_bs(device, errp); + if (bs != check_bs) { + if (check_bs) { + error_setg(errp, "Block node of device '%s' unexpectedly changed", + device); + } /* else errp is already set */ + return; + } + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, errp)) { return; } @@ -1295,12 +1315,14 @@ static void internal_snapshot_abort(void *opaque) Error *local_error = NULL; GLOBAL_STATE_CODE(); - GRAPH_RDLOCK_GUARD_MAINLOOP(); if (!state->created) { return; } + bdrv_drain_all_begin(); + bdrv_graph_rdlock_main_loop(); + if (bdrv_snapshot_delete(bs, sn->id_str, sn->name, &local_error) < 0) { error_reportf_err(local_error, "Failed to delete snapshot with id '%s' and " @@ -1308,6 +1330,8 @@ static void internal_snapshot_abort(void *opaque) sn->id_str, sn->name, bdrv_get_device_name(bs)); } + bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_end(); } static void internal_snapshot_clean(void *opaque) @@ -1353,9 +1377,10 @@ static void external_snapshot_action(TransactionAction *action, const char *new_image_file; ExternalSnapshotState *state = g_new0(ExternalSnapshotState, 1); uint64_t perm, shared; + BlockDriverState *check_bs; /* TODO We'll eventually have to take a writer lock in this function */ - GRAPH_RDLOCK_GUARD_MAINLOOP(); + bdrv_graph_rdlock_main_loop(); tran_add(tran, &external_snapshot_drv, state); @@ -1388,11 +1413,25 @@ static void external_snapshot_action(TransactionAction *action, state->old_bs = bdrv_lookup_bs(device, node_name, errp); if (!state->old_bs) { + bdrv_graph_rdunlock_main_loop(); return; } + /* Need to drain while unlocked. */ + bdrv_graph_rdunlock_main_loop(); /* Paired with .clean() */ bdrv_drained_begin(state->old_bs); + GRAPH_RDLOCK_GUARD_MAINLOOP(); + + /* Make sure the associated bs did not change with the drain. */ + check_bs = bdrv_lookup_bs(device, node_name, errp); + if (state->old_bs != check_bs) { + if (check_bs) { + error_setg(errp, "Block node of device '%s' unexpectedly changed", + device); + } /* else errp is already set */ + return; + } if (!bdrv_is_inserted(state->old_bs)) { error_setg(errp, "Device '%s' has no medium", @@ -3522,6 +3561,7 @@ void qmp_x_blockdev_change(const char *parent, const char *child, BlockDriverState *parent_bs, *new_bs = NULL; BdrvChild *p_child; + bdrv_drain_all_begin(); bdrv_graph_wrlock(); parent_bs = bdrv_lookup_bs(parent, parent, errp); @@ -3559,6 +3599,7 @@ void qmp_x_blockdev_change(const char *parent, const char *child, out: bdrv_graph_wrunlock(); + bdrv_drain_all_end(); } BlockJobInfoList *qmp_query_block_jobs(Error **errp) @@ -3592,12 +3633,13 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread, AioContext *new_context; BlockDriverState *bs; - GRAPH_RDLOCK_GUARD_MAINLOOP(); + bdrv_drain_all_begin(); + bdrv_graph_rdlock_main_loop(); bs = bdrv_find_node(node_name); if (!bs) { error_setg(errp, "Failed to find node with node-name='%s'", node_name); - return; + goto out; } /* Protects against accidents. */ @@ -3605,14 +3647,14 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread, error_setg(errp, "Node %s is associated with a BlockBackend and could " "be in use (use force=true to override this check)", node_name); - return; + goto out; } if (iothread->type == QTYPE_QSTRING) { IOThread *obj = iothread_by_id(iothread->u.s); if (!obj) { error_setg(errp, "Cannot find iothread %s", iothread->u.s); - return; + goto out; } new_context = iothread_get_aio_context(obj); @@ -3620,7 +3662,11 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread, new_context = qemu_get_aio_context(); } - bdrv_try_change_aio_context(bs, new_context, NULL, errp); + bdrv_try_change_aio_context_locked(bs, new_context, NULL, errp); + +out: + bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_end(); } QemuOptsList qemu_common_drive_opts = { @@ -144,9 +144,9 @@ static TransactionActionDrv change_child_job_context = { .clean = g_free, }; -static bool child_job_change_aio_ctx(BdrvChild *c, AioContext *ctx, - GHashTable *visited, Transaction *tran, - Error **errp) +static bool GRAPH_RDLOCK +child_job_change_aio_ctx(BdrvChild *c, AioContext *ctx, GHashTable *visited, + Transaction *tran, Error **errp) { BlockJob *job = c->opaque; BdrvStateChildJobContext *s; @@ -198,6 +198,7 @@ void block_job_remove_all_bdrv(BlockJob *job) * one to make sure that such a concurrent access does not attempt * to process an already freed BdrvChild. */ + bdrv_drain_all_begin(); bdrv_graph_wrlock(); while (job->nodes) { GSList *l = job->nodes; @@ -211,6 +212,7 @@ void block_job_remove_all_bdrv(BlockJob *job) g_slist_free_1(l); } bdrv_graph_wrunlock(); + bdrv_drain_all_end(); } bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs) @@ -496,6 +498,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, int ret; GLOBAL_STATE_CODE(); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); if (job_id == NULL && !(flags & JOB_INTERNAL)) { @@ -506,6 +509,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, flags, cb, opaque, errp); if (job == NULL) { bdrv_graph_wrunlock(); + bdrv_drain_all_end(); return NULL; } @@ -544,10 +548,12 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver, } bdrv_graph_wrunlock(); + bdrv_drain_all_end(); return job; fail: bdrv_graph_wrunlock(); + bdrv_drain_all_end(); job_early_fail(&job->job); return NULL; } diff --git a/chardev/char-fd.c b/chardev/char-fd.c index 23bfe3c..6f03adf 100644 --- a/chardev/char-fd.c +++ b/chardev/char-fd.c @@ -50,7 +50,7 @@ static gboolean fd_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) Chardev *chr = CHARDEV(opaque); FDChardev *s = FD_CHARDEV(opaque); int len; - uint8_t buf[CHR_READ_BUF_LEN]; + QEMU_UNINITIALIZED uint8_t buf[CHR_READ_BUF_LEN]; ssize_t ret; len = sizeof(buf); diff --git a/chardev/char-pty.c b/chardev/char-pty.c index c28554e..674e9b3 100644 --- a/chardev/char-pty.c +++ b/chardev/char-pty.c @@ -154,7 +154,7 @@ static gboolean pty_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) Chardev *chr = CHARDEV(opaque); PtyChardev *s = PTY_CHARDEV(opaque); gsize len; - uint8_t buf[CHR_READ_BUF_LEN]; + QEMU_UNINITIALIZED uint8_t buf[CHR_READ_BUF_LEN]; ssize_t ret; len = sizeof(buf); diff --git a/chardev/char-socket.c b/chardev/char-socket.c index e8dd293..1e83139 100644 --- a/chardev/char-socket.c +++ b/chardev/char-socket.c @@ -497,7 +497,7 @@ static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) { Chardev *chr = CHARDEV(opaque); SocketChardev *s = SOCKET_CHARDEV(opaque); - uint8_t buf[CHR_READ_BUF_LEN]; + QEMU_UNINITIALIZED uint8_t buf[CHR_READ_BUF_LEN]; int len, size; if ((s->state != TCP_CHARDEV_STATE_CONNECTED) || diff --git a/rust/clippy.toml b/clippy.toml index 58a62c0..9016172 100644 --- a/rust/clippy.toml +++ b/clippy.toml @@ -1,3 +1,3 @@ -doc-valid-idents = ["PrimeCell", ".."] +doc-valid-idents = ["IrDA", "PrimeCell", ".."] allow-mixed-uninlined-format-args = false msrv = "1.77.0" diff --git a/configs/devices/i386-softmmu/default.mak b/configs/devices/i386-softmmu/default.mak index 4faf2f0..bc0479a 100644 --- a/configs/devices/i386-softmmu/default.mak +++ b/configs/devices/i386-softmmu/default.mak @@ -18,6 +18,7 @@ #CONFIG_QXL=n #CONFIG_SEV=n #CONFIG_SGA=n +#CONFIG_TDX=n #CONFIG_TEST_DEVICES=n #CONFIG_TPM_CRB=n #CONFIG_TPM_TIS_ISA=n diff --git a/configs/targets/microblaze-softmmu.mak b/configs/targets/microblaze-softmmu.mak index 23457d0..bab7b49 100644 --- a/configs/targets/microblaze-softmmu.mak +++ b/configs/targets/microblaze-softmmu.mak @@ -3,6 +3,4 @@ TARGET_BIG_ENDIAN=y # needed by boot.c TARGET_NEED_FDT=y TARGET_XML_FILES=gdb-xml/microblaze-core.xml gdb-xml/microblaze-stack-protect.xml -# System mode can address up to 64 bits via lea/sea instructions. -# TODO: These bypass the mmu, so we could emulate these differently. -TARGET_LONG_BITS=64 +TARGET_LONG_BITS=32 diff --git a/configs/targets/microblazeel-softmmu.mak b/configs/targets/microblazeel-softmmu.mak index c82c509..8aee7eb 100644 --- a/configs/targets/microblazeel-softmmu.mak +++ b/configs/targets/microblazeel-softmmu.mak @@ -2,6 +2,4 @@ TARGET_ARCH=microblaze # needed by boot.c TARGET_NEED_FDT=y TARGET_XML_FILES=gdb-xml/microblaze-core.xml gdb-xml/microblaze-stack-protect.xml -# System mode can address up to 64 bits via lea/sea instructions. -# TODO: These bypass the mmu, so we could emulate these differently. -TARGET_LONG_BITS=64 +TARGET_LONG_BITS=32 @@ -209,6 +209,8 @@ for opt do ;; --rustc=*) RUSTC="$optarg" ;; + --rustdoc=*) RUSTDOC="$optarg" + ;; --cpu=*) cpu="$optarg" ;; --extra-cflags=*) @@ -323,6 +325,7 @@ pkg_config="${PKG_CONFIG-${cross_prefix}pkg-config}" sdl2_config="${SDL2_CONFIG-${cross_prefix}sdl2-config}" rustc="${RUSTC-rustc}" +rustdoc="${RUSTDOC-rustdoc}" check_define() { cat > $TMPC <<EOF @@ -660,6 +663,8 @@ for opt do ;; --rustc=*) ;; + --rustdoc=*) + ;; --make=*) ;; --install=*) @@ -890,6 +895,7 @@ Advanced options (experts only): --cxx=CXX use C++ compiler CXX [$cxx] --objcc=OBJCC use Objective-C compiler OBJCC [$objcc] --rustc=RUSTC use Rust compiler RUSTC [$rustc] + --rustdoc=RUSTDOC use rustdoc binary RUSTDOC [$rustdoc] --extra-cflags=CFLAGS append extra C compiler flags CFLAGS --extra-cxxflags=CXXFLAGS append extra C++ compiler flags CXXFLAGS --extra-objcflags=OBJCFLAGS append extra Objective C compiler flags OBJCFLAGS @@ -1178,6 +1184,14 @@ fi ########################################## # detect rust triple +meson_version=$($meson --version) +if test "$rust" != disabled && ! version_ge "$meson_version" 1.8.1; then + if test "$rust" = enabled; then + error_exit "Rust support needs Meson 1.8.1 or newer" + fi + echo "Rust needs Meson 1.8.1, disabling" 2>&1 + rust=disabled +fi if test "$rust" != disabled && has "$rustc" && $rustc -vV > "${TMPDIR1}/${TMPB}.out"; then rust_host_triple=$(sed -n 's/^host: //p' "${TMPDIR1}/${TMPB}.out") else @@ -1893,8 +1907,10 @@ if test "$skip_meson" = no; then if test "$rust" != disabled; then if test "$rust_host_triple" != "$rust_target_triple"; then echo "rust = [$(meson_quote $rustc --target "$rust_target_triple")]" >> $cross + echo "rustdoc = [$(meson_quote $rustdoc --target "$rust_target_triple")]" >> $cross else echo "rust = [$(meson_quote $rustc)]" >> $cross + echo "rustdoc = [$(meson_quote $rustdoc)]" >> $cross fi fi echo "ar = [$(meson_quote $ar)]" >> $cross diff --git a/contrib/plugins/ips.c b/contrib/plugins/ips.c index e5297db..f110c56 100644 --- a/contrib/plugins/ips.c +++ b/contrib/plugins/ips.c @@ -129,20 +129,62 @@ static void plugin_exit(qemu_plugin_id_t id, void *udata) qemu_plugin_scoreboard_free(vcpus); } +typedef struct { + const char *suffix; + unsigned long multipler; +} ScaleEntry; + +/* a bit like units.h but not binary */ +static const ScaleEntry scales[] = { + { "k", 1000 }, + { "m", 1000 * 1000 }, + { "g", 1000 * 1000 * 1000 }, +}; + QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info, int argc, char **argv) { + bool ipq_set = false; + for (int i = 0; i < argc; i++) { char *opt = argv[i]; g_auto(GStrv) tokens = g_strsplit(opt, "=", 2); if (g_strcmp0(tokens[0], "ips") == 0) { - max_insn_per_second = g_ascii_strtoull(tokens[1], NULL, 10); + char *endptr = NULL; + max_insn_per_second = g_ascii_strtoull(tokens[1], &endptr, 10); if (!max_insn_per_second && errno) { fprintf(stderr, "%s: couldn't parse %s (%s)\n", __func__, tokens[1], g_strerror(errno)); return -1; } + + if (endptr && *endptr != 0) { + g_autofree gchar *lower = g_utf8_strdown(endptr, -1); + unsigned long scale = 0; + + for (int j = 0; j < G_N_ELEMENTS(scales); j++) { + if (g_strcmp0(lower, scales[j].suffix) == 0) { + scale = scales[j].multipler; + break; + } + } + + if (scale) { + max_insn_per_second *= scale; + } else { + fprintf(stderr, "bad suffix: %s\n", endptr); + return -1; + } + } + } else if (g_strcmp0(tokens[0], "ipq") == 0) { + max_insn_per_quantum = g_ascii_strtoull(tokens[1], NULL, 10); + + if (!max_insn_per_quantum) { + fprintf(stderr, "bad ipq value: %s\n", tokens[0]); + return -1; + } + ipq_set = true; } else { fprintf(stderr, "option parsing failed: %s\n", opt); return -1; @@ -150,7 +192,10 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id, } vcpus = qemu_plugin_scoreboard_new(sizeof(vCPUTime)); - max_insn_per_quantum = max_insn_per_second / NUM_TIME_UPDATE_PER_SEC; + + if (!ipq_set) { + max_insn_per_quantum = max_insn_per_second / NUM_TIME_UPDATE_PER_SEC; + } if (max_insn_per_quantum == 0) { fprintf(stderr, "minimum of %d instructions per second needed\n", diff --git a/contrib/plugins/meson.build b/contrib/plugins/meson.build index fa8a426..1876bc7 100644 --- a/contrib/plugins/meson.build +++ b/contrib/plugins/meson.build @@ -24,7 +24,7 @@ endif if t.length() > 0 alias_target('contrib-plugins', t) else - run_target('contrib-plugins', command: find_program('true')) + run_target('contrib-plugins', command: [python, '-c', '']) endif plugin_modules += t diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index 44d3427..4203713 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -148,6 +148,14 @@ options are removed in favor of using explicit ``blockdev-create`` and ``blockdev-add`` calls. See :doc:`/interop/live-block-operations` for details. +``query-migrationthreads`` (since 9.2) +'''''''''''''''''''''''''''''''''''''' + +To be removed with no replacement, as it reports only a limited set of +threads (for example, it only reports source side of multifd threads, +without reporting any destination threads, or non-multifd source threads). +For debugging purpose, please use ``-name $VM,debug-threads=on`` instead. + ``block-job-pause`` (since 10.1) '''''''''''''''''''''''''''''''' @@ -179,27 +187,10 @@ Use ``job-dismiss`` instead. Use ``job-finalize`` instead. -``query-migrationthreads`` (since 9.2) -'''''''''''''''''''''''''''''''''''''' +``migrate`` argument ``detach`` (since 10.1) +'''''''''''''''''''''''''''''''''''''''''''' -To be removed with no replacement, as it reports only a limited set of -threads (for example, it only reports source side of multifd threads, -without reporting any destination threads, or non-multifd source threads). -For debugging purpose, please use ``-name $VM,debug-threads=on`` instead. - -Incorrectly typed ``device_add`` arguments (since 6.2) -'''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Due to shortcomings in the internal implementation of ``device_add``, QEMU -incorrectly accepts certain invalid arguments: Any object or list arguments are -silently ignored. Other argument types are not checked, but an implicit -conversion happens, so that e.g. string values can be assigned to integer -device properties or vice versa. - -This is a bug in QEMU that will be fixed in the future so that previously -accepted incorrect commands will return an error. Users should make sure that -all arguments passed to ``device_add`` are consistent with the documented -property types. +This argument has always been ignored. Host Architectures ------------------ @@ -324,12 +315,6 @@ deprecated; use the new name ``dtb-randomness`` instead. The new name better reflects the way this property affects all random data within the device tree blob, not just the ``kaslr-seed`` node. -Big-Endian variants of MicroBlaze ``petalogix-ml605`` and ``xlnx-zynqmp-pmu`` machines (since 9.2) -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' - -Both ``petalogix-ml605`` and ``xlnx-zynqmp-pmu`` were added for little endian -CPUs. Big endian support is not tested. - Mips ``mipssim`` machine (since 10.0) ''''''''''''''''''''''''''''''''''''' @@ -360,6 +345,19 @@ machine must ensure that they're setting the ``spike`` machine in the command line (``-M spike``). +System emulator binaries +------------------------ + +``qemu-system-microblazeel`` (since 10.1) +''''''''''''''''''''''''''''''''''''''''' + +The ``qemu-system-microblaze`` binary can emulate little-endian machines +now, too, so the separate binary ``qemu-system-microblazeel`` (with the +``el`` suffix) for little-endian targets is not required anymore. The +``petalogix-s3adsp1800`` machine can now be switched to little endian by +setting its ``endianness`` property to ``little``. + + Backend options --------------- @@ -531,14 +529,6 @@ PCIe passthrough shall be the mainline solution. CPU device properties ''''''''''''''''''''' -``pcommit`` on x86 (since 9.1) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The PCOMMIT instruction was never included in any physical processor. -It was implemented as a no-op instruction in TCG up to QEMU 9.0, but -only with ``-cpu max`` (which does not guarantee migration compatibility -across versions). - ``pmu-num=n`` on RISC-V CPUs (since 8.2) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -548,6 +538,14 @@ be calculated with ``((2 ^ n) - 1) << 3``. The least significant three bits must be left clear. +``pcommit`` on x86 (since 9.1) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The PCOMMIT instruction was never included in any physical processor. +It was implemented as a no-op instruction in TCG up to QEMU 9.0, but +only with ``-cpu max`` (which does not guarantee migration compatibility +across versions). + Backwards compatibility ----------------------- diff --git a/docs/about/emulation.rst b/docs/about/emulation.rst index a72591e..456d01d 100644 --- a/docs/about/emulation.rst +++ b/docs/about/emulation.rst @@ -811,6 +811,10 @@ This plugin can limit the number of Instructions Per Second that are executed:: * - ips=N - Maximum number of instructions per cpu that can be executed in one second. The plugin will sleep when the given number of instructions is reached. + * - ipq=N + - Instructions per quantum. How many instructions before we re-calculate time. + The lower the number the more accurate time will be, but the less efficient the plugin. + Defaults to ips/10 Other emulation features ------------------------ diff --git a/docs/about/removed-features.rst b/docs/about/removed-features.rst index 063284d..d7c2113 100644 --- a/docs/about/removed-features.rst +++ b/docs/about/removed-features.rst @@ -162,6 +162,12 @@ specified with ``-mem-path`` can actually provide the guest RAM configured with The ``name`` parameter of the ``-net`` option was a synonym for the ``id`` parameter, which should now be used instead. +RISC-V firmware not booted by default (removed in 5.1) +'''''''''''''''''''''''''''''''''''''''''''''''''''''' + +QEMU 5.1 changes the default behaviour from ``-bios none`` to ``-bios default`` +for the RISC-V ``virt`` machine and ``sifive_u`` machine. + ``-numa node,mem=...`` (removed in 5.1) ''''''''''''''''''''''''''''''''''''''' @@ -324,12 +330,6 @@ devices. Drives the board doesn't pick up can no longer be used with This option was undocumented and not used in the field. Use ``-device usb-ccid`` instead. -RISC-V firmware not booted by default (removed in 5.1) -'''''''''''''''''''''''''''''''''''''''''''''''''''''' - -QEMU 5.1 changes the default behaviour from ``-bios none`` to ``-bios default`` -for the RISC-V ``virt`` machine and ``sifive_u`` machine. - ``-no-quit`` (removed in 7.0) ''''''''''''''''''''''''''''' @@ -722,6 +722,15 @@ Use ``multifd-channels`` instead. Use ``multifd-compression`` instead. +Incorrectly typed ``device_add`` arguments (since 9.2) +'''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Due to shortcomings in the internal implementation of ``device_add``, +QEMU used to incorrectly accept certain invalid arguments. Any object +or list arguments were silently ignored. Other argument types were not +checked, but an implicit conversion happened, so that e.g. string +values could be assigned to integer device properties or vice versa. + QEMU Machine Protocol (QMP) events ---------------------------------- @@ -902,14 +911,6 @@ The RISC-V no MMU cpus have been removed. The two CPUs: ``rv32imacu-nommu`` and ``rv64imacu-nommu`` can no longer be used. Instead the MMU status can be specified via the CPU ``mmu`` option when using the ``rv32`` or ``rv64`` CPUs. -RISC-V 'any' CPU type ``-cpu any`` (removed in 9.2) -''''''''''''''''''''''''''''''''''''''''''''''''''' - -The 'any' CPU type was introduced back in 2018 and was around since the -initial RISC-V QEMU port. Its usage was always been unclear: users don't know -what to expect from a CPU called 'any', and in fact the CPU does not do anything -special that isn't already done by the default CPUs rv32/rv64. - ``compat`` property of server class POWER CPUs (removed in 6.0) ''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' @@ -956,6 +957,14 @@ The CRIS architecture was pulled from Linux in 4.17 and the compiler was no longer packaged in any distro making it harder to run the ``check-tcg`` tests. +RISC-V 'any' CPU type ``-cpu any`` (removed in 9.2) +''''''''''''''''''''''''''''''''''''''''''''''''''' + +The 'any' CPU type was introduced back in 2018 and was around since the +initial RISC-V QEMU port. Its usage was always been unclear: users don't know +what to expect from a CPU called 'any', and in fact the CPU does not do anything +special that isn't already done by the default CPUs rv32/rv64. + System accelerators ------------------- @@ -966,18 +975,18 @@ Userspace local APIC with KVM (x86, removed in 8.0) a local APIC. The ``split`` setting is supported, as is using ``-M kernel-irqchip=off`` when the CPU does not have a local APIC. -HAXM (``-accel hax``) (removed in 8.2) -'''''''''''''''''''''''''''''''''''''' - -The HAXM project has been retired (see https://github.com/intel/haxm#status). -Use "whpx" (on Windows) or "hvf" (on macOS) instead. - MIPS "Trap-and-Emulate" KVM support (removed in 8.0) '''''''''''''''''''''''''''''''''''''''''''''''''''' The MIPS "Trap-and-Emulate" KVM host and guest support was removed from Linux in 2021, and is not supported anymore by QEMU either. +HAXM (``-accel hax``) (removed in 8.2) +'''''''''''''''''''''''''''''''''''''' + +The HAXM project has been retired (see https://github.com/intel/haxm#status). +Use "whpx" (on Windows) or "hvf" (on macOS) instead. + System emulator machines ------------------------ @@ -1035,16 +1044,6 @@ Aspeed ``swift-bmc`` machine (removed in 7.0) This machine was removed because it was unused. Alternative AST2500 based OpenPOWER machines are ``witherspoon-bmc`` and ``romulus-bmc``. -Aspeed ``tacoma-bmc`` machine (removed in 10.0) -''''''''''''''''''''''''''''''''''''''''''''''' - -The ``tacoma-bmc`` machine was removed because it didn't bring much -compared to the ``rainier-bmc`` machine. Also, the ``tacoma-bmc`` was -a board used for bring up of the AST2600 SoC that never left the -labs. It can be easily replaced by the ``rainier-bmc`` machine, which -was the actual final product, or by the ``ast2600-evb`` with some -tweaks. - ppc ``taihu`` machine (removed in 7.2) ''''''''''''''''''''''''''''''''''''''''''''' @@ -1075,6 +1074,16 @@ for all machine types using the PXA2xx and OMAP2 SoCs. We are also dropping the ``cheetah`` OMAP1 board, because we don't have any test images for it and don't know of anybody who does. +Aspeed ``tacoma-bmc`` machine (removed in 10.0) +''''''''''''''''''''''''''''''''''''''''''''''' + +The ``tacoma-bmc`` machine was removed because it didn't bring much +compared to the ``rainier-bmc`` machine. Also, the ``tacoma-bmc`` was +a board used for bring up of the AST2600 SoC that never left the +labs. It can be easily replaced by the ``rainier-bmc`` machine, which +was the actual final product, or by the ``ast2600-evb`` with some +tweaks. + ppc ``ref405ep`` machine (removed in 10.0) '''''''''''''''''''''''''''''''''''''''''' @@ -1082,6 +1091,15 @@ This machine was removed because PPC 405 CPU have no known users, firmware images are not available, OpenWRT dropped support in 2019, U-Boot in 2017, and Linux in 2024. +Big-Endian variants of ``petalogix-ml605`` and ``xlnx-zynqmp-pmu`` machines (removed in 10.1) +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +Both the MicroBlaze ``petalogix-ml605`` and ``xlnx-zynqmp-pmu`` machines +were added for little endian CPUs. Big endian support was never tested +and likely never worked. Starting with QEMU v10.1, the machines are now +only available as little-endian machines. + + linux-user mode CPUs -------------------- diff --git a/docs/devel/rust.rst b/docs/devel/rust.rst index 171d908..dc8c441 100644 --- a/docs/devel/rust.rst +++ b/docs/devel/rust.rst @@ -37,12 +37,16 @@ output directory (typically ``rust/target/``). A vanilla invocation of Cargo will complain that it cannot find the generated sources, which can be fixed in different ways: -* by using special shorthand targets in the QEMU build directory:: +* by using Makefile targets, provided by Meson, that run ``clippy`` or + ``rustdoc``: make clippy - make rustfmt make rustdoc +A target for ``rustfmt`` is also declared in ``rust/meson.build``: + + make rustfmt + * by invoking ``cargo`` through the Meson `development environment`__ feature:: @@ -50,7 +54,7 @@ which can be fixed in different ways: pyvenv/bin/meson devenv -w ../rust cargo fmt If you are going to use ``cargo`` repeatedly, ``pyvenv/bin/meson devenv`` - will enter a shell where commands like ``cargo clippy`` just work. + will enter a shell where commands like ``cargo fmt`` just work. __ https://mesonbuild.com/Commands.html#devenv @@ -66,7 +70,7 @@ be run via ``meson test`` or ``make``:: make check-rust -Building Rust code with ``--enable-modules`` is not supported yet. +Note that doctests require all ``.o`` files from the build to be available. Supported tools ''''''''''''''' @@ -92,6 +96,11 @@ are missing: architecture (VMState). Right now, VMState lacks type safety because it is hard to place the ``VMStateField`` definitions in traits. +* NUL-terminated file names with ``#[track_caller]`` are scheduled for + inclusion as ``#![feature(location_file_nul)]``, but it will be a while + before QEMU can use them. For now, there is special code in + ``util/error.c`` to support non-NUL-terminated file names. + * associated const equality would be nice to have for some users of ``callbacks::FnCall``, but is still experimental. ``ASSERT_IS_SOME`` replaces it. @@ -151,10 +160,11 @@ module status ``callbacks`` complete ``cell`` stable ``errno`` complete +``error`` stable ``irq`` complete +``log`` proof of concept ``memory`` stable ``module`` complete -``offset_of`` stable ``qdev`` stable ``qom`` stable ``sysbus`` stable diff --git a/docs/interop/bitmaps.rst b/docs/interop/bitmaps.rst index ddf8947..7536f0b 100644 --- a/docs/interop/bitmaps.rst +++ b/docs/interop/bitmaps.rst @@ -97,7 +97,7 @@ time. - Persistent storage formats may impose their own requirements on bitmap names and namespaces. Presently, only qcow2 supports persistent bitmaps. See - docs/interop/qcow2.txt for more details on restrictions. Notably: + :doc:`qcow2` for more details on restrictions. Notably: - qcow2 bitmap names are limited to between 1 and 1023 bytes long. diff --git a/docs/interop/index.rst b/docs/interop/index.rst index 999e44e..972f3e4 100644 --- a/docs/interop/index.rst +++ b/docs/interop/index.rst @@ -17,6 +17,8 @@ are useful for making QEMU interoperate with other software. nbd parallels prl-xml + qcow2 + qed_spec pr-helper qmp-spec qemu-ga diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.rst index 2c46183..5948591 100644 --- a/docs/interop/qcow2.txt +++ b/docs/interop/qcow2.rst @@ -1,6 +1,8 @@ -== General == +======================= +Qcow2 Image File Format +======================= -A qcow2 image file is organized in units of constant size, which are called +A ``qcow2`` image file is organized in units of constant size, which are called (host) clusters. A cluster is the unit in which all allocations are done, both for actual guest data and for image metadata. @@ -9,10 +11,10 @@ clusters of the same size. All numbers in qcow2 are stored in Big Endian byte order. +Header +------ -== Header == - -The first cluster of a qcow2 image contains the file header: +The first cluster of a qcow2 image contains the file header:: Byte 0 - 3: magic QCOW magic string ("QFI\xfb") @@ -38,7 +40,7 @@ The first cluster of a qcow2 image contains the file header: within a cluster (1 << cluster_bits is the cluster size). Must not be less than 9 (i.e. 512 byte clusters). - Note: qemu as of today has an implementation limit of 2 MB + Note: QEMU as of today has an implementation limit of 2 MB as the maximum cluster size and won't be able to open images with larger cluster sizes. @@ -48,7 +50,7 @@ The first cluster of a qcow2 image contains the file header: 24 - 31: size Virtual disk size in bytes. - Note: qemu has an implementation limit of 32 MB as + Note: QEMU has an implementation limit of 32 MB as the maximum L1 table size. With a 2 MB cluster size, it is unable to populate a virtual cluster beyond 2 EB (61 bits); with a 512 byte cluster @@ -87,7 +89,8 @@ The first cluster of a qcow2 image contains the file header: For version 2, the header is exactly 72 bytes in length, and finishes here. For version 3 or higher, the header length is at least 104 bytes, including -the next fields through header_length. +the next fields through ``header_length``. +:: 72 - 79: incompatible_features Bitmask of incompatible features. An implementation must @@ -185,7 +188,8 @@ the next fields through header_length. of 8. -=== Additional fields (version 3 and higher) === +Additional fields (version 3 and higher) +---------------------------------------- In general, these fields are optional and may be safely ignored by the software, as well as filled by zeros (which is equal to field absence), if software needs @@ -193,21 +197,25 @@ to set field B, but does not care about field A which precedes B. More formally, additional fields have the following compatibility rules: 1. If the value of the additional field must not be ignored for correct -handling of the file, it will be accompanied by a corresponding incompatible -feature bit. + handling of the file, it will be accompanied by a corresponding incompatible + feature bit. 2. If there are no unrecognized incompatible feature bits set, an unknown -additional field may be safely ignored other than preserving its value when -rewriting the image header. + additional field may be safely ignored other than preserving its value when + rewriting the image header. + +.. _ref_rules_3: 3. An explicit value of 0 will have the same behavior as when the field is not -present*, if not altered by a specific incompatible bit. + present*, if not altered by a specific incompatible bit. -*. A field is considered not present when header_length is less than or equal +(*) A field is considered not present when ``header_length`` is less than or equal to the field's offset. Also, all additional fields are not present for version 2. - 104: compression_type +:: + + 104: compression_type Defines the compression method used for compressed clusters. All compressed clusters in an image use the same compression @@ -219,8 +227,8 @@ version 2. or must be zero (which means deflate). Available compression type values: - 0: deflate <https://www.ietf.org/rfc/rfc1951.txt> - 1: zstd <http://github.com/facebook/zstd> + - 0: deflate <https://www.ietf.org/rfc/rfc1951.txt> + - 1: zstd <http://github.com/facebook/zstd> The deflate compression type is called "zlib" <https://www.zlib.net/> in QEMU. However, clusters with the @@ -228,19 +236,21 @@ version 2. 105 - 111: Padding, contents defined below. -=== Header padding === +Header padding +-------------- -@header_length must be a multiple of 8, which means that if the end of the last +``header_length`` must be a multiple of 8, which means that if the end of the last additional field is not aligned, some padding is needed. This padding must be zeroed, so that if some existing (or future) additional field will fall into -the padding, it will be interpreted accordingly to point [3.] of the previous +the padding, it will be interpreted accordingly to point `[3.] <#ref_rules_3>`_ of the previous paragraph, i.e. in the same manner as when this field is not present. -=== Header extensions === +Header extensions +----------------- Directly after the image header, optional sections called header extensions can -be stored. Each extension has a structure like the following: +be stored. Each extension has a structure like the following:: Byte 0 - 3: Header extension type: 0x00000000 - End of the header extension area @@ -270,17 +280,19 @@ data of compatible features that it doesn't support. Compatible features that need space for additional data can use a header extension. -== String header extensions == +String header extensions +------------------------ Some header extensions (such as the backing file format name and the external data file name) are just a single string. In this case, the header extension -length is the string length and the string is not '\0' terminated. (The header -extension padding can make it look like a string is '\0' terminated, but +length is the string length and the string is not ``\0`` terminated. (The header +extension padding can make it look like a string is ``\0`` terminated, but neither is padding always necessary nor is there a guarantee that zero bytes are used for padding.) -== Feature name table == +Feature name table +------------------ The feature name table is an optional header extension that contains the name for features used by the image. It can be used by applications that don't know @@ -288,7 +300,7 @@ the respective feature (e.g. because the feature was introduced only later) to display a useful error message. The number of entries in the feature name table is determined by the length of -the header extension data. Each entry look like this: +the header extension data. Each entry looks like this:: Byte 0: Type of feature (select feature bitmap) 0: Incompatible feature @@ -302,7 +314,8 @@ the header extension data. Each entry look like this: terminated if it has full length) -== Bitmaps extension == +Bitmaps extension +----------------- The bitmaps extension is an optional header extension. It provides the ability to store bitmaps related to a virtual disk. For now, there is only one bitmap @@ -310,9 +323,9 @@ type: the dirty tracking bitmap, which tracks virtual disk changes from some point in time. The data of the extension should be considered consistent only if the -corresponding auto-clear feature bit is set, see autoclear_features above. +corresponding auto-clear feature bit is set, see ``autoclear_features`` above. -The fields of the bitmaps extension are: +The fields of the bitmaps extension are:: Byte 0 - 3: nb_bitmaps The number of bitmaps contained in the image. Must be @@ -331,15 +344,17 @@ The fields of the bitmaps extension are: Offset into the image file at which the bitmap directory starts. Must be aligned to a cluster boundary. -== Full disk encryption header pointer == +Full disk encryption header pointer +----------------------------------- The full disk encryption header must be present if, and only if, the -'crypt_method' header requires metadata. Currently this is only true -of the 'LUKS' crypt method. The header extension must be absent for +``crypt_method`` header requires metadata. Currently this is only true +of the ``LUKS`` crypt method. The header extension must be absent for other methods. This header provides the offset at which the crypt method can store its additional data, as well as the length of such data. +:: Byte 0 - 7: Offset into the image file at which the encryption header starts in bytes. Must be aligned to a cluster @@ -357,10 +372,10 @@ The first 592 bytes of the header clusters will contain the LUKS partition header. This is then followed by the key material data areas. The size of the key material data areas is determined by the number of stripes in the key slot and key size. Refer to the LUKS format -specification ('docs/on-disk-format.pdf' in the cryptsetup source +specification (``docs/on-disk-format.pdf`` in the cryptsetup source package) for details of the LUKS partition header format. -In the LUKS partition header, the "payload-offset" field will be +In the LUKS partition header, the ``payload-offset`` field will be calculated as normal for the LUKS spec. ie the size of the LUKS header, plus key material regions, plus padding, relative to the start of the LUKS header. This offset value is not required to be @@ -369,11 +384,12 @@ context of qcow2, since the qcow2 file format itself defines where the real payload offset is, but none the less a valid payload offset should always be present. -In the LUKS key slots header, the "key-material-offset" is relative +In the LUKS key slots header, the ``key-material-offset`` is relative to the start of the LUKS header clusters in the qcow2 container, not the start of the qcow2 file. Logically the layout looks like +:: +-----------------------------+ | QCow2 header | @@ -405,7 +421,8 @@ Logically the layout looks like | | +-----------------------------+ -== Data encryption == +Data encryption +--------------- When an encryption method is requested in the header, the image payload data must be encrypted/decrypted on every write/read. The image headers @@ -413,7 +430,7 @@ and metadata are never encrypted. The algorithms used for encryption vary depending on the method - - AES: + - ``AES``: The AES cipher, in CBC mode, with 256 bit keys. @@ -425,7 +442,7 @@ The algorithms used for encryption vary depending on the method supported in the command line tools for the sake of back compatibility and data liberation. - - LUKS: + - ``LUKS``: The algorithms are specified in the LUKS header. @@ -433,7 +450,8 @@ The algorithms used for encryption vary depending on the method in the LUKS header, with the physical disk sector as the input tweak. -== Host cluster management == +Host cluster management +----------------------- qcow2 manages the allocation of host clusters by maintaining a reference count for each host cluster. A refcount of 0 means that the cluster is free, 1 means @@ -453,14 +471,15 @@ Although a large enough refcount table can reserve clusters past 64 PB large), note that some qcow2 metadata such as L1/L2 tables must point to clusters prior to that point. -Note: qemu has an implementation limit of 8 MB as the maximum refcount -table size. With a 2 MB cluster size and a default refcount_order of -4, it is unable to reference host resources beyond 2 EB (61 bits); in -the worst case, with a 512 cluster size and refcount_order of 6, it is -unable to access beyond 32 GB (35 bits). +.. note:: + QEMU has an implementation limit of 8 MB as the maximum refcount + table size. With a 2 MB cluster size and a default refcount_order of + 4, it is unable to reference host resources beyond 2 EB (61 bits); in + the worst case, with a 512 cluster size and refcount_order of 6, it is + unable to access beyond 32 GB (35 bits). Given an offset into the image file, the refcount of its cluster can be -obtained as follows: +obtained as follows:: refcount_block_entries = (cluster_size * 8 / refcount_bits) @@ -470,7 +489,7 @@ obtained as follows: refcount_block = load_cluster(refcount_table[refcount_table_index]); return refcount_block[refcount_block_index]; -Refcount table entry: +Refcount table entry:: Bit 0 - 8: Reserved (set to 0) @@ -482,14 +501,15 @@ Refcount table entry: been allocated. All refcounts managed by this refcount block are 0. -Refcount block entry (x = refcount_bits - 1): +Refcount block entry ``(x = refcount_bits - 1)``:: Bit 0 - x: Reference count of the cluster. If refcount_bits implies a sub-byte width, note that bit 0 means the least significant bit in this context. -== Cluster mapping == +Cluster mapping +--------------- Just as for refcounts, qcow2 uses a two-level structure for the mapping of guest clusters to host clusters. They are called L1 and L2 table. @@ -509,7 +529,7 @@ compressed clusters to reside below 512 TB (49 bits), and this limit cannot be relaxed without an incompatible layout change). Given an offset into the virtual disk, the offset into the image file can be -obtained as follows: +obtained as follows:: l2_entries = (cluster_size / sizeof(uint64_t)) [*] @@ -523,7 +543,7 @@ obtained as follows: [*] this changes if Extended L2 Entries are enabled, see next section -L1 table entry: +L1 table entry:: Bit 0 - 8: Reserved (set to 0) @@ -538,7 +558,7 @@ L1 table entry: refcount is exactly one. This information is only accurate in the active L1 table. -L2 table entry: +L2 table entry:: Bit 0 - 61: Cluster descriptor @@ -555,7 +575,7 @@ L2 table entry: mapping for guest cluster offsets), so this bit should be 1 for all allocated clusters. -Standard Cluster Descriptor: +Standard Cluster Descriptor:: Bit 0: If set to 1, the cluster reads as all zeros. The host cluster offset can be used to describe a preallocation, @@ -577,7 +597,7 @@ Standard Cluster Descriptor: 56 - 61: Reserved (set to 0) -Compressed Clusters Descriptor (x = 62 - (cluster_bits - 8)): +Compressed Clusters Descriptor ``(x = 62 - (cluster_bits - 8))``:: Bit 0 - x-1: Host cluster offset. This is usually _not_ aligned to a cluster or sector boundary! If cluster_bits is @@ -601,7 +621,8 @@ file (except if bit 0 in the Standard Cluster Descriptor is set). If there is no backing file or the backing file is smaller than the image, they shall read zeros for all parts that are not covered by the backing file. -== Extended L2 Entries == +Extended L2 Entries +------------------- An image uses Extended L2 Entries if bit 4 is set on the incompatible_features field of the header. @@ -615,6 +636,8 @@ subclusters so they are treated the same as in images without this feature. The size of an extended L2 entry is 128 bits so the number of entries per table is calculated using this formula: +.. code:: + l2_entries = (cluster_size / (2 * sizeof(uint64_t))) The first 64 bits have the same format as the standard L2 table entry described @@ -623,7 +646,7 @@ descriptor. The last 64 bits contain a subcluster allocation bitmap with this format: -Subcluster Allocation Bitmap (for standard clusters): +Subcluster Allocation Bitmap (for standard clusters):: Bit 0 - 31: Allocation status (one bit per subcluster) @@ -647,13 +670,14 @@ Subcluster Allocation Bitmap (for standard clusters): Bits are assigned starting from the least significant one (i.e. bit x is used for subcluster x - 32). -Subcluster Allocation Bitmap (for compressed clusters): +Subcluster Allocation Bitmap (for compressed clusters):: Bit 0 - 63: Reserved (set to 0) Compressed clusters don't have subclusters, so this field is not used. -== Snapshots == +Snapshots +--------- qcow2 supports internal snapshots. Their basic principle of operation is to switch the active L1 table, so that a different set of host clusters are @@ -672,7 +696,7 @@ in the image file, whose starting offset and length are given by the header fields snapshots_offset and nb_snapshots. The entries of the snapshot table have variable length, depending on the length of ID, name and extra data. -Snapshot table entry: +Snapshot table entry:: Byte 0 - 7: Offset into the image file at which the L1 table for the snapshot starts. Must be aligned to a cluster boundary. @@ -728,7 +752,8 @@ Snapshot table entry: next multiple of 8. -== Bitmaps == +Bitmaps +------- As mentioned above, the bitmaps extension provides the ability to store bitmaps related to a virtual disk. This section describes how these bitmaps are stored. @@ -739,20 +764,23 @@ each bitmap size is equal to the virtual disk size. Each bit of the bitmap is responsible for strictly defined range of the virtual disk. For bit number bit_nr the corresponding range (in bytes) will be: +.. code:: + [bit_nr * bitmap_granularity .. (bit_nr + 1) * bitmap_granularity - 1] Granularity is a property of the concrete bitmap, see below. -=== Bitmap directory === +Bitmap directory +---------------- Each bitmap saved in the image is described in a bitmap directory entry. The bitmap directory is a contiguous area in the image file, whose starting offset -and length are given by the header extension fields bitmap_directory_offset and -bitmap_directory_size. The entries of the bitmap directory have variable +and length are given by the header extension fields ``bitmap_directory_offset`` and +``bitmap_directory_size``. The entries of the bitmap directory have variable length, depending on the lengths of the bitmap name and extra data. -Structure of a bitmap directory entry: +Structure of a bitmap directory entry:: Byte 0 - 7: bitmap_table_offset Offset into the image file at which the bitmap table @@ -833,7 +861,8 @@ Structure of a bitmap directory entry: next multiple of 8. All bytes of the padding must be zero. -=== Bitmap table === +Bitmap table +------------ Each bitmap is stored using a one-level structure (as opposed to two-level structures like for refcounts and guest clusters mapping) for the mapping of @@ -843,7 +872,7 @@ Each bitmap table has a variable size (stored in the bitmap directory entry) and may use multiple clusters, however, it must be contiguous in the image file. -Structure of a bitmap table entry: +Structure of a bitmap table entry:: Bit 0: Reserved and must be zero if bits 9 - 55 are non-zero. If bits 9 - 55 are zero: @@ -860,11 +889,12 @@ Structure of a bitmap table entry: 56 - 63: Reserved and must be zero. -=== Bitmap data === +Bitmap data +----------- As noted above, bitmap data is stored in separate clusters, described by the bitmap table. Given an offset (in bytes) into the bitmap data, the offset into -the image file can be obtained as follows: +the image file can be obtained as follows:: image_offset(bitmap_data_offset) = bitmap_table[bitmap_data_offset / cluster_size] + @@ -875,7 +905,7 @@ above). Given an offset byte_nr into the virtual disk and the bitmap's granularity, the bit offset into the image file to the corresponding bit of the bitmap can be -calculated like this: +calculated like this:: bit_offset(byte_nr) = image_offset(byte_nr / granularity / 8) * 8 + @@ -886,21 +916,22 @@ last cluster of the bitmap data contains some unused tail bits. These bits must be zero. -=== Dirty tracking bitmaps === +Dirty tracking bitmaps +---------------------- -Bitmaps with 'type' field equal to one are dirty tracking bitmaps. +Bitmaps with ``type`` field equal to one are dirty tracking bitmaps. -When the virtual disk is in use dirty tracking bitmap may be 'enabled' or -'disabled'. While the bitmap is 'enabled', all writes to the virtual disk +When the virtual disk is in use dirty tracking bitmap may be ``enabled`` or +``disabled``. While the bitmap is ``enabled``, all writes to the virtual disk should be reflected in the bitmap. A set bit in the bitmap means that the corresponding range of the virtual disk (see above) was written to while the -bitmap was 'enabled'. An unset bit means that this range was not written to. +bitmap was ``enabled``. An unset bit means that this range was not written to. The software doesn't have to sync the bitmap in the image file with its -representation in RAM after each write or metadata change. Flag 'in_use' +representation in RAM after each write or metadata change. Flag ``in_use`` should be set while the bitmap is not synced. -In the image file the 'enabled' state is reflected by the 'auto' flag. If this -flag is set, the software must consider the bitmap as 'enabled' and start +In the image file the ``enabled`` state is reflected by the ``auto`` flag. If this +flag is set, the software must consider the bitmap as ``enabled`` and start tracking virtual disk changes to this bitmap from the first write to the virtual disk. If this flag is not set then the bitmap is disabled. diff --git a/docs/interop/qed_spec.rst b/docs/interop/qed_spec.rst new file mode 100644 index 0000000..cd6c7d9 --- /dev/null +++ b/docs/interop/qed_spec.rst @@ -0,0 +1,219 @@ +=================================== +QED Image File Format Specification +=================================== + +The file format looks like this:: + + +----------+----------+----------+-----+ + | cluster0 | cluster1 | cluster2 | ... | + +----------+----------+----------+-----+ + +The first cluster begins with the ``header``. The header contains information +about where regular clusters start; this allows the header to be extensible and +store extra information about the image file. A regular cluster may be +a ``data cluster``, an ``L2``, or an ``L1 table``. L1 and L2 tables are composed +of one or more contiguous clusters. + +Normally the file size will be a multiple of the cluster size. If the file size +is not a multiple, extra information after the last cluster may not be preserved +if data is written. Legitimate extra information should use space between the header +and the first regular cluster. + +All fields are little-endian. + +Header +------ + +:: + + Header { + uint32_t magic; /* QED\0 */ + + uint32_t cluster_size; /* in bytes */ + uint32_t table_size; /* for L1 and L2 tables, in clusters */ + uint32_t header_size; /* in clusters */ + + uint64_t features; /* format feature bits */ + uint64_t compat_features; /* compat feature bits */ + uint64_t autoclear_features; /* self-resetting feature bits */ + + uint64_t l1_table_offset; /* in bytes */ + uint64_t image_size; /* total logical image size, in bytes */ + + /* if (features & QED_F_BACKING_FILE) */ + uint32_t backing_filename_offset; /* in bytes from start of header */ + uint32_t backing_filename_size; /* in bytes */ + } + +Field descriptions: +~~~~~~~~~~~~~~~~~~~ + +- ``cluster_size`` must be a power of 2 in range [2^12, 2^26]. +- ``table_size`` must be a power of 2 in range [1, 16]. +- ``header_size`` is the number of clusters used by the header and any additional + information stored before regular clusters. +- ``features``, ``compat_features``, and ``autoclear_features`` are file format + extension bitmaps. They work as follows: + + - An image with unknown ``features`` bits enabled must not be opened. File format + changes that are not backwards-compatible must use ``features`` bits. + - An image with unknown ``compat_features`` bits enabled can be opened safely. + The unknown features are simply ignored and represent backwards-compatible + changes to the file format. + - An image with unknown ``autoclear_features`` bits enable can be opened safely + after clearing the unknown bits. This allows for backwards-compatible changes + to the file format which degrade gracefully and can be re-enabled again by a + new program later. +- ``l1_table_offset`` is the offset of the first byte of the L1 table in the image + file and must be a multiple of ``cluster_size``. +- ``image_size`` is the block device size seen by the guest and must be a multiple + of 512 bytes. +- ``backing_filename_offset`` and ``backing_filename_size`` describe a string in + (byte offset, byte size) form. It is not NUL-terminated and has no alignment constraints. + The string must be stored within the first ``header_size`` clusters. The backing filename + may be an absolute path or relative to the image file. + +Feature bits: +~~~~~~~~~~~~~ + +- ``QED_F_BACKING_FILE = 0x01``. The image uses a backing file. +- ``QED_F_NEED_CHECK = 0x02``. The image needs a consistency check before use. +- ``QED_F_BACKING_FORMAT_NO_PROBE = 0x04``. The backing file is a raw disk image + and no file format autodetection should be attempted. This should be used to + ensure that raw backing files are never detected as an image format if they happen + to contain magic constants. + +There are currently no defined ``compat_features`` or ``autoclear_features`` bits. + +Fields predicated on a feature bit are only used when that feature is set. +The fields always take up header space, regardless of whether or not the feature +bit is set. + +Tables +------ + +Tables provide the translation from logical offsets in the block device to cluster +offsets in the file. + +:: + + #define TABLE_NOFFSETS (table_size * cluster_size / sizeof(uint64_t)) + + Table { + uint64_t offsets[TABLE_NOFFSETS]; + } + +The tables are organized as follows:: + + +----------+ + | L1 table | + +----------+ + ,------' | '------. + +----------+ | +----------+ + | L2 table | ... | L2 table | + +----------+ +----------+ + ,------' | '------. + +----------+ | +----------+ + | Data | ... | Data | + +----------+ +----------+ + +A table is made up of one or more contiguous clusters. The ``table_size`` header +field determines table size for an image file. For example, ``cluster_size=64 KB`` +and ``table_size=4`` results in 256 KB tables. + +The logical image size must be less than or equal to the maximum possible size of +clusters rooted by the L1 table: + +.. code:: + + header.image_size <= TABLE_NOFFSETS * TABLE_NOFFSETS * header.cluster_size + +L1, L2, and data cluster offsets must be aligned to ``header.cluster_size``. +The following offsets have special meanings: + +L2 table offsets +~~~~~~~~~~~~~~~~ + +- 0 - unallocated. The L2 table is not yet allocated. + +Data cluster offsets +~~~~~~~~~~~~~~~~~~~~ + +- 0 - unallocated. The data cluster is not yet allocated. +- 1 - zero. The data cluster contents are all zeroes and no cluster is allocated. + +Future format extensions may wish to store per-offset information. The least +significant 12 bits of an offset are reserved for this purpose and must be set +to zero. Image files with ``cluster_size`` > 2^12 will have more unused bits +which should also be zeroed. + +Unallocated L2 tables and data clusters +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Reads to an unallocated area of the image file access the backing file. If there +is no backing file, then zeroes are produced. The backing file may be smaller +than the image file and reads of unallocated areas beyond the end of the backing +file produce zeroes. + +Writes to an unallocated area cause a new data clusters to be allocated, and a new +L2 table if that is also unallocated. The new data cluster is populated with data +from the backing file (or zeroes if no backing file) and the data being written. + +Zero data clusters +~~~~~~~~~~~~~~~~~~ + +Zero data clusters are a space-efficient way of storing zeroed regions of the image. + +Reads to a zero data cluster produce zeroes. + +.. note:: + The difference between an unallocated and a zero data cluster is that zero data + clusters stop the reading of contents from the backing file. + +Writes to a zero data cluster cause a new data cluster to be allocated. The new +data cluster is populated with zeroes and the data being written. + +Logical offset translation +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Logical offsets are translated into cluster offsets as follows:: + + table_bits table_bits cluster_bits + <--------> <--------> <---------------> + +----------+----------+-----------------+ + | L1 index | L2 index | byte offset | + +----------+----------+-----------------+ + + Structure of a logical offset + + offset_mask = ~(cluster_size - 1) # mask for the image file byte offset + + def logical_to_cluster_offset(l1_index, l2_index, byte_offset): + l2_offset = l1_table[l1_index] + l2_table = load_table(l2_offset) + cluster_offset = l2_table[l2_index] & offset_mask + return cluster_offset + byte_offset + +Consistency checking +-------------------- + +This section is informational and included to provide background on the use +of the ``QED_F_NEED_CHECK features`` bit. + +The ``QED_F_NEED_CHECK`` bit is used to mark an image as dirty before starting +an operation that could leave the image in an inconsistent state if interrupted +by a crash or power failure. A dirty image must be checked on open because its +metadata may not be consistent. + +Consistency check includes the following invariants: + +- Each cluster is referenced once and only once. It is an inconsistency to have + a cluster referenced more than once by L1 or L2 tables. A cluster has been leaked + if it has no references. +- Offsets must be within the image file size and must be ``cluster_size`` aligned. +- Table offsets must at least ``table_size`` * ``cluster_size`` bytes from the end + of the image file so that there is space for the entire table. + +The consistency check process starts from ``l1_table_offset`` and scans all L2 tables. +After the check completes with no other errors besides leaks, the ``QED_F_NEED_CHECK`` +bit can be cleared and the image can be accessed. diff --git a/docs/interop/qed_spec.txt b/docs/interop/qed_spec.txt deleted file mode 100644 index 7982e05..0000000 --- a/docs/interop/qed_spec.txt +++ /dev/null @@ -1,138 +0,0 @@ -=Specification= - -The file format looks like this: - - +----------+----------+----------+-----+ - | cluster0 | cluster1 | cluster2 | ... | - +----------+----------+----------+-----+ - -The first cluster begins with the '''header'''. The header contains information about where regular clusters start; this allows the header to be extensible and store extra information about the image file. A regular cluster may be a '''data cluster''', an '''L2''', or an '''L1 table'''. L1 and L2 tables are composed of one or more contiguous clusters. - -Normally the file size will be a multiple of the cluster size. If the file size is not a multiple, extra information after the last cluster may not be preserved if data is written. Legitimate extra information should use space between the header and the first regular cluster. - -All fields are little-endian. - -==Header== - Header { - uint32_t magic; /* QED\0 */ - - uint32_t cluster_size; /* in bytes */ - uint32_t table_size; /* for L1 and L2 tables, in clusters */ - uint32_t header_size; /* in clusters */ - - uint64_t features; /* format feature bits */ - uint64_t compat_features; /* compat feature bits */ - uint64_t autoclear_features; /* self-resetting feature bits */ - - uint64_t l1_table_offset; /* in bytes */ - uint64_t image_size; /* total logical image size, in bytes */ - - /* if (features & QED_F_BACKING_FILE) */ - uint32_t backing_filename_offset; /* in bytes from start of header */ - uint32_t backing_filename_size; /* in bytes */ - } - -Field descriptions: -* ''cluster_size'' must be a power of 2 in range [2^12, 2^26]. -* ''table_size'' must be a power of 2 in range [1, 16]. -* ''header_size'' is the number of clusters used by the header and any additional information stored before regular clusters. -* ''features'', ''compat_features'', and ''autoclear_features'' are file format extension bitmaps. They work as follows: -** An image with unknown ''features'' bits enabled must not be opened. File format changes that are not backwards-compatible must use ''features'' bits. -** An image with unknown ''compat_features'' bits enabled can be opened safely. The unknown features are simply ignored and represent backwards-compatible changes to the file format. -** An image with unknown ''autoclear_features'' bits enable can be opened safely after clearing the unknown bits. This allows for backwards-compatible changes to the file format which degrade gracefully and can be re-enabled again by a new program later. -* ''l1_table_offset'' is the offset of the first byte of the L1 table in the image file and must be a multiple of ''cluster_size''. -* ''image_size'' is the block device size seen by the guest and must be a multiple of 512 bytes. -* ''backing_filename_offset'' and ''backing_filename_size'' describe a string in (byte offset, byte size) form. It is not NUL-terminated and has no alignment constraints. The string must be stored within the first ''header_size'' clusters. The backing filename may be an absolute path or relative to the image file. - -Feature bits: -* QED_F_BACKING_FILE = 0x01. The image uses a backing file. -* QED_F_NEED_CHECK = 0x02. The image needs a consistency check before use. -* QED_F_BACKING_FORMAT_NO_PROBE = 0x04. The backing file is a raw disk image and no file format autodetection should be attempted. This should be used to ensure that raw backing files are never detected as an image format if they happen to contain magic constants. - -There are currently no defined ''compat_features'' or ''autoclear_features'' bits. - -Fields predicated on a feature bit are only used when that feature is set. The fields always take up header space, regardless of whether or not the feature bit is set. - -==Tables== - -Tables provide the translation from logical offsets in the block device to cluster offsets in the file. - - #define TABLE_NOFFSETS (table_size * cluster_size / sizeof(uint64_t)) - - Table { - uint64_t offsets[TABLE_NOFFSETS]; - } - -The tables are organized as follows: - - +----------+ - | L1 table | - +----------+ - ,------' | '------. - +----------+ | +----------+ - | L2 table | ... | L2 table | - +----------+ +----------+ - ,------' | '------. - +----------+ | +----------+ - | Data | ... | Data | - +----------+ +----------+ - -A table is made up of one or more contiguous clusters. The table_size header field determines table size for an image file. For example, cluster_size=64 KB and table_size=4 results in 256 KB tables. - -The logical image size must be less than or equal to the maximum possible size of clusters rooted by the L1 table: - header.image_size <= TABLE_NOFFSETS * TABLE_NOFFSETS * header.cluster_size - -L1, L2, and data cluster offsets must be aligned to header.cluster_size. The following offsets have special meanings: - -===L2 table offsets=== -* 0 - unallocated. The L2 table is not yet allocated. - -===Data cluster offsets=== -* 0 - unallocated. The data cluster is not yet allocated. -* 1 - zero. The data cluster contents are all zeroes and no cluster is allocated. - -Future format extensions may wish to store per-offset information. The least significant 12 bits of an offset are reserved for this purpose and must be set to zero. Image files with cluster_size > 2^12 will have more unused bits which should also be zeroed. - -===Unallocated L2 tables and data clusters=== -Reads to an unallocated area of the image file access the backing file. If there is no backing file, then zeroes are produced. The backing file may be smaller than the image file and reads of unallocated areas beyond the end of the backing file produce zeroes. - -Writes to an unallocated area cause a new data clusters to be allocated, and a new L2 table if that is also unallocated. The new data cluster is populated with data from the backing file (or zeroes if no backing file) and the data being written. - -===Zero data clusters=== -Zero data clusters are a space-efficient way of storing zeroed regions of the image. - -Reads to a zero data cluster produce zeroes. Note that the difference between an unallocated and a zero data cluster is that zero data clusters stop the reading of contents from the backing file. - -Writes to a zero data cluster cause a new data cluster to be allocated. The new data cluster is populated with zeroes and the data being written. - -===Logical offset translation=== -Logical offsets are translated into cluster offsets as follows: - - table_bits table_bits cluster_bits - <--------> <--------> <---------------> - +----------+----------+-----------------+ - | L1 index | L2 index | byte offset | - +----------+----------+-----------------+ - - Structure of a logical offset - - offset_mask = ~(cluster_size - 1) # mask for the image file byte offset - - def logical_to_cluster_offset(l1_index, l2_index, byte_offset): - l2_offset = l1_table[l1_index] - l2_table = load_table(l2_offset) - cluster_offset = l2_table[l2_index] & offset_mask - return cluster_offset + byte_offset - -==Consistency checking== - -This section is informational and included to provide background on the use of the QED_F_NEED_CHECK ''features'' bit. - -The QED_F_NEED_CHECK bit is used to mark an image as dirty before starting an operation that could leave the image in an inconsistent state if interrupted by a crash or power failure. A dirty image must be checked on open because its metadata may not be consistent. - -Consistency check includes the following invariants: -# Each cluster is referenced once and only once. It is an inconsistency to have a cluster referenced more than once by L1 or L2 tables. A cluster has been leaked if it has no references. -# Offsets must be within the image file size and must be ''cluster_size'' aligned. -# Table offsets must at least ''table_size'' * ''cluster_size'' bytes from the end of the image file so that there is space for the entire table. - -The consistency check process starts by from ''l1_table_offset'' and scans all L2 tables. After the check completes with no other errors besides leaks, the QED_F_NEED_CHECK bit can be cleared and the image can be accessed. diff --git a/docs/qcow2-cache.txt b/docs/qcow2-cache.txt index 5f763aa..204a574 100644 --- a/docs/qcow2-cache.txt +++ b/docs/qcow2-cache.txt @@ -15,7 +15,7 @@ not a straightforward operation. This document attempts to give an overview of the L2 and refcount caches, and how to configure them. -Please refer to the docs/interop/qcow2.txt file for an in-depth +Please refer to the docs/interop/qcow2.rst file for an in-depth technical description of the qcow2 file format. diff --git a/docs/sphinx/qapi_domain.py b/docs/sphinx/qapi_domain.py index c94af57..ebc46a7 100644 --- a/docs/sphinx/qapi_domain.py +++ b/docs/sphinx/qapi_domain.py @@ -20,16 +20,6 @@ from typing import ( from docutils import nodes from docutils.parsers.rst import directives - -from compat import ( - CompatField, - CompatGroupedField, - CompatTypedField, - KeywordNode, - ParserFix, - Signature, - SpaceNode, -) from sphinx import addnodes from sphinx.directives import ObjectDescription from sphinx.domains import ( @@ -44,6 +34,16 @@ from sphinx.util import logging from sphinx.util.docutils import SphinxDirective from sphinx.util.nodes import make_id, make_refnode +from compat import ( + CompatField, + CompatGroupedField, + CompatTypedField, + KeywordNode, + ParserFix, + Signature, + SpaceNode, +) + if TYPE_CHECKING: from typing import ( @@ -56,7 +56,6 @@ if TYPE_CHECKING: ) from docutils.nodes import Element, Node - from sphinx.addnodes import desc_signature, pending_xref from sphinx.application import Sphinx from sphinx.builders import Builder @@ -168,6 +167,8 @@ class QAPIDescription(ParserFix): """ def handle_signature(self, sig: str, signode: desc_signature) -> Signature: + # pylint: disable=unused-argument + # Do nothing. The return value here is the "name" of the entity # being documented; for QAPI, this is the same as the # "signature", which is just a name. @@ -210,6 +211,8 @@ class QAPIDescription(ParserFix): def add_target_and_index( self, name: Signature, sig: str, signode: desc_signature ) -> None: + # pylint: disable=unused-argument + # name is the return value of handle_signature. # sig is the original, raw text argument to handle_signature. # For QAPI, these are identical, currently. diff --git a/docs/sphinx/qapidoc.py b/docs/sphinx/qapidoc.py index 661b2c4..8011ac9 100644 --- a/docs/sphinx/qapidoc.py +++ b/docs/sphinx/qapidoc.py @@ -27,6 +27,7 @@ https://www.sphinx-doc.org/en/master/development/index.html from __future__ import annotations + __version__ = "2.0" from contextlib import contextmanager @@ -56,8 +57,6 @@ from qapi.schema import ( QAPISchemaVisitor, ) from qapi.source import QAPISourceInfo - -from qapidoc_legacy import QAPISchemaGenRSTVisitor # type: ignore from sphinx import addnodes from sphinx.directives.code import CodeBlock from sphinx.errors import ExtensionError @@ -65,6 +64,8 @@ from sphinx.util import logging from sphinx.util.docutils import SphinxDirective, switch_source_input from sphinx.util.nodes import nested_parse_with_titles +from qapidoc_legacy import QAPISchemaGenRSTVisitor # type: ignore + if TYPE_CHECKING: from typing import ( diff --git a/docs/system/arm/aspeed.rst b/docs/system/arm/aspeed.rst index 58a8020..43d27d8 100644 --- a/docs/system/arm/aspeed.rst +++ b/docs/system/arm/aspeed.rst @@ -1,4 +1,4 @@ -Aspeed family boards (``ast2500-evb``, ``ast2600-evb``, ``ast2700-evb``, ``ast2700fc``, ``bletchley-bmc``, ``fuji-bmc``, ``fby35-bmc``, ``fp5280g2-bmc``, ``g220a-bmc``, ``palmetto-bmc``, ``qcom-dc-scm-v1-bmc``, ``qcom-firework-bmc``, ``quanta-q71l-bmc``, ``rainier-bmc``, ``romulus-bmc``, ``sonorapass-bmc``, ``supermicrox11-bmc``, ``supermicrox11spi-bmc``, ``tiogapass-bmc``, ``witherspoon-bmc``, ``yosemitev2-bmc``) +Aspeed family boards (``ast2500-evb``, ``ast2600-evb``, ``ast2700-evb``, ``bletchley-bmc``, ``fuji-bmc``, ``fby35-bmc``, ``fp5280g2-bmc``, ``g220a-bmc``, ``palmetto-bmc``, ``qcom-dc-scm-v1-bmc``, ``qcom-firework-bmc``, ``quanta-q71l-bmc``, ``rainier-bmc``, ``romulus-bmc``, ``sonorapass-bmc``, ``supermicrox11-bmc``, ``supermicrox11spi-bmc``, ``tiogapass-bmc``, ``witherspoon-bmc``, ``yosemitev2-bmc``) ================================================================================================================================================================================================================================================================================================================================================================================================================================= The QEMU Aspeed machines model BMCs of various OpenPOWER systems and diff --git a/docs/system/confidential-guest-support.rst b/docs/system/confidential-guest-support.rst index 0c490db..66129fb 100644 --- a/docs/system/confidential-guest-support.rst +++ b/docs/system/confidential-guest-support.rst @@ -38,6 +38,7 @@ Supported mechanisms Currently supported confidential guest mechanisms are: * AMD Secure Encrypted Virtualization (SEV) (see :doc:`i386/amd-memory-encryption`) +* Intel Trust Domain Extension (TDX) (see :doc:`i386/tdx`) * POWER Protected Execution Facility (PEF) (see :ref:`power-papr-protected-execution-facility-pef`) * s390x Protected Virtualization (PV) (see :doc:`s390x/protvirt`) diff --git a/docs/system/i386/tdx.rst b/docs/system/i386/tdx.rst new file mode 100644 index 0000000..8131750 --- /dev/null +++ b/docs/system/i386/tdx.rst @@ -0,0 +1,161 @@ +Intel Trusted Domain eXtension (TDX) +==================================== + +Intel Trusted Domain eXtensions (TDX) refers to an Intel technology that extends +Virtual Machine Extensions (VMX) and Multi-Key Total Memory Encryption (MKTME) +with a new kind of virtual machine guest called a Trust Domain (TD). A TD runs +in a CPU mode that is designed to protect the confidentiality of its memory +contents and its CPU state from any other software, including the hosting +Virtual Machine Monitor (VMM), unless explicitly shared by the TD itself. + +Prerequisites +------------- + +To run TD, the physical machine needs to have TDX module loaded and initialized +while KVM hypervisor has TDX support and has TDX enabled. If those requirements +are met, the ``KVM_CAP_VM_TYPES`` will report the support of ``KVM_X86_TDX_VM``. + +Trust Domain Virtual Firmware (TDVF) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Trust Domain Virtual Firmware (TDVF) is required to provide TD services to boot +TD Guest OS. TDVF needs to be copied to guest private memory and measured before +the TD boots. + +KVM vcpu ioctl ``KVM_TDX_INIT_MEM_REGION`` can be used to populate the TDVF +content into its private memory. + +Since TDX doesn't support readonly memslot, TDVF cannot be mapped as pflash +device and it actually works as RAM. "-bios" option is chosen to load TDVF. + +OVMF is the opensource firmware that implements the TDVF support. Thus the +command line to specify and load TDVF is ``-bios OVMF.fd`` + +Feature Configuration +--------------------- + +Unlike non-TDX VM, the CPU features (enumerated by CPU or MSR) of a TD are not +under full control of VMM. VMM can only configure part of features of a TD on +``KVM_TDX_INIT_VM`` command of VM scope ``MEMORY_ENCRYPT_OP`` ioctl. + +The configurable features have three types: + +- Attributes: + - PKS (bit 30) controls whether Supervisor Protection Keys is exposed to TD, + which determines related CPUID bit and CR4 bit; + - PERFMON (bit 63) controls whether PMU is exposed to TD. + +- XSAVE related features (XFAM): + XFAM is a 64b mask, which has the same format as XCR0 or IA32_XSS MSR. It + determines the set of extended features available for use by the guest TD. + +- CPUID features: + Only some bits of some CPUID leaves are directly configurable by VMM. + +What features can be configured is reported via TDX capabilities. + +TDX capabilities +~~~~~~~~~~~~~~~~ + +The VM scope ``MEMORY_ENCRYPT_OP`` ioctl provides command ``KVM_TDX_CAPABILITIES`` +to get the TDX capabilities from KVM. It returns a data structure of +``struct kvm_tdx_capabilities``, which tells the supported configuration of +attributes, XFAM and CPUIDs. + +TD attributes +~~~~~~~~~~~~~ + +QEMU supports configuring raw 64-bit TD attributes directly via "attributes" +property of "tdx-guest" object. Note, it's users' responsibility to provide a +valid value because some bits may not supported by current QEMU or KVM yet. + +QEMU also supports the configuration of individual attribute bits that are +supported by it, via properties of "tdx-guest" object. +E.g., "sept-ve-disable" (bit 28). + +MSR based features +~~~~~~~~~~~~~~~~~~ + +Current KVM doesn't support MSR based feature (e.g., MSR_IA32_ARCH_CAPABILITIES) +configuration for TDX, and it's a future work to enable it in QEMU when KVM adds +support of it. + +Feature check +~~~~~~~~~~~~~ + +QEMU checks if the final (CPU) features, determined by given cpu model and +explicit feature adjustment of "+featureA/-featureB", can be supported or not. +It can produce feature not supported warning like + + "warning: host doesn't support requested feature: CPUID.07H:EBX.intel-pt [bit 25]" + +It can also produce warning like + + "warning: TDX forcibly sets the feature: CPUID.80000007H:EDX.invtsc [bit 8]" + +if the fixed-1 feature is requested to be disabled explicitly. This is newly +added to QEMU for TDX because TDX has fixed-1 features that are forcibly enabled +by TDX module and VMM cannot disable them. + +Launching a TD (TDX VM) +----------------------- + +To launch a TD, the necessary command line options are tdx-guest object and +split kernel-irqchip, as below: + +.. parsed-literal:: + + |qemu_system_x86| \\ + -accel kvm \\ + -cpu host \\ + -object tdx-guest,id=tdx0 \\ + -machine ...,confidential-guest-support=tdx0 \\ + -bios OVMF.fd \\ + +Restrictions +------------ + + - kernel-irqchip must be split; + + This is set by default for TDX guest if kernel-irqchip is left on its default + 'auto' setting. + + - No readonly support for private memory; + + - No SMM support: SMM support requires manipulating the guest register states + which is not allowed; + +Debugging +--------- + +Bit 0 of TD attributes, is DEBUG bit, which decides if the TD runs in off-TD +debug mode. When in off-TD debug mode, TD's VCPU state and private memory are +accessible via given SEAMCALLs. This requires KVM to expose APIs to invoke those +SEAMCALLs and corresonponding QEMU change. + +It's targeted as future work. + +TD attestation +-------------- + +In TD guest, the attestation process is used to verify the TDX guest +trustworthiness to other entities before provisioning secrets to the guest. + +TD attestation is initiated first by calling TDG.MR.REPORT inside TD to get the +REPORT. Then the REPORT data needs to be converted into a remotely verifiable +Quote by SGX Quoting Enclave (QE). + +It's a future work in QEMU to add support of TD attestation since it lacks +support in current KVM. + +Live Migration +-------------- + +Future work. + +References +---------- + +- `TDX Homepage <https://www.intel.com/content/www/us/en/developer/articles/technical/intel-trust-domain-extensions.html>`__ + +- `SGX QE <https://github.com/intel/SGXDataCenterAttestationPrimitives/tree/master/QuoteGeneration>`__ diff --git a/docs/system/target-i386.rst b/docs/system/target-i386.rst index ab7af1a..43b09c7 100644 --- a/docs/system/target-i386.rst +++ b/docs/system/target-i386.rst @@ -31,6 +31,7 @@ Architectural features i386/kvm-pv i386/sgx i386/amd-memory-encryption + i386/tdx OS requirements ~~~~~~~~~~~~~~~ diff --git a/gdb-xml/aarch64-core.xml b/gdb-xml/aarch64-core.xml index e1e9dc3..b804651 100644 --- a/gdb-xml/aarch64-core.xml +++ b/gdb-xml/aarch64-core.xml @@ -1,5 +1,5 @@ <?xml version="1.0"?> -<!-- Copyright (C) 2009-2012 Free Software Foundation, Inc. +<!-- Copyright (C) 2009-2025 Free Software Foundation, Inc. Contributed by ARM Ltd. Copying and distribution of this file, with or without modification, @@ -42,5 +42,53 @@ <reg name="sp" bitsize="64" type="data_ptr"/> <reg name="pc" bitsize="64" type="code_ptr"/> - <reg name="cpsr" bitsize="32"/> + + <flags id="cpsr_flags" size="4"> + <!-- Stack Pointer. --> + <field name="SP" start="0" end="0"/> + + <!-- Exception Level. --> + <field name="EL" start="2" end="3"/> + <!-- Execution state. --> + <field name="nRW" start="4" end="4"/> + + <!-- FIQ interrupt mask. --> + <field name="F" start="6" end="6"/> + <!-- IRQ interrupt mask. --> + <field name="I" start="7" end="7"/> + <!-- SError interrupt mask. --> + <field name="A" start="8" end="8"/> + <!-- Debug exception mask. --> + <field name="D" start="9" end="9"/> + + <!-- ARMv8.5-A: Branch Target Identification BTYPE. --> + <field name="BTYPE" start="10" end="11"/> + + <!-- ARMv8.0-A: Speculative Store Bypass. --> + <field name="SSBS" start="12" end="12"/> + + <!-- Illegal Execution state. --> + <field name="IL" start="20" end="20"/> + <!-- Software Step. --> + <field name="SS" start="21" end="21"/> + <!-- ARMv8.1-A: Privileged Access Never. --> + <field name="PAN" start="22" end="22"/> + <!-- ARMv8.2-A: User Access Override. --> + <field name="UAO" start="23" end="23"/> + <!-- ARMv8.4-A: Data Independent Timing. --> + <field name="DIT" start="24" end="24"/> + <!-- ARMv8.5-A: Tag Check Override. --> + <field name="TCO" start="25" end="25"/> + + <!-- Overflow Condition flag. --> + <field name="V" start="28" end="28"/> + <!-- Carry Condition flag. --> + <field name="C" start="29" end="29"/> + <!-- Zero Condition flag. --> + <field name="Z" start="30" end="30"/> + <!-- Negative Condition flag. --> + <field name="N" start="31" end="31"/> + </flags> + <reg name="cpsr" bitsize="32" type="cpsr_flags"/> + </feature> diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c index 565f6b3..def0b7e 100644 --- a/gdbstub/gdbstub.c +++ b/gdbstub/gdbstub.c @@ -28,6 +28,7 @@ #include "qemu/cutils.h" #include "qemu/module.h" #include "qemu/error-report.h" +#include "qemu/target-info.h" #include "trace.h" #include "exec/gdbstub.h" #include "gdbstub/commands.h" @@ -1343,8 +1344,8 @@ static void handle_read_all_regs(GArray *params, void *user_ctx) len += gdb_read_register(gdbserver_state.g_cpu, gdbserver_state.mem_buf, reg_id); + g_assert(len == gdbserver_state.mem_buf->len); } - g_assert(len == gdbserver_state.mem_buf->len); gdb_memtohex(gdbserver_state.str_buf, gdbserver_state.mem_buf->data, len); gdb_put_strbuf(); @@ -1597,6 +1598,18 @@ static void handle_query_threads(GArray *params, void *user_ctx) gdbserver_state.query_cpu = gdb_next_attached_cpu(gdbserver_state.query_cpu); } +static void handle_query_gdb_server_version(GArray *params, void *user_ctx) +{ +#if defined(CONFIG_USER_ONLY) + g_string_printf(gdbserver_state.str_buf, "name:qemu-%s;version:%s;", + target_name(), QEMU_VERSION); +#else + g_string_printf(gdbserver_state.str_buf, "name:qemu-system-%s;version:%s;", + target_name(), QEMU_VERSION); +#endif + gdb_put_strbuf(); +} + static void handle_query_first_threads(GArray *params, void *user_ctx) { gdbserver_state.query_cpu = gdb_first_attached_cpu(); @@ -1843,6 +1856,10 @@ static const GdbCmdParseEntry gdb_gen_query_table[] = { .cmd = "sThreadInfo", }, { + .handler = handle_query_gdb_server_version, + .cmd = "GDBServerVersion", + }, + { .handler = handle_query_first_threads, .cmd = "fThreadInfo", }, diff --git a/gdbstub/meson.build b/gdbstub/meson.build index b25db86..15c666f 100644 --- a/gdbstub/meson.build +++ b/gdbstub/meson.build @@ -5,13 +5,13 @@ # # We build two versions of gdbstub, one for each mode -libuser_ss.add(files( +user_ss.add(files( 'gdbstub.c', 'syscalls.c', 'user.c' )) -libsystem_ss.add(files( +system_ss.add(files( 'gdbstub.c', 'syscalls.c', 'system.c' diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx index c59cd66..639a450 100644 --- a/hmp-commands-info.hx +++ b/hmp-commands-info.hx @@ -475,9 +475,9 @@ ERST { .name = "migrate", - .args_type = "", - .params = "", - .help = "show migration status", + .args_type = "all:-a", + .params = "[-a]", + .help = "show migration status (-a: all, dump all status)", .cmd = hmp_info_migrate, }, diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig index a55b44d..f543d94 100644 --- a/hw/arm/Kconfig +++ b/hw/arm/Kconfig @@ -147,7 +147,6 @@ config OMAP bool select FRAMEBUFFER select I2C - select NAND select PFLASH_CFI01 select SD select SERIAL_MM diff --git a/hw/arm/aspeed_ast27x0-fc.c b/hw/arm/aspeed_ast27x0-fc.c index 125a3ad..7087be4 100644 --- a/hw/arm/aspeed_ast27x0-fc.c +++ b/hw/arm/aspeed_ast27x0-fc.c @@ -48,7 +48,7 @@ struct Ast2700FCState { bool mmio_exec; }; -#define AST2700FC_BMC_RAM_SIZE (2 * GiB) +#define AST2700FC_BMC_RAM_SIZE (1 * GiB) #define AST2700FC_CM4_DRAM_SIZE (32 * MiB) #define AST2700FC_HW_STRAP1 0x000000C0 @@ -68,6 +68,7 @@ static void ast2700fc_ca35_init(MachineState *machine) memory_region_init(&s->ca35_memory, OBJECT(&s->ca35), "ca35-memory", UINT64_MAX); + memory_region_add_subregion(get_system_memory(), 0, &s->ca35_memory); if (!memory_region_init_ram(&s->ca35_dram, OBJECT(&s->ca35), "ca35-dram", AST2700FC_BMC_RAM_SIZE, &error_abort)) { @@ -86,6 +87,13 @@ static void ast2700fc_ca35_init(MachineState *machine) AST2700FC_BMC_RAM_SIZE, &error_abort)) { return; } + + for (int i = 0; i < sc->macs_num; i++) { + if (!qemu_configure_nic_device(DEVICE(&soc->ftgmac100[i]), + true, NULL)) { + break; + } + } if (!object_property_set_int(OBJECT(&s->ca35), "hw-strap1", AST2700FC_HW_STRAP1, &error_abort)) { return; diff --git a/hw/arm/aspeed_ast27x0.c b/hw/arm/aspeed_ast27x0.c index 1974a25..6aa3841 100644 --- a/hw/arm/aspeed_ast27x0.c +++ b/hw/arm/aspeed_ast27x0.c @@ -23,14 +23,14 @@ #include "qobject/qlist.h" #include "qemu/log.h" -#define AST2700_SOC_IO_SIZE 0x01000000 +#define AST2700_SOC_IO_SIZE 0x00FE0000 #define AST2700_SOC_IOMEM_SIZE 0x01000000 #define AST2700_SOC_DPMCU_SIZE 0x00040000 #define AST2700_SOC_LTPI_SIZE 0x01000000 static const hwaddr aspeed_soc_ast2700_memmap[] = { - [ASPEED_DEV_IOMEM] = 0x00000000, [ASPEED_DEV_VBOOTROM] = 0x00000000, + [ASPEED_DEV_IOMEM] = 0x00020000, [ASPEED_DEV_SRAM] = 0x10000000, [ASPEED_DEV_DPMCU] = 0x11000000, [ASPEED_DEV_IOMEM0] = 0x12000000, @@ -346,8 +346,9 @@ static void aspeed_ram_capacity_write(void *opaque, hwaddr addr, uint64_t data, * If writes the data to the address which is beyond the ram size, * it would write the data to the "address % ram_size". */ - result = address_space_write(&s->dram_as, addr % ram_size, - MEMTXATTRS_UNSPECIFIED, &data, 4); + address_space_stl_le(&s->dram_as, addr % ram_size, data, + MEMTXATTRS_UNSPECIFIED, &result); + if (result != MEMTX_OK) { qemu_log_mask(LOG_GUEST_ERROR, "%s: DRAM write failed, addr:0x%" HWADDR_PRIx @@ -360,9 +361,10 @@ static const MemoryRegionOps aspeed_ram_capacity_ops = { .read = aspeed_ram_capacity_read, .write = aspeed_ram_capacity_write, .endianness = DEVICE_LITTLE_ENDIAN, + .impl.min_access_size = 4, .valid = { - .min_access_size = 1, - .max_access_size = 8, + .min_access_size = 4, + .max_access_size = 4, }, }; diff --git a/hw/arm/boot.c b/hw/arm/boot.c index f94b940..becd827 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -19,6 +19,7 @@ #include "system/kvm.h" #include "system/tcg.h" #include "system/system.h" +#include "system/memory.h" #include "system/numa.h" #include "hw/boards.h" #include "system/reset.h" @@ -526,7 +527,7 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo, if (binfo->dtb_filename) { char *filename; - filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, binfo->dtb_filename); + filename = qemu_find_file(QEMU_FILE_TYPE_DTB, binfo->dtb_filename); if (!filename) { fprintf(stderr, "Couldn't open dtb file %s\n", binfo->dtb_filename); goto fail; @@ -743,7 +744,7 @@ static void do_cpu_reset(void *opaque) } else { if (arm_feature(env, ARM_FEATURE_EL3) && (info->secure_boot || - (info->secure_board_setup && cs == first_cpu))) { + (info->secure_board_setup && cpu == info->primary_cpu))) { /* Start this CPU in Secure SVC */ target_el = 3; } @@ -751,7 +752,7 @@ static void do_cpu_reset(void *opaque) arm_emulate_firmware_reset(cs, target_el); - if (cs == first_cpu) { + if (cpu == info->primary_cpu) { AddressSpace *as = arm_boot_address_space(cpu, info); cpu_set_pc(cs, info->loader_start); @@ -1238,6 +1239,9 @@ void arm_load_kernel(ARMCPU *cpu, MachineState *ms, struct arm_boot_info *info) info->dtb_filename = ms->dtb; info->dtb_limit = 0; + /* We assume the CPU passed as argument is the primary CPU. */ + info->primary_cpu = cpu; + /* Load the kernel. */ if (!info->kernel_filename || info->firmware_loaded) { arm_setup_firmware_boot(cpu, info); @@ -1287,12 +1291,8 @@ void arm_load_kernel(ARMCPU *cpu, MachineState *ms, struct arm_boot_info *info) object_property_set_int(cpuobj, "psci-conduit", info->psci_conduit, &error_abort); - /* - * Secondary CPUs start in PSCI powered-down state. Like the - * code in do_cpu_reset(), we assume first_cpu is the primary - * CPU. - */ - if (cs != first_cpu) { + /* Secondary CPUs start in PSCI powered-down state. */ + if (ARM_CPU(cs) != info->primary_cpu) { object_property_set_bool(cpuobj, "start-powered-off", true, &error_abort); } diff --git a/hw/arm/fby35.c b/hw/arm/fby35.c index e123fa6..c14fc2e 100644 --- a/hw/arm/fby35.c +++ b/hw/arm/fby35.c @@ -77,6 +77,7 @@ static void fby35_bmc_init(Fby35State *s) memory_region_init(&s->bmc_memory, OBJECT(&s->bmc), "bmc-memory", UINT64_MAX); + memory_region_add_subregion(get_system_memory(), 0, &s->bmc_memory); memory_region_init_ram(&s->bmc_dram, OBJECT(&s->bmc), "bmc-dram", FBY35_BMC_RAM_SIZE, &error_abort); diff --git a/hw/arm/meson.build b/hw/arm/meson.build index 5098795..d90be8f 100644 --- a/hw/arm/meson.build +++ b/hw/arm/meson.build @@ -8,7 +8,7 @@ arm_common_ss.add(when: 'CONFIG_HIGHBANK', if_true: files('highbank.c')) arm_common_ss.add(when: 'CONFIG_INTEGRATOR', if_true: files('integratorcp.c')) arm_common_ss.add(when: 'CONFIG_MICROBIT', if_true: files('microbit.c')) arm_common_ss.add(when: 'CONFIG_MPS3R', if_true: files('mps3r.c')) -arm_common_ss.add(when: 'CONFIG_MUSICPAL', if_true: [pixman, files('musicpal.c')]) +arm_common_ss.add(when: 'CONFIG_MUSICPAL', if_true: [files('musicpal.c')]) arm_common_ss.add(when: 'CONFIG_NETDUINOPLUS2', if_true: files('netduinoplus2.c')) arm_common_ss.add(when: 'CONFIG_OLIMEX_STM32_H405', if_true: files('olimex-stm32-h405.c')) arm_common_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx.c', 'npcm7xx_boards.c')) @@ -79,7 +79,7 @@ arm_common_ss.add(when: 'CONFIG_SX1', if_true: files('omap_sx1.c')) arm_common_ss.add(when: 'CONFIG_VERSATILE', if_true: files('versatilepb.c')) arm_common_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c')) -arm_common_ss.add(fdt, files('boot.c')) +arm_common_ss.add(files('boot.c')) hw_arch += {'arm': arm_ss} hw_common_arch += {'arm': arm_common_ss} diff --git a/hw/arm/mps2.c b/hw/arm/mps2.c index 58efb41..bd378e3 100644 --- a/hw/arm/mps2.c +++ b/hw/arm/mps2.c @@ -224,7 +224,11 @@ static void mps2_common_init(MachineState *machine) switch (mmc->fpga_type) { case FPGA_AN385: case FPGA_AN386: + qdev_prop_set_uint32(armv7m, "num-irq", 32); + break; case FPGA_AN500: + /* The AN500 configures its Cortex-M7 with 16 MPU regions */ + qdev_prop_set_uint32(armv7m, "mpu-ns-regions", 16); qdev_prop_set_uint32(armv7m, "num-irq", 32); break; case FPGA_AN511: diff --git a/hw/arm/npcm8xx.c b/hw/arm/npcm8xx.c index d7ee306..a276fea 100644 --- a/hw/arm/npcm8xx.c +++ b/hw/arm/npcm8xx.c @@ -67,6 +67,9 @@ /* SDHCI Modules */ #define NPCM8XX_MMC_BA 0xf0842000 +/* PCS Module */ +#define NPCM8XX_PCS_BA 0xf0780000 + /* PSPI Modules */ #define NPCM8XX_PSPI_BA 0xf0201000 @@ -85,6 +88,10 @@ enum NPCM8xxInterrupt { NPCM8XX_ADC_IRQ = 0, NPCM8XX_PECI_IRQ = 6, NPCM8XX_KCS_HIB_IRQ = 9, + NPCM8XX_GMAC1_IRQ = 14, + NPCM8XX_GMAC2_IRQ, + NPCM8XX_GMAC3_IRQ, + NPCM8XX_GMAC4_IRQ, NPCM8XX_MMC_IRQ = 26, NPCM8XX_PSPI_IRQ = 28, NPCM8XX_TIMER0_IRQ = 32, /* Timer Module 0 */ @@ -260,6 +267,14 @@ static const hwaddr npcm8xx_smbus_addr[] = { 0xfff0a000, }; +/* Register base address for each GMAC Module */ +static const hwaddr npcm8xx_gmac_addr[] = { + 0xf0802000, + 0xf0804000, + 0xf0806000, + 0xf0808000, +}; + /* Register base address for each USB host EHCI registers */ static const hwaddr npcm8xx_ehci_addr[] = { 0xf0828100, @@ -350,6 +365,7 @@ static struct arm_boot_info npcm8xx_binfo = { .secure_boot = false, .board_id = -1, .board_setup_addr = NPCM8XX_BOARD_SETUP_ADDR, + .psci_conduit = QEMU_PSCI_CONDUIT_SMC, }; void npcm8xx_load_kernel(MachineState *machine, NPCM8xxState *soc) @@ -444,6 +460,11 @@ static void npcm8xx_init(Object *obj) object_initialize_child(obj, "mft[*]", &s->mft[i], TYPE_NPCM7XX_MFT); } + for (i = 0; i < ARRAY_SIZE(s->gmac); i++) { + object_initialize_child(obj, "gmac[*]", &s->gmac[i], TYPE_NPCM_GMAC); + } + object_initialize_child(obj, "pcs", &s->pcs, TYPE_NPCM_PCS); + object_initialize_child(obj, "mmc", &s->mmc, TYPE_NPCM7XX_SDHCI); object_initialize_child(obj, "pspi", &s->pspi, TYPE_NPCM_PSPI); } @@ -669,6 +690,35 @@ static void npcm8xx_realize(DeviceState *dev, Error **errp) } /* + * GMAC Modules. Cannot fail. + */ + QEMU_BUILD_BUG_ON(ARRAY_SIZE(npcm8xx_gmac_addr) != ARRAY_SIZE(s->gmac)); + for (i = 0; i < ARRAY_SIZE(s->gmac); i++) { + SysBusDevice *sbd = SYS_BUS_DEVICE(&s->gmac[i]); + + /* This is used to make sure that the NIC can create the device */ + qemu_configure_nic_device(DEVICE(sbd), false, NULL); + + /* + * The device exists regardless of whether it's connected to a QEMU + * netdev backend. So always instantiate it even if there is no + * backend. + */ + sysbus_realize(sbd, &error_abort); + sysbus_mmio_map(sbd, 0, npcm8xx_gmac_addr[i]); + /* + * N.B. The values for the second argument sysbus_connect_irq are + * chosen to match the registration order in npcm7xx_emc_realize. + */ + sysbus_connect_irq(sbd, 0, npcm8xx_irq(s, NPCM8XX_GMAC1_IRQ + i)); + } + /* + * GMAC Physical Coding Sublayer(PCS) Module. Cannot fail. + */ + sysbus_realize(SYS_BUS_DEVICE(&s->pcs), &error_abort); + sysbus_mmio_map(SYS_BUS_DEVICE(&s->pcs), 0, NPCM8XX_PCS_BA); + + /* * Flash Interface Unit (FIU). Can fail if incorrect number of chip selects * specified, but this is a programming error. */ @@ -741,12 +791,7 @@ static void npcm8xx_realize(DeviceState *dev, Error **errp) create_unimplemented_device("npcm8xx.ahbpci", 0xf0400000, 1 * MiB); create_unimplemented_device("npcm8xx.dap", 0xf0500000, 960 * KiB); create_unimplemented_device("npcm8xx.mcphy", 0xf05f0000, 64 * KiB); - create_unimplemented_device("npcm8xx.pcs", 0xf0780000, 256 * KiB); create_unimplemented_device("npcm8xx.tsgen", 0xf07fc000, 8 * KiB); - create_unimplemented_device("npcm8xx.gmac1", 0xf0802000, 8 * KiB); - create_unimplemented_device("npcm8xx.gmac2", 0xf0804000, 8 * KiB); - create_unimplemented_device("npcm8xx.gmac3", 0xf0806000, 8 * KiB); - create_unimplemented_device("npcm8xx.gmac4", 0xf0808000, 8 * KiB); create_unimplemented_device("npcm8xx.copctl", 0xf080c000, 4 * KiB); create_unimplemented_device("npcm8xx.tipctl", 0xf080d000, 4 * KiB); create_unimplemented_device("npcm8xx.rst", 0xf080e000, 4 * KiB); diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 9a6cd08..99fde58 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -1487,9 +1487,12 @@ static void create_virtio_iommu_dt_bindings(VirtMachineState *vms) qemu_fdt_setprop_cell(ms->fdt, node, "phandle", vms->iommu_phandle); g_free(node); - qemu_fdt_setprop_cells(ms->fdt, vms->pciehb_nodename, "iommu-map", - 0x0, vms->iommu_phandle, 0x0, bdf, - bdf + 1, vms->iommu_phandle, bdf + 1, 0xffff - bdf); + if (!vms->default_bus_bypass_iommu) { + qemu_fdt_setprop_cells(ms->fdt, vms->pciehb_nodename, "iommu-map", + 0x0, vms->iommu_phandle, 0x0, bdf, + bdf + 1, vms->iommu_phandle, bdf + 1, + 0xffff - bdf); + } } static void create_pcie(VirtMachineState *vms) @@ -1612,8 +1615,10 @@ static void create_pcie(VirtMachineState *vms) switch (vms->iommu) { case VIRT_IOMMU_SMMUV3: create_smmu(vms, vms->bus); - qemu_fdt_setprop_cells(ms->fdt, nodename, "iommu-map", - 0x0, vms->iommu_phandle, 0x0, 0x10000); + if (!vms->default_bus_bypass_iommu) { + qemu_fdt_setprop_cells(ms->fdt, nodename, "iommu-map", + 0x0, vms->iommu_phandle, 0x0, 0x10000); + } break; default: g_assert_not_reached(); diff --git a/hw/audio/ac97.c b/hw/audio/ac97.c index 669a046..eb7a847 100644 --- a/hw/audio/ac97.c +++ b/hw/audio/ac97.c @@ -886,7 +886,7 @@ static void nabm_writel(void *opaque, uint32_t addr, uint32_t val) static int write_audio(AC97LinkState *s, AC97BusMasterRegs *r, int max, int *stop) { - uint8_t tmpbuf[4096]; + QEMU_UNINITIALIZED uint8_t tmpbuf[4096]; uint32_t addr = r->bd.addr; uint32_t temp = r->picb << 1; uint32_t written = 0; @@ -959,7 +959,7 @@ static void write_bup(AC97LinkState *s, int elapsed) static int read_audio(AC97LinkState *s, AC97BusMasterRegs *r, int max, int *stop) { - uint8_t tmpbuf[4096]; + QEMU_UNINITIALIZED uint8_t tmpbuf[4096]; uint32_t addr = r->bd.addr; uint32_t temp = r->picb << 1; uint32_t nread = 0; diff --git a/hw/audio/asc.c b/hw/audio/asc.c index 18382cc..edd42d6 100644 --- a/hw/audio/asc.c +++ b/hw/audio/asc.c @@ -12,6 +12,7 @@ #include "qemu/osdep.h" #include "qemu/timer.h" +#include "qapi/error.h" #include "hw/sysbus.h" #include "hw/irq.h" #include "audio/audio.h" @@ -653,11 +654,17 @@ static void asc_realize(DeviceState *dev, Error **errp) s->voice = AUD_open_out(&s->card, s->voice, "asc.out", s, asc_out_cb, &as); + if (!s->voice) { + AUD_remove_card(&s->card); + error_setg(errp, "Initializing audio stream failed"); + return; + } + s->shift = 1; s->samples = AUD_get_buffer_size_out(s->voice) >> s->shift; s->mixbuf = g_malloc0(s->samples << s->shift); - s->silentbuf = g_malloc0(s->samples << s->shift); + s->silentbuf = g_malloc(s->samples << s->shift); memset(s->silentbuf, 0x80, s->samples << s->shift); /* Add easc registers if required */ diff --git a/hw/audio/cs4231a.c b/hw/audio/cs4231a.c index eb9a458..6dfff20 100644 --- a/hw/audio/cs4231a.c +++ b/hw/audio/cs4231a.c @@ -528,7 +528,7 @@ static int cs_write_audio (CSState *s, int nchan, int dma_pos, int dma_len, int len) { int temp, net; - uint8_t tmpbuf[4096]; + QEMU_UNINITIALIZED uint8_t tmpbuf[4096]; IsaDmaClass *k = ISADMA_GET_CLASS(s->isa_dma); temp = len; @@ -547,7 +547,7 @@ static int cs_write_audio (CSState *s, int nchan, int dma_pos, copied = k->read_memory(s->isa_dma, nchan, tmpbuf, dma_pos, to_copy); if (s->tab) { int i; - int16_t linbuf[4096]; + QEMU_UNINITIALIZED int16_t linbuf[4096]; for (i = 0; i < copied; ++i) linbuf[i] = s->tab[tmpbuf[i]]; diff --git a/hw/audio/es1370.c b/hw/audio/es1370.c index 8efb969..a6a32a6 100644 --- a/hw/audio/es1370.c +++ b/hw/audio/es1370.c @@ -604,7 +604,7 @@ static uint64_t es1370_read(void *opaque, hwaddr addr, unsigned size) static void es1370_transfer_audio (ES1370State *s, struct chan *d, int loop_sel, int max, bool *irq) { - uint8_t tmpbuf[4096]; + QEMU_UNINITIALIZED uint8_t tmpbuf[4096]; size_t to_transfer; uint32_t addr = d->frame_addr; int sc = d->scount & 0xffff; diff --git a/hw/audio/gus.c b/hw/audio/gus.c index 87e8634..c36df02 100644 --- a/hw/audio/gus.c +++ b/hw/audio/gus.c @@ -183,7 +183,7 @@ static int GUS_read_DMA (void *opaque, int nchan, int dma_pos, int dma_len) { GUSState *s = opaque; IsaDmaClass *k = ISADMA_GET_CLASS(s->isa_dma); - char tmpbuf[4096]; + QEMU_UNINITIALIZED char tmpbuf[4096]; int pos = dma_pos, mode, left = dma_len - dma_pos; ldebug ("read DMA %#x %d\n", dma_pos, dma_len); diff --git a/hw/audio/marvell_88w8618.c b/hw/audio/marvell_88w8618.c index 6d3ebbb..c5c79d0 100644 --- a/hw/audio/marvell_88w8618.c +++ b/hw/audio/marvell_88w8618.c @@ -66,7 +66,7 @@ static void mv88w8618_audio_callback(void *opaque, int free_out, int free_in) { mv88w8618_audio_state *s = opaque; int16_t *codec_buffer; - int8_t buf[4096]; + QEMU_UNINITIALIZED int8_t buf[4096]; int8_t *mem_buffer; int pos, block_size; diff --git a/hw/audio/sb16.c b/hw/audio/sb16.c index 19fd3b9..bac6411 100644 --- a/hw/audio/sb16.c +++ b/hw/audio/sb16.c @@ -1181,7 +1181,7 @@ static int write_audio (SB16State *s, int nchan, int dma_pos, IsaDma *isa_dma = nchan == s->dma ? s->isa_dma : s->isa_hdma; IsaDmaClass *k = ISADMA_GET_CLASS(isa_dma); int temp, net; - uint8_t tmpbuf[4096]; + QEMU_UNINITIALIZED uint8_t tmpbuf[4096]; temp = len; net = 0; diff --git a/hw/audio/via-ac97.c b/hw/audio/via-ac97.c index 1e0a5c7..d5231e1 100644 --- a/hw/audio/via-ac97.c +++ b/hw/audio/via-ac97.c @@ -175,7 +175,7 @@ static void out_cb(void *opaque, int avail) ViaAC97SGDChannel *c = &s->aur; int temp, to_copy, copied; bool stop = false; - uint8_t tmpbuf[4096]; + QEMU_UNINITIALIZED uint8_t tmpbuf[4096]; if (c->stat & STAT_PAUSED) { return; diff --git a/hw/block/Kconfig b/hw/block/Kconfig index a898e04..737dbcd 100644 --- a/hw/block/Kconfig +++ b/hw/block/Kconfig @@ -13,9 +13,6 @@ config FDC_SYSBUS config SSI_M25P80 bool -config NAND - bool - config PFLASH_CFI01 bool diff --git a/hw/block/meson.build b/hw/block/meson.build index 16a51bf..6557044 100644 --- a/hw/block/meson.build +++ b/hw/block/meson.build @@ -6,7 +6,6 @@ system_ss.add(files( system_ss.add(when: 'CONFIG_FDC', if_true: files('fdc.c')) system_ss.add(when: 'CONFIG_FDC_ISA', if_true: files('fdc-isa.c')) system_ss.add(when: 'CONFIG_FDC_SYSBUS', if_true: files('fdc-sysbus.c')) -system_ss.add(when: 'CONFIG_NAND', if_true: files('nand.c')) system_ss.add(when: 'CONFIG_PFLASH_CFI01', if_true: files('pflash_cfi01.c')) system_ss.add(when: 'CONFIG_PFLASH_CFI02', if_true: files('pflash_cfi02.c')) system_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80.c')) diff --git a/hw/block/nand.c b/hw/block/nand.c deleted file mode 100644 index c80bf78..0000000 --- a/hw/block/nand.c +++ /dev/null @@ -1,835 +0,0 @@ -/* - * Flash NAND memory emulation. Based on "16M x 8 Bit NAND Flash - * Memory" datasheet for the KM29U128AT / K9F2808U0A chips from - * Samsung Electronic. - * - * Copyright (c) 2006 Openedhand Ltd. - * Written by Andrzej Zaborowski <balrog@zabor.org> - * - * Support for additional features based on "MT29F2G16ABCWP 2Gx16" - * datasheet from Micron Technology and "NAND02G-B2C" datasheet - * from ST Microelectronics. - * - * This code is licensed under the GNU GPL v2. - * - * Contributions after 2012-01-13 are licensed under the terms of the - * GNU GPL, version 2 or (at your option) any later version. - */ - -#ifndef NAND_IO - -#include "qemu/osdep.h" -#include "hw/hw.h" -#include "hw/qdev-properties.h" -#include "hw/qdev-properties-system.h" -#include "hw/block/flash.h" -#include "system/block-backend.h" -#include "migration/vmstate.h" -#include "qapi/error.h" -#include "qemu/error-report.h" -#include "qemu/module.h" -#include "qom/object.h" - -# define NAND_CMD_READ0 0x00 -# define NAND_CMD_READ1 0x01 -# define NAND_CMD_READ2 0x50 -# define NAND_CMD_LPREAD2 0x30 -# define NAND_CMD_NOSERIALREAD2 0x35 -# define NAND_CMD_RANDOMREAD1 0x05 -# define NAND_CMD_RANDOMREAD2 0xe0 -# define NAND_CMD_READID 0x90 -# define NAND_CMD_RESET 0xff -# define NAND_CMD_PAGEPROGRAM1 0x80 -# define NAND_CMD_PAGEPROGRAM2 0x10 -# define NAND_CMD_CACHEPROGRAM2 0x15 -# define NAND_CMD_BLOCKERASE1 0x60 -# define NAND_CMD_BLOCKERASE2 0xd0 -# define NAND_CMD_READSTATUS 0x70 -# define NAND_CMD_COPYBACKPRG1 0x85 - -# define NAND_IOSTATUS_ERROR (1 << 0) -# define NAND_IOSTATUS_PLANE0 (1 << 1) -# define NAND_IOSTATUS_PLANE1 (1 << 2) -# define NAND_IOSTATUS_PLANE2 (1 << 3) -# define NAND_IOSTATUS_PLANE3 (1 << 4) -# define NAND_IOSTATUS_READY (1 << 6) -# define NAND_IOSTATUS_UNPROTCT (1 << 7) - -# define MAX_PAGE 0x800 -# define MAX_OOB 0x40 - -typedef struct NANDFlashState NANDFlashState; -struct NANDFlashState { - DeviceState parent_obj; - - uint8_t manf_id, chip_id; - uint8_t buswidth; /* in BYTES */ - int size, pages; - int page_shift, oob_shift, erase_shift, addr_shift; - uint8_t *storage; - BlockBackend *blk; - int mem_oob; - - uint8_t cle, ale, ce, wp, gnd; - - uint8_t io[MAX_PAGE + MAX_OOB + 0x400]; - uint8_t *ioaddr; - int iolen; - - uint32_t cmd; - uint64_t addr; - int addrlen; - int status; - int offset; - - void (*blk_write)(NANDFlashState *s); - void (*blk_erase)(NANDFlashState *s); - /* - * Returns %true when block containing (@addr + @offset) is - * successfully loaded, otherwise %false. - */ - bool (*blk_load)(NANDFlashState *s, uint64_t addr, unsigned offset); - - uint32_t ioaddr_vmstate; -}; - -#define TYPE_NAND "nand" - -OBJECT_DECLARE_SIMPLE_TYPE(NANDFlashState, NAND) - -static void mem_and(uint8_t *dest, const uint8_t *src, size_t n) -{ - /* Like memcpy() but we logical-AND the data into the destination */ - int i; - for (i = 0; i < n; i++) { - dest[i] &= src[i]; - } -} - -# define NAND_NO_AUTOINCR 0x00000001 -# define NAND_BUSWIDTH_16 0x00000002 -# define NAND_NO_PADDING 0x00000004 -# define NAND_CACHEPRG 0x00000008 -# define NAND_COPYBACK 0x00000010 -# define NAND_IS_AND 0x00000020 -# define NAND_4PAGE_ARRAY 0x00000040 -# define NAND_NO_READRDY 0x00000100 -# define NAND_SAMSUNG_LP (NAND_NO_PADDING | NAND_COPYBACK) - -# define NAND_IO - -# define PAGE(addr) ((addr) >> ADDR_SHIFT) -# define PAGE_START(page) (PAGE(page) * (NAND_PAGE_SIZE + OOB_SIZE)) -# define PAGE_MASK ((1 << ADDR_SHIFT) - 1) -# define OOB_SHIFT (PAGE_SHIFT - 5) -# define OOB_SIZE (1 << OOB_SHIFT) -# define SECTOR(addr) ((addr) >> (9 + ADDR_SHIFT - PAGE_SHIFT)) -# define SECTOR_OFFSET(addr) ((addr) & ((511 >> PAGE_SHIFT) << 8)) - -# define NAND_PAGE_SIZE 256 -# define PAGE_SHIFT 8 -# define PAGE_SECTORS 1 -# define ADDR_SHIFT 8 -# include "nand.c" -# define NAND_PAGE_SIZE 512 -# define PAGE_SHIFT 9 -# define PAGE_SECTORS 1 -# define ADDR_SHIFT 8 -# include "nand.c" -# define NAND_PAGE_SIZE 2048 -# define PAGE_SHIFT 11 -# define PAGE_SECTORS 4 -# define ADDR_SHIFT 16 -# include "nand.c" - -/* Information based on Linux drivers/mtd/nand/raw/nand_ids.c */ -static const struct { - int size; - int width; - int page_shift; - int erase_shift; - uint32_t options; -} nand_flash_ids[0x100] = { - [0 ... 0xff] = { 0 }, - - [0x6b] = { 4, 8, 9, 4, 0 }, - [0xe3] = { 4, 8, 9, 4, 0 }, - [0xe5] = { 4, 8, 9, 4, 0 }, - [0xd6] = { 8, 8, 9, 4, 0 }, - [0xe6] = { 8, 8, 9, 4, 0 }, - - [0x33] = { 16, 8, 9, 5, 0 }, - [0x73] = { 16, 8, 9, 5, 0 }, - [0x43] = { 16, 16, 9, 5, NAND_BUSWIDTH_16 }, - [0x53] = { 16, 16, 9, 5, NAND_BUSWIDTH_16 }, - - [0x35] = { 32, 8, 9, 5, 0 }, - [0x75] = { 32, 8, 9, 5, 0 }, - [0x45] = { 32, 16, 9, 5, NAND_BUSWIDTH_16 }, - [0x55] = { 32, 16, 9, 5, NAND_BUSWIDTH_16 }, - - [0x36] = { 64, 8, 9, 5, 0 }, - [0x76] = { 64, 8, 9, 5, 0 }, - [0x46] = { 64, 16, 9, 5, NAND_BUSWIDTH_16 }, - [0x56] = { 64, 16, 9, 5, NAND_BUSWIDTH_16 }, - - [0x78] = { 128, 8, 9, 5, 0 }, - [0x39] = { 128, 8, 9, 5, 0 }, - [0x79] = { 128, 8, 9, 5, 0 }, - [0x72] = { 128, 16, 9, 5, NAND_BUSWIDTH_16 }, - [0x49] = { 128, 16, 9, 5, NAND_BUSWIDTH_16 }, - [0x74] = { 128, 16, 9, 5, NAND_BUSWIDTH_16 }, - [0x59] = { 128, 16, 9, 5, NAND_BUSWIDTH_16 }, - - [0x71] = { 256, 8, 9, 5, 0 }, - - /* - * These are the new chips with large page size. The pagesize and the - * erasesize is determined from the extended id bytes - */ -# define LP_OPTIONS (NAND_SAMSUNG_LP | NAND_NO_READRDY | NAND_NO_AUTOINCR) -# define LP_OPTIONS16 (LP_OPTIONS | NAND_BUSWIDTH_16) - - /* 512 Megabit */ - [0xa2] = { 64, 8, 0, 0, LP_OPTIONS }, - [0xf2] = { 64, 8, 0, 0, LP_OPTIONS }, - [0xb2] = { 64, 16, 0, 0, LP_OPTIONS16 }, - [0xc2] = { 64, 16, 0, 0, LP_OPTIONS16 }, - - /* 1 Gigabit */ - [0xa1] = { 128, 8, 0, 0, LP_OPTIONS }, - [0xf1] = { 128, 8, 0, 0, LP_OPTIONS }, - [0xb1] = { 128, 16, 0, 0, LP_OPTIONS16 }, - [0xc1] = { 128, 16, 0, 0, LP_OPTIONS16 }, - - /* 2 Gigabit */ - [0xaa] = { 256, 8, 0, 0, LP_OPTIONS }, - [0xda] = { 256, 8, 0, 0, LP_OPTIONS }, - [0xba] = { 256, 16, 0, 0, LP_OPTIONS16 }, - [0xca] = { 256, 16, 0, 0, LP_OPTIONS16 }, - - /* 4 Gigabit */ - [0xac] = { 512, 8, 0, 0, LP_OPTIONS }, - [0xdc] = { 512, 8, 0, 0, LP_OPTIONS }, - [0xbc] = { 512, 16, 0, 0, LP_OPTIONS16 }, - [0xcc] = { 512, 16, 0, 0, LP_OPTIONS16 }, - - /* 8 Gigabit */ - [0xa3] = { 1024, 8, 0, 0, LP_OPTIONS }, - [0xd3] = { 1024, 8, 0, 0, LP_OPTIONS }, - [0xb3] = { 1024, 16, 0, 0, LP_OPTIONS16 }, - [0xc3] = { 1024, 16, 0, 0, LP_OPTIONS16 }, - - /* 16 Gigabit */ - [0xa5] = { 2048, 8, 0, 0, LP_OPTIONS }, - [0xd5] = { 2048, 8, 0, 0, LP_OPTIONS }, - [0xb5] = { 2048, 16, 0, 0, LP_OPTIONS16 }, - [0xc5] = { 2048, 16, 0, 0, LP_OPTIONS16 }, -}; - -static void nand_reset(DeviceState *dev) -{ - NANDFlashState *s = NAND(dev); - s->cmd = NAND_CMD_READ0; - s->addr = 0; - s->addrlen = 0; - s->iolen = 0; - s->offset = 0; - s->status &= NAND_IOSTATUS_UNPROTCT; - s->status |= NAND_IOSTATUS_READY; -} - -static inline void nand_pushio_byte(NANDFlashState *s, uint8_t value) -{ - s->ioaddr[s->iolen++] = value; - for (value = s->buswidth; --value;) { - s->ioaddr[s->iolen++] = 0; - } -} - -/* - * nand_load_block: Load block containing (s->addr + @offset). - * Returns length of data available at @offset in this block. - */ -static unsigned nand_load_block(NANDFlashState *s, unsigned offset) -{ - unsigned iolen; - - if (!s->blk_load(s, s->addr, offset)) { - return 0; - } - - iolen = (1 << s->page_shift); - if (s->gnd) { - iolen += 1 << s->oob_shift; - } - assert(offset <= iolen); - iolen -= offset; - - return iolen; -} - -static void nand_command(NANDFlashState *s) -{ - switch (s->cmd) { - case NAND_CMD_READ0: - s->iolen = 0; - break; - - case NAND_CMD_READID: - s->ioaddr = s->io; - s->iolen = 0; - nand_pushio_byte(s, s->manf_id); - nand_pushio_byte(s, s->chip_id); - nand_pushio_byte(s, 'Q'); /* Don't-care byte (often 0xa5) */ - if (nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) { - /* Page Size, Block Size, Spare Size; bit 6 indicates - * 8 vs 16 bit width NAND. - */ - nand_pushio_byte(s, (s->buswidth == 2) ? 0x55 : 0x15); - } else { - nand_pushio_byte(s, 0xc0); /* Multi-plane */ - } - break; - - case NAND_CMD_RANDOMREAD2: - case NAND_CMD_NOSERIALREAD2: - if (!(nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP)) - break; - s->iolen = nand_load_block(s, s->addr & ((1 << s->addr_shift) - 1)); - break; - - case NAND_CMD_RESET: - nand_reset(DEVICE(s)); - break; - - case NAND_CMD_PAGEPROGRAM1: - s->ioaddr = s->io; - s->iolen = 0; - break; - - case NAND_CMD_PAGEPROGRAM2: - if (s->wp) { - s->blk_write(s); - } - break; - - case NAND_CMD_BLOCKERASE1: - break; - - case NAND_CMD_BLOCKERASE2: - s->addr &= (1ull << s->addrlen * 8) - 1; - s->addr <<= nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP ? - 16 : 8; - - if (s->wp) { - s->blk_erase(s); - } - break; - - case NAND_CMD_READSTATUS: - s->ioaddr = s->io; - s->iolen = 0; - nand_pushio_byte(s, s->status); - break; - - default: - printf("%s: Unknown NAND command 0x%02x\n", __func__, s->cmd); - } -} - -static int nand_pre_save(void *opaque) -{ - NANDFlashState *s = NAND(opaque); - - s->ioaddr_vmstate = s->ioaddr - s->io; - - return 0; -} - -static int nand_post_load(void *opaque, int version_id) -{ - NANDFlashState *s = NAND(opaque); - - if (s->ioaddr_vmstate > sizeof(s->io)) { - return -EINVAL; - } - s->ioaddr = s->io + s->ioaddr_vmstate; - - return 0; -} - -static const VMStateDescription vmstate_nand = { - .name = "nand", - .version_id = 1, - .minimum_version_id = 1, - .pre_save = nand_pre_save, - .post_load = nand_post_load, - .fields = (const VMStateField[]) { - VMSTATE_UINT8(cle, NANDFlashState), - VMSTATE_UINT8(ale, NANDFlashState), - VMSTATE_UINT8(ce, NANDFlashState), - VMSTATE_UINT8(wp, NANDFlashState), - VMSTATE_UINT8(gnd, NANDFlashState), - VMSTATE_BUFFER(io, NANDFlashState), - VMSTATE_UINT32(ioaddr_vmstate, NANDFlashState), - VMSTATE_INT32(iolen, NANDFlashState), - VMSTATE_UINT32(cmd, NANDFlashState), - VMSTATE_UINT64(addr, NANDFlashState), - VMSTATE_INT32(addrlen, NANDFlashState), - VMSTATE_INT32(status, NANDFlashState), - VMSTATE_INT32(offset, NANDFlashState), - /* XXX: do we want to save s->storage too? */ - VMSTATE_END_OF_LIST() - } -}; - -static void nand_realize(DeviceState *dev, Error **errp) -{ - int pagesize; - NANDFlashState *s = NAND(dev); - int ret; - - - s->buswidth = nand_flash_ids[s->chip_id].width >> 3; - s->size = nand_flash_ids[s->chip_id].size << 20; - if (nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) { - s->page_shift = 11; - s->erase_shift = 6; - } else { - s->page_shift = nand_flash_ids[s->chip_id].page_shift; - s->erase_shift = nand_flash_ids[s->chip_id].erase_shift; - } - - switch (1 << s->page_shift) { - case 256: - nand_init_256(s); - break; - case 512: - nand_init_512(s); - break; - case 2048: - nand_init_2048(s); - break; - default: - error_setg(errp, "Unsupported NAND block size %#x", - 1 << s->page_shift); - return; - } - - pagesize = 1 << s->oob_shift; - s->mem_oob = 1; - if (s->blk) { - if (!blk_supports_write_perm(s->blk)) { - error_setg(errp, "Can't use a read-only drive"); - return; - } - ret = blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE, - BLK_PERM_ALL, errp); - if (ret < 0) { - return; - } - if (blk_getlength(s->blk) >= - (s->pages << s->page_shift) + (s->pages << s->oob_shift)) { - pagesize = 0; - s->mem_oob = 0; - } - } else { - pagesize += 1 << s->page_shift; - } - if (pagesize) { - s->storage = (uint8_t *) memset(g_malloc(s->pages * pagesize), - 0xff, s->pages * pagesize); - } - /* Give s->ioaddr a sane value in case we save state before it is used. */ - s->ioaddr = s->io; -} - -static const Property nand_properties[] = { - DEFINE_PROP_UINT8("manufacturer_id", NANDFlashState, manf_id, 0), - DEFINE_PROP_UINT8("chip_id", NANDFlashState, chip_id, 0), - DEFINE_PROP_DRIVE("drive", NANDFlashState, blk), -}; - -static void nand_class_init(ObjectClass *klass, const void *data) -{ - DeviceClass *dc = DEVICE_CLASS(klass); - - dc->realize = nand_realize; - device_class_set_legacy_reset(dc, nand_reset); - dc->vmsd = &vmstate_nand; - device_class_set_props(dc, nand_properties); - set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); -} - -static const TypeInfo nand_info = { - .name = TYPE_NAND, - .parent = TYPE_DEVICE, - .instance_size = sizeof(NANDFlashState), - .class_init = nand_class_init, -}; - -static void nand_register_types(void) -{ - type_register_static(&nand_info); -} - -/* - * Chip inputs are CLE, ALE, CE, WP, GND and eight I/O pins. Chip - * outputs are R/B and eight I/O pins. - * - * CE, WP and R/B are active low. - */ -void nand_setpins(DeviceState *dev, uint8_t cle, uint8_t ale, - uint8_t ce, uint8_t wp, uint8_t gnd) -{ - NANDFlashState *s = NAND(dev); - - s->cle = cle; - s->ale = ale; - s->ce = ce; - s->wp = wp; - s->gnd = gnd; - if (wp) { - s->status |= NAND_IOSTATUS_UNPROTCT; - } else { - s->status &= ~NAND_IOSTATUS_UNPROTCT; - } -} - -void nand_getpins(DeviceState *dev, int *rb) -{ - *rb = 1; -} - -void nand_setio(DeviceState *dev, uint32_t value) -{ - int i; - NANDFlashState *s = NAND(dev); - - if (!s->ce && s->cle) { - if (nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) { - if (s->cmd == NAND_CMD_READ0 && value == NAND_CMD_LPREAD2) - return; - if (value == NAND_CMD_RANDOMREAD1) { - s->addr &= ~((1 << s->addr_shift) - 1); - s->addrlen = 0; - return; - } - } - if (value == NAND_CMD_READ0) { - s->offset = 0; - } else if (value == NAND_CMD_READ1) { - s->offset = 0x100; - value = NAND_CMD_READ0; - } else if (value == NAND_CMD_READ2) { - s->offset = 1 << s->page_shift; - value = NAND_CMD_READ0; - } - - s->cmd = value; - - if (s->cmd == NAND_CMD_READSTATUS || - s->cmd == NAND_CMD_PAGEPROGRAM2 || - s->cmd == NAND_CMD_BLOCKERASE1 || - s->cmd == NAND_CMD_BLOCKERASE2 || - s->cmd == NAND_CMD_NOSERIALREAD2 || - s->cmd == NAND_CMD_RANDOMREAD2 || - s->cmd == NAND_CMD_RESET) { - nand_command(s); - } - - if (s->cmd != NAND_CMD_RANDOMREAD2) { - s->addrlen = 0; - } - } - - if (s->ale) { - unsigned int shift = s->addrlen * 8; - uint64_t mask = ~(0xffull << shift); - uint64_t v = (uint64_t)value << shift; - - s->addr = (s->addr & mask) | v; - s->addrlen ++; - - switch (s->addrlen) { - case 1: - if (s->cmd == NAND_CMD_READID) { - nand_command(s); - } - break; - case 2: /* fix cache address as a byte address */ - s->addr <<= (s->buswidth - 1); - break; - case 3: - if (!(nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) && - (s->cmd == NAND_CMD_READ0 || - s->cmd == NAND_CMD_PAGEPROGRAM1)) { - nand_command(s); - } - break; - case 4: - if ((nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) && - nand_flash_ids[s->chip_id].size < 256 && /* 1Gb or less */ - (s->cmd == NAND_CMD_READ0 || - s->cmd == NAND_CMD_PAGEPROGRAM1)) { - nand_command(s); - } - break; - case 5: - if ((nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) && - nand_flash_ids[s->chip_id].size >= 256 && /* 2Gb or more */ - (s->cmd == NAND_CMD_READ0 || - s->cmd == NAND_CMD_PAGEPROGRAM1)) { - nand_command(s); - } - break; - default: - break; - } - } - - if (!s->cle && !s->ale && s->cmd == NAND_CMD_PAGEPROGRAM1) { - if (s->iolen < (1 << s->page_shift) + (1 << s->oob_shift)) { - for (i = s->buswidth; i--; value >>= 8) { - s->io[s->iolen ++] = (uint8_t) (value & 0xff); - } - } - } else if (!s->cle && !s->ale && s->cmd == NAND_CMD_COPYBACKPRG1) { - if ((s->addr & ((1 << s->addr_shift) - 1)) < - (1 << s->page_shift) + (1 << s->oob_shift)) { - for (i = s->buswidth; i--; s->addr++, value >>= 8) { - s->io[s->iolen + (s->addr & ((1 << s->addr_shift) - 1))] = - (uint8_t) (value & 0xff); - } - } - } -} - -uint32_t nand_getio(DeviceState *dev) -{ - int offset; - uint32_t x = 0; - NANDFlashState *s = NAND(dev); - - /* Allow sequential reading */ - if (!s->iolen && s->cmd == NAND_CMD_READ0) { - offset = (int) (s->addr & ((1 << s->addr_shift) - 1)) + s->offset; - s->offset = 0; - s->iolen = nand_load_block(s, offset); - } - - if (s->ce || s->iolen <= 0) { - return 0; - } - - for (offset = s->buswidth; offset--;) { - x |= s->ioaddr[offset] << (offset << 3); - } - /* after receiving READ STATUS command all subsequent reads will - * return the status register value until another command is issued - */ - if (s->cmd != NAND_CMD_READSTATUS) { - s->addr += s->buswidth; - s->ioaddr += s->buswidth; - s->iolen -= s->buswidth; - } - return x; -} - -uint32_t nand_getbuswidth(DeviceState *dev) -{ - NANDFlashState *s = (NANDFlashState *) dev; - return s->buswidth << 3; -} - -DeviceState *nand_init(BlockBackend *blk, int manf_id, int chip_id) -{ - DeviceState *dev; - - if (nand_flash_ids[chip_id].size == 0) { - hw_error("%s: Unsupported NAND chip ID.\n", __func__); - } - dev = qdev_new(TYPE_NAND); - qdev_prop_set_uint8(dev, "manufacturer_id", manf_id); - qdev_prop_set_uint8(dev, "chip_id", chip_id); - if (blk) { - qdev_prop_set_drive_err(dev, "drive", blk, &error_fatal); - } - - qdev_realize(dev, NULL, &error_fatal); - return dev; -} - -type_init(nand_register_types) - -#else - -/* Program a single page */ -static void glue(nand_blk_write_, NAND_PAGE_SIZE)(NANDFlashState *s) -{ - uint64_t off, page, sector, soff; - uint8_t iobuf[(PAGE_SECTORS + 2) * 0x200]; - if (PAGE(s->addr) >= s->pages) - return; - - if (!s->blk) { - mem_and(s->storage + PAGE_START(s->addr) + (s->addr & PAGE_MASK) + - s->offset, s->io, s->iolen); - } else if (s->mem_oob) { - sector = SECTOR(s->addr); - off = (s->addr & PAGE_MASK) + s->offset; - soff = SECTOR_OFFSET(s->addr); - if (blk_pread(s->blk, sector << BDRV_SECTOR_BITS, - PAGE_SECTORS << BDRV_SECTOR_BITS, iobuf, 0) < 0) { - printf("%s: read error in sector %" PRIu64 "\n", __func__, sector); - return; - } - - mem_and(iobuf + (soff | off), s->io, MIN(s->iolen, NAND_PAGE_SIZE - off)); - if (off + s->iolen > NAND_PAGE_SIZE) { - page = PAGE(s->addr); - mem_and(s->storage + (page << OOB_SHIFT), s->io + NAND_PAGE_SIZE - off, - MIN(OOB_SIZE, off + s->iolen - NAND_PAGE_SIZE)); - } - - if (blk_pwrite(s->blk, sector << BDRV_SECTOR_BITS, - PAGE_SECTORS << BDRV_SECTOR_BITS, iobuf, 0) < 0) { - printf("%s: write error in sector %" PRIu64 "\n", __func__, sector); - } - } else { - off = PAGE_START(s->addr) + (s->addr & PAGE_MASK) + s->offset; - sector = off >> 9; - soff = off & 0x1ff; - if (blk_pread(s->blk, sector << BDRV_SECTOR_BITS, - (PAGE_SECTORS + 2) << BDRV_SECTOR_BITS, iobuf, 0) < 0) { - printf("%s: read error in sector %" PRIu64 "\n", __func__, sector); - return; - } - - mem_and(iobuf + soff, s->io, s->iolen); - - if (blk_pwrite(s->blk, sector << BDRV_SECTOR_BITS, - (PAGE_SECTORS + 2) << BDRV_SECTOR_BITS, iobuf, 0) < 0) { - printf("%s: write error in sector %" PRIu64 "\n", __func__, sector); - } - } - s->offset = 0; -} - -/* Erase a single block */ -static void glue(nand_blk_erase_, NAND_PAGE_SIZE)(NANDFlashState *s) -{ - uint64_t i, page, addr; - uint8_t iobuf[0x200] = { [0 ... 0x1ff] = 0xff, }; - addr = s->addr & ~((1 << (ADDR_SHIFT + s->erase_shift)) - 1); - - if (PAGE(addr) >= s->pages) { - return; - } - - if (!s->blk) { - memset(s->storage + PAGE_START(addr), - 0xff, (NAND_PAGE_SIZE + OOB_SIZE) << s->erase_shift); - } else if (s->mem_oob) { - memset(s->storage + (PAGE(addr) << OOB_SHIFT), - 0xff, OOB_SIZE << s->erase_shift); - i = SECTOR(addr); - page = SECTOR(addr + (1 << (ADDR_SHIFT + s->erase_shift))); - for (; i < page; i ++) - if (blk_pwrite(s->blk, i << BDRV_SECTOR_BITS, - BDRV_SECTOR_SIZE, iobuf, 0) < 0) { - printf("%s: write error in sector %" PRIu64 "\n", __func__, i); - } - } else { - addr = PAGE_START(addr); - page = addr >> 9; - if (blk_pread(s->blk, page << BDRV_SECTOR_BITS, - BDRV_SECTOR_SIZE, iobuf, 0) < 0) { - printf("%s: read error in sector %" PRIu64 "\n", __func__, page); - } - memset(iobuf + (addr & 0x1ff), 0xff, (~addr & 0x1ff) + 1); - if (blk_pwrite(s->blk, page << BDRV_SECTOR_BITS, - BDRV_SECTOR_SIZE, iobuf, 0) < 0) { - printf("%s: write error in sector %" PRIu64 "\n", __func__, page); - } - - memset(iobuf, 0xff, 0x200); - i = (addr & ~0x1ff) + 0x200; - for (addr += ((NAND_PAGE_SIZE + OOB_SIZE) << s->erase_shift) - 0x200; - i < addr; i += 0x200) { - if (blk_pwrite(s->blk, i, BDRV_SECTOR_SIZE, iobuf, 0) < 0) { - printf("%s: write error in sector %" PRIu64 "\n", - __func__, i >> 9); - } - } - - page = i >> 9; - if (blk_pread(s->blk, page << BDRV_SECTOR_BITS, - BDRV_SECTOR_SIZE, iobuf, 0) < 0) { - printf("%s: read error in sector %" PRIu64 "\n", __func__, page); - } - memset(iobuf, 0xff, ((addr - 1) & 0x1ff) + 1); - if (blk_pwrite(s->blk, page << BDRV_SECTOR_BITS, - BDRV_SECTOR_SIZE, iobuf, 0) < 0) { - printf("%s: write error in sector %" PRIu64 "\n", __func__, page); - } - } -} - -static bool glue(nand_blk_load_, NAND_PAGE_SIZE)(NANDFlashState *s, - uint64_t addr, unsigned offset) -{ - if (PAGE(addr) >= s->pages) { - return false; - } - - if (offset > NAND_PAGE_SIZE + OOB_SIZE) { - return false; - } - - if (s->blk) { - if (s->mem_oob) { - if (blk_pread(s->blk, SECTOR(addr) << BDRV_SECTOR_BITS, - PAGE_SECTORS << BDRV_SECTOR_BITS, s->io, 0) < 0) { - printf("%s: read error in sector %" PRIu64 "\n", - __func__, SECTOR(addr)); - } - memcpy(s->io + SECTOR_OFFSET(s->addr) + NAND_PAGE_SIZE, - s->storage + (PAGE(s->addr) << OOB_SHIFT), - OOB_SIZE); - s->ioaddr = s->io + SECTOR_OFFSET(s->addr) + offset; - } else { - if (blk_pread(s->blk, PAGE_START(addr), - (PAGE_SECTORS + 2) << BDRV_SECTOR_BITS, s->io, 0) - < 0) { - printf("%s: read error in sector %" PRIu64 "\n", - __func__, PAGE_START(addr) >> 9); - } - s->ioaddr = s->io + (PAGE_START(addr) & 0x1ff) + offset; - } - } else { - memcpy(s->io, s->storage + PAGE_START(s->addr) + - offset, NAND_PAGE_SIZE + OOB_SIZE - offset); - s->ioaddr = s->io; - } - - return true; -} - -static void glue(nand_init_, NAND_PAGE_SIZE)(NANDFlashState *s) -{ - s->oob_shift = PAGE_SHIFT - 5; - s->pages = s->size >> PAGE_SHIFT; - s->addr_shift = ADDR_SHIFT; - - s->blk_erase = glue(nand_blk_erase_, NAND_PAGE_SIZE); - s->blk_write = glue(nand_blk_write_, NAND_PAGE_SIZE); - s->blk_load = glue(nand_blk_load_, NAND_PAGE_SIZE); -} - -# undef NAND_PAGE_SIZE -# undef PAGE_SHIFT -# undef PAGE_SECTORS -# undef ADDR_SHIFT -#endif /* NAND_IO */ diff --git a/hw/char/sclpconsole-lm.c b/hw/char/sclpconsole-lm.c index e9580aa..3e40d5e 100644 --- a/hw/char/sclpconsole-lm.c +++ b/hw/char/sclpconsole-lm.c @@ -214,7 +214,7 @@ static int process_mdb(SCLPEvent *event, MDBO *mdbo) { int rc; int len; - uint8_t buffer[SIZE_BUFFER]; + QEMU_UNINITIALIZED uint8_t buffer[SIZE_BUFFER]; len = be16_to_cpu(mdbo->length); len -= sizeof(mdbo->length) + sizeof(mdbo->type) diff --git a/hw/char/sh_serial.c b/hw/char/sh_serial.c index 6abd803..30447fa 100644 --- a/hw/char/sh_serial.c +++ b/hw/char/sh_serial.c @@ -78,10 +78,6 @@ struct SHSerialState { qemu_irq bri; }; -typedef struct {} SHSerialStateClass; - -OBJECT_DEFINE_TYPE(SHSerialState, sh_serial, SH_SERIAL, SYS_BUS_DEVICE) - static void sh_serial_clear_fifo(SHSerialState *s) { memset(s->rx_fifo, 0, SH_RX_FIFO_LENGTH); @@ -434,17 +430,13 @@ static void sh_serial_realize(DeviceState *d, Error **errp) s->etu = NANOSECONDS_PER_SECOND / 9600; } -static void sh_serial_finalize(Object *obj) +static void sh_serial_unrealize(DeviceState *dev) { - SHSerialState *s = SH_SERIAL(obj); + SHSerialState *s = SH_SERIAL(dev); timer_del(&s->fifo_timeout_timer); } -static void sh_serial_init(Object *obj) -{ -} - static const Property sh_serial_properties[] = { DEFINE_PROP_CHR("chardev", SHSerialState, chr), DEFINE_PROP_UINT8("features", SHSerialState, feat, 0), @@ -456,7 +448,19 @@ static void sh_serial_class_init(ObjectClass *oc, const void *data) device_class_set_props(dc, sh_serial_properties); dc->realize = sh_serial_realize; + dc->unrealize = sh_serial_unrealize; device_class_set_legacy_reset(dc, sh_serial_reset); /* Reason: part of SuperH CPU/SoC, needs to be wired up */ dc->user_creatable = false; } + +static const TypeInfo sh_serial_types[] = { + { + .name = TYPE_SH_SERIAL, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(SHSerialState), + .class_init = sh_serial_class_init, + }, +}; + +DEFINE_TYPES(sh_serial_types) diff --git a/hw/core/loader.c b/hw/core/loader.c index b792a54..e7056ba 100644 --- a/hw/core/loader.c +++ b/hw/core/loader.c @@ -1333,20 +1333,6 @@ void rom_set_fw(FWCfgState *f) fw_cfg = f; } -void rom_set_order_override(int order) -{ - if (!fw_cfg) - return; - fw_cfg_set_order_override(fw_cfg, order); -} - -void rom_reset_order_override(void) -{ - if (!fw_cfg) - return; - fw_cfg_reset_order_override(fw_cfg); -} - void rom_transaction_begin(void) { Rom *rom; diff --git a/hw/core/machine.c b/hw/core/machine.c index b8ae155..e869821 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -37,7 +37,9 @@ #include "hw/virtio/virtio-iommu.h" #include "audio/audio.h" -GlobalProperty hw_compat_10_0[] = {}; +GlobalProperty hw_compat_10_0[] = { + { "scsi-hd", "dpofua", "off" }, +}; const size_t hw_compat_10_0_len = G_N_ELEMENTS(hw_compat_10_0); GlobalProperty hw_compat_9_2[] = { @@ -283,24 +285,6 @@ GlobalProperty hw_compat_2_6[] = { }; const size_t hw_compat_2_6_len = G_N_ELEMENTS(hw_compat_2_6); -GlobalProperty hw_compat_2_5[] = { - { "isa-fdc", "fallback", "144" }, - { "pvscsi", "x-old-pci-configuration", "on" }, - { "pvscsi", "x-disable-pcie", "on" }, - { "vmxnet3", "x-old-msi-offsets", "on" }, - { "vmxnet3", "x-disable-pcie", "on" }, -}; -const size_t hw_compat_2_5_len = G_N_ELEMENTS(hw_compat_2_5); - -GlobalProperty hw_compat_2_4[] = { - { "e1000", "extra_mac_registers", "off" }, - { "virtio-pci", "x-disable-pcie", "on" }, - { "virtio-pci", "migrate-extra", "off" }, - { "fw_cfg_mem", "dma_enabled", "off" }, - { "fw_cfg_io", "dma_enabled", "off" } -}; -const size_t hw_compat_2_4_len = G_N_ELEMENTS(hw_compat_2_4); - MachineState *current_machine; static char *machine_get_kernel(Object *obj, Error **errp) diff --git a/hw/core/meson.build b/hw/core/meson.build index 547de65..b5a545a 100644 --- a/hw/core/meson.build +++ b/hw/core/meson.build @@ -26,7 +26,7 @@ system_ss.add(when: 'CONFIG_XILINX_AXI', if_true: files('stream.c')) system_ss.add(when: 'CONFIG_PLATFORM_BUS', if_true: files('sysbus-fdt.c')) system_ss.add(when: 'CONFIG_EIF', if_true: [files('eif.c'), zlib, libcbor, gnutls]) -libsystem_ss.add(files( +system_ss.add(files( 'cpu-system.c', 'fw-path-provider.c', 'gpio.c', @@ -46,7 +46,7 @@ libsystem_ss.add(files( 'vm-change-state-handler.c', 'clock-vmstate.c', )) -libuser_ss.add(files( +user_ss.add(files( 'cpu-user.c', 'qdev-user.c', )) diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c index 8e11e63..24e145d 100644 --- a/hw/core/qdev-properties-system.c +++ b/hw/core/qdev-properties-system.c @@ -145,6 +145,7 @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name, if (ctx != bdrv_get_aio_context(bs)) { error_setg(errp, "Different aio context is not supported for new " "node"); + return; } blk_replace_bs(blk, bs, errp); diff --git a/hw/display/apple-gfx.m b/hw/display/apple-gfx.m index 8dde1f1..174d56a 100644 --- a/hw/display/apple-gfx.m +++ b/hw/display/apple-gfx.m @@ -454,7 +454,7 @@ static void set_cursor_glyph(void *opaque) /* ------ DMA (device reading system memory) ------ */ typedef struct AppleGFXReadMemoryJob { - QemuSemaphore sem; + QemuEvent event; hwaddr physical_address; uint64_t length; void *dst; @@ -470,7 +470,7 @@ static void apple_gfx_do_read_memory(void *opaque) job->dst, job->length, MEMTXATTRS_UNSPECIFIED); job->success = (r == MEMTX_OK); - qemu_sem_post(&job->sem); + qemu_event_set(&job->event); } static bool apple_gfx_read_memory(AppleGFXState *s, hwaddr physical_address, @@ -483,11 +483,11 @@ static bool apple_gfx_read_memory(AppleGFXState *s, hwaddr physical_address, trace_apple_gfx_read_memory(physical_address, length, dst); /* Performing DMA requires BQL, so do it in a BH. */ - qemu_sem_init(&job.sem, 0); + qemu_event_init(&job.event, 0); aio_bh_schedule_oneshot(qemu_get_aio_context(), apple_gfx_do_read_memory, &job); - qemu_sem_wait(&job.sem); - qemu_sem_destroy(&job.sem); + qemu_event_wait(&job.event); + qemu_event_destroy(&job.event); return job.success; } diff --git a/hw/display/vmware_vga.c b/hw/display/vmware_vga.c index 544bb65..bc1a8ed 100644 --- a/hw/display/vmware_vga.c +++ b/hw/display/vmware_vga.c @@ -618,7 +618,7 @@ static void vmsvga_fifo_run(struct vmsvga_state_s *s) uint32_t cmd, colour; int args, len, maxloop = 1024; int x, y, dx, dy, width, height; - struct vmsvga_cursor_definition_s cursor; + QEMU_UNINITIALIZED struct vmsvga_cursor_definition_s cursor; uint32_t cmd_start; len = vmsvga_fifo_length(s); diff --git a/hw/dma/xlnx_csu_dma.c b/hw/dma/xlnx_csu_dma.c index 3db3904..d8c7da1 100644 --- a/hw/dma/xlnx_csu_dma.c +++ b/hw/dma/xlnx_csu_dma.c @@ -287,7 +287,7 @@ static uint32_t xlnx_csu_dma_advance(XlnxCSUDMA *s, uint32_t len) static void xlnx_csu_dma_src_notify(void *opaque) { XlnxCSUDMA *s = XLNX_CSU_DMA(opaque); - unsigned char buf[4 * 1024]; + QEMU_UNINITIALIZED unsigned char buf[4 * 1024]; size_t rlen = 0; ptimer_transaction_begin(s->src_timer); diff --git a/hw/gpio/pca9552.c b/hw/gpio/pca9552.c index d65c0a2..1e10238 100644 --- a/hw/gpio/pca9552.c +++ b/hw/gpio/pca9552.c @@ -76,7 +76,7 @@ static void pca955x_display_pins_status(PCA955xState *s, return; } if (trace_event_get_state_backends(TRACE_PCA955X_GPIO_STATUS)) { - char *buf = g_newa(char, k->pin_count + 1); + char buf[PCA955X_PIN_COUNT_MAX + 1]; for (i = 0; i < k->pin_count; i++) { if (extract32(pins_status, i, 1)) { diff --git a/hw/hyperv/hv-balloon.c b/hw/hyperv/hv-balloon.c index 94b0abb..6dbcb2d 100644 --- a/hw/hyperv/hv-balloon.c +++ b/hw/hyperv/hv-balloon.c @@ -67,10 +67,6 @@ * these requests */ -struct HvBalloonClass { - VMBusDeviceClass parent_class; -} HvBalloonClass; - typedef enum State { /* not a real state */ S_NO_CHANGE = 0, @@ -162,8 +158,9 @@ typedef struct HvBalloon { MemoryRegion *mr; } HvBalloon; -OBJECT_DEFINE_TYPE_WITH_INTERFACES(HvBalloon, hv_balloon, HV_BALLOON, VMBUS_DEVICE, \ - { TYPE_MEMORY_DEVICE }, { }) +OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(HvBalloon, hv_balloon, \ + HV_BALLOON, VMBUS_DEVICE, \ + { TYPE_MEMORY_DEVICE }, { }) #define HV_BALLOON_SET_STATE(hvb, news) \ do { \ diff --git a/hw/hyperv/syndbg.c b/hw/hyperv/syndbg.c index 8b8a147..ac7e15f 100644 --- a/hw/hyperv/syndbg.c +++ b/hw/hyperv/syndbg.c @@ -192,7 +192,7 @@ static uint16_t handle_recv_msg(HvSynDbg *syndbg, uint64_t outgpa, { uint16_t ret; g_assert(MSG_BUFSZ >= qemu_target_page_size()); - uint8_t data_buf[MSG_BUFSZ]; + QEMU_UNINITIALIZED uint8_t data_buf[MSG_BUFSZ]; hwaddr out_len; void *out_data; ssize_t recv_byte_count; diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig index d34ce07..eb65bda 100644 --- a/hw/i386/Kconfig +++ b/hw/i386/Kconfig @@ -10,6 +10,11 @@ config SGX bool depends on KVM +config TDX + bool + select X86_FW_OVMF + depends on KVM + config PC bool imply APPLESMC @@ -26,6 +31,7 @@ config PC imply QXL imply SEV imply SGX + imply TDX imply TEST_DEVICES imply TPM_CRB imply TPM_TIS_ISA diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 0775c8f..963aa24 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -1426,7 +1426,6 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) AMDVIState *s = opaque; AMDVIAddressSpace **iommu_as, *amdvi_dev_as; int bus_num = pci_bus_num(bus); - X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); iommu_as = s->address_spaces[bus_num]; @@ -1486,15 +1485,8 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) AMDVI_INT_ADDR_FIRST, &amdvi_dev_as->iommu_ir, 1); - if (!x86_iommu->pt_supported) { - memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, false); - memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu), - true); - } else { - memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu), - false); - memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, true); - } + memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, false); + memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu), true); } return &iommu_as[devfn]->as; } @@ -1723,6 +1715,14 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) exit(EXIT_FAILURE); } + if (s->xtsup) { + if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) { + error_report("AMD IOMMU xtsup=on requires x2APIC support on " + "the KVM side"); + exit(EXIT_FAILURE); + } + } + pci_setup_iommu(bus, &amdvi_iommu_ops, s); amdvi_init(s); } diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c index 39035db..1be9bfe 100644 --- a/hw/i386/kvm/apic.c +++ b/hw/i386/kvm/apic.c @@ -17,6 +17,7 @@ #include "system/hw_accel.h" #include "system/kvm.h" #include "kvm/kvm_i386.h" +#include "kvm/tdx.h" static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic, int reg_id, uint32_t val) @@ -141,6 +142,10 @@ static void kvm_apic_put(CPUState *cs, run_on_cpu_data data) struct kvm_lapic_state kapic; int ret; + if (is_tdx_vm()) { + return; + } + kvm_put_apicbase(s->cpu, s->apicbase); kvm_put_apic_state(s, &kapic); diff --git a/hw/i386/kvm/xen-stubs.c b/hw/i386/kvm/xen-stubs.c index d03131e..ce73119 100644 --- a/hw/i386/kvm/xen-stubs.c +++ b/hw/i386/kvm/xen-stubs.c @@ -12,7 +12,6 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "qapi/qapi-commands-misc-target.h" #include "xen_evtchn.h" #include "xen_primary_console.h" @@ -38,15 +37,3 @@ void xen_primary_console_create(void) void xen_primary_console_set_be_port(uint16_t port) { } -#ifdef TARGET_I386 -EvtchnInfoList *qmp_xen_event_list(Error **errp) -{ - error_setg(errp, "Xen event channel emulation not enabled"); - return NULL; -} - -void qmp_xen_event_inject(uint32_t port, Error **errp) -{ - error_setg(errp, "Xen event channel emulation not enabled"); -} -#endif diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c index b519054..dd566c4 100644 --- a/hw/i386/kvm/xen_evtchn.c +++ b/hw/i386/kvm/xen_evtchn.c @@ -19,7 +19,7 @@ #include "monitor/monitor.h" #include "monitor/hmp.h" #include "qapi/error.h" -#include "qapi/qapi-commands-misc-target.h" +#include "qapi/qapi-commands-misc-i386.h" #include "qobject/qdict.h" #include "qom/object.h" #include "exec/target_page.h" diff --git a/hw/i386/meson.build b/hw/i386/meson.build index 10bdfde..7896f34 100644 --- a/hw/i386/meson.build +++ b/hw/i386/meson.build @@ -32,6 +32,7 @@ i386_ss.add(when: 'CONFIG_PC', if_true: files( 'port92.c')) i386_ss.add(when: 'CONFIG_X86_FW_OVMF', if_true: files('pc_sysfw_ovmf.c'), if_false: files('pc_sysfw_ovmf-stubs.c')) +i386_ss.add(when: 'CONFIG_TDX', if_true: files('tdvf.c', 'tdvf-hob.c')) subdir('kvm') subdir('xen') diff --git a/hw/i386/monitor.c b/hw/i386/monitor.c index 1921e4d..79df965 100644 --- a/hw/i386/monitor.c +++ b/hw/i386/monitor.c @@ -26,7 +26,7 @@ #include "monitor/monitor.h" #include "qobject/qdict.h" #include "qapi/error.h" -#include "qapi/qapi-commands-misc-target.h" +#include "qapi/qapi-commands-misc-i386.h" #include "hw/i386/x86.h" #include "hw/rtc/mc146818rtc.h" diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 7065615..b211633 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -44,6 +44,7 @@ #include "system/xen.h" #include "system/reset.h" #include "kvm/kvm_i386.h" +#include "kvm/tdx.h" #include "hw/xen/xen.h" #include "qobject/qlist.h" #include "qemu/error-report.h" @@ -259,28 +260,6 @@ GlobalProperty pc_compat_2_6[] = { }; const size_t pc_compat_2_6_len = G_N_ELEMENTS(pc_compat_2_6); -GlobalProperty pc_compat_2_5[] = {}; -const size_t pc_compat_2_5_len = G_N_ELEMENTS(pc_compat_2_5); - -GlobalProperty pc_compat_2_4[] = { - PC_CPU_MODEL_IDS("2.4.0") - { "Haswell-" TYPE_X86_CPU, "abm", "off" }, - { "Haswell-noTSX-" TYPE_X86_CPU, "abm", "off" }, - { "Broadwell-" TYPE_X86_CPU, "abm", "off" }, - { "Broadwell-noTSX-" TYPE_X86_CPU, "abm", "off" }, - { "host" "-" TYPE_X86_CPU, "host-cache-info", "on" }, - { TYPE_X86_CPU, "check", "off" }, - { "qemu64" "-" TYPE_X86_CPU, "sse4a", "on" }, - { "qemu64" "-" TYPE_X86_CPU, "abm", "on" }, - { "qemu64" "-" TYPE_X86_CPU, "popcnt", "on" }, - { "qemu32" "-" TYPE_X86_CPU, "popcnt", "on" }, - { "Opteron_G2" "-" TYPE_X86_CPU, "rdtscp", "on" }, - { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "on" }, - { "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "on" }, - { "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "on", } -}; -const size_t pc_compat_2_4_len = G_N_ELEMENTS(pc_compat_2_4); - /* * @PC_FW_DATA: * Size of the chunk of memory at the top of RAM for the BIOS ACPI tables @@ -976,21 +955,23 @@ void pc_memory_init(PCMachineState *pcms, /* Initialize PC system firmware */ pc_system_firmware_init(pcms, rom_memory); - option_rom_mr = g_malloc(sizeof(*option_rom_mr)); - if (machine_require_guest_memfd(machine)) { - memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom", - PC_ROM_SIZE, &error_fatal); - } else { - memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, - &error_fatal); - if (pcmc->pci_enabled) { - memory_region_set_readonly(option_rom_mr, true); + if (!is_tdx_vm()) { + option_rom_mr = g_malloc(sizeof(*option_rom_mr)); + if (machine_require_guest_memfd(machine)) { + memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom", + PC_ROM_SIZE, &error_fatal); + } else { + memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, + &error_fatal); + if (pcmc->pci_enabled) { + memory_region_set_readonly(option_rom_mr, true); + } } + memory_region_add_subregion_overlap(rom_memory, + PC_ROM_MIN_VGA, + option_rom_mr, + 1); } - memory_region_add_subregion_overlap(rom_memory, - PC_ROM_MIN_VGA, - option_rom_mr, - 1); fw_cfg = fw_cfg_arch_create(machine, x86ms->boot_cpus, x86ms->apic_id_limit); @@ -999,14 +980,13 @@ void pc_memory_init(PCMachineState *pcms, if (machine->device_memory) { uint64_t *val = g_malloc(sizeof(*val)); - uint64_t res_mem_end = machine->device_memory->base; - - if (!pcmc->broken_reserved_end) { - res_mem_end += memory_region_size(&machine->device_memory->mr); - } + uint64_t res_mem_end; if (pcms->cxl_devices_state.is_enabled) { res_mem_end = cxl_resv_end; + } else { + res_mem_end = machine->device_memory->base + + memory_region_size(&machine->device_memory->mr); } *val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB)); fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val)); @@ -1044,9 +1024,7 @@ uint64_t pc_pci_hole64_start(void) hole64_start = pc_get_cxl_range_end(pcms); } else if (pcmc->has_reserved_memory && (ms->ram_size < ms->maxram_size)) { pc_get_device_memory_range(pcms, &hole64_start, &size); - if (!pcmc->broken_reserved_end) { - hole64_start += size; - } + hole64_start += size; } else { hole64_start = pc_above_4g_end(pcms); } @@ -1058,7 +1036,6 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus) { DeviceState *dev = NULL; - rom_set_order_override(FW_CFG_ORDER_OVERRIDE_VGA); if (pci_bus) { PCIDevice *pcidev = pci_vga_init(pci_bus); dev = pcidev ? &pcidev->qdev : NULL; @@ -1066,7 +1043,7 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus) ISADevice *isadev = isa_vga_init(isa_bus); dev = isadev ? DEVICE(isadev) : NULL; } - rom_reset_order_override(); + return dev; } @@ -1256,8 +1233,6 @@ void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus) bool default_is_ne2k = g_str_equal(mc->default_nic, TYPE_ISA_NE2000); NICInfo *nd; - rom_set_order_override(FW_CFG_ORDER_OVERRIDE_NIC); - while ((nd = qemu_find_nic_info(TYPE_ISA_NE2000, default_is_ne2k, NULL))) { pc_init_ne2k_isa(isa_bus, nd, &error_fatal); } @@ -1266,8 +1241,6 @@ void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus) if (pci_bus) { pci_init_nic_devices(pci_bus, mc->default_nic); } - - rom_reset_order_override(); } void pc_i8259_create(ISABus *isa_bus, qemu_irq *i8259_irqs) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 0dce512..ea7572e 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -285,6 +285,8 @@ static void pc_init1(MachineState *machine, const char *pci_type) pcms->idebus[0] = qdev_get_child_bus(dev, "ide.0"); pcms->idebus[1] = qdev_get_child_bus(dev, "ide.1"); } else { + uint32_t irq; + isa_bus = isa_bus_new(NULL, system_memory, system_io, &error_abort); isa_bus_register_input_irqs(isa_bus, x86ms->gsi); @@ -292,6 +294,9 @@ static void pc_init1(MachineState *machine, const char *pci_type) x86ms->rtc = isa_new(TYPE_MC146818_RTC); qdev_prop_set_int32(DEVICE(x86ms->rtc), "base_year", 2000); isa_realize_and_unref(x86ms->rtc, isa_bus, &error_fatal); + irq = object_property_get_uint(OBJECT(x86ms->rtc), "irq", + &error_fatal); + isa_connect_gpio_out(ISA_DEVICE(x86ms->rtc), 0, irq); i8257_dma_init(OBJECT(machine), isa_bus, 0); pcms->hpet_enabled = false; @@ -778,32 +783,6 @@ static void pc_i440fx_machine_2_6_options(MachineClass *m) DEFINE_I440FX_MACHINE(2, 6); -static void pc_i440fx_machine_2_5_options(MachineClass *m) -{ - X86MachineClass *x86mc = X86_MACHINE_CLASS(m); - - pc_i440fx_machine_2_6_options(m); - x86mc->save_tsc_khz = false; - m->legacy_fw_cfg_order = 1; - compat_props_add(m->compat_props, hw_compat_2_5, hw_compat_2_5_len); - compat_props_add(m->compat_props, pc_compat_2_5, pc_compat_2_5_len); -} - -DEFINE_I440FX_MACHINE(2, 5); - -static void pc_i440fx_machine_2_4_options(MachineClass *m) -{ - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - - pc_i440fx_machine_2_5_options(m); - m->hw_version = "2.4.0"; - pcmc->broken_reserved_end = true; - compat_props_add(m->compat_props, hw_compat_2_4, hw_compat_2_4_len); - compat_props_add(m->compat_props, pc_compat_2_4, pc_compat_2_4_len); -} - -DEFINE_I440FX_MACHINE(2, 4); - #ifdef CONFIG_ISAPC static void isapc_machine_options(MachineClass *m) { diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index c538b3d..33211b1 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -672,29 +672,3 @@ static void pc_q35_machine_2_6_options(MachineClass *m) } DEFINE_Q35_MACHINE(2, 6); - -static void pc_q35_machine_2_5_options(MachineClass *m) -{ - X86MachineClass *x86mc = X86_MACHINE_CLASS(m); - - pc_q35_machine_2_6_options(m); - x86mc->save_tsc_khz = false; - m->legacy_fw_cfg_order = 1; - compat_props_add(m->compat_props, hw_compat_2_5, hw_compat_2_5_len); - compat_props_add(m->compat_props, pc_compat_2_5, pc_compat_2_5_len); -} - -DEFINE_Q35_MACHINE(2, 5); - -static void pc_q35_machine_2_4_options(MachineClass *m) -{ - PCMachineClass *pcmc = PC_MACHINE_CLASS(m); - - pc_q35_machine_2_5_options(m); - m->hw_version = "2.4.0"; - pcmc->broken_reserved_end = true; - compat_props_add(m->compat_props, hw_compat_2_4, hw_compat_2_4_len); - compat_props_add(m->compat_props, pc_compat_2_4, pc_compat_2_4_len); -} - -DEFINE_Q35_MACHINE(2, 4); diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c index 1eeb58a..821396c 100644 --- a/hw/i386/pc_sysfw.c +++ b/hw/i386/pc_sysfw.c @@ -37,6 +37,7 @@ #include "hw/block/flash.h" #include "system/kvm.h" #include "target/i386/sev.h" +#include "kvm/tdx.h" #define FLASH_SECTOR_SIZE 4096 @@ -280,5 +281,11 @@ void x86_firmware_configure(hwaddr gpa, void *ptr, int size) } sev_encrypt_flash(gpa, ptr, size, &error_fatal); + } else if (is_tdx_vm()) { + ret = tdx_parse_tdvf(ptr, size); + if (ret) { + error_report("failed to parse TDVF for TDX VM"); + exit(1); + } } } diff --git a/hw/i386/sgx-stub.c b/hw/i386/sgx-stub.c index ccb21a9..d295e54 100644 --- a/hw/i386/sgx-stub.c +++ b/hw/i386/sgx-stub.c @@ -3,8 +3,8 @@ #include "monitor/hmp-target.h" #include "hw/i386/pc.h" #include "hw/i386/sgx-epc.h" +#include "qapi/qapi-commands-misc-i386.h" #include "qapi/error.h" -#include "qapi/qapi-commands-misc-target.h" void sgx_epc_build_srat(GArray *table_data) { diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c index c80203b..e280154 100644 --- a/hw/i386/sgx.c +++ b/hw/i386/sgx.c @@ -19,7 +19,7 @@ #include "monitor/hmp-target.h" #include "qapi/error.h" #include "qemu/error-report.h" -#include "qapi/qapi-commands-misc-target.h" +#include "qapi/qapi-commands-misc-i386.h" #include "system/address-spaces.h" #include "system/hw_accel.h" #include "system/reset.h" diff --git a/hw/i386/tdvf-hob.c b/hw/i386/tdvf-hob.c new file mode 100644 index 0000000..782b3d1 --- /dev/null +++ b/hw/i386/tdvf-hob.c @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2025 Intel Corporation + * Author: Isaku Yamahata <isaku.yamahata at gmail.com> + * <isaku.yamahata at intel.com> + * Xiaoyao Li <xiaoyao.li@intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "standard-headers/uefi/uefi.h" +#include "hw/pci/pcie_host.h" +#include "tdvf-hob.h" + +typedef struct TdvfHob { + hwaddr hob_addr; + void *ptr; + int size; + + /* working area */ + void *current; + void *end; +} TdvfHob; + +static uint64_t tdvf_current_guest_addr(const TdvfHob *hob) +{ + return hob->hob_addr + (hob->current - hob->ptr); +} + +static void tdvf_align(TdvfHob *hob, size_t align) +{ + hob->current = QEMU_ALIGN_PTR_UP(hob->current, align); +} + +static void *tdvf_get_area(TdvfHob *hob, uint64_t size) +{ + void *ret; + + if (hob->current + size > hob->end) { + error_report("TD_HOB overrun, size = 0x%" PRIx64, size); + exit(1); + } + + ret = hob->current; + hob->current += size; + tdvf_align(hob, 8); + return ret; +} + +static void tdvf_hob_add_memory_resources(TdxGuest *tdx, TdvfHob *hob) +{ + EFI_HOB_RESOURCE_DESCRIPTOR *region; + EFI_RESOURCE_ATTRIBUTE_TYPE attr; + EFI_RESOURCE_TYPE resource_type; + + TdxRamEntry *e; + int i; + + for (i = 0; i < tdx->nr_ram_entries; i++) { + e = &tdx->ram_entries[i]; + + if (e->type == TDX_RAM_UNACCEPTED) { + resource_type = EFI_RESOURCE_MEMORY_UNACCEPTED; + attr = EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED; + } else if (e->type == TDX_RAM_ADDED) { + resource_type = EFI_RESOURCE_SYSTEM_MEMORY; + attr = EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE; + } else { + error_report("unknown TDX_RAM_ENTRY type %d", e->type); + exit(1); + } + + region = tdvf_get_area(hob, sizeof(*region)); + *region = (EFI_HOB_RESOURCE_DESCRIPTOR) { + .Header = { + .HobType = EFI_HOB_TYPE_RESOURCE_DESCRIPTOR, + .HobLength = cpu_to_le16(sizeof(*region)), + .Reserved = cpu_to_le32(0), + }, + .Owner = EFI_HOB_OWNER_ZERO, + .ResourceType = cpu_to_le32(resource_type), + .ResourceAttribute = cpu_to_le32(attr), + .PhysicalStart = cpu_to_le64(e->address), + .ResourceLength = cpu_to_le64(e->length), + }; + } +} + +void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob) +{ + TdvfHob hob = { + .hob_addr = td_hob->address, + .size = td_hob->size, + .ptr = td_hob->mem_ptr, + + .current = td_hob->mem_ptr, + .end = td_hob->mem_ptr + td_hob->size, + }; + + EFI_HOB_GENERIC_HEADER *last_hob; + EFI_HOB_HANDOFF_INFO_TABLE *hit; + + /* Note, Efi{Free}Memory{Bottom,Top} are ignored, leave 'em zeroed. */ + hit = tdvf_get_area(&hob, sizeof(*hit)); + *hit = (EFI_HOB_HANDOFF_INFO_TABLE) { + .Header = { + .HobType = EFI_HOB_TYPE_HANDOFF, + .HobLength = cpu_to_le16(sizeof(*hit)), + .Reserved = cpu_to_le32(0), + }, + .Version = cpu_to_le32(EFI_HOB_HANDOFF_TABLE_VERSION), + .BootMode = cpu_to_le32(0), + .EfiMemoryTop = cpu_to_le64(0), + .EfiMemoryBottom = cpu_to_le64(0), + .EfiFreeMemoryTop = cpu_to_le64(0), + .EfiFreeMemoryBottom = cpu_to_le64(0), + .EfiEndOfHobList = cpu_to_le64(0), /* initialized later */ + }; + + tdvf_hob_add_memory_resources(tdx, &hob); + + last_hob = tdvf_get_area(&hob, sizeof(*last_hob)); + *last_hob = (EFI_HOB_GENERIC_HEADER) { + .HobType = EFI_HOB_TYPE_END_OF_HOB_LIST, + .HobLength = cpu_to_le16(sizeof(*last_hob)), + .Reserved = cpu_to_le32(0), + }; + hit->EfiEndOfHobList = tdvf_current_guest_addr(&hob); +} diff --git a/hw/i386/tdvf-hob.h b/hw/i386/tdvf-hob.h new file mode 100644 index 0000000..4fc6a37 --- /dev/null +++ b/hw/i386/tdvf-hob.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef HW_I386_TD_HOB_H +#define HW_I386_TD_HOB_H + +#include "hw/i386/tdvf.h" +#include "target/i386/kvm/tdx.h" + +void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob); + +#define EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE \ + (EFI_RESOURCE_ATTRIBUTE_PRESENT | \ + EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \ + EFI_RESOURCE_ATTRIBUTE_TESTED) + +#define EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED \ + (EFI_RESOURCE_ATTRIBUTE_PRESENT | \ + EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \ + EFI_RESOURCE_ATTRIBUTE_TESTED) + +#define EFI_RESOURCE_ATTRIBUTE_TDVF_MMIO \ + (EFI_RESOURCE_ATTRIBUTE_PRESENT | \ + EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \ + EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE) + +#endif diff --git a/hw/i386/tdvf.c b/hw/i386/tdvf.c new file mode 100644 index 0000000..645d9d1 --- /dev/null +++ b/hw/i386/tdvf.c @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2025 Intel Corporation + * Author: Isaku Yamahata <isaku.yamahata at gmail.com> + * <isaku.yamahata at intel.com> + * Xiaoyao Li <xiaoyao.li@intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" + +#include "hw/i386/pc.h" +#include "hw/i386/tdvf.h" +#include "system/kvm.h" + +#define TDX_METADATA_OFFSET_GUID "e47a6535-984a-4798-865e-4685a7bf8ec2" +#define TDX_METADATA_VERSION 1 +#define TDVF_SIGNATURE 0x46564454 /* TDVF as little endian */ +#define TDVF_ALIGNMENT 4096 + +/* + * the raw structs read from TDVF keeps the name convention in + * TDVF Design Guide spec. + */ +typedef struct { + uint32_t DataOffset; + uint32_t RawDataSize; + uint64_t MemoryAddress; + uint64_t MemoryDataSize; + uint32_t Type; + uint32_t Attributes; +} TdvfSectionEntry; + +typedef struct { + uint32_t Signature; + uint32_t Length; + uint32_t Version; + uint32_t NumberOfSectionEntries; + TdvfSectionEntry SectionEntries[]; +} TdvfMetadata; + +struct tdx_metadata_offset { + uint32_t offset; +}; + +static TdvfMetadata *tdvf_get_metadata(void *flash_ptr, int size) +{ + TdvfMetadata *metadata; + uint32_t offset = 0; + uint8_t *data; + + if ((uint32_t) size != size) { + return NULL; + } + + if (pc_system_ovmf_table_find(TDX_METADATA_OFFSET_GUID, &data, NULL)) { + offset = size - le32_to_cpu(((struct tdx_metadata_offset *)data)->offset); + + if (offset + sizeof(*metadata) > size) { + return NULL; + } + } else { + error_report("Cannot find TDX_METADATA_OFFSET_GUID"); + return NULL; + } + + metadata = flash_ptr + offset; + + /* Finally, verify the signature to determine if this is a TDVF image. */ + metadata->Signature = le32_to_cpu(metadata->Signature); + if (metadata->Signature != TDVF_SIGNATURE) { + error_report("Invalid TDVF signature in metadata!"); + return NULL; + } + + /* Sanity check that the TDVF doesn't overlap its own metadata. */ + metadata->Length = le32_to_cpu(metadata->Length); + if (offset + metadata->Length > size) { + return NULL; + } + + /* Only version 1 is supported/defined. */ + metadata->Version = le32_to_cpu(metadata->Version); + if (metadata->Version != TDX_METADATA_VERSION) { + return NULL; + } + + return metadata; +} + +static int tdvf_parse_and_check_section_entry(const TdvfSectionEntry *src, + TdxFirmwareEntry *entry) +{ + entry->data_offset = le32_to_cpu(src->DataOffset); + entry->data_len = le32_to_cpu(src->RawDataSize); + entry->address = le64_to_cpu(src->MemoryAddress); + entry->size = le64_to_cpu(src->MemoryDataSize); + entry->type = le32_to_cpu(src->Type); + entry->attributes = le32_to_cpu(src->Attributes); + + /* sanity check */ + if (entry->size < entry->data_len) { + error_report("Broken metadata RawDataSize 0x%x MemoryDataSize 0x%"PRIx64, + entry->data_len, entry->size); + return -1; + } + if (!QEMU_IS_ALIGNED(entry->address, TDVF_ALIGNMENT)) { + error_report("MemoryAddress 0x%"PRIx64" not page aligned", entry->address); + return -1; + } + if (!QEMU_IS_ALIGNED(entry->size, TDVF_ALIGNMENT)) { + error_report("MemoryDataSize 0x%"PRIx64" not page aligned", entry->size); + return -1; + } + + switch (entry->type) { + case TDVF_SECTION_TYPE_BFV: + case TDVF_SECTION_TYPE_CFV: + /* The sections that must be copied from firmware image to TD memory */ + if (entry->data_len == 0) { + error_report("%d section with RawDataSize == 0", entry->type); + return -1; + } + break; + case TDVF_SECTION_TYPE_TD_HOB: + case TDVF_SECTION_TYPE_TEMP_MEM: + /* The sections that no need to be copied from firmware image */ + if (entry->data_len != 0) { + error_report("%d section with RawDataSize 0x%x != 0", + entry->type, entry->data_len); + return -1; + } + break; + default: + error_report("TDVF contains unsupported section type %d", entry->type); + return -1; + } + + return 0; +} + +int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size) +{ + g_autofree TdvfSectionEntry *sections = NULL; + TdvfMetadata *metadata; + ssize_t entries_size; + int i; + + metadata = tdvf_get_metadata(flash_ptr, size); + if (!metadata) { + return -EINVAL; + } + + /* load and parse metadata entries */ + fw->nr_entries = le32_to_cpu(metadata->NumberOfSectionEntries); + if (fw->nr_entries < 2) { + error_report("Invalid number of fw entries (%u) in TDVF Metadata", + fw->nr_entries); + return -EINVAL; + } + + entries_size = fw->nr_entries * sizeof(TdvfSectionEntry); + if (metadata->Length != sizeof(*metadata) + entries_size) { + error_report("TDVF metadata len (0x%x) mismatch, expected (0x%x)", + metadata->Length, + (uint32_t)(sizeof(*metadata) + entries_size)); + return -EINVAL; + } + + fw->entries = g_new(TdxFirmwareEntry, fw->nr_entries); + sections = g_new(TdvfSectionEntry, fw->nr_entries); + + memcpy(sections, (void *)metadata + sizeof(*metadata), entries_size); + + for (i = 0; i < fw->nr_entries; i++) { + if (tdvf_parse_and_check_section_entry(§ions[i], &fw->entries[i])) { + goto err; + } + } + + fw->mem_ptr = flash_ptr; + return 0; + +err: + fw->entries = 0; + g_free(fw->entries); + return -EINVAL; +} diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c index 1b0671c..b1b5f11 100644 --- a/hw/i386/x86-common.c +++ b/hw/i386/x86-common.c @@ -44,6 +44,7 @@ #include "standard-headers/asm-x86/bootparam.h" #include CONFIG_DEVICES #include "kvm/kvm_i386.h" +#include "kvm/tdx.h" #ifdef CONFIG_XEN_EMU #include "hw/xen/xen.h" @@ -1035,11 +1036,14 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, if (machine_require_guest_memfd(MACHINE(x86ms))) { memory_region_init_ram_guest_memfd(&x86ms->bios, NULL, "pc.bios", bios_size, &error_fatal); + if (is_tdx_vm()) { + tdx_set_tdvf_region(&x86ms->bios); + } } else { memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size, &error_fatal); } - if (sev_enabled()) { + if (sev_enabled() || is_tdx_vm()) { /* * The concept of a "reset" simply doesn't exist for * confidential computing guests, we have to destroy and diff --git a/hw/i386/x86.c b/hw/i386/x86.c index e2d0409..f80533d 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -382,7 +382,6 @@ static void x86_machine_class_init(ObjectClass *oc, const void *data) mc->get_default_cpu_node_id = x86_get_default_cpu_node_id; mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids; mc->kvm_type = x86_kvm_type; - x86mc->save_tsc_khz = true; x86mc->fwcfg_dma_enabled = true; nc->nmi_monitor_handler = x86_nmi; diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c index d18bef4..899f133 100644 --- a/hw/intc/arm_gic.c +++ b/hw/intc/arm_gic.c @@ -59,7 +59,7 @@ static const uint8_t gic_id_gicv2[] = { static inline int gic_get_current_cpu(GICState *s) { if (!qtest_enabled() && s->num_cpu > 1) { - return current_cpu->cpu_index; + return current_cpu->cpu_index - s->first_cpu_index; } return 0; } diff --git a/hw/intc/arm_gic_common.c b/hw/intc/arm_gic_common.c index 0f0c48d..ed5be05 100644 --- a/hw/intc/arm_gic_common.c +++ b/hw/intc/arm_gic_common.c @@ -350,6 +350,7 @@ static void arm_gic_common_linux_init(ARMLinuxBootIf *obj, static const Property arm_gic_common_properties[] = { DEFINE_PROP_UINT32("num-cpu", GICState, num_cpu, 1), + DEFINE_PROP_UINT32("first-cpu-index", GICState, first_cpu_index, 0), DEFINE_PROP_UINT32("num-irq", GICState, num_irq, 32), /* Revision can be 1 or 2 for GIC architecture specification * versions 1 or 2, or 0 to indicate the legacy 11MPCore GIC. diff --git a/hw/intc/aspeed_intc.c b/hw/intc/aspeed_intc.c index 33fcbe7..5cd786d 100644 --- a/hw/intc/aspeed_intc.c +++ b/hw/intc/aspeed_intc.c @@ -737,6 +737,7 @@ static const MemoryRegionOps aspeed_intc_ops = { .read = aspeed_intc_read, .write = aspeed_intc_write, .endianness = DEVICE_LITTLE_ENDIAN, + .impl.min_access_size = 4, .valid = { .min_access_size = 4, .max_access_size = 4, @@ -747,6 +748,7 @@ static const MemoryRegionOps aspeed_intcio_ops = { .read = aspeed_intcio_read, .write = aspeed_intcio_write, .endianness = DEVICE_LITTLE_ENDIAN, + .impl.min_access_size = 4, .valid = { .min_access_size = 4, .max_access_size = 4, @@ -757,6 +759,7 @@ static const MemoryRegionOps aspeed_ssp_intc_ops = { .read = aspeed_intc_read, .write = aspeed_ssp_intc_write, .endianness = DEVICE_LITTLE_ENDIAN, + .impl.min_access_size = 4, .valid = { .min_access_size = 4, .max_access_size = 4, @@ -767,6 +770,7 @@ static const MemoryRegionOps aspeed_ssp_intcio_ops = { .read = aspeed_intcio_read, .write = aspeed_ssp_intcio_write, .endianness = DEVICE_LITTLE_ENDIAN, + .impl.min_access_size = 4, .valid = { .min_access_size = 4, .max_access_size = 4, @@ -777,6 +781,7 @@ static const MemoryRegionOps aspeed_tsp_intc_ops = { .read = aspeed_intc_read, .write = aspeed_tsp_intc_write, .endianness = DEVICE_LITTLE_ENDIAN, + .impl.min_access_size = 4, .valid = { .min_access_size = 4, .max_access_size = 4, @@ -787,6 +792,7 @@ static const MemoryRegionOps aspeed_tsp_intcio_ops = { .read = aspeed_intcio_read, .write = aspeed_tsp_intcio_write, .endianness = DEVICE_LITTLE_ENDIAN, + .impl.min_access_size = 4, .valid = { .min_access_size = 4, .max_access_size = 4, @@ -995,7 +1001,8 @@ static AspeedINTCIRQ aspeed_2700ssp_intcio_irqs[ASPEED_INTC_MAX_INPINS] = { {5, 5, 1, R_SSPINT165_EN, R_SSPINT165_STATUS}, }; -static void aspeed_2700ssp_intcio_class_init(ObjectClass *klass, const void *data) +static void aspeed_2700ssp_intcio_class_init(ObjectClass *klass, + const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); AspeedINTCClass *aic = ASPEED_INTC_CLASS(klass); @@ -1063,7 +1070,8 @@ static AspeedINTCIRQ aspeed_2700tsp_intcio_irqs[ASPEED_INTC_MAX_INPINS] = { {5, 5, 1, R_TSPINT165_EN, R_TSPINT165_STATUS}, }; -static void aspeed_2700tsp_intcio_class_init(ObjectClass *klass, const void *data) +static void aspeed_2700tsp_intcio_class_init(ObjectClass *klass, + const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); AspeedINTCClass *aic = ASPEED_INTC_CLASS(klass); diff --git a/hw/intc/loongarch_pch_pic.c b/hw/intc/loongarch_pch_pic.c index cbba2fc..ebb33ed 100644 --- a/hw/intc/loongarch_pch_pic.c +++ b/hw/intc/loongarch_pch_pic.c @@ -82,7 +82,7 @@ static uint64_t pch_pic_read(void *opaque, hwaddr addr, uint64_t field_mask) addr -= offset; switch (addr) { case PCH_PIC_INT_ID: - val = s->id.data; + val = cpu_to_le64(s->id.data); break; case PCH_PIC_INT_MASK: val = s->int_mask; diff --git a/hw/loongarch/boot.c b/hw/loongarch/boot.c index 9b6292e..14d6c52 100644 --- a/hw/loongarch/boot.c +++ b/hw/loongarch/boot.c @@ -35,12 +35,6 @@ struct loongarch_linux_hdr { uint32_t pe_header_offset; } QEMU_PACKED; -struct memmap_entry *memmap_table; -unsigned memmap_entries; - -ram_addr_t initrd_offset; -uint64_t initrd_size; - static const unsigned int slave_boot_code[] = { /* Configure reset ebase. */ 0x0400302c, /* csrwr $t0, LOONGARCH_CSR_EENTRY */ @@ -94,12 +88,16 @@ static inline void *guidcpy(void *dst, const void *src) return memcpy(dst, src, sizeof(efi_guid_t)); } -static void init_efi_boot_memmap(struct efi_system_table *systab, +static void init_efi_boot_memmap(MachineState *ms, + struct efi_system_table *systab, void *p, void *start) { unsigned i; struct efi_boot_memmap *boot_memmap = p; efi_guid_t tbl_guid = LINUX_EFI_BOOT_MEMMAP_GUID; + LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(ms); + struct memmap_entry *memmap_table; + unsigned int memmap_entries; /* efi_configuration_table 1 */ guidcpy(&systab->tables[0].guid, &tbl_guid); @@ -111,6 +109,8 @@ static void init_efi_boot_memmap(struct efi_system_table *systab, boot_memmap->map_size = 0; efi_memory_desc_t *map = p + sizeof(struct efi_boot_memmap); + memmap_table = lvms->memmap_table; + memmap_entries = lvms->memmap_entries; for (i = 0; i < memmap_entries; i++) { map = (void *)boot_memmap + sizeof(*map); map[i].type = memmap_table[i].type; @@ -121,7 +121,8 @@ static void init_efi_boot_memmap(struct efi_system_table *systab, } } -static void init_efi_initrd_table(struct efi_system_table *systab, +static void init_efi_initrd_table(struct loongarch_boot_info *info, + struct efi_system_table *systab, void *p, void *start) { efi_guid_t tbl_guid = LINUX_EFI_INITRD_MEDIA_GUID; @@ -132,8 +133,8 @@ static void init_efi_initrd_table(struct efi_system_table *systab, systab->tables[1].table = (struct efi_configuration_table *)(p - start); systab->nr_tables = 2; - initrd_table->base = initrd_offset; - initrd_table->size = initrd_size; + initrd_table->base = info->initrd_addr; + initrd_table->size = info->initrd_size; } static void init_efi_fdt_table(struct efi_system_table *systab) @@ -146,10 +147,12 @@ static void init_efi_fdt_table(struct efi_system_table *systab) systab->nr_tables = 3; } -static void init_systab(struct loongarch_boot_info *info, void *p, void *start) +static void init_systab(MachineState *ms, + struct loongarch_boot_info *info, void *p, void *start) { void *bp_tables_start; struct efi_system_table *systab = p; + LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(ms); info->a2 = p - start; @@ -166,10 +169,10 @@ static void init_systab(struct loongarch_boot_info *info, void *p, void *start) systab->tables = p; bp_tables_start = p; - init_efi_boot_memmap(systab, p, start); + init_efi_boot_memmap(ms, systab, p, start); p += ROUND_UP(sizeof(struct efi_boot_memmap) + - sizeof(efi_memory_desc_t) * memmap_entries, 64 * KiB); - init_efi_initrd_table(systab, p, start); + sizeof(efi_memory_desc_t) * lvms->memmap_entries, 64 * KiB); + init_efi_initrd_table(info, systab, p, start); p += ROUND_UP(sizeof(struct efi_initrd), 64 * KiB); init_efi_fdt_table(systab); @@ -276,8 +279,8 @@ static ram_addr_t alloc_initrd_memory(struct loongarch_boot_info *info, static int64_t load_kernel_info(struct loongarch_boot_info *info) { - uint64_t kernel_entry, kernel_low, kernel_high; - ssize_t kernel_size; + uint64_t kernel_entry, kernel_low, kernel_high, initrd_offset = 0; + ssize_t kernel_size, initrd_size; kernel_size = load_elf(info->kernel_filename, NULL, cpu_loongarch_virt_to_phys, NULL, @@ -313,8 +316,9 @@ static int64_t load_kernel_info(struct loongarch_boot_info *info) info->initrd_filename); exit(1); } - } else { - initrd_size = 0; + + info->initrd_addr = initrd_offset; + info->initrd_size = initrd_size; } return kernel_entry; @@ -369,17 +373,19 @@ static void loongarch_firmware_boot(LoongArchVirtMachineState *lvms, fw_cfg_add_kernel_info(info, lvms->fw_cfg); } -static void init_boot_rom(struct loongarch_boot_info *info, void *p) +static void init_boot_rom(MachineState *ms, + struct loongarch_boot_info *info, void *p) { void *start = p; init_cmdline(info, p, start); p += COMMAND_LINE_SIZE; - init_systab(info, p, start); + init_systab(ms, info, p, start); } -static void loongarch_direct_kernel_boot(struct loongarch_boot_info *info) +static void loongarch_direct_kernel_boot(MachineState *ms, + struct loongarch_boot_info *info) { void *p, *bp; int64_t kernel_addr = VIRT_FLASH0_BASE; @@ -397,7 +403,7 @@ static void loongarch_direct_kernel_boot(struct loongarch_boot_info *info) /* Load cmdline and system tables at [0 - 1 MiB] */ p = g_malloc0(1 * MiB); bp = p; - init_boot_rom(info, p); + init_boot_rom(ms, info, p); rom_add_blob_fixed_as("boot_info", bp, 1 * MiB, 0, &address_space_memory); /* Load slave boot code at pflash0 . */ @@ -437,6 +443,6 @@ void loongarch_load_kernel(MachineState *ms, struct loongarch_boot_info *info) if (lvms->bios_loaded) { loongarch_firmware_boot(lvms, info); } else { - loongarch_direct_kernel_boot(info); + loongarch_direct_kernel_boot(ms, info); } } diff --git a/hw/loongarch/virt-acpi-build.c b/hw/loongarch/virt-acpi-build.c index 073b6de..2cd2d9d 100644 --- a/hw/loongarch/virt-acpi-build.c +++ b/hw/loongarch/virt-acpi-build.c @@ -575,8 +575,8 @@ static void acpi_build(AcpiBuildTables *tables, MachineState *machine) acpi_add_table(table_offsets, tables_blob); { AcpiMcfgInfo mcfg = { - .base = cpu_to_le64(VIRT_PCI_CFG_BASE), - .size = cpu_to_le64(VIRT_PCI_CFG_SIZE), + .base = VIRT_PCI_CFG_BASE, + .size = VIRT_PCI_CFG_SIZE, }; build_mcfg(tables_blob, tables->linker, &mcfg, lvms->oem_id, lvms->oem_table_id); diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c index 1b50404..34dfbd1 100644 --- a/hw/loongarch/virt.c +++ b/hw/loongarch/virt.c @@ -136,6 +136,10 @@ static void virt_build_smbios(LoongArchVirtMachineState *lvms) return; } + if (kvm_enabled()) { + product = "KVM Virtual Machine"; + } + smbios_set_defaults("QEMU", product, mc->name); smbios_get_tables(ms, SMBIOS_ENTRY_POINT_TYPE_64, @@ -168,8 +172,15 @@ static void virt_powerdown_req(Notifier *notifier, void *opaque) acpi_send_event(s->acpi_ged, ACPI_POWER_DOWN_STATUS); } -static void memmap_add_entry(uint64_t address, uint64_t length, uint32_t type) +static void memmap_add_entry(MachineState *ms, uint64_t address, + uint64_t length, uint32_t type) { + LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(ms); + struct memmap_entry *memmap_table; + unsigned int memmap_entries; + + memmap_table = lvms->memmap_table; + memmap_entries = lvms->memmap_entries; /* Ensure there are no duplicate entries. */ for (unsigned i = 0; i < memmap_entries; i++) { assert(memmap_table[i].address != address); @@ -182,6 +193,8 @@ static void memmap_add_entry(uint64_t address, uint64_t length, uint32_t type) memmap_table[memmap_entries].type = cpu_to_le32(type); memmap_table[memmap_entries].reserved = 0; memmap_entries++; + lvms->memmap_table = memmap_table; + lvms->memmap_entries = memmap_entries; } static DeviceState *create_acpi_ged(DeviceState *pch_pic, @@ -619,13 +632,13 @@ static void fw_cfg_add_memory(MachineState *ms) } if (size >= gap) { - memmap_add_entry(base, gap, 1); + memmap_add_entry(ms, base, gap, 1); size -= gap; base = VIRT_HIGHMEM_BASE; } if (size) { - memmap_add_entry(base, size, 1); + memmap_add_entry(ms, base, size, 1); base += size; } @@ -640,7 +653,7 @@ static void fw_cfg_add_memory(MachineState *ms) * lowram: [base, +(gap - numa_info[0].node_mem)) * highram: [VIRT_HIGHMEM_BASE, +(ram_size - gap)) */ - memmap_add_entry(base, gap - numa_info[0].node_mem, 1); + memmap_add_entry(ms, base, gap - numa_info[0].node_mem, 1); size = ram_size - gap; base = VIRT_HIGHMEM_BASE; } else { @@ -648,7 +661,7 @@ static void fw_cfg_add_memory(MachineState *ms) } if (size) { - memmap_add_entry(base, size, 1); + memmap_add_entry(ms, base, size, 1); } } @@ -734,8 +747,8 @@ static void virt_init(MachineState *machine) rom_set_fw(lvms->fw_cfg); if (lvms->fw_cfg != NULL) { fw_cfg_add_file(lvms->fw_cfg, "etc/memmap", - memmap_table, - sizeof(struct memmap_entry) * (memmap_entries)); + lvms->memmap_table, + sizeof(struct memmap_entry) * lvms->memmap_entries); } /* Initialize the IO interrupt subsystem */ diff --git a/hw/microblaze/petalogix_ml605_mmu.c b/hw/microblaze/petalogix_ml605_mmu.c index bea6b68..6e923c4 100644 --- a/hw/microblaze/petalogix_ml605_mmu.c +++ b/hw/microblaze/petalogix_ml605_mmu.c @@ -80,8 +80,6 @@ petalogix_ml605_init(MachineState *machine) MemoryRegion *phys_lmb_bram = g_new(MemoryRegion, 1); MemoryRegion *phys_ram = g_new(MemoryRegion, 1); qemu_irq irq[32]; - EndianMode endianness = TARGET_BIG_ENDIAN ? ENDIAN_MODE_BIG - : ENDIAN_MODE_LITTLE; /* init CPUs */ cpu = MICROBLAZE_CPU(object_new(TYPE_MICROBLAZE_CPU)); @@ -113,7 +111,7 @@ petalogix_ml605_init(MachineState *machine) dev = qdev_new("xlnx.xps-intc"); - qdev_prop_set_enum(dev, "endianness", endianness); + qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE); qdev_prop_set_uint32(dev, "kind-of-intr", 1 << TIMER_IRQ); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, INTC_BASEADDR); @@ -129,7 +127,7 @@ petalogix_ml605_init(MachineState *machine) /* 2 timers at irq 2 @ 100 Mhz. */ dev = qdev_new("xlnx.xps-timer"); - qdev_prop_set_enum(dev, "endianness", endianness); + qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE); qdev_prop_set_uint32(dev, "one-timer-only", 0); qdev_prop_set_uint32(dev, "clock-frequency", 100 * 1000000); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); @@ -177,7 +175,7 @@ petalogix_ml605_init(MachineState *machine) SSIBus *spi; dev = qdev_new("xlnx.xps-spi"); - qdev_prop_set_enum(dev, "endianness", endianness); + qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE); qdev_prop_set_uint8(dev, "num-ss-bits", NUM_SPI_FLASHES); busdev = SYS_BUS_DEVICE(dev); sysbus_realize_and_unref(busdev, &error_fatal); @@ -218,12 +216,7 @@ petalogix_ml605_init(MachineState *machine) static void petalogix_ml605_machine_init(MachineClass *mc) { - if (TARGET_BIG_ENDIAN) { - mc->desc = "PetaLogix linux refdesign for xilinx ml605 (big endian)"; - mc->deprecation_reason = "big endian support is not tested"; - } else { - mc->desc = "PetaLogix linux refdesign for xilinx ml605 (little endian)"; - } + mc->desc = "PetaLogix linux refdesign for xilinx ml605 (little endian)"; mc->init = petalogix_ml605_init; } diff --git a/hw/microblaze/petalogix_s3adsp1800_mmu.c b/hw/microblaze/petalogix_s3adsp1800_mmu.c index 032f6f7..e8d0ddf 100644 --- a/hw/microblaze/petalogix_s3adsp1800_mmu.c +++ b/hw/microblaze/petalogix_s3adsp1800_mmu.c @@ -58,9 +58,20 @@ #define TYPE_PETALOGIX_S3ADSP1800_MACHINE \ MACHINE_TYPE_NAME("petalogix-s3adsp1800") +struct S3Adsp1800MachineState { + MachineState parent_class; + + EndianMode endianness; +}; + +OBJECT_DECLARE_TYPE(S3Adsp1800MachineState, MachineClass, + PETALOGIX_S3ADSP1800_MACHINE) + + static void petalogix_s3adsp1800_init(MachineState *machine) { + S3Adsp1800MachineState *psms = PETALOGIX_S3ADSP1800_MACHINE(machine); ram_addr_t ram_size = machine->ram_size; DeviceState *dev; MicroBlazeCPU *cpu; @@ -71,13 +82,12 @@ petalogix_s3adsp1800_init(MachineState *machine) MemoryRegion *phys_ram = g_new(MemoryRegion, 1); qemu_irq irq[32]; MemoryRegion *sysmem = get_system_memory(); - EndianMode endianness = TARGET_BIG_ENDIAN ? ENDIAN_MODE_BIG - : ENDIAN_MODE_LITTLE; + EndianMode endianness = psms->endianness; cpu = MICROBLAZE_CPU(object_new(TYPE_MICROBLAZE_CPU)); object_property_set_str(OBJECT(cpu), "version", "7.10.d", &error_abort); object_property_set_bool(OBJECT(cpu), "little-endian", - !TARGET_BIG_ENDIAN, &error_abort); + endianness == ENDIAN_MODE_LITTLE, &error_abort); qdev_realize(DEVICE(cpu), NULL, &error_abort); /* Attach emulated BRAM through the LMB. */ @@ -135,20 +145,41 @@ petalogix_s3adsp1800_init(MachineState *machine) create_unimplemented_device("xps_gpio", GPIO_BASEADDR, 0x10000); - microblaze_load_kernel(cpu, !TARGET_BIG_ENDIAN, ddr_base, ram_size, - machine->initrd_filename, + microblaze_load_kernel(cpu, endianness == ENDIAN_MODE_LITTLE, ddr_base, + ram_size, machine->initrd_filename, BINARY_DEVICE_TREE_FILE, NULL); } +static int machine_get_endianness(Object *obj, Error **errp G_GNUC_UNUSED) +{ + S3Adsp1800MachineState *ms = PETALOGIX_S3ADSP1800_MACHINE(obj); + return ms->endianness; +} + +static void machine_set_endianness(Object *obj, int endianness, Error **errp) +{ + S3Adsp1800MachineState *ms = PETALOGIX_S3ADSP1800_MACHINE(obj); + ms->endianness = endianness; +} + static void petalogix_s3adsp1800_machine_class_init(ObjectClass *oc, const void *data) { MachineClass *mc = MACHINE_CLASS(oc); + ObjectProperty *prop; mc->desc = "PetaLogix linux refdesign for xilinx Spartan 3ADSP1800"; mc->init = petalogix_s3adsp1800_init; mc->is_default = true; + + prop = object_class_property_add_enum(oc, "endianness", "EndianMode", + &EndianMode_lookup, + machine_get_endianness, + machine_set_endianness); + object_property_set_default_str(prop, TARGET_BIG_ENDIAN ? "big" : "little"); + object_class_property_set_description(oc, "endianness", + "Defines whether the machine runs in big or little endian mode"); } static const TypeInfo petalogix_s3adsp1800_machine_types[] = { @@ -156,6 +187,7 @@ static const TypeInfo petalogix_s3adsp1800_machine_types[] = { .name = TYPE_PETALOGIX_S3ADSP1800_MACHINE, .parent = TYPE_MACHINE, .class_init = petalogix_s3adsp1800_machine_class_init, + .instance_size = sizeof(S3Adsp1800MachineState), }, }; diff --git a/hw/microblaze/xlnx-zynqmp-pmu.c b/hw/microblaze/xlnx-zynqmp-pmu.c index ed40b5f..e909802 100644 --- a/hw/microblaze/xlnx-zynqmp-pmu.c +++ b/hw/microblaze/xlnx-zynqmp-pmu.c @@ -181,12 +181,7 @@ static void xlnx_zynqmp_pmu_init(MachineState *machine) static void xlnx_zynqmp_pmu_machine_init(MachineClass *mc) { - if (TARGET_BIG_ENDIAN) { - mc->desc = "Xilinx ZynqMP PMU machine (big endian)"; - mc->deprecation_reason = "big endian support is not tested"; - } else { - mc->desc = "Xilinx ZynqMP PMU machine (little endian)"; - } + mc->desc = "Xilinx ZynqMP PMU machine (little endian)"; mc->init = xlnx_zynqmp_pmu_init; } diff --git a/hw/misc/aspeed_hace.c b/hw/misc/aspeed_hace.c index f4bff32..726368f 100644 --- a/hw/misc/aspeed_hace.c +++ b/hw/misc/aspeed_hace.c @@ -10,14 +10,17 @@ */ #include "qemu/osdep.h" +#include "qemu/cutils.h" #include "qemu/log.h" #include "qemu/error-report.h" +#include "qemu/iov.h" #include "hw/misc/aspeed_hace.h" #include "qapi/error.h" #include "migration/vmstate.h" #include "crypto/hash.h" #include "hw/qdev-properties.h" #include "hw/irq.h" +#include "trace.h" #define R_CRYPT_CMD (0x10 / 4) @@ -27,9 +30,12 @@ #define TAG_IRQ BIT(15) #define R_HASH_SRC (0x20 / 4) -#define R_HASH_DEST (0x24 / 4) +#define R_HASH_DIGEST (0x24 / 4) #define R_HASH_KEY_BUFF (0x28 / 4) #define R_HASH_SRC_LEN (0x2c / 4) +#define R_HASH_SRC_HI (0x90 / 4) +#define R_HASH_DIGEST_HI (0x94 / 4) +#define R_HASH_KEY_BUFF_HI (0x98 / 4) #define R_HASH_CMD (0x30 / 4) /* Hash algorithm selection */ @@ -84,6 +90,42 @@ static const struct { QCRYPTO_HASH_ALGO_SHA256 }, }; +static void hace_hexdump(const char *desc, const char *buf, size_t size) +{ + g_autoptr(GString) str = g_string_sized_new(64); + size_t len; + size_t i; + + for (i = 0; i < size; i += len) { + len = MIN(16, size - i); + g_string_truncate(str, 0); + qemu_hexdump_line(str, buf + i, len, 1, 4); + trace_aspeed_hace_hexdump(desc, i, str->str); + } +} + +static void hace_iov_hexdump(const char *desc, const struct iovec *iov, + const unsigned int iov_cnt) +{ + size_t size = 0; + char *buf; + int i; + + for (i = 0; i < iov_cnt; i++) { + size += iov[i].iov_len; + } + + buf = g_malloc(size); + + if (!buf) { + return; + } + + iov_to_buf(iov, iov_cnt, 0, buf, size); + hace_hexdump(desc, buf, size); + g_free(buf); +} + static int hash_algo_lookup(uint32_t reg) { int i; @@ -142,171 +184,269 @@ static bool has_padding(AspeedHACEState *s, struct iovec *iov, return false; } -static int reconstruct_iov(AspeedHACEState *s, struct iovec *iov, int id, - uint32_t *pad_offset) +static uint64_t hash_get_source_addr(AspeedHACEState *s) { - int i, iov_count; - if (*pad_offset != 0) { - s->iov_cache[s->iov_count].iov_base = iov[id].iov_base; - s->iov_cache[s->iov_count].iov_len = *pad_offset; - ++s->iov_count; - } - for (i = 0; i < s->iov_count; i++) { - iov[i].iov_base = s->iov_cache[i].iov_base; - iov[i].iov_len = s->iov_cache[i].iov_len; + AspeedHACEClass *ahc = ASPEED_HACE_GET_CLASS(s); + uint64_t src_addr = 0; + + src_addr = deposit64(src_addr, 0, 32, s->regs[R_HASH_SRC]); + if (ahc->has_dma64) { + src_addr = deposit64(src_addr, 32, 32, s->regs[R_HASH_SRC_HI]); } - iov_count = s->iov_count; - s->iov_count = 0; - s->total_req_len = 0; - return iov_count; + + return src_addr; } -static void do_hash_operation(AspeedHACEState *s, int algo, bool sg_mode, - bool acc_mode) +static int hash_prepare_direct_iov(AspeedHACEState *s, struct iovec *iov, + bool acc_mode, bool *acc_final_request) { - struct iovec iov[ASPEED_HACE_MAX_SG]; uint32_t total_msg_len; uint32_t pad_offset; - g_autofree uint8_t *digest_buf = NULL; - size_t digest_len = 0; - bool sg_acc_mode_final_request = false; - int i; + uint64_t src; void *haddr; - Error *local_err = NULL; + hwaddr plen; + int iov_idx; + + plen = s->regs[R_HASH_SRC_LEN]; + src = hash_get_source_addr(s); + trace_aspeed_hace_hash_addr("src", src); + haddr = address_space_map(&s->dram_as, src, &plen, false, + MEMTXATTRS_UNSPECIFIED); + if (haddr == NULL) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Unable to map address, addr=0x%" HWADDR_PRIx + " ,plen=0x%" HWADDR_PRIx "\n", + __func__, src, plen); + return -1; + } - if (acc_mode && s->hash_ctx == NULL) { - s->hash_ctx = qcrypto_hash_new(algo, &local_err); - if (s->hash_ctx == NULL) { - qemu_log_mask(LOG_GUEST_ERROR, "qcrypto hash failed : %s", - error_get_pretty(local_err)); - error_free(local_err); - return; + iov[0].iov_base = haddr; + iov_idx = 1; + + if (acc_mode) { + s->total_req_len += plen; + + if (has_padding(s, &iov[0], plen, &total_msg_len, + &pad_offset)) { + /* Padding being present indicates the final request */ + *acc_final_request = true; + iov[0].iov_len = pad_offset; + } else { + iov[0].iov_len = plen; } + } else { + iov[0].iov_len = plen; } - if (sg_mode) { - uint32_t len = 0; - - for (i = 0; !(len & SG_LIST_LEN_LAST); i++) { - uint32_t addr, src; - hwaddr plen; + return iov_idx; +} - if (i == ASPEED_HACE_MAX_SG) { - qemu_log_mask(LOG_GUEST_ERROR, - "aspeed_hace: guest failed to set end of sg list marker\n"); - break; - } +static int hash_prepare_sg_iov(AspeedHACEState *s, struct iovec *iov, + bool acc_mode, bool *acc_final_request) +{ + uint32_t total_msg_len; + uint32_t pad_offset; + uint32_t len = 0; + uint32_t sg_addr; + uint64_t src; + int iov_idx; + hwaddr plen; + void *haddr; - src = s->regs[R_HASH_SRC] + (i * SG_LIST_ENTRY_SIZE); + src = hash_get_source_addr(s); + for (iov_idx = 0; !(len & SG_LIST_LEN_LAST); iov_idx++) { + if (iov_idx == ASPEED_HACE_MAX_SG) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Failed to set end of sg list marker\n", + __func__); + return -1; + } - len = address_space_ldl_le(&s->dram_as, src, + len = address_space_ldl_le(&s->dram_as, src, + MEMTXATTRS_UNSPECIFIED, NULL); + sg_addr = address_space_ldl_le(&s->dram_as, src + SG_LIST_LEN_SIZE, MEMTXATTRS_UNSPECIFIED, NULL); + sg_addr &= SG_LIST_ADDR_MASK; + trace_aspeed_hace_hash_sg(iov_idx, src, sg_addr, len); + /* + * To maintain compatibility with older SoCs such as the AST2600, + * the AST2700 HW automatically set bit 34 of the 64-bit sg_addr. + * As a result, the firmware only needs to provide a 32-bit sg_addr + * containing bits [31:0]. This is sufficient for the AST2700, as + * it uses a DRAM offset rather than a DRAM address. + */ + plen = len & SG_LIST_LEN_MASK; + haddr = address_space_map(&s->dram_as, sg_addr, &plen, false, + MEMTXATTRS_UNSPECIFIED); - addr = address_space_ldl_le(&s->dram_as, src + SG_LIST_LEN_SIZE, - MEMTXATTRS_UNSPECIFIED, NULL); - addr &= SG_LIST_ADDR_MASK; + if (haddr == NULL) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Unable to map address, sg_addr=0x%x, " + "plen=0x%" HWADDR_PRIx "\n", + __func__, sg_addr, plen); + return -1; + } - plen = len & SG_LIST_LEN_MASK; - haddr = address_space_map(&s->dram_as, addr, &plen, false, - MEMTXATTRS_UNSPECIFIED); - if (haddr == NULL) { - qemu_log_mask(LOG_GUEST_ERROR, - "%s: qcrypto failed\n", __func__); - return; - } - iov[i].iov_base = haddr; - if (acc_mode) { - s->total_req_len += plen; - - if (has_padding(s, &iov[i], plen, &total_msg_len, - &pad_offset)) { - /* Padding being present indicates the final request */ - sg_acc_mode_final_request = true; - iov[i].iov_len = pad_offset; - } else { - iov[i].iov_len = plen; - } + src += SG_LIST_ENTRY_SIZE; + + iov[iov_idx].iov_base = haddr; + if (acc_mode) { + s->total_req_len += plen; + + if (has_padding(s, &iov[iov_idx], plen, &total_msg_len, + &pad_offset)) { + /* Padding being present indicates the final request */ + *acc_final_request = true; + iov[iov_idx].iov_len = pad_offset; } else { - iov[i].iov_len = plen; + iov[iov_idx].iov_len = plen; } + } else { + iov[iov_idx].iov_len = plen; } - } else { - hwaddr len = s->regs[R_HASH_SRC_LEN]; + } - haddr = address_space_map(&s->dram_as, s->regs[R_HASH_SRC], - &len, false, MEMTXATTRS_UNSPECIFIED); - if (haddr == NULL) { - qemu_log_mask(LOG_GUEST_ERROR, "%s: qcrypto failed\n", __func__); + return iov_idx; +} + +static uint64_t hash_get_digest_addr(AspeedHACEState *s) +{ + AspeedHACEClass *ahc = ASPEED_HACE_GET_CLASS(s); + uint64_t digest_addr = 0; + + digest_addr = deposit64(digest_addr, 0, 32, s->regs[R_HASH_DIGEST]); + if (ahc->has_dma64) { + digest_addr = deposit64(digest_addr, 32, 32, s->regs[R_HASH_DIGEST_HI]); + } + + return digest_addr; +} + +static void hash_write_digest_and_unmap_iov(AspeedHACEState *s, + struct iovec *iov, + int iov_idx, + uint8_t *digest_buf, + size_t digest_len) +{ + uint64_t digest_addr = 0; + + digest_addr = hash_get_digest_addr(s); + trace_aspeed_hace_hash_addr("digest", digest_addr); + if (address_space_write(&s->dram_as, digest_addr, + MEMTXATTRS_UNSPECIFIED, + digest_buf, digest_len)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Failed to write digest to 0x%" HWADDR_PRIx "\n", + __func__, digest_addr); + } + + if (trace_event_get_state_backends(TRACE_ASPEED_HACE_HEXDUMP)) { + hace_hexdump("digest", (char *)digest_buf, digest_len); + } + + for (; iov_idx > 0; iov_idx--) { + address_space_unmap(&s->dram_as, iov[iov_idx - 1].iov_base, + iov[iov_idx - 1].iov_len, false, + iov[iov_idx - 1].iov_len); + } +} + +static void hash_execute_non_acc_mode(AspeedHACEState *s, int algo, + struct iovec *iov, int iov_idx) +{ + g_autofree uint8_t *digest_buf = NULL; + Error *local_err = NULL; + size_t digest_len = 0; + + if (qcrypto_hash_bytesv(algo, iov, iov_idx, &digest_buf, + &digest_len, &local_err) < 0) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: qcrypto hash bytesv failed : %s", + __func__, error_get_pretty(local_err)); + error_free(local_err); + return; + } + + hash_write_digest_and_unmap_iov(s, iov, iov_idx, digest_buf, digest_len); +} + +static void hash_execute_acc_mode(AspeedHACEState *s, int algo, + struct iovec *iov, int iov_idx, + bool final_request) +{ + g_autofree uint8_t *digest_buf = NULL; + Error *local_err = NULL; + size_t digest_len = 0; + + trace_aspeed_hace_hash_execute_acc_mode(final_request); + + if (s->hash_ctx == NULL) { + s->hash_ctx = qcrypto_hash_new(algo, &local_err); + if (s->hash_ctx == NULL) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: qcrypto hash new failed : %s", + __func__, error_get_pretty(local_err)); + error_free(local_err); return; } - iov[0].iov_base = haddr; - iov[0].iov_len = len; - i = 1; - - if (s->iov_count) { - /* - * In aspeed sdk kernel driver, sg_mode is disabled in hash_final(). - * Thus if we received a request with sg_mode disabled, it is - * required to check whether cache is empty. If no, we should - * combine cached iov and the current iov. - */ - s->total_req_len += len; - if (has_padding(s, iov, len, &total_msg_len, &pad_offset)) { - i = reconstruct_iov(s, iov, 0, &pad_offset); - } - } } - if (acc_mode) { - if (qcrypto_hash_updatev(s->hash_ctx, iov, i, &local_err) < 0) { - qemu_log_mask(LOG_GUEST_ERROR, "qcrypto hash update failed : %s", - error_get_pretty(local_err)); + if (qcrypto_hash_updatev(s->hash_ctx, iov, iov_idx, &local_err) < 0) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: qcrypto hash updatev failed : %s", + __func__, error_get_pretty(local_err)); + error_free(local_err); + return; + } + + if (final_request) { + if (qcrypto_hash_finalize_bytes(s->hash_ctx, &digest_buf, + &digest_len, &local_err)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: qcrypto hash finalize bytes failed : %s", + __func__, error_get_pretty(local_err)); error_free(local_err); - return; + local_err = NULL; } - if (sg_acc_mode_final_request) { - if (qcrypto_hash_finalize_bytes(s->hash_ctx, &digest_buf, - &digest_len, &local_err)) { - qemu_log_mask(LOG_GUEST_ERROR, - "qcrypto hash finalize failed : %s", - error_get_pretty(local_err)); - error_free(local_err); - local_err = NULL; - } + qcrypto_hash_free(s->hash_ctx); + + s->hash_ctx = NULL; + s->total_req_len = 0; + } - qcrypto_hash_free(s->hash_ctx); + hash_write_digest_and_unmap_iov(s, iov, iov_idx, digest_buf, digest_len); +} - s->hash_ctx = NULL; - s->iov_count = 0; - s->total_req_len = 0; - } - } else if (qcrypto_hash_bytesv(algo, iov, i, &digest_buf, - &digest_len, &local_err) < 0) { - qemu_log_mask(LOG_GUEST_ERROR, "qcrypto hash bytesv failed : %s", - error_get_pretty(local_err)); - error_free(local_err); - return; +static void do_hash_operation(AspeedHACEState *s, int algo, bool sg_mode, + bool acc_mode) +{ + QEMU_UNINITIALIZED struct iovec iov[ASPEED_HACE_MAX_SG]; + bool acc_final_request = false; + int iov_idx = -1; + + /* Prepares the iov for hashing operations based on the selected mode */ + if (sg_mode) { + iov_idx = hash_prepare_sg_iov(s, iov, acc_mode, &acc_final_request); + } else { + iov_idx = hash_prepare_direct_iov(s, iov, acc_mode, + &acc_final_request); } - if (address_space_write(&s->dram_as, s->regs[R_HASH_DEST], - MEMTXATTRS_UNSPECIFIED, - digest_buf, digest_len)) { + if (iov_idx <= 0) { qemu_log_mask(LOG_GUEST_ERROR, - "aspeed_hace: address space write failed\n"); + "%s: Failed to prepare iov\n", __func__); + return; } - for (; i > 0; i--) { - address_space_unmap(&s->dram_as, iov[i - 1].iov_base, - iov[i - 1].iov_len, false, - iov[i - 1].iov_len); + if (trace_event_get_state_backends(TRACE_ASPEED_HACE_HEXDUMP)) { + hace_iov_hexdump("plaintext", iov, iov_idx); } - /* - * Set status bits to indicate completion. Testing shows hardware sets - * these irrespective of HASH_IRQ_EN. - */ - s->regs[R_STATUS] |= HASH_IRQ; + /* Executes the hash operation */ + if (acc_mode) { + hash_execute_acc_mode(s, algo, iov, iov_idx, acc_final_request); + } else { + hash_execute_non_acc_mode(s, algo, iov, iov_idx); + } } static uint64_t aspeed_hace_read(void *opaque, hwaddr addr, unsigned int size) @@ -315,12 +455,7 @@ static uint64_t aspeed_hace_read(void *opaque, hwaddr addr, unsigned int size) addr >>= 2; - if (addr >= ASPEED_HACE_NR_REGS) { - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Out-of-bounds read at offset 0x%" HWADDR_PRIx "\n", - __func__, addr << 2); - return 0; - } + trace_aspeed_hace_read(addr << 2, s->regs[addr]); return s->regs[addr]; } @@ -333,12 +468,7 @@ static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data, addr >>= 2; - if (addr >= ASPEED_HACE_NR_REGS) { - qemu_log_mask(LOG_GUEST_ERROR, - "%s: Out-of-bounds write at offset 0x%" HWADDR_PRIx "\n", - __func__, addr << 2); - return; - } + trace_aspeed_hace_write(addr << 2, data); switch (addr) { case R_STATUS: @@ -362,7 +492,7 @@ static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data, case R_HASH_SRC: data &= ahc->src_mask; break; - case R_HASH_DEST: + case R_HASH_DIGEST: data &= ahc->dest_mask; break; case R_HASH_KEY_BUFF: @@ -390,10 +520,16 @@ static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data, qemu_log_mask(LOG_GUEST_ERROR, "%s: Invalid hash algorithm selection 0x%"PRIx64"\n", __func__, data & ahc->hash_mask); - break; + } else { + do_hash_operation(s, algo, data & HASH_SG_EN, + ((data & HASH_HMAC_MASK) == HASH_DIGEST_ACCUM)); } - do_hash_operation(s, algo, data & HASH_SG_EN, - ((data & HASH_HMAC_MASK) == HASH_DIGEST_ACCUM)); + + /* + * Set status bits to indicate completion. Testing shows hardware sets + * these irrespective of HASH_IRQ_EN. + */ + s->regs[R_STATUS] |= HASH_IRQ; if (data & HASH_IRQ_EN) { qemu_irq_raise(s->irq); @@ -410,6 +546,15 @@ static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data, } } break; + case R_HASH_SRC_HI: + data &= ahc->src_hi_mask; + break; + case R_HASH_DIGEST_HI: + data &= ahc->dest_hi_mask; + break; + case R_HASH_KEY_BUFF_HI: + data &= ahc->key_hi_mask; + break; default: break; } @@ -430,14 +575,14 @@ static const MemoryRegionOps aspeed_hace_ops = { static void aspeed_hace_reset(DeviceState *dev) { struct AspeedHACEState *s = ASPEED_HACE(dev); + AspeedHACEClass *ahc = ASPEED_HACE_GET_CLASS(s); if (s->hash_ctx != NULL) { qcrypto_hash_free(s->hash_ctx); s->hash_ctx = NULL; } - memset(s->regs, 0, sizeof(s->regs)); - s->iov_count = 0; + memset(s->regs, 0, ahc->nr_regs << 2); s->total_req_len = 0; } @@ -445,11 +590,13 @@ static void aspeed_hace_realize(DeviceState *dev, Error **errp) { AspeedHACEState *s = ASPEED_HACE(dev); SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + AspeedHACEClass *ahc = ASPEED_HACE_GET_CLASS(s); sysbus_init_irq(sbd, &s->irq); + s->regs = g_new(uint32_t, ahc->nr_regs); memory_region_init_io(&s->iomem, OBJECT(s), &aspeed_hace_ops, s, - TYPE_ASPEED_HACE, 0x1000); + TYPE_ASPEED_HACE, ahc->nr_regs << 2); if (!s->dram_mr) { error_setg(errp, TYPE_ASPEED_HACE ": 'dram' link not set"); @@ -469,21 +616,28 @@ static const Property aspeed_hace_properties[] = { static const VMStateDescription vmstate_aspeed_hace = { .name = TYPE_ASPEED_HACE, - .version_id = 1, - .minimum_version_id = 1, + .version_id = 2, + .minimum_version_id = 2, .fields = (const VMStateField[]) { - VMSTATE_UINT32_ARRAY(regs, AspeedHACEState, ASPEED_HACE_NR_REGS), VMSTATE_UINT32(total_req_len, AspeedHACEState), - VMSTATE_UINT32(iov_count, AspeedHACEState), VMSTATE_END_OF_LIST(), } }; +static void aspeed_hace_unrealize(DeviceState *dev) +{ + AspeedHACEState *s = ASPEED_HACE(dev); + + g_free(s->regs); + s->regs = NULL; +} + static void aspeed_hace_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); dc->realize = aspeed_hace_realize; + dc->unrealize = aspeed_hace_unrealize; device_class_set_legacy_reset(dc, aspeed_hace_reset); device_class_set_props(dc, aspeed_hace_properties); dc->vmsd = &vmstate_aspeed_hace; @@ -504,6 +658,7 @@ static void aspeed_ast2400_hace_class_init(ObjectClass *klass, const void *data) dc->desc = "AST2400 Hash and Crypto Engine"; + ahc->nr_regs = 0x64 >> 2; ahc->src_mask = 0x0FFFFFFF; ahc->dest_mask = 0x0FFFFFF8; ahc->key_mask = 0x0FFFFFC0; @@ -523,6 +678,7 @@ static void aspeed_ast2500_hace_class_init(ObjectClass *klass, const void *data) dc->desc = "AST2500 Hash and Crypto Engine"; + ahc->nr_regs = 0x64 >> 2; ahc->src_mask = 0x3fffffff; ahc->dest_mask = 0x3ffffff8; ahc->key_mask = 0x3FFFFFC0; @@ -542,6 +698,7 @@ static void aspeed_ast2600_hace_class_init(ObjectClass *klass, const void *data) dc->desc = "AST2600 Hash and Crypto Engine"; + ahc->nr_regs = 0x64 >> 2; ahc->src_mask = 0x7FFFFFFF; ahc->dest_mask = 0x7FFFFFF8; ahc->key_mask = 0x7FFFFFF8; @@ -561,6 +718,7 @@ static void aspeed_ast1030_hace_class_init(ObjectClass *klass, const void *data) dc->desc = "AST1030 Hash and Crypto Engine"; + ahc->nr_regs = 0x64 >> 2; ahc->src_mask = 0x7FFFFFFF; ahc->dest_mask = 0x7FFFFFF8; ahc->key_mask = 0x7FFFFFF8; @@ -580,17 +738,36 @@ static void aspeed_ast2700_hace_class_init(ObjectClass *klass, const void *data) dc->desc = "AST2700 Hash and Crypto Engine"; + ahc->nr_regs = 0x9C >> 2; ahc->src_mask = 0x7FFFFFFF; ahc->dest_mask = 0x7FFFFFF8; ahc->key_mask = 0x7FFFFFF8; ahc->hash_mask = 0x00147FFF; /* + * The AST2700 supports a maximum DRAM size of 8 GB, with a DRAM + * addressable range from 0x0_0000_0000 to 0x1_FFFF_FFFF. Since this range + * fits within 34 bits, only bits [33:0] are needed to store the DRAM + * offset. To optimize address storage, the high physical address bits + * [1:0] of the source, digest and key buffer addresses are stored as + * dram_offset bits [33:32]. + * + * This approach eliminates the need to reduce the high part of the DRAM + * physical address for DMA operations. Previously, this was calculated as + * (high physical address bits [7:0] - 4), since the DRAM start address is + * 0x4_00000000, making the high part address [7:0] - 4. + */ + ahc->src_hi_mask = 0x00000003; + ahc->dest_hi_mask = 0x00000003; + ahc->key_hi_mask = 0x00000003; + + /* * Currently, it does not support the CRYPT command. Instead, it only * sends an interrupt to notify the firmware that the crypt command * has completed. It is a temporary workaround. */ ahc->raise_crypt_interrupt_workaround = true; + ahc->has_dma64 = true; } static const TypeInfo aspeed_ast2700_hace_info = { diff --git a/hw/misc/stm32_rcc.c b/hw/misc/stm32_rcc.c index 94e8dae..5815b3e 100644 --- a/hw/misc/stm32_rcc.c +++ b/hw/misc/stm32_rcc.c @@ -60,7 +60,7 @@ static void stm32_rcc_write(void *opaque, hwaddr addr, uint32_t value = val64; uint32_t prev_value, new_value, irq_offset; - trace_stm32_rcc_write(value, addr); + trace_stm32_rcc_write(addr, value); if (addr > STM32_RCC_DCKCFGR2) { qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%"HWADDR_PRIx"\n", diff --git a/hw/misc/trace-events b/hw/misc/trace-events index 4383808..e3f64c0 100644 --- a/hw/misc/trace-events +++ b/hw/misc/trace-events @@ -302,6 +302,14 @@ aspeed_peci_read(uint64_t offset, uint64_t data) "offset 0x%" PRIx64 " data 0x%" aspeed_peci_write(uint64_t offset, uint64_t data) "offset 0x%" PRIx64 " data 0x%" PRIx64 aspeed_peci_raise_interrupt(uint32_t ctrl, uint32_t status) "ctrl 0x%" PRIx32 " status 0x%" PRIx32 +# aspeed_hace.c +aspeed_hace_read(uint64_t offset, uint64_t data) "offset 0x%" PRIx64 " data 0x%" PRIx64 +aspeed_hace_write(uint64_t offset, uint64_t data) "offset 0x%" PRIx64 " data 0x%" PRIx64 +aspeed_hace_hash_sg(int index, uint64_t list_addr, uint64_t buf_addr, uint32_t len) "%d: list_addr 0x%" PRIx64 " buf_addr 0x%" PRIx64 " len 0x%" PRIx32 +aspeed_hace_hash_addr(const char *s, uint64_t addr) "%s: 0x%" PRIx64 +aspeed_hace_hash_execute_acc_mode(bool final_request) "final request: %d" +aspeed_hace_hexdump(const char *desc, uint32_t offset, char *s) "%s: 0x%08x: %s" + # bcm2835_property.c bcm2835_mbox_property(uint32_t tag, uint32_t bufsize, size_t resplen) "mbox property tag:0x%08x in_sz:%u out_sz:%zu" diff --git a/hw/net/e1000.c b/hw/net/e1000.c index cba4999..a80a7b0 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -127,10 +127,8 @@ struct E1000State_st { QEMUTimer *flush_queue_timer; /* Compatibility flags for migration to/from qemu 1.3.0 and older */ -#define E1000_FLAG_MAC_BIT 2 #define E1000_FLAG_TSO_BIT 3 #define E1000_FLAG_VET_BIT 4 -#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT) #define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT) #define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT) @@ -1212,52 +1210,51 @@ enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) }; enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 }; -#define markflag(x) ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED) /* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p] * f - flag bits (up to 6 possible flags) * n - flag needed - * p - partially implenented */ + * p - partially implemented */ static const uint8_t mac_reg_access[0x8000] = { - [IPAV] = markflag(MAC), [WUC] = markflag(MAC), - [IP6AT] = markflag(MAC), [IP4AT] = markflag(MAC), - [FFVT] = markflag(MAC), [WUPM] = markflag(MAC), - [ECOL] = markflag(MAC), [MCC] = markflag(MAC), - [DC] = markflag(MAC), [TNCRS] = markflag(MAC), - [RLEC] = markflag(MAC), [XONRXC] = markflag(MAC), - [XOFFTXC] = markflag(MAC), [RFC] = markflag(MAC), - [TSCTFC] = markflag(MAC), [MGTPRC] = markflag(MAC), - [WUS] = markflag(MAC), [AIT] = markflag(MAC), - [FFLT] = markflag(MAC), [FFMT] = markflag(MAC), - [SCC] = markflag(MAC), [FCRUC] = markflag(MAC), - [LATECOL] = markflag(MAC), [COLC] = markflag(MAC), - [SEQEC] = markflag(MAC), [CEXTERR] = markflag(MAC), - [XONTXC] = markflag(MAC), [XOFFRXC] = markflag(MAC), - [RJC] = markflag(MAC), [RNBC] = markflag(MAC), - [MGTPDC] = markflag(MAC), [MGTPTC] = markflag(MAC), - [RUC] = markflag(MAC), [ROC] = markflag(MAC), - [GORCL] = markflag(MAC), [GORCH] = markflag(MAC), - [GOTCL] = markflag(MAC), [GOTCH] = markflag(MAC), - [BPRC] = markflag(MAC), [MPRC] = markflag(MAC), - [TSCTC] = markflag(MAC), [PRC64] = markflag(MAC), - [PRC127] = markflag(MAC), [PRC255] = markflag(MAC), - [PRC511] = markflag(MAC), [PRC1023] = markflag(MAC), - [PRC1522] = markflag(MAC), [PTC64] = markflag(MAC), - [PTC127] = markflag(MAC), [PTC255] = markflag(MAC), - [PTC511] = markflag(MAC), [PTC1023] = markflag(MAC), - [PTC1522] = markflag(MAC), [MPTC] = markflag(MAC), - [BPTC] = markflag(MAC), - - [TDFH] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [TDFT] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [RDFH] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [RDFT] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL, - [PBM] = markflag(MAC) | MAC_ACCESS_PARTIAL, + [IPAV] = MAC_ACCESS_FLAG_NEEDED, [WUC] = MAC_ACCESS_FLAG_NEEDED, + [IP6AT] = MAC_ACCESS_FLAG_NEEDED, [IP4AT] = MAC_ACCESS_FLAG_NEEDED, + [FFVT] = MAC_ACCESS_FLAG_NEEDED, [WUPM] = MAC_ACCESS_FLAG_NEEDED, + [ECOL] = MAC_ACCESS_FLAG_NEEDED, [MCC] = MAC_ACCESS_FLAG_NEEDED, + [DC] = MAC_ACCESS_FLAG_NEEDED, [TNCRS] = MAC_ACCESS_FLAG_NEEDED, + [RLEC] = MAC_ACCESS_FLAG_NEEDED, [XONRXC] = MAC_ACCESS_FLAG_NEEDED, + [XOFFTXC] = MAC_ACCESS_FLAG_NEEDED, [RFC] = MAC_ACCESS_FLAG_NEEDED, + [TSCTFC] = MAC_ACCESS_FLAG_NEEDED, [MGTPRC] = MAC_ACCESS_FLAG_NEEDED, + [WUS] = MAC_ACCESS_FLAG_NEEDED, [AIT] = MAC_ACCESS_FLAG_NEEDED, + [FFLT] = MAC_ACCESS_FLAG_NEEDED, [FFMT] = MAC_ACCESS_FLAG_NEEDED, + [SCC] = MAC_ACCESS_FLAG_NEEDED, [FCRUC] = MAC_ACCESS_FLAG_NEEDED, + [LATECOL] = MAC_ACCESS_FLAG_NEEDED, [COLC] = MAC_ACCESS_FLAG_NEEDED, + [SEQEC] = MAC_ACCESS_FLAG_NEEDED, [CEXTERR] = MAC_ACCESS_FLAG_NEEDED, + [XONTXC] = MAC_ACCESS_FLAG_NEEDED, [XOFFRXC] = MAC_ACCESS_FLAG_NEEDED, + [RJC] = MAC_ACCESS_FLAG_NEEDED, [RNBC] = MAC_ACCESS_FLAG_NEEDED, + [MGTPDC] = MAC_ACCESS_FLAG_NEEDED, [MGTPTC] = MAC_ACCESS_FLAG_NEEDED, + [RUC] = MAC_ACCESS_FLAG_NEEDED, [ROC] = MAC_ACCESS_FLAG_NEEDED, + [GORCL] = MAC_ACCESS_FLAG_NEEDED, [GORCH] = MAC_ACCESS_FLAG_NEEDED, + [GOTCL] = MAC_ACCESS_FLAG_NEEDED, [GOTCH] = MAC_ACCESS_FLAG_NEEDED, + [BPRC] = MAC_ACCESS_FLAG_NEEDED, [MPRC] = MAC_ACCESS_FLAG_NEEDED, + [TSCTC] = MAC_ACCESS_FLAG_NEEDED, [PRC64] = MAC_ACCESS_FLAG_NEEDED, + [PRC127] = MAC_ACCESS_FLAG_NEEDED, [PRC255] = MAC_ACCESS_FLAG_NEEDED, + [PRC511] = MAC_ACCESS_FLAG_NEEDED, [PRC1023] = MAC_ACCESS_FLAG_NEEDED, + [PRC1522] = MAC_ACCESS_FLAG_NEEDED, [PTC64] = MAC_ACCESS_FLAG_NEEDED, + [PTC127] = MAC_ACCESS_FLAG_NEEDED, [PTC255] = MAC_ACCESS_FLAG_NEEDED, + [PTC511] = MAC_ACCESS_FLAG_NEEDED, [PTC1023] = MAC_ACCESS_FLAG_NEEDED, + [PTC1522] = MAC_ACCESS_FLAG_NEEDED, [MPTC] = MAC_ACCESS_FLAG_NEEDED, + [BPTC] = MAC_ACCESS_FLAG_NEEDED, + + [TDFH] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [TDFT] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [TDFHS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [TDFTS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [TDFPC] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [RDFH] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [RDFT] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [RDFHS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [RDFTS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [RDFPC] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, + [PBM] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL, }; static void @@ -1419,13 +1416,6 @@ static int e1000_tx_tso_post_load(void *opaque, int version_id) return 0; } -static bool e1000_full_mac_needed(void *opaque) -{ - E1000State *s = opaque; - - return chkflag(MAC); -} - static bool e1000_tso_state_needed(void *opaque) { E1000State *s = opaque; @@ -1451,7 +1441,6 @@ static const VMStateDescription vmstate_e1000_full_mac_state = { .name = "e1000/full_mac_state", .version_id = 1, .minimum_version_id = 1, - .needed = e1000_full_mac_needed, .fields = (const VMStateField[]) { VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000), VMSTATE_END_OF_LIST() @@ -1679,8 +1668,6 @@ static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp) static const Property e1000_properties[] = { DEFINE_NIC_PROPERTIES(E1000State, conf), - DEFINE_PROP_BIT("extra_mac_registers", E1000State, - compat_flags, E1000_FLAG_MAC_BIT, true), DEFINE_PROP_BIT("migrate_tso_props", E1000State, compat_flags, E1000_FLAG_TSO_BIT, true), DEFINE_PROP_BIT("init-vet", E1000State, diff --git a/hw/net/fsl_etsec/etsec.c b/hw/net/fsl_etsec/etsec.c index d14cb2a..846f6cb 100644 --- a/hw/net/fsl_etsec/etsec.c +++ b/hw/net/fsl_etsec/etsec.c @@ -389,6 +389,7 @@ static void etsec_realize(DeviceState *dev, Error **errp) { eTSEC *etsec = ETSEC_COMMON(dev); + qemu_macaddr_default_if_unset(&etsec->conf.macaddr); etsec->nic = qemu_new_nic(&net_etsec_info, &etsec->conf, object_get_typename(OBJECT(dev)), dev->id, &dev->mem_reentrancy_guard, etsec); diff --git a/hw/net/i82596.c b/hw/net/i82596.c index 64ed3c8..c1ff3e6 100644 --- a/hw/net/i82596.c +++ b/hw/net/i82596.c @@ -5,7 +5,7 @@ * This work is licensed under the GNU GPL license version 2 or later. * * This software was written to be compatible with the specification: - * https://www.intel.com/assets/pdf/general/82596ca.pdf + * https://parisc.docs.kernel.org/en/latest/_downloads/96672be0650d9fc046bbcea40b92482f/82596CA.pdf */ #include "qemu/osdep.h" @@ -177,6 +177,26 @@ static void set_individual_address(I82596State *s, uint32_t addr) trace_i82596_new_mac(nc->info_str); } +static void i82596_configure(I82596State *s, uint32_t addr) +{ + uint8_t byte_cnt; + byte_cnt = get_byte(addr + 8) & 0x0f; + + byte_cnt = MAX(byte_cnt, 4); + byte_cnt = MIN(byte_cnt, sizeof(s->config)); + /* copy byte_cnt max. */ + address_space_read(&address_space_memory, addr + 8, + MEMTXATTRS_UNSPECIFIED, s->config, byte_cnt); + /* config byte according to page 35ff */ + s->config[2] &= 0x82; /* mask valid bits */ + s->config[2] |= 0x40; + s->config[7] &= 0xf7; /* clear zero bit */ + assert(I596_NOCRC_INS == 0); /* do CRC insertion */ + s->config[10] = MAX(s->config[10], 5); /* min frame length */ + s->config[12] &= 0x40; /* only full duplex field valid */ + s->config[13] |= 0x3f; /* set ones in byte 13 */ +} + static void set_multicast_list(I82596State *s, uint32_t addr) { uint16_t mc_count, i; @@ -234,7 +254,6 @@ static void command_loop(I82596State *s) { uint16_t cmd; uint16_t status; - uint8_t byte_cnt; DBG(printf("STARTING COMMAND LOOP cmd_p=%08x\n", s->cmd_p)); @@ -254,20 +273,7 @@ static void command_loop(I82596State *s) set_individual_address(s, s->cmd_p); break; case CmdConfigure: - byte_cnt = get_byte(s->cmd_p + 8) & 0x0f; - byte_cnt = MAX(byte_cnt, 4); - byte_cnt = MIN(byte_cnt, sizeof(s->config)); - /* copy byte_cnt max. */ - address_space_read(&address_space_memory, s->cmd_p + 8, - MEMTXATTRS_UNSPECIFIED, s->config, byte_cnt); - /* config byte according to page 35ff */ - s->config[2] &= 0x82; /* mask valid bits */ - s->config[2] |= 0x40; - s->config[7] &= 0xf7; /* clear zero bit */ - assert(I596_NOCRC_INS == 0); /* do CRC insertion */ - s->config[10] = MAX(s->config[10], 5); /* min frame length */ - s->config[12] &= 0x40; /* only full duplex field valid */ - s->config[13] |= 0x3f; /* set ones in byte 13 */ + i82596_configure(s, s->cmd_p); break; case CmdTDR: /* get signal LINK */ diff --git a/hw/net/rocker/rocker.h b/hw/net/rocker/rocker.h index 6e0962f..ae06c1c 100644 --- a/hw/net/rocker/rocker.h +++ b/hw/net/rocker/rocker.h @@ -36,15 +36,7 @@ static inline G_GNUC_PRINTF(1, 2) int DPRINTF(const char *fmt, ...) } #endif -#define __le16 uint16_t -#define __le32 uint32_t -#define __le64 uint64_t - -#define __be16 uint16_t -#define __be32 uint32_t -#define __be64 uint64_t - -static inline bool ipv4_addr_is_multicast(__be32 addr) +static inline bool ipv4_addr_is_multicast(uint32_t addr) { return (addr & htonl(0xf0000000)) == htonl(0xe0000000); } @@ -52,8 +44,8 @@ static inline bool ipv4_addr_is_multicast(__be32 addr) typedef struct ipv6_addr { union { uint8_t addr8[16]; - __be16 addr16[8]; - __be32 addr32[4]; + uint16_t addr16[8]; + uint32_t addr32[4]; }; } Ipv6Addr; diff --git a/hw/net/rocker/rocker_hw.h b/hw/net/rocker/rocker_hw.h index 1786323..7ec6bfb 100644 --- a/hw/net/rocker/rocker_hw.h +++ b/hw/net/rocker/rocker_hw.h @@ -9,10 +9,6 @@ #ifndef ROCKER_HW_H #define ROCKER_HW_H -#define __le16 uint16_t -#define __le32 uint32_t -#define __le64 uint64_t - /* * Return codes */ @@ -124,12 +120,12 @@ enum { */ typedef struct rocker_desc { - __le64 buf_addr; + uint64_t buf_addr; uint64_t cookie; - __le16 buf_size; - __le16 tlv_size; - __le16 rsvd[5]; /* pad to 32 bytes */ - __le16 comp_err; + uint16_t buf_size; + uint16_t tlv_size; + uint16_t rsvd[5]; /* pad to 32 bytes */ + uint16_t comp_err; } __attribute__((packed, aligned(8))) RockerDesc; /* @@ -137,9 +133,9 @@ typedef struct rocker_desc { */ typedef struct rocker_tlv { - __le32 type; - __le16 len; - __le16 rsvd; + uint32_t type; + uint16_t len; + uint16_t rsvd; } __attribute__((packed, aligned(8))) RockerTlv; /* cmd msg */ diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c index 3378f63..4aed178 100644 --- a/hw/net/rocker/rocker_of_dpa.c +++ b/hw/net/rocker/rocker_of_dpa.c @@ -52,10 +52,10 @@ typedef struct of_dpa_flow_key { uint32_t tunnel_id; /* overlay tunnel id */ uint32_t tbl_id; /* table id */ struct { - __be16 vlan_id; /* 0 if no VLAN */ + uint16_t vlan_id; /* 0 if no VLAN */ MACAddr src; /* ethernet source address */ MACAddr dst; /* ethernet destination address */ - __be16 type; /* ethernet frame type */ + uint16_t type; /* ethernet frame type */ } eth; struct { uint8_t proto; /* IP protocol or ARP opcode */ @@ -66,14 +66,14 @@ typedef struct of_dpa_flow_key { union { struct { struct { - __be32 src; /* IP source address */ - __be32 dst; /* IP destination address */ + uint32_t src; /* IP source address */ + uint32_t dst; /* IP destination address */ } addr; union { struct { - __be16 src; /* TCP/UDP/SCTP source port */ - __be16 dst; /* TCP/UDP/SCTP destination port */ - __be16 flags; /* TCP flags */ + uint16_t src; /* TCP/UDP/SCTP source port */ + uint16_t dst; /* TCP/UDP/SCTP destination port */ + uint16_t flags; /* TCP flags */ } tp; struct { MACAddr sha; /* ARP source hardware address */ @@ -86,11 +86,11 @@ typedef struct of_dpa_flow_key { Ipv6Addr src; /* IPv6 source address */ Ipv6Addr dst; /* IPv6 destination address */ } addr; - __be32 label; /* IPv6 flow label */ + uint32_t label; /* IPv6 flow label */ struct { - __be16 src; /* TCP/UDP/SCTP source port */ - __be16 dst; /* TCP/UDP/SCTP destination port */ - __be16 flags; /* TCP flags */ + uint16_t src; /* TCP/UDP/SCTP source port */ + uint16_t dst; /* TCP/UDP/SCTP destination port */ + uint16_t flags; /* TCP flags */ } tp; struct { Ipv6Addr target; /* ND target address */ @@ -112,13 +112,13 @@ typedef struct of_dpa_flow_action { struct { uint32_t group_id; uint32_t tun_log_lport; - __be16 vlan_id; + uint16_t vlan_id; } write; struct { - __be16 new_vlan_id; + uint16_t new_vlan_id; uint32_t out_pport; uint8_t copy_to_cpu; - __be16 vlan_id; + uint16_t vlan_id; } apply; } OfDpaFlowAction; @@ -143,7 +143,7 @@ typedef struct of_dpa_flow { typedef struct of_dpa_flow_pkt_fields { uint32_t tunnel_id; struct eth_header *ethhdr; - __be16 *h_proto; + uint16_t *h_proto; struct vlan_header *vlanhdr; struct ip_header *ipv4hdr; struct ip6_header *ipv6hdr; @@ -180,7 +180,7 @@ typedef struct of_dpa_group { uint32_t group_id; MACAddr src_mac; MACAddr dst_mac; - __be16 vlan_id; + uint16_t vlan_id; } l2_rewrite; struct { uint16_t group_count; @@ -190,13 +190,13 @@ typedef struct of_dpa_group { uint32_t group_id; MACAddr src_mac; MACAddr dst_mac; - __be16 vlan_id; + uint16_t vlan_id; uint8_t ttl_check; } l3_unicast; }; } OfDpaGroup; -static int of_dpa_mask2prefix(__be32 mask) +static int of_dpa_mask2prefix(uint32_t mask) { int i; int count = 32; @@ -451,7 +451,7 @@ static void of_dpa_flow_pkt_parse(OfDpaFlowContext *fc, fc->iovcnt = iovcnt + 2; } -static void of_dpa_flow_pkt_insert_vlan(OfDpaFlowContext *fc, __be16 vlan_id) +static void of_dpa_flow_pkt_insert_vlan(OfDpaFlowContext *fc, uint16_t vlan_id) { OfDpaFlowPktFields *fields = &fc->fields; uint16_t h_proto = fields->ethhdr->h_proto; @@ -486,7 +486,7 @@ static void of_dpa_flow_pkt_strip_vlan(OfDpaFlowContext *fc) static void of_dpa_flow_pkt_hdr_rewrite(OfDpaFlowContext *fc, uint8_t *src_mac, uint8_t *dst_mac, - __be16 vlan_id) + uint16_t vlan_id) { OfDpaFlowPktFields *fields = &fc->fields; diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c index 15b8f75..654a087 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -1816,7 +1816,7 @@ static int rtl8139_transmit_one(RTL8139State *s, int descriptor) PCIDevice *d = PCI_DEVICE(s); int txsize = s->TxStatus[descriptor] & 0x1fff; - uint8_t txbuffer[0x2000]; + QEMU_UNINITIALIZED uint8_t txbuffer[0x2000]; DPRINTF("+++ transmit reading %d bytes from host memory at 0x%08x\n", txsize, s->TxAddr[descriptor]); diff --git a/hw/net/tulip.c b/hw/net/tulip.c index 63fe513..319af90 100644 --- a/hw/net/tulip.c +++ b/hw/net/tulip.c @@ -629,7 +629,7 @@ static void tulip_setup_filter_addr(TULIPState *s, uint8_t *buf, int n) static void tulip_setup_frame(TULIPState *s, struct tulip_descriptor *desc) { - uint8_t buf[4096]; + QEMU_UNINITIALIZED uint8_t buf[4096]; int len = (desc->control >> TDES1_BUF1_SIZE_SHIFT) & TDES1_BUF1_SIZE_MASK; int i; diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 221252e..eb93607 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1911,9 +1911,9 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, VirtIONet *n = qemu_get_nic_opaque(nc); VirtIONetQueue *q; VirtIODevice *vdev = VIRTIO_DEVICE(n); - VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE]; - size_t lens[VIRTQUEUE_MAX_SIZE]; - struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE]; + QEMU_UNINITIALIZED VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE]; + QEMU_UNINITIALIZED size_t lens[VIRTQUEUE_MAX_SIZE]; + QEMU_UNINITIALIZED struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE]; struct virtio_net_hdr_v1_hash extra_hdr; unsigned mhdr_cnt = 0; size_t offset, i, guest_offset, j; diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 83d942a..7c0ca56 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -41,19 +41,9 @@ #define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1 #define VMXNET3_MSIX_BAR_SIZE 0x2000 -/* Compatibility flags for migration */ -#define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT 0 -#define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS \ - (1 << VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT) -#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT 1 -#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE \ - (1 << VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT) - #define VMXNET3_EXP_EP_OFFSET (0x48) -#define VMXNET3_MSI_OFFSET(s) \ - ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x50 : 0x84) -#define VMXNET3_MSIX_OFFSET(s) \ - ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0 : 0x9c) +#define VMXNET3_MSI_OFFSET (0x84) +#define VMXNET3_MSIX_OFFSET (0x9c) #define VMXNET3_DSN_OFFSET (0x100) #define VMXNET3_BAR0_IDX (0) @@ -61,8 +51,7 @@ #define VMXNET3_MSIX_BAR_IDX (2) #define VMXNET3_OFF_MSIX_TABLE (0x000) -#define VMXNET3_OFF_MSIX_PBA(s) \ - ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x800 : 0x1000) +#define VMXNET3_OFF_MSIX_PBA (0x1000) /* Link speed in Mbps should be shifted by 16 */ #define VMXNET3_LINK_SPEED (1000 << 16) @@ -2122,8 +2111,8 @@ vmxnet3_init_msix(VMXNET3State *s) &s->msix_bar, VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_TABLE, &s->msix_bar, - VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA(s), - VMXNET3_MSIX_OFFSET(s), NULL); + VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA, + VMXNET3_MSIX_OFFSET, NULL); if (0 > res) { VMW_WRPRN("Failed to initialize MSI-X, error %d", res); @@ -2221,7 +2210,7 @@ static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp) /* Interrupt pin A */ pci_dev->config[PCI_INTERRUPT_PIN] = 0x01; - ret = msi_init(pci_dev, VMXNET3_MSI_OFFSET(s), VMXNET3_MAX_NMSIX_INTRS, + ret = msi_init(pci_dev, VMXNET3_MSI_OFFSET, VMXNET3_MAX_NMSIX_INTRS, VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK, NULL); /* Any error other than -ENOTSUP(board's MSI support is broken) * is a programming error. Fall back to INTx silently on -ENOTSUP */ @@ -2249,6 +2238,7 @@ static void vmxnet3_instance_init(Object *obj) device_add_bootindex_property(obj, &s->conf.bootindex, "bootindex", "/ethernet-phy@0", DEVICE(obj)); + PCI_DEVICE(obj)->cap_present |= QEMU_PCI_CAP_EXPRESS; } static void vmxnet3_pci_uninit(PCIDevice *pci_dev) @@ -2472,30 +2462,12 @@ static const VMStateDescription vmstate_vmxnet3 = { static const Property vmxnet3_properties[] = { DEFINE_NIC_PROPERTIES(VMXNET3State, conf), - DEFINE_PROP_BIT("x-old-msi-offsets", VMXNET3State, compat_flags, - VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT, false), - DEFINE_PROP_BIT("x-disable-pcie", VMXNET3State, compat_flags, - VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT, false), }; -static void vmxnet3_realize(DeviceState *qdev, Error **errp) -{ - VMXNET3Class *vc = VMXNET3_DEVICE_GET_CLASS(qdev); - PCIDevice *pci_dev = PCI_DEVICE(qdev); - VMXNET3State *s = VMXNET3(qdev); - - if (!(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE)) { - pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; - } - - vc->parent_dc_realize(qdev, errp); -} - static void vmxnet3_class_init(ObjectClass *class, const void *data) { DeviceClass *dc = DEVICE_CLASS(class); PCIDeviceClass *c = PCI_DEVICE_CLASS(class); - VMXNET3Class *vc = VMXNET3_DEVICE_CLASS(class); c->realize = vmxnet3_pci_realize; c->exit = vmxnet3_pci_uninit; @@ -2506,8 +2478,6 @@ static void vmxnet3_class_init(ObjectClass *class, const void *data) c->class_id = PCI_CLASS_NETWORK_ETHERNET; c->subsystem_vendor_id = PCI_VENDOR_ID_VMWARE; c->subsystem_id = PCI_DEVICE_ID_VMWARE_VMXNET3; - device_class_set_parent_realize(dc, vmxnet3_realize, - &vc->parent_dc_realize); dc->desc = "VMWare Paravirtualized Ethernet v3"; device_class_set_legacy_reset(dc, vmxnet3_qdev_reset); dc->vmsd = &vmstate_vmxnet3; diff --git a/hw/net/xgmac.c b/hw/net/xgmac.c index 9c87c4e..d45f872 100644 --- a/hw/net/xgmac.c +++ b/hw/net/xgmac.c @@ -207,7 +207,7 @@ static void xgmac_enet_send(XgmacState *s) struct desc bd; int frame_size; int len; - uint8_t frame[8192]; + QEMU_UNINITIALIZED uint8_t frame[8192]; uint8_t *ptr; ptr = frame; diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index fd93550..2200028 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -1057,7 +1057,8 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, NvmeSg *sg, NvmeSglDescriptor sgl, */ #define SEG_CHUNK_SIZE 256 - NvmeSglDescriptor segment[SEG_CHUNK_SIZE], *sgld, *last_sgld; + QEMU_UNINITIALIZED NvmeSglDescriptor segment[SEG_CHUNK_SIZE]; + NvmeSglDescriptor *sgld, *last_sgld; uint64_t nsgld; uint32_t seg_len; uint16_t status; @@ -5128,7 +5129,7 @@ static uint16_t nvme_error_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, uint64_t off, NvmeRequest *req) { - uint32_t nslist[1024]; + uint32_t nslist[1024] = {}; uint32_t trans_len; int i = 0; uint32_t nsid; @@ -5138,7 +5139,6 @@ static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len, return NVME_INVALID_FIELD | NVME_DNR; } - memset(nslist, 0x0, sizeof(nslist)); trans_len = MIN(sizeof(nslist) - off, buf_len); while ((nsid = find_first_bit(n->changed_nsids, NVME_CHANGED_NSID_SIZE)) != diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c index 237b9f7..aa24050 100644 --- a/hw/nvram/fw_cfg.c +++ b/hw/nvram/fw_cfg.c @@ -817,62 +817,6 @@ void fw_cfg_modify_i64(FWCfgState *s, uint16_t key, uint64_t value) g_free(old); } -void fw_cfg_set_order_override(FWCfgState *s, int order) -{ - assert(s->fw_cfg_order_override == 0); - s->fw_cfg_order_override = order; -} - -void fw_cfg_reset_order_override(FWCfgState *s) -{ - assert(s->fw_cfg_order_override != 0); - s->fw_cfg_order_override = 0; -} - -/* - * This is the legacy order list. For legacy systems, files are in - * the fw_cfg in the order defined below, by the "order" value. Note - * that some entries (VGA ROMs, NIC option ROMS, etc.) go into a - * specific area, but there may be more than one and they occur in the - * order that the user specifies them on the command line. Those are - * handled in a special manner, using the order override above. - * - * For non-legacy, the files are sorted by filename to avoid this kind - * of complexity in the future. - * - * This is only for x86, other arches don't implement versioning so - * they won't set legacy mode. - */ -static struct { - const char *name; - int order; -} fw_cfg_order[] = { - { "etc/boot-menu-wait", 10 }, - { "bootsplash.jpg", 11 }, - { "bootsplash.bmp", 12 }, - { "etc/boot-fail-wait", 15 }, - { "etc/smbios/smbios-tables", 20 }, - { "etc/smbios/smbios-anchor", 30 }, - { "etc/e820", 40 }, - { "etc/reserved-memory-end", 50 }, - { "genroms/kvmvapic.bin", 55 }, - { "genroms/linuxboot.bin", 60 }, - { }, /* VGA ROMs from pc_vga_init come here, 70. */ - { }, /* NIC option ROMs from pc_nic_init come here, 80. */ - { "etc/system-states", 90 }, - { }, /* User ROMs come here, 100. */ - { }, /* Device FW comes here, 110. */ - { "etc/extra-pci-roots", 120 }, - { "etc/acpi/tables", 130 }, - { "etc/table-loader", 140 }, - { "etc/tpm/log", 150 }, - { "etc/acpi/rsdp", 160 }, - { "bootorder", 170 }, - { "etc/msr_feature_control", 180 }, - -#define FW_CFG_ORDER_OVERRIDE_LAST 200 -}; - /* * Any sub-page size update to these table MRs will be lost during migration, * as we use aligned size in ram_load_precopy() -> qemu_ram_resize() path. @@ -890,29 +834,6 @@ static void fw_cfg_acpi_mr_save(FWCfgState *s, const char *filename, size_t len) } } -static int get_fw_cfg_order(FWCfgState *s, const char *name) -{ - int i; - - if (s->fw_cfg_order_override > 0) { - return s->fw_cfg_order_override; - } - - for (i = 0; i < ARRAY_SIZE(fw_cfg_order); i++) { - if (fw_cfg_order[i].name == NULL) { - continue; - } - - if (strcmp(name, fw_cfg_order[i].name) == 0) { - return fw_cfg_order[i].order; - } - } - - /* Stick unknown stuff at the end. */ - warn_report("Unknown firmware file in legacy mode: %s", name); - return FW_CFG_ORDER_OVERRIDE_LAST; -} - void fw_cfg_add_file_callback(FWCfgState *s, const char *filename, FWCfgCallback select_cb, FWCfgWriteCallback write_cb, @@ -921,7 +842,6 @@ void fw_cfg_add_file_callback(FWCfgState *s, const char *filename, { int i, index, count; size_t dsize; - MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); int order = 0; if (!s->files) { @@ -933,22 +853,11 @@ void fw_cfg_add_file_callback(FWCfgState *s, const char *filename, count = be32_to_cpu(s->files->count); assert(count < fw_cfg_file_slots(s)); - /* Find the insertion point. */ - if (mc->legacy_fw_cfg_order) { - /* - * Sort by order. For files with the same order, we keep them - * in the sequence in which they were added. - */ - order = get_fw_cfg_order(s, filename); - for (index = count; - index > 0 && order < s->entry_order[index - 1]; - index--); - } else { - /* Sort by file name. */ - for (index = count; - index > 0 && strcmp(filename, s->files->f[index - 1].name) < 0; - index--); - } + /* Find the insertion point, sorting by file name. */ + for (index = count; + index > 0 && strcmp(filename, s->files->f[index - 1].name) < 0; + index--) + ; /* * Move all the entries from the index point and after down one @@ -1058,7 +967,6 @@ bool fw_cfg_add_file_from_generator(FWCfgState *s, static void fw_cfg_machine_reset(void *opaque) { - MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); FWCfgState *s = opaque; void *ptr; size_t len; @@ -1068,11 +976,9 @@ static void fw_cfg_machine_reset(void *opaque) ptr = fw_cfg_modify_file(s, "bootorder", (uint8_t *)buf, len); g_free(ptr); - if (!mc->legacy_fw_cfg_order) { - buf = get_boot_devices_lchs_list(&len); - ptr = fw_cfg_modify_file(s, "bios-geometry", (uint8_t *)buf, len); - g_free(ptr); - } + buf = get_boot_devices_lchs_list(&len); + ptr = fw_cfg_modify_file(s, "bios-geometry", (uint8_t *)buf, len); + g_free(ptr); } static void fw_cfg_machine_ready(struct Notifier *n, void *data) diff --git a/hw/pci-host/ppce500.c b/hw/pci-host/ppce500.c index e97a515..52269b0 100644 --- a/hw/pci-host/ppce500.c +++ b/hw/pci-host/ppce500.c @@ -16,7 +16,6 @@ #include "qemu/osdep.h" #include "hw/irq.h" -#include "hw/ppc/e500-ccsr.h" #include "hw/qdev-properties.h" #include "migration/vmstate.h" #include "hw/pci/pci_device.h" @@ -418,11 +417,12 @@ static const VMStateDescription vmstate_ppce500_pci = { static void e500_pcihost_bridge_realize(PCIDevice *d, Error **errp) { PPCE500PCIBridgeState *b = PPC_E500_PCI_BRIDGE(d); - PPCE500CCSRState *ccsr = CCSR( + SysBusDevice *ccsr = SYS_BUS_DEVICE( object_resolve_path_component(qdev_get_machine(), "e500-ccsr")); + MemoryRegion *ccsr_space = sysbus_mmio_get_region(ccsr, 0); - memory_region_init_alias(&b->bar0, OBJECT(ccsr), "e500-pci-bar0", &ccsr->ccsr_space, - 0, int128_get64(ccsr->ccsr_space.size)); + memory_region_init_alias(&b->bar0, OBJECT(ccsr), "e500-pci-bar0", + ccsr_space, 0, int128_get64(ccsr_space->size)); pci_register_bar(d, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &b->bar0); } diff --git a/hw/pci-host/raven.c b/hw/pci-host/raven.c index 21f7ca6..f8c0be5 100644 --- a/hw/pci-host/raven.c +++ b/hw/pci-host/raven.c @@ -24,7 +24,6 @@ */ #include "qemu/osdep.h" -#include "qemu/datadir.h" #include "qemu/units.h" #include "qemu/log.h" #include "qapi/error.h" @@ -35,9 +34,7 @@ #include "migration/vmstate.h" #include "hw/intc/i8259.h" #include "hw/irq.h" -#include "hw/loader.h" #include "hw/or-irq.h" -#include "elf.h" #include "qom/object.h" #define TYPE_RAVEN_PCI_DEVICE "raven" @@ -47,10 +44,6 @@ OBJECT_DECLARE_SIMPLE_TYPE(RavenPCIState, RAVEN_PCI_DEVICE) struct RavenPCIState { PCIDevice dev; - - uint32_t elf_machine; - char *bios_name; - MemoryRegion bios; }; typedef struct PRePPCIState PREPPCIState; @@ -75,11 +68,8 @@ struct PRePPCIState { RavenPCIState pci_dev; int contiguous_map; - bool is_legacy_prep; }; -#define BIOS_SIZE (1 * MiB) - #define PCI_IO_BASE_ADDR 0x80000000 /* Physical address on main bus */ static inline uint32_t raven_pci_io_config(hwaddr addr) @@ -243,22 +233,18 @@ static void raven_pcihost_realizefn(DeviceState *d, Error **errp) MemoryRegion *address_space_mem = get_system_memory(); int i; - if (s->is_legacy_prep) { - for (i = 0; i < PCI_NUM_PINS; i++) { - sysbus_init_irq(dev, &s->pci_irqs[i]); - } - } else { - /* According to PReP specification section 6.1.6 "System Interrupt - * Assignments", all PCI interrupts are routed via IRQ 15 */ - s->or_irq = OR_IRQ(object_new(TYPE_OR_IRQ)); - object_property_set_int(OBJECT(s->or_irq), "num-lines", PCI_NUM_PINS, - &error_fatal); - qdev_realize(DEVICE(s->or_irq), NULL, &error_fatal); - sysbus_init_irq(dev, &s->or_irq->out_irq); - - for (i = 0; i < PCI_NUM_PINS; i++) { - s->pci_irqs[i] = qdev_get_gpio_in(DEVICE(s->or_irq), i); - } + /* + * According to PReP specification section 6.1.6 "System Interrupt + * Assignments", all PCI interrupts are routed via IRQ 15 + */ + s->or_irq = OR_IRQ(object_new(TYPE_OR_IRQ)); + object_property_set_int(OBJECT(s->or_irq), "num-lines", PCI_NUM_PINS, + &error_fatal); + qdev_realize(DEVICE(s->or_irq), NULL, &error_fatal); + sysbus_init_irq(dev, &s->or_irq->out_irq); + + for (i = 0; i < PCI_NUM_PINS; i++) { + s->pci_irqs[i] = qdev_get_gpio_in(DEVICE(s->or_irq), i); } qdev_init_gpio_in(d, raven_change_gpio, 1); @@ -338,48 +324,9 @@ static void raven_pcihost_initfn(Object *obj) static void raven_realize(PCIDevice *d, Error **errp) { - RavenPCIState *s = RAVEN_PCI_DEVICE(d); - char *filename; - int bios_size = -1; - d->config[PCI_CACHE_LINE_SIZE] = 0x08; d->config[PCI_LATENCY_TIMER] = 0x10; d->config[PCI_CAPABILITY_LIST] = 0x00; - - if (!memory_region_init_rom_nomigrate(&s->bios, OBJECT(s), "bios", - BIOS_SIZE, errp)) { - return; - } - memory_region_add_subregion(get_system_memory(), (uint32_t)(-BIOS_SIZE), - &s->bios); - if (s->bios_name) { - filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, s->bios_name); - if (filename) { - if (s->elf_machine != EM_NONE) { - bios_size = load_elf(filename, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, - ELFDATA2MSB, s->elf_machine, 0, 0); - } - if (bios_size < 0) { - bios_size = get_image_size(filename); - if (bios_size > 0 && bios_size <= BIOS_SIZE) { - hwaddr bios_addr; - bios_size = (bios_size + 0xfff) & ~0xfff; - bios_addr = (uint32_t)(-BIOS_SIZE); - bios_size = load_image_targphys(filename, bios_addr, - bios_size); - } - } - } - g_free(filename); - if (bios_size < 0 || bios_size > BIOS_SIZE) { - memory_region_del_subregion(get_system_memory(), &s->bios); - error_setg(errp, "Could not load bios image '%s'", s->bios_name); - return; - } - } - - vmstate_register_ram_global(&s->bios); } static const VMStateDescription vmstate_raven = { @@ -422,22 +369,12 @@ static const TypeInfo raven_info = { }, }; -static const Property raven_pcihost_properties[] = { - DEFINE_PROP_UINT32("elf-machine", PREPPCIState, pci_dev.elf_machine, - EM_NONE), - DEFINE_PROP_STRING("bios-name", PREPPCIState, pci_dev.bios_name), - /* Temporary workaround until legacy prep machine is removed */ - DEFINE_PROP_BOOL("is-legacy-prep", PREPPCIState, is_legacy_prep, - false), -}; - static void raven_pcihost_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); dc->realize = raven_pcihost_realizefn; - device_class_set_props(dc, raven_pcihost_properties); dc->fw_name = "pci"; } diff --git a/hw/pci/msix.c b/hw/pci/msix.c index 66f27b9..8c7f670 100644 --- a/hw/pci/msix.c +++ b/hw/pci/msix.c @@ -72,7 +72,7 @@ static uint8_t *msix_pending_byte(PCIDevice *dev, int vector) return dev->msix_pba + vector / 8; } -static int msix_is_pending(PCIDevice *dev, int vector) +int msix_is_pending(PCIDevice *dev, unsigned int vector) { return *msix_pending_byte(dev, vector) & msix_pending_mask(vector); } diff --git a/hw/pci/pci.c b/hw/pci/pci.c index f5ab510..c70b5ce 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -32,6 +32,7 @@ #include "hw/pci/pci_host.h" #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" +#include "migration/cpr.h" #include "migration/qemu-file-types.h" #include "migration/vmstate.h" #include "net/net.h" @@ -128,6 +129,12 @@ static GSequence *pci_acpi_index_list(void) return used_acpi_index_list; } +static void pci_set_master(PCIDevice *d, bool enable) +{ + memory_region_set_enabled(&d->bus_master_enable_region, enable); + d->is_master = enable; /* cache the status */ +} + static void pci_init_bus_master(PCIDevice *pci_dev) { AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev); @@ -135,7 +142,7 @@ static void pci_init_bus_master(PCIDevice *pci_dev) memory_region_init_alias(&pci_dev->bus_master_enable_region, OBJECT(pci_dev), "bus master", dma_as->root, 0, memory_region_size(dma_as->root)); - memory_region_set_enabled(&pci_dev->bus_master_enable_region, false); + pci_set_master(pci_dev, false); memory_region_add_subregion(&pci_dev->bus_master_container_region, 0, &pci_dev->bus_master_enable_region); } @@ -531,6 +538,10 @@ static void pci_reset_regions(PCIDevice *dev) static void pci_do_device_reset(PCIDevice *dev) { + if ((dev->cap_present & QEMU_PCI_SKIP_RESET_ON_CPR) && cpr_is_incoming()) { + return; + } + pci_device_deassert_intx(dev); assert(dev->irq_state == 0); @@ -804,9 +815,8 @@ static int get_pci_config_device(QEMUFile *f, void *pv, size_t size, pci_bridge_update_mappings(PCI_BRIDGE(s)); } - memory_region_set_enabled(&s->bus_master_enable_region, - pci_get_word(s->config + PCI_COMMAND) - & PCI_COMMAND_MASTER); + pci_set_master(s, pci_get_word(s->config + PCI_COMMAND) + & PCI_COMMAND_MASTER); g_free(config); return 0; @@ -1725,7 +1735,7 @@ static void pci_update_mappings(PCIDevice *d) pci_update_vga(d); } -static inline int pci_irq_disabled(PCIDevice *d) +int pci_irq_disabled(PCIDevice *d) { return pci_get_word(d->config + PCI_COMMAND) & PCI_COMMAND_INTX_DISABLE; } @@ -1787,9 +1797,8 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int if (ranges_overlap(addr, l, PCI_COMMAND, 2)) { pci_update_irq_disabled(d, was_irq_disabled); - memory_region_set_enabled(&d->bus_master_enable_region, - (pci_get_word(d->config + PCI_COMMAND) - & PCI_COMMAND_MASTER) && d->enabled); + pci_set_master(d, (pci_get_word(d->config + PCI_COMMAND) & + PCI_COMMAND_MASTER) && d->enabled); } msi_write_config(d, addr, val_in, l); @@ -2935,6 +2944,23 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) return &address_space_memory; } +int pci_iommu_init_iotlb_notifier(PCIDevice *dev, IOMMUNotifier *n, + IOMMUNotify fn, void *opaque) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->init_iotlb_notifier) { + iommu_bus->iommu_ops->init_iotlb_notifier(bus, iommu_bus->iommu_opaque, + devfn, n, fn, opaque); + return 0; + } + + return -ENODEV; +} + bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod, Error **errp) { @@ -2966,6 +2992,170 @@ void pci_device_unset_iommu_device(PCIDevice *dev) } } +int pci_pri_request_page(PCIDevice *dev, uint32_t pasid, bool priv_req, + bool exec_req, hwaddr addr, bool lpig, + uint16_t prgi, bool is_read, bool is_write) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + if (!dev->is_master || + ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) { + return -EPERM; + } + + if (!pcie_pri_enabled(dev)) { + return -EPERM; + } + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->pri_request_page) { + return iommu_bus->iommu_ops->pri_request_page(bus, + iommu_bus->iommu_opaque, + devfn, pasid, priv_req, + exec_req, addr, lpig, prgi, + is_read, is_write); + } + + return -ENODEV; +} + +int pci_pri_register_notifier(PCIDevice *dev, uint32_t pasid, + IOMMUPRINotifier *notifier) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + if (!dev->is_master || + ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) { + return -EPERM; + } + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->pri_register_notifier) { + iommu_bus->iommu_ops->pri_register_notifier(bus, + iommu_bus->iommu_opaque, + devfn, pasid, notifier); + return 0; + } + + return -ENODEV; +} + +void pci_pri_unregister_notifier(PCIDevice *dev, uint32_t pasid) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->pri_unregister_notifier) { + iommu_bus->iommu_ops->pri_unregister_notifier(bus, + iommu_bus->iommu_opaque, + devfn, pasid); + } +} + +ssize_t pci_ats_request_translation(PCIDevice *dev, uint32_t pasid, + bool priv_req, bool exec_req, + hwaddr addr, size_t length, + bool no_write, IOMMUTLBEntry *result, + size_t result_length, + uint32_t *err_count) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + if (!dev->is_master || + ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) { + return -EPERM; + } + + if (result_length == 0) { + return -ENOSPC; + } + + if (!pcie_ats_enabled(dev)) { + return -EPERM; + } + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->ats_request_translation) { + return iommu_bus->iommu_ops->ats_request_translation(bus, + iommu_bus->iommu_opaque, + devfn, pasid, priv_req, + exec_req, addr, length, + no_write, result, + result_length, err_count); + } + + return -ENODEV; +} + +int pci_iommu_register_iotlb_notifier(PCIDevice *dev, uint32_t pasid, + IOMMUNotifier *n) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + if ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev)) { + return -EPERM; + } + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->register_iotlb_notifier) { + iommu_bus->iommu_ops->register_iotlb_notifier(bus, + iommu_bus->iommu_opaque, devfn, + pasid, n); + return 0; + } + + return -ENODEV; +} + +int pci_iommu_unregister_iotlb_notifier(PCIDevice *dev, uint32_t pasid, + IOMMUNotifier *n) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + if ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev)) { + return -EPERM; + } + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->unregister_iotlb_notifier) { + iommu_bus->iommu_ops->unregister_iotlb_notifier(bus, + iommu_bus->iommu_opaque, + devfn, pasid, n); + return 0; + } + + return -ENODEV; +} + +int pci_iommu_get_iotlb_info(PCIDevice *dev, uint8_t *addr_width, + uint32_t *min_page_size) +{ + PCIBus *bus; + PCIBus *iommu_bus; + int devfn; + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (iommu_bus && iommu_bus->iommu_ops->get_iotlb_info) { + iommu_bus->iommu_ops->get_iotlb_info(iommu_bus->iommu_opaque, + addr_width, min_page_size); + return 0; + } + + return -ENODEV; +} + void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque) { /* @@ -3100,9 +3290,8 @@ void pci_set_enabled(PCIDevice *d, bool state) d->enabled = state; pci_update_mappings(d); - memory_region_set_enabled(&d->bus_master_enable_region, - (pci_get_word(d->config + PCI_COMMAND) - & PCI_COMMAND_MASTER) && d->enabled); + pci_set_master(d, (pci_get_word(d->config + PCI_COMMAND) + & PCI_COMMAND_MASTER) && d->enabled); if (qdev_is_realized(&d->qdev)) { pci_device_reset(d); } diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 1b12db6..eaeb688 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -1214,3 +1214,81 @@ void pcie_acs_reset(PCIDevice *dev) pci_set_word(dev->config + dev->exp.acs_cap + PCI_ACS_CTRL, 0); } } + +/* PASID */ +void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint8_t pasid_width, + bool exec_perm, bool priv_mod) +{ + static const uint16_t control_reg_rw_mask = 0x07; + uint16_t capability_reg; + + assert(pasid_width <= PCI_EXT_CAP_PASID_MAX_WIDTH); + + pcie_add_capability(dev, PCI_EXT_CAP_ID_PASID, PCI_PASID_VER, offset, + PCI_EXT_CAP_PASID_SIZEOF); + + capability_reg = ((uint16_t)pasid_width) << PCI_PASID_CAP_WIDTH_SHIFT; + capability_reg |= exec_perm ? PCI_PASID_CAP_EXEC : 0; + capability_reg |= priv_mod ? PCI_PASID_CAP_PRIV : 0; + pci_set_word(dev->config + offset + PCI_PASID_CAP, capability_reg); + + /* Everything is disabled by default */ + pci_set_word(dev->config + offset + PCI_PASID_CTRL, 0); + + pci_set_word(dev->wmask + offset + PCI_PASID_CTRL, control_reg_rw_mask); + + dev->exp.pasid_cap = offset; +} + +/* PRI */ +void pcie_pri_init(PCIDevice *dev, uint16_t offset, uint32_t outstanding_pr_cap, + bool prg_response_pasid_req) +{ + static const uint16_t control_reg_rw_mask = 0x3; + static const uint16_t status_reg_rw1_mask = 0x3; + static const uint32_t pr_alloc_reg_rw_mask = 0xffffffff; + uint16_t status_reg; + + status_reg = prg_response_pasid_req ? PCI_PRI_STATUS_PASID : 0; + status_reg |= PCI_PRI_STATUS_STOPPED; /* Stopped by default */ + + pcie_add_capability(dev, PCI_EXT_CAP_ID_PRI, PCI_PRI_VER, offset, + PCI_EXT_CAP_PRI_SIZEOF); + /* Disabled by default */ + + pci_set_word(dev->config + offset + PCI_PRI_STATUS, status_reg); + pci_set_long(dev->config + offset + PCI_PRI_MAX_REQ, outstanding_pr_cap); + + pci_set_word(dev->wmask + offset + PCI_PRI_CTRL, control_reg_rw_mask); + pci_set_word(dev->w1cmask + offset + PCI_PRI_STATUS, status_reg_rw1_mask); + pci_set_long(dev->wmask + offset + PCI_PRI_ALLOC_REQ, pr_alloc_reg_rw_mask); + + dev->exp.pri_cap = offset; +} + +bool pcie_pri_enabled(const PCIDevice *dev) +{ + if (!pci_is_express(dev) || !dev->exp.pri_cap) { + return false; + } + return (pci_get_word(dev->config + dev->exp.pri_cap + PCI_PRI_CTRL) & + PCI_PRI_CTRL_ENABLE) != 0; +} + +bool pcie_pasid_enabled(const PCIDevice *dev) +{ + if (!pci_is_express(dev) || !dev->exp.pasid_cap) { + return false; + } + return (pci_get_word(dev->config + dev->exp.pasid_cap + PCI_PASID_CTRL) & + PCI_PASID_CTRL_ENABLE) != 0; +} + +bool pcie_ats_enabled(const PCIDevice *dev) +{ + if (!pci_is_express(dev) || !dev->exp.ats_cap) { + return false; + } + return (pci_get_word(dev->config + dev->exp.ats_cap + PCI_ATS_CTRL) & + PCI_ATS_CTRL_ENABLE) != 0; +} diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index 809078a..723c97f 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -79,8 +79,6 @@ #define MPC85XX_ESDHC_IRQ 72 #define RTC_REGS_OFFSET 0x68 -#define PLATFORM_CLK_FREQ_HZ (400 * 1000 * 1000) - struct boot_info { uint32_t dt_base; @@ -120,7 +118,7 @@ static uint32_t *pci_map_create(void *fdt, uint32_t mpic, int first_slot, } static void dt_serial_create(void *fdt, unsigned long long offset, - const char *soc, const char *mpic, + const char *soc, uint32_t freq, const char *mpic, const char *alias, int idx, bool defcon) { char *ser; @@ -131,7 +129,7 @@ static void dt_serial_create(void *fdt, unsigned long long offset, qemu_fdt_setprop_string(fdt, ser, "compatible", "ns16550"); qemu_fdt_setprop_cells(fdt, ser, "reg", offset, 0x100); qemu_fdt_setprop_cell(fdt, ser, "cell-index", idx); - qemu_fdt_setprop_cell(fdt, ser, "clock-frequency", PLATFORM_CLK_FREQ_HZ); + qemu_fdt_setprop_cell(fdt, ser, "clock-frequency", freq); qemu_fdt_setprop_cells(fdt, ser, "interrupts", 42, 2); qemu_fdt_setprop_phandle(fdt, ser, "interrupt-parent", mpic); qemu_fdt_setprop_string(fdt, "/aliases", alias, ser); @@ -382,8 +380,7 @@ static int ppce500_load_device_tree(PPCE500MachineState *pms, int fdt_size; void *fdt; uint8_t hypercall[16]; - uint32_t clock_freq = PLATFORM_CLK_FREQ_HZ; - uint32_t tb_freq = PLATFORM_CLK_FREQ_HZ; + uint32_t clock_freq, tb_freq; int i; char compatible_sb[] = "fsl,mpc8544-immr\0simple-bus"; char *soc; @@ -411,7 +408,7 @@ static int ppce500_load_device_tree(PPCE500MachineState *pms, if (dtb_file) { char *filename; - filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, dtb_file); + filename = qemu_find_file(QEMU_FILE_TYPE_DTB, dtb_file); if (!filename) { goto out; } @@ -484,6 +481,9 @@ static int ppce500_load_device_tree(PPCE500MachineState *pms, if (kvmppc_get_hasidle(env)) { qemu_fdt_setprop(fdt, "/hypervisor", "has-idle", NULL, 0); } + } else { + clock_freq = pmc->clock_freq; + tb_freq = pmc->tb_freq; } /* Create CPU nodes */ @@ -564,12 +564,12 @@ static int ppce500_load_device_tree(PPCE500MachineState *pms, */ if (serial_hd(1)) { dt_serial_create(fdt, MPC8544_SERIAL1_REGS_OFFSET, - soc, mpic, "serial1", 1, false); + soc, pmc->clock_freq, mpic, "serial1", 1, false); } if (serial_hd(0)) { dt_serial_create(fdt, MPC8544_SERIAL0_REGS_OFFSET, - soc, mpic, "serial0", 0, true); + soc, pmc->clock_freq, mpic, "serial0", 0, true); } /* i2c */ @@ -931,7 +931,6 @@ void ppce500_init(MachineState *machine) CPUPPCState *firstenv = NULL; MemoryRegion *ccsr_addr_space; SysBusDevice *s; - PPCE500CCSRState *ccsr; I2CBus *i2c; irqs = g_new0(IrqLines, smp_cpus); @@ -968,7 +967,7 @@ void ppce500_init(MachineState *machine) env->spr_cb[SPR_BOOKE_PIR].default_value = cs->cpu_index = i; env->mpic_iack = pmc->ccsrbar_base + MPC8544_MPIC_REGS_OFFSET + 0xa0; - ppc_booke_timers_init(cpu, PLATFORM_CLK_FREQ_HZ, PPC_TIMER_E500); + ppc_booke_timers_init(cpu, pmc->tb_freq, PPC_TIMER_E500); /* Register reset handler */ if (!i) { @@ -993,10 +992,10 @@ void ppce500_init(MachineState *machine) memory_region_add_subregion(address_space_mem, 0, machine->ram); dev = qdev_new("e500-ccsr"); + s = SYS_BUS_DEVICE(dev); object_property_add_child(OBJECT(machine), "e500-ccsr", OBJECT(dev)); - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - ccsr = CCSR(dev); - ccsr_addr_space = &ccsr->ccsr_space; + sysbus_realize_and_unref(s, &error_fatal); + ccsr_addr_space = sysbus_mmio_get_region(s, 0); memory_region_add_subregion(address_space_mem, pmc->ccsrbar_base, ccsr_addr_space); @@ -1284,6 +1283,7 @@ static void e500_ccsr_initfn(Object *obj) PPCE500CCSRState *ccsr = CCSR(obj); memory_region_init(&ccsr->ccsr_space, obj, "e500-ccsr", MPC8544_CCSRBAR_SIZE); + sysbus_init_mmio(SYS_BUS_DEVICE(ccsr), &ccsr->ccsr_space); } static const TypeInfo e500_ccsr_info = { diff --git a/hw/ppc/e500.h b/hw/ppc/e500.h index 01db102..00f4905 100644 --- a/hw/ppc/e500.h +++ b/hw/ppc/e500.h @@ -5,6 +5,8 @@ #include "hw/platform-bus.h" #include "qom/object.h" +#define PLATFORM_CLK_FREQ_HZ (400 * 1000 * 1000) + struct PPCE500MachineState { /*< private >*/ MachineState parent_obj; @@ -37,6 +39,8 @@ struct PPCE500MachineClass { hwaddr pci_mmio_base; hwaddr pci_mmio_bus_base; hwaddr spin_base; + uint32_t clock_freq; + uint32_t tb_freq; }; void ppce500_init(MachineState *machine); diff --git a/hw/ppc/e500plat.c b/hw/ppc/e500plat.c index 775b9d8..4f1d659 100644 --- a/hw/ppc/e500plat.c +++ b/hw/ppc/e500plat.c @@ -93,6 +93,8 @@ static void e500plat_machine_class_init(ObjectClass *oc, const void *data) pmc->pci_mmio_base = 0xC00000000ULL; pmc->pci_mmio_bus_base = 0xE0000000ULL; pmc->spin_base = 0xFEF000000ULL; + pmc->clock_freq = PLATFORM_CLK_FREQ_HZ; + pmc->tb_freq = PLATFORM_CLK_FREQ_HZ; mc->desc = "generic paravirt e500 platform"; mc->init = e500plat_init; diff --git a/hw/ppc/mpc8544ds.c b/hw/ppc/mpc8544ds.c index 97fb0f3..5826985 100644 --- a/hw/ppc/mpc8544ds.c +++ b/hw/ppc/mpc8544ds.c @@ -55,6 +55,8 @@ static void mpc8544ds_machine_class_init(ObjectClass *oc, const void *data) pmc->pci_mmio_bus_base = 0xC0000000ULL; pmc->pci_pio_base = 0xE1000000ULL; pmc->spin_base = 0xEF000000ULL; + pmc->clock_freq = PLATFORM_CLK_FREQ_HZ; + pmc->tb_freq = PLATFORM_CLK_FREQ_HZ; mc->desc = "mpc8544ds"; mc->init = mpc8544ds_init; diff --git a/hw/ppc/pnv_occ.c b/hw/ppc/pnv_occ.c index fa6f31c..24b789c 100644 --- a/hw/ppc/pnv_occ.c +++ b/hw/ppc/pnv_occ.c @@ -789,7 +789,7 @@ static bool occ_opal_process_command(PnvOCC *occ, static bool occ_model_tick(PnvOCC *occ) { - struct occ_dynamic_data dynamic_data; + QEMU_UNINITIALIZED struct occ_dynamic_data dynamic_data; if (!occ_read_dynamic_data(occ, &dynamic_data, NULL)) { /* Can't move OCC state field to safe because we can't map it! */ diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c index 7395263..982e40e 100644 --- a/hw/ppc/prep.c +++ b/hw/ppc/prep.c @@ -35,6 +35,7 @@ #include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/log.h" +#include "qemu/datadir.h" #include "hw/loader.h" #include "hw/rtc/mc146818rtc.h" #include "hw/isa/pc87312.h" @@ -55,6 +56,8 @@ #define KERNEL_LOAD_ADDR 0x01000000 #define INITRD_LOAD_ADDR 0x01800000 +#define BIOS_ADDR 0xfff00000 +#define BIOS_SIZE (1 * MiB) #define NVRAM_SIZE 0x2000 static void fw_cfg_boot_set(void *opaque, const char *boot_device, @@ -241,6 +244,9 @@ static void ibm_40p_init(MachineState *machine) ISADevice *isa_dev; ISABus *isa_bus; void *fw_cfg; + MemoryRegion *bios = g_new(MemoryRegion, 1); + char *filename; + ssize_t bios_size = -1; uint32_t kernel_base = 0, initrd_base = 0; long kernel_size = 0, initrd_size = 0; char boot_device; @@ -263,10 +269,27 @@ static void ibm_40p_init(MachineState *machine) cpu_ppc_tb_init(env, 100UL * 1000UL * 1000UL); qemu_register_reset(ppc_prep_reset, cpu); + /* allocate and load firmware */ + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + if (!filename) { + error_report("Could not find bios image '%s'", bios_name); + exit(1); + } + memory_region_init_rom(bios, NULL, "bios", BIOS_SIZE, &error_fatal); + memory_region_add_subregion(get_system_memory(), BIOS_ADDR, bios); + bios_size = load_elf(filename, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + ELFDATA2MSB, PPC_ELF_MACHINE, 0, 0); + if (bios_size < 0) { + bios_size = load_image_targphys(filename, BIOS_ADDR, BIOS_SIZE); + } + if (bios_size < 0 || bios_size > BIOS_SIZE) { + error_report("Could not load bios image '%s'", filename); + return; + } + g_free(filename); + /* PCI host */ dev = qdev_new("raven-pcihost"); - qdev_prop_set_string(dev, "bios-name", bios_name); - qdev_prop_set_uint32(dev, "elf-machine", PPC_ELF_MACHINE); pcihost = SYS_BUS_DEVICE(dev); object_property_add_child(qdev_get_machine(), "raven", OBJECT(dev)); sysbus_realize_and_unref(pcihost, &error_fatal); diff --git a/hw/ppc/spapr_tpm_proxy.c b/hw/ppc/spapr_tpm_proxy.c index 862eeaa..1297b3a 100644 --- a/hw/ppc/spapr_tpm_proxy.c +++ b/hw/ppc/spapr_tpm_proxy.c @@ -41,8 +41,8 @@ static ssize_t tpm_execute(SpaprTpmProxy *tpm_proxy, target_ulong *args) target_ulong data_in_size = args[2]; uint64_t data_out = ppc64_phys_to_real(args[3]); target_ulong data_out_size = args[4]; - uint8_t buf_in[TPM_SPAPR_BUFSIZE]; - uint8_t buf_out[TPM_SPAPR_BUFSIZE]; + QEMU_UNINITIALIZED uint8_t buf_in[TPM_SPAPR_BUFSIZE]; + QEMU_UNINITIALIZED uint8_t buf_out[TPM_SPAPR_BUFSIZE]; ssize_t ret; trace_spapr_tpm_execute(data_in, data_in_size, data_out, data_out_size); diff --git a/hw/riscv/riscv-iommu-pci.c b/hw/riscv/riscv-iommu-pci.c index 1f44eef..cdb4a7a 100644 --- a/hw/riscv/riscv-iommu-pci.c +++ b/hw/riscv/riscv-iommu-pci.c @@ -68,12 +68,6 @@ typedef struct RISCVIOMMUStatePci { RISCVIOMMUState iommu; /* common IOMMU state */ } RISCVIOMMUStatePci; -struct RISCVIOMMUPciClass { - /*< public >*/ - DeviceRealize parent_realize; - ResettablePhases parent_phases; -}; - /* interrupt delivery callback */ static void riscv_iommu_pci_notify(RISCVIOMMUState *iommu, unsigned vector) { diff --git a/hw/riscv/riscv-iommu-sys.c b/hw/riscv/riscv-iommu-sys.c index 74e76b9..e34d00a 100644 --- a/hw/riscv/riscv-iommu-sys.c +++ b/hw/riscv/riscv-iommu-sys.c @@ -53,12 +53,6 @@ struct RISCVIOMMUStateSys { uint8_t *msix_pba; }; -struct RISCVIOMMUSysClass { - /*< public >*/ - DeviceRealize parent_realize; - ResettablePhases parent_phases; -}; - static uint64_t msix_table_mmio_read(void *opaque, hwaddr addr, unsigned size) { diff --git a/hw/s390x/ap-stub.c b/hw/s390x/ap-stub.c new file mode 100644 index 0000000..001fe5f --- /dev/null +++ b/hw/s390x/ap-stub.c @@ -0,0 +1,21 @@ +/* + * VFIO based AP matrix device assignment + * + * Copyright 2025 IBM Corp. + * Author(s): Rorie Reyes <rreyes@linux.ibm.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "hw/s390x/ap-bridge.h" + +int ap_chsc_sei_nt0_get_event(void *res) +{ + return EVENT_INFORMATION_NOT_STORED; +} + +bool ap_chsc_sei_nt0_have_event(void) +{ + return false; +} diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c index 7d4e1f5..b513f89 100644 --- a/hw/s390x/cpu-topology.c +++ b/hw/s390x/cpu-topology.c @@ -23,8 +23,8 @@ #include "target/s390x/cpu.h" #include "hw/s390x/s390-virtio-ccw.h" #include "hw/s390x/cpu-topology.h" -#include "qapi/qapi-commands-machine-target.h" -#include "qapi/qapi-events-machine-target.h" +#include "qapi/qapi-commands-machine-s390x.h" +#include "qapi/qapi-events-machine-s390x.h" /* * s390_topology is used to keep the topology information. diff --git a/hw/s390x/meson.build b/hw/s390x/meson.build index 3bbebfd..99cbcbd 100644 --- a/hw/s390x/meson.build +++ b/hw/s390x/meson.build @@ -33,6 +33,7 @@ s390x_ss.add(when: 'CONFIG_S390_CCW_VIRTIO', if_true: files( )) s390x_ss.add(when: 'CONFIG_TERMINAL3270', if_true: files('3270-ccw.c')) s390x_ss.add(when: 'CONFIG_VFIO', if_true: files('s390-pci-vfio.c')) +s390x_ss.add(when: 'CONFIG_VFIO_AP', if_false: files('ap-stub.c')) virtio_ss = ss.source_set() virtio_ss.add(files('virtio-ccw.c')) diff --git a/hw/s390x/s390-skeys.c b/hw/s390x/s390-skeys.c index aedb62b..8eeecfd 100644 --- a/hw/s390x/s390-skeys.c +++ b/hw/s390x/s390-skeys.c @@ -17,7 +17,6 @@ #include "hw/s390x/storage-keys.h" #include "qapi/error.h" #include "qapi/qapi-commands-machine.h" -#include "qapi/qapi-commands-misc-target.h" #include "qobject/qdict.h" #include "qemu/error-report.h" #include "system/memory_mapping.h" diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index f69a4d8..ce3c13d 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -1145,20 +1145,6 @@ static void ccw_machine_4_2_class_options(MachineClass *mc) } DEFINE_CCW_MACHINE(4, 2); -static void ccw_machine_4_1_instance_options(MachineState *machine) -{ - static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V4_1 }; - ccw_machine_4_2_instance_options(machine); - s390_set_qemu_cpu_model(0x2964, 13, 2, qemu_cpu_feat); -} - -static void ccw_machine_4_1_class_options(MachineClass *mc) -{ - ccw_machine_4_2_class_options(mc); - compat_props_add(mc->compat_props, hw_compat_4_1, hw_compat_4_1_len); -} -DEFINE_CCW_MACHINE(4, 1); - static void ccw_machine_register_types(void) { type_register_static(&ccw_machine_info); diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c index f4f2ef3..9ea4aa0 100644 --- a/hw/scsi/lsi53c895a.c +++ b/hw/scsi/lsi53c895a.c @@ -1112,7 +1112,7 @@ bad: static void lsi_memcpy(LSIState *s, uint32_t dest, uint32_t src, int count) { int n; - uint8_t buf[LSI_BUF_SIZE]; + QEMU_UNINITIALIZED uint8_t buf[LSI_BUF_SIZE]; trace_lsi_memcpy(dest, src, count); while (count) { diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c index 55cd188..844643d 100644 --- a/hw/scsi/megasas.c +++ b/hw/scsi/megasas.c @@ -981,13 +981,11 @@ static int megasas_event_wait(MegasasState *s, MegasasCmd *cmd) static int megasas_dcmd_pd_get_list(MegasasState *s, MegasasCmd *cmd) { - struct mfi_pd_list info; - size_t dcmd_size = sizeof(info); + struct mfi_pd_list info = {}; BusChild *kid; uint32_t offset, dcmd_limit, num_pd_disks = 0, max_pd_disks; dma_addr_t residual; - memset(&info, 0, dcmd_size); offset = 8; dcmd_limit = offset + sizeof(struct mfi_pd_address); if (cmd->iov_size < dcmd_limit) { @@ -1429,11 +1427,10 @@ static int megasas_dcmd_cfg_read(MegasasState *s, MegasasCmd *cmd) static int megasas_dcmd_get_properties(MegasasState *s, MegasasCmd *cmd) { - struct mfi_ctrl_props info; + struct mfi_ctrl_props info = {}; size_t dcmd_size = sizeof(info); dma_addr_t residual; - memset(&info, 0x0, dcmd_size); if (cmd->iov_size < dcmd_size) { trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size, dcmd_size); diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index cb4af1b..b4782c6 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -74,7 +74,7 @@ struct SCSIDiskClass { */ DMAIOFunc *dma_readv; DMAIOFunc *dma_writev; - bool (*need_fua_emulation)(SCSICommand *cmd); + bool (*need_fua)(SCSICommand *cmd); void (*update_sense)(SCSIRequest *r); }; @@ -85,7 +85,7 @@ typedef struct SCSIDiskReq { uint32_t sector_count; uint32_t buflen; bool started; - bool need_fua_emulation; + bool need_fua; struct iovec iov; QEMUIOVector qiov; BlockAcctCookie acct; @@ -389,24 +389,6 @@ static bool scsi_is_cmd_fua(SCSICommand *cmd) } } -static void scsi_write_do_fua(SCSIDiskReq *r) -{ - SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - - assert(r->req.aiocb == NULL); - assert(!r->req.io_canceled); - - if (r->need_fua_emulation) { - block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0, - BLOCK_ACCT_FLUSH); - r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r); - return; - } - - scsi_req_complete(&r->req, GOOD); - scsi_req_unref(&r->req); -} - static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret) { assert(r->req.aiocb == NULL); @@ -416,12 +398,7 @@ static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret) r->sector += r->sector_count; r->sector_count = 0; - if (r->req.cmd.mode == SCSI_XFER_TO_DEV) { - scsi_write_do_fua(r); - return; - } else { - scsi_req_complete(&r->req, GOOD); - } + scsi_req_complete(&r->req, GOOD); done: scsi_req_unref(&r->req); @@ -564,7 +541,7 @@ static void scsi_read_data(SCSIRequest *req) first = !r->started; r->started = true; - if (first && r->need_fua_emulation) { + if (first && r->need_fua) { block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0, BLOCK_ACCT_FLUSH); r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_do_read_cb, r); @@ -589,8 +566,7 @@ static void scsi_write_complete_noio(SCSIDiskReq *r, int ret) r->sector += n; r->sector_count -= n; if (r->sector_count == 0) { - scsi_write_do_fua(r); - return; + scsi_req_complete(&r->req, GOOD); } else { scsi_init_iovec(r, SCSI_DMA_BUF_SIZE); trace_scsi_disk_write_complete_noio(r->req.tag, r->qiov.size); @@ -623,6 +599,7 @@ static void scsi_write_data(SCSIRequest *req) SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); + BlockCompletionFunc *cb; /* No data transfer may already be in progress */ assert(r->req.aiocb == NULL); @@ -648,11 +625,10 @@ static void scsi_write_data(SCSIRequest *req) if (r->req.cmd.buf[0] == VERIFY_10 || r->req.cmd.buf[0] == VERIFY_12 || r->req.cmd.buf[0] == VERIFY_16) { - if (r->req.sg) { - scsi_dma_complete_noio(r, 0); - } else { - scsi_write_complete_noio(r, 0); - } + block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0, + BLOCK_ACCT_FLUSH); + cb = r->req.sg ? scsi_dma_complete : scsi_write_complete; + r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, cb, r); return; } @@ -2391,7 +2367,7 @@ static int32_t scsi_disk_dma_command(SCSIRequest *req, uint8_t *buf) scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE)); return 0; } - r->need_fua_emulation = sdc->need_fua_emulation(&r->req.cmd); + r->need_fua = sdc->need_fua(&r->req.cmd); if (r->sector_count == 0) { scsi_req_complete(&r->req, GOOD); } @@ -3137,7 +3113,8 @@ BlockAIOCB *scsi_dma_writev(int64_t offset, QEMUIOVector *iov, { SCSIDiskReq *r = opaque; SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - return blk_aio_pwritev(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque); + int flags = r->need_fua ? BDRV_REQ_FUA : 0; + return blk_aio_pwritev(s->qdev.conf.blk, offset, iov, flags, cb, cb_opaque); } static char *scsi_property_get_loadparm(Object *obj, Error **errp) @@ -3186,7 +3163,7 @@ static void scsi_disk_base_class_initfn(ObjectClass *klass, const void *data) device_class_set_legacy_reset(dc, scsi_disk_reset); sdc->dma_readv = scsi_dma_readv; sdc->dma_writev = scsi_dma_writev; - sdc->need_fua_emulation = scsi_is_cmd_fua; + sdc->need_fua = scsi_is_cmd_fua; } static const TypeInfo scsi_disk_base_info = { @@ -3215,7 +3192,7 @@ static const Property scsi_hd_properties[] = { DEFINE_PROP_BIT("removable", SCSIDiskState, features, SCSI_DISK_F_REMOVABLE, false), DEFINE_PROP_BIT("dpofua", SCSIDiskState, features, - SCSI_DISK_F_DPOFUA, false), + SCSI_DISK_F_DPOFUA, true), DEFINE_PROP_UINT64("wwn", SCSIDiskState, qdev.wwn, 0), DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, qdev.port_wwn, 0), DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0), @@ -3338,7 +3315,7 @@ static void scsi_block_class_initfn(ObjectClass *klass, const void *data) sdc->dma_readv = scsi_block_dma_readv; sdc->dma_writev = scsi_block_dma_writev; sdc->update_sense = scsi_block_update_sense; - sdc->need_fua_emulation = scsi_block_no_fua; + sdc->need_fua = scsi_block_no_fua; dc->desc = "SCSI block device passthrough"; device_class_set_props(dc, scsi_block_properties); dc->vmsd = &vmstate_scsi_disk_state; diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c index d5825b6..7c98b1b 100644 --- a/hw/scsi/vmw_pvscsi.c +++ b/hw/scsi/vmw_pvscsi.c @@ -68,18 +68,7 @@ struct PVSCSIClass { OBJECT_DECLARE_TYPE(PVSCSIState, PVSCSIClass, PVSCSI) -/* Compatibility flags for migration */ -#define PVSCSI_COMPAT_OLD_PCI_CONFIGURATION_BIT 0 -#define PVSCSI_COMPAT_OLD_PCI_CONFIGURATION \ - (1 << PVSCSI_COMPAT_OLD_PCI_CONFIGURATION_BIT) -#define PVSCSI_COMPAT_DISABLE_PCIE_BIT 1 -#define PVSCSI_COMPAT_DISABLE_PCIE \ - (1 << PVSCSI_COMPAT_DISABLE_PCIE_BIT) - -#define PVSCSI_USE_OLD_PCI_CONFIGURATION(s) \ - ((s)->compat_flags & PVSCSI_COMPAT_OLD_PCI_CONFIGURATION) -#define PVSCSI_MSI_OFFSET(s) \ - (PVSCSI_USE_OLD_PCI_CONFIGURATION(s) ? 0x50 : 0x7c) +#define PVSCSI_MSI_OFFSET (0x7c) #define PVSCSI_EXP_EP_OFFSET (0x40) typedef struct PVSCSIRingInfo { @@ -129,8 +118,6 @@ struct PVSCSIState { uint8_t msi_used; /* For migration compatibility */ PVSCSIRingInfo rings; /* Data transfer rings manager */ uint32_t resetting; /* Reset in progress */ - - uint32_t compat_flags; }; typedef struct PVSCSIRequest { @@ -1110,7 +1097,7 @@ pvscsi_init_msi(PVSCSIState *s) int res; PCIDevice *d = PCI_DEVICE(s); - res = msi_init(d, PVSCSI_MSI_OFFSET(s), PVSCSI_MSIX_NUM_VECTORS, + res = msi_init(d, PVSCSI_MSI_OFFSET, PVSCSI_MSIX_NUM_VECTORS, PVSCSI_USE_64BIT, PVSCSI_PER_VECTOR_MASK, NULL); if (res < 0) { trace_pvscsi_init_msi_fail(res); @@ -1158,15 +1145,11 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp) trace_pvscsi_state("init"); /* PCI subsystem ID, subsystem vendor ID, revision */ - if (PVSCSI_USE_OLD_PCI_CONFIGURATION(s)) { - pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID, 0x1000); - } else { - pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID, - PCI_VENDOR_ID_VMWARE); - pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID, - PCI_DEVICE_ID_VMWARE_PVSCSI); - pci_config_set_revision(pci_dev->config, 0x2); - } + pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID, + PCI_VENDOR_ID_VMWARE); + pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID, + PCI_DEVICE_ID_VMWARE_PVSCSI); + pci_config_set_revision(pci_dev->config, 0x2); /* PCI latency timer = 255 */ pci_dev->config[PCI_LATENCY_TIMER] = 0xff; @@ -1234,21 +1217,8 @@ pvscsi_post_load(void *opaque, int version_id) return 0; } -static bool pvscsi_vmstate_need_pcie_device(void *opaque) -{ - PVSCSIState *s = PVSCSI(opaque); - - return !(s->compat_flags & PVSCSI_COMPAT_DISABLE_PCIE); -} - -static bool pvscsi_vmstate_test_pci_device(void *opaque, int version_id) -{ - return !pvscsi_vmstate_need_pcie_device(opaque); -} - static const VMStateDescription vmstate_pvscsi_pcie_device = { .name = "pvscsi/pcie", - .needed = pvscsi_vmstate_need_pcie_device, .fields = (const VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PVSCSIState), VMSTATE_END_OF_LIST() @@ -1262,9 +1232,6 @@ static const VMStateDescription vmstate_pvscsi = { .pre_save = pvscsi_pre_save, .post_load = pvscsi_post_load, .fields = (const VMStateField[]) { - VMSTATE_STRUCT_TEST(parent_obj, PVSCSIState, - pvscsi_vmstate_test_pci_device, 0, - vmstate_pci_device, PCIDevice), VMSTATE_UINT8(msi_used, PVSCSIState), VMSTATE_UINT32(resetting, PVSCSIState), VMSTATE_UINT64(reg_interrupt_status, PVSCSIState), @@ -1298,30 +1265,17 @@ static const VMStateDescription vmstate_pvscsi = { static const Property pvscsi_properties[] = { DEFINE_PROP_UINT8("use_msg", PVSCSIState, use_msg, 1), - DEFINE_PROP_BIT("x-old-pci-configuration", PVSCSIState, compat_flags, - PVSCSI_COMPAT_OLD_PCI_CONFIGURATION_BIT, false), - DEFINE_PROP_BIT("x-disable-pcie", PVSCSIState, compat_flags, - PVSCSI_COMPAT_DISABLE_PCIE_BIT, false), }; -static void pvscsi_realize(DeviceState *qdev, Error **errp) +static void pvscsi_instance_init(Object *obj) { - PVSCSIClass *pvs_c = PVSCSI_GET_CLASS(qdev); - PCIDevice *pci_dev = PCI_DEVICE(qdev); - PVSCSIState *s = PVSCSI(qdev); - - if (!(s->compat_flags & PVSCSI_COMPAT_DISABLE_PCIE)) { - pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; - } - - pvs_c->parent_dc_realize(qdev, errp); + PCI_DEVICE(obj)->cap_present |= QEMU_PCI_CAP_EXPRESS; } static void pvscsi_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - PVSCSIClass *pvs_k = PVSCSI_CLASS(klass); HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass); k->realize = pvscsi_realizefn; @@ -1330,8 +1284,6 @@ static void pvscsi_class_init(ObjectClass *klass, const void *data) k->device_id = PCI_DEVICE_ID_VMWARE_PVSCSI; k->class_id = PCI_CLASS_STORAGE_SCSI; k->subsystem_id = 0x1000; - device_class_set_parent_realize(dc, pvscsi_realize, - &pvs_k->parent_dc_realize); device_class_set_legacy_reset(dc, pvscsi_reset); dc->vmsd = &vmstate_pvscsi; device_class_set_props(dc, pvscsi_properties); @@ -1346,6 +1298,7 @@ static const TypeInfo pvscsi_info = { .class_size = sizeof(PVSCSIClass), .instance_size = sizeof(PVSCSIState), .class_init = pvscsi_class_init, + .instance_init = pvscsi_instance_init, .interfaces = (const InterfaceInfo[]) { { TYPE_HOTPLUG_HANDLER }, { INTERFACE_PCIE_DEVICE }, diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c index d1b7bc5..cb48cc1 100644 --- a/hw/timer/hpet.c +++ b/hw/timer/hpet.c @@ -328,16 +328,16 @@ static const VMStateDescription vmstate_hpet_timer = { static const VMStateDescription vmstate_hpet = { .name = "hpet", .version_id = 2, - .minimum_version_id = 1, + .minimum_version_id = 2, .pre_save = hpet_pre_save, .post_load = hpet_post_load, .fields = (const VMStateField[]) { VMSTATE_UINT64(config, HPETState), VMSTATE_UINT64(isr, HPETState), VMSTATE_UINT64(hpet_counter, HPETState), - VMSTATE_UINT8_V(num_timers_save, HPETState, 2), + VMSTATE_UINT8(num_timers_save, HPETState), VMSTATE_VALIDATE("num_timers must match", hpet_validate_num_timers), - VMSTATE_STRUCT_VARRAY_UINT8(timer, HPETState, num_timers, 0, + VMSTATE_STRUCT_VARRAY_UINT8(timer, HPETState, num_timers_save, 0, vmstate_hpet_timer, HPETTimer), VMSTATE_END_OF_LIST() }, @@ -426,30 +426,11 @@ static uint64_t hpet_ram_read(void *opaque, hwaddr addr, uint64_t cur_tick; trace_hpet_ram_read(addr); + addr &= ~4; - /*address range of all TN regs*/ - if (addr >= 0x100 && addr <= 0x3ff) { - uint8_t timer_id = (addr - 0x100) / 0x20; - HPETTimer *timer = &s->timer[timer_id]; - - if (timer_id > s->num_timers) { - trace_hpet_timer_id_out_of_range(timer_id); - return 0; - } - - switch (addr & 0x18) { - case HPET_TN_CFG: // including interrupt capabilities - return timer->config >> shift; - case HPET_TN_CMP: // comparator register - return timer->cmp >> shift; - case HPET_TN_ROUTE: - return timer->fsb >> shift; - default: - trace_hpet_ram_read_invalid(); - break; - } - } else { - switch (addr & ~4) { + /*address range of all global regs*/ + if (addr <= 0xff) { + switch (addr) { case HPET_ID: // including HPET_PERIOD return s->capability >> shift; case HPET_CFG: @@ -468,6 +449,26 @@ static uint64_t hpet_ram_read(void *opaque, hwaddr addr, trace_hpet_ram_read_invalid(); break; } + } else { + uint8_t timer_id = (addr - 0x100) / 0x20; + HPETTimer *timer = &s->timer[timer_id]; + + if (timer_id > s->num_timers) { + trace_hpet_timer_id_out_of_range(timer_id); + return 0; + } + + switch (addr & 0x1f) { + case HPET_TN_CFG: // including interrupt capabilities + return timer->config >> shift; + case HPET_TN_CMP: // comparator register + return timer->cmp >> shift; + case HPET_TN_ROUTE: + return timer->fsb >> shift; + default: + trace_hpet_ram_read_invalid(); + break; + } } return 0; } @@ -482,9 +483,67 @@ static void hpet_ram_write(void *opaque, hwaddr addr, uint64_t old_val, new_val, cleared; trace_hpet_ram_write(addr, value); + addr &= ~4; - /*address range of all TN regs*/ - if (addr >= 0x100 && addr <= 0x3ff) { + /*address range of all global regs*/ + if (addr <= 0xff) { + switch (addr) { + case HPET_ID: + return; + case HPET_CFG: + old_val = s->config; + new_val = deposit64(old_val, shift, len, value); + new_val = hpet_fixup_reg(new_val, old_val, HPET_CFG_WRITE_MASK); + s->config = new_val; + if (activating_bit(old_val, new_val, HPET_CFG_ENABLE)) { + /* Enable main counter and interrupt generation. */ + s->hpet_offset = + ticks_to_ns(s->hpet_counter) - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + for (i = 0; i < s->num_timers; i++) { + if (timer_enabled(&s->timer[i]) && (s->isr & (1 << i))) { + update_irq(&s->timer[i], 1); + } + hpet_set_timer(&s->timer[i]); + } + } else if (deactivating_bit(old_val, new_val, HPET_CFG_ENABLE)) { + /* Halt main counter and disable interrupt generation. */ + s->hpet_counter = hpet_get_ticks(s); + for (i = 0; i < s->num_timers; i++) { + hpet_del_timer(&s->timer[i]); + } + } + /* i8254 and RTC output pins are disabled + * when HPET is in legacy mode */ + if (activating_bit(old_val, new_val, HPET_CFG_LEGACY)) { + qemu_set_irq(s->pit_enabled, 0); + qemu_irq_lower(s->irqs[0]); + qemu_irq_lower(s->irqs[RTC_ISA_IRQ]); + } else if (deactivating_bit(old_val, new_val, HPET_CFG_LEGACY)) { + qemu_irq_lower(s->irqs[0]); + qemu_set_irq(s->pit_enabled, 1); + qemu_set_irq(s->irqs[RTC_ISA_IRQ], s->rtc_irq_level); + } + break; + case HPET_STATUS: + new_val = value << shift; + cleared = new_val & s->isr; + for (i = 0; i < s->num_timers; i++) { + if (cleared & (1 << i)) { + update_irq(&s->timer[i], 0); + } + } + break; + case HPET_COUNTER: + if (hpet_enabled(s)) { + trace_hpet_ram_write_counter_write_while_enabled(); + } + s->hpet_counter = deposit64(s->hpet_counter, shift, len, value); + break; + default: + trace_hpet_ram_write_invalid(); + break; + } + } else { uint8_t timer_id = (addr - 0x100) / 0x20; HPETTimer *timer = &s->timer[timer_id]; @@ -550,63 +609,6 @@ static void hpet_ram_write(void *opaque, hwaddr addr, break; } return; - } else { - switch (addr & ~4) { - case HPET_ID: - return; - case HPET_CFG: - old_val = s->config; - new_val = deposit64(old_val, shift, len, value); - new_val = hpet_fixup_reg(new_val, old_val, HPET_CFG_WRITE_MASK); - s->config = new_val; - if (activating_bit(old_val, new_val, HPET_CFG_ENABLE)) { - /* Enable main counter and interrupt generation. */ - s->hpet_offset = - ticks_to_ns(s->hpet_counter) - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - for (i = 0; i < s->num_timers; i++) { - if (timer_enabled(&s->timer[i]) && (s->isr & (1 << i))) { - update_irq(&s->timer[i], 1); - } - hpet_set_timer(&s->timer[i]); - } - } else if (deactivating_bit(old_val, new_val, HPET_CFG_ENABLE)) { - /* Halt main counter and disable interrupt generation. */ - s->hpet_counter = hpet_get_ticks(s); - for (i = 0; i < s->num_timers; i++) { - hpet_del_timer(&s->timer[i]); - } - } - /* i8254 and RTC output pins are disabled - * when HPET is in legacy mode */ - if (activating_bit(old_val, new_val, HPET_CFG_LEGACY)) { - qemu_set_irq(s->pit_enabled, 0); - qemu_irq_lower(s->irqs[0]); - qemu_irq_lower(s->irqs[RTC_ISA_IRQ]); - } else if (deactivating_bit(old_val, new_val, HPET_CFG_LEGACY)) { - qemu_irq_lower(s->irqs[0]); - qemu_set_irq(s->pit_enabled, 1); - qemu_set_irq(s->irqs[RTC_ISA_IRQ], s->rtc_irq_level); - } - break; - case HPET_STATUS: - new_val = value << shift; - cleared = new_val & s->isr; - for (i = 0; i < s->num_timers; i++) { - if (cleared & (1 << i)) { - update_irq(&s->timer[i], 0); - } - } - break; - case HPET_COUNTER: - if (hpet_enabled(s)) { - trace_hpet_ram_write_counter_write_while_enabled(); - } - s->hpet_counter = deposit64(s->hpet_counter, shift, len, value); - break; - default: - trace_hpet_ram_write_invalid(); - break; - } } } @@ -689,8 +691,14 @@ static void hpet_realize(DeviceState *dev, Error **errp) int i; HPETTimer *timer; + if (s->num_timers < HPET_MIN_TIMERS || s->num_timers > HPET_MAX_TIMERS) { + error_setg(errp, "hpet.num_timers must be between %d and %d", + HPET_MIN_TIMERS, HPET_MAX_TIMERS); + return; + } if (!s->intcap) { - warn_report("Hpet's intcap not initialized"); + error_setg(errp, "hpet.hpet-intcap not initialized"); + return; } if (hpet_fw_cfg.count == UINT8_MAX) { /* first instance */ @@ -698,7 +706,7 @@ static void hpet_realize(DeviceState *dev, Error **errp) } if (hpet_fw_cfg.count == 8) { - error_setg(errp, "Only 8 instances of HPET is allowed"); + error_setg(errp, "Only 8 instances of HPET are allowed"); return; } @@ -708,11 +716,6 @@ static void hpet_realize(DeviceState *dev, Error **errp) sysbus_init_irq(sbd, &s->irqs[i]); } - if (s->num_timers < HPET_MIN_TIMERS) { - s->num_timers = HPET_MIN_TIMERS; - } else if (s->num_timers > HPET_MAX_TIMERS) { - s->num_timers = HPET_MAX_TIMERS; - } for (i = 0; i < HPET_MAX_TIMERS; i++) { timer = &s->timer[i]; timer->qemu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, hpet_timer, timer); diff --git a/hw/ufs/lu.c b/hw/ufs/lu.c index 57b307e..2d8ffd7 100644 --- a/hw/ufs/lu.c +++ b/hw/ufs/lu.c @@ -194,7 +194,7 @@ static int ufs_emulate_wlun_inquiry(UfsRequest *req, uint8_t *outbuf, static UfsReqResult ufs_emulate_scsi_cmd(UfsLu *lu, UfsRequest *req) { uint8_t lun = lu->lun; - uint8_t outbuf[4096]; + QEMU_UNINITIALIZED uint8_t outbuf[4096]; uint8_t sense_buf[UFS_SENSE_SIZE]; uint8_t scsi_status; int len = 0; diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c index 71b5491..72a9f9f 100644 --- a/hw/usb/hcd-ohci.c +++ b/hw/usb/hcd-ohci.c @@ -577,7 +577,7 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed) USBDevice *dev; USBEndpoint *ep; USBPacket *pkt; - uint8_t buf[8192]; + QEMU_UNINITIALIZED uint8_t buf[8192]; bool int_req; struct ohci_iso_td iso_td; uint32_t addr; diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c index 785c0a0..874e0d1 100644 --- a/hw/vfio/ap.c +++ b/hw/vfio/ap.c @@ -10,6 +10,7 @@ * directory. */ +#include <stdbool.h> #include "qemu/osdep.h" #include CONFIG_DEVICES /* CONFIG_IOMMUFD */ #include <linux/vfio.h> @@ -18,8 +19,10 @@ #include "hw/vfio/vfio-device.h" #include "system/iommufd.h" #include "hw/s390x/ap-device.h" +#include "hw/s390x/css.h" #include "qemu/error-report.h" #include "qemu/event_notifier.h" +#include "qemu/lockable.h" #include "qemu/main-loop.h" #include "qemu/module.h" #include "qemu/option.h" @@ -37,8 +40,18 @@ struct VFIOAPDevice { APDevice apdev; VFIODevice vdev; EventNotifier req_notifier; + EventNotifier cfg_notifier; }; +typedef struct APConfigChgEvent { + QTAILQ_ENTRY(APConfigChgEvent) next; +} APConfigChgEvent; + +static QTAILQ_HEAD(, APConfigChgEvent) cfg_chg_events = + QTAILQ_HEAD_INITIALIZER(cfg_chg_events); + +static QemuMutex cfg_chg_events_lock; + OBJECT_DECLARE_SIMPLE_TYPE(VFIOAPDevice, VFIO_AP_DEVICE) static void vfio_ap_compute_needs_reset(VFIODevice *vdev) @@ -70,6 +83,57 @@ static void vfio_ap_req_notifier_handler(void *opaque) } } +static void vfio_ap_cfg_chg_notifier_handler(void *opaque) +{ + APConfigChgEvent *cfg_chg_event; + VFIOAPDevice *vapdev = opaque; + + if (!event_notifier_test_and_clear(&vapdev->cfg_notifier)) { + return; + } + + cfg_chg_event = g_new0(APConfigChgEvent, 1); + + WITH_QEMU_LOCK_GUARD(&cfg_chg_events_lock) { + QTAILQ_INSERT_TAIL(&cfg_chg_events, cfg_chg_event, next); + } + + css_generate_css_crws(0); + +} + +int ap_chsc_sei_nt0_get_event(void *res) +{ + ChscSeiNt0Res *nt0_res = (ChscSeiNt0Res *)res; + APConfigChgEvent *cfg_chg_event; + + WITH_QEMU_LOCK_GUARD(&cfg_chg_events_lock) { + if (QTAILQ_EMPTY(&cfg_chg_events)) { + return EVENT_INFORMATION_NOT_STORED; + } + + cfg_chg_event = QTAILQ_FIRST(&cfg_chg_events); + QTAILQ_REMOVE(&cfg_chg_events, cfg_chg_event, next); + } + + memset(nt0_res, 0, sizeof(*nt0_res)); + g_free(cfg_chg_event); + nt0_res->flags |= PENDING_EVENT_INFO_BITMASK; + nt0_res->length = sizeof(ChscSeiNt0Res); + nt0_res->code = NT0_RES_RESPONSE_CODE; + nt0_res->nt = NT0_RES_NT_DEFAULT; + nt0_res->rs = NT0_RES_RS_AP_CHANGE; + nt0_res->cc = NT0_RES_CC_AP_CHANGE; + + return EVENT_INFORMATION_STORED; +} + +bool ap_chsc_sei_nt0_have_event(void) +{ + QEMU_LOCK_GUARD(&cfg_chg_events_lock); + return !QTAILQ_EMPTY(&cfg_chg_events); +} + static bool vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev, unsigned int irq, Error **errp) { @@ -85,6 +149,10 @@ static bool vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev, notifier = &vapdev->req_notifier; fd_read = vfio_ap_req_notifier_handler; break; + case VFIO_AP_CFG_CHG_IRQ_INDEX: + notifier = &vapdev->cfg_notifier; + fd_read = vfio_ap_cfg_chg_notifier_handler; + break; default: error_setg(errp, "vfio: Unsupported device irq(%d)", irq); return false; @@ -137,6 +205,9 @@ static void vfio_ap_unregister_irq_notifier(VFIOAPDevice *vapdev, case VFIO_AP_REQ_IRQ_INDEX: notifier = &vapdev->req_notifier; break; + case VFIO_AP_CFG_CHG_IRQ_INDEX: + notifier = &vapdev->cfg_notifier; + break; default: error_report("vfio: Unsupported device irq(%d)", irq); return; @@ -159,6 +230,13 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) VFIOAPDevice *vapdev = VFIO_AP_DEVICE(dev); VFIODevice *vbasedev = &vapdev->vdev; + static bool lock_initialized; + + if (!lock_initialized) { + qemu_mutex_init(&cfg_chg_events_lock); + lock_initialized = true; + } + if (!vfio_device_get_name(vbasedev, errp)) { return; } @@ -176,6 +254,15 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp) warn_report_err(err); } + if (!vfio_ap_register_irq_notifier(vapdev, VFIO_AP_CFG_CHG_IRQ_INDEX, &err)) + { + /* + * Report this error, but do not make it a failing condition. + * Lack of this IRQ in the host does not prevent normal operation. + */ + warn_report_err(err); + } + return; error: @@ -188,6 +275,7 @@ static void vfio_ap_unrealize(DeviceState *dev) VFIOAPDevice *vapdev = VFIO_AP_DEVICE(dev); vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX); + vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_CFG_CHG_IRQ_INDEX); vfio_device_detach(&vapdev->vdev); g_free(vapdev->vdev.name); } diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c index 1c6ca94..d834bd4 100644 --- a/hw/vfio/container-base.c +++ b/hw/vfio/container-base.c @@ -75,12 +75,12 @@ void vfio_address_space_insert(VFIOAddressSpace *space, int vfio_container_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, - void *vaddr, bool readonly) + void *vaddr, bool readonly, MemoryRegion *mr) { VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); g_assert(vioc->dma_map); - return vioc->dma_map(bcontainer, iova, size, vaddr, readonly); + return vioc->dma_map(bcontainer, iova, size, vaddr, readonly, mr); } int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, diff --git a/hw/vfio/container.c b/hw/vfio/container.c index a9f0dba..3e8d645 100644 --- a/hw/vfio/container.c +++ b/hw/vfio/container.c @@ -31,10 +31,11 @@ #include "system/reset.h" #include "trace.h" #include "qapi/error.h" +#include "migration/cpr.h" +#include "migration/blocker.h" #include "pci.h" #include "hw/vfio/vfio-container.h" #include "vfio-helpers.h" -#include "vfio-cpr.h" #include "vfio-listener.h" #define TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE "-legacy-vfio" @@ -135,6 +136,8 @@ static int vfio_legacy_dma_unmap_one(const VFIOContainerBase *bcontainer, int ret; Error *local_err = NULL; + g_assert(!cpr_is_incoming()); + if (iotlb && vfio_container_dirty_tracking_is_started(bcontainer)) { if (!vfio_container_devices_dirty_tracking_is_supported(bcontainer) && bcontainer->dirty_pages_supported) { @@ -207,7 +210,8 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer, } static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova, - ram_addr_t size, void *vaddr, bool readonly) + ram_addr_t size, void *vaddr, bool readonly, + MemoryRegion *mr) { const VFIOContainer *container = container_of(bcontainer, VFIOContainer, bcontainer); @@ -425,7 +429,12 @@ static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group, return NULL; } - if (!vfio_set_iommu(fd, group->fd, &iommu_type, errp)) { + /* + * During CPR, just set the container type and skip the ioctls, as the + * container and group are already configured in the kernel. + */ + if (!cpr_is_incoming() && + !vfio_set_iommu(fd, group->fd, &iommu_type, errp)) { return NULL; } @@ -592,6 +601,11 @@ static bool vfio_container_group_add(VFIOContainer *container, VFIOGroup *group, group->container = container; QLIST_INSERT_HEAD(&container->group_list, group, container_next); vfio_group_add_kvm_device(group); + /* + * Remember the container fd for each group, so we can attach to the same + * container after CPR. + */ + cpr_resave_fd("vfio_container_for_group", group->groupid, container->fd); return true; } @@ -601,6 +615,7 @@ static void vfio_container_group_del(VFIOContainer *container, VFIOGroup *group) group->container = NULL; vfio_group_del_kvm_device(group); vfio_ram_block_discard_disable(container, false); + cpr_delete_fd("vfio_container_for_group", group->groupid); } static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as, @@ -615,17 +630,34 @@ static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as, bool group_was_added = false; space = vfio_address_space_get(as); + fd = cpr_find_fd("vfio_container_for_group", group->groupid); - QLIST_FOREACH(bcontainer, &space->containers, next) { - container = container_of(bcontainer, VFIOContainer, bcontainer); - if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { - return vfio_container_group_add(container, group, errp); + if (!cpr_is_incoming()) { + QLIST_FOREACH(bcontainer, &space->containers, next) { + container = container_of(bcontainer, VFIOContainer, bcontainer); + if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) { + return vfio_container_group_add(container, group, errp); + } } - } - fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp); - if (fd < 0) { - goto fail; + fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp); + if (fd < 0) { + goto fail; + } + } else { + /* + * For incoming CPR, the group is already attached in the kernel. + * If a container with matching fd is found, then update the + * userland group list and return. If not, then after the loop, + * create the container struct and group list. + */ + QLIST_FOREACH(bcontainer, &space->containers, next) { + container = container_of(bcontainer, VFIOContainer, bcontainer); + + if (vfio_cpr_container_match(container, group, fd)) { + return vfio_container_group_add(container, group, errp); + } + } } ret = ioctl(fd, VFIO_GET_API_VERSION); @@ -642,7 +674,7 @@ static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as, new_container = true; bcontainer = &container->bcontainer; - if (!vfio_cpr_register_container(bcontainer, errp)) { + if (!vfio_legacy_cpr_register_container(container, errp)) { goto fail; } @@ -660,8 +692,17 @@ static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as, } group_was_added = true; - if (!vfio_listener_register(bcontainer, errp)) { - goto fail; + /* + * If CPR, register the listener later, after all state that may + * affect regions and mapping boundaries has been cpr load'ed. Later, + * the listener will invoke its callback on each flat section and call + * dma_map to supply the new vaddr, and the calls will match the mappings + * remembered by the kernel. + */ + if (!cpr_is_incoming()) { + if (!vfio_listener_register(bcontainer, errp)) { + goto fail; + } } bcontainer->initialized = true; @@ -678,7 +719,7 @@ fail: vioc->release(bcontainer); } if (new_container) { - vfio_cpr_unregister_container(bcontainer); + vfio_legacy_cpr_unregister_container(container); object_unref(container); } if (fd >= 0) { @@ -697,6 +738,7 @@ static void vfio_container_disconnect(VFIOGroup *group) QLIST_REMOVE(group, container_next); group->container = NULL; + cpr_delete_fd("vfio_container_for_group", group->groupid); /* * Explicitly release the listener first before unset container, @@ -719,7 +761,7 @@ static void vfio_container_disconnect(VFIOGroup *group) VFIOAddressSpace *space = bcontainer->space; trace_vfio_container_disconnect(container->fd); - vfio_cpr_unregister_container(bcontainer); + vfio_legacy_cpr_unregister_container(container); close(container->fd); object_unref(container); @@ -750,7 +792,7 @@ static VFIOGroup *vfio_group_get(int groupid, AddressSpace *as, Error **errp) group = g_malloc0(sizeof(*group)); snprintf(path, sizeof(path), "/dev/vfio/%d", groupid); - group->fd = qemu_open(path, O_RDWR, errp); + group->fd = cpr_open_fd(path, O_RDWR, "vfio_group", groupid, errp); if (group->fd < 0) { goto free_group_exit; } @@ -782,6 +824,7 @@ static VFIOGroup *vfio_group_get(int groupid, AddressSpace *as, Error **errp) return group; close_fd_exit: + cpr_delete_fd("vfio_group", groupid); close(group->fd); free_group_exit: @@ -803,6 +846,7 @@ static void vfio_group_put(VFIOGroup *group) vfio_container_disconnect(group); QLIST_REMOVE(group, next); trace_vfio_group_put(group->fd); + cpr_delete_fd("vfio_group", group->groupid); close(group->fd); g_free(group); } @@ -813,7 +857,7 @@ static bool vfio_device_get(VFIOGroup *group, const char *name, g_autofree struct vfio_device_info *info = NULL; int fd; - fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name); + fd = vfio_cpr_group_get_device_fd(group->fd, name); if (fd < 0) { error_setg_errno(errp, errno, "error getting device from group %d", group->groupid); @@ -826,8 +870,7 @@ static bool vfio_device_get(VFIOGroup *group, const char *name, info = vfio_get_device_info(fd); if (!info) { error_setg_errno(errp, errno, "error getting device info"); - close(fd); - return false; + goto fail; } /* @@ -841,8 +884,7 @@ static bool vfio_device_get(VFIOGroup *group, const char *name, if (!QLIST_EMPTY(&group->device_list)) { error_setg(errp, "Inconsistent setting of support for discarding " "RAM (e.g., balloon) within group"); - close(fd); - return false; + goto fail; } if (!group->ram_block_discard_allowed) { @@ -860,6 +902,11 @@ static bool vfio_device_get(VFIOGroup *group, const char *name, trace_vfio_device_get(name, info->flags, info->num_regions, info->num_irqs); return true; + +fail: + close(fd); + cpr_delete_fd(name, 0); + return false; } static void vfio_device_put(VFIODevice *vbasedev) @@ -870,6 +917,7 @@ static void vfio_device_put(VFIODevice *vbasedev) QLIST_REMOVE(vbasedev, next); vbasedev->group = NULL; trace_vfio_device_put(vbasedev->fd); + cpr_delete_fd(vbasedev->name, 0); close(vbasedev->fd); } @@ -939,6 +987,13 @@ static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev, goto device_put_exit; } + if (vbasedev->mdev) { + error_setg(&vbasedev->cpr.mdev_blocker, + "CPR does not support vfio mdev %s", vbasedev->name); + migrate_add_blocker_modes(&vbasedev->cpr.mdev_blocker, &error_fatal, + MIG_MODE_CPR_TRANSFER, -1); + } + return true; device_put_exit: @@ -956,6 +1011,7 @@ static void vfio_legacy_detach_device(VFIODevice *vbasedev) vfio_device_unprepare(vbasedev); + migrate_del_blocker(&vbasedev->cpr.mdev_blocker); object_unref(vbasedev->hiod); vfio_device_put(vbasedev); vfio_group_put(group); diff --git a/hw/vfio/cpr-legacy.c b/hw/vfio/cpr-legacy.c new file mode 100644 index 0000000..a84c324 --- /dev/null +++ b/hw/vfio/cpr-legacy.c @@ -0,0 +1,287 @@ +/* + * Copyright (c) 2021-2025 Oracle and/or its affiliates. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <sys/ioctl.h> +#include <linux/vfio.h> +#include "qemu/osdep.h" +#include "hw/vfio/vfio-container.h" +#include "hw/vfio/vfio-device.h" +#include "hw/vfio/vfio-listener.h" +#include "migration/blocker.h" +#include "migration/cpr.h" +#include "migration/migration.h" +#include "migration/vmstate.h" +#include "qapi/error.h" +#include "qemu/error-report.h" + +static bool vfio_dma_unmap_vaddr_all(VFIOContainer *container, Error **errp) +{ + struct vfio_iommu_type1_dma_unmap unmap = { + .argsz = sizeof(unmap), + .flags = VFIO_DMA_UNMAP_FLAG_VADDR | VFIO_DMA_UNMAP_FLAG_ALL, + .iova = 0, + .size = 0, + }; + if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) { + error_setg_errno(errp, errno, "vfio_dma_unmap_vaddr_all"); + return false; + } + container->cpr.vaddr_unmapped = true; + return true; +} + +/* + * Set the new @vaddr for any mappings registered during cpr load. + * The incoming state is cleared thereafter. + */ +static int vfio_legacy_cpr_dma_map(const VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, void *vaddr, + bool readonly, MemoryRegion *mr) +{ + const VFIOContainer *container = container_of(bcontainer, VFIOContainer, + bcontainer); + struct vfio_iommu_type1_dma_map map = { + .argsz = sizeof(map), + .flags = VFIO_DMA_MAP_FLAG_VADDR, + .vaddr = (__u64)(uintptr_t)vaddr, + .iova = iova, + .size = size, + }; + + g_assert(cpr_is_incoming()); + + if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map)) { + return -errno; + } + + return 0; +} + +static void vfio_region_remap(MemoryListener *listener, + MemoryRegionSection *section) +{ + VFIOContainer *container = container_of(listener, VFIOContainer, + cpr.remap_listener); + vfio_container_region_add(&container->bcontainer, section, true); +} + +static bool vfio_cpr_supported(VFIOContainer *container, Error **errp) +{ + if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UPDATE_VADDR)) { + error_setg(errp, "VFIO container does not support VFIO_UPDATE_VADDR"); + return false; + + } else if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UNMAP_ALL)) { + error_setg(errp, "VFIO container does not support VFIO_UNMAP_ALL"); + return false; + + } else { + return true; + } +} + +static int vfio_container_pre_save(void *opaque) +{ + VFIOContainer *container = opaque; + Error *local_err = NULL; + + if (!vfio_dma_unmap_vaddr_all(container, &local_err)) { + error_report_err(local_err); + return -1; + } + return 0; +} + +static int vfio_container_post_load(void *opaque, int version_id) +{ + VFIOContainer *container = opaque; + VFIOContainerBase *bcontainer = &container->bcontainer; + VFIOGroup *group; + Error *local_err = NULL; + + if (!vfio_listener_register(bcontainer, &local_err)) { + error_report_err(local_err); + return -1; + } + + QLIST_FOREACH(group, &container->group_list, container_next) { + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + + /* Restore original dma_map function */ + vioc->dma_map = container->cpr.saved_dma_map; + } + return 0; +} + +static const VMStateDescription vfio_container_vmstate = { + .name = "vfio-container", + .version_id = 0, + .minimum_version_id = 0, + .priority = MIG_PRI_LOW, /* Must happen after devices and groups */ + .pre_save = vfio_container_pre_save, + .post_load = vfio_container_post_load, + .needed = cpr_incoming_needed, + .fields = (VMStateField[]) { + VMSTATE_END_OF_LIST() + } +}; + +static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier, + MigrationEvent *e, Error **errp) +{ + VFIOContainer *container = + container_of(notifier, VFIOContainer, cpr.transfer_notifier); + VFIOContainerBase *bcontainer = &container->bcontainer; + + if (e->type != MIG_EVENT_PRECOPY_FAILED) { + return 0; + } + + if (container->cpr.vaddr_unmapped) { + /* + * Force a call to vfio_region_remap for each mapped section by + * temporarily registering a listener, and temporarily diverting + * dma_map to vfio_legacy_cpr_dma_map. The latter restores vaddr. + */ + + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + vioc->dma_map = vfio_legacy_cpr_dma_map; + + container->cpr.remap_listener = (MemoryListener) { + .name = "vfio cpr recover", + .region_add = vfio_region_remap + }; + memory_listener_register(&container->cpr.remap_listener, + bcontainer->space->as); + memory_listener_unregister(&container->cpr.remap_listener); + container->cpr.vaddr_unmapped = false; + vioc->dma_map = container->cpr.saved_dma_map; + } + return 0; +} + +bool vfio_legacy_cpr_register_container(VFIOContainer *container, Error **errp) +{ + VFIOContainerBase *bcontainer = &container->bcontainer; + Error **cpr_blocker = &container->cpr.blocker; + + migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier, + vfio_cpr_reboot_notifier, + MIG_MODE_CPR_REBOOT); + + if (!vfio_cpr_supported(container, cpr_blocker)) { + return migrate_add_blocker_modes(cpr_blocker, errp, + MIG_MODE_CPR_TRANSFER, -1) == 0; + } + + vmstate_register(NULL, -1, &vfio_container_vmstate, container); + + /* During incoming CPR, divert calls to dma_map. */ + if (cpr_is_incoming()) { + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer); + container->cpr.saved_dma_map = vioc->dma_map; + vioc->dma_map = vfio_legacy_cpr_dma_map; + } + + migration_add_notifier_mode(&container->cpr.transfer_notifier, + vfio_cpr_fail_notifier, + MIG_MODE_CPR_TRANSFER); + return true; +} + +void vfio_legacy_cpr_unregister_container(VFIOContainer *container) +{ + VFIOContainerBase *bcontainer = &container->bcontainer; + + migration_remove_notifier(&bcontainer->cpr_reboot_notifier); + migrate_del_blocker(&container->cpr.blocker); + vmstate_unregister(NULL, &vfio_container_vmstate, container); + migration_remove_notifier(&container->cpr.transfer_notifier); +} + +/* + * In old QEMU, VFIO_DMA_UNMAP_FLAG_VADDR may fail on some mapping after + * succeeding for others, so the latter have lost their vaddr. Call this + * to restore vaddr for a section with a giommu. + * + * The giommu already exists. Find it and replay it, which calls + * vfio_legacy_cpr_dma_map further down the stack. + */ +void vfio_cpr_giommu_remap(VFIOContainerBase *bcontainer, + MemoryRegionSection *section) +{ + VFIOGuestIOMMU *giommu = NULL; + hwaddr as_offset = section->offset_within_address_space; + hwaddr iommu_offset = as_offset - section->offset_within_region; + + QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) { + if (giommu->iommu_mr == IOMMU_MEMORY_REGION(section->mr) && + giommu->iommu_offset == iommu_offset) { + break; + } + } + g_assert(giommu); + memory_region_iommu_replay(giommu->iommu_mr, &giommu->n); +} + +/* + * In old QEMU, VFIO_DMA_UNMAP_FLAG_VADDR may fail on some mapping after + * succeeding for others, so the latter have lost their vaddr. Call this + * to restore vaddr for a section with a RamDiscardManager. + * + * The ram discard listener already exists. Call its populate function + * directly, which calls vfio_legacy_cpr_dma_map. + */ +bool vfio_cpr_ram_discard_register_listener(VFIOContainerBase *bcontainer, + MemoryRegionSection *section) +{ + VFIORamDiscardListener *vrdl = + vfio_find_ram_discard_listener(bcontainer, section); + + g_assert(vrdl); + return vrdl->listener.notify_populate(&vrdl->listener, section) == 0; +} + +int vfio_cpr_group_get_device_fd(int d, const char *name) +{ + const int id = 0; + int fd = cpr_find_fd(name, id); + + if (fd < 0) { + fd = ioctl(d, VFIO_GROUP_GET_DEVICE_FD, name); + if (fd >= 0) { + cpr_save_fd(name, id, fd); + } + } + return fd; +} + +static bool same_device(int fd1, int fd2) +{ + struct stat st1, st2; + + return !fstat(fd1, &st1) && !fstat(fd2, &st2) && st1.st_dev == st2.st_dev; +} + +bool vfio_cpr_container_match(VFIOContainer *container, VFIOGroup *group, + int fd) +{ + if (container->fd == fd) { + return true; + } + if (!same_device(container->fd, fd)) { + return false; + } + /* + * Same device, different fd. This occurs when the container fd is + * cpr_save'd multiple times, once for each groupid, so SCM_RIGHTS + * produces duplicates. De-dup it. + */ + cpr_delete_fd("vfio_container_for_group", group->groupid); + close(fd); + cpr_save_fd("vfio_container_for_group", group->groupid, container->fd); + return true; +} diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c index 3214184..fdbb58e 100644 --- a/hw/vfio/cpr.c +++ b/hw/vfio/cpr.c @@ -7,13 +7,14 @@ #include "qemu/osdep.h" #include "hw/vfio/vfio-device.h" -#include "migration/misc.h" +#include "hw/vfio/vfio-cpr.h" +#include "hw/vfio/pci.h" +#include "migration/cpr.h" #include "qapi/error.h" #include "system/runstate.h" -#include "vfio-cpr.h" -static int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, - MigrationEvent *e, Error **errp) +int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, + MigrationEvent *e, Error **errp) { if (e->type == MIG_EVENT_PRECOPY_SETUP && !runstate_check(RUN_STATE_SUSPENDED) && !vm_get_suspended()) { @@ -38,3 +39,32 @@ void vfio_cpr_unregister_container(VFIOContainerBase *bcontainer) { migration_remove_notifier(&bcontainer->cpr_reboot_notifier); } + +/* + * The kernel may change non-emulated config bits. Exclude them from the + * changed-bits check in get_pci_config_device. + */ +static int vfio_cpr_pci_pre_load(void *opaque) +{ + VFIOPCIDevice *vdev = opaque; + PCIDevice *pdev = &vdev->pdev; + int size = MIN(pci_config_size(pdev), vdev->config_size); + int i; + + for (i = 0; i < size; i++) { + pdev->cmask[i] &= vdev->emulated_config_bits[i]; + } + + return 0; +} + +const VMStateDescription vfio_cpr_pci_vmstate = { + .name = "vfio-cpr-pci", + .version_id = 0, + .minimum_version_id = 0, + .pre_load = vfio_cpr_pci_pre_load, + .needed = cpr_incoming_needed, + .fields = (VMStateField[]) { + VMSTATE_END_OF_LIST() + } +}; diff --git a/hw/vfio/device.c b/hw/vfio/device.c index 9fba2c7..d32600e 100644 --- a/hw/vfio/device.c +++ b/hw/vfio/device.c @@ -200,6 +200,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index, struct vfio_region_info **info) { size_t argsz = sizeof(struct vfio_region_info); + int fd = -1; int ret; /* check cache */ @@ -214,7 +215,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index, retry: (*info)->argsz = argsz; - ret = vbasedev->io_ops->get_region_info(vbasedev, *info); + ret = vbasedev->io_ops->get_region_info(vbasedev, *info, &fd); if (ret != 0) { g_free(*info); *info = NULL; @@ -225,11 +226,19 @@ retry: argsz = (*info)->argsz; *info = g_realloc(*info, argsz); + if (fd != -1) { + close(fd); + fd = -1; + } + goto retry; } /* fill cache */ vbasedev->reginfo[index] = *info; + if (vbasedev->region_fds != NULL) { + vbasedev->region_fds[index] = fd; + } return 0; } @@ -334,6 +343,7 @@ void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops, vbasedev->io_ops = &vfio_device_io_ops_ioctl; vbasedev->dev = dev; vbasedev->fd = -1; + vbasedev->use_region_fds = false; vbasedev->ram_block_discard_allowed = ram_discard; } @@ -444,6 +454,9 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer, vbasedev->reginfo = g_new0(struct vfio_region_info *, vbasedev->num_regions); + if (vbasedev->use_region_fds) { + vbasedev->region_fds = g_new0(int, vbasedev->num_regions); + } } void vfio_device_unprepare(VFIODevice *vbasedev) @@ -452,9 +465,14 @@ void vfio_device_unprepare(VFIODevice *vbasedev) for (i = 0; i < vbasedev->num_regions; i++) { g_free(vbasedev->reginfo[i]); + if (vbasedev->region_fds != NULL && vbasedev->region_fds[i] != -1) { + close(vbasedev->region_fds[i]); + } + } - g_free(vbasedev->reginfo); - vbasedev->reginfo = NULL; + + g_clear_pointer(&vbasedev->reginfo, g_free); + g_clear_pointer(&vbasedev->region_fds, g_free); QLIST_REMOVE(vbasedev, container_next); QLIST_REMOVE(vbasedev, global_next); @@ -476,10 +494,13 @@ static int vfio_device_io_device_feature(VFIODevice *vbasedev, } static int vfio_device_io_get_region_info(VFIODevice *vbasedev, - struct vfio_region_info *info) + struct vfio_region_info *info, + int *fd) { int ret; + *fd = -1; + ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info); return ret < 0 ? -errno : ret; @@ -522,7 +543,8 @@ static int vfio_device_io_region_read(VFIODevice *vbasedev, uint8_t index, } static int vfio_device_io_region_write(VFIODevice *vbasedev, uint8_t index, - off_t off, uint32_t size, void *data) + off_t off, uint32_t size, void *data, + bool post) { struct vfio_region_info *info; int ret; diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c index e7952d1..e7a9d1f 100644 --- a/hw/vfio/igd.c +++ b/hw/vfio/igd.c @@ -187,23 +187,21 @@ static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, } static bool vfio_pci_igd_opregion_detect(VFIOPCIDevice *vdev, - struct vfio_region_info **opregion, - Error **errp) + struct vfio_region_info **opregion) { int ret; - /* Hotplugging is not supported for opregion access */ - if (vdev->pdev.qdev.hotplugged) { - error_setg(errp, "IGD OpRegion is not supported on hotplugged device"); - return false; - } - ret = vfio_device_get_region_info_type(&vdev->vbasedev, VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, opregion); if (ret) { - error_setg_errno(errp, -ret, - "Device does not supports IGD OpRegion feature"); + return false; + } + + /* Hotplugging is not supported for opregion access */ + if (vdev->pdev.qdev.hotplugged) { + warn_report("IGD device detected, but OpRegion is not supported " + "on hotplugged device."); return false; } @@ -524,7 +522,7 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp) } /* IGD device always comes with OpRegion */ - if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) { + if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) { return true; } info_report("OpRegion detected on Intel display %x.", vdev->device_id); @@ -695,7 +693,7 @@ static bool vfio_pci_kvmgt_config_quirk(VFIOPCIDevice *vdev, Error **errp) return true; } - if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) { + if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) { /* Should never reach here, KVMGT always emulates OpRegion */ return false; } diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index af1c7ab..d3efef7 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -21,20 +21,21 @@ #include "qapi/error.h" #include "system/iommufd.h" #include "hw/qdev-core.h" +#include "hw/vfio/vfio-cpr.h" #include "system/reset.h" #include "qemu/cutils.h" #include "qemu/chardev_open.h" #include "pci.h" #include "vfio-iommufd.h" #include "vfio-helpers.h" -#include "vfio-cpr.h" #include "vfio-listener.h" #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO \ TYPE_HOST_IOMMU_DEVICE_IOMMUFD "-vfio" static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova, - ram_addr_t size, void *vaddr, bool readonly) + ram_addr_t size, void *vaddr, bool readonly, + MemoryRegion *mr) { const VFIOIOMMUFDContainer *container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer); @@ -592,6 +593,10 @@ found_container: goto err_listener_register; } + /* + * Do not move this code before attachment! The nested IOMMU support + * needs device and hwpt id which are generated only after attachment. + */ if (!vfio_device_hiod_create_and_realize(vbasedev, TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO, errp)) { goto err_listener_register; @@ -810,21 +815,38 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data) vioc->query_dirty_bitmap = iommufd_query_dirty_bitmap; }; +static bool +host_iommu_device_iommufd_vfio_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + uint32_t hwpt_id, Error **errp) +{ + VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent; + + return !iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp); +} + +static bool +host_iommu_device_iommufd_vfio_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + Error **errp) +{ + VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent; + + return iommufd_cdev_detach_ioas_hwpt(vbasedev, errp); +} + static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, Error **errp) { VFIODevice *vdev = opaque; + HostIOMMUDeviceIOMMUFD *idev; HostIOMMUDeviceCaps *caps = &hiod->caps; + VendorCaps *vendor_caps = &caps->vendor_caps; enum iommu_hw_info_type type; - union { - struct iommu_hw_info_vtd vtd; - } data; uint64_t hw_caps; hiod->agent = opaque; - if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, - &type, &data, sizeof(data), + if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, &type, + vendor_caps, sizeof(*vendor_caps), &hw_caps, errp)) { return false; } @@ -833,6 +855,11 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque, caps->type = type; caps->hw_caps = hw_caps; + idev = HOST_IOMMU_DEVICE_IOMMUFD(hiod); + idev->iommufd = vdev->iommufd; + idev->devid = vdev->devid; + idev->hwpt_id = vdev->hwpt->hwpt_id; + return true; } @@ -858,10 +885,14 @@ hiod_iommufd_vfio_get_page_size_mask(HostIOMMUDevice *hiod) static void hiod_iommufd_vfio_class_init(ObjectClass *oc, const void *data) { HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_CLASS(oc); + HostIOMMUDeviceIOMMUFDClass *idevc = HOST_IOMMU_DEVICE_IOMMUFD_CLASS(oc); hiodc->realize = hiod_iommufd_vfio_realize; hiodc->get_iova_ranges = hiod_iommufd_vfio_get_iova_ranges; hiodc->get_page_size_mask = hiod_iommufd_vfio_get_page_size_mask; + + idevc->attach_hwpt = host_iommu_device_iommufd_vfio_attach_hwpt; + idevc->detach_hwpt = host_iommu_device_iommufd_vfio_detach_hwpt; }; static const TypeInfo types[] = { diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c index bfacb3d..f498e23 100644 --- a/hw/vfio/listener.c +++ b/hw/vfio/listener.c @@ -90,16 +90,17 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section) section->offset_within_address_space & (1ULL << 63); } -/* Called with rcu_read_lock held. */ -static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, - ram_addr_t *ram_addr, bool *read_only, - Error **errp) +/* + * Called with rcu_read_lock held. + * The returned MemoryRegion must not be accessed after calling rcu_read_unlock. + */ +static MemoryRegion *vfio_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p, + Error **errp) { - bool ret, mr_has_discard_manager; + MemoryRegion *mr; - ret = memory_get_xlat_addr(iotlb, vaddr, ram_addr, read_only, - &mr_has_discard_manager, errp); - if (ret && mr_has_discard_manager) { + mr = memory_translate_iotlb(iotlb, xlat_p, errp); + if (mr && memory_region_has_ram_discard_manager(mr)) { /* * Malicious VMs might trigger discarding of IOMMU-mapped memory. The * pages will remain pinned inside vfio until unmapped, resulting in a @@ -118,7 +119,7 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, " intended via an IOMMU. It's possible to mitigate " " by setting/adjusting RLIMIT_MEMLOCK."); } - return ret; + return mr; } static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) @@ -126,6 +127,8 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); VFIOContainerBase *bcontainer = giommu->bcontainer; hwaddr iova = iotlb->iova + giommu->iommu_offset; + MemoryRegion *mr; + hwaddr xlat; void *vaddr; int ret; Error *local_err = NULL; @@ -150,10 +153,14 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) { bool read_only; - if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, &local_err)) { + mr = vfio_translate_iotlb(iotlb, &xlat, &local_err); + if (!mr) { error_report_err(local_err); goto out; } + vaddr = memory_region_get_ram_ptr(mr) + xlat; + read_only = !(iotlb->perm & IOMMU_WO) || mr->readonly; + /* * vaddr is only valid until rcu_read_unlock(). But after * vfio_dma_map has set up the mapping the pages will be @@ -163,7 +170,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) */ ret = vfio_container_dma_map(bcontainer, iova, iotlb->addr_mask + 1, vaddr, - read_only); + read_only, mr); if (ret) { error_report("vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", " "0x%"HWADDR_PRIx", %p) = %d (%s)", @@ -233,7 +240,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl, vaddr = memory_region_get_ram_ptr(section->mr) + start; ret = vfio_container_dma_map(bcontainer, iova, next - start, - vaddr, section->readonly); + vaddr, section->readonly, section->mr); if (ret) { /* Rollback */ vfio_ram_discard_notify_discard(rdl, section); @@ -430,7 +437,7 @@ static void vfio_listener_commit(MemoryListener *listener) listener); void (*listener_commit)(VFIOContainerBase *bcontainer); - listener_commit = VFIO_IOMMU_GET_CLASS(bcontainer)->listener_begin; + listener_commit = VFIO_IOMMU_GET_CLASS(bcontainer)->listener_commit; if (listener_commit) { listener_commit(bcontainer); @@ -449,11 +456,38 @@ static void vfio_device_error_append(VFIODevice *vbasedev, Error **errp) } } +VFIORamDiscardListener *vfio_find_ram_discard_listener( + VFIOContainerBase *bcontainer, MemoryRegionSection *section) +{ + VFIORamDiscardListener *vrdl = NULL; + + QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) { + if (vrdl->mr == section->mr && + vrdl->offset_within_address_space == + section->offset_within_address_space) { + break; + } + } + + if (!vrdl) { + hw_error("vfio: Trying to sync missing RAM discard listener"); + /* does not return */ + } + return vrdl; +} + static void vfio_listener_region_add(MemoryListener *listener, MemoryRegionSection *section) { VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase, listener); + vfio_container_region_add(bcontainer, section, false); +} + +void vfio_container_region_add(VFIOContainerBase *bcontainer, + MemoryRegionSection *section, + bool cpr_remap) +{ hwaddr iova, end; Int128 llend, llsize; void *vaddr; @@ -489,6 +523,11 @@ static void vfio_listener_region_add(MemoryListener *listener, int iommu_idx; trace_vfio_listener_region_add_iommu(section->mr->name, iova, end); + + if (cpr_remap) { + vfio_cpr_giommu_remap(bcontainer, section); + } + /* * FIXME: For VFIO iommu types which have KVM acceleration to * avoid bouncing all map/unmaps through qemu this way, this @@ -531,7 +570,12 @@ static void vfio_listener_region_add(MemoryListener *listener, * about changes. */ if (memory_region_has_ram_discard_manager(section->mr)) { - vfio_ram_discard_register_listener(bcontainer, section); + if (!cpr_remap) { + vfio_ram_discard_register_listener(bcontainer, section); + } else if (!vfio_cpr_ram_discard_register_listener(bcontainer, + section)) { + goto fail; + } return; } @@ -557,7 +601,7 @@ static void vfio_listener_region_add(MemoryListener *listener, } ret = vfio_container_dma_map(bcontainer, iova, int128_get64(llsize), - vaddr, section->readonly); + vaddr, section->readonly, section->mr); if (ret) { error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", " "0x%"HWADDR_PRIx", %p) = %d (%s)", @@ -1010,6 +1054,8 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) ram_addr_t translated_addr; Error *local_err = NULL; int ret = -EINVAL; + MemoryRegion *mr; + hwaddr xlat; trace_vfio_iommu_map_dirty_notify(iova, iova + iotlb->addr_mask); @@ -1021,9 +1067,11 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) } rcu_read_lock(); - if (!vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL, &local_err)) { + mr = vfio_translate_iotlb(iotlb, &xlat, &local_err); + if (!mr) { goto out_unlock; } + translated_addr = memory_region_get_ram_addr(mr) + xlat; ret = vfio_container_query_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1, translated_addr, &local_err); @@ -1075,19 +1123,8 @@ vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer, MemoryRegionSection *section) { RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr); - VFIORamDiscardListener *vrdl = NULL; - - QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) { - if (vrdl->mr == section->mr && - vrdl->offset_within_address_space == - section->offset_within_address_space) { - break; - } - } - - if (!vrdl) { - hw_error("vfio: Trying to sync missing RAM discard listener"); - } + VFIORamDiscardListener *vrdl = + vfio_find_ram_discard_listener(bcontainer, section); /* * We only want/can synchronize the bitmap for actually mapped parts - diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build index bccb050..73d29f9 100644 --- a/hw/vfio/meson.build +++ b/hw/vfio/meson.build @@ -21,6 +21,7 @@ system_ss.add(when: 'CONFIG_VFIO_XGMAC', if_true: files('calxeda-xgmac.c')) system_ss.add(when: 'CONFIG_VFIO_AMD_XGBE', if_true: files('amd-xgbe.c')) system_ss.add(when: 'CONFIG_VFIO', if_true: files( 'cpr.c', + 'cpr-legacy.c', 'device.c', 'migration.c', 'migration-multifd.c', diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index a1bfdfe..fa25bde 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -30,6 +30,7 @@ #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" #include "migration/vmstate.h" +#include "migration/cpr.h" #include "qobject/qdict.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" @@ -56,6 +57,23 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev); static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled); static void vfio_msi_disable_common(VFIOPCIDevice *vdev); +static bool vfio_notifier_init(VFIOPCIDevice *vdev, EventNotifier *e, + const char *name, int nr, Error **errp) +{ + int ret = event_notifier_init(e, 0); + + if (ret) { + error_setg_errno(errp, -ret, "vfio_notifier_init %s failed", name); + } + return !ret; +} + +static void vfio_notifier_cleanup(VFIOPCIDevice *vdev, EventNotifier *e, + const char *name, int nr) +{ + event_notifier_cleanup(e); +} + /* * Disabling BAR mmaping can be slow, but toggling it around INTx can * also be a huge overhead. We try to get the best of both worlds by @@ -103,7 +121,7 @@ static void vfio_intx_interrupt(void *opaque) } } -static void vfio_intx_eoi(VFIODevice *vbasedev) +void vfio_pci_intx_eoi(VFIODevice *vbasedev) { VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); @@ -111,7 +129,7 @@ static void vfio_intx_eoi(VFIODevice *vbasedev) return; } - trace_vfio_intx_eoi(vbasedev->name); + trace_vfio_pci_intx_eoi(vbasedev->name); vdev->intx.pending = false; pci_irq_deassert(&vdev->pdev); @@ -136,8 +154,7 @@ static bool vfio_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp) pci_irq_deassert(&vdev->pdev); /* Get an eventfd for resample/unmask */ - if (event_notifier_init(&vdev->intx.unmask, 0)) { - error_setg(errp, "event_notifier_init failed eoi"); + if (!vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0, errp)) { goto fail; } @@ -169,7 +186,7 @@ fail_vfio: kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vdev->intx.interrupt, vdev->intx.route.irq); fail_irqfd: - event_notifier_cleanup(&vdev->intx.unmask); + vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0); fail: qemu_set_fd_handler(irq_fd, vfio_intx_interrupt, NULL, vdev); vfio_device_irq_unmask(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX); @@ -201,7 +218,7 @@ static void vfio_intx_disable_kvm(VFIOPCIDevice *vdev) } /* We only need to close the eventfd for VFIO to cleanup the kernel side */ - event_notifier_cleanup(&vdev->intx.unmask); + vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0); /* QEMU starts listening for interrupt events. */ qemu_set_fd_handler(event_notifier_get_fd(&vdev->intx.interrupt), @@ -236,7 +253,7 @@ static void vfio_intx_update(VFIOPCIDevice *vdev, PCIINTxRoute *route) } /* Re-enable the interrupt in cased we missed an EOI */ - vfio_intx_eoi(&vdev->vbasedev); + vfio_pci_intx_eoi(&vdev->vbasedev); } static void vfio_intx_routing_notifier(PCIDevice *pdev) @@ -268,7 +285,6 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp) uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1); Error *err = NULL; int32_t fd; - int ret; if (!pin) { @@ -291,9 +307,8 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp) } #endif - ret = event_notifier_init(&vdev->intx.interrupt, 0); - if (ret) { - error_setg_errno(errp, -ret, "event_notifier_init failed"); + if (!vfio_notifier_init(vdev, &vdev->intx.interrupt, "intx-interrupt", 0, + errp)) { return false; } fd = event_notifier_get_fd(&vdev->intx.interrupt); @@ -302,7 +317,7 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp) if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0, VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) { qemu_set_fd_handler(fd, NULL, NULL, vdev); - event_notifier_cleanup(&vdev->intx.interrupt); + vfio_notifier_cleanup(vdev, &vdev->intx.interrupt, "intx-interrupt", 0); return false; } @@ -329,13 +344,18 @@ static void vfio_intx_disable(VFIOPCIDevice *vdev) fd = event_notifier_get_fd(&vdev->intx.interrupt); qemu_set_fd_handler(fd, NULL, NULL, vdev); - event_notifier_cleanup(&vdev->intx.interrupt); + vfio_notifier_cleanup(vdev, &vdev->intx.interrupt, "intx-interrupt", 0); vdev->interrupt = VFIO_INT_NONE; trace_vfio_intx_disable(vdev->vbasedev.name); } +bool vfio_pci_intx_enable(VFIOPCIDevice *vdev, Error **errp) +{ + return vfio_intx_enable(vdev, errp); +} + /* * MSI/X */ @@ -460,8 +480,8 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix) return ret; } -static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, - int vector_n, bool msix) +void vfio_pci_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, + int vector_n, bool msix) { if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi)) { return; @@ -471,13 +491,16 @@ static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, vector_n, &vdev->pdev); } -static void vfio_connect_kvm_msi_virq(VFIOMSIVector *vector) +static void vfio_connect_kvm_msi_virq(VFIOMSIVector *vector, int nr) { + const char *name = "kvm_interrupt"; + if (vector->virq < 0) { return; } - if (event_notifier_init(&vector->kvm_interrupt, 0)) { + if (!vfio_notifier_init(vector->vdev, &vector->kvm_interrupt, name, nr, + NULL)) { goto fail_notifier; } @@ -489,19 +512,20 @@ static void vfio_connect_kvm_msi_virq(VFIOMSIVector *vector) return; fail_kvm: - event_notifier_cleanup(&vector->kvm_interrupt); + vfio_notifier_cleanup(vector->vdev, &vector->kvm_interrupt, name, nr); fail_notifier: kvm_irqchip_release_virq(kvm_state, vector->virq); vector->virq = -1; } -static void vfio_remove_kvm_msi_virq(VFIOMSIVector *vector) +static void vfio_remove_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, + int nr) { kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt, vector->virq); kvm_irqchip_release_virq(kvm_state, vector->virq); vector->virq = -1; - event_notifier_cleanup(&vector->kvm_interrupt); + vfio_notifier_cleanup(vdev, &vector->kvm_interrupt, "kvm_interrupt", nr); } static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg, @@ -511,6 +535,43 @@ static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg, kvm_irqchip_commit_routes(kvm_state); } +static void set_irq_signalling(VFIODevice *vbasedev, VFIOMSIVector *vector, + unsigned int nr) +{ + Error *err = NULL; + int32_t fd; + + if (vector->virq >= 0) { + fd = event_notifier_get_fd(&vector->kvm_interrupt); + } else { + fd = event_notifier_get_fd(&vector->interrupt); + } + + if (!vfio_device_irq_set_signaling(vbasedev, VFIO_PCI_MSIX_IRQ_INDEX, nr, + VFIO_IRQ_SET_ACTION_TRIGGER, + fd, &err)) { + error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name); + } +} + +void vfio_pci_vector_init(VFIOPCIDevice *vdev, int nr) +{ + VFIOMSIVector *vector = &vdev->msi_vectors[nr]; + PCIDevice *pdev = &vdev->pdev; + Error *local_err = NULL; + + vector->vdev = vdev; + vector->virq = -1; + if (!vfio_notifier_init(vdev, &vector->interrupt, "interrupt", nr, + &local_err)) { + error_report_err(local_err); + } + vector->use = true; + if (vdev->interrupt == VFIO_INT_MSIX) { + msix_vector_use(pdev, nr); + } +} + static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, MSIMessage *msg, IOHandler *handler) { @@ -524,13 +585,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, vector = &vdev->msi_vectors[nr]; if (!vector->use) { - vector->vdev = vdev; - vector->virq = -1; - if (event_notifier_init(&vector->interrupt, 0)) { - error_report("vfio: Error: event_notifier_init failed"); - } - vector->use = true; - msix_vector_use(pdev, nr); + vfio_pci_vector_init(vdev, nr); } qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), @@ -542,19 +597,19 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, */ if (vector->virq >= 0) { if (!msg) { - vfio_remove_kvm_msi_virq(vector); + vfio_remove_kvm_msi_virq(vdev, vector, nr); } else { vfio_update_kvm_msi_virq(vector, *msg, pdev); } } else { if (msg) { if (vdev->defer_kvm_irq_routing) { - vfio_add_kvm_msi_virq(vdev, vector, nr, true); + vfio_pci_add_kvm_msi_virq(vdev, vector, nr, true); } else { vfio_route_change = kvm_irqchip_begin_route_changes(kvm_state); - vfio_add_kvm_msi_virq(vdev, vector, nr, true); + vfio_pci_add_kvm_msi_virq(vdev, vector, nr, true); kvm_irqchip_commit_route_changes(&vfio_route_change); - vfio_connect_kvm_msi_virq(vector); + vfio_connect_kvm_msi_virq(vector, nr); } } } @@ -583,21 +638,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, strerror(-ret)); } } else { - Error *err = NULL; - int32_t fd; - - if (vector->virq >= 0) { - fd = event_notifier_get_fd(&vector->kvm_interrupt); - } else { - fd = event_notifier_get_fd(&vector->interrupt); - } - - if (!vfio_device_irq_set_signaling(&vdev->vbasedev, - VFIO_PCI_MSIX_IRQ_INDEX, nr, - VFIO_IRQ_SET_ACTION_TRIGGER, fd, - &err)) { - error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); - } + set_irq_signalling(&vdev->vbasedev, vector, nr); } } @@ -645,14 +686,14 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) } } -static void vfio_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev) +void vfio_pci_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev) { assert(!vdev->defer_kvm_irq_routing); vdev->defer_kvm_irq_routing = true; vfio_route_change = kvm_irqchip_begin_route_changes(kvm_state); } -static void vfio_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev) +void vfio_pci_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev) { int i; @@ -662,7 +703,7 @@ static void vfio_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev) kvm_irqchip_commit_route_changes(&vfio_route_change); for (i = 0; i < vdev->nr_vectors; i++) { - vfio_connect_kvm_msi_virq(&vdev->msi_vectors[i]); + vfio_connect_kvm_msi_virq(&vdev->msi_vectors[i], i); } } @@ -682,14 +723,14 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev) * routes once rather than per vector provides a substantial * performance improvement. */ - vfio_prepare_kvm_msi_virq_batch(vdev); + vfio_pci_prepare_kvm_msi_virq_batch(vdev); if (msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use, vfio_msix_vector_release, NULL)) { error_report("vfio: msix_set_vector_notifiers failed"); } - vfio_commit_kvm_msi_virq_batch(vdev); + vfio_pci_commit_kvm_msi_virq_batch(vdev); if (vdev->nr_vectors) { ret = vfio_enable_vectors(vdev, true); @@ -733,19 +774,21 @@ retry: * Deferring to commit the KVM routes once rather than per vector * provides a substantial performance improvement. */ - vfio_prepare_kvm_msi_virq_batch(vdev); + vfio_pci_prepare_kvm_msi_virq_batch(vdev); vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->nr_vectors); for (i = 0; i < vdev->nr_vectors; i++) { VFIOMSIVector *vector = &vdev->msi_vectors[i]; + Error *local_err = NULL; vector->vdev = vdev; vector->virq = -1; vector->use = true; - if (event_notifier_init(&vector->interrupt, 0)) { - error_report("vfio: Error: event_notifier_init failed"); + if (!vfio_notifier_init(vdev, &vector->interrupt, "interrupt", i, + &local_err)) { + error_report_err(local_err); } qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), @@ -755,10 +798,10 @@ retry: * Attempt to enable route through KVM irqchip, * default to userspace handling if unavailable. */ - vfio_add_kvm_msi_virq(vdev, vector, i, false); + vfio_pci_add_kvm_msi_virq(vdev, vector, i, false); } - vfio_commit_kvm_msi_virq_batch(vdev); + vfio_pci_commit_kvm_msi_virq_batch(vdev); /* Set interrupt type prior to possible interrupts */ vdev->interrupt = VFIO_INT_MSI; @@ -801,11 +844,11 @@ static void vfio_msi_disable_common(VFIOPCIDevice *vdev) VFIOMSIVector *vector = &vdev->msi_vectors[i]; if (vdev->msi_vectors[i].use) { if (vector->virq >= 0) { - vfio_remove_kvm_msi_virq(vector); + vfio_remove_kvm_msi_virq(vdev, vector, i); } qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), NULL, NULL, NULL); - event_notifier_cleanup(&vector->interrupt); + vfio_notifier_cleanup(vdev, &vector->interrupt, "interrupt", i); } } @@ -984,7 +1027,7 @@ static int vfio_pci_config_space_write(VFIOPCIDevice *vdev, off_t offset, { return vdev->vbasedev.io_ops->region_write(&vdev->vbasedev, VFIO_PCI_CONFIG_REGION_INDEX, - offset, size, data); + offset, size, data, false); } static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size) @@ -1738,7 +1781,7 @@ static bool vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp) return true; } -static void vfio_teardown_msi(VFIOPCIDevice *vdev) +void vfio_pci_teardown_msi(VFIOPCIDevice *vdev) { msi_uninit(&vdev->pdev); @@ -1788,6 +1831,9 @@ static void vfio_bar_prepare(VFIOPCIDevice *vdev, int nr) bar->type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK : ~PCI_BASE_ADDRESS_MEM_MASK); bar->size = bar->region.size; + + /* IO regions are sync, memory can be async */ + bar->region.post_wr = (bar->ioport == 0); } static void vfio_bars_prepare(VFIOPCIDevice *vdev) @@ -1834,7 +1880,7 @@ static void vfio_bars_register(VFIOPCIDevice *vdev) } } -static void vfio_bars_exit(VFIOPCIDevice *vdev) +void vfio_pci_bars_exit(VFIOPCIDevice *vdev) { int i; @@ -2425,7 +2471,7 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev) g_free(config); } -static bool vfio_add_capabilities(VFIOPCIDevice *vdev, Error **errp) +bool vfio_pci_add_capabilities(VFIOPCIDevice *vdev, Error **errp) { PCIDevice *pdev = &vdev->pdev; @@ -2701,7 +2747,7 @@ static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) static VFIODeviceOps vfio_pci_ops = { .vfio_compute_needs_reset = vfio_pci_compute_needs_reset, .vfio_hot_reset_multi = vfio_pci_hot_reset_multi, - .vfio_eoi = vfio_intx_eoi, + .vfio_eoi = vfio_pci_intx_eoi, .vfio_get_object = vfio_pci_get_object, .vfio_save_config = vfio_pci_save_config, .vfio_load_config = vfio_pci_load_config, @@ -2772,7 +2818,7 @@ bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) return true; } -static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) +bool vfio_pci_populate_device(VFIOPCIDevice *vdev, Error **errp) { VFIODevice *vbasedev = &vdev->vbasedev; struct vfio_region_info *reg_info = NULL; @@ -2818,7 +2864,7 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) return false; } - trace_vfio_populate_device_config(vdev->vbasedev.name, + trace_vfio_pci_populate_device_config(vdev->vbasedev.name, (unsigned long)reg_info->size, (unsigned long)reg_info->offset, (unsigned long)reg_info->flags); @@ -2840,7 +2886,7 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) ret = vfio_device_get_irq_info(vbasedev, VFIO_PCI_ERR_IRQ_INDEX, &irq_info); if (ret) { /* This can fail for an old kernel or legacy PCI dev */ - trace_vfio_populate_device_get_irq_info_failure(strerror(-ret)); + trace_vfio_pci_populate_device_get_irq_info_failure(strerror(-ret)); } else if (irq_info.count == 1) { vdev->pci_aer = true; } else { @@ -2852,8 +2898,20 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) return true; } -static void vfio_pci_put_device(VFIOPCIDevice *vdev) +void vfio_pci_put_device(VFIOPCIDevice *vdev) { + vfio_display_finalize(vdev); + vfio_bars_finalize(vdev); + g_free(vdev->emulated_config_bits); + g_free(vdev->rom); + /* + * XXX Leaking igd_opregion is not an oversight, we can't remove the + * fw_cfg entry therefore leaking this allocation seems like the safest + * option. + * + * g_free(vdev->igd_opregion); + */ + vfio_device_detach(&vdev->vbasedev); g_free(vdev->vbasedev.name); @@ -2888,7 +2946,7 @@ static void vfio_err_notifier_handler(void *opaque) * and continue after disabling error recovery support for the * device. */ -static void vfio_register_err_notifier(VFIOPCIDevice *vdev) +void vfio_pci_register_err_notifier(VFIOPCIDevice *vdev) { Error *err = NULL; int32_t fd; @@ -2897,8 +2955,9 @@ static void vfio_register_err_notifier(VFIOPCIDevice *vdev) return; } - if (event_notifier_init(&vdev->err_notifier, 0)) { - error_report("vfio: Unable to init event notifier for error detection"); + if (!vfio_notifier_init(vdev, &vdev->err_notifier, "err_notifier", 0, + &err)) { + error_report_err(err); vdev->pci_aer = false; return; } @@ -2910,7 +2969,7 @@ static void vfio_register_err_notifier(VFIOPCIDevice *vdev) VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) { error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); qemu_set_fd_handler(fd, NULL, NULL, vdev); - event_notifier_cleanup(&vdev->err_notifier); + vfio_notifier_cleanup(vdev, &vdev->err_notifier, "err_notifier", 0); vdev->pci_aer = false; } } @@ -2929,7 +2988,7 @@ static void vfio_unregister_err_notifier(VFIOPCIDevice *vdev) } qemu_set_fd_handler(event_notifier_get_fd(&vdev->err_notifier), NULL, NULL, vdev); - event_notifier_cleanup(&vdev->err_notifier); + vfio_notifier_cleanup(vdev, &vdev->err_notifier, "err_notifier", 0); } static void vfio_req_notifier_handler(void *opaque) @@ -2947,7 +3006,7 @@ static void vfio_req_notifier_handler(void *opaque) } } -static void vfio_register_req_notifier(VFIOPCIDevice *vdev) +void vfio_pci_register_req_notifier(VFIOPCIDevice *vdev) { struct vfio_irq_info irq_info; Error *err = NULL; @@ -2964,8 +3023,9 @@ static void vfio_register_req_notifier(VFIOPCIDevice *vdev) return; } - if (event_notifier_init(&vdev->req_notifier, 0)) { - error_report("vfio: Unable to init event notifier for device request"); + if (!vfio_notifier_init(vdev, &vdev->req_notifier, "req_notifier", 0, + &err)) { + error_report_err(err); return; } @@ -2976,7 +3036,7 @@ static void vfio_register_req_notifier(VFIOPCIDevice *vdev) VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) { error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); qemu_set_fd_handler(fd, NULL, NULL, vdev); - event_notifier_cleanup(&vdev->req_notifier); + vfio_notifier_cleanup(vdev, &vdev->req_notifier, "req_notifier", 0); } else { vdev->req_enabled = true; } @@ -2996,15 +3056,28 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) } qemu_set_fd_handler(event_notifier_get_fd(&vdev->req_notifier), NULL, NULL, vdev); - event_notifier_cleanup(&vdev->req_notifier); + vfio_notifier_cleanup(vdev, &vdev->req_notifier, "req_notifier", 0); vdev->req_enabled = false; } -static bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp) +bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp) { PCIDevice *pdev = &vdev->pdev; VFIODevice *vbasedev = &vdev->vbasedev; + uint32_t config_space_size; + int ret; + + config_space_size = MIN(pci_config_size(&vdev->pdev), vdev->config_size); + + /* Get a copy of config space */ + ret = vfio_pci_config_space_read(vdev, 0, config_space_size, + vdev->pdev.config); + if (ret < (int)config_space_size) { + ret = ret < 0 ? -ret : EFAULT; + error_setg_errno(errp, ret, "failed to read device config space"); + return false; + } /* vfio emulates a lot for us, but some bits need extra love */ vdev->emulated_config_bits = g_malloc0(vdev->config_size); @@ -3094,7 +3167,7 @@ static bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp) return true; } -static bool vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp) +bool vfio_pci_interrupt_setup(VFIOPCIDevice *vdev, Error **errp) { PCIDevice *pdev = &vdev->pdev; @@ -3126,15 +3199,14 @@ static bool vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp) return true; } -static void vfio_realize(PCIDevice *pdev, Error **errp) +static void vfio_pci_realize(PCIDevice *pdev, Error **errp) { ERRP_GUARD(); VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); VFIODevice *vbasedev = &vdev->vbasedev; - int i, ret; + int i; char uuid[UUID_STR_LEN]; g_autofree char *name = NULL; - uint32_t config_space_size; if (vbasedev->fd < 0 && !vbasedev->sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || @@ -3185,18 +3257,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) goto error; } - if (!vfio_populate_device(vdev, errp)) { - goto error; - } - - config_space_size = MIN(pci_config_size(&vdev->pdev), vdev->config_size); - - /* Get a copy of config space */ - ret = vfio_pci_config_space_read(vdev, 0, config_space_size, - vdev->pdev.config); - if (ret < (int)config_space_size) { - ret = ret < 0 ? -ret : EFAULT; - error_setg_errno(errp, ret, "failed to read device config space"); + if (!vfio_pci_populate_device(vdev, errp)) { goto error; } @@ -3210,7 +3271,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) goto out_teardown; } - if (!vfio_add_capabilities(vdev, errp)) { + if (!vfio_pci_add_capabilities(vdev, errp)) { goto out_unset_idev; } @@ -3226,7 +3287,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) vfio_bar_quirk_setup(vdev, i); } - if (!vfio_interrupt_setup(vdev, errp)) { + if (!vfio_pci_interrupt_setup(vdev, errp)) { goto out_unset_idev; } @@ -3270,8 +3331,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) } } - vfio_register_err_notifier(vdev); - vfio_register_req_notifier(vdev); + vfio_pci_register_err_notifier(vdev); + vfio_pci_register_req_notifier(vdev); vfio_setup_resetfn_quirk(vdev); return; @@ -3292,8 +3353,8 @@ out_unset_idev: pci_device_unset_iommu_device(pdev); } out_teardown: - vfio_teardown_msi(vdev); - vfio_bars_exit(vdev); + vfio_pci_teardown_msi(vdev); + vfio_pci_bars_exit(vdev); error: error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name); } @@ -3302,17 +3363,6 @@ static void vfio_instance_finalize(Object *obj) { VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj); - vfio_display_finalize(vdev); - vfio_bars_finalize(vdev); - g_free(vdev->emulated_config_bits); - g_free(vdev->rom); - /* - * XXX Leaking igd_opregion is not an oversight, we can't remove the - * fw_cfg entry therefore leaking this allocation seems like the safest - * option. - * - * g_free(vdev->igd_opregion); - */ vfio_pci_put_device(vdev); } @@ -3331,9 +3381,9 @@ static void vfio_exitfn(PCIDevice *pdev) if (vdev->intx.mmap_timer) { timer_free(vdev->intx.mmap_timer); } - vfio_teardown_msi(vdev); + vfio_pci_teardown_msi(vdev); vfio_pci_disable_rp_atomics(vdev); - vfio_bars_exit(vdev); + vfio_pci_bars_exit(vdev); vfio_migration_exit(vbasedev); if (!vbasedev->mdev) { pci_device_unset_iommu_device(pdev); @@ -3344,6 +3394,11 @@ static void vfio_pci_reset(DeviceState *dev) { VFIOPCIDevice *vdev = VFIO_PCI_BASE(dev); + /* Do not reset the device during qemu_system_reset prior to cpr load */ + if (cpr_is_incoming()) { + return; + } + trace_vfio_pci_reset(vdev->vbasedev.name); vfio_pci_pre_reset(vdev); @@ -3401,6 +3456,13 @@ static void vfio_instance_init(Object *obj) /* QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command * line, therefore, no need to wait to realize like other devices */ pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; + + /* + * A device that is resuming for cpr is already configured, so do not + * reset it during qemu_system_reset prior to cpr load, else interrupts + * may be lost. + */ + pci_dev->cap_present |= QEMU_PCI_SKIP_RESET_ON_CPR; } static void vfio_pci_base_dev_class_init(ObjectClass *klass, const void *data) @@ -3418,7 +3480,7 @@ static void vfio_pci_base_dev_class_init(ObjectClass *klass, const void *data) static const TypeInfo vfio_pci_base_dev_info = { .name = TYPE_VFIO_PCI_BASE, .parent = TYPE_PCI_DEVICE, - .instance_size = 0, + .instance_size = sizeof(VFIOPCIDevice), .abstract = true, .class_init = vfio_pci_base_dev_class_init, .interfaces = (const InterfaceInfo[]) { @@ -3513,8 +3575,9 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data) #ifdef CONFIG_IOMMUFD object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd); #endif + dc->vmsd = &vfio_cpr_pci_vmstate; dc->desc = "VFIO-based PCI device assignment"; - pdc->realize = vfio_realize; + pdc->realize = vfio_pci_realize; object_class_property_set_description(klass, /* 1.3 */ "host", @@ -3640,7 +3703,6 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data) static const TypeInfo vfio_pci_dev_info = { .name = TYPE_VFIO_PCI, .parent = TYPE_VFIO_PCI_BASE, - .instance_size = sizeof(VFIOPCIDevice), .class_init = vfio_pci_dev_class_init, .instance_init = vfio_instance_init, .instance_finalize = vfio_instance_finalize, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 5ce0fb9..d3dc227 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -210,6 +210,14 @@ static inline bool vfio_is_vga(VFIOPCIDevice *vdev) return class == PCI_CLASS_DISPLAY_VGA; } +/* MSI/MSI-X/INTx */ +void vfio_pci_vector_init(VFIOPCIDevice *vdev, int nr); +void vfio_pci_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector, + int vector_n, bool msix); +void vfio_pci_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev); +void vfio_pci_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev); +bool vfio_pci_intx_enable(VFIOPCIDevice *vdev, Error **errp); + uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len); void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr, uint32_t val, int len); @@ -248,4 +256,15 @@ void vfio_display_finalize(VFIOPCIDevice *vdev); extern const VMStateDescription vfio_display_vmstate; +void vfio_pci_bars_exit(VFIOPCIDevice *vdev); +bool vfio_pci_add_capabilities(VFIOPCIDevice *vdev, Error **errp); +bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp); +bool vfio_pci_interrupt_setup(VFIOPCIDevice *vdev, Error **errp); +void vfio_pci_intx_eoi(VFIODevice *vbasedev); +void vfio_pci_put_device(VFIOPCIDevice *vdev); +bool vfio_pci_populate_device(VFIOPCIDevice *vdev, Error **errp); +void vfio_pci_register_err_notifier(VFIOPCIDevice *vdev); +void vfio_pci_register_req_notifier(VFIOPCIDevice *vdev); +void vfio_pci_teardown_msi(VFIOPCIDevice *vdev); + #endif /* HW_VFIO_VFIO_PCI_H */ diff --git a/hw/vfio/region.c b/hw/vfio/region.c index 34752c3..f5b8e3c 100644 --- a/hw/vfio/region.c +++ b/hw/vfio/region.c @@ -66,7 +66,7 @@ void vfio_region_write(void *opaque, hwaddr addr, } ret = vbasedev->io_ops->region_write(vbasedev, region->nr, - addr, size, &buf); + addr, size, &buf, region->post_wr); if (ret != size) { error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64 ",%d) failed: %s", @@ -200,6 +200,7 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, region->size = info->size; region->fd_offset = info->offset; region->nr = index; + region->post_wr = false; if (region->size) { region->mem = g_new0(MemoryRegion, 1); @@ -241,6 +242,7 @@ int vfio_region_mmap(VFIORegion *region) { int i, ret, prot = 0; char *name; + int fd; if (!region->mem) { return 0; @@ -271,14 +273,18 @@ int vfio_region_mmap(VFIORegion *region) goto no_mmap; } + /* Use the per-region fd if set, or the shared fd. */ + fd = region->vbasedev->region_fds ? + region->vbasedev->region_fds[region->nr] : + region->vbasedev->fd, + map_align = (void *)ROUND_UP((uintptr_t)map_base, (uintptr_t)align); munmap(map_base, map_align - map_base); munmap(map_align + region->mmaps[i].size, align - (map_align - map_base)); region->mmaps[i].mmap = mmap(map_align, region->mmaps[i].size, prot, - MAP_SHARED | MAP_FIXED, - region->vbasedev->fd, + MAP_SHARED | MAP_FIXED, fd, region->fd_offset + region->mmaps[i].offset); if (region->mmaps[i].mmap == MAP_FAILED) { diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index e90ec9b..f06236f 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -2,7 +2,7 @@ # pci.c vfio_intx_interrupt(const char *name, char line) " (%s) Pin %c" -vfio_intx_eoi(const char *name) " (%s) EOI" +vfio_pci_intx_eoi(const char *name) " (%s) EOI" vfio_intx_enable_kvm(const char *name) " (%s) KVM INTx accel enabled" vfio_intx_disable_kvm(const char *name) " (%s) KVM INTx accel disabled" vfio_intx_update(const char *name, int new_irq, int target_irq) " (%s) IRQ moved %d -> %d" @@ -35,8 +35,8 @@ vfio_pci_hot_reset(const char *name, const char *type) " (%s) %s" vfio_pci_hot_reset_has_dep_devices(const char *name) "%s: hot reset dependent devices:" vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int group_id) "\t%04x:%02x:%02x.%x group %d" vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s" -vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device '%s' config: size: 0x%lx, offset: 0x%lx, flags: 0x%lx" -vfio_populate_device_get_irq_info_failure(const char *errstr) "VFIO_DEVICE_GET_IRQ_INFO failure: %s" +vfio_pci_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device '%s' config: size: 0x%lx, offset: 0x%lx, flags: 0x%lx" +vfio_pci_populate_device_get_irq_info_failure(const char *errstr) "VFIO_DEVICE_GET_IRQ_INFO failure: %s" vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d" vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x" vfio_pci_reset(const char *name) " (%s)" diff --git a/hw/vfio/vfio-cpr.h b/hw/vfio/vfio-cpr.h deleted file mode 100644 index 134b83a..0000000 --- a/hw/vfio/vfio-cpr.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * VFIO CPR - * - * Copyright (c) 2025 Oracle and/or its affiliates. - * - * SPDX-License-Identifier: GPL-2.0-or-later - */ - -#ifndef HW_VFIO_CPR_H -#define HW_VFIO_CPR_H - -bool vfio_cpr_register_container(VFIOContainerBase *bcontainer, Error **errp); -void vfio_cpr_unregister_container(VFIOContainerBase *bcontainer); - -#endif /* HW_VFIO_CPR_H */ diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 1ab2c11..7061b6e 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -209,6 +209,8 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) int ret; Int128 llend; Error *local_err = NULL; + MemoryRegion *mr; + hwaddr xlat; if (iotlb->target_as != &address_space_memory) { error_report("Wrong target AS \"%s\", only system memory is allowed", @@ -228,11 +230,14 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) { bool read_only; - if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL, - &local_err)) { + mr = memory_translate_iotlb(iotlb, &xlat, &local_err); + if (!mr) { error_report_err(local_err); return; } + vaddr = memory_region_get_ram_ptr(mr) + xlat; + read_only = !(iotlb->perm & IOMMU_WO) || mr->readonly; + ret = vhost_vdpa_dma_map(s, VHOST_VDPA_GUEST_PA_ASID, iova, iotlb->addr_mask + 1, vaddr, read_only); if (ret) { @@ -594,6 +599,36 @@ static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v) v->shadow_vqs = g_steal_pointer(&shadow_vqs); } +static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) +{ + struct vhost_vdpa *v = dev->opaque; + + uint64_t features; + uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | + 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH | + 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID | + 0x1ULL << VHOST_BACKEND_F_SUSPEND; + int r; + + if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { + return -EFAULT; + } + + features &= f; + + if (vhost_vdpa_first_dev(dev)) { + r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); + if (r) { + return -EFAULT; + } + } + + dev->backend_cap = features; + v->shared->backend_cap = features; + + return 0; +} + static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) { struct vhost_vdpa *v = opaque; @@ -603,7 +638,12 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) v->dev = dev; dev->opaque = opaque ; - v->shared->listener = vhost_vdpa_memory_listener; + + ret = vhost_vdpa_set_backend_cap(dev); + if (unlikely(ret != 0)) { + return ret; + } + vhost_vdpa_init_svq(dev, v); error_propagate(&dev->migration_blocker, v->migration_blocker); @@ -639,6 +679,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER); + v->shared->listener = vhost_vdpa_memory_listener; return 0; } @@ -841,36 +882,6 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev, return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK); } -static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) -{ - struct vhost_vdpa *v = dev->opaque; - - uint64_t features; - uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | - 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH | - 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID | - 0x1ULL << VHOST_BACKEND_F_SUSPEND; - int r; - - if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { - return -EFAULT; - } - - features &= f; - - if (vhost_vdpa_first_dev(dev)) { - r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); - if (r) { - return -EFAULT; - } - } - - dev->backend_cap = features; - v->shared->backend_cap = features; - - return 0; -} - static int vhost_vdpa_get_device_id(struct vhost_dev *dev, uint32_t *device_id) { @@ -888,8 +899,14 @@ static int vhost_vdpa_reset_device(struct vhost_dev *dev) ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status); trace_vhost_vdpa_reset_device(dev); + if (ret) { + return ret; + } + + memory_listener_unregister(&v->shared->listener); + v->shared->listener_registered = false; v->suspended = false; - return ret; + return 0; } static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx) @@ -1373,7 +1390,15 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) "IOMMU and try again"); return -1; } - memory_listener_register(&v->shared->listener, dev->vdev->dma_as); + if (v->shared->listener_registered && + dev->vdev->dma_as != v->shared->listener.address_space) { + memory_listener_unregister(&v->shared->listener); + v->shared->listener_registered = false; + } + if (!v->shared->listener_registered) { + memory_listener_register(&v->shared->listener, dev->vdev->dma_as); + v->shared->listener_registered = true; + } return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); } @@ -1383,8 +1408,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) static void vhost_vdpa_reset_status(struct vhost_dev *dev) { - struct vhost_vdpa *v = dev->opaque; - if (!vhost_vdpa_last_dev(dev)) { return; } @@ -1392,7 +1415,6 @@ static void vhost_vdpa_reset_status(struct vhost_dev *dev) vhost_vdpa_reset_device(dev); vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER); - memory_listener_unregister(&v->shared->listener); } static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, @@ -1526,12 +1548,27 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev, static int vhost_vdpa_set_owner(struct vhost_dev *dev) { + int r; + struct vhost_vdpa *v; + if (!vhost_vdpa_first_dev(dev)) { return 0; } trace_vhost_vdpa_set_owner(dev); - return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL); + r = vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL); + if (unlikely(r < 0)) { + return r; + } + + /* + * Being optimistic and listening address space memory. If the device + * uses vIOMMU, it is changed at vhost_vdpa_dev_start. + */ + v = dev->opaque; + memory_listener_register(&v->shared->listener, &address_space_memory); + v->shared->listener_registered = true; + return 0; } static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev, @@ -1563,7 +1600,6 @@ const VhostOps vdpa_ops = { .vhost_set_vring_kick = vhost_vdpa_set_vring_kick, .vhost_set_vring_call = vhost_vdpa_set_vring_call, .vhost_get_features = vhost_vdpa_get_features, - .vhost_set_backend_cap = vhost_vdpa_set_backend_cap, .vhost_set_owner = vhost_vdpa_set_owner, .vhost_set_vring_endian = NULL, .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit, diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 9b48aa8..fba2372 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -146,9 +146,7 @@ static const VMStateDescription vmstate_virtio_pci = { static bool virtio_pci_has_extra_state(DeviceState *d) { - VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); - - return proxy->flags & VIRTIO_PCI_FLAG_MIGRATE_EXTRA; + return true; } static void virtio_pci_save_extra_state(DeviceState *d, QEMUFile *f) @@ -1215,7 +1213,12 @@ static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign, static bool virtio_pci_query_guest_notifiers(DeviceState *d) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); - return msix_enabled(&proxy->pci_dev); + + if (msix_enabled(&proxy->pci_dev)) { + return true; + } else { + return pci_irq_disabled(&proxy->pci_dev); + } } static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) @@ -2363,12 +2366,8 @@ static void virtio_pci_bus_reset_hold(Object *obj, ResetType type) static const Property virtio_pci_properties[] = { DEFINE_PROP_BIT("virtio-pci-bus-master-bug-migration", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, false), - DEFINE_PROP_BIT("migrate-extra", VirtIOPCIProxy, flags, - VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, true), DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, false), - DEFINE_PROP_BIT("x-disable-pcie", VirtIOPCIProxy, flags, - VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false), DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false), DEFINE_PROP_BOOL("x-ignore-backend-features", VirtIOPCIProxy, @@ -2397,8 +2396,7 @@ static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp) VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev); PCIDevice *pci_dev = &proxy->pci_dev; - if (!(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_PCIE) && - virtio_pci_modern(proxy)) { + if (virtio_pci_modern(proxy)) { pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; } diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 2e98cec..82a285a 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -205,6 +205,15 @@ static const char *virtio_id_to_name(uint16_t device_id) return name; } +static void virtio_check_indirect_feature(VirtIODevice *vdev) +{ + if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Device %s: indirect_desc was not negotiated!\n", + vdev->name); + } +} + /* Called within call_rcu(). */ static void virtio_free_region_cache(VRingMemoryRegionCaches *caches) { @@ -1680,8 +1689,8 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) VirtIODevice *vdev = vq->vdev; VirtQueueElement *elem = NULL; unsigned out_num, in_num, elem_entries; - hwaddr addr[VIRTQUEUE_MAX_SIZE]; - struct iovec iov[VIRTQUEUE_MAX_SIZE]; + hwaddr QEMU_UNINITIALIZED addr[VIRTQUEUE_MAX_SIZE]; + struct iovec QEMU_UNINITIALIZED iov[VIRTQUEUE_MAX_SIZE]; VRingDesc desc; int rc; @@ -1733,6 +1742,7 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) virtio_error(vdev, "Invalid size for indirect buffer table"); goto done; } + virtio_check_indirect_feature(vdev); /* loop over the indirect descriptor table */ len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as, @@ -1826,8 +1836,8 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz) VirtIODevice *vdev = vq->vdev; VirtQueueElement *elem = NULL; unsigned out_num, in_num, elem_entries; - hwaddr addr[VIRTQUEUE_MAX_SIZE]; - struct iovec iov[VIRTQUEUE_MAX_SIZE]; + hwaddr QEMU_UNINITIALIZED addr[VIRTQUEUE_MAX_SIZE]; + struct iovec QEMU_UNINITIALIZED iov[VIRTQUEUE_MAX_SIZE]; VRingPackedDesc desc; uint16_t id; int rc; @@ -1870,6 +1880,7 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz) virtio_error(vdev, "Invalid size for indirect buffer table"); goto done; } + virtio_check_indirect_feature(vdev); /* loop over the indirect descriptor table */ len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as, diff --git a/include/accel/tcg/cpu-ops.h b/include/accel/tcg/cpu-ops.h index cd22e5d..dd8ea30 100644 --- a/include/accel/tcg/cpu-ops.h +++ b/include/accel/tcg/cpu-ops.h @@ -223,6 +223,13 @@ struct TCGCPUOps { MMUAccessType access_type, int mmu_idx, bool probe, uintptr_t retaddr); /** + * @pointer_wrap: + * + * We have incremented @base to @result, resulting in a page change. + * For the current cpu state, adjust @result for possible overflow. + */ + vaddr (*pointer_wrap)(CPUState *cpu, int mmu_idx, vaddr result, vaddr base); + /** * @do_transaction_failed: Callback for handling failed memory transactions * (ie bus faults or external aborts; not MMU faults) */ @@ -315,6 +322,12 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len, */ int cpu_watchpoint_address_matches(CPUState *cpu, vaddr addr, vaddr len); +/* + * Common pointer_wrap implementations. + */ +vaddr cpu_pointer_wrap_notreached(CPUState *, int, vaddr, vaddr); +vaddr cpu_pointer_wrap_uint32(CPUState *, int, vaddr, vaddr); + #endif #endif /* TCG_CPU_OPS_H */ diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h index 9be34b3..84a2a4e 100644 --- a/include/block/block-global-state.h +++ b/include/block/block-global-state.h @@ -192,10 +192,10 @@ int bdrv_inactivate_all(void); int bdrv_flush_all(void); void bdrv_close_all(void); -void bdrv_drain_all_begin(void); +void GRAPH_UNLOCKED bdrv_drain_all_begin(void); void bdrv_drain_all_begin_nopoll(void); void bdrv_drain_all_end(void); -void bdrv_drain_all(void); +void GRAPH_UNLOCKED bdrv_drain_all(void); void bdrv_aio_cancel(BlockAIOCB *acb); @@ -274,11 +274,16 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag); int bdrv_debug_resume(BlockDriverState *bs, const char *tag); bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag); -bool bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx, - GHashTable *visited, Transaction *tran, - Error **errp); -int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, - BdrvChild *ignore_child, Error **errp); +bool GRAPH_RDLOCK +bdrv_child_change_aio_context(BdrvChild *c, AioContext *ctx, + GHashTable *visited, Transaction *tran, + Error **errp); +int GRAPH_UNLOCKED +bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx, + BdrvChild *ignore_child, Error **errp); +int GRAPH_RDLOCK +bdrv_try_change_aio_context_locked(BlockDriverState *bs, AioContext *ctx, + BdrvChild *ignore_child, Error **errp); int GRAPH_RDLOCK bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz); int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo); diff --git a/include/block/block-io.h b/include/block/block-io.h index b99cc98..4cf83fb 100644 --- a/include/block/block-io.h +++ b/include/block/block-io.h @@ -431,7 +431,7 @@ bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, * * This function can be recursive. */ -void bdrv_drained_begin(BlockDriverState *bs); +void GRAPH_UNLOCKED bdrv_drained_begin(BlockDriverState *bs); /** * bdrv_do_drained_begin_quiesce: diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h index 2982dd3..925a3e7 100644 --- a/include/block/block_int-common.h +++ b/include/block/block_int-common.h @@ -396,9 +396,23 @@ struct BlockDriver { int GRAPH_RDLOCK_PTR (*bdrv_probe_geometry)( BlockDriverState *bs, HDGeometry *geo); + /** + * Hot add a BDS's child. Used in combination with bdrv_del_child, so the + * user can take a child offline when it is broken and take a new child + * online. + * + * All block nodes must be drained. + */ void GRAPH_WRLOCK_PTR (*bdrv_add_child)( BlockDriverState *parent, BlockDriverState *child, Error **errp); + /** + * Hot remove a BDS's child. Used in combination with bdrv_add_child, so the + * user can take a child offline when it is broken and take a new child + * online. + * + * All block nodes must be drained. + */ void GRAPH_WRLOCK_PTR (*bdrv_del_child)( BlockDriverState *parent, BdrvChild *child, Error **errp); @@ -983,9 +997,21 @@ struct BdrvChildClass { bool backing_mask_protocol, Error **errp); - bool (*change_aio_ctx)(BdrvChild *child, AioContext *ctx, - GHashTable *visited, Transaction *tran, - Error **errp); + /* + * Notifies the parent that the child is trying to change its AioContext. + * The parent may in turn change the AioContext of other nodes in the same + * transaction. Returns true if the change is possible and the transaction + * can be continued. Returns false and sets @errp if not and the transaction + * must be aborted. + * + * @visited will accumulate all visited BdrvChild objects. The caller is + * responsible for freeing the list afterwards. + * + * Must be called with the affected block nodes drained. + */ + bool GRAPH_RDLOCK_PTR (*change_aio_ctx)(BdrvChild *child, AioContext *ctx, + GHashTable *visited, + Transaction *tran, Error **errp); /* * I/O API functions. These functions are thread-safe. diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 7061ab7..990f3e1 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -137,6 +137,8 @@ BlockJob *block_job_get_locked(const char *id); * Add @bs to the list of BlockDriverState that are involved in * @job. This means that all operations will be blocked on @bs while * @job exists. + * + * All block nodes must be drained. */ int GRAPH_WRLOCK block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs, diff --git a/include/exec/memop.h b/include/exec/memop.h index 407a47d..cf7da33 100644 --- a/include/exec/memop.h +++ b/include/exec/memop.h @@ -162,8 +162,8 @@ static inline unsigned memop_size(MemOp op) static inline MemOp size_memop(unsigned size) { #ifdef CONFIG_DEBUG_TCG - /* Power of 2 up to 8. */ - assert((size & (size - 1)) == 0 && size >= 1 && size <= 8); + /* Power of 2 up to 1024 */ + assert(is_power_of_2(size) && size >= 1 && size <= (1 << MO_SIZE)); #endif return (MemOp)ctz32(size); } diff --git a/include/gdbstub/commands.h b/include/gdbstub/commands.h index 40f0514..bff3674 100644 --- a/include/gdbstub/commands.h +++ b/include/gdbstub/commands.h @@ -1,5 +1,5 @@ #ifndef GDBSTUB_COMMANDS_H -#define GDBSTUB +#define GDBSTUB_COMMANDS_H typedef void (*GdbCmdHandler)(GArray *params, void *user_ctx); diff --git a/include/hw/arm/boot.h b/include/hw/arm/boot.h index b12bf61..a2e22bd 100644 --- a/include/hw/arm/boot.h +++ b/include/hw/arm/boot.h @@ -132,6 +132,9 @@ struct arm_boot_info { bool secure_board_setup; arm_endianness endianness; + + /* CPU having load the kernel and that should be the first to boot. */ + ARMCPU *primary_cpu; }; /** diff --git a/include/hw/arm/npcm8xx.h b/include/hw/arm/npcm8xx.h index 3436abf..a8377db 100644 --- a/include/hw/arm/npcm8xx.h +++ b/include/hw/arm/npcm8xx.h @@ -28,7 +28,8 @@ #include "hw/misc/npcm7xx_mft.h" #include "hw/misc/npcm7xx_pwm.h" #include "hw/misc/npcm7xx_rng.h" -#include "hw/net/npcm7xx_emc.h" +#include "hw/net/npcm_gmac.h" +#include "hw/net/npcm_pcs.h" #include "hw/nvram/npcm7xx_otp.h" #include "hw/sd/npcm7xx_sdhci.h" #include "hw/timer/npcm7xx_timer.h" @@ -99,6 +100,8 @@ struct NPCM8xxState { EHCISysBusState ehci[2]; OHCISysBusState ohci[2]; NPCM7xxFIUState fiu[3]; + NPCMGMACState gmac[4]; + NPCMPCSState pcs; NPCM7xxSDHCIState mmc; NPCMPSPIState pspi; }; diff --git a/include/hw/block/flash.h b/include/hw/block/flash.h index 5fd67f5..3671f01 100644 --- a/include/hw/block/flash.h +++ b/include/hw/block/flash.h @@ -44,24 +44,6 @@ PFlashCFI02 *pflash_cfi02_register(hwaddr base, uint16_t unlock_addr1, int be); -/* nand.c */ -DeviceState *nand_init(BlockBackend *blk, int manf_id, int chip_id); -void nand_setpins(DeviceState *dev, uint8_t cle, uint8_t ale, - uint8_t ce, uint8_t wp, uint8_t gnd); -void nand_getpins(DeviceState *dev, int *rb); -void nand_setio(DeviceState *dev, uint32_t value); -uint32_t nand_getio(DeviceState *dev); -uint32_t nand_getbuswidth(DeviceState *dev); - -#define NAND_MFR_TOSHIBA 0x98 -#define NAND_MFR_SAMSUNG 0xec -#define NAND_MFR_FUJITSU 0x04 -#define NAND_MFR_NATIONAL 0x8f -#define NAND_MFR_RENESAS 0x07 -#define NAND_MFR_STMICRO 0x20 -#define NAND_MFR_HYNIX 0xad -#define NAND_MFR_MICRON 0x2c - /* m25p80.c */ #define TYPE_M25P80 "m25p80-generic" diff --git a/include/hw/boards.h b/include/hw/boards.h index a7b1fcf..f424b2b 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -286,8 +286,7 @@ struct MachineClass { no_parallel:1, no_floppy:1, no_cdrom:1, - pci_allow_0_address:1, - legacy_fw_cfg_order:1; + pci_allow_0_address:1; bool auto_create_sdcard; bool is_default; const char *default_machine_opts; @@ -863,10 +862,4 @@ extern const size_t hw_compat_2_7_len; extern GlobalProperty hw_compat_2_6[]; extern const size_t hw_compat_2_6_len; -extern GlobalProperty hw_compat_2_5[]; -extern const size_t hw_compat_2_5_len; - -extern GlobalProperty hw_compat_2_4[]; -extern const size_t hw_compat_2_4_len; - #endif diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 1e87f7d..33296a1 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -1126,4 +1126,10 @@ extern const VMStateDescription vmstate_cpu_common; #define UNASSIGNED_CPU_INDEX -1 #define UNASSIGNED_CLUSTER_INDEX -1 +enum CacheType { + DATA_CACHE, + INSTRUCTION_CACHE, + UNIFIED_CACHE +}; + #endif diff --git a/include/hw/core/resetcontainer.h b/include/hw/core/resetcontainer.h index 23db0c7..daeb18c 100644 --- a/include/hw/core/resetcontainer.h +++ b/include/hw/core/resetcontainer.h @@ -20,7 +20,7 @@ #include "qom/object.h" #define TYPE_RESETTABLE_CONTAINER "resettable-container" -OBJECT_DECLARE_TYPE(ResettableContainer, ResettableContainerClass, RESETTABLE_CONTAINER) +OBJECT_DECLARE_SIMPLE_TYPE(ResettableContainer, RESETTABLE_CONTAINER) /** * resettable_container_add: Add a resettable object to the container diff --git a/include/hw/gpio/aspeed_gpio.h b/include/hw/gpio/aspeed_gpio.h index e1e6c54..e6b2fe7 100644 --- a/include/hw/gpio/aspeed_gpio.h +++ b/include/hw/gpio/aspeed_gpio.h @@ -70,7 +70,7 @@ typedef struct AspeedGPIOReg { } AspeedGPIOReg; struct AspeedGPIOClass { - SysBusDevice parent_obj; + SysBusDeviceClass parent_class; const GPIOSetProperties *props; uint32_t nr_gpio_pins; uint32_t nr_gpio_sets; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 9563674..79b72c5 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -107,7 +107,6 @@ struct PCMachineClass { /* RAM / address space compat: */ bool gigabyte_align; bool has_reserved_memory; - bool broken_reserved_end; bool enforce_amd_1tb_hole; bool isa_bios_alias; @@ -299,12 +298,6 @@ extern const size_t pc_compat_2_7_len; extern GlobalProperty pc_compat_2_6[]; extern const size_t pc_compat_2_6_len; -extern GlobalProperty pc_compat_2_5[]; -extern const size_t pc_compat_2_5_len; - -extern GlobalProperty pc_compat_2_4[]; -extern const size_t pc_compat_2_4_len; - #define DEFINE_PC_MACHINE(suffix, namestr, initfn, optsfn) \ static void pc_machine_##suffix##_class_init(ObjectClass *oc, \ const void *data) \ diff --git a/include/hw/i386/tdvf.h b/include/hw/i386/tdvf.h new file mode 100644 index 0000000..e75c8d1 --- /dev/null +++ b/include/hw/i386/tdvf.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2025 Intel Corporation + * Author: Isaku Yamahata <isaku.yamahata at gmail.com> + * <isaku.yamahata at intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_I386_TDVF_H +#define HW_I386_TDVF_H + +#include "qemu/osdep.h" + +#define TDVF_SECTION_TYPE_BFV 0 +#define TDVF_SECTION_TYPE_CFV 1 +#define TDVF_SECTION_TYPE_TD_HOB 2 +#define TDVF_SECTION_TYPE_TEMP_MEM 3 + +#define TDVF_SECTION_ATTRIBUTES_MR_EXTEND (1U << 0) +#define TDVF_SECTION_ATTRIBUTES_PAGE_AUG (1U << 1) + +typedef struct TdxFirmwareEntry { + uint32_t data_offset; + uint32_t data_len; + uint64_t address; + uint64_t size; + uint32_t type; + uint32_t attributes; + + void *mem_ptr; +} TdxFirmwareEntry; + +typedef struct TdxFirmware { + void *mem_ptr; + + uint32_t nr_entries; + TdxFirmwareEntry *entries; +} TdxFirmware; + +#define for_each_tdx_fw_entry(fw, e) \ + for (e = (fw)->entries; e != (fw)->entries + (fw)->nr_entries; e++) + +int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size); + +#endif /* HW_I386_TDVF_H */ diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h index 258b134..fc460b8 100644 --- a/include/hw/i386/x86.h +++ b/include/hw/i386/x86.h @@ -27,13 +27,8 @@ #include "qom/object.h" struct X86MachineClass { - /*< private >*/ MachineClass parent; - /*< public >*/ - - /* TSC rate migration: */ - bool save_tsc_khz; /* use DMA capable linuxboot option rom */ bool fwcfg_dma_enabled; /* CPU and apic information: */ diff --git a/include/hw/intc/arm_gic.h b/include/hw/intc/arm_gic.h index 48f6a51..be923f7 100644 --- a/include/hw/intc/arm_gic.h +++ b/include/hw/intc/arm_gic.h @@ -27,6 +27,9 @@ * implement the security extensions * + QOM property "has-virtualization-extensions": set true if the GIC should * implement the virtualization extensions + * + QOM property "first-cpu-index": index of the first cpu attached to the + * GIC (default 0). The CPUs connected to the GIC are assumed to be + * first-cpu-index, first-cpu-index + 1, ... first-cpu-index + num-cpu - 1. * + unnamed GPIO inputs: (where P is number of SPIs, i.e. num-irq - 32) * [0..P-1] SPIs * [P..P+31] PPIs for CPU 0 diff --git a/include/hw/intc/arm_gic_common.h b/include/hw/intc/arm_gic_common.h index 97fea41..93a3cc2 100644 --- a/include/hw/intc/arm_gic_common.h +++ b/include/hw/intc/arm_gic_common.h @@ -129,6 +129,8 @@ struct GICState { uint32_t num_lrs; uint32_t num_cpu; + /* cpu_index of the first CPU, attached to this GIC. */ + uint32_t first_cpu_index; MemoryRegion iomem; /* Distributor */ /* This is just so we can have an opaque pointer which identifies diff --git a/include/hw/intc/loongarch_extioi_common.h b/include/hw/intc/loongarch_extioi_common.h index 735bfee..dca25ff 100644 --- a/include/hw/intc/loongarch_extioi_common.h +++ b/include/hw/intc/loongarch_extioi_common.h @@ -35,7 +35,7 @@ #define EXTIOI_ISR_START (0x700 - APIC_OFFSET) #define EXTIOI_ISR_END (0x720 - APIC_OFFSET) #define EXTIOI_COREISR_START (0x800 - APIC_OFFSET) -#define EXTIOI_COREISR_END (0xB20 - APIC_OFFSET) +#define EXTIOI_COREISR_END (0x820 - APIC_OFFSET) #define EXTIOI_COREMAP_START (0xC00 - APIC_OFFSET) #define EXTIOI_COREMAP_END (0xD00 - APIC_OFFSET) #define EXTIOI_SIZE 0x800 diff --git a/include/hw/loader.h b/include/hw/loader.h index d280dc3..c96b5e1 100644 --- a/include/hw/loader.h +++ b/include/hw/loader.h @@ -270,8 +270,6 @@ int rom_add_elf_program(const char *name, GMappedFile *mapped_file, void *data, AddressSpace *as); int rom_check_and_register_reset(void); void rom_set_fw(FWCfgState *f); -void rom_set_order_override(int order); -void rom_reset_order_override(void); /** * rom_transaction_begin: diff --git a/include/hw/loongarch/boot.h b/include/hw/loongarch/boot.h index b3b870d..9819f7f 100644 --- a/include/hw/loongarch/boot.h +++ b/include/hw/loongarch/boot.h @@ -102,11 +102,10 @@ struct loongarch_boot_info { const char *kernel_cmdline; const char *initrd_filename; uint64_t a0, a1, a2; + uint64_t initrd_addr; + uint64_t initrd_size; }; -extern struct memmap_entry *memmap_table; -extern unsigned memmap_entries; - struct memmap_entry { uint64_t address; uint64_t length; diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h index 2b7d199..602feab 100644 --- a/include/hw/loongarch/virt.h +++ b/include/hw/loongarch/virt.h @@ -63,6 +63,8 @@ struct LoongArchVirtMachineState { struct loongarch_boot_info bootinfo; DeviceState *ipi; DeviceState *extioi; + struct memmap_entry *memmap_table; + unsigned int memmap_entries; }; #define TYPE_LOONGARCH_VIRT_MACHINE MACHINE_TYPE_NAME("virt") diff --git a/include/hw/misc/aspeed_hace.h b/include/hw/misc/aspeed_hace.h index 5d4aa19..d5d07c6 100644 --- a/include/hw/misc/aspeed_hace.h +++ b/include/hw/misc/aspeed_hace.h @@ -22,7 +22,6 @@ OBJECT_DECLARE_TYPE(AspeedHACEState, AspeedHACEClass, ASPEED_HACE) -#define ASPEED_HACE_NR_REGS (0x64 >> 2) #define ASPEED_HACE_MAX_SG 256 /* max number of entries */ struct AspeedHACEState { @@ -31,10 +30,8 @@ struct AspeedHACEState { MemoryRegion iomem; qemu_irq irq; - struct iovec iov_cache[ASPEED_HACE_MAX_SG]; - uint32_t regs[ASPEED_HACE_NR_REGS]; + uint32_t *regs; uint32_t total_req_len; - uint32_t iov_count; MemoryRegion *dram_mr; AddressSpace dram_as; @@ -46,11 +43,17 @@ struct AspeedHACEState { struct AspeedHACEClass { SysBusDeviceClass parent_class; + const MemoryRegionOps *reg_ops; uint32_t src_mask; uint32_t dest_mask; uint32_t key_mask; uint32_t hash_mask; + uint64_t nr_regs; bool raise_crypt_interrupt_workaround; + uint32_t src_hi_mask; + uint32_t dest_hi_mask; + uint32_t key_hi_mask; + bool has_dma64; }; #endif /* ASPEED_HACE_H */ diff --git a/include/hw/nvram/fw_cfg.h b/include/hw/nvram/fw_cfg.h index 47578cc..d41b932 100644 --- a/include/hw/nvram/fw_cfg.h +++ b/include/hw/nvram/fw_cfg.h @@ -42,14 +42,6 @@ struct FWCfgDataGeneratorClass { typedef struct fw_cfg_file FWCfgFile; -#define FW_CFG_ORDER_OVERRIDE_VGA 70 -#define FW_CFG_ORDER_OVERRIDE_NIC 80 -#define FW_CFG_ORDER_OVERRIDE_USER 100 -#define FW_CFG_ORDER_OVERRIDE_DEVICE 110 - -void fw_cfg_set_order_override(FWCfgState *fw_cfg, int order); -void fw_cfg_reset_order_override(FWCfgState *fw_cfg); - typedef struct FWCfgFiles { uint32_t count; FWCfgFile f[]; @@ -75,8 +67,6 @@ struct FWCfgState { uint32_t cur_offset; Notifier machine_ready; - int fw_cfg_order_override; - bool dma_enabled; dma_addr_t dma_addr; AddressSpace *dma_as; diff --git a/include/hw/pci/msix.h b/include/hw/pci/msix.h index 0e6f257..11ef945 100644 --- a/include/hw/pci/msix.h +++ b/include/hw/pci/msix.h @@ -32,6 +32,7 @@ int msix_present(PCIDevice *dev); bool msix_is_masked(PCIDevice *dev, unsigned vector); void msix_set_pending(PCIDevice *dev, unsigned vector); void msix_clr_pending(PCIDevice *dev, int vector); +int msix_is_pending(PCIDevice *dev, unsigned vector); void msix_vector_use(PCIDevice *dev, unsigned vector); void msix_vector_unuse(PCIDevice *dev, unsigned vector); diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index c2fe6ca..df3cc7b 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -222,6 +222,8 @@ enum { QEMU_PCIE_EXT_TAG = (1 << QEMU_PCIE_EXT_TAG_BITNR), #define QEMU_PCI_CAP_PM_BITNR 14 QEMU_PCI_CAP_PM = (1 << QEMU_PCI_CAP_PM_BITNR), +#define QEMU_PCI_SKIP_RESET_ON_CPR_BITNR 15 + QEMU_PCI_SKIP_RESET_ON_CPR = (1 << QEMU_PCI_SKIP_RESET_ON_CPR_BITNR), }; typedef struct PCIINTxRoute { @@ -375,6 +377,28 @@ void pci_bus_get_w64_range(PCIBus *bus, Range *range); void pci_device_deassert_intx(PCIDevice *dev); +/* Page Request Interface */ +typedef enum { + IOMMU_PRI_RESP_SUCCESS, + IOMMU_PRI_RESP_INVALID_REQUEST, + IOMMU_PRI_RESP_FAILURE, +} IOMMUPRIResponseCode; + +typedef struct IOMMUPRIResponse { + IOMMUPRIResponseCode response_code; + uint16_t prgi; +} IOMMUPRIResponse; + +struct IOMMUPRINotifier; + +typedef void (*IOMMUPRINotify)(struct IOMMUPRINotifier *notifier, + IOMMUPRIResponse *response); + +typedef struct IOMMUPRINotifier { + IOMMUPRINotify notify; +} IOMMUPRINotifier; + +#define PCI_PRI_PRGI_MASK 0x1ffU /** * struct PCIIOMMUOps: callbacks structure for specific IOMMU handlers @@ -429,6 +453,179 @@ typedef struct PCIIOMMUOps { * @devfn: device and function number of the PCI device. */ void (*unset_iommu_device)(PCIBus *bus, void *opaque, int devfn); + /** + * @get_iotlb_info: get properties required to initialize a device IOTLB. + * + * Callback required if devices are allowed to cache translations. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @addr_width: the address width of the IOMMU (output parameter). + * + * @min_page_size: the page size of the IOMMU (output parameter). + */ + void (*get_iotlb_info)(void *opaque, uint8_t *addr_width, + uint32_t *min_page_size); + /** + * @init_iotlb_notifier: initialize an IOMMU notifier. + * + * Optional callback. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + * + * @n: the notifier to be initialized. + * + * @fn: the callback to be installed. + * + * @user_opaque: a user pointer that can be used to track a state. + */ + void (*init_iotlb_notifier)(PCIBus *bus, void *opaque, int devfn, + IOMMUNotifier *n, IOMMUNotify fn, + void *user_opaque); + /** + * @register_iotlb_notifier: setup an IOTLB invalidation notifier. + * + * Callback required if devices are allowed to cache translations. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + * + * @pasid: the pasid of the address space to watch. + * + * @n: the notifier to register. + */ + void (*register_iotlb_notifier)(PCIBus *bus, void *opaque, int devfn, + uint32_t pasid, IOMMUNotifier *n); + /** + * @unregister_iotlb_notifier: remove an IOTLB invalidation notifier. + * + * Callback required if devices are allowed to cache translations. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + * + * @pasid: the pasid of the address space to stop watching. + * + * @n: the notifier to unregister. + */ + void (*unregister_iotlb_notifier)(PCIBus *bus, void *opaque, int devfn, + uint32_t pasid, IOMMUNotifier *n); + /** + * @ats_request_translation: issue an ATS request. + * + * Callback required if devices are allowed to use the address + * translation service. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + * + * @pasid: the pasid of the address space to use for the request. + * + * @priv_req: privileged mode bit (PASID TLP). + * + * @exec_req: execute request bit (PASID TLP). + * + * @addr: start address of the memory range to be translated. + * + * @length: length of the memory range in bytes. + * + * @no_write: request a read-only translation (if supported). + * + * @result: buffer in which the TLB entries will be stored. + * + * @result_length: result buffer length. + * + * @err_count: number of untranslated subregions. + * + * Returns: the number of translations stored in the result buffer, or + * -ENOMEM if the buffer is not large enough. + */ + ssize_t (*ats_request_translation)(PCIBus *bus, void *opaque, int devfn, + uint32_t pasid, bool priv_req, + bool exec_req, hwaddr addr, + size_t length, bool no_write, + IOMMUTLBEntry *result, + size_t result_length, + uint32_t *err_count); + /** + * @pri_register_notifier: setup the PRI completion callback. + * + * Callback required if devices are allowed to use the page request + * interface. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + * + * @pasid: the pasid of the address space to track. + * + * @notifier: the notifier to register. + */ + void (*pri_register_notifier)(PCIBus *bus, void *opaque, int devfn, + uint32_t pasid, IOMMUPRINotifier *notifier); + /** + * @pri_unregister_notifier: remove the PRI completion callback. + * + * Callback required if devices are allowed to use the page request + * interface. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + * + * @pasid: the pasid of the address space to stop tracking. + */ + void (*pri_unregister_notifier)(PCIBus *bus, void *opaque, int devfn, + uint32_t pasid); + /** + * @pri_request_page: issue a PRI request. + * + * Callback required if devices are allowed to use the page request + * interface. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + * + * @pasid: the pasid of the address space to use for the request. + * + * @priv_req: privileged mode bit (PASID TLP). + * + * @exec_req: execute request bit (PASID TLP). + * + * @addr: untranslated address of the requested page. + * + * @lpig: last page in group. + * + * @prgi: page request group index. + * + * @is_read: request read access. + * + * @is_write: request write access. + */ + int (*pri_request_page)(PCIBus *bus, void *opaque, int devfn, + uint32_t pasid, bool priv_req, bool exec_req, + hwaddr addr, bool lpig, uint16_t prgi, bool is_read, + bool is_write); } PCIIOMMUOps; AddressSpace *pci_device_iommu_address_space(PCIDevice *dev); @@ -437,6 +634,126 @@ bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod, void pci_device_unset_iommu_device(PCIDevice *dev); /** + * pci_iommu_get_iotlb_info: get properties required to initialize a + * device IOTLB. + * + * Returns 0 on success, or a negative errno otherwise. + * + * @dev: the device that wants to get the information. + * @addr_width: the address width of the IOMMU (output parameter). + * @min_page_size: the page size of the IOMMU (output parameter). + */ +int pci_iommu_get_iotlb_info(PCIDevice *dev, uint8_t *addr_width, + uint32_t *min_page_size); + +/** + * pci_iommu_init_iotlb_notifier: initialize an IOMMU notifier. + * + * This function is used by devices before registering an IOTLB notifier. + * + * @dev: the device. + * @n: the notifier to be initialized. + * @fn: the callback to be installed. + * @opaque: a user pointer that can be used to track a state. + */ +int pci_iommu_init_iotlb_notifier(PCIDevice *dev, IOMMUNotifier *n, + IOMMUNotify fn, void *opaque); + +/** + * pci_ats_request_translation: perform an ATS request. + * + * Returns the number of translations stored in @result in case of success, + * a negative error code otherwise. + * -ENOMEM is returned when the result buffer is not large enough to store + * all the translations. + * + * @dev: the ATS-capable PCI device. + * @pasid: the pasid of the address space in which the translation will be done. + * @priv_req: privileged mode bit (PASID TLP). + * @exec_req: execute request bit (PASID TLP). + * @addr: start address of the memory range to be translated. + * @length: length of the memory range in bytes. + * @no_write: request a read-only translation (if supported). + * @result: buffer in which the TLB entries will be stored. + * @result_length: result buffer length. + * @err_count: number of untranslated subregions. + */ +ssize_t pci_ats_request_translation(PCIDevice *dev, uint32_t pasid, + bool priv_req, bool exec_req, + hwaddr addr, size_t length, + bool no_write, IOMMUTLBEntry *result, + size_t result_length, + uint32_t *err_count); + +/** + * pci_pri_request_page: perform a PRI request. + * + * Returns 0 if the PRI request has been sent to the guest OS, + * an error code otherwise. + * + * @dev: the PRI-capable PCI device. + * @pasid: the pasid of the address space in which the translation will be done. + * @priv_req: privileged mode bit (PASID TLP). + * @exec_req: execute request bit (PASID TLP). + * @addr: untranslated address of the requested page. + * @lpig: last page in group. + * @prgi: page request group index. + * @is_read: request read access. + * @is_write: request write access. + */ +int pci_pri_request_page(PCIDevice *dev, uint32_t pasid, bool priv_req, + bool exec_req, hwaddr addr, bool lpig, + uint16_t prgi, bool is_read, bool is_write); + +/** + * pci_pri_register_notifier: register the PRI callback for a given address + * space. + * + * Returns 0 on success, an error code otherwise. + * + * @dev: the PRI-capable PCI device. + * @pasid: the pasid of the address space to track. + * @notifier: the notifier to register. + */ +int pci_pri_register_notifier(PCIDevice *dev, uint32_t pasid, + IOMMUPRINotifier *notifier); + +/** + * pci_pri_unregister_notifier: remove the PRI callback from a given address + * space. + * + * @dev: the PRI-capable PCI device. + * @pasid: the pasid of the address space to stop tracking. + */ +void pci_pri_unregister_notifier(PCIDevice *dev, uint32_t pasid); + +/** + * pci_iommu_register_iotlb_notifier: register a notifier for changes to + * IOMMU translation entries in a specific address space. + * + * Returns 0 on success, or a negative errno otherwise. + * + * @dev: the device that wants to get notified. + * @pasid: the pasid of the address space to track. + * @n: the notifier to register. + */ +int pci_iommu_register_iotlb_notifier(PCIDevice *dev, uint32_t pasid, + IOMMUNotifier *n); + +/** + * pci_iommu_unregister_iotlb_notifier: unregister a notifier that has been + * registerd with pci_iommu_register_iotlb_notifier. + * + * Returns 0 on success, or a negative errno otherwise. + * + * @dev: the device that wants to stop notifications. + * @pasid: the pasid of the address space to stop tracking. + * @n: the notifier to unregister. + */ +int pci_iommu_unregister_iotlb_notifier(PCIDevice *dev, uint32_t pasid, + IOMMUNotifier *n); + +/** * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus * * Let PCI host bridges define specific operations. @@ -668,6 +985,7 @@ void lsi53c8xx_handle_legacy_cmdline(DeviceState *lsi_dev); qemu_irq pci_allocate_irq(PCIDevice *pci_dev); void pci_set_irq(PCIDevice *pci_dev, int level); +int pci_irq_disabled(PCIDevice *d); static inline void pci_irq_assert(PCIDevice *pci_dev) { diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h index e41d95b..eee0338 100644 --- a/include/hw/pci/pci_device.h +++ b/include/hw/pci/pci_device.h @@ -90,6 +90,7 @@ struct PCIDevice { char name[64]; PCIIORegion io_regions[PCI_NUM_REGIONS]; AddressSpace bus_master_as; + bool is_master; MemoryRegion bus_master_container_region; MemoryRegion bus_master_enable_region; diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h index 70a5de0..ff6ce08 100644 --- a/include/hw/pci/pcie.h +++ b/include/hw/pci/pcie.h @@ -70,8 +70,10 @@ struct PCIExpressDevice { uint16_t aer_cap; PCIEAERLog aer_log; - /* Offset of ATS capability in config space */ + /* Offset of ATS, PRI and PASID capabilities in config space */ uint16_t ats_cap; + uint16_t pasid_cap; + uint16_t pri_cap; /* ACS */ uint16_t acs_cap; @@ -150,4 +152,13 @@ void pcie_cap_slot_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); + +void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint8_t pasid_width, + bool exec_perm, bool priv_mod); +void pcie_pri_init(PCIDevice *dev, uint16_t offset, uint32_t outstanding_pr_cap, + bool prg_response_pasid_req); + +bool pcie_pri_enabled(const PCIDevice *dev); +bool pcie_pasid_enabled(const PCIDevice *dev); +bool pcie_ats_enabled(const PCIDevice *dev); #endif /* QEMU_PCIE_H */ diff --git a/include/hw/pci/pcie_regs.h b/include/hw/pci/pcie_regs.h index 9d3b686..33a2222 100644 --- a/include/hw/pci/pcie_regs.h +++ b/include/hw/pci/pcie_regs.h @@ -86,6 +86,14 @@ typedef enum PCIExpLinkWidth { #define PCI_ARI_VER 1 #define PCI_ARI_SIZEOF 8 +/* PASID */ +#define PCI_PASID_VER 1 +#define PCI_EXT_CAP_PASID_MAX_WIDTH 20 +#define PCI_PASID_CAP_WIDTH_SHIFT 8 + +/* PRI */ +#define PCI_PRI_VER 1 + /* AER */ #define PCI_ERR_VER 2 #define PCI_ERR_SIZEOF 0x48 diff --git a/include/hw/riscv/iommu.h b/include/hw/riscv/iommu.h index b03339d..8a8acfc 100644 --- a/include/hw/riscv/iommu.h +++ b/include/hw/riscv/iommu.h @@ -30,14 +30,12 @@ typedef struct RISCVIOMMUState RISCVIOMMUState; typedef struct RISCVIOMMUSpace RISCVIOMMUSpace; #define TYPE_RISCV_IOMMU_PCI "riscv-iommu-pci" -OBJECT_DECLARE_TYPE(RISCVIOMMUStatePci, RISCVIOMMUPciClass, RISCV_IOMMU_PCI) +OBJECT_DECLARE_SIMPLE_TYPE(RISCVIOMMUStatePci, RISCV_IOMMU_PCI) typedef struct RISCVIOMMUStatePci RISCVIOMMUStatePci; -typedef struct RISCVIOMMUPciClass RISCVIOMMUPciClass; #define TYPE_RISCV_IOMMU_SYS "riscv-iommu-device" -OBJECT_DECLARE_TYPE(RISCVIOMMUStateSys, RISCVIOMMUSysClass, RISCV_IOMMU_SYS) +OBJECT_DECLARE_SIMPLE_TYPE(RISCVIOMMUStateSys, RISCV_IOMMU_SYS) typedef struct RISCVIOMMUStateSys RISCVIOMMUStateSys; -typedef struct RISCVIOMMUSysClass RISCVIOMMUSysClass; #define FDT_IRQ_TYPE_EDGE_LOW 1 diff --git a/include/hw/s390x/ap-bridge.h b/include/hw/s390x/ap-bridge.h index 470e439..7efc529 100644 --- a/include/hw/s390x/ap-bridge.h +++ b/include/hw/s390x/ap-bridge.h @@ -16,4 +16,43 @@ void s390_init_ap(void); +typedef struct ChscSeiNt0Res { + uint16_t length; + uint16_t code; + uint8_t reserved1; + uint16_t reserved2; + uint8_t nt; +#define PENDING_EVENT_INFO_BITMASK 0x80; + uint8_t flags; + uint8_t reserved3; + uint8_t rs; + uint8_t cc; +} QEMU_PACKED ChscSeiNt0Res; + +#define NT0_RES_RESPONSE_CODE 1 +#define NT0_RES_NT_DEFAULT 0 +#define NT0_RES_RS_AP_CHANGE 5 +#define NT0_RES_CC_AP_CHANGE 3 + +#define EVENT_INFORMATION_NOT_STORED 1 +#define EVENT_INFORMATION_STORED 0 + +/** + * ap_chsc_sei_nt0_get_event - Retrieve the next pending AP config + * change event + * @res: Pointer to a ChscSeiNt0Res struct to be filled with event + * data + * + * This function checks for any pending AP config change events and, + * if present, populates the provided response structure with the + * appropriate SEI NT0 fields. + * + * Return: + * EVENT_INFORMATION_STORED - An event was available and written to @res + * EVENT_INFORMATION_NOT_STORED - No event was available + */ +int ap_chsc_sei_nt0_get_event(void *res); + +bool ap_chsc_sei_nt0_have_event(void); + #endif diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h index 9283c94..d5e9aa4 100644 --- a/include/hw/s390x/cpu-topology.h +++ b/include/hw/s390x/cpu-topology.h @@ -13,7 +13,7 @@ #include "qemu/queue.h" #include "hw/boards.h" -#include "qapi/qapi-types-machine-target.h" +#include "qapi/qapi-types-machine-s390x.h" #define S390_TOPOLOGY_CPU_IFL 0x03 diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h index 3d392b0..f023265 100644 --- a/include/hw/vfio/vfio-container-base.h +++ b/include/hw/vfio/vfio-container-base.h @@ -78,7 +78,7 @@ void vfio_address_space_insert(VFIOAddressSpace *space, int vfio_container_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, - void *vaddr, bool readonly); + void *vaddr, bool readonly, MemoryRegion *mr); int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, IOMMUTLBEntry *iotlb, bool unmap_all); @@ -115,13 +115,57 @@ OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU) struct VFIOIOMMUClass { ObjectClass parent_class; - /* basic feature */ + /** + * @setup + * + * Perform basic setup of the container, including configuring IOMMU + * capabilities, IOVA ranges, supported page sizes, etc. + * + * @bcontainer: #VFIOContainerBase + * @errp: pointer to Error*, to store an error if it happens. + * + * Returns true to indicate success and false for error. + */ bool (*setup)(VFIOContainerBase *bcontainer, Error **errp); + + /** + * @listener_begin + * + * Called at the beginning of an address space update transaction. + * See #MemoryListener. + * + * @bcontainer: #VFIOContainerBase + */ void (*listener_begin)(VFIOContainerBase *bcontainer); + + /** + * @listener_commit + * + * Called at the end of an address space update transaction, + * See #MemoryListener. + * + * @bcontainer: #VFIOContainerBase + */ void (*listener_commit)(VFIOContainerBase *bcontainer); + + /** + * @dma_map + * + * Map an address range into the container. Note that the memory region is + * referenced within an RCU read lock region across this call. + * + * @bcontainer: #VFIOContainerBase to use + * @iova: start address to map + * @size: size of the range to map + * @vaddr: process virtual address of mapping + * @readonly: true if mapping should be readonly + * @mr: the memory region for this mapping + * + * Returns 0 to indicate success and -errno otherwise. + */ int (*dma_map)(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, - void *vaddr, bool readonly); + void *vaddr, bool readonly, MemoryRegion *mr); /** * @dma_unmap * @@ -132,12 +176,38 @@ struct VFIOIOMMUClass { * @size: size of the range to unmap * @iotlb: The IOMMU TLB mapping entry (or NULL) * @unmap_all: if set, unmap the entire address space + * + * Returns 0 to indicate success and -errno otherwise. */ int (*dma_unmap)(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, IOMMUTLBEntry *iotlb, bool unmap_all); + + + /** + * @attach_device + * + * Associate the given device with a container and do some related + * initialization of the device context. + * + * @name: name of the device + * @vbasedev: the device + * @as: address space to use + * @errp: pointer to Error*, to store an error if it happens. + * + * Returns true to indicate success and false for error. + */ bool (*attach_device)(const char *name, VFIODevice *vbasedev, AddressSpace *as, Error **errp); + + /* + * @detach_device + * + * Detach the given device from its container and clean up any necessary + * state. + * + * @vbasedev: the device to disassociate + */ void (*detach_device)(VFIODevice *vbasedev); /* migration feature */ @@ -152,7 +222,7 @@ struct VFIOIOMMUClass { * @start: indicates whether to start or stop dirty pages tracking * @errp: pointer to Error*, to store an error if it happens. * - * Returns zero to indicate success and negative for error + * Returns zero to indicate success and negative for error. */ int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer, bool start, Error **errp); @@ -167,7 +237,7 @@ struct VFIOIOMMUClass { * @size: size of iova range * @errp: pointer to Error*, to store an error if it happens. * - * Returns zero to indicate success and negative for error + * Returns zero to indicate success and negative for error. */ int (*query_dirty_bitmap)(const VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp); @@ -183,4 +253,10 @@ struct VFIOIOMMUClass { void (*release)(VFIOContainerBase *bcontainer); }; +VFIORamDiscardListener *vfio_find_ram_discard_listener( + VFIOContainerBase *bcontainer, MemoryRegionSection *section); + +void vfio_container_region_add(VFIOContainerBase *bcontainer, + MemoryRegionSection *section, bool cpr_remap); + #endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */ diff --git a/include/hw/vfio/vfio-container.h b/include/hw/vfio/vfio-container.h index afc498d..21e5807 100644 --- a/include/hw/vfio/vfio-container.h +++ b/include/hw/vfio/vfio-container.h @@ -10,6 +10,7 @@ #define HW_VFIO_CONTAINER_H #include "hw/vfio/vfio-container-base.h" +#include "hw/vfio/vfio-cpr.h" typedef struct VFIOContainer VFIOContainer; typedef struct VFIODevice VFIODevice; @@ -29,6 +30,7 @@ typedef struct VFIOContainer { int fd; /* /dev/vfio/vfio, empowered by the attached groups */ unsigned iommu_type; QLIST_HEAD(, VFIOGroup) group_list; + VFIOContainerCPR cpr; } VFIOContainer; OBJECT_DECLARE_SIMPLE_TYPE(VFIOContainer, VFIO_IOMMU_LEGACY); diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h new file mode 100644 index 0000000..8bf85b9 --- /dev/null +++ b/include/hw/vfio/vfio-cpr.h @@ -0,0 +1,57 @@ +/* + * VFIO CPR + * + * Copyright (c) 2025 Oracle and/or its affiliates. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_VFIO_VFIO_CPR_H +#define HW_VFIO_VFIO_CPR_H + +#include "migration/misc.h" +#include "system/memory.h" + +struct VFIOContainer; +struct VFIOContainerBase; +struct VFIOGroup; + +typedef struct VFIOContainerCPR { + Error *blocker; + bool vaddr_unmapped; + NotifierWithReturn transfer_notifier; + MemoryListener remap_listener; + int (*saved_dma_map)(const struct VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + void *vaddr, bool readonly, MemoryRegion *mr); +} VFIOContainerCPR; + +typedef struct VFIODeviceCPR { + Error *mdev_blocker; +} VFIODeviceCPR; + +bool vfio_legacy_cpr_register_container(struct VFIOContainer *container, + Error **errp); +void vfio_legacy_cpr_unregister_container(struct VFIOContainer *container); + +int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, MigrationEvent *e, + Error **errp); + +bool vfio_cpr_register_container(struct VFIOContainerBase *bcontainer, + Error **errp); +void vfio_cpr_unregister_container(struct VFIOContainerBase *bcontainer); + +int vfio_cpr_group_get_device_fd(int d, const char *name); + +bool vfio_cpr_container_match(struct VFIOContainer *container, + struct VFIOGroup *group, int fd); + +void vfio_cpr_giommu_remap(struct VFIOContainerBase *bcontainer, + MemoryRegionSection *section); + +bool vfio_cpr_ram_discard_register_listener( + struct VFIOContainerBase *bcontainer, MemoryRegionSection *section); + +extern const VMStateDescription vfio_cpr_pci_vmstate; + +#endif /* HW_VFIO_VFIO_CPR_H */ diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h index 8bcb3c1..d45e5a6 100644 --- a/include/hw/vfio/vfio-device.h +++ b/include/hw/vfio/vfio-device.h @@ -28,6 +28,7 @@ #endif #include "system/system.h" #include "hw/vfio/vfio-container-base.h" +#include "hw/vfio/vfio-cpr.h" #include "system/host_iommu_device.h" #include "system/iommufd.h" @@ -66,6 +67,7 @@ typedef struct VFIODevice { OnOffAuto enable_migration; OnOffAuto migration_multifd_transfer; bool migration_events; + bool use_region_fds; VFIODeviceOps *ops; VFIODeviceIOOps *io_ops; unsigned int num_irqs; @@ -84,6 +86,8 @@ typedef struct VFIODevice { VFIOIOASHwpt *hwpt; QLIST_ENTRY(VFIODevice) hwpt_next; struct vfio_region_info **reginfo; + int *region_fds; + VFIODeviceCPR cpr; } VFIODevice; struct VFIODeviceOps { @@ -164,36 +168,64 @@ struct VFIODeviceIOOps { * @device_feature * * Fill in feature info for the given device. + * + * @vdev: #VFIODevice to use + * @feat: feature information to fill in + * + * Returns 0 on success or -errno. */ - int (*device_feature)(VFIODevice *vdev, struct vfio_device_feature *); + int (*device_feature)(VFIODevice *vdev, struct vfio_device_feature *feat); /** * @get_region_info * - * Fill in @info with information on the region given by @info->index. + * Get the information for a given region on the device. + * + * @vdev: #VFIODevice to use + * @info: set @info->index to the region index to look up; the rest of the + * struct will be filled in on success + * @fd: pointer to the fd for the region; will be -1 if not found + * + * Returns 0 on success or -errno. */ int (*get_region_info)(VFIODevice *vdev, - struct vfio_region_info *info); + struct vfio_region_info *info, int *fd); /** * @get_irq_info * - * Fill in @irq with information on the IRQ given by @info->index. + * @vdev: #VFIODevice to use + * @irq: set @irq->index to the IRQ index to look up; the rest of the struct + * will be filled in on success + * + * Returns 0 on success or -errno. */ int (*get_irq_info)(VFIODevice *vdev, struct vfio_irq_info *irq); /** * @set_irqs * - * Configure IRQs as defined by @irqs. + * Configure IRQs. + * + * @vdev: #VFIODevice to use + * @irqs: IRQ configuration as defined by VFIO docs. + * + * Returns 0 on success or -errno. */ int (*set_irqs)(VFIODevice *vdev, struct vfio_irq_set *irqs); /** * @region_read * - * Read @size bytes from the region @nr at offset @off into the buffer - * @data. + * Read part of a region. + * + * @vdev: #VFIODevice to use + * @nr: region index + * @off: offset within the region + * @size: size in bytes to read + * @data: buffer to read into + * + * Returns number of bytes read on success or -errno. */ int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size, void *data); @@ -201,11 +233,18 @@ struct VFIODeviceIOOps { /** * @region_write * - * Write @size bytes to the region @nr at offset @off from the buffer - * @data. + * Write part of a region. + * + * @vdev: #VFIODevice to use + * @nr: region index + * @off: offset within the region + * @size: size in bytes to write + * @data: buffer to write from + * + * Returns number of bytes write on success or -errno. */ int (*region_write)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size, - void *data); + void *data, bool post); }; void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer, diff --git a/include/hw/vfio/vfio-region.h b/include/hw/vfio/vfio-region.h index cbffb26..ede6e0c 100644 --- a/include/hw/vfio/vfio-region.h +++ b/include/hw/vfio/vfio-region.h @@ -29,6 +29,7 @@ typedef struct VFIORegion { uint32_t nr_mmaps; VFIOMmap *mmaps; uint8_t nr; /* cache the region number for debug */ + bool post_wr; /* writes can be posted */ } VFIORegion; diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h index 0a9575b..449bf5c 100644 --- a/include/hw/virtio/vhost-vdpa.h +++ b/include/hw/virtio/vhost-vdpa.h @@ -43,7 +43,21 @@ typedef struct vhost_vdpa_shared { struct vhost_vdpa_iova_range iova_range; QLIST_HEAD(, vdpa_iommu) iommu_list; - /* IOVA mapping used by the Shadow Virtqueue */ + /* + * IOVA mapping used by the Shadow Virtqueue + * + * It is shared among all ASID for simplicity, whether CVQ shares ASID with + * guest or not: + * - Memory listener need access to guest's memory addresses allocated in + * the IOVA tree. + * - There should be plenty of IOVA address space for both ASID not to + * worry about collisions between them. Guest's translations are still + * validated with virtio virtqueue_pop so there is no risk for the guest + * to access memory that it shouldn't. + * + * To allocate a iova tree per ASID is doable but it complicates the code + * and it is not worth it for the moment. + */ VhostIOVATree *iova_tree; /* Copy of backend features */ @@ -51,6 +65,12 @@ typedef struct vhost_vdpa_shared { bool iotlb_batch_begin_sent; + /* + * The memory listener has been registered, so DMA maps have been sent to + * the device. + */ + bool listener_registered; + /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */ bool shadow_data; diff --git a/include/hw/virtio/virtio-mem.h b/include/hw/virtio/virtio-mem.h index bc4f787..e0ab31b 100644 --- a/include/hw/virtio/virtio-mem.h +++ b/include/hw/virtio/virtio-mem.h @@ -134,7 +134,7 @@ struct VirtioMemSystemReset { struct VirtIOMEMClass { /* private */ - VirtIODevice parent; + VirtioDeviceClass parent_class; /* public */ void (*fill_device_info)(const VirtIOMEM *vmen, VirtioMEMDeviceInfo *vi); diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h index 1dbc385..eab5394 100644 --- a/include/hw/virtio/virtio-pci.h +++ b/include/hw/virtio/virtio-pci.h @@ -32,9 +32,7 @@ DECLARE_OBJ_CHECKERS(VirtioPCIBusState, VirtioPCIBusClass, enum { VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, - VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, - VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, VIRTIO_PCI_FLAG_ATS_BIT, VIRTIO_PCI_FLAG_INIT_DEVERR_BIT, @@ -54,12 +52,6 @@ enum { * vcpu thread using ioeventfd for some devices. */ #define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT) -/* virtio version flags */ -#define VIRTIO_PCI_FLAG_DISABLE_PCIE (1 << VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT) - -/* migrate extra state */ -#define VIRTIO_PCI_FLAG_MIGRATE_EXTRA (1 << VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT) - /* have pio notification for modern device ? */ #define VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY \ (1 << VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT) diff --git a/include/hw/virtio/virtio-pmem.h b/include/hw/virtio/virtio-pmem.h index fc4fd1f..9cce600 100644 --- a/include/hw/virtio/virtio-pmem.h +++ b/include/hw/virtio/virtio-pmem.h @@ -36,7 +36,7 @@ struct VirtIOPMEM { struct VirtIOPMEMClass { /* private */ - VirtIODevice parent; + VirtioDeviceClass parent_class; /* public */ void (*fill_device_info)(const VirtIOPMEM *pmem, VirtioPMEMDeviceInfo *vi); diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h index ab15577..a88cf8b 100644 --- a/include/io/channel-socket.h +++ b/include/io/channel-socket.h @@ -261,5 +261,18 @@ QIOChannelSocket * qio_channel_socket_accept(QIOChannelSocket *ioc, Error **errp); +/** + * qio_channel_socket_set_send_buffer: + * @ioc: the socket channel object + * @size: buffer size + * @errp: pointer to a NULL-initialized error object + * + * Set the underlying socket send buffer size. + * + * Retruns: 0 on success, or -1 on error. + */ +int qio_channel_socket_set_send_buffer(QIOChannelSocket *ioc, + size_t size, + Error **errp); #endif /* QIO_CHANNEL_SOCKET_H */ diff --git a/include/migration/cpr.h b/include/migration/cpr.h index 7561fc7..07858e9 100644 --- a/include/migration/cpr.h +++ b/include/migration/cpr.h @@ -18,6 +18,9 @@ void cpr_save_fd(const char *name, int id, int fd); void cpr_delete_fd(const char *name, int id); int cpr_find_fd(const char *name, int id); +void cpr_resave_fd(const char *name, int id, int fd); +int cpr_open_fd(const char *path, int flags, const char *name, int id, + Error **errp); MigMode cpr_get_incoming_mode(void); void cpr_set_incoming_mode(MigMode mode); @@ -28,6 +31,8 @@ int cpr_state_load(MigrationChannel *channel, Error **errp); void cpr_state_close(void); struct QIOChannel *cpr_state_ioc(void); +bool cpr_incoming_needed(void *opaque); + QEMUFile *cpr_transfer_output(MigrationChannel *channel, Error **errp); QEMUFile *cpr_transfer_input(MigrationChannel *channel, Error **errp); diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index a1dfab4..1ff7bd9 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -155,7 +155,11 @@ enum VMStateFlags { }; typedef enum { - MIG_PRI_DEFAULT = 0, + MIG_PRI_UNINITIALIZED = 0, /* An uninitialized priority field maps to */ + /* MIG_PRI_DEFAULT in save_state_priority */ + + MIG_PRI_LOW, /* Must happen after default */ + MIG_PRI_DEFAULT, MIG_PRI_IOMMU, /* Must happen before PCI devices */ MIG_PRI_PCI_BUS, /* Must happen before IOMMU */ MIG_PRI_VIRTIO_MEM, /* Must happen before IOMMU */ diff --git a/include/qapi/error-internal.h b/include/qapi/error-internal.h new file mode 100644 index 0000000..ff18a20 --- /dev/null +++ b/include/qapi/error-internal.h @@ -0,0 +1,35 @@ +/* + * QEMU Error Objects - struct definition + * + * Copyright IBM, Corp. 2011 + * Copyright (C) 2011-2015 Red Hat, Inc. + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * Markus Armbruster <armbru@redhat.com>, + * + * This work is licensed under the terms of the GNU LGPL, version 2. See + * the COPYING.LIB file in the top-level directory. + */ + +#ifndef QAPI_ERROR_INTERNAL_H + +struct Error +{ + char *msg; + ErrorClass err_class; + + /* Used for error_abort only, may be NULL. */ + const char *func; + + /* + * src might be NUL-terminated or not. If it is, src_len is negative. + * If it is not, src_len is the length. + */ + const char *src; + int src_len; + int line; + GString *hint; +}; + +#endif diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h index 496dac5..65b8995 100644 --- a/include/qemu/compiler.h +++ b/include/qemu/compiler.h @@ -208,6 +208,26 @@ #endif /* + * Disable -ftrivial-auto-var-init on a local variable. + * + * Use this in cases where there a method in the device I/O path (or other + * important hot paths), that has large variables on the stack. A rule of + * thumb is that "large" means a method with 4kb data in the local stack + * frame. Any variables which are KB in size, should be annotated with this + * attribute, to pre-emptively eliminate any potential overhead from the + * compiler's implicit zero'ing of memory. + * + * Given that this turns off a security hardening feature, when using this + * to flag variables, it is important that the code is double-checked to + * ensure there is no possible use of uninitialized data in the method. + */ +#if __has_attribute(uninitialized) +# define QEMU_UNINITIALIZED __attribute__((uninitialized)) +#else +# define QEMU_UNINITIALIZED +#endif + +/* * http://clang.llvm.org/docs/ThreadSafetyAnalysis.html * * TSA is available since clang 3.6-ish. diff --git a/include/qemu/futex.h b/include/qemu/futex.h index 91ae889..607613e 100644 --- a/include/qemu/futex.h +++ b/include/qemu/futex.h @@ -1,5 +1,5 @@ /* - * Wrappers around Linux futex syscall + * Wrappers around Linux futex syscall and similar * * Copyright Red Hat, Inc. 2017 * @@ -11,17 +11,35 @@ * */ +/* + * Note that a wake-up can also be caused by common futex usage patterns in + * unrelated code that happened to have previously used the futex word's + * memory location (e.g., typical futex-based implementations of Pthreads + * mutexes can cause this under some conditions). Therefore, qemu_futex_wait() + * callers should always conservatively assume that it is a spurious wake-up, + * and use the futex word's value (i.e., the user-space synchronization scheme) + * to decide whether to continue to block or not. + */ + #ifndef QEMU_FUTEX_H #define QEMU_FUTEX_H +#define HAVE_FUTEX + +#ifdef CONFIG_LINUX #include <sys/syscall.h> #include <linux/futex.h> #define qemu_futex(...) syscall(__NR_futex, __VA_ARGS__) -static inline void qemu_futex_wake(void *f, int n) +static inline void qemu_futex_wake_all(void *f) { - qemu_futex(f, FUTEX_WAKE, n, NULL, NULL, 0); + qemu_futex(f, FUTEX_WAKE, INT_MAX, NULL, NULL, 0); +} + +static inline void qemu_futex_wake_single(void *f) +{ + qemu_futex(f, FUTEX_WAKE, 1, NULL, NULL, 0); } static inline void qemu_futex_wait(void *f, unsigned val) @@ -37,5 +55,25 @@ static inline void qemu_futex_wait(void *f, unsigned val) } } } +#elif defined(CONFIG_WIN32) +#include <synchapi.h> + +static inline void qemu_futex_wake_all(void *f) +{ + WakeByAddressAll(f); +} + +static inline void qemu_futex_wake_single(void *f) +{ + WakeByAddressSingle(f); +} + +static inline void qemu_futex_wait(void *f, unsigned val) +{ + WaitOnAddress(f, &val, sizeof(val), INFINITE); +} +#else +#undef HAVE_FUTEX +#endif #endif /* QEMU_FUTEX_H */ diff --git a/include/qemu/lockcnt.h b/include/qemu/lockcnt.h index f4b62a3..5a2800e 100644 --- a/include/qemu/lockcnt.h +++ b/include/qemu/lockcnt.h @@ -17,7 +17,7 @@ typedef struct QemuLockCnt QemuLockCnt; struct QemuLockCnt { -#ifndef CONFIG_LINUX +#ifndef HAVE_FUTEX QemuMutex mutex; #endif unsigned count; diff --git a/include/qemu/thread-posix.h b/include/qemu/thread-posix.h index 5f2f3d1..758808b 100644 --- a/include/qemu/thread-posix.h +++ b/include/qemu/thread-posix.h @@ -32,15 +32,6 @@ struct QemuSemaphore { unsigned int count; }; -struct QemuEvent { -#ifndef __linux__ - pthread_mutex_t lock; - pthread_cond_t cond; -#endif - unsigned value; - bool initialized; -}; - struct QemuThread { pthread_t thread; }; diff --git a/include/qemu/thread-win32.h b/include/qemu/thread-win32.h index d95af44..da9e732 100644 --- a/include/qemu/thread-win32.h +++ b/include/qemu/thread-win32.h @@ -28,12 +28,6 @@ struct QemuSemaphore { bool initialized; }; -struct QemuEvent { - int value; - HANDLE event; - bool initialized; -}; - typedef struct QemuThreadData QemuThreadData; struct QemuThread { QemuThreadData *data; diff --git a/include/qemu/thread.h b/include/qemu/thread.h index 6f800aa..f0302ed 100644 --- a/include/qemu/thread.h +++ b/include/qemu/thread.h @@ -3,13 +3,32 @@ #include "qemu/processor.h" #include "qemu/atomic.h" +#include "qemu/futex.h" typedef struct QemuCond QemuCond; typedef struct QemuSemaphore QemuSemaphore; -typedef struct QemuEvent QemuEvent; typedef struct QemuLockCnt QemuLockCnt; typedef struct QemuThread QemuThread; +/* + * QemuEvent + * ========= + * + * QemuEvent is an implementation of Win32 manual-reset event object. + * For details, refer to: + * https://learn.microsoft.com/en-us/windows/win32/sync/using-event-objects + * + * QemuEvent is more lightweight than QemuSemaphore when HAVE_FUTEX is defined. + */ +typedef struct QemuEvent { +#ifndef HAVE_FUTEX + pthread_mutex_t lock; + pthread_cond_t cond; +#endif + unsigned value; + bool initialized; +} QemuEvent; + #ifdef _WIN32 #include "qemu/thread-win32.h" #else diff --git a/include/standard-headers/asm-x86/setup_data.h b/include/standard-headers/asm-x86/setup_data.h index a483d72..2e446c1d 100644 --- a/include/standard-headers/asm-x86/setup_data.h +++ b/include/standard-headers/asm-x86/setup_data.h @@ -13,7 +13,8 @@ #define SETUP_CC_BLOB 7 #define SETUP_IMA 8 #define SETUP_RNG_SEED 9 -#define SETUP_ENUM_MAX SETUP_RNG_SEED +#define SETUP_KEXEC_KHO 10 +#define SETUP_ENUM_MAX SETUP_KEXEC_KHO #define SETUP_INDIRECT (1<<31) #define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT) @@ -78,6 +79,16 @@ struct ima_setup_data { uint64_t size; } QEMU_PACKED; +/* + * Locations of kexec handover metadata + */ +struct kho_data { + uint64_t fdt_addr; + uint64_t fdt_size; + uint64_t scratch_addr; + uint64_t scratch_size; +} QEMU_PACKED; + #endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_SETUP_DATA_H */ diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h index a8b759d..c8309d3 100644 --- a/include/standard-headers/drm/drm_fourcc.h +++ b/include/standard-headers/drm/drm_fourcc.h @@ -421,6 +421,7 @@ extern "C" { #define DRM_FORMAT_MOD_VENDOR_ALLWINNER 0x09 #define DRM_FORMAT_MOD_VENDOR_AMLOGIC 0x0a #define DRM_FORMAT_MOD_VENDOR_MTK 0x0b +#define DRM_FORMAT_MOD_VENDOR_APPLE 0x0c /* add more to the end as needed */ @@ -1494,6 +1495,50 @@ drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier) #define DRM_FORMAT_MOD_MTK_16L_32S_TILE DRM_FORMAT_MOD_MTK(MTK_FMT_MOD_TILE_16L32S) /* + * Apple GPU-tiled layouts. + * + * Apple GPUs support nonlinear tilings with optional lossless compression. + * + * GPU-tiled images are divided into 16KiB tiles: + * + * Bytes per pixel Tile size + * --------------- --------- + * 1 128x128 + * 2 128x64 + * 4 64x64 + * 8 64x32 + * 16 32x32 + * + * Tiles are raster-order. Pixels within a tile are interleaved (Morton order). + * + * Compressed images pad the body to 128-bytes and are immediately followed by a + * metadata section. The metadata section rounds the image dimensions to + * powers-of-two and contains 8 bytes for each 16x16 compression subtile. + * Subtiles are interleaved (Morton order). + * + * All images are 128-byte aligned. + * + * These layouts fundamentally do not have meaningful strides. No matter how we + * specify strides for these layouts, userspace unaware of Apple image layouts + * will be unable to use correctly the specified stride for any purpose. + * Userspace aware of the image layouts do not use strides. The most "correct" + * convention would be setting the image stride to 0. Unfortunately, some + * software assumes the stride is at least (width * bytes per pixel). We + * therefore require that stride equals (width * bytes per pixel). Since the + * stride is arbitrary here, we pick the simplest convention. + * + * Although containing two sections, compressed image layouts are treated in + * software as a single plane. This is modelled after AFBC, a similar + * scheme. Attempting to separate the sections to be "explicit" in DRM would + * only generate more confusion, as software does not treat the image this way. + * + * For detailed information on the hardware image layouts, see + * https://docs.mesa3d.org/drivers/asahi.html#image-layouts + */ +#define DRM_FORMAT_MOD_APPLE_GPU_TILED fourcc_mod_code(APPLE, 1) +#define DRM_FORMAT_MOD_APPLE_GPU_TILED_COMPRESSED fourcc_mod_code(APPLE, 2) + +/* * AMD modifiers * * Memory layout: diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h index 5d1ad5f..cef0d20 100644 --- a/include/standard-headers/linux/ethtool.h +++ b/include/standard-headers/linux/ethtool.h @@ -2295,71 +2295,75 @@ static inline int ethtool_validate_duplex(uint8_t duplex) #define RXH_XFRM_SYM_OR_XOR (1 << 1) #define RXH_XFRM_NO_CHANGE 0xff -/* L2-L4 network traffic flow types */ -#define TCP_V4_FLOW 0x01 /* hash or spec (tcp_ip4_spec) */ -#define UDP_V4_FLOW 0x02 /* hash or spec (udp_ip4_spec) */ -#define SCTP_V4_FLOW 0x03 /* hash or spec (sctp_ip4_spec) */ -#define AH_ESP_V4_FLOW 0x04 /* hash only */ -#define TCP_V6_FLOW 0x05 /* hash or spec (tcp_ip6_spec; nfc only) */ -#define UDP_V6_FLOW 0x06 /* hash or spec (udp_ip6_spec; nfc only) */ -#define SCTP_V6_FLOW 0x07 /* hash or spec (sctp_ip6_spec; nfc only) */ -#define AH_ESP_V6_FLOW 0x08 /* hash only */ -#define AH_V4_FLOW 0x09 /* hash or spec (ah_ip4_spec) */ -#define ESP_V4_FLOW 0x0a /* hash or spec (esp_ip4_spec) */ -#define AH_V6_FLOW 0x0b /* hash or spec (ah_ip6_spec; nfc only) */ -#define ESP_V6_FLOW 0x0c /* hash or spec (esp_ip6_spec; nfc only) */ -#define IPV4_USER_FLOW 0x0d /* spec only (usr_ip4_spec) */ -#define IP_USER_FLOW IPV4_USER_FLOW -#define IPV6_USER_FLOW 0x0e /* spec only (usr_ip6_spec; nfc only) */ -#define IPV4_FLOW 0x10 /* hash only */ -#define IPV6_FLOW 0x11 /* hash only */ -#define ETHER_FLOW 0x12 /* spec only (ether_spec) */ - -/* Used for GTP-U IPv4 and IPv6. - * The format of GTP packets only includes - * elements such as TEID and GTP version. - * It is primarily intended for data communication of the UE. - */ -#define GTPU_V4_FLOW 0x13 /* hash only */ -#define GTPU_V6_FLOW 0x14 /* hash only */ - -/* Use for GTP-C IPv4 and v6. - * The format of these GTP packets does not include TEID. - * Primarily expected to be used for communication - * to create sessions for UE data communication, - * commonly referred to as CSR (Create Session Request). - */ -#define GTPC_V4_FLOW 0x15 /* hash only */ -#define GTPC_V6_FLOW 0x16 /* hash only */ - -/* Use for GTP-C IPv4 and v6. - * Unlike GTPC_V4_FLOW, the format of these GTP packets includes TEID. - * After session creation, it becomes this packet. - * This is mainly used for requests to realize UE handover. - */ -#define GTPC_TEID_V4_FLOW 0x17 /* hash only */ -#define GTPC_TEID_V6_FLOW 0x18 /* hash only */ - -/* Use for GTP-U and extended headers for the PSC (PDU Session Container). - * The format of these GTP packets includes TEID and QFI. - * In 5G communication using UPF (User Plane Function), - * data communication with this extended header is performed. - */ -#define GTPU_EH_V4_FLOW 0x19 /* hash only */ -#define GTPU_EH_V6_FLOW 0x1a /* hash only */ - -/* Use for GTP-U IPv4 and v6 PSC (PDU Session Container) extended headers. - * This differs from GTPU_EH_V(4|6)_FLOW in that it is distinguished by - * UL/DL included in the PSC. - * There are differences in the data included based on Downlink/Uplink, - * and can be used to distinguish packets. - * The functions described so far are useful when you want to - * handle communication from the mobile network in UPF, PGW, etc. - */ -#define GTPU_UL_V4_FLOW 0x1b /* hash only */ -#define GTPU_UL_V6_FLOW 0x1c /* hash only */ -#define GTPU_DL_V4_FLOW 0x1d /* hash only */ -#define GTPU_DL_V6_FLOW 0x1e /* hash only */ +enum { + /* L2-L4 network traffic flow types */ + TCP_V4_FLOW = 0x01, /* hash or spec (tcp_ip4_spec) */ + UDP_V4_FLOW = 0x02, /* hash or spec (udp_ip4_spec) */ + SCTP_V4_FLOW = 0x03, /* hash or spec (sctp_ip4_spec) */ + AH_ESP_V4_FLOW = 0x04, /* hash only */ + TCP_V6_FLOW = 0x05, /* hash or spec (tcp_ip6_spec; nfc only) */ + UDP_V6_FLOW = 0x06, /* hash or spec (udp_ip6_spec; nfc only) */ + SCTP_V6_FLOW = 0x07, /* hash or spec (sctp_ip6_spec; nfc only) */ + AH_ESP_V6_FLOW = 0x08, /* hash only */ + AH_V4_FLOW = 0x09, /* hash or spec (ah_ip4_spec) */ + ESP_V4_FLOW = 0x0a, /* hash or spec (esp_ip4_spec) */ + AH_V6_FLOW = 0x0b, /* hash or spec (ah_ip6_spec; nfc only) */ + ESP_V6_FLOW = 0x0c, /* hash or spec (esp_ip6_spec; nfc only) */ + IPV4_USER_FLOW = 0x0d, /* spec only (usr_ip4_spec) */ + IP_USER_FLOW = IPV4_USER_FLOW, + IPV6_USER_FLOW = 0x0e, /* spec only (usr_ip6_spec; nfc only) */ + IPV4_FLOW = 0x10, /* hash only */ + IPV6_FLOW = 0x11, /* hash only */ + ETHER_FLOW = 0x12, /* spec only (ether_spec) */ + + /* Used for GTP-U IPv4 and IPv6. + * The format of GTP packets only includes + * elements such as TEID and GTP version. + * It is primarily intended for data communication of the UE. + */ + GTPU_V4_FLOW = 0x13, /* hash only */ + GTPU_V6_FLOW = 0x14, /* hash only */ + + /* Use for GTP-C IPv4 and v6. + * The format of these GTP packets does not include TEID. + * Primarily expected to be used for communication + * to create sessions for UE data communication, + * commonly referred to as CSR (Create Session Request). + */ + GTPC_V4_FLOW = 0x15, /* hash only */ + GTPC_V6_FLOW = 0x16, /* hash only */ + + /* Use for GTP-C IPv4 and v6. + * Unlike GTPC_V4_FLOW, the format of these GTP packets includes TEID. + * After session creation, it becomes this packet. + * This is mainly used for requests to realize UE handover. + */ + GTPC_TEID_V4_FLOW = 0x17, /* hash only */ + GTPC_TEID_V6_FLOW = 0x18, /* hash only */ + + /* Use for GTP-U and extended headers for the PSC (PDU Session Container). + * The format of these GTP packets includes TEID and QFI. + * In 5G communication using UPF (User Plane Function), + * data communication with this extended header is performed. + */ + GTPU_EH_V4_FLOW = 0x19, /* hash only */ + GTPU_EH_V6_FLOW = 0x1a, /* hash only */ + + /* Use for GTP-U IPv4 and v6 PSC (PDU Session Container) extended headers. + * This differs from GTPU_EH_V(4|6)_FLOW in that it is distinguished by + * UL/DL included in the PSC. + * There are differences in the data included based on Downlink/Uplink, + * and can be used to distinguish packets. + * The functions described so far are useful when you want to + * handle communication from the mobile network in UPF, PGW, etc. + */ + GTPU_UL_V4_FLOW = 0x1b, /* hash only */ + GTPU_UL_V6_FLOW = 0x1c, /* hash only */ + GTPU_DL_V4_FLOW = 0x1d, /* hash only */ + GTPU_DL_V6_FLOW = 0x1e, /* hash only */ + + __FLOW_TYPE_COUNT, +}; /* Flag to enable additional fields in struct ethtool_rx_flow_spec */ #define FLOW_EXT 0x80000000 diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h index a2b5815..d8b2fd6 100644 --- a/include/standard-headers/linux/fuse.h +++ b/include/standard-headers/linux/fuse.h @@ -232,6 +232,9 @@ * * 7.43 * - add FUSE_REQUEST_TIMEOUT + * + * 7.44 + * - add FUSE_NOTIFY_INC_EPOCH */ #ifndef _LINUX_FUSE_H @@ -263,7 +266,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 43 +#define FUSE_KERNEL_MINOR_VERSION 44 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -667,6 +670,7 @@ enum fuse_notify_code { FUSE_NOTIFY_RETRIEVE = 5, FUSE_NOTIFY_DELETE = 6, FUSE_NOTIFY_RESEND = 7, + FUSE_NOTIFY_INC_EPOCH = 8, FUSE_NOTIFY_CODE_MAX, }; diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h index 09ba0ad..a82ff79 100644 --- a/include/standard-headers/linux/input-event-codes.h +++ b/include/standard-headers/linux/input-event-codes.h @@ -925,7 +925,8 @@ #define SW_MUTE_DEVICE 0x0e /* set = device disabled */ #define SW_PEN_INSERTED 0x0f /* set = pen inserted */ #define SW_MACHINE_COVER 0x10 /* set = cover closed */ -#define SW_MAX_ 0x10 +#define SW_USB_INSERT 0x11 /* set = USB audio device connected */ +#define SW_MAX_ 0x11 #define SW_CNT (SW_MAX_+1) /* diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h index ba32671..a3a3e94 100644 --- a/include/standard-headers/linux/pci_regs.h +++ b/include/standard-headers/linux/pci_regs.h @@ -750,7 +750,8 @@ #define PCI_EXT_CAP_ID_NPEM 0x29 /* Native PCIe Enclosure Management */ #define PCI_EXT_CAP_ID_PL_32GT 0x2A /* Physical Layer 32.0 GT/s */ #define PCI_EXT_CAP_ID_DOE 0x2E /* Data Object Exchange */ -#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_DOE +#define PCI_EXT_CAP_ID_PL_64GT 0x31 /* Physical Layer 64.0 GT/s */ +#define PCI_EXT_CAP_ID_MAX PCI_EXT_CAP_ID_PL_64GT #define PCI_EXT_CAP_DSN_SIZEOF 12 #define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40 @@ -1144,12 +1145,21 @@ #define PCI_DLF_CAP 0x04 /* Capabilities Register */ #define PCI_DLF_EXCHANGE_ENABLE 0x80000000 /* Data Link Feature Exchange Enable */ +/* Secondary PCIe Capability 8.0 GT/s */ +#define PCI_SECPCI_LE_CTRL 0x0c /* Lane Equalization Control Register */ + /* Physical Layer 16.0 GT/s */ #define PCI_PL_16GT_LE_CTRL 0x20 /* Lane Equalization Control Register */ #define PCI_PL_16GT_LE_CTRL_DSP_TX_PRESET_MASK 0x0000000F #define PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_MASK 0x000000F0 #define PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_SHIFT 4 +/* Physical Layer 32.0 GT/s */ +#define PCI_PL_32GT_LE_CTRL 0x20 /* Lane Equalization Control Register */ + +/* Physical Layer 64.0 GT/s */ +#define PCI_PL_64GT_LE_CTRL 0x20 /* Lane Equalization Control Register */ + /* Native PCIe Enclosure Management */ #define PCI_NPEM_CAP 0x04 /* NPEM capability register */ #define PCI_NPEM_CAP_CAPABLE 0x00000001 /* NPEM Capable */ diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h index 6459fdb..00cd3f0 100644 --- a/include/standard-headers/linux/virtio_gpu.h +++ b/include/standard-headers/linux/virtio_gpu.h @@ -309,8 +309,9 @@ struct virtio_gpu_cmd_submit { #define VIRTIO_GPU_CAPSET_VIRGL 1 #define VIRTIO_GPU_CAPSET_VIRGL2 2 -/* 3 is reserved for gfxstream */ +#define VIRTIO_GPU_CAPSET_GFXSTREAM_VULKAN 3 #define VIRTIO_GPU_CAPSET_VENUS 4 +#define VIRTIO_GPU_CAPSET_CROSS_DOMAIN 5 #define VIRTIO_GPU_CAPSET_DRM 6 /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */ diff --git a/include/standard-headers/linux/virtio_pci.h b/include/standard-headers/linux/virtio_pci.h index 91fec6f..09e964e 100644 --- a/include/standard-headers/linux/virtio_pci.h +++ b/include/standard-headers/linux/virtio_pci.h @@ -246,6 +246,7 @@ struct virtio_pci_cfg_cap { #define VIRTIO_ADMIN_CMD_LIST_USE 0x1 /* Admin command group type. */ +#define VIRTIO_ADMIN_GROUP_TYPE_SELF 0x0 #define VIRTIO_ADMIN_GROUP_TYPE_SRIOV 0x1 /* Transitional device admin command. */ diff --git a/include/standard-headers/uefi/uefi.h b/include/standard-headers/uefi/uefi.h new file mode 100644 index 0000000..5256349 --- /dev/null +++ b/include/standard-headers/uefi/uefi.h @@ -0,0 +1,187 @@ +/* + * Copyright (C) 2025 Intel Corporation + * + * Author: Isaku Yamahata <isaku.yamahata at gmail.com> + * <isaku.yamahata at intel.com> + * Xiaoyao Li <xiaoyao.li@intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_I386_UEFI_H +#define HW_I386_UEFI_H + +/***************************************************************************/ +/* + * basic EFI definitions + * supplemented with UEFI Specification Version 2.8 (Errata A) + * released February 2020 + */ +/* UEFI integer is little endian */ + +typedef struct { + uint32_t Data1; + uint16_t Data2; + uint16_t Data3; + uint8_t Data4[8]; +} EFI_GUID; + +typedef enum { + EfiReservedMemoryType, + EfiLoaderCode, + EfiLoaderData, + EfiBootServicesCode, + EfiBootServicesData, + EfiRuntimeServicesCode, + EfiRuntimeServicesData, + EfiConventionalMemory, + EfiUnusableMemory, + EfiACPIReclaimMemory, + EfiACPIMemoryNVS, + EfiMemoryMappedIO, + EfiMemoryMappedIOPortSpace, + EfiPalCode, + EfiPersistentMemory, + EfiUnacceptedMemoryType, + EfiMaxMemoryType +} EFI_MEMORY_TYPE; + +#define EFI_HOB_HANDOFF_TABLE_VERSION 0x0009 + +#define EFI_HOB_TYPE_HANDOFF 0x0001 +#define EFI_HOB_TYPE_MEMORY_ALLOCATION 0x0002 +#define EFI_HOB_TYPE_RESOURCE_DESCRIPTOR 0x0003 +#define EFI_HOB_TYPE_GUID_EXTENSION 0x0004 +#define EFI_HOB_TYPE_FV 0x0005 +#define EFI_HOB_TYPE_CPU 0x0006 +#define EFI_HOB_TYPE_MEMORY_POOL 0x0007 +#define EFI_HOB_TYPE_FV2 0x0009 +#define EFI_HOB_TYPE_LOAD_PEIM_UNUSED 0x000A +#define EFI_HOB_TYPE_UEFI_CAPSULE 0x000B +#define EFI_HOB_TYPE_FV3 0x000C +#define EFI_HOB_TYPE_UNUSED 0xFFFE +#define EFI_HOB_TYPE_END_OF_HOB_LIST 0xFFFF + +typedef struct { + uint16_t HobType; + uint16_t HobLength; + uint32_t Reserved; +} EFI_HOB_GENERIC_HEADER; + +typedef uint64_t EFI_PHYSICAL_ADDRESS; +typedef uint32_t EFI_BOOT_MODE; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; + uint32_t Version; + EFI_BOOT_MODE BootMode; + EFI_PHYSICAL_ADDRESS EfiMemoryTop; + EFI_PHYSICAL_ADDRESS EfiMemoryBottom; + EFI_PHYSICAL_ADDRESS EfiFreeMemoryTop; + EFI_PHYSICAL_ADDRESS EfiFreeMemoryBottom; + EFI_PHYSICAL_ADDRESS EfiEndOfHobList; +} EFI_HOB_HANDOFF_INFO_TABLE; + +#define EFI_RESOURCE_SYSTEM_MEMORY 0x00000000 +#define EFI_RESOURCE_MEMORY_MAPPED_IO 0x00000001 +#define EFI_RESOURCE_IO 0x00000002 +#define EFI_RESOURCE_FIRMWARE_DEVICE 0x00000003 +#define EFI_RESOURCE_MEMORY_MAPPED_IO_PORT 0x00000004 +#define EFI_RESOURCE_MEMORY_RESERVED 0x00000005 +#define EFI_RESOURCE_IO_RESERVED 0x00000006 +#define EFI_RESOURCE_MEMORY_UNACCEPTED 0x00000007 +#define EFI_RESOURCE_MAX_MEMORY_TYPE 0x00000008 + +#define EFI_RESOURCE_ATTRIBUTE_PRESENT 0x00000001 +#define EFI_RESOURCE_ATTRIBUTE_INITIALIZED 0x00000002 +#define EFI_RESOURCE_ATTRIBUTE_TESTED 0x00000004 +#define EFI_RESOURCE_ATTRIBUTE_SINGLE_BIT_ECC 0x00000008 +#define EFI_RESOURCE_ATTRIBUTE_MULTIPLE_BIT_ECC 0x00000010 +#define EFI_RESOURCE_ATTRIBUTE_ECC_RESERVED_1 0x00000020 +#define EFI_RESOURCE_ATTRIBUTE_ECC_RESERVED_2 0x00000040 +#define EFI_RESOURCE_ATTRIBUTE_READ_PROTECTED 0x00000080 +#define EFI_RESOURCE_ATTRIBUTE_WRITE_PROTECTED 0x00000100 +#define EFI_RESOURCE_ATTRIBUTE_EXECUTION_PROTECTED 0x00000200 +#define EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE 0x00000400 +#define EFI_RESOURCE_ATTRIBUTE_WRITE_COMBINEABLE 0x00000800 +#define EFI_RESOURCE_ATTRIBUTE_WRITE_THROUGH_CACHEABLE 0x00001000 +#define EFI_RESOURCE_ATTRIBUTE_WRITE_BACK_CACHEABLE 0x00002000 +#define EFI_RESOURCE_ATTRIBUTE_16_BIT_IO 0x00004000 +#define EFI_RESOURCE_ATTRIBUTE_32_BIT_IO 0x00008000 +#define EFI_RESOURCE_ATTRIBUTE_64_BIT_IO 0x00010000 +#define EFI_RESOURCE_ATTRIBUTE_UNCACHED_EXPORTED 0x00020000 +#define EFI_RESOURCE_ATTRIBUTE_READ_ONLY_PROTECTED 0x00040000 +#define EFI_RESOURCE_ATTRIBUTE_READ_ONLY_PROTECTABLE 0x00080000 +#define EFI_RESOURCE_ATTRIBUTE_READ_PROTECTABLE 0x00100000 +#define EFI_RESOURCE_ATTRIBUTE_WRITE_PROTECTABLE 0x00200000 +#define EFI_RESOURCE_ATTRIBUTE_EXECUTION_PROTECTABLE 0x00400000 +#define EFI_RESOURCE_ATTRIBUTE_PERSISTENT 0x00800000 +#define EFI_RESOURCE_ATTRIBUTE_PERSISTABLE 0x01000000 +#define EFI_RESOURCE_ATTRIBUTE_MORE_RELIABLE 0x02000000 + +typedef uint32_t EFI_RESOURCE_TYPE; +typedef uint32_t EFI_RESOURCE_ATTRIBUTE_TYPE; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; + EFI_GUID Owner; + EFI_RESOURCE_TYPE ResourceType; + EFI_RESOURCE_ATTRIBUTE_TYPE ResourceAttribute; + EFI_PHYSICAL_ADDRESS PhysicalStart; + uint64_t ResourceLength; +} EFI_HOB_RESOURCE_DESCRIPTOR; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; + EFI_GUID Name; + + /* guid specific data follows */ +} EFI_HOB_GUID_TYPE; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; + EFI_PHYSICAL_ADDRESS BaseAddress; + uint64_t Length; +} EFI_HOB_FIRMWARE_VOLUME; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; + EFI_PHYSICAL_ADDRESS BaseAddress; + uint64_t Length; + EFI_GUID FvName; + EFI_GUID FileName; +} EFI_HOB_FIRMWARE_VOLUME2; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; + EFI_PHYSICAL_ADDRESS BaseAddress; + uint64_t Length; + uint32_t AuthenticationStatus; + bool ExtractedFv; + EFI_GUID FvName; + EFI_GUID FileName; +} EFI_HOB_FIRMWARE_VOLUME3; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; + uint8_t SizeOfMemorySpace; + uint8_t SizeOfIoSpace; + uint8_t Reserved[6]; +} EFI_HOB_CPU; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; +} EFI_HOB_MEMORY_POOL; + +typedef struct { + EFI_HOB_GENERIC_HEADER Header; + + EFI_PHYSICAL_ADDRESS BaseAddress; + uint64_t Length; +} EFI_HOB_UEFI_CAPSULE; + +#define EFI_HOB_OWNER_ZERO \ + ((EFI_GUID){ 0x00000000, 0x0000, 0x0000, \ + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }) + +#endif diff --git a/include/system/host_iommu_device.h b/include/system/host_iommu_device.h index 809cced..ab849a4 100644 --- a/include/system/host_iommu_device.h +++ b/include/system/host_iommu_device.h @@ -14,6 +14,13 @@ #include "qom/object.h" #include "qapi/error.h" +#ifdef CONFIG_LINUX +#include "linux/iommufd.h" + +typedef union VendorCaps { + struct iommu_hw_info_vtd vtd; + struct iommu_hw_info_arm_smmuv3 smmuv3; +} VendorCaps; /** * struct HostIOMMUDeviceCaps - Define host IOMMU device capabilities. @@ -22,11 +29,17 @@ * * @hw_caps: host platform IOMMU capabilities (e.g. on IOMMUFD this represents * the @out_capabilities value returned from IOMMU_GET_HW_INFO ioctl) + * + * @vendor_caps: host platform IOMMU vendor specific capabilities (e.g. on + * IOMMUFD this represents a user-space buffer filled by kernel + * with host IOMMU @type specific hardware information data) */ typedef struct HostIOMMUDeviceCaps { uint32_t type; uint64_t hw_caps; + VendorCaps vendor_caps; } HostIOMMUDeviceCaps; +#endif #define TYPE_HOST_IOMMU_DEVICE "host-iommu-device" OBJECT_DECLARE_TYPE(HostIOMMUDevice, HostIOMMUDeviceClass, HOST_IOMMU_DEVICE) @@ -38,7 +51,9 @@ struct HostIOMMUDevice { void *agent; /* pointer to agent device, ie. VFIO or VDPA device */ PCIBus *aliased_bus; int aliased_devfn; +#ifdef CONFIG_LINUX HostIOMMUDeviceCaps caps; +#endif }; /** diff --git a/include/system/hvf_int.h b/include/system/hvf_int.h index 8c8b840..d774e58 100644 --- a/include/system/hvf_int.h +++ b/include/system/hvf_int.h @@ -44,6 +44,7 @@ typedef struct hvf_vcpu_caps { struct HVFState { AccelState parent; + hvf_slot slots[32]; int num_slots; diff --git a/include/system/iommufd.h b/include/system/iommufd.h index cbab75b..283861b 100644 --- a/include/system/iommufd.h +++ b/include/system/iommufd.h @@ -61,6 +61,60 @@ bool iommufd_backend_get_dirty_bitmap(IOMMUFDBackend *be, uint32_t hwpt_id, uint64_t iova, ram_addr_t size, uint64_t page_size, uint64_t *data, Error **errp); +bool iommufd_backend_invalidate_cache(IOMMUFDBackend *be, uint32_t id, + uint32_t data_type, uint32_t entry_len, + uint32_t *entry_num, void *data, + Error **errp); #define TYPE_HOST_IOMMU_DEVICE_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd" +OBJECT_DECLARE_TYPE(HostIOMMUDeviceIOMMUFD, HostIOMMUDeviceIOMMUFDClass, + HOST_IOMMU_DEVICE_IOMMUFD) + +/* Overload of the host IOMMU device for the iommufd backend */ +struct HostIOMMUDeviceIOMMUFD { + HostIOMMUDevice parent_obj; + + IOMMUFDBackend *iommufd; + uint32_t devid; + uint32_t hwpt_id; +}; + +struct HostIOMMUDeviceIOMMUFDClass { + HostIOMMUDeviceClass parent_class; + + /** + * @attach_hwpt: attach host IOMMU device to IOMMUFD hardware page table. + * VFIO and VDPA device can have different implementation. + * + * Mandatory callback. + * + * @idev: host IOMMU device backed by IOMMUFD backend. + * + * @hwpt_id: ID of IOMMUFD hardware page table. + * + * @errp: pass an Error out when attachment fails. + * + * Returns: true on success, false on failure. + */ + bool (*attach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, uint32_t hwpt_id, + Error **errp); + /** + * @detach_hwpt: detach host IOMMU device from IOMMUFD hardware page table. + * VFIO and VDPA device can have different implementation. + * + * Mandatory callback. + * + * @idev: host IOMMU device backed by IOMMUFD backend. + * + * @errp: pass an Error out when attachment fails. + * + * Returns: true on success, false on failure. + */ + bool (*detach_hwpt)(HostIOMMUDeviceIOMMUFD *idev, Error **errp); +}; + +bool host_iommu_device_iommufd_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + uint32_t hwpt_id, Error **errp); +bool host_iommu_device_iommufd_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev, + Error **errp); #endif diff --git a/include/system/kvm.h b/include/system/kvm.h index b690dda..7cc60d2 100644 --- a/include/system/kvm.h +++ b/include/system/kvm.h @@ -42,6 +42,7 @@ extern bool kvm_gsi_routing_allowed; extern bool kvm_gsi_direct_mapping; extern bool kvm_readonly_mem_allowed; extern bool kvm_msi_use_devid; +extern bool kvm_pre_fault_memory_supported; #define kvm_enabled() (kvm_allowed) /** @@ -376,6 +377,7 @@ int kvm_arch_get_default_type(MachineState *ms); int kvm_arch_init(MachineState *ms, KVMState *s); +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp); int kvm_arch_init_vcpu(CPUState *cpu); int kvm_arch_destroy_vcpu(CPUState *cpu); diff --git a/include/system/memory.h b/include/system/memory.h index fbbf4cf..0848690 100644 --- a/include/system/memory.h +++ b/include/system/memory.h @@ -183,6 +183,7 @@ struct IOMMUNotifier { hwaddr start; hwaddr end; int iommu_idx; + void *opaque; QLIST_ENTRY(IOMMUNotifier) node; }; typedef struct IOMMUNotifier IOMMUNotifier; @@ -738,21 +739,20 @@ void ram_discard_manager_unregister_listener(RamDiscardManager *rdm, RamDiscardListener *rdl); /** - * memory_get_xlat_addr: Extract addresses from a TLB entry + * memory_translate_iotlb: Extract addresses from a TLB entry. + * Called with rcu_read_lock held. * * @iotlb: pointer to an #IOMMUTLBEntry - * @vaddr: virtual address - * @ram_addr: RAM address - * @read_only: indicates if writes are allowed - * @mr_has_discard_manager: indicates memory is controlled by a - * RamDiscardManager + * @xlat_p: return the offset of the entry from the start of the returned + * MemoryRegion. * @errp: pointer to Error*, to store an error if it happens. * - * Return: true on success, else false setting @errp with error. + * Return: On success, return the MemoryRegion containing the @iotlb translated + * addr. The MemoryRegion must not be accessed after rcu_read_unlock. + * On failure, return NULL, setting @errp with error. */ -bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, - ram_addr_t *ram_addr, bool *read_only, - bool *mr_has_discard_manager, Error **errp); +MemoryRegion *memory_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p, + Error **errp); typedef struct CoalescedMemoryRange CoalescedMemoryRange; typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd; diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index 3fa5a7a..125323f 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -365,10 +365,6 @@ struct TCGContext { int nb_indirects; int nb_ops; TCGType addr_type; /* TCG_TYPE_I32 or TCG_TYPE_I64 */ - - int page_mask; - uint8_t page_bits; - uint8_t tlb_dyn_max_bits; TCGBar guest_mo; TCGRegSet reserved_regs; diff --git a/include/ui/clipboard.h b/include/ui/clipboard.h index ab6acdb..62a96ce 100644 --- a/include/ui/clipboard.h +++ b/include/ui/clipboard.h @@ -2,6 +2,7 @@ #define QEMU_CLIPBOARD_H #include "qemu/notify.h" +#include "migration/vmstate.h" /** * DOC: Introduction @@ -25,6 +26,9 @@ typedef enum QemuClipboardSelection QemuClipboardSelection; typedef struct QemuClipboardPeer QemuClipboardPeer; typedef struct QemuClipboardNotify QemuClipboardNotify; typedef struct QemuClipboardInfo QemuClipboardInfo; +typedef struct QemuClipboardContent QemuClipboardContent; + +extern const VMStateDescription vmstate_cbinfo; /** * enum QemuClipboardType @@ -97,6 +101,24 @@ struct QemuClipboardNotify { }; }; + +/** + * struct QemuClipboardContent + * + * @available: whether the data is available + * @requested: whether the data was requested + * @size: the size of the @data + * @data: the clipboard data + * + * Clipboard content. + */ +struct QemuClipboardContent { + bool available; + bool requested; + uint32_t size; + void *data; +}; + /** * struct QemuClipboardInfo * @@ -112,15 +134,10 @@ struct QemuClipboardNotify { struct QemuClipboardInfo { uint32_t refcount; QemuClipboardPeer *owner; - QemuClipboardSelection selection; + int selection; /* QemuClipboardSelection */ bool has_serial; uint32_t serial; - struct { - bool available; - bool requested; - size_t size; - void *data; - } types[QEMU_CLIPBOARD_TYPE__COUNT]; + QemuClipboardContent types[QEMU_CLIPBOARD_TYPE__COUNT]; }; /** diff --git a/include/ui/egl-helpers.h b/include/ui/egl-helpers.h index fb80e15..acf993f 100644 --- a/include/ui/egl-helpers.h +++ b/include/ui/egl-helpers.h @@ -17,6 +17,8 @@ extern bool qemu_egl_angle_d3d; typedef struct egl_fb { int width; int height; + int x; + int y; GLuint texture; GLuint framebuffer; bool delete_texture; @@ -26,7 +28,7 @@ typedef struct egl_fb { #define EGL_FB_INIT { 0, } void egl_fb_destroy(egl_fb *fb); -void egl_fb_setup_default(egl_fb *fb, int width, int height); +void egl_fb_setup_default(egl_fb *fb, int width, int height, int x, int y); void egl_fb_setup_for_tex(egl_fb *fb, int width, int height, GLuint texture, bool delete); void egl_fb_setup_new_tex(egl_fb *fb, int width, int height); diff --git a/include/ui/gtk.h b/include/ui/gtk.h index aa3d637..d394404 100644 --- a/include/ui/gtk.h +++ b/include/ui/gtk.h @@ -224,4 +224,6 @@ int gd_gl_area_make_current(DisplayGLCtx *dgc, /* gtk-clipboard.c */ void gd_clipboard_init(GtkDisplayState *gd); +void gd_update_scale(VirtualConsole *vc, int ww, int wh, int fbw, int fbh); + #endif /* UI_GTK_H */ diff --git a/io/channel-socket.c b/io/channel-socket.c index 088b49f..3b7ca92 100644 --- a/io/channel-socket.c +++ b/io/channel-socket.c @@ -78,6 +78,17 @@ qio_channel_socket_new(void) return sioc; } +int qio_channel_socket_set_send_buffer(QIOChannelSocket *ioc, + size_t size, + Error **errp) +{ + if (setsockopt(ioc->fd, SOL_SOCKET, SO_SNDBUF, &size, sizeof(size)) < 0) { + error_setg_errno(errp, errno, "Unable to set socket send buffer size"); + return -1; + } + + return 0; +} static int qio_channel_socket_set_fd(QIOChannelSocket *sioc, diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index 4e6aff0..f4d9baa 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -419,10 +419,11 @@ enum { /* Device Control API on vcpu fd */ #define KVM_ARM_VCPU_PMU_V3_CTRL 0 -#define KVM_ARM_VCPU_PMU_V3_IRQ 0 -#define KVM_ARM_VCPU_PMU_V3_INIT 1 -#define KVM_ARM_VCPU_PMU_V3_FILTER 2 -#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3 +#define KVM_ARM_VCPU_PMU_V3_IRQ 0 +#define KVM_ARM_VCPU_PMU_V3_INIT 1 +#define KVM_ARM_VCPU_PMU_V3_FILTER 2 +#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3 +#define KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS 4 #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index 7fb57cc..cd275ae 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -843,6 +843,7 @@ struct kvm_sev_snp_launch_start { }; /* Kept in sync with firmware values for simplicity. */ +#define KVM_SEV_PAGE_TYPE_INVALID 0x0 #define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1 #define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3 #define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4 diff --git a/linux-headers/linux/bits.h b/linux-headers/linux/bits.h index 58596d1..9243f38 100644 --- a/linux-headers/linux/bits.h +++ b/linux-headers/linux/bits.h @@ -4,9 +4,9 @@ #ifndef _LINUX_BITS_H #define _LINUX_BITS_H -#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 99cc82a..0690743 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -178,6 +178,7 @@ struct kvm_xen_exit { #define KVM_EXIT_NOTIFY 37 #define KVM_EXIT_LOONGARCH_IOCSR 38 #define KVM_EXIT_MEMORY_FAULT 39 +#define KVM_EXIT_TDX 40 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -439,6 +440,27 @@ struct kvm_run { __u64 gpa; __u64 size; } memory_fault; + /* KVM_EXIT_TDX */ + struct { + __u64 flags; + __u64 nr; + union { + struct { + __u64 ret; + __u64 data[5]; + } unknown; + struct { + __u64 ret; + __u64 gpa; + __u64 size; + } get_quote; + struct { + __u64 ret; + __u64 leaf; + __u64 r11, r12, r13, r14; + } get_tdvmcall_info; + }; + } tdx; /* Fix the size of the union. */ char padding[256]; }; @@ -923,6 +945,9 @@ struct kvm_enable_cap { #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 #define KVM_CAP_X86_GUEST_MODE 238 #define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 +#define KVM_CAP_ARM_EL2 240 +#define KVM_CAP_ARM_EL2_E2H0 241 +#define KVM_CAP_RISCV_MP_STATE_RESET 242 struct kvm_irq_routing_irqchip { __u32 irqchip; diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h index b95dd84..d4b3e2a 100644 --- a/linux-headers/linux/vhost.h +++ b/linux-headers/linux/vhost.h @@ -28,10 +28,10 @@ /* Set current process as the (exclusive) owner of this file descriptor. This * must be called before any other vhost command. Further calls to - * VHOST_OWNER_SET fail until VHOST_OWNER_RESET is called. */ + * VHOST_SET_OWNER fail until VHOST_RESET_OWNER is called. */ #define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01) /* Give up ownership, and reset the device to default values. - * Allows subsequent call to VHOST_OWNER_SET to succeed. */ + * Allows subsequent call to VHOST_SET_OWNER to succeed. */ #define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02) /* Set up/modify memory layout */ diff --git a/linux-user/arm/cpu_loop.c b/linux-user/arm/cpu_loop.c index e8417d0..33f6395 100644 --- a/linux-user/arm/cpu_loop.c +++ b/linux-user/arm/cpu_loop.c @@ -363,6 +363,7 @@ void cpu_loop(CPUARMState *env) switch (n) { case ARM_NR_cacheflush: /* nop */ + env->regs[0] = 0; break; case ARM_NR_set_tls: cpu_set_tls(env, env->regs[0]); diff --git a/linux-user/hppa/cpu_loop.c b/linux-user/hppa/cpu_loop.c index 890e758..9abaad5 100644 --- a/linux-user/hppa/cpu_loop.c +++ b/linux-user/hppa/cpu_loop.c @@ -112,7 +112,7 @@ static abi_ulong hppa_lws(CPUHPPAState *env) void cpu_loop(CPUHPPAState *env) { CPUState *cs = env_cpu(env); - abi_ulong ret; + abi_ulong ret, si_code = 0; int trapnr; while (1) { @@ -169,7 +169,15 @@ void cpu_loop(CPUHPPAState *env) force_sig_fault(TARGET_SIGFPE, TARGET_FPE_CONDTRAP, env->iaoq_f); break; case EXCP_ASSIST: - force_sig_fault(TARGET_SIGFPE, 0, env->iaoq_f); + #define set_si_code(mask, val) \ + if (env->fr[0] & mask) { si_code = val; } + set_si_code(R_FPSR_FLG_I_MASK, TARGET_FPE_FLTRES); + set_si_code(R_FPSR_FLG_U_MASK, TARGET_FPE_FLTUND); + set_si_code(R_FPSR_FLG_O_MASK, TARGET_FPE_FLTOVF); + set_si_code(R_FPSR_FLG_Z_MASK, TARGET_FPE_FLTDIV); + set_si_code(R_FPSR_FLG_V_MASK, TARGET_FPE_FLTINV); + #undef set_si_code + force_sig_fault(TARGET_SIGFPE, si_code, env->iaoq_f); break; case EXCP_BREAK: force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->iaoq_f); diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 23b901b..fc37028 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -8235,6 +8235,9 @@ static int open_self_stat(CPUArchState *cpu_env, int fd) } else if (i == 3) { /* ppid */ g_string_printf(buf, FMT_pid " ", getppid()); + } else if (i == 4) { + /* pgid */ + g_string_printf(buf, FMT_pid " ", getpgrp()); } else if (i == 19) { /* num_threads */ int cpus = 0; diff --git a/meson.build b/meson.build index fdad3fb..4676908 100644 --- a/meson.build +++ b/meson.build @@ -106,6 +106,7 @@ if have_rust endif if have_rust + rustdoc = find_program('rustdoc', required: get_option('rust')) bindgen = find_program('bindgen', required: get_option('rust')) if not bindgen.found() or bindgen.version().version_compare('<0.60.0') if get_option('rust').enabled() @@ -837,13 +838,17 @@ emulator_link_args = [] midl = not_found widl = not_found pathcch = not_found +synchronization = not_found host_dsosuf = '.so' if host_os == 'windows' midl = find_program('midl', required: false) widl = find_program('widl', required: false) - pathcch = cc.find_library('pathcch') - socket = cc.find_library('ws2_32') - winmm = cc.find_library('winmm') + + # MinGW uses lowercase for library names + pathcch = cc.find_library('pathcch', required: true) + synchronization = cc.find_library('synchronization', required: true) + socket = cc.find_library('ws2_32', required: true) + winmm = cc.find_library('winmm', required: true) win = import('windows') version_res = win.compile_resources('version.rc', @@ -3272,6 +3277,7 @@ config_devices_mak_list = [] config_devices_h = {} config_target_h = {} config_target_mak = {} +config_base_arch_mak = {} disassemblers = { 'alpha' : ['CONFIG_ALPHA_DIS'], @@ -3463,6 +3469,11 @@ foreach target : target_dirs config_all_devices += config_devices endif config_target_mak += {target: config_target} + + # build a merged config for all targets with the same TARGET_BASE_ARCH + target_base_arch = config_target['TARGET_BASE_ARCH'] + config_base_arch = config_base_arch_mak.get(target_base_arch, {}) + config_target + config_base_arch_mak += {target_base_arch: config_base_arch} endforeach target_dirs = actual_target_dirs @@ -3718,14 +3729,12 @@ io_ss = ss.source_set() qmp_ss = ss.source_set() qom_ss = ss.source_set() system_ss = ss.source_set() -libsystem_ss = ss.source_set() specific_fuzz_ss = ss.source_set() specific_ss = ss.source_set() rust_devices_ss = ss.source_set() stub_ss = ss.source_set() trace_ss = ss.source_set() user_ss = ss.source_set() -libuser_ss = ss.source_set() util_ss = ss.source_set() # accel modules @@ -4102,30 +4111,20 @@ common_ss.add(hwcore) system_ss.add(authz, blockdev, chardev, crypto, io, qmp) common_ss.add(qom, qemuutil) -common_ss.add_all(when: 'CONFIG_SYSTEM_ONLY', if_true: [system_ss]) -common_ss.add_all(when: 'CONFIG_USER_ONLY', if_true: user_ss) - -libuser_ss = libuser_ss.apply({}) libuser = static_library('user', - libuser_ss.sources() + genh, + user_ss.all_sources() + genh, c_args: ['-DCONFIG_USER_ONLY', '-DCOMPILING_SYSTEM_VS_USER'], - dependencies: libuser_ss.dependencies(), + include_directories: common_user_inc, + dependencies: user_ss.all_dependencies(), build_by_default: false) -libuser = declare_dependency(objects: libuser.extract_all_objects(recursive: false), - dependencies: libuser_ss.dependencies()) -common_ss.add(when: 'CONFIG_USER_ONLY', if_true: libuser) -libsystem_ss = libsystem_ss.apply({}) libsystem = static_library('system', - libsystem_ss.sources() + genh, + system_ss.all_sources() + genh, c_args: ['-DCONFIG_SOFTMMU', '-DCOMPILING_SYSTEM_VS_USER'], - dependencies: libsystem_ss.dependencies(), + dependencies: system_ss.all_dependencies(), build_by_default: false) -libsystem = declare_dependency(objects: libsystem.extract_all_objects(recursive: false), - dependencies: libsystem_ss.dependencies()) -common_ss.add(when: 'CONFIG_SYSTEM_ONLY', if_true: libsystem) # Note that this library is never used directly (only through extract_objects) # and is not built by default; therefore, source files not used by the build @@ -4133,73 +4132,79 @@ common_ss.add(when: 'CONFIG_SYSTEM_ONLY', if_true: libsystem) common_all = static_library('common', build_by_default: false, sources: common_ss.all_sources() + genh, - include_directories: common_user_inc, implicit_include_directories: false, dependencies: common_ss.all_dependencies()) # construct common libraries per base architecture -hw_common_arch_libs = {} target_common_arch_libs = {} target_common_system_arch_libs = {} -foreach target : target_dirs - config_target = config_target_mak[target] - target_base_arch = config_target['TARGET_BASE_ARCH'] +foreach target_base_arch, config_base_arch : config_base_arch_mak target_inc = [include_directories('target' / target_base_arch)] inc = [common_user_inc + target_inc] + target_common = common_ss.apply(config_base_arch, strict: false) + target_system = system_ss.apply(config_base_arch, strict: false) + target_user = user_ss.apply(config_base_arch, strict: false) + common_deps = [] + system_deps = [] + user_deps = [] + foreach dep: target_common.dependencies() + common_deps += dep.partial_dependency(compile_args: true, includes: true) + endforeach + foreach dep: target_system.dependencies() + system_deps += dep.partial_dependency(compile_args: true, includes: true) + endforeach + foreach dep: target_user.dependencies() + user_deps += dep.partial_dependency(compile_args: true, includes: true) + endforeach + # prevent common code to access cpu compile time definition, # but still allow access to cpu.h target_c_args = ['-DCPU_DEFS_H'] target_system_c_args = target_c_args + ['-DCOMPILING_SYSTEM_VS_USER', '-DCONFIG_SOFTMMU'] - if target_base_arch in hw_common_arch - if target_base_arch not in hw_common_arch_libs - src = hw_common_arch[target_base_arch] - lib = static_library( - 'hw_' + target_base_arch, - build_by_default: false, - sources: src.all_sources() + genh, - include_directories: inc, - c_args: target_system_c_args, - dependencies: src.all_dependencies()) - hw_common_arch_libs += {target_base_arch: lib} - endif - endif - if target_base_arch in target_common_arch - if target_base_arch not in target_common_arch_libs - src = target_common_arch[target_base_arch] - lib = static_library( - 'target_' + target_base_arch, - build_by_default: false, - sources: src.all_sources() + genh, - include_directories: inc, - c_args: target_c_args, - dependencies: src.all_dependencies()) - target_common_arch_libs += {target_base_arch: lib} + src = target_common_arch[target_base_arch] + lib = static_library( + 'common_' + target_base_arch, + build_by_default: false, + sources: src.all_sources() + genh, + include_directories: inc, + c_args: target_c_args, + dependencies: src.all_dependencies() + common_deps + + system_deps + user_deps) + target_common_arch_libs += {target_base_arch: lib} + endif + + # merge hw_common_arch in target_common_system_arch + if target_base_arch in hw_common_arch + hw_src = hw_common_arch[target_base_arch] + if target_base_arch in target_common_system_arch + target_common_system_arch[target_base_arch].add_all(hw_src) + else + target_common_system_arch += {target_base_arch: hw_src} endif endif if target_base_arch in target_common_system_arch - if target_base_arch not in target_common_system_arch_libs - src = target_common_system_arch[target_base_arch] - lib = static_library( - 'target_system_' + target_base_arch, - build_by_default: false, - sources: src.all_sources() + genh, - include_directories: inc, - c_args: target_system_c_args, - dependencies: src.all_dependencies()) - target_common_system_arch_libs += {target_base_arch: lib} - endif + src = target_common_system_arch[target_base_arch] + lib = static_library( + 'system_' + target_base_arch, + build_by_default: false, + sources: src.all_sources() + genh, + include_directories: inc, + c_args: target_system_c_args, + dependencies: src.all_dependencies() + common_deps + system_deps) + target_common_system_arch_libs += {target_base_arch: lib} endif endforeach if have_rust + bindings_incdir = include_directories('.', 'include') # We would like to use --generate-cstr, but it is only available # starting with bindgen 0.66.0. The oldest supported versions # is 0.60.x (Debian 12 has 0.60.1) which introduces --allowlist-file. - bindgen_args = [ + bindgen_args_common = [ '--disable-header-comment', '--raw-line', '// @generated', '--ctypes-prefix', 'std::os::raw', @@ -4215,59 +4220,20 @@ if have_rust ] if not rustfmt.found() if bindgen.version().version_compare('<0.65.0') - bindgen_args += ['--no-rustfmt-bindings'] + bindgen_args_common += ['--no-rustfmt-bindings'] else - bindgen_args += ['--formatter', 'none'] + bindgen_args_common += ['--formatter', 'none'] endif endif if bindgen.version().version_compare('>=0.66.0') - bindgen_args += ['--rust-target', '1.59'] + bindgen_args_common += ['--rust-target', '1.59'] endif if bindgen.version().version_compare('<0.61.0') # default in 0.61+ - bindgen_args += ['--size_t-is-usize'] + bindgen_args_common += ['--size_t-is-usize'] else - bindgen_args += ['--merge-extern-blocks'] - endif - c_enums = [ - 'DeviceCategory', - 'GpioPolarity', - 'MachineInitPhase', - 'MemoryDeviceInfoKind', - 'MigrationPolicy', - 'MigrationPriority', - 'QEMUChrEvent', - 'QEMUClockType', - 'ResetType', - 'device_endian', - 'module_init_type', - ] - foreach enum : c_enums - bindgen_args += ['--rustified-enum', enum] - endforeach - c_bitfields = [ - 'ClockEvent', - 'VMStateFlags', - ] - foreach enum : c_bitfields - bindgen_args += ['--bitfield-enum', enum] - endforeach - - # TODO: Remove this comment when the clang/libclang mismatch issue is solved. - # - # Rust bindings generation with `bindgen` might fail in some cases where the - # detected `libclang` does not match the expected `clang` version/target. In - # this case you must pass the path to `clang` and `libclang` to your build - # command invocation using the environment variables CLANG_PATH and - # LIBCLANG_PATH - bindings_rs = rust.bindgen( - input: 'rust/wrapper.h', - dependencies: common_ss.all_dependencies(), - output: 'bindings.inc.rs', - include_directories: include_directories('.', 'include'), - bindgen_version: ['>=0.60.0'], - args: bindgen_args, - ) + bindgen_args_common += ['--merge-extern-blocks'] + endif subdir('rust') endif @@ -4368,10 +4334,14 @@ foreach target : target_dirs objects += lib.extract_objects(src.sources()) arch_deps += src.dependencies() endif - if target_type == 'system' and target_base_arch in hw_common_arch_libs - src = hw_common_arch[target_base_arch].apply(config_target, strict: false) - lib = hw_common_arch_libs[target_base_arch] - objects += lib.extract_objects(src.sources()) + if target_type == 'system' + src = system_ss.apply(config_target, strict: false) + objects += libsystem.extract_objects(src.sources()) + arch_deps += src.dependencies() + endif + if target_type == 'user' + src = user_ss.apply(config_target, strict: false) + objects += libuser.extract_objects(src.sources()) arch_deps += src.dependencies() endif if target_type == 'system' and target_base_arch in target_common_system_arch_libs @@ -4399,7 +4369,7 @@ foreach target : target_dirs build_by_default: true, build_always_stale: true) rlib = static_library('rust_' + target.underscorify(), - rlib_rs, + structured_sources([], {'.': rlib_rs}), dependencies: target_rust.dependencies(), override_options: ['rust_std=2021', 'build.rust_std=2021'], rust_abi: 'c') @@ -4753,6 +4723,7 @@ if have_rust summary_info += {'Rust target': config_host['RUST_TARGET_TRIPLE']} summary_info += {'rustc': ' '.join(rustc.cmd_array())} summary_info += {'rustc version': rustc.version()} + summary_info += {'rustdoc': rustdoc} summary_info += {'bindgen': bindgen.full_path()} summary_info += {'bindgen version': bindgen.version()} endif diff --git a/migration/colo.c b/migration/colo.c index c976b3f..e0f713c 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -146,7 +146,7 @@ static void secondary_vm_do_failover(void) return; } /* Notify COLO incoming thread that failover work is finished */ - qemu_sem_post(&mis->colo_incoming_sem); + qemu_event_set(&mis->colo_incoming_event); /* For Secondary VM, jump to incoming co */ if (mis->colo_incoming_co) { @@ -195,7 +195,7 @@ static void primary_vm_do_failover(void) } /* Notify COLO thread that failover work is finished */ - qemu_sem_post(&s->colo_exit_sem); + qemu_event_set(&s->colo_exit_event); } COLOMode get_colo_mode(void) @@ -620,8 +620,8 @@ out: } /* Hope this not to be too long to wait here */ - qemu_sem_wait(&s->colo_exit_sem); - qemu_sem_destroy(&s->colo_exit_sem); + qemu_event_wait(&s->colo_exit_event); + qemu_event_destroy(&s->colo_exit_event); /* * It is safe to unregister notifier after failover finished. @@ -651,7 +651,7 @@ void migrate_start_colo_process(MigrationState *s) s->colo_delay_timer = timer_new_ms(QEMU_CLOCK_HOST, colo_checkpoint_notify_timer, NULL); - qemu_sem_init(&s->colo_exit_sem, 0); + qemu_event_init(&s->colo_exit_event, false); colo_process_checkpoint(s); bql_lock(); } @@ -808,11 +808,11 @@ void colo_shutdown(void) case COLO_MODE_PRIMARY: s = migrate_get_current(); qemu_event_set(&s->colo_checkpoint_event); - qemu_sem_post(&s->colo_exit_sem); + qemu_event_set(&s->colo_exit_event); break; case COLO_MODE_SECONDARY: mis = migration_incoming_get_current(); - qemu_sem_post(&mis->colo_incoming_sem); + qemu_event_set(&mis->colo_incoming_event); break; default: break; @@ -827,7 +827,7 @@ static void *colo_process_incoming_thread(void *opaque) Error *local_err = NULL; rcu_register_thread(); - qemu_sem_init(&mis->colo_incoming_sem, 0); + qemu_event_init(&mis->colo_incoming_event, false); migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_COLO); @@ -923,8 +923,8 @@ out: } /* Hope this not to be too long to loop here */ - qemu_sem_wait(&mis->colo_incoming_sem); - qemu_sem_destroy(&mis->colo_incoming_sem); + qemu_event_wait(&mis->colo_incoming_event); + qemu_event_destroy(&mis->colo_incoming_event); rcu_unregister_thread(); return NULL; diff --git a/migration/cpr.c b/migration/cpr.c index 42c4656..a50a57e 100644 --- a/migration/cpr.c +++ b/migration/cpr.c @@ -95,6 +95,36 @@ int cpr_find_fd(const char *name, int id) trace_cpr_find_fd(name, id, fd); return fd; } + +void cpr_resave_fd(const char *name, int id, int fd) +{ + CprFd *elem = find_fd(&cpr_state.fds, name, id); + int old_fd = elem ? elem->fd : -1; + + if (old_fd < 0) { + cpr_save_fd(name, id, fd); + } else if (old_fd != fd) { + error_setg(&error_fatal, + "internal error: cpr fd '%s' id %d value %d " + "already saved with a different value %d", + name, id, fd, old_fd); + } +} + +int cpr_open_fd(const char *path, int flags, const char *name, int id, + Error **errp) +{ + int fd = cpr_find_fd(name, id); + + if (fd < 0) { + fd = qemu_open(path, flags, errp); + if (fd >= 0) { + cpr_save_fd(name, id, fd); + } + } + return fd; +} + /*************************************************************************/ #define CPR_STATE "CprState" @@ -228,3 +258,9 @@ void cpr_state_close(void) cpr_state_file = NULL; } } + +bool cpr_incoming_needed(void *opaque) +{ + MigMode mode = migrate_mode(); + return mode == MIG_MODE_CPR_TRANSFER; +} diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c index 49c26da..e8a563c 100644 --- a/migration/migration-hmp-cmds.c +++ b/migration/migration-hmp-cmds.c @@ -37,29 +37,28 @@ static void migration_global_dump(Monitor *mon) { MigrationState *ms = migrate_get_current(); - monitor_printf(mon, "globals:\n"); - monitor_printf(mon, "store-global-state: %s\n", + monitor_printf(mon, "Globals:\n"); + monitor_printf(mon, " store-global-state: %s\n", ms->store_global_state ? "on" : "off"); - monitor_printf(mon, "only-migratable: %s\n", + monitor_printf(mon, " only-migratable: %s\n", only_migratable ? "on" : "off"); - monitor_printf(mon, "send-configuration: %s\n", + monitor_printf(mon, " send-configuration: %s\n", ms->send_configuration ? "on" : "off"); - monitor_printf(mon, "send-section-footer: %s\n", + monitor_printf(mon, " send-section-footer: %s\n", ms->send_section_footer ? "on" : "off"); - monitor_printf(mon, "send-switchover-start: %s\n", + monitor_printf(mon, " send-switchover-start: %s\n", ms->send_switchover_start ? "on" : "off"); - monitor_printf(mon, "clear-bitmap-shift: %u\n", + monitor_printf(mon, " clear-bitmap-shift: %u\n", ms->clear_bitmap_shift); } void hmp_info_migrate(Monitor *mon, const QDict *qdict) { + bool show_all = qdict_get_try_bool(qdict, "all", false); MigrationInfo *info; info = qmp_query_migrate(NULL); - migration_global_dump(mon); - if (info->blocked_reasons) { strList *reasons = info->blocked_reasons; monitor_printf(mon, "Outgoing migration blocked:\n"); @@ -70,7 +69,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) } if (info->has_status) { - monitor_printf(mon, "Migration status: %s", + monitor_printf(mon, "Status: %s", MigrationStatus_str(info->status)); if (info->status == MIGRATION_STATUS_FAILED && info->error_desc) { monitor_printf(mon, " (%s)\n", info->error_desc); @@ -78,107 +77,130 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) monitor_printf(mon, "\n"); } - monitor_printf(mon, "total time: %" PRIu64 " ms\n", - info->total_time); - if (info->has_expected_downtime) { - monitor_printf(mon, "expected downtime: %" PRIu64 " ms\n", - info->expected_downtime); - } - if (info->has_downtime) { - monitor_printf(mon, "downtime: %" PRIu64 " ms\n", - info->downtime); + if (info->total_time) { + monitor_printf(mon, "Time (ms): total=%" PRIu64, + info->total_time); + if (info->has_setup_time) { + monitor_printf(mon, ", setup=%" PRIu64, + info->setup_time); + } + if (info->has_expected_downtime) { + monitor_printf(mon, ", exp_down=%" PRIu64, + info->expected_downtime); + } + if (info->has_downtime) { + monitor_printf(mon, ", down=%" PRIu64, + info->downtime); + } + monitor_printf(mon, "\n"); } - if (info->has_setup_time) { - monitor_printf(mon, "setup: %" PRIu64 " ms\n", - info->setup_time); + } + + if (info->has_socket_address) { + SocketAddressList *addr; + + monitor_printf(mon, "Sockets: [\n"); + + for (addr = info->socket_address; addr; addr = addr->next) { + char *s = socket_uri(addr->value); + monitor_printf(mon, "\t%s\n", s); + g_free(s); } + monitor_printf(mon, "]\n"); } if (info->ram) { - monitor_printf(mon, "transferred ram: %" PRIu64 " kbytes\n", - info->ram->transferred >> 10); - monitor_printf(mon, "throughput: %0.2f mbps\n", + monitor_printf(mon, "RAM info:\n"); + monitor_printf(mon, " Throughput (Mbps): %0.2f\n", info->ram->mbps); - monitor_printf(mon, "remaining ram: %" PRIu64 " kbytes\n", - info->ram->remaining >> 10); - monitor_printf(mon, "total ram: %" PRIu64 " kbytes\n", + monitor_printf(mon, " Sizes (KiB): pagesize=%" PRIu64 + ", total=%" PRIu64 ",\n", + info->ram->page_size >> 10, info->ram->total >> 10); - monitor_printf(mon, "duplicate: %" PRIu64 " pages\n", - info->ram->duplicate); - monitor_printf(mon, "normal: %" PRIu64 " pages\n", - info->ram->normal); - monitor_printf(mon, "normal bytes: %" PRIu64 " kbytes\n", - info->ram->normal_bytes >> 10); - monitor_printf(mon, "dirty sync count: %" PRIu64 "\n", - info->ram->dirty_sync_count); - monitor_printf(mon, "page size: %" PRIu64 " kbytes\n", - info->ram->page_size >> 10); - monitor_printf(mon, "multifd bytes: %" PRIu64 " kbytes\n", - info->ram->multifd_bytes >> 10); - monitor_printf(mon, "pages-per-second: %" PRIu64 "\n", + monitor_printf(mon, " transferred=%" PRIu64 + ", remain=%" PRIu64 ",\n", + info->ram->transferred >> 10, + info->ram->remaining >> 10); + monitor_printf(mon, " precopy=%" PRIu64 + ", multifd=%" PRIu64 + ", postcopy=%" PRIu64, + info->ram->precopy_bytes >> 10, + info->ram->multifd_bytes >> 10, + info->ram->postcopy_bytes >> 10); + + if (info->vfio) { + monitor_printf(mon, ", vfio=%" PRIu64, + info->vfio->transferred >> 10); + } + monitor_printf(mon, "\n"); + + monitor_printf(mon, " Pages: normal=%" PRIu64 ", zero=%" PRIu64 + ", rate_per_sec=%" PRIu64 "\n", + info->ram->normal, + info->ram->duplicate, info->ram->pages_per_second); + monitor_printf(mon, " Others: dirty_syncs=%" PRIu64, + info->ram->dirty_sync_count); if (info->ram->dirty_pages_rate) { - monitor_printf(mon, "dirty pages rate: %" PRIu64 " pages\n", + monitor_printf(mon, ", dirty_pages_rate=%" PRIu64, info->ram->dirty_pages_rate); } if (info->ram->postcopy_requests) { - monitor_printf(mon, "postcopy request count: %" PRIu64 "\n", + monitor_printf(mon, ", postcopy_req=%" PRIu64, info->ram->postcopy_requests); } - if (info->ram->precopy_bytes) { - monitor_printf(mon, "precopy ram: %" PRIu64 " kbytes\n", - info->ram->precopy_bytes >> 10); - } if (info->ram->downtime_bytes) { - monitor_printf(mon, "downtime ram: %" PRIu64 " kbytes\n", - info->ram->downtime_bytes >> 10); - } - if (info->ram->postcopy_bytes) { - monitor_printf(mon, "postcopy ram: %" PRIu64 " kbytes\n", - info->ram->postcopy_bytes >> 10); + monitor_printf(mon, ", downtime_ram=%" PRIu64, + info->ram->downtime_bytes); } if (info->ram->dirty_sync_missed_zero_copy) { - monitor_printf(mon, - "Zero-copy-send fallbacks happened: %" PRIu64 " times\n", + monitor_printf(mon, ", zerocopy_fallbacks=%" PRIu64, info->ram->dirty_sync_missed_zero_copy); } + monitor_printf(mon, "\n"); + } + + if (!show_all) { + goto out; } + migration_global_dump(mon); + if (info->xbzrle_cache) { - monitor_printf(mon, "cache size: %" PRIu64 " bytes\n", - info->xbzrle_cache->cache_size); - monitor_printf(mon, "xbzrle transferred: %" PRIu64 " kbytes\n", - info->xbzrle_cache->bytes >> 10); - monitor_printf(mon, "xbzrle pages: %" PRIu64 " pages\n", - info->xbzrle_cache->pages); - monitor_printf(mon, "xbzrle cache miss: %" PRIu64 " pages\n", - info->xbzrle_cache->cache_miss); - monitor_printf(mon, "xbzrle cache miss rate: %0.2f\n", - info->xbzrle_cache->cache_miss_rate); - monitor_printf(mon, "xbzrle encoding rate: %0.2f\n", - info->xbzrle_cache->encoding_rate); - monitor_printf(mon, "xbzrle overflow: %" PRIu64 "\n", + monitor_printf(mon, "XBZRLE: size=%" PRIu64 + ", transferred=%" PRIu64 + ", pages=%" PRIu64 + ", miss=%" PRIu64 "\n" + " miss_rate=%0.2f" + ", encode_rate=%0.2f" + ", overflow=%" PRIu64 "\n", + info->xbzrle_cache->cache_size, + info->xbzrle_cache->bytes, + info->xbzrle_cache->pages, + info->xbzrle_cache->cache_miss, + info->xbzrle_cache->cache_miss_rate, + info->xbzrle_cache->encoding_rate, info->xbzrle_cache->overflow); } if (info->has_cpu_throttle_percentage) { - monitor_printf(mon, "cpu throttle percentage: %" PRIu64 "\n", + monitor_printf(mon, "CPU Throttle (%%): %" PRIu64 "\n", info->cpu_throttle_percentage); } if (info->has_dirty_limit_throttle_time_per_round) { - monitor_printf(mon, "dirty-limit throttle time: %" PRIu64 " us\n", + monitor_printf(mon, "Dirty-limit Throttle (us): %" PRIu64 "\n", info->dirty_limit_throttle_time_per_round); } if (info->has_dirty_limit_ring_full_time) { - monitor_printf(mon, "dirty-limit ring full time: %" PRIu64 " us\n", + monitor_printf(mon, "Dirty-limit Ring Full (us): %" PRIu64 "\n", info->dirty_limit_ring_full_time); } if (info->has_postcopy_blocktime) { - monitor_printf(mon, "postcopy blocktime: %u\n", + monitor_printf(mon, "Postcopy Blocktime (ms): %" PRIu32 "\n", info->postcopy_blocktime); } @@ -189,28 +211,12 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) visit_type_uint32List(v, NULL, &info->postcopy_vcpu_blocktime, &error_abort); visit_complete(v, &str); - monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str); + monitor_printf(mon, "Postcopy vCPU Blocktime: %s\n", str); g_free(str); visit_free(v); } - if (info->has_socket_address) { - SocketAddressList *addr; - - monitor_printf(mon, "socket address: [\n"); - - for (addr = info->socket_address; addr; addr = addr->next) { - char *s = socket_uri(addr->value); - monitor_printf(mon, "\t%s\n", s); - g_free(s); - } - monitor_printf(mon, "]\n"); - } - - if (info->vfio) { - monitor_printf(mon, "vfio device transferred: %" PRIu64 " kbytes\n", - info->vfio->transferred >> 10); - } +out: qapi_free_MigrationInfo(info); } diff --git a/migration/migration.c b/migration/migration.c index 4697732..4098870 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1630,7 +1630,7 @@ void migration_cancel(void) } /* If the migration is paused, kick it out of the pause */ if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) { - qemu_sem_post(&s->pause_sem); + qemu_event_set(&s->pause_event); } migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING); } while (s->state != MIGRATION_STATUS_CANCELLING); @@ -2342,7 +2342,7 @@ void qmp_migrate_continue(MigrationStatus state, Error **errp) MigrationStatus_str(s->state)); return; } - qemu_sem_post(&s->pause_sem); + qemu_event_set(&s->pause_event); } int migration_rp_wait(MigrationState *s) @@ -2911,21 +2911,18 @@ static bool migration_switchover_prepare(MigrationState *s) return true; } - /* Since leaving this state is not atomic with posting the semaphore + /* + * Since leaving this state is not atomic with setting the event * it's possible that someone could have issued multiple migrate_continue - * and the semaphore is incorrectly positive at this point; - * the docs say it's undefined to reinit a semaphore that's already - * init'd, so use timedwait to eat up any existing posts. + * and the event is incorrectly set at this point so reset it. */ - while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) { - /* This block intentionally left blank */ - } + qemu_event_reset(&s->pause_event); /* Update [POSTCOPY_]ACTIVE to PRE_SWITCHOVER */ migrate_set_state(&s->state, s->state, MIGRATION_STATUS_PRE_SWITCHOVER); bql_unlock(); - qemu_sem_wait(&s->pause_sem); + qemu_event_wait(&s->pause_event); bql_lock(); /* @@ -4057,7 +4054,7 @@ static void migration_instance_finalize(Object *obj) qemu_mutex_destroy(&ms->qemu_file_lock); qemu_sem_destroy(&ms->wait_unplug_sem); qemu_sem_destroy(&ms->rate_limit_sem); - qemu_sem_destroy(&ms->pause_sem); + qemu_event_destroy(&ms->pause_event); qemu_sem_destroy(&ms->postcopy_pause_sem); qemu_sem_destroy(&ms->rp_state.rp_sem); qemu_sem_destroy(&ms->rp_state.rp_pong_acks); @@ -4072,7 +4069,7 @@ static void migration_instance_init(Object *obj) ms->state = MIGRATION_STATUS_NONE; ms->mbps = -1; ms->pages_per_second = -1; - qemu_sem_init(&ms->pause_sem, 0); + qemu_event_init(&ms->pause_event, false); qemu_mutex_init(&ms->error_mutex); migrate_params_init(&ms->parameters); diff --git a/migration/migration.h b/migration/migration.h index d53f7ca..739289d 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -98,9 +98,9 @@ struct MigrationIncomingState { void (*transport_cleanup)(void *data); /* * Used to sync thread creations. Note that we can't create threads in - * parallel with this sem. + * parallel with this event. */ - QemuSemaphore thread_sync_sem; + QemuEvent thread_sync_event; /* * Free at the start of the main state load, set as the main thread finishes * loading state. @@ -186,7 +186,7 @@ struct MigrationIncomingState { /* The coroutine we should enter (back) after failover */ Coroutine *colo_incoming_co; - QemuSemaphore colo_incoming_sem; + QemuEvent colo_incoming_event; /* Optional load threads pool and its thread exit request flag */ ThreadPool *load_threads; @@ -379,10 +379,10 @@ struct MigrationState { QemuSemaphore wait_unplug_sem; /* Migration is paused due to pause-before-switchover */ - QemuSemaphore pause_sem; + QemuEvent pause_event; - /* The semaphore is used to notify COLO thread that failover is finished */ - QemuSemaphore colo_exit_sem; + /* The event is used to notify COLO thread that failover is finished */ + QemuEvent colo_exit_event; /* The event is used to notify COLO thread to do checkpoint */ QemuEvent colo_checkpoint_event; diff --git a/migration/multifd-nocomp.c b/migration/multifd-nocomp.c index 88fe0f9..b48eae3 100644 --- a/migration/multifd-nocomp.c +++ b/migration/multifd-nocomp.c @@ -17,6 +17,7 @@ #include "migration-stats.h" #include "multifd.h" #include "options.h" +#include "migration.h" #include "qapi/error.h" #include "qemu/cutils.h" #include "qemu/error-report.h" @@ -398,7 +399,7 @@ int multifd_ram_flush_and_sync(QEMUFile *f) MultiFDSyncReq req; int ret; - if (!migrate_multifd()) { + if (!migrate_multifd() || migration_in_postcopy()) { return 0; } diff --git a/migration/multifd-zero-page.c b/migration/multifd-zero-page.c index dbc1184..4cde868 100644 --- a/migration/multifd-zero-page.c +++ b/migration/multifd-zero-page.c @@ -85,9 +85,27 @@ void multifd_recv_zero_page_process(MultiFDRecvParams *p) { for (int i = 0; i < p->zero_num; i++) { void *page = p->host + p->zero[i]; - if (ramblock_recv_bitmap_test_byte_offset(p->block, p->zero[i])) { + bool received = + ramblock_recv_bitmap_test_byte_offset(p->block, p->zero[i]); + + /* + * During multifd migration zero page is written to the memory + * only if it is migrated more than once. + * + * It becomes a problem when both multifd & postcopy options are + * enabled. If the zero page which was skipped during multifd phase, + * is accessed during the postcopy phase of the migration, a page + * fault occurs. But this page fault is not served because the + * 'receivedmap' says the zero page is already received. Thus the + * thread accessing that page may hang. + * + * When postcopy is enabled, always write the zero page as and when + * it is migrated. + */ + if (migrate_postcopy_ram() || received) { memset(page, 0, multifd_ram_page_size()); - } else { + } + if (!received) { ramblock_recv_bitmap_set_offset(p->block, p->zero[i]); } } diff --git a/migration/multifd.c b/migration/multifd.c index ec108af..b255778 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -690,6 +690,7 @@ static void *multifd_send_thread(void *opaque) if (qatomic_load_acquire(&p->pending_job)) { bool is_device_state = multifd_payload_device_state(p->data); size_t total_size; + int write_flags_masked = 0; p->flags = 0; p->iovs_num = 0; @@ -697,6 +698,9 @@ static void *multifd_send_thread(void *opaque) if (is_device_state) { multifd_device_state_send_prepare(p); + + /* Device state packets cannot be sent via zerocopy */ + write_flags_masked |= QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; } else { ret = multifd_send_state->ops->send_prepare(p, &local_err); if (ret != 0) { @@ -718,7 +722,8 @@ static void *multifd_send_thread(void *opaque) &p->data->u.ram, &local_err); } else { ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, - NULL, 0, p->write_flags, + NULL, 0, + p->write_flags & ~write_flags_masked, &local_err); } @@ -1379,6 +1384,13 @@ static void *multifd_recv_thread(void *opaque) } if (has_data) { + /* + * multifd thread should not be active and receive data + * when migration is in the Postcopy phase. Two threads + * writing the same memory area could easily corrupt + * the guest state. + */ + assert(!migration_in_postcopy()); if (is_device_state) { assert(use_packets); ret = multifd_device_state_recv(p, &local_err); diff --git a/migration/options.c b/migration/options.c index b6ae953..162c72c 100644 --- a/migration/options.c +++ b/migration/options.c @@ -509,11 +509,6 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) error_setg(errp, "Postcopy is not compatible with ignore-shared"); return false; } - - if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) { - error_setg(errp, "Postcopy is not yet compatible with multifd"); - return false; - } } if (new_caps[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) { @@ -573,7 +568,7 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) return false; } - if (migrate_incoming_started()) { + if (!migrate_postcopy_preempt() && migrate_incoming_started()) { error_setg(errp, "Postcopy preempt must be set before incoming starts"); return false; @@ -581,7 +576,7 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) } if (new_caps[MIGRATION_CAPABILITY_MULTIFD]) { - if (migrate_incoming_started()) { + if (!migrate_multifd() && migrate_incoming_started()) { error_setg(errp, "Multifd must be set before incoming starts"); return false; } diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 995614b..75fd310 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -90,10 +90,10 @@ void postcopy_thread_create(MigrationIncomingState *mis, QemuThread *thread, const char *name, void *(*fn)(void *), int joinable) { - qemu_sem_init(&mis->thread_sync_sem, 0); + qemu_event_init(&mis->thread_sync_event, false); qemu_thread_create(thread, name, fn, mis, joinable); - qemu_sem_wait(&mis->thread_sync_sem); - qemu_sem_destroy(&mis->thread_sync_sem); + qemu_event_wait(&mis->thread_sync_event); + qemu_event_destroy(&mis->thread_sync_event); } /* Postcopy needs to detect accesses to pages that haven't yet been copied @@ -964,7 +964,7 @@ static void *postcopy_ram_fault_thread(void *opaque) trace_postcopy_ram_fault_thread_entry(); rcu_register_thread(); mis->last_rb = NULL; /* last RAMBlock we sent part of */ - qemu_sem_post(&mis->thread_sync_sem); + qemu_event_set(&mis->thread_sync_event); struct pollfd *pfd; size_t pfd_len = 2 + mis->postcopy_remote_fds->len; @@ -1716,7 +1716,7 @@ void *postcopy_preempt_thread(void *opaque) rcu_register_thread(); - qemu_sem_post(&mis->thread_sync_sem); + qemu_event_set(&mis->thread_sync_event); /* * The preempt channel is established in asynchronous way. Wait diff --git a/migration/ram.c b/migration/ram.c index e12913b..d26dbd3 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1993,9 +1993,8 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss) } } - if (migrate_multifd()) { - RAMBlock *block = pss->block; - return ram_save_multifd_page(block, offset); + if (migrate_multifd() && !migration_in_postcopy()) { + return ram_save_multifd_page(pss->block, offset); } return ram_save_page(rs, pss); diff --git a/migration/savevm.c b/migration/savevm.c index 006514c..bb04a45 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -266,7 +266,7 @@ typedef struct SaveState { static SaveState savevm_state = { .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers), - .handler_pri_head = { [MIG_PRI_DEFAULT ... MIG_PRI_MAX] = NULL }, + .handler_pri_head = { [0 ... MIG_PRI_MAX] = NULL }, .global_section_id = 0, }; @@ -737,7 +737,7 @@ static int calculate_compat_instance_id(const char *idstr) static inline MigrationPriority save_state_priority(SaveStateEntry *se) { - if (se->vmsd) { + if (se->vmsd && se->vmsd->priority) { return se->vmsd->priority; } return MIG_PRI_DEFAULT; @@ -2078,7 +2078,7 @@ static void *postcopy_ram_listen_thread(void *opaque) migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_POSTCOPY_ACTIVE); - qemu_sem_post(&mis->thread_sync_sem); + qemu_event_set(&mis->thread_sync_event); trace_postcopy_ram_listen_thread_start(); rcu_register_thread(); diff --git a/nbd/client-connection.c b/nbd/client-connection.c index b11e266..79ea97e 100644 --- a/nbd/client-connection.c +++ b/nbd/client-connection.c @@ -31,6 +31,8 @@ #include "qapi/clone-visitor.h" #include "qemu/coroutine.h" +#include "nbd/nbd-internal.h" + struct NBDClientConnection { /* Initialization constants, never change */ SocketAddress *saddr; /* address to connect to */ @@ -140,6 +142,7 @@ static int nbd_connect(QIOChannelSocket *sioc, SocketAddress *addr, return ret; } + nbd_set_socket_send_buffer(sioc); qio_channel_set_delay(QIO_CHANNEL(sioc), false); if (!info) { diff --git a/nbd/common.c b/nbd/common.c index 589a748..2a133a6 100644 --- a/nbd/common.c +++ b/nbd/common.c @@ -18,6 +18,9 @@ #include "qemu/osdep.h" #include "trace.h" +#include "io/channel-socket.h" +#include "qapi/error.h" +#include "qemu/units.h" #include "nbd-internal.h" /* Discard length bytes from channel. Return -errno on failure and 0 on @@ -264,3 +267,26 @@ const char *nbd_mode_lookup(NBDMode mode) return "<unknown>"; } } + +/* + * Testing shows that 2m send buffer is optimal. Changing the receive buffer + * size has no effect on performance. + * On Linux we need to increase net.core.wmem_max to make this effective. + */ +#if defined(__APPLE__) || defined(__linux__) +#define UNIX_STREAM_SOCKET_SEND_BUFFER_SIZE (2 * MiB) +#endif + +void nbd_set_socket_send_buffer(QIOChannelSocket *sioc) +{ +#ifdef UNIX_STREAM_SOCKET_SEND_BUFFER_SIZE + if (sioc->localAddr.ss_family == AF_UNIX) { + size_t size = UNIX_STREAM_SOCKET_SEND_BUFFER_SIZE; + Error *errp = NULL; + + if (qio_channel_socket_set_send_buffer(sioc, size, &errp) < 0) { + warn_report_err(errp); + } + } +#endif /* UNIX_STREAM_SOCKET_SEND_BUFFER_SIZE */ +} diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h index 715d92d..6bafeef 100644 --- a/nbd/nbd-internal.h +++ b/nbd/nbd-internal.h @@ -74,4 +74,9 @@ static inline int nbd_write(QIOChannel *ioc, const void *buffer, size_t size, int nbd_drop(QIOChannel *ioc, size_t size, Error **errp); +/* nbd_set_socket_send_buffer + * Set the socket send buffer size for optimal performance. + */ +void nbd_set_socket_send_buffer(QIOChannelSocket *sioc); + #endif diff --git a/nbd/server.c b/nbd/server.c index 2076fb2..d242be9 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -3291,6 +3291,8 @@ void nbd_client_new(QIOChannelSocket *sioc, client->close_fn = close_fn; client->owner = owner; + nbd_set_socket_send_buffer(sioc); + co = qemu_coroutine_create(nbd_co_client_start, client); qemu_coroutine_enter(co); } diff --git a/net/socket.c b/net/socket.c index 8e3702e..784dda6 100644 --- a/net/socket.c +++ b/net/socket.c @@ -157,7 +157,7 @@ static void net_socket_send(void *opaque) NetSocketState *s = opaque; int size; int ret; - uint8_t buf1[NET_BUFSIZE]; + QEMU_UNINITIALIZED uint8_t buf1[NET_BUFSIZE]; const uint8_t *buf; size = recv(s->fd, buf1, sizeof(buf1), 0); diff --git a/net/stream.c b/net/stream.c index 4de5613..6152d2a 100644 --- a/net/stream.c +++ b/net/stream.c @@ -148,7 +148,7 @@ static gboolean net_stream_send(QIOChannel *ioc, NetStreamState *s = data; int size; int ret; - char buf1[NET_BUFSIZE]; + QEMU_UNINITIALIZED char buf1[NET_BUFSIZE]; const char *buf; size = qio_channel_read(s->ioc, buf1, sizeof(buf1), NULL); diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 7ca8b46..58d7389 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -235,6 +235,7 @@ static void vhost_vdpa_cleanup(NetClientState *nc) return; } qemu_close(s->vhost_vdpa.shared->device_fd); + g_clear_pointer(&s->vhost_vdpa.shared->iova_tree, vhost_iova_tree_delete); g_free(s->vhost_vdpa.shared); } @@ -362,14 +363,8 @@ static int vdpa_net_migration_state_notifier(NotifierWithReturn *notifier, static void vhost_vdpa_net_data_start_first(VhostVDPAState *s) { - struct vhost_vdpa *v = &s->vhost_vdpa; - migration_add_notifier(&s->migration_state, vdpa_net_migration_state_notifier); - if (v->shadow_vqs_enabled) { - v->shared->iova_tree = vhost_iova_tree_new(v->shared->iova_range.first, - v->shared->iova_range.last); - } } static int vhost_vdpa_net_data_start(NetClientState *nc) @@ -416,19 +411,12 @@ static int vhost_vdpa_net_data_load(NetClientState *nc) static void vhost_vdpa_net_client_stop(NetClientState *nc) { VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); - struct vhost_dev *dev; assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); if (s->vhost_vdpa.index == 0) { migration_remove_notifier(&s->migration_state); } - - dev = s->vhost_vdpa.dev; - if (dev->vq_index + dev->nvqs == dev->vq_index_end) { - g_clear_pointer(&s->vhost_vdpa.shared->iova_tree, - vhost_iova_tree_delete); - } } static NetClientInfo net_vhost_vdpa_info = { @@ -600,24 +588,6 @@ out: return 0; } - /* - * If other vhost_vdpa already have an iova_tree, reuse it for simplicity, - * whether CVQ shares ASID with guest or not, because: - * - Memory listener need access to guest's memory addresses allocated in - * the IOVA tree. - * - There should be plenty of IOVA address space for both ASID not to - * worry about collisions between them. Guest's translations are still - * validated with virtio virtqueue_pop so there is no risk for the guest - * to access memory that it shouldn't. - * - * To allocate a iova tree per ASID is doable but it complicates the code - * and it is not worth it for the moment. - */ - if (!v->shared->iova_tree) { - v->shared->iova_tree = vhost_iova_tree_new(v->shared->iova_range.first, - v->shared->iova_range.last); - } - r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_page_len(), false); if (unlikely(r < 0)) { @@ -1726,6 +1696,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, s->vhost_vdpa.shared->device_fd = vdpa_device_fd; s->vhost_vdpa.shared->iova_range = iova_range; s->vhost_vdpa.shared->shadow_data = svq; + s->vhost_vdpa.shared->iova_tree = vhost_iova_tree_new(iova_range.first, + iova_range.last); } else if (!is_datapath) { s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(), PROT_READ | PROT_WRITE, diff --git a/pc-bios/bios-256k.bin b/pc-bios/bios-256k.bin Binary files differindex 48c3707..509f398 100644 --- a/pc-bios/bios-256k.bin +++ b/pc-bios/bios-256k.bin diff --git a/pc-bios/bios-microvm.bin b/pc-bios/bios-microvm.bin Binary files differindex c98351e..4870015 100644 --- a/pc-bios/bios-microvm.bin +++ b/pc-bios/bios-microvm.bin diff --git a/pc-bios/bios.bin b/pc-bios/bios.bin Binary files differindex 7e2d062..4b81a96 100644 --- a/pc-bios/bios.bin +++ b/pc-bios/bios.bin diff --git a/pc-bios/dtb/meson.build b/pc-bios/dtb/meson.build index 7a71835..9930329 100644 --- a/pc-bios/dtb/meson.build +++ b/pc-bios/dtb/meson.build @@ -9,7 +9,7 @@ dtc = find_program('dtc', required: false) if dtc.found() foreach out : dtbs f = fs.replace_suffix(out, '.dts') - custom_target(f, + custom_target(out, build_by_default: have_system, input: files(f), output: out, diff --git a/pc-bios/meson.build b/pc-bios/meson.build index 79bb2e1..3c41620 100644 --- a/pc-bios/meson.build +++ b/pc-bios/meson.build @@ -88,7 +88,7 @@ blobs = [ ] if get_option('install_blobs') - install_data(blobs, install_dir: qemu_datadir) + install_data(blobs, install_dir: qemu_datadir, install_mode: 'rw-r--r--') endif subdir('descriptors') diff --git a/pc-bios/vgabios-ati.bin b/pc-bios/vgabios-ati.bin Binary files differindex e10cd26..011359e 100644 --- a/pc-bios/vgabios-ati.bin +++ b/pc-bios/vgabios-ati.bin diff --git a/pc-bios/vgabios-bochs-display.bin b/pc-bios/vgabios-bochs-display.bin Binary files differindex 416036d..1d233af 100644 --- a/pc-bios/vgabios-bochs-display.bin +++ b/pc-bios/vgabios-bochs-display.bin diff --git a/pc-bios/vgabios-cirrus.bin b/pc-bios/vgabios-cirrus.bin Binary files differindex 4ffaa43..f7b06f2 100644 --- a/pc-bios/vgabios-cirrus.bin +++ b/pc-bios/vgabios-cirrus.bin diff --git a/pc-bios/vgabios-qxl.bin b/pc-bios/vgabios-qxl.bin Binary files differindex 1b7a383..50dfeb2 100644 --- a/pc-bios/vgabios-qxl.bin +++ b/pc-bios/vgabios-qxl.bin diff --git a/pc-bios/vgabios-ramfb.bin b/pc-bios/vgabios-ramfb.bin Binary files differindex dba6cb8..b72279f 100644 --- a/pc-bios/vgabios-ramfb.bin +++ b/pc-bios/vgabios-ramfb.bin diff --git a/pc-bios/vgabios-stdvga.bin b/pc-bios/vgabios-stdvga.bin Binary files differindex 0d541c5..5b48ca8 100644 --- a/pc-bios/vgabios-stdvga.bin +++ b/pc-bios/vgabios-stdvga.bin diff --git a/pc-bios/vgabios-virtio.bin b/pc-bios/vgabios-virtio.bin Binary files differindex 2ce3557..f580c33 100644 --- a/pc-bios/vgabios-virtio.bin +++ b/pc-bios/vgabios-virtio.bin diff --git a/pc-bios/vgabios-vmware.bin b/pc-bios/vgabios-vmware.bin Binary files differindex b7cab15..03b6dbd 100644 --- a/pc-bios/vgabios-vmware.bin +++ b/pc-bios/vgabios-vmware.bin diff --git a/pc-bios/vgabios.bin b/pc-bios/vgabios.bin Binary files differindex ee748f6..3f71aae 100644 --- a/pc-bios/vgabios.bin +++ b/pc-bios/vgabios.bin diff --git a/plugins/meson.build b/plugins/meson.build index 5383c7b..62c991d 100644 --- a/plugins/meson.build +++ b/plugins/meson.build @@ -33,7 +33,7 @@ if host_os == 'windows' input: qemu_plugin_symbols, output: 'qemu_plugin_api.def', capture: true, - command: ['sed', '-e', '0,/^/s//EXPORTS/; s/[{};]//g', '@INPUT@']) + command: [python, '-c', 'import fileinput, re; print("EXPORTS", end=""); [print(re.sub(r"[{};]", "", line), end="") for line in fileinput.input()]', '@INPUT@']) # then use dlltool to assemble a delaylib. # The delaylib will have an "imaginary" name (qemu.exe), that is used by the @@ -61,8 +61,8 @@ endif user_ss.add(files('user.c', 'api-user.c')) system_ss.add(files('system.c', 'api-system.c')) -libuser_ss.add(files('api.c', 'core.c')) -libsystem_ss.add(files('api.c', 'core.c')) +user_ss.add(files('api.c', 'core.c')) +system_ss.add(files('api.c', 'core.c')) common_ss.add(files('loader.c')) diff --git a/python/scripts/vendor.py b/python/scripts/vendor.py index 0405e91..b47db00 100755 --- a/python/scripts/vendor.py +++ b/python/scripts/vendor.py @@ -41,8 +41,8 @@ def main() -> int: parser.parse_args() packages = { - "meson==1.5.0": - "52b34f4903b882df52ad0d533146d4b992c018ea77399f825579737672ae7b20", + "meson==1.8.1": + "374bbf71247e629475fc10b0bd2ef66fc418c2d8f4890572f74de0f97d0d42da", } vendor_dir = Path(__file__, "..", "..", "wheels").resolve() diff --git a/python/setup.cfg b/python/setup.cfg index c48dff2..d7f5dc7 100644 --- a/python/setup.cfg +++ b/python/setup.cfg @@ -46,6 +46,7 @@ devel = urwid >= 2.1.2 urwid-readline >= 0.13 Pygments >= 2.9.0 + sphinx >= 3.4.3 # Provides qom-fuse functionality fuse = @@ -78,7 +79,6 @@ exclude = __pycache__, [mypy] strict = True python_version = 3.9 -warn_unused_configs = True namespace_packages = True warn_unused_ignores = False diff --git a/python/tests/minreqs.txt b/python/tests/minreqs.txt index 6445407..cd2e2a8 100644 --- a/python/tests/minreqs.txt +++ b/python/tests/minreqs.txt @@ -11,6 +11,15 @@ # When adding new dependencies, pin the very oldest non-yanked version # on PyPI that allows the test suite to pass. +# For some reason, the presence of packaging==14.0 below requires us to +# also pin setuptools to version 70 or below. Otherwise, the +# installation of the QEMU package itself fails, failing to find +# setuptools. +setuptools<=70 + +# Dependencies for qapidoc/qapi_domain et al +sphinx==3.4.3 + # Dependencies for the TUI addon (Required for successful linting) urwid==2.1.2 urwid-readline==0.13 @@ -38,10 +47,32 @@ pyflakes==2.5.0 # Transitive mypy dependencies mypy-extensions==1.0.0 +tomli==1.1.0 typing-extensions==4.7.1 # Transitive pylint dependencies astroid==2.15.4 +dill==0.2 lazy-object-proxy==1.4.0 +platformdirs==2.2.0 toml==0.10.0 +tomlkit==0.10.1 wrapt==1.14.0 + +# Transitive sphinx dependencies +Jinja2==2.7 +MarkupSafe==1.1.0 +alabaster==0.7.1 +babel==1.3 +docutils==0.12 +imagesize==0.5.0 +packaging==14.0 +pytz==2011b0 +requests==2.5.0 +snowballstemmer==1.1 +sphinxcontrib-applehelp==1.0.0 +sphinxcontrib-devhelp==1.0.0 +sphinxcontrib-htmlhelp==1.0.0 +sphinxcontrib-jsmath==1.0.0 +sphinxcontrib-qthelp==1.0.0 +sphinxcontrib-serializinghtml==1.0.0 diff --git a/python/tests/qapi-flake8.sh b/python/tests/qapi-flake8.sh new file mode 100755 index 0000000..c69f9ea --- /dev/null +++ b/python/tests/qapi-flake8.sh @@ -0,0 +1,6 @@ +#!/bin/sh -e +# SPDX-License-Identifier: GPL-2.0-or-later + +python3 -m flake8 ../scripts/qapi/ \ + ../docs/sphinx/qapidoc.py \ + ../docs/sphinx/qapi_domain.py diff --git a/python/tests/qapi-isort.sh b/python/tests/qapi-isort.sh new file mode 100755 index 0000000..78dd947 --- /dev/null +++ b/python/tests/qapi-isort.sh @@ -0,0 +1,8 @@ +#!/bin/sh -e +# SPDX-License-Identifier: GPL-2.0-or-later + +python3 -m isort --sp . -c ../scripts/qapi/ +# Force isort to recognize "compat" as a local module and not third-party +python3 -m isort --sp . -c -p compat -p qapidoc_legacy \ + ../docs/sphinx/qapi_domain.py \ + ../docs/sphinx/qapidoc.py diff --git a/python/tests/qapi-mypy.sh b/python/tests/qapi-mypy.sh new file mode 100755 index 0000000..363dbaf --- /dev/null +++ b/python/tests/qapi-mypy.sh @@ -0,0 +1,4 @@ +#!/bin/sh -e +# SPDX-License-Identifier: GPL-2.0-or-later + +python3 -m mypy ../scripts/qapi diff --git a/python/tests/qapi-pylint.sh b/python/tests/qapi-pylint.sh new file mode 100755 index 0000000..8767d9d --- /dev/null +++ b/python/tests/qapi-pylint.sh @@ -0,0 +1,8 @@ +#!/bin/sh -e +# SPDX-License-Identifier: GPL-2.0-or-later + +SETUPTOOLS_USE_DISTUTILS=stdlib python3 -m pylint \ + --rcfile=../scripts/qapi/pylintrc \ + ../scripts/qapi/ \ + ../docs/sphinx/qapidoc.py \ + ../docs/sphinx/qapi_domain.py diff --git a/python/wheels/meson-1.5.0-py3-none-any.whl b/python/wheels/meson-1.5.0-py3-none-any.whl Binary files differdeleted file mode 100644 index c7edeb3..0000000 --- a/python/wheels/meson-1.5.0-py3-none-any.whl +++ /dev/null diff --git a/python/wheels/meson-1.8.1-py3-none-any.whl b/python/wheels/meson-1.8.1-py3-none-any.whl Binary files differnew file mode 100644 index 0000000..a885f0e --- /dev/null +++ b/python/wheels/meson-1.8.1-py3-none-any.whl diff --git a/pythondeps.toml b/pythondeps.toml index 7eaaa0f..7884ab5 100644 --- a/pythondeps.toml +++ b/pythondeps.toml @@ -19,7 +19,7 @@ [meson] # The install key should match the version in python/wheels/ -meson = { accepted = ">=1.5.0", installed = "1.5.0", canary = "meson" } +meson = { accepted = ">=1.5.0", installed = "1.8.1", canary = "meson" } pycotap = { accepted = ">=1.1.0", installed = "1.3.1" } [docs] diff --git a/qapi/acpi.json b/qapi/acpi.json index 045dab6..2d53b82 100644 --- a/qapi/acpi.json +++ b/qapi/acpi.json @@ -80,7 +80,7 @@ ## # @ACPIOSTInfo: # -# OSPM Status Indication for a device For description of possible +# OSPM Status Indication for a device. For description of possible # values of @source and @status fields see "_OST (OSPM Status # Indication)" chapter of ACPI5.0 spec. # diff --git a/qapi/audio.json b/qapi/audio.json index dd5a58d..16de231 100644 --- a/qapi/audio.json +++ b/qapi/audio.json @@ -96,7 +96,7 @@ # @period-length: the period length in microseconds # # @try-poll: attempt to use poll mode, falling back to non-polling -# access on failure (default true) +# access on failure (default false) # # Since: 4.0 ## @@ -309,9 +309,9 @@ # # @name: name of the sink/source to use # -# @stream-name: name of the PulseAudio stream created by qemu. Can be +# @stream-name: name of the PulseAudio stream created by QEMU. Can be # used to identify the stream in PulseAudio when you create -# multiple PulseAudio devices or run multiple qemu instances +# multiple PulseAudio devices or run multiple QEMU instances # (default: audiodev's id, since 4.2) # # @latency: latency you want PulseAudio to achieve in microseconds @@ -353,9 +353,9 @@ # # @name: name of the sink/source to use # -# @stream-name: name of the PipeWire stream created by qemu. Can be +# @stream-name: name of the PipeWire stream created by QEMU. Can be # used to identify the stream in PipeWire when you create multiple -# PipeWire devices or run multiple qemu instances (default: +# PipeWire devices or run multiple QEMU instances (default: # audiodev's id) # # @latency: latency you want PipeWire to achieve in microseconds @@ -533,7 +533,7 @@ ## # @query-audiodevs: # -# Returns information about audiodev configuration +# Return information about audiodev configuration # # Returns: array of @Audiodev # diff --git a/qapi/block-core.json b/qapi/block-core.json index b411511..1df6644 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -31,8 +31,8 @@ # @icount: Current instruction count. Appears when execution # record/replay is enabled. Used for "time-traveling" to match # the moment in the recorded execution with the snapshots. This -# counter may be obtained through @query-replay command (since -# 5.2) +# counter may be obtained through @query-replay command +# (since 5.2) # # Since: 1.3 ## @@ -488,7 +488,7 @@ # # @active: true if the backend is active; typical cases for inactive backends # are on the migration source instance after migration completes and on the -# destination before it completes. (since: 10.0) +# destination before it completes. (since: 10.0) # # @encrypted: true if the backing device is encrypted # @@ -510,11 +510,11 @@ # # @bps_max: total throughput limit during bursts, in bytes (Since 1.7) # -# @bps_rd_max: read throughput limit during bursts, in bytes (Since -# 1.7) +# @bps_rd_max: read throughput limit during bursts, in bytes +# (Since 1.7) # -# @bps_wr_max: write throughput limit during bursts, in bytes (Since -# 1.7) +# @bps_wr_max: write throughput limit during bursts, in bytes +# (Since 1.7) # # @iops_max: total I/O operations per second during bursts, in bytes # (Since 1.7) @@ -951,11 +951,11 @@ # @unmap_operations: The number of unmap operations performed by the # device (Since 4.2) # -# @rd_total_time_ns: Total time spent on reads in nanoseconds (since -# 0.15). +# @rd_total_time_ns: Total time spent on reads in nanoseconds +# (since 0.15) # -# @wr_total_time_ns: Total time spent on writes in nanoseconds (since -# 0.15). +# @wr_total_time_ns: Total time spent on writes in nanoseconds +# (since 0.15) # # @zone_append_total_time_ns: Total time spent on zone append writes # in nanoseconds (since 8.1) @@ -1322,8 +1322,8 @@ # @incremental: only copy data described by the dirty bitmap. # (since: 2.4) # -# @bitmap: only copy data described by the dirty bitmap. (since: 4.2) -# Behavior on completion is determined by the BitmapSyncMode. +# @bitmap: only copy data described by the dirty bitmap. Behavior on +# completion is determined by the BitmapSyncMode. (since: 4.2) # # Since: 1.3 ## @@ -1337,7 +1337,7 @@ # bitmap when used for data copy operations. # # @on-success: The bitmap is only synced when the operation is -# successful. This is the behavior always used for 'INCREMENTAL' +# successful. This is the behavior always used for incremental # backups. # # @never: The bitmap is never synchronized with the operation, and is @@ -1417,8 +1417,8 @@ # @auto-finalize: Job will finalize itself when PENDING, moving to the # CONCLUDED state. (since 2.12) # -# @auto-dismiss: Job will dismiss itself when CONCLUDED, moving to the -# NULL state and disappearing from the query list. (since 2.12) +# @auto-dismiss: Job will dismiss itself when CONCLUDED, and +# disappear. (since 2.12) # # @error: Error information if the job did not complete successfully. # Not set if the job completed successfully. (since 2.12.1) @@ -1502,15 +1502,15 @@ # # @device: the name of the device to take a snapshot of. # -# @node-name: graph node name to generate the snapshot from (Since -# 2.0) +# @node-name: graph node name to generate the snapshot from +# (Since 2.0) # # @snapshot-file: the target of the new overlay image. If the file # exists, or if it is a device, the overlay will be created in the # existing file/device. Otherwise, a new file will be created. # -# @snapshot-node-name: the graph node name of the new image (Since -# 2.0) +# @snapshot-node-name: the graph node name of the new image +# (Since 2.0) # # @format: the format of the overlay image, default is 'qcow2'. # @@ -1589,7 +1589,7 @@ # # @bitmap-mode: Specifies the type of data the bitmap should contain # after the operation concludes. Must be present if a bitmap was -# provided, Must NOT be present otherwise. (Since 4.2) +# provided, must **not** be present otherwise. (Since 4.2) # # @compress: true to compress data, if the target format supports it. # (default: false) (since 2.8) @@ -1606,16 +1606,15 @@ # copy-before-write jobs; defaults to break-guest-write. (Since 10.1) # # @auto-finalize: When false, this job will wait in a PENDING state -# after it has finished its work, waiting for @block-job-finalize -# before making any block graph changes. When true, this job will +# after it has finished its work, waiting for @job-finalize before +# making any block graph changes. When true, this job will # automatically perform its abort or commit actions. Defaults to # true. (Since 2.12) # # @auto-dismiss: When false, this job will wait in a CONCLUDED state # after it has completely ceased all work, and awaits -# @block-job-dismiss. When true, this job will automatically -# disappear from the query list without user intervention. -# Defaults to true. (Since 2.12) +# @job-dismiss. When true, this job will automatically disappear +# without user intervention. Defaults to true. (Since 2.12) # # @filter-node-name: the node name that should be assigned to the # filter driver that the backup job inserts into the graph above @@ -1785,8 +1784,7 @@ # If top == base, that is an error. If top has no overlays on top of # it, or if it is in use by a writer, the job will not be completed by # itself. The user needs to complete the job with the -# block-job-complete command after getting the ready event. (Since -# 2.0) +# job-complete command after getting the ready event. (Since 2.0) # # If the base image is smaller than top, then the base image will be # resized to be the same size as top. If top is smaller than the base @@ -1840,7 +1838,7 @@ # @speed: the maximum speed, in bytes per second # # @on-error: the action to take on an error. 'ignore' means that the -# request should be retried. (default: report; Since: 5.0) +# request should be retried. (default: report; since: 5.0) # # @filter-node-name: the node name that should be assigned to the # filter driver that the commit job inserts into the graph above @@ -1848,16 +1846,15 @@ # autogenerated. (Since: 2.9) # # @auto-finalize: When false, this job will wait in a PENDING state -# after it has finished its work, waiting for @block-job-finalize -# before making any block graph changes. When true, this job will +# after it has finished its work, waiting for @job-finalize before +# making any block graph changes. When true, this job will # automatically perform its abort or commit actions. Defaults to # true. (Since 3.1) # # @auto-dismiss: When false, this job will wait in a CONCLUDED state # after it has completely ceased all work, and awaits -# @block-job-dismiss. When true, this job will automatically -# disappear from the query list without user intervention. -# Defaults to true. (Since 3.1) +# @job-dismiss. When true, this job will automatically disappear +# without user intervention. Defaults to true. (Since 3.1) # # Features: # @@ -1895,7 +1892,7 @@ # The status of ongoing drive-backup operations can be checked with # query-block-jobs where the BlockJobInfo.type field has the value # 'backup'. The operation can be stopped before it has completed -# using the block-job-cancel command. +# using the job-cancel or block-job-cancel command. # # Features: # @@ -1926,7 +1923,7 @@ # The status of ongoing blockdev-backup operations can be checked with # query-block-jobs where the BlockJobInfo.type field has the value # 'backup'. The operation can be stopped before it has completed -# using the block-job-cancel command. +# using the job-cancel or block-job-cancel command. # # Errors: # - If @device is not a valid block device, DeviceNotFound @@ -2030,7 +2027,7 @@ # # @id: Block graph node identifier. This @id is generated only for # x-debug-query-block-graph and does not relate to any other -# identifiers in Qemu. +# identifiers in QEMU. # # @type: Type of graph node. Can be one of block-backend, block-job # or block-driver-state. @@ -2169,8 +2166,8 @@ # @format: the format of the new destination, default is to probe if # @mode is 'existing', else the format of the source # -# @node-name: the new block driver state node name in the graph (Since -# 2.1) +# @node-name: the new block driver state node name in the graph +# (Since 2.1) # # @replaces: with sync=full graph node name to be replaced by the new # image when a whole image copy is done. This can be used to @@ -2212,16 +2209,15 @@ # 'background' (Since: 3.0) # # @auto-finalize: When false, this job will wait in a PENDING state -# after it has finished its work, waiting for @block-job-finalize -# before making any block graph changes. When true, this job will +# after it has finished its work, waiting for @job-finalize before +# making any block graph changes. When true, this job will # automatically perform its abort or commit actions. Defaults to # true. (Since 3.1) # # @auto-dismiss: When false, this job will wait in a CONCLUDED state # after it has completely ceased all work, and awaits -# @block-job-dismiss. When true, this job will automatically -# disappear from the query list without user intervention. -# Defaults to true. (Since 3.1) +# @job-dismiss. When true, this job will automatically disappear +# without user intervention. Defaults to true. (Since 3.1) # # Since: 1.3 ## @@ -2531,16 +2527,15 @@ # 'background' (Since: 3.0) # # @auto-finalize: When false, this job will wait in a PENDING state -# after it has finished its work, waiting for @block-job-finalize -# before making any block graph changes. When true, this job will +# after it has finished its work, waiting for @job-finalize before +# making any block graph changes. When true, this job will # automatically perform its abort or commit actions. Defaults to # true. (Since 3.1) # # @auto-dismiss: When false, this job will wait in a CONCLUDED state # after it has completely ceased all work, and awaits -# @block-job-dismiss. When true, this job will automatically -# disappear from the query list without user intervention. -# Defaults to true. (Since 3.1) +# @job-dismiss. When true, this job will automatically disappear +# without user intervention. Defaults to true. (Since 3.1) # # @target-is-zero: Assume the destination reads as all zeroes before # the mirror started. Setting this to true can speed up the @@ -2593,11 +2588,11 @@ # # @bps_max: total throughput limit during bursts, in bytes (Since 1.7) # -# @bps_rd_max: read throughput limit during bursts, in bytes (Since -# 1.7) +# @bps_rd_max: read throughput limit during bursts, in bytes +# (Since 1.7) # -# @bps_wr_max: write throughput limit during bursts, in bytes (Since -# 1.7) +# @bps_wr_max: write throughput limit during bursts, in bytes +# (Since 1.7) # # @iops_max: total I/O operations per second during bursts, in bytes # (Since 1.7) @@ -2667,7 +2662,7 @@ # @iops-total-max: I/O operations burst # # @iops-total-max-length: length of the iops-total-max burst period, -# in seconds It must only be set if @iops-total-max is set as +# in seconds. It must only be set if @iops-total-max is set as # well. # # @iops-read: limit read operations per second @@ -2675,14 +2670,14 @@ # @iops-read-max: I/O operations read burst # # @iops-read-max-length: length of the iops-read-max burst period, in -# seconds It must only be set if @iops-read-max is set as well. +# seconds. It must only be set if @iops-read-max is set as well. # # @iops-write: limit write operations per second # # @iops-write-max: I/O operations write burst # # @iops-write-max-length: length of the iops-write-max burst period, -# in seconds It must only be set if @iops-write-max is set as +# in seconds. It must only be set if @iops-write-max is set as # well. # # @bps-total: limit total bytes per second @@ -2697,14 +2692,14 @@ # @bps-read-max: total bytes read burst # # @bps-read-max-length: length of the bps-read-max burst period, in -# seconds It must only be set if @bps-read-max is set as well. +# seconds. It must only be set if @bps-read-max is set as well. # # @bps-write: limit write bytes per second # # @bps-write-max: total bytes write burst # # @bps-write-max-length: length of the bps-write-max burst period, in -# seconds It must only be set if @bps-write-max is set as well. +# seconds. It must only be set if @bps-write-max is set as well. # # @iops-size: when limiting by iops max size of an I/O in bytes # @@ -2789,12 +2784,12 @@ # immediately once streaming has started. The status of ongoing block # streaming operations can be checked with query-block-jobs. The # operation can be stopped before it has completed using the -# block-job-cancel command. +# job-cancel or block-job-cancel command. # # The node that receives the data is called the top image, can be # located in any part of the chain (but always above the base image; # see below) and can be specified using its device or node name. -# Earlier qemu versions only allowed 'device' to name the top level +# Earlier QEMU versions only allowed 'device' to name the top level # node; presence of the 'base-node' parameter during introspection can # be used as a witness of the enhanced semantics of 'device'. # @@ -2859,16 +2854,15 @@ # autogenerated. (Since: 6.0) # # @auto-finalize: When false, this job will wait in a PENDING state -# after it has finished its work, waiting for @block-job-finalize -# before making any block graph changes. When true, this job will +# after it has finished its work, waiting for @job-finalize before +# making any block graph changes. When true, this job will # automatically perform its abort or commit actions. Defaults to # true. (Since 3.1) # # @auto-dismiss: When false, this job will wait in a CONCLUDED state # after it has completely ceased all work, and awaits -# @block-job-dismiss. When true, this job will automatically -# disappear from the query list without user intervention. -# Defaults to true. (Since 3.1) +# @job-dismiss. When true, this job will automatically disappear +# without user intervention. Defaults to true. (Since 3.1) # # Errors: # - If @device does not exist, DeviceNotFound. @@ -3030,10 +3024,10 @@ # state. Completing the job in any other state is an error. # # This is supported only for drive mirroring, where it also switches -# the device to write to the target path only. Note that drive +# the device to write to the target path only. Note that drive # mirroring includes drive-mirror, blockdev-mirror and block-commit # job (only in case of "active commit", when the node being commited -# is used by the guest). The ability to complete is signaled with a +# is used by the guest). The ability to complete is signaled with a # BLOCK_JOB_READY event. # # This command completes an active background block operation @@ -3068,16 +3062,16 @@ # # Deletes a job that is in the CONCLUDED state. This command only # needs to be run explicitly for jobs that don't have automatic -# dismiss enabled. In turn, automatic dismiss may be enabled only +# dismiss enabled. In turn, automatic dismiss may be enabled only # for jobs that have @auto-dismiss option, which are drive-backup, # blockdev-backup, drive-mirror, blockdev-mirror, block-commit and -# block-stream. @auto-dismiss is enabled by default for these +# block-stream. @auto-dismiss is enabled by default for these # jobs. # # This command will refuse to operate on any job that has not yet -# reached its terminal state, JOB_STATUS_CONCLUDED. For jobs that -# make use of the BLOCK_JOB_READY event, block-job-cancel or -# block-job-complete will still need to be used as appropriate. +# reached its terminal state, CONCLUDED. For jobs that make use of +# the BLOCK_JOB_READY event, job-cancel, block-job-cancel or +# job-complete will still need to be used as appropriate. # # @id: The job identifier. # @@ -3196,7 +3190,7 @@ # # Selects the AIO backend to handle I/O requests # -# @threads: Use qemu's thread pool +# @threads: Use QEMU's thread pool # # @native: Use native AIO backend (only Linux and Windows) # @@ -3415,8 +3409,8 @@ # Driver specific block device options for LUKS. # # @key-secret: the ID of a QCryptoSecret object providing the -# decryption key (since 2.6). Mandatory except when doing a -# metadata-only probe of the image. +# decryption key. Mandatory except when doing a metadata-only +# probe of the image. (since 2.6) # # @header: block device holding a detached LUKS header. (since 9.0) # @@ -3655,8 +3649,8 @@ # this feature. (since 2.5) # # @encrypt: Image decryption options. Mandatory for encrypted images, -# except when doing a metadata-only probe of the image. (since -# 2.10) +# except when doing a metadata-only probe of the image. +# (since 2.10) # # @data-file: reference to or definition of the external data file. # This may only be specified for images that require an external @@ -4326,8 +4320,8 @@ # @user: Ceph id name. # # @auth-client-required: Acceptable authentication modes. This maps -# to Ceph configuration option "auth_client_required". (Since -# 3.0) +# to Ceph configuration option "auth_client_required". +# (Since 3.0) # # @key-secret: ID of a QCryptoSecret object providing a key for cephx # authentication. This maps to Ceph configuration option "key". @@ -4581,8 +4575,8 @@ # error. During the first @reconnect-delay seconds, all requests # are paused and will be rerun on a successful reconnect. After # that time, any delayed requests and all future requests before a -# successful reconnect will immediately fail. Default 0 (Since -# 4.2) +# successful reconnect will immediately fail. Default 0 +# (Since 4.2) # # @open-timeout: In seconds. If zero, the nbd driver tries the # connection only once, and fails to open if the connection fails. @@ -4724,11 +4718,11 @@ # # @driver: block driver name # -# @node-name: the node name of the new node (Since 2.0). This option -# is required on the top level of blockdev-add. Valid node names -# start with an alphabetic character and may contain only -# alphanumeric characters, '-', '.' and '_'. Their maximum length -# is 31 characters. +# @node-name: the node name of the new node. This option is required +# on the top level of blockdev-add. Valid node names start with +# an alphabetic character and may contain only alphanumeric +# characters, '-', '.' and '_'. Their maximum length is 31 +# characters. (Since 2.0) # # @discard: discard-related options (default: ignore) # @@ -4737,7 +4731,7 @@ # @active: whether the block node should be activated (default: true). # Having inactive block nodes is useful primarily for migration because it # allows opening an image on the destination while the source is still -# holding locks for it. (Since 10.0) +# holding locks for it. (Since 10.0) # # @read-only: whether the block device should be read-only (default: # false). Note that some block drivers support only read-only @@ -4947,7 +4941,7 @@ # 3) A reference to a different node: the current child is replaced # with the specified one. # -# 4) NULL: the current child (if any) is detached. +# 4) null: the current child (if any) is detached. # # Options (1) and (2) are supported in all cases. Option (3) is # supported for @file and @backing, and option (4) for @backing only. @@ -4999,14 +4993,14 @@ ## # @blockdev-set-active: # -# Activate or inactivate a block device. Use this to manage the handover of +# Activate or inactivate a block device. Use this to manage the handover of # block devices on migration with qemu-storage-daemon. # # Activating a node automatically activates all of its child nodes first. # Inactivating a node automatically inactivates any of its child nodes that are # not in use by a still active node. # -# @node-name: Name of the graph node to activate or inactivate. By default, all +# @node-name: Name of the graph node to activate or inactivate. By default, all # nodes are affected by the operation. # # @active: true if the nodes should be active when the command returns success, @@ -5157,10 +5151,10 @@ ## # @BlockdevQcow2Version: # -# @v2: The original QCOW2 format as introduced in qemu 0.10 (version +# @v2: The original QCOW2 format as introduced in QEMU 0.10 (version # 2) # -# @v3: The extended QCOW2 format as introduced in qemu 1.1 (version 3) +# @v3: The extended QCOW2 format as introduced in QEMU 1.1 (version 3) # # Since: 2.12 ## @@ -5580,7 +5574,7 @@ # @x-blockdev-amend: # # Starts a job to amend format specific options of an existing open -# block device The job is automatically finalized, but a manual +# block device. The job is automatically finalized, but a manual # job-dismiss is required. # # @job-id: Identifier for the newly created job. @@ -5589,7 +5583,7 @@ # # @options: Options (driver specific) # -# @force: Allow unsafe operations, format specific For luks that +# @force: Allow unsafe operations, format specific. For luks that # allows erase of the last active keyslot (permanent loss of # data), and replacement of an active keyslot (possible loss of # data if IO error happens) @@ -5866,7 +5860,7 @@ # @BLOCK_JOB_PENDING: # # Emitted when a block job is awaiting explicit authorization to -# finalize graph changes via @block-job-finalize. If this job is part +# finalize graph changes via @job-finalize. If this job is part # of a transaction, it will not emit this event until the transaction # has converged first. # diff --git a/qapi/block-export.json b/qapi/block-export.json index c783e01..ed4deb5 100644 --- a/qapi/block-export.json +++ b/qapi/block-export.json @@ -169,7 +169,7 @@ # @growable: Whether writes beyond the EOF should grow the block node # accordingly. (default: false) # -# @allow-other: If this is off, only qemu's user is allowed access to +# @allow-other: If this is off, only QEMU's user is allowed access to # this export. That cannot be changed even with chmod or chown. # Enabling this option will allow other users access to the export # with the FUSE mount option "allow_other". Note that using @@ -373,9 +373,9 @@ # (since: 5.2) # # @allow-inactive: If true, the export allows the exported node to be inactive. -# If it is created for an inactive block node, the node remains inactive. If +# If it is created for an inactive block node, the node remains inactive. If # the export type doesn't support running on an inactive node, an error is -# returned. If false, inactive block nodes are automatically activated before +# returned. If false, inactive block nodes are automatically activated before # creating the export and trying to inactivate them later fails. # (since: 10.0; default: false) # diff --git a/qapi/block.json b/qapi/block.json index e66666f..1490a1a 100644 --- a/qapi/block.json +++ b/qapi/block.json @@ -48,7 +48,7 @@ ## # @FloppyDriveType: # -# Type of Floppy drive to be emulated by the Floppy Disk Controller. +# Type of floppy drive to be emulated by the Floppy Disk Controller. # # @144: 1.44MB 3.5" drive # @@ -83,7 +83,7 @@ ## # @query-pr-managers: # -# Returns a list of information about each persistent reservation +# Return a list of information about each persistent reservation # manager. # # Returns: a list of @PRManagerInfo for each persistent reservation diff --git a/qapi/char.json b/qapi/char.json index dde2f95..df6e325 100644 --- a/qapi/char.json +++ b/qapi/char.json @@ -34,7 +34,7 @@ ## # @query-chardev: # -# Returns information about current character devices. +# Return information about current character devices. # # Returns: a list of @ChardevInfo # @@ -80,7 +80,7 @@ ## # @query-chardev-backends: # -# Returns information about character device backends. +# Return information about character device backends. # # Returns: a list of @ChardevBackendInfo # @@ -274,7 +274,7 @@ # @reconnect: For a client socket, if a socket is disconnected, then # attempt a reconnect after the given number of seconds. Setting # this to zero disables this function. The use of this member is -# deprecated, use @reconnect-ms instead. (default: 0) (Since: 2.2) +# deprecated, use @reconnect-ms instead. (default: 0) (Since: 2.2) # # @reconnect-ms: For a client socket, if a socket is disconnected, # then attempt a reconnect after the given number of milliseconds. @@ -351,7 +351,7 @@ # Configuration info for stdio chardevs. # # @signal: Allow signals (such as SIGINT triggered by ^C) be delivered -# to qemu. Default: true. +# to QEMU. Default: true. # # Since: 1.5 ## @@ -443,7 +443,7 @@ ## # @ChardevQemuVDAgent: # -# Configuration info for qemu vdagent implementation. +# Configuration info for QEMU vdagent implementation. # # @mouse: enable/disable mouse, default is enabled. # @@ -656,7 +656,7 @@ ## # @ChardevQemuVDAgentWrapper: # -# @data: Configuration info for qemu vdagent implementation +# @data: Configuration info for QEMU vdagent implementation # # Since: 6.1 ## diff --git a/qapi/control.json b/qapi/control.json index 336386f..34b733f 100644 --- a/qapi/control.json +++ b/qapi/control.json @@ -91,7 +91,7 @@ ## # @query-version: # -# Returns the current version of QEMU. +# Return the current version of QEMU. # # Returns: A @VersionInfo object describing the current version of # QEMU. diff --git a/qapi/crypto.json b/qapi/crypto.json index c9d967d..9ec6301 100644 --- a/qapi/crypto.json +++ b/qapi/crypto.json @@ -55,7 +55,8 @@ # @sha512: SHA-512. (since 2.7) # # @ripemd160: RIPEMD-160. (since 2.7) -# @sm3: SM3. (since 9.2.0) +# +# @sm3: SM3. (since 9.2.0) # # Since: 2.6 ## @@ -202,19 +203,19 @@ # # The options that apply to LUKS encryption format initialization # -# @cipher-alg: the cipher algorithm for data encryption Currently +# @cipher-alg: the cipher algorithm for data encryption. Currently # defaults to 'aes-256'. # -# @cipher-mode: the cipher mode for data encryption Currently defaults -# to 'xts' +# @cipher-mode: the cipher mode for data encryption. Currently +# defaults to 'xts' # -# @ivgen-alg: the initialization vector generator Currently defaults +# @ivgen-alg: the initialization vector generator. Currently defaults # to 'plain64' # -# @ivgen-hash-alg: the initialization vector generator hash Currently -# defaults to 'sha256' +# @ivgen-hash-alg: the initialization vector generator hash. +# Currently defaults to 'sha256' # -# @hash-alg: the master key hash algorithm Currently defaults to +# @hash-alg: the master key hash algorithm. Currently defaults to # 'sha256' # # @iter-time: number of milliseconds to spend in PBKDF passphrase @@ -370,11 +371,11 @@ # @new-secret: The ID of a QCryptoSecret object providing the password # to be written into added active keyslots # -# @old-secret: Optional (for deactivation only) If given will +# @old-secret: Optional (for deactivation only). If given will # deactivate all keyslots that match password located in # QCryptoSecret with this ID # -# @iter-time: Optional (for activation only) Number of milliseconds to +# @iter-time: Optional (for activation only). Number of milliseconds to # spend in PBKDF passphrase processing for the newly activated # keyslot. Currently defaults to 2000. # diff --git a/qapi/cryptodev.json b/qapi/cryptodev.json index 04d0e21..b13db26 100644 --- a/qapi/cryptodev.json +++ b/qapi/cryptodev.json @@ -15,7 +15,7 @@ # # @sym: symmetric encryption # -# @asym: asymmetric Encryption +# @asym: asymmetric encryption # # Since: 8.0 ## @@ -94,7 +94,7 @@ ## # @query-cryptodev: # -# Returns information about current crypto devices. +# Return information about current crypto devices. # # Returns: a list of @QCryptodevInfo # diff --git a/qapi/cxl.json b/qapi/cxl.json index dd947d3..8f2e923 100644 --- a/qapi/cxl.json +++ b/qapi/cxl.json @@ -117,7 +117,7 @@ # @nibble-mask: Identifies one or more nibbles that the error affects # # @bank-group: Bank group of the memory event location, incorporating -# a number of Banks. +# a number of banks. # # @bank: Bank of the memory event location. A single bank is accessed # per read or write of the memory. diff --git a/qapi/dump.json b/qapi/dump.json index d7826c0..d0ba1f0 100644 --- a/qapi/dump.json +++ b/qapi/dump.json @@ -54,9 +54,9 @@ # @paging: if true, do paging to get guest's memory mapping. This # allows using gdb to process the core file. # -# IMPORTANT: this option can make QEMU allocate several gigabytes -# of RAM. This can happen for a large guest, or a malicious guest -# pretending to be large. +# **Important**: this option can make QEMU allocate several +# gigabytes of RAM. This can happen for a large guest, or a +# malicious guest pretending to be large. # # Also, paging=true has the following limitations: # @@ -195,7 +195,7 @@ ## # @query-dump-guest-memory-capability: # -# Returns the available formats for dump-guest-memory +# Return the available formats for dump-guest-memory # # Returns: A @DumpGuestMemoryCapability object listing available # formats for dump-guest-memory diff --git a/qapi/introspect.json b/qapi/introspect.json index 01bb242..e9e0297 100644 --- a/qapi/introspect.json +++ b/qapi/introspect.json @@ -26,9 +26,9 @@ # the QAPI schema. # # Furthermore, while we strive to keep the QMP wire format -# backwards-compatible across qemu versions, the introspection output +# backwards-compatible across QEMU versions, the introspection output # is not guaranteed to have the same stability. For example, one -# version of qemu may list an object member as an optional +# version of QEMU may list an object member as an optional # non-variant, while another lists the same member only through the # object's variants; or the type of a member may change from a generic # string into a specific enum or from one specific type into an @@ -154,8 +154,8 @@ # # Additional SchemaInfo members for meta-type 'enum'. # -# @members: the enum type's members, in no particular order (since -# 6.2). +# @members: the enum type's members, in no particular order. +# (since 6.2) # # @values: the enumeration type's member names, in no particular # order. Redundant with @members. Just for backward diff --git a/qapi/job.json b/qapi/job.json index b03f80b..126fa5c 100644 --- a/qapi/job.json +++ b/qapi/job.json @@ -20,14 +20,14 @@ # # @create: image creation job type, see "blockdev-create" (since 3.0) # -# @amend: image options amend job type, see "x-blockdev-amend" (since -# 5.1) +# @amend: image options amend job type, see "x-blockdev-amend" +# (since 5.1) # -# @snapshot-load: snapshot load job type, see "snapshot-load" (since -# 6.0) +# @snapshot-load: snapshot load job type, see "snapshot-load" +# (since 6.0) # -# @snapshot-save: snapshot save job type, see "snapshot-save" (since -# 6.0) +# @snapshot-save: snapshot save job type, see "snapshot-save" +# (since 6.0) # # @snapshot-delete: snapshot delete job type, see "snapshot-delete" # (since 6.0) @@ -74,7 +74,7 @@ # process. # # @concluded: The job has finished all work. If auto-dismiss was set -# to false, the job will remain in the query list until it is +# to false, the job will remain in this state until it is # dismissed via @job-dismiss. # # @null: The job is in the process of being dismantled. This state @@ -191,10 +191,10 @@ # state. Completing the job in any other state is an error. # # This is supported only for drive mirroring, where it also switches -# the device to write to the target path only. Note that drive +# the device to write to the target path only. Note that drive # mirroring includes drive-mirror, blockdev-mirror and block-commit # job (only in case of "active commit", when the node being commited -# is used by the guest). The ability to complete is signaled with a +# is used by the guest). The ability to complete is signaled with a # BLOCK_JOB_READY event. # # This command completes an active background block operation @@ -216,16 +216,16 @@ # # Deletes a job that is in the CONCLUDED state. This command only # needs to be run explicitly for jobs that don't have automatic -# dismiss enabled. In turn, automatic dismiss may be enabled only +# dismiss enabled. In turn, automatic dismiss may be enabled only # for jobs that have @auto-dismiss option, which are drive-backup, # blockdev-backup, drive-mirror, blockdev-mirror, block-commit and -# block-stream. @auto-dismiss is enabled by default for these +# block-stream. @auto-dismiss is enabled by default for these # jobs. # # This command will refuse to operate on any job that has not yet -# reached its terminal state, JOB_STATUS_CONCLUDED. For jobs that -# make use of JOB_READY event, job-cancel or job-complete will still -# need to be used as appropriate. +# reached its terminal state, CONCLUDED. For jobs that make use of +# the JOB_READY event, job-cancel or job-complete will still need to +# be used as appropriate. # # @id: The job identifier. # diff --git a/qapi/machine-s390x.json b/qapi/machine-s390x.json new file mode 100644 index 0000000..966dbd6 --- /dev/null +++ b/qapi/machine-s390x.json @@ -0,0 +1,121 @@ +# -*- Mode: Python -*- +# vim: filetype=python +# +# SPDX-License-Identifier: GPL-2.0-or-later +# This work is licensed under the terms of the GNU GPL, version 2 or later. +# See the COPYING file in the top-level directory. + +{ 'include': 'machine-common.json' } + +## +# @S390CpuPolarization: +# +# An enumeration of CPU polarization that can be assumed by a virtual +# S390 CPU +# +# Since: 8.2 +## +{ 'enum': 'S390CpuPolarization', + 'data': [ 'horizontal', 'vertical' ] +} + +## +# @set-cpu-topology: +# +# Modify the topology by moving the CPU inside the topology tree, or +# by changing a modifier attribute of a CPU. Absent values will not +# be modified. +# +# @core-id: the vCPU ID to be moved +# +# @socket-id: destination socket to move the vCPU to +# +# @book-id: destination book to move the vCPU to +# +# @drawer-id: destination drawer to move the vCPU to +# +# @entitlement: entitlement to set +# +# @dedicated: whether the provisioning of real to virtual CPU is +# dedicated +# +# Features: +# +# @unstable: This command is experimental. +# +# Since: 8.2 +## +{ 'command': 'set-cpu-topology', + 'data': { + 'core-id': 'uint16', + '*socket-id': 'uint16', + '*book-id': 'uint16', + '*drawer-id': 'uint16', + '*entitlement': 'S390CpuEntitlement', + '*dedicated': 'bool' + }, + 'features': [ 'unstable' ] +} + +## +# @CPU_POLARIZATION_CHANGE: +# +# Emitted when the guest asks to change the polarization. +# +# The guest can tell the host (via the PTF instruction) whether the +# CPUs should be provisioned using horizontal or vertical +# polarization. +# +# On horizontal polarization the host is expected to provision all +# vCPUs equally. +# +# On vertical polarization the host can provision each vCPU +# differently. The guest will get information on the details of the +# provisioning the next time it uses the STSI(15) instruction. +# +# @polarization: polarization specified by the guest +# +# Features: +# +# @unstable: This event is experimental. +# +# Since: 8.2 +# +# .. qmp-example:: +# +# <- { "event": "CPU_POLARIZATION_CHANGE", +# "data": { "polarization": "horizontal" }, +# "timestamp": { "seconds": 1401385907, "microseconds": 422329 } } +## +{ 'event': 'CPU_POLARIZATION_CHANGE', + 'data': { 'polarization': 'S390CpuPolarization' }, + 'features': [ 'unstable' ] +} + +## +# @CpuPolarizationInfo: +# +# The result of a CPU polarization query. +# +# @polarization: the CPU polarization +# +# Since: 8.2 +## +{ 'struct': 'CpuPolarizationInfo', + 'data': { 'polarization': 'S390CpuPolarization' } +} + +## +# @query-s390x-cpu-polarization: +# +# Features: +# +# @unstable: This command is experimental. +# +# Returns: the machine's CPU polarization +# +# Since: 8.2 +## +{ 'command': 'query-s390x-cpu-polarization', 'returns': 'CpuPolarizationInfo', + 'features': [ 'unstable' ] +} diff --git a/qapi/machine-target.json b/qapi/machine-target.json deleted file mode 100644 index 426ce4e..0000000 --- a/qapi/machine-target.json +++ /dev/null @@ -1,523 +0,0 @@ -# -*- Mode: Python -*- -# vim: filetype=python -# -# This work is licensed under the terms of the GNU GPL, version 2 or later. -# See the COPYING file in the top-level directory. - -{ 'include': 'machine-common.json' } - -## -# @CpuModelInfo: -# -# Virtual CPU model. -# -# A CPU model consists of the name of a CPU definition, to which delta -# changes are applied (e.g. features added/removed). Most magic -# values that an architecture might require should be hidden behind -# the name. However, if required, architectures can expose relevant -# properties. -# -# @name: the name of the CPU definition the model is based on -# -# @props: a dictionary of QOM properties to be applied -# -# Since: 2.8 -## -{ 'struct': 'CpuModelInfo', - 'data': { 'name': 'str', - '*props': 'any' } } - -## -# @CpuModelExpansionType: -# -# An enumeration of CPU model expansion types. -# -# @static: Expand to a static CPU model, a combination of a static -# base model name and property delta changes. As the static base -# model will never change, the expanded CPU model will be the -# same, independent of QEMU version, machine type, machine -# options, and accelerator options. Therefore, the resulting -# model can be used by tooling without having to specify a -# compatibility machine - e.g. when displaying the "host" model. -# The @static CPU models are migration-safe. -# -# @full: Expand all properties. The produced model is not guaranteed -# to be migration-safe, but allows tooling to get an insight and -# work with model details. -# -# .. note:: When a non-migration-safe CPU model is expanded in static -# mode, some features enabled by the CPU model may be omitted, -# because they can't be implemented by a static CPU model -# definition (e.g. cache info passthrough and PMU passthrough in -# x86). If you need an accurate representation of the features -# enabled by a non-migration-safe CPU model, use @full. If you -# need a static representation that will keep ABI compatibility -# even when changing QEMU version or machine-type, use @static (but -# keep in mind that some features may be omitted). -# -# Since: 2.8 -## -{ 'enum': 'CpuModelExpansionType', - 'data': [ 'static', 'full' ] } - -## -# @CpuModelCompareResult: -# -# An enumeration of CPU model comparison results. The result is -# usually calculated using e.g. CPU features or CPU generations. -# -# @incompatible: If model A is incompatible to model B, model A is not -# guaranteed to run where model B runs and the other way around. -# -# @identical: If model A is identical to model B, model A is -# guaranteed to run where model B runs and the other way around. -# -# @superset: If model A is a superset of model B, model B is -# guaranteed to run where model A runs. There are no guarantees -# about the other way. -# -# @subset: If model A is a subset of model B, model A is guaranteed to -# run where model B runs. There are no guarantees about the other -# way. -# -# Since: 2.8 -## -{ 'enum': 'CpuModelCompareResult', - 'data': [ 'incompatible', 'identical', 'superset', 'subset' ] } - -## -# @CpuModelBaselineInfo: -# -# The result of a CPU model baseline. -# -# @model: the baselined CpuModelInfo. -# -# Since: 2.8 -## -{ 'struct': 'CpuModelBaselineInfo', - 'data': { 'model': 'CpuModelInfo' }, - 'if': 'TARGET_S390X' } - -## -# @CpuModelCompareInfo: -# -# The result of a CPU model comparison. -# -# @result: The result of the compare operation. -# -# @responsible-properties: List of properties that led to the -# comparison result not being identical. -# -# @responsible-properties is a list of QOM property names that led to -# both CPUs not being detected as identical. For identical models, -# this list is empty. If a QOM property is read-only, that means -# there's no known way to make the CPU models identical. If the -# special property name "type" is included, the models are by -# definition not identical and cannot be made identical. -# -# Since: 2.8 -## -{ 'struct': 'CpuModelCompareInfo', - 'data': { 'result': 'CpuModelCompareResult', - 'responsible-properties': ['str'] }, - 'if': 'TARGET_S390X' } - -## -# @query-cpu-model-comparison: -# -# Compares two CPU models, @modela and @modelb, returning how they -# compare in a specific configuration. The results indicates how -# both models compare regarding runnability. This result can be -# used by tooling to make decisions if a certain CPU model will -# run in a certain configuration or if a compatible CPU model has -# to be created by baselining. -# -# Usually, a CPU model is compared against the maximum possible CPU -# model of a certain configuration (e.g. the "host" model for KVM). -# If that CPU model is identical or a subset, it will run in that -# configuration. -# -# The result returned by this command may be affected by: -# -# * QEMU version: CPU models may look different depending on the QEMU -# version. (Except for CPU models reported as "static" in -# query-cpu-definitions.) -# * machine-type: CPU model may look different depending on the -# machine-type. (Except for CPU models reported as "static" in -# query-cpu-definitions.) -# * machine options (including accelerator): in some architectures, -# CPU models may look different depending on machine and accelerator -# options. (Except for CPU models reported as "static" in -# query-cpu-definitions.) -# * "-cpu" arguments and global properties: arguments to the -cpu -# option and global properties may affect expansion of CPU models. -# Using query-cpu-model-expansion while using these is not advised. -# -# Some architectures may not support comparing CPU models. s390x -# supports comparing CPU models. -# -# @modela: description of the first CPU model to compare, referred to -# as "model A" in CpuModelCompareResult -# -# @modelb: description of the second CPU model to compare, referred to -# as "model B" in CpuModelCompareResult -# -# Returns: a CpuModelCompareInfo describing how both CPU models -# compare -# -# Errors: -# - if comparing CPU models is not supported -# - if a model cannot be used -# - if a model contains an unknown cpu definition name, unknown -# properties or properties with wrong types. -# -# .. note:: This command isn't specific to s390x, but is only -# implemented on this architecture currently. -# -# Since: 2.8 -## -{ 'command': 'query-cpu-model-comparison', - 'data': { 'modela': 'CpuModelInfo', 'modelb': 'CpuModelInfo' }, - 'returns': 'CpuModelCompareInfo', - 'if': 'TARGET_S390X' } - -## -# @query-cpu-model-baseline: -# -# Baseline two CPU models, @modela and @modelb, creating a compatible -# third model. The created model will always be a static, -# migration-safe CPU model (see "static" CPU model expansion for -# details). -# -# This interface can be used by tooling to create a compatible CPU -# model out two CPU models. The created CPU model will be identical -# to or a subset of both CPU models when comparing them. Therefore, -# the created CPU model is guaranteed to run where the given CPU -# models run. -# -# The result returned by this command may be affected by: -# -# * QEMU version: CPU models may look different depending on the QEMU -# version. (Except for CPU models reported as "static" in -# query-cpu-definitions.) -# * machine-type: CPU model may look different depending on the -# machine-type. (Except for CPU models reported as "static" in -# query-cpu-definitions.) -# * machine options (including accelerator): in some architectures, -# CPU models may look different depending on machine and accelerator -# options. (Except for CPU models reported as "static" in -# query-cpu-definitions.) -# * "-cpu" arguments and global properties: arguments to the -cpu -# option and global properties may affect expansion of CPU models. -# Using query-cpu-model-expansion while using these is not advised. -# -# Some architectures may not support baselining CPU models. s390x -# supports baselining CPU models. -# -# @modela: description of the first CPU model to baseline -# -# @modelb: description of the second CPU model to baseline -# -# Returns: a CpuModelBaselineInfo describing the baselined CPU model -# -# Errors: -# - if baselining CPU models is not supported -# - if a model cannot be used -# - if a model contains an unknown cpu definition name, unknown -# properties or properties with wrong types. -# -# .. note:: This command isn't specific to s390x, but is only -# implemented on this architecture currently. -# -# Since: 2.8 -## -{ 'command': 'query-cpu-model-baseline', - 'data': { 'modela': 'CpuModelInfo', - 'modelb': 'CpuModelInfo' }, - 'returns': 'CpuModelBaselineInfo', - 'if': 'TARGET_S390X' } - -## -# @CpuModelExpansionInfo: -# -# The result of a cpu model expansion. -# -# @model: the expanded CpuModelInfo. -# -# @deprecated-props: a list of properties that are flagged as -# deprecated by the CPU vendor. The list depends on the -# CpuModelExpansionType: "static" properties are a subset of the -# enabled-properties for the expanded model; "full" properties are -# a set of properties that are deprecated across all models for -# the architecture. (since: 9.1). -# -# Since: 2.8 -## -{ 'struct': 'CpuModelExpansionInfo', - 'data': { 'model': 'CpuModelInfo', - 'deprecated-props' : { 'type': ['str'], - 'if': 'TARGET_S390X' } }, - 'if': { 'any': [ 'TARGET_S390X', - 'TARGET_I386', - 'TARGET_ARM', - 'TARGET_LOONGARCH64', - 'TARGET_RISCV' ] } } - -## -# @query-cpu-model-expansion: -# -# Expands a given CPU model, @model, (or a combination of CPU model + -# additional options) to different granularities, specified by @type, -# allowing tooling to get an understanding what a specific CPU model -# looks like in QEMU under a certain configuration. -# -# This interface can be used to query the "host" CPU model. -# -# The data returned by this command may be affected by: -# -# * QEMU version: CPU models may look different depending on the QEMU -# version. (Except for CPU models reported as "static" in -# query-cpu-definitions.) -# * machine-type: CPU model may look different depending on the -# machine-type. (Except for CPU models reported as "static" in -# query-cpu-definitions.) -# * machine options (including accelerator): in some architectures, -# CPU models may look different depending on machine and accelerator -# options. (Except for CPU models reported as "static" in -# query-cpu-definitions.) -# * "-cpu" arguments and global properties: arguments to the -cpu -# option and global properties may affect expansion of CPU models. -# Using query-cpu-model-expansion while using these is not advised. -# -# Some architectures may not support all expansion types. s390x -# supports "full" and "static". Arm only supports "full". -# -# @model: description of the CPU model to expand -# -# @type: expansion type, specifying how to expand the CPU model -# -# Returns: a CpuModelExpansionInfo describing the expanded CPU model -# -# Errors: -# - if expanding CPU models is not supported -# - if the model cannot be expanded -# - if the model contains an unknown CPU definition name, unknown -# properties or properties with a wrong type -# - if an expansion type is not supported -# -# Since: 2.8 -## -{ 'command': 'query-cpu-model-expansion', - 'data': { 'type': 'CpuModelExpansionType', - 'model': 'CpuModelInfo' }, - 'returns': 'CpuModelExpansionInfo', - 'if': { 'any': [ 'TARGET_S390X', - 'TARGET_I386', - 'TARGET_ARM', - 'TARGET_LOONGARCH64', - 'TARGET_RISCV' ] } } - -## -# @CpuDefinitionInfo: -# -# Virtual CPU definition. -# -# @name: the name of the CPU definition -# -# @migration-safe: whether a CPU definition can be safely used for -# migration in combination with a QEMU compatibility machine when -# migrating between different QEMU versions and between hosts with -# different sets of (hardware or software) capabilities. If not -# provided, information is not available and callers should not -# assume the CPU definition to be migration-safe. (since 2.8) -# -# @static: whether a CPU definition is static and will not change -# depending on QEMU version, machine type, machine options and -# accelerator options. A static model is always migration-safe. -# (since 2.8) -# -# @unavailable-features: List of properties that prevent the CPU model -# from running in the current host. (since 2.8) -# -# @typename: Type name that can be used as argument to -# @device-list-properties, to introspect properties configurable -# using -cpu or -global. (since 2.9) -# -# @alias-of: Name of CPU model this model is an alias for. The target -# of the CPU model alias may change depending on the machine type. -# Management software is supposed to translate CPU model aliases -# in the VM configuration, because aliases may stop being -# migration-safe in the future (since 4.1) -# -# @deprecated: If true, this CPU model is deprecated and may be -# removed in some future version of QEMU according to the QEMU -# deprecation policy. (since 5.2) -# -# @unavailable-features is a list of QOM property names that represent -# CPU model attributes that prevent the CPU from running. If the QOM -# property is read-only, that means there's no known way to make the -# CPU model run in the current host. Implementations that choose not -# to provide specific information return the property name "type". If -# the property is read-write, it means that it MAY be possible to run -# the CPU model in the current host if that property is changed. -# Management software can use it as hints to suggest or choose an -# alternative for the user, or just to generate meaningful error -# messages explaining why the CPU model can't be used. If -# @unavailable-features is an empty list, the CPU model is runnable -# using the current host and machine-type. If @unavailable-features -# is not present, runnability information for the CPU is not -# available. -# -# Since: 1.2 -## -{ 'struct': 'CpuDefinitionInfo', - 'data': { 'name': 'str', - '*migration-safe': 'bool', - 'static': 'bool', - '*unavailable-features': [ 'str' ], - 'typename': 'str', - '*alias-of' : 'str', - 'deprecated' : 'bool' }, - 'if': { 'any': [ 'TARGET_PPC', - 'TARGET_ARM', - 'TARGET_I386', - 'TARGET_S390X', - 'TARGET_MIPS', - 'TARGET_LOONGARCH64', - 'TARGET_RISCV' ] } } - -## -# @query-cpu-definitions: -# -# Return a list of supported virtual CPU definitions -# -# Returns: a list of CpuDefinitionInfo -# -# Since: 1.2 -## -{ 'command': 'query-cpu-definitions', 'returns': ['CpuDefinitionInfo'], - 'if': { 'any': [ 'TARGET_PPC', - 'TARGET_ARM', - 'TARGET_I386', - 'TARGET_S390X', - 'TARGET_MIPS', - 'TARGET_LOONGARCH64', - 'TARGET_RISCV' ] } } - -## -# @S390CpuPolarization: -# -# An enumeration of CPU polarization that can be assumed by a virtual -# S390 CPU -# -# Since: 8.2 -## -{ 'enum': 'S390CpuPolarization', - 'data': [ 'horizontal', 'vertical' ], - 'if': 'TARGET_S390X' -} - -## -# @set-cpu-topology: -# -# Modify the topology by moving the CPU inside the topology tree, or -# by changing a modifier attribute of a CPU. Absent values will not -# be modified. -# -# @core-id: the vCPU ID to be moved -# -# @socket-id: destination socket to move the vCPU to -# -# @book-id: destination book to move the vCPU to -# -# @drawer-id: destination drawer to move the vCPU to -# -# @entitlement: entitlement to set -# -# @dedicated: whether the provisioning of real to virtual CPU is -# dedicated -# -# Features: -# -# @unstable: This command is experimental. -# -# Since: 8.2 -## -{ 'command': 'set-cpu-topology', - 'data': { - 'core-id': 'uint16', - '*socket-id': 'uint16', - '*book-id': 'uint16', - '*drawer-id': 'uint16', - '*entitlement': 'S390CpuEntitlement', - '*dedicated': 'bool' - }, - 'features': [ 'unstable' ], - 'if': { 'all': [ 'TARGET_S390X' , 'CONFIG_KVM' ] } -} - -## -# @CPU_POLARIZATION_CHANGE: -# -# Emitted when the guest asks to change the polarization. -# -# The guest can tell the host (via the PTF instruction) whether the -# CPUs should be provisioned using horizontal or vertical -# polarization. -# -# On horizontal polarization the host is expected to provision all -# vCPUs equally. -# -# On vertical polarization the host can provision each vCPU -# differently. The guest will get information on the details of the -# provisioning the next time it uses the STSI(15) instruction. -# -# @polarization: polarization specified by the guest -# -# Features: -# -# @unstable: This event is experimental. -# -# Since: 8.2 -# -# .. qmp-example:: -# -# <- { "event": "CPU_POLARIZATION_CHANGE", -# "data": { "polarization": "horizontal" }, -# "timestamp": { "seconds": 1401385907, "microseconds": 422329 } } -## -{ 'event': 'CPU_POLARIZATION_CHANGE', - 'data': { 'polarization': 'S390CpuPolarization' }, - 'features': [ 'unstable' ], - 'if': { 'all': [ 'TARGET_S390X', 'CONFIG_KVM' ] } -} - -## -# @CpuPolarizationInfo: -# -# The result of a CPU polarization query. -# -# @polarization: the CPU polarization -# -# Since: 8.2 -## -{ 'struct': 'CpuPolarizationInfo', - 'data': { 'polarization': 'S390CpuPolarization' }, - 'if': { 'all': [ 'TARGET_S390X', 'CONFIG_KVM' ] } -} - -## -# @query-s390x-cpu-polarization: -# -# Features: -# -# @unstable: This command is experimental. -# -# Returns: the machine's CPU polarization -# -# Since: 8.2 -## -{ 'command': 'query-s390x-cpu-polarization', 'returns': 'CpuPolarizationInfo', - 'features': [ 'unstable' ], - 'if': { 'all': [ 'TARGET_S390X', 'CONFIG_KVM' ] } -} diff --git a/qapi/machine.json b/qapi/machine.json index c8feb9f..0650b8d 100644 --- a/qapi/machine.json +++ b/qapi/machine.json @@ -99,7 +99,7 @@ ## # @query-cpus-fast: # -# Returns information about all virtual CPUs. +# Return information about all virtual CPUs. # # Returns: list of @CpuInfoFast # @@ -182,8 +182,8 @@ # @default-cpu-type: default CPU model typename if none is requested # via the -cpu argument. (since 4.2) # -# @default-ram-id: the default ID of initial RAM memory backend (since -# 5.2) +# @default-ram-id: the default ID of initial RAM memory backend +# (since 5.2) # # @acpi: machine type supports ACPI (since 8.0) # @@ -467,7 +467,7 @@ ## # @query-kvm: # -# Returns information about KVM acceleration +# Return information about KVM acceleration # # Returns: @KvmInfo # @@ -694,7 +694,7 @@ # Structure of HMAT (Heterogeneous Memory Attribute Table) # # For more information about @HmatLBDataType, see chapter 5.2.27.4: -# Table 5-146: Field "Data Type" of ACPI 6.3 spec. +# Table 5-146: Field "Data Type" of ACPI 6.3 spec. # # @access-latency: access latency (nanoseconds) # @@ -811,7 +811,7 @@ # # @policy: the write policy, none/write-back/write-through. # -# @line: the cache Line size in bytes. +# @line: the cache line size in bytes. # # Since: 5.0 ## @@ -930,7 +930,7 @@ ## # @query-memdev: # -# Returns information for all memory backends. +# Return information for all memory backends. # # Returns: a list of @Memdev. # @@ -1089,7 +1089,7 @@ # :annotated: # # For s390x-virtio-ccw machine type started with -# ``-smp 1,maxcpus=2 -cpu qemu`` (Since: 2.11):: +# ``-smp 1,maxcpus=2 -cpu qemu``:: # # -> { "execute": "query-hotpluggable-cpus" } # <- {"return": [ @@ -1160,7 +1160,7 @@ # # Information about the guest balloon device. # -# @actual: the logical size of the VM in bytes Formula used: +# @actual: the logical size of the VM in bytes. Formula used: # logical_vm_size = vm_ram_size - balloon_size # # Since: 0.14 @@ -1199,7 +1199,7 @@ # is equivalent to the @actual field return by the 'query-balloon' # command # -# @actual: the logical size of the VM in bytes Formula used: +# @actual: the logical size of the VM in bytes. Formula used: # logical_vm_size = vm_ram_size - balloon_size # # .. note:: This event is rate-limited. @@ -1235,7 +1235,7 @@ ## # @query-hv-balloon-status-report: # -# Returns the hv-balloon driver data contained in the last received +# Return the hv-balloon driver data contained in the last received # "STATUS" message from the guest. # # Returns: @@ -1916,3 +1916,366 @@ ## { 'command': 'dump-skeys', 'data': { 'filename': 'str' } } + +## +# @CpuModelInfo: +# +# Virtual CPU model. +# +# A CPU model consists of the name of a CPU definition, to which delta +# changes are applied (e.g. features added/removed). Most magic +# values that an architecture might require should be hidden behind +# the name. However, if required, architectures can expose relevant +# properties. +# +# @name: the name of the CPU definition the model is based on +# +# @props: a dictionary of QOM properties to be applied +# +# Since: 2.8 +## +{ 'struct': 'CpuModelInfo', + 'data': { 'name': 'str', + '*props': 'any' } } + +## +# @CpuModelExpansionType: +# +# An enumeration of CPU model expansion types. +# +# @static: Expand to a static CPU model, a combination of a static +# base model name and property delta changes. As the static base +# model will never change, the expanded CPU model will be the +# same, independent of QEMU version, machine type, machine +# options, and accelerator options. Therefore, the resulting +# model can be used by tooling without having to specify a +# compatibility machine - e.g. when displaying the "host" model. +# The @static CPU models are migration-safe. +# +# @full: Expand all properties. The produced model is not guaranteed +# to be migration-safe, but allows tooling to get an insight and +# work with model details. +# +# .. note:: When a non-migration-safe CPU model is expanded in static +# mode, some features enabled by the CPU model may be omitted, +# because they can't be implemented by a static CPU model +# definition (e.g. cache info passthrough and PMU passthrough in +# x86). If you need an accurate representation of the features +# enabled by a non-migration-safe CPU model, use @full. If you +# need a static representation that will keep ABI compatibility +# even when changing QEMU version or machine-type, use @static (but +# keep in mind that some features may be omitted). +# +# Since: 2.8 +## +{ 'enum': 'CpuModelExpansionType', + 'data': [ 'static', 'full' ] } + +## +# @CpuModelCompareResult: +# +# An enumeration of CPU model comparison results. The result is +# usually calculated using e.g. CPU features or CPU generations. +# +# @incompatible: If model A is incompatible to model B, model A is not +# guaranteed to run where model B runs and the other way around. +# +# @identical: If model A is identical to model B, model A is +# guaranteed to run where model B runs and the other way around. +# +# @superset: If model A is a superset of model B, model B is +# guaranteed to run where model A runs. There are no guarantees +# about the other way. +# +# @subset: If model A is a subset of model B, model A is guaranteed to +# run where model B runs. There are no guarantees about the other +# way. +# +# Since: 2.8 +## +{ 'enum': 'CpuModelCompareResult', + 'data': [ 'incompatible', 'identical', 'superset', 'subset' ] } + +## +# @CpuModelBaselineInfo: +# +# The result of a CPU model baseline. +# +# @model: the baselined CpuModelInfo. +# +# Since: 2.8 +## +{ 'struct': 'CpuModelBaselineInfo', + 'data': { 'model': 'CpuModelInfo' } } + +## +# @CpuModelCompareInfo: +# +# The result of a CPU model comparison. +# +# @result: The result of the compare operation. +# +# @responsible-properties: List of properties that led to the +# comparison result not being identical. +# +# @responsible-properties is a list of QOM property names that led to +# both CPUs not being detected as identical. For identical models, +# this list is empty. If a QOM property is read-only, that means +# there's no known way to make the CPU models identical. If the +# special property name "type" is included, the models are by +# definition not identical and cannot be made identical. +# +# Since: 2.8 +## +{ 'struct': 'CpuModelCompareInfo', + 'data': { 'result': 'CpuModelCompareResult', + 'responsible-properties': ['str'] } } + +## +# @query-cpu-model-comparison: +# +# Compares two CPU models, @modela and @modelb, returning how they +# compare in a specific configuration. The results indicates how +# both models compare regarding runnability. This result can be +# used by tooling to make decisions if a certain CPU model will +# run in a certain configuration or if a compatible CPU model has +# to be created by baselining. +# +# Usually, a CPU model is compared against the maximum possible CPU +# model of a certain configuration (e.g. the "host" model for KVM). +# If that CPU model is identical or a subset, it will run in that +# configuration. +# +# The result returned by this command may be affected by: +# +# * QEMU version: CPU models may look different depending on the QEMU +# version. (Except for CPU models reported as "static" in +# query-cpu-definitions.) +# * machine-type: CPU model may look different depending on the +# machine-type. (Except for CPU models reported as "static" in +# query-cpu-definitions.) +# * machine options (including accelerator): in some architectures, +# CPU models may look different depending on machine and accelerator +# options. (Except for CPU models reported as "static" in +# query-cpu-definitions.) +# * "-cpu" arguments and global properties: arguments to the -cpu +# option and global properties may affect expansion of CPU models. +# Using query-cpu-model-expansion while using these is not advised. +# +# Some architectures may not support comparing CPU models. s390x +# supports comparing CPU models. +# +# @modela: description of the first CPU model to compare, referred to +# as "model A" in CpuModelCompareResult +# +# @modelb: description of the second CPU model to compare, referred to +# as "model B" in CpuModelCompareResult +# +# Returns: a CpuModelCompareInfo describing how both CPU models +# compare +# +# Errors: +# - if comparing CPU models is not supported by the target +# - if a model cannot be used +# - if a model contains an unknown cpu definition name, unknown +# properties or properties with wrong types. +# +# Since: 2.8 +## +{ 'command': 'query-cpu-model-comparison', + 'data': { 'modela': 'CpuModelInfo', 'modelb': 'CpuModelInfo' }, + 'returns': 'CpuModelCompareInfo' } + +## +# @query-cpu-model-baseline: +# +# Baseline two CPU models, @modela and @modelb, creating a compatible +# third model. The created model will always be a static, +# migration-safe CPU model (see "static" CPU model expansion for +# details). +# +# This interface can be used by tooling to create a compatible CPU +# model out two CPU models. The created CPU model will be identical +# to or a subset of both CPU models when comparing them. Therefore, +# the created CPU model is guaranteed to run where the given CPU +# models run. +# +# The result returned by this command may be affected by: +# +# * QEMU version: CPU models may look different depending on the QEMU +# version. (Except for CPU models reported as "static" in +# query-cpu-definitions.) +# * machine-type: CPU model may look different depending on the +# machine-type. (Except for CPU models reported as "static" in +# query-cpu-definitions.) +# * machine options (including accelerator): in some architectures, +# CPU models may look different depending on machine and accelerator +# options. (Except for CPU models reported as "static" in +# query-cpu-definitions.) +# * "-cpu" arguments and global properties: arguments to the -cpu +# option and global properties may affect expansion of CPU models. +# Using query-cpu-model-expansion while using these is not advised. +# +# Some architectures may not support baselining CPU models. s390x +# supports baselining CPU models. +# +# @modela: description of the first CPU model to baseline +# +# @modelb: description of the second CPU model to baseline +# +# Returns: a CpuModelBaselineInfo describing the baselined CPU model +# +# Errors: +# - if baselining CPU models is not supported by the target +# - if a model cannot be used +# - if a model contains an unknown cpu definition name, unknown +# properties or properties with wrong types. +# +# Since: 2.8 +## +{ 'command': 'query-cpu-model-baseline', + 'data': { 'modela': 'CpuModelInfo', + 'modelb': 'CpuModelInfo' }, + 'returns': 'CpuModelBaselineInfo' } + +## +# @CpuModelExpansionInfo: +# +# The result of a cpu model expansion. +# +# @model: the expanded CpuModelInfo. +# +# @deprecated-props: an optional list of properties that are flagged as +# deprecated by the CPU vendor. The list depends on the +# CpuModelExpansionType: "static" properties are a subset of the +# enabled-properties for the expanded model; "full" properties are +# a set of properties that are deprecated across all models for +# the architecture. (since: 10.1 -- since 9.1 on s390x --). +# +# Since: 2.8 +## +{ 'struct': 'CpuModelExpansionInfo', + 'data': { 'model': 'CpuModelInfo', + '*deprecated-props' : ['str'] } } + +## +# @query-cpu-model-expansion: +# +# Expands a given CPU model, @model, (or a combination of CPU model + +# additional options) to different granularities, specified by @type, +# allowing tooling to get an understanding what a specific CPU model +# looks like in QEMU under a certain configuration. +# +# This interface can be used to query the "host" CPU model. +# +# The data returned by this command may be affected by: +# +# * QEMU version: CPU models may look different depending on the QEMU +# version. (Except for CPU models reported as "static" in +# query-cpu-definitions.) +# * machine-type: CPU model may look different depending on the +# machine-type. (Except for CPU models reported as "static" in +# query-cpu-definitions.) +# * machine options (including accelerator): in some architectures, +# CPU models may look different depending on machine and accelerator +# options. (Except for CPU models reported as "static" in +# query-cpu-definitions.) +# * "-cpu" arguments and global properties: arguments to the -cpu +# option and global properties may affect expansion of CPU models. +# Using query-cpu-model-expansion while using these is not advised. +# +# Some architectures may not support all expansion types. s390x +# supports "full" and "static". Arm only supports "full". +# +# @model: description of the CPU model to expand +# +# @type: expansion type, specifying how to expand the CPU model +# +# Returns: a CpuModelExpansionInfo describing the expanded CPU model +# +# Errors: +# - if expanding CPU models is not supported +# - if the model cannot be expanded +# - if the model contains an unknown CPU definition name, unknown +# properties or properties with a wrong type +# - if an expansion type is not supported +# +# Since: 2.8 +## +{ 'command': 'query-cpu-model-expansion', + 'data': { 'type': 'CpuModelExpansionType', + 'model': 'CpuModelInfo' }, + 'returns': 'CpuModelExpansionInfo' } + +## +# @CpuDefinitionInfo: +# +# Virtual CPU definition. +# +# @name: the name of the CPU definition +# +# @migration-safe: whether a CPU definition can be safely used for +# migration in combination with a QEMU compatibility machine when +# migrating between different QEMU versions and between hosts with +# different sets of (hardware or software) capabilities. If not +# provided, information is not available and callers should not +# assume the CPU definition to be migration-safe. (since 2.8) +# +# @static: whether a CPU definition is static and will not change +# depending on QEMU version, machine type, machine options and +# accelerator options. A static model is always migration-safe. +# (since 2.8) +# +# @unavailable-features: List of properties that prevent the CPU model +# from running in the current host. (since 2.8) +# +# @typename: Type name that can be used as argument to +# @device-list-properties, to introspect properties configurable +# using -cpu or -global. (since 2.9) +# +# @alias-of: Name of CPU model this model is an alias for. The target +# of the CPU model alias may change depending on the machine type. +# Management software is supposed to translate CPU model aliases +# in the VM configuration, because aliases may stop being +# migration-safe in the future (since 4.1) +# +# @deprecated: If true, this CPU model is deprecated and may be +# removed in some future version of QEMU according to the QEMU +# deprecation policy. (since 5.2) +# +# @unavailable-features is a list of QOM property names that represent +# CPU model attributes that prevent the CPU from running. If the QOM +# property is read-only, that means there's no known way to make the +# CPU model run in the current host. Implementations that choose not +# to provide specific information return the property name "type". If +# the property is read-write, it means that it MAY be possible to run +# the CPU model in the current host if that property is changed. +# Management software can use it as hints to suggest or choose an +# alternative for the user, or just to generate meaningful error +# messages explaining why the CPU model can't be used. If +# @unavailable-features is an empty list, the CPU model is runnable +# using the current host and machine-type. If @unavailable-features +# is not present, runnability information for the CPU is not +# available. +# +# Since: 1.2 +## +{ 'struct': 'CpuDefinitionInfo', + 'data': { 'name': 'str', + '*migration-safe': 'bool', + 'static': 'bool', + '*unavailable-features': [ 'str' ], + 'typename': 'str', + '*alias-of' : 'str', + 'deprecated' : 'bool' } } + +## +# @query-cpu-definitions: +# +# Return a list of supported virtual CPU definitions +# +# Returns: a list of CpuDefinitionInfo +# +# Since: 1.2 +## +{ 'command': 'query-cpu-definitions', 'returns': ['CpuDefinitionInfo'] } diff --git a/qapi/meson.build b/qapi/meson.build index eadde4d..3b035ae 100644 --- a/qapi/meson.build +++ b/qapi/meson.build @@ -39,10 +39,9 @@ qapi_all_modules = [ 'job', 'machine-common', 'machine', - 'machine-target', + 'machine-s390x', 'migration', 'misc', - 'misc-target', 'net', 'pragma', 'qom', @@ -64,6 +63,8 @@ if have_system 'qdev', 'pci', 'rocker', + 'misc-arm', + 'misc-i386', 'tpm', 'uefi', ] @@ -84,14 +85,12 @@ qapi_nonmodule_outputs = [ 'qapi-emit-events.c', 'qapi-emit-events.h', ] -# First build all sources -qapi_util_outputs = [ +qapi_outputs = qapi_nonmodule_outputs + [ 'qapi-builtin-types.c', 'qapi-builtin-visit.c', 'qapi-builtin-types.h', 'qapi-builtin-visit.h', ] qapi_inputs = [] -qapi_specific_outputs = [] foreach module : qapi_all_modules qapi_inputs += [ files(module + '.json') ] qapi_module_outputs = [ @@ -109,24 +108,17 @@ foreach module : qapi_all_modules 'qapi-commands-@0@.trace-events'.format(module), ] endif - if module.endswith('-target') - qapi_specific_outputs += qapi_module_outputs - else - qapi_util_outputs += qapi_module_outputs - endif + qapi_outputs += qapi_module_outputs endforeach qapi_files = custom_target('shared QAPI source files', - output: qapi_util_outputs + qapi_specific_outputs + qapi_nonmodule_outputs, + output: qapi_outputs, input: [ files('qapi-schema.json') ], command: [ qapi_gen, '-o', 'qapi', '-b', '@INPUT0@' ], depend_files: [ qapi_inputs, qapi_gen_depends ]) -# Now go through all the outputs and add them to the right sourceset. -# These loops must be synchronized with the output of the above custom target. - i = 0 -foreach output : qapi_util_outputs +foreach output : qapi_outputs if output.endswith('.h') genh += qapi_files[i] endif @@ -136,14 +128,3 @@ foreach output : qapi_util_outputs util_ss.add(qapi_files[i]) i = i + 1 endforeach - -foreach output : qapi_specific_outputs + qapi_nonmodule_outputs - if output.endswith('.h') - genh += qapi_files[i] - endif - if output.endswith('.trace-events') - qapi_trace_events += qapi_files[i] - endif - specific_ss.add(when: 'CONFIG_SYSTEM_ONLY', if_true: qapi_files[i]) - i = i + 1 -endforeach diff --git a/qapi/migration.json b/qapi/migration.json index 8b9c535..4963f6c 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -57,8 +57,8 @@ # # @dirty-sync-missed-zero-copy: Number of times dirty RAM # synchronization could not avoid copying dirty pages. This is -# between 0 and @dirty-sync-count * @multifd-channels. (since -# 7.1) +# between 0 and @dirty-sync-count * @multifd-channels. +# (since 7.1) # # Since: 0.14 ## @@ -137,16 +137,16 @@ # # @active: in the process of doing migration. # -# @postcopy-active: like active, but now in postcopy mode. (since -# 2.5) +# @postcopy-active: like active, but now in postcopy mode. +# (since 2.5) # # @postcopy-paused: during postcopy but paused. (since 3.0) # # @postcopy-recover-setup: setup phase for a postcopy recovery # process, preparing for a recovery phase to start. (since 9.1) # -# @postcopy-recover: trying to recover from a paused postcopy. (since -# 3.0) +# @postcopy-recover: trying to recover from a paused postcopy. +# (since 3.0) # # @completed: migration is finished. # @@ -282,7 +282,7 @@ ## # @query-migrate: # -# Returns information about current migration process. If migration +# Return information about current migration process. If migration # is active there will be another json-object with RAM migration # status. # @@ -407,7 +407,7 @@ # @postcopy-ram: Start executing on the migration target before all of # RAM has been migrated, pulling the remaining pages along as # needed. The capacity must have the same setting on both source -# and target or migration will not even start. NOTE: If the +# and target or migration will not even start. **Note:** if the # migration fails during postcopy the VM will fail. (since 2.6) # # @x-colo: If enabled, migration will never end, and the state of the @@ -415,15 +415,15 @@ # on secondary side, this process is called COarse-Grain LOck # Stepping (COLO) for Non-stop Service. (since 2.8) # -# @release-ram: if enabled, qemu will free the migrated ram pages on +# @release-ram: if enabled, QEMU will free the migrated ram pages on # the source during postcopy-ram migration. (since 2.9) # # @return-path: If enabled, migration will use the return path even # for precopy. (since 2.10) # # @pause-before-switchover: Pause outgoing migration before -# serialising device state and before disabling block IO (since -# 2.11) +# serialising device state and before disabling block IO +# (since 2.11) # # @multifd: Use more than one fd for migration (since 4.0) # @@ -535,7 +535,7 @@ ## # @query-migrate-capabilities: # -# Returns information about the current migration capabilities status +# Return information about the current migration capabilities status # # Returns: @MigrationCapabilityStatus # @@ -697,8 +697,8 @@ # @alias: An alias name for migration (for example the bitmap name on # the opposite site). # -# @transform: Allows the modification of the migrated bitmap. (since -# 6.0) +# @transform: Allows the modification of the migrated bitmap. +# (since 6.0) # # Since: 5.2 ## @@ -760,9 +760,9 @@ # auto-converge detects that migration is not making progress. # The default value is 10. (Since 2.7) # -# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage At -# the tail stage of throttling, the Guest is very sensitive to CPU -# percentage while the @cpu-throttle -increment is excessive +# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage. +# At the tail stage of throttling, the Guest is very sensitive to +# CPU percentage while the @cpu-throttle -increment is excessive # usually at tail stage. If this parameter is true, we will # compute the ideal CPU percentage used by the Guest, which may # exactly make the dirty rate match the dirty rate threshold. @@ -770,8 +770,8 @@ # specified by @cpu-throttle-increment and the one generated by # ideal CPU percentage. Therefore, it is compatible to # traditional throttling, meanwhile the throttle increment won't -# be excessive at tail stage. The default value is false. (Since -# 5.1) +# be excessive at tail stage. The default value is false. +# (Since 5.1) # # @tls-creds: ID of the 'tls-creds' object that provides credentials # for establishing a TLS connection over the migration data @@ -801,10 +801,10 @@ # (Since 2.8) # # @avail-switchover-bandwidth: to set the available bandwidth that -# migration can use during switchover phase. NOTE! This does not -# limit the bandwidth during switchover, but only for calculations -# when making decisions to switchover. By default, this value is -# zero, which means QEMU will estimate the bandwidth +# migration can use during switchover phase. **Note:** this does +# not limit the bandwidth during switchover, but only for +# calculations when making decisions to switchover. By default, +# this value is zero, which means QEMU will estimate the bandwidth # automatically. This can be set when the estimated value is not # accurate, while the user is able to guarantee such bandwidth is # available when switching over. When specified correctly, this @@ -842,9 +842,9 @@ # more CPU. Defaults to 1. (Since 5.0) # # @multifd-qatzip-level: Set the compression level to be used in live -# migration. The level is an integer between 1 and 9, where 1 means +# migration. The level is an integer between 1 and 9, where 1 means # the best compression speed, and 9 means the best compression -# ratio which will consume more CPU. Defaults to 1. (Since 9.2) +# ratio which will consume more CPU. Defaults to 1. (Since 9.2) # # @multifd-zstd-level: Set the compression level to be used in live # migration, the compression level is an integer between 0 and 20, @@ -941,9 +941,9 @@ # auto-converge detects that migration is not making progress. # The default value is 10. (Since 2.7) # -# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage At -# the tail stage of throttling, the Guest is very sensitive to CPU -# percentage while the @cpu-throttle -increment is excessive +# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage. +# At the tail stage of throttling, the Guest is very sensitive to +# CPU percentage while the @cpu-throttle -increment is excessive # usually at tail stage. If this parameter is true, we will # compute the ideal CPU percentage used by the Guest, which may # exactly make the dirty rate match the dirty rate threshold. @@ -951,8 +951,8 @@ # specified by @cpu-throttle-increment and the one generated by # ideal CPU percentage. Therefore, it is compatible to # traditional throttling, meanwhile the throttle increment won't -# be excessive at tail stage. The default value is false. (Since -# 5.1) +# be excessive at tail stage. The default value is false. +# (Since 5.1) # # @tls-creds: ID of the 'tls-creds' object that provides credentials # for establishing a TLS connection over the migration data @@ -982,10 +982,10 @@ # (Since 2.8) # # @avail-switchover-bandwidth: to set the available bandwidth that -# migration can use during switchover phase. NOTE! This does not -# limit the bandwidth during switchover, but only for calculations -# when making decisions to switchover. By default, this value is -# zero, which means QEMU will estimate the bandwidth +# migration can use during switchover phase. **Note:** this does +# not limit the bandwidth during switchover, but only for +# calculations when making decisions to switchover. By default, +# this value is zero, which means QEMU will estimate the bandwidth # automatically. This can be set when the estimated value is not # accurate, while the user is able to guarantee such bandwidth is # available when switching over. When specified correctly, this @@ -1023,9 +1023,9 @@ # more CPU. Defaults to 1. (Since 5.0) # # @multifd-qatzip-level: Set the compression level to be used in live -# migration. The level is an integer between 1 and 9, where 1 means +# migration. The level is an integer between 1 and 9, where 1 means # the best compression speed, and 9 means the best compression -# ratio which will consume more CPU. Defaults to 1. (Since 9.2) +# ratio which will consume more CPU. Defaults to 1. (Since 9.2) # # @multifd-zstd-level: Set the compression level to be used in live # migration, the compression level is an integer between 0 and 20, @@ -1148,16 +1148,16 @@ # percentage. The default value is 50. (Since 5.0) # # @cpu-throttle-initial: Initial percentage of time guest cpus are -# throttled when migration auto-converge is activated. (Since -# 2.7) +# throttled when migration auto-converge is activated. +# (Since 2.7) # # @cpu-throttle-increment: throttle percentage increase each time # auto-converge detects that migration is not making progress. # (Since 2.7) # -# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage At -# the tail stage of throttling, the Guest is very sensitive to CPU -# percentage while the @cpu-throttle -increment is excessive +# @cpu-throttle-tailslow: Make CPU throttling slower at tail stage. +# At the tail stage of throttling, the Guest is very sensitive to +# CPU percentage while the @cpu-throttle -increment is excessive # usually at tail stage. If this parameter is true, we will # compute the ideal CPU percentage used by the Guest, which may # exactly make the dirty rate match the dirty rate threshold. @@ -1165,8 +1165,8 @@ # specified by @cpu-throttle-increment and the one generated by # ideal CPU percentage. Therefore, it is compatible to # traditional throttling, meanwhile the throttle increment won't -# be excessive at tail stage. The default value is false. (Since -# 5.1) +# be excessive at tail stage. The default value is false. +# (Since 5.1) # # @tls-creds: ID of the 'tls-creds' object that provides credentials # for establishing a TLS connection over the migration data @@ -1192,10 +1192,10 @@ # (Since 2.8) # # @avail-switchover-bandwidth: to set the available bandwidth that -# migration can use during switchover phase. NOTE! This does not -# limit the bandwidth during switchover, but only for calculations -# when making decisions to switchover. By default, this value is -# zero, which means QEMU will estimate the bandwidth +# migration can use during switchover phase. **Note:** this does +# not limit the bandwidth during switchover, but only for +# calculations when making decisions to switchover. By default, +# this value is zero, which means QEMU will estimate the bandwidth # automatically. This can be set when the estimated value is not # accurate, while the user is able to guarantee such bandwidth is # available when switching over. When specified correctly, this @@ -1233,9 +1233,9 @@ # more CPU. Defaults to 1. (Since 5.0) # # @multifd-qatzip-level: Set the compression level to be used in live -# migration. The level is an integer between 1 and 9, where 1 means +# migration. The level is an integer between 1 and 9, where 1 means # the best compression speed, and 9 means the best compression -# ratio which will consume more CPU. Defaults to 1. (Since 9.2) +# ratio which will consume more CPU. Defaults to 1. (Since 9.2) # # @multifd-zstd-level: Set the compression level to be used in live # migration, the compression level is an integer between 0 and 20, @@ -1320,7 +1320,7 @@ ## # @query-migrate-parameters: # -# Returns information about the current migration parameters +# Return information about the current migration parameters # # Returns: @MigrationParameters # @@ -1500,7 +1500,7 @@ ## # @x-colo-lost-heartbeat: # -# Tell qemu that heartbeat is lost, request it to do takeover +# Tell QEMU that heartbeat is lost, request it to do takeover # procedures. If this command is sent to the PVM, the Primary side # will exit COLO mode. If sent to the Secondary, the Secondary side # will run failover work, then takes over server operation to become @@ -1660,6 +1660,10 @@ # # @resume: resume one paused migration, default "off". (since 3.0) # +# Features: +# +# @deprecated: Argument @detach is deprecated. +# # Since: 0.14 # # .. admonition:: Notes @@ -1668,19 +1672,14 @@ # migration's progress and final result (this information is # provided by the 'status' member). # -# 2. All boolean arguments default to false. -# -# 3. The user Monitor's "detach" argument is invalid in QMP and -# should not be used. -# -# 4. The uri argument should have the Uniform Resource Identifier +# 2. The uri argument should have the Uniform Resource Identifier # of default destination VM. This connection will be bound to # default network. # -# 5. For now, number of migration streams is restricted to one, +# 3. For now, number of migration streams is restricted to one, # i.e. number of items in 'channels' list is just 1. # -# 6. The 'uri' and 'channels' arguments are mutually exclusive; +# 4. The 'uri' and 'channels' arguments are mutually exclusive; # exactly one of the two should be present. # # .. qmp-example:: @@ -1724,13 +1723,14 @@ { 'command': 'migrate', 'data': {'*uri': 'str', '*channels': [ 'MigrationChannel' ], - '*detach': 'bool', '*resume': 'bool' } } + '*detach': { 'type': 'bool', 'features': [ 'deprecated' ] }, + '*resume': 'bool' } } ## # @migrate-incoming: # -# Start an incoming migration, the qemu must have been started with -# -incoming defer +# Start an incoming migration. QEMU must have been started with +# -incoming defer. # # @uri: The Uniform Resource Identifier identifying the source or # address to listen on @@ -2294,7 +2294,7 @@ ## # @query-vcpu-dirty-limit: # -# Returns information about virtual CPU dirty page rate limits, if +# Return information about virtual CPU dirty page rate limits, if # any. # # Since: 7.1 @@ -2327,7 +2327,7 @@ ## # @query-migrationthreads: # -# Returns information of migration threads +# Return information of migration threads # # Features: # diff --git a/qapi/misc-arm.json b/qapi/misc-arm.json new file mode 100644 index 0000000..f534137 --- /dev/null +++ b/qapi/misc-arm.json @@ -0,0 +1,49 @@ +# -*- Mode: Python -*- +# vim: filetype=python +# +# SPDX-License-Identifier: GPL-2.0-or-later + +## +# @GICCapability: +# +# The struct describes capability for a specific GIC (Generic +# Interrupt Controller) version. These bits are not only decided by +# QEMU/KVM software version, but also decided by the hardware that the +# program is running upon. +# +# @version: version of GIC to be described. Currently, only 2 and 3 +# are supported. +# +# @emulated: whether current QEMU/hardware supports emulated GIC +# device in user space. +# +# @kernel: whether current QEMU/hardware supports hardware accelerated +# GIC device in kernel. +# +# Since: 2.6 +## +{ 'struct': 'GICCapability', + 'data': { 'version': 'int', + 'emulated': 'bool', + 'kernel': 'bool' } } + +## +# @query-gic-capabilities: +# +# It will return a list of GICCapability objects that describe its +# capability bits. +# +# On non-ARM targets this command will report an error as the GIC +# technology is not applicable. +# +# Returns: a list of GICCapability objects. +# +# Since: 2.6 +# +# .. qmp-example:: +# +# -> { "execute": "query-gic-capabilities" } +# <- { "return": [{ "version": 2, "emulated": true, "kernel": false }, +# { "version": 3, "emulated": false, "kernel": true } ] } +## +{ 'command': 'query-gic-capabilities', 'returns': ['GICCapability'] } diff --git a/qapi/misc-target.json b/qapi/misc-i386.json index f7ec695..5fefa0a 100644 --- a/qapi/misc-target.json +++ b/qapi/misc-i386.json @@ -1,6 +1,7 @@ # -*- Mode: Python -*- # vim: filetype=python # +# SPDX-License-Identifier: GPL-2.0-or-later ## # @rtc-reset-reinjection: @@ -9,6 +10,10 @@ # be used if another mechanism to synchronize guest time is in effect, # for example QEMU guest agent's guest-set-time command. # +# Use of this command is only applicable for x86 machines with an RTC, +# and on other machines will silently return without performing any +# action. +# # Since: 2.1 # # .. qmp-example:: @@ -16,8 +21,7 @@ # -> { "execute": "rtc-reset-reinjection" } # <- { "return": {} } ## -{ 'command': 'rtc-reset-reinjection', - 'if': 'TARGET_I386' } +{ 'command': 'rtc-reset-reinjection' } ## # @SevState: @@ -44,8 +48,7 @@ ## { 'enum': 'SevState', 'data': ['uninit', 'launch-update', 'launch-secret', 'running', - 'send-update', 'receive-update' ], - 'if': 'TARGET_I386' } + 'send-update', 'receive-update' ] } ## # @SevGuestType: @@ -59,8 +62,7 @@ # Since: 6.2 ## { 'enum': 'SevGuestType', - 'data': [ 'sev', 'sev-snp' ], - 'if': 'TARGET_I386' } + 'data': [ 'sev', 'sev-snp' ] } ## # @SevGuestInfo: @@ -75,8 +77,7 @@ ## { 'struct': 'SevGuestInfo', 'data': { 'policy': 'uint32', - 'handle': 'uint32' }, - 'if': 'TARGET_I386' } + 'handle': 'uint32' } } ## # @SevSnpGuestInfo: @@ -88,8 +89,7 @@ # Since: 9.1 ## { 'struct': 'SevSnpGuestInfo', - 'data': { 'snp-policy': 'uint64' }, - 'if': 'TARGET_I386' } + 'data': { 'snp-policy': 'uint64' } } ## # @SevInfo: @@ -120,14 +120,17 @@ 'discriminator': 'sev-type', 'data': { 'sev': 'SevGuestInfo', - 'sev-snp': 'SevSnpGuestInfo' }, - 'if': 'TARGET_I386' } + 'sev-snp': 'SevSnpGuestInfo' } } ## # @query-sev: # -# Returns information about SEV +# Return information about SEV/SEV-ES/SEV-SNP. +# +# If unavailable due to an incompatible configuration the returned +# @enabled field is set to 'false' and the state of all other fields +# is unspecified. # # Returns: @SevInfo # @@ -140,8 +143,7 @@ # "build-id" : 0, "policy" : 0, "state" : "running", # "handle" : 1 } } ## -{ 'command': 'query-sev', 'returns': 'SevInfo', - 'if': 'TARGET_I386' } +{ 'command': 'query-sev', 'returns': 'SevInfo' } ## # @SevLaunchMeasureInfo: @@ -152,16 +154,24 @@ # # Since: 2.12 ## -{ 'struct': 'SevLaunchMeasureInfo', 'data': {'data': 'str'}, - 'if': 'TARGET_I386' } +{ 'struct': 'SevLaunchMeasureInfo', 'data': {'data': 'str'} } ## # @query-sev-launch-measure: # -# Query the SEV guest launch information. +# Query the SEV/SEV-ES guest launch information. +# +# This is only valid on x86 machines configured with KVM and the +# 'sev-guest' confidential virtualization object. The launch +# measurement for SEV-SNP guests is only available within the guest. # # Returns: The @SevLaunchMeasureInfo for the guest # +# Errors: +# - If the launch measurement is unavailable, either due to an +# invalid guest configuration or if the guest has not reached +# the required SEV state, GenericError +# # Since: 2.12 # # .. qmp-example:: @@ -169,8 +179,7 @@ # -> { "execute": "query-sev-launch-measure" } # <- { "return": { "data": "4l8LXeNlSPUDlXPJG5966/8%YZ" } } ## -{ 'command': 'query-sev-launch-measure', 'returns': 'SevLaunchMeasureInfo', - 'if': 'TARGET_I386' } +{ 'command': 'query-sev-launch-measure', 'returns': 'SevLaunchMeasureInfo' } ## # @SevCapability: @@ -186,7 +195,7 @@ # # @cbitpos: C-bit location in page table entry # -# @reduced-phys-bits: Number of physical Address bit reduction when +# @reduced-phys-bits: Number of physical address bit reduction when # SEV is enabled # # Since: 2.12 @@ -196,17 +205,20 @@ 'cert-chain': 'str', 'cpu0-id': 'str', 'cbitpos': 'int', - 'reduced-phys-bits': 'int'}, - 'if': 'TARGET_I386' } + 'reduced-phys-bits': 'int'} } ## # @query-sev-capabilities: # -# This command is used to get the SEV capabilities, and is supported -# on AMD X86 platforms only. +# Get SEV capabilities. +# +# This is only supported on AMD X86 platforms with KVM enabled. # # Returns: SevCapability objects. # +# Errors: +# - If SEV is not available on the platform, GenericError +# # Since: 2.12 # # .. qmp-example:: @@ -216,13 +228,17 @@ # "cpu0-id": "2lvmGwo+...61iEinw==", # "cbitpos": 47, "reduced-phys-bits": 1}} ## -{ 'command': 'query-sev-capabilities', 'returns': 'SevCapability', - 'if': 'TARGET_I386' } +{ 'command': 'query-sev-capabilities', 'returns': 'SevCapability' } ## # @sev-inject-launch-secret: # -# This command injects a secret blob into memory of SEV guest. +# This command injects a secret blob into memory of a SEV/SEV-ES +# guest. +# +# This is only valid on x86 machines configured with KVM and the +# 'sev-guest' confidential virtualization object. SEV-SNP guests do +# not support launch secret injection. # # @packet-header: the launch secret packet header encoded in base64 # @@ -230,11 +246,15 @@ # # @gpa: the guest physical address where secret will be injected. # +# Errors: +# - If launch secret injection is not possible, either due to +# an invalid guest configuration, or if the guest has not +# reached the required SEV state, GenericError +# # Since: 6.0 ## { 'command': 'sev-inject-launch-secret', - 'data': { 'packet-header': 'str', 'secret': 'str', '*gpa': 'uint64' }, - 'if': 'TARGET_I386' } + 'data': { 'packet-header': 'str', 'secret': 'str', '*gpa': 'uint64' } } ## # @SevAttestationReport: @@ -247,20 +267,28 @@ # Since: 6.1 ## { 'struct': 'SevAttestationReport', - 'data': { 'data': 'str'}, - 'if': 'TARGET_I386' } + 'data': { 'data': 'str'} } ## # @query-sev-attestation-report: # -# This command is used to get the SEV attestation report, and is -# supported on AMD X86 platforms only. +# This command is used to get the SEV attestation report. +# +# This is only valid on x86 machines configured with KVM and the +# 'sev-guest' confidential virtualization object. The attestation +# report for SEV-SNP guests is only available within the guest. # # @mnonce: a random 16 bytes value encoded in base64 (it will be # included in report) # # Returns: SevAttestationReport objects. # +# Errors: +# - This will return an error if the attestation report is +# unavailable, either due to an invalid guest configuration +# or if the guest has not reached the required SEV state, +# GenericError +# # Since: 6.1 # # .. qmp-example:: @@ -271,52 +299,7 @@ ## { 'command': 'query-sev-attestation-report', 'data': { 'mnonce': 'str' }, - 'returns': 'SevAttestationReport', - 'if': 'TARGET_I386' } - -## -# @GICCapability: -# -# The struct describes capability for a specific GIC (Generic -# Interrupt Controller) version. These bits are not only decided by -# QEMU/KVM software version, but also decided by the hardware that the -# program is running upon. -# -# @version: version of GIC to be described. Currently, only 2 and 3 -# are supported. -# -# @emulated: whether current QEMU/hardware supports emulated GIC -# device in user space. -# -# @kernel: whether current QEMU/hardware supports hardware accelerated -# GIC device in kernel. -# -# Since: 2.6 -## -{ 'struct': 'GICCapability', - 'data': { 'version': 'int', - 'emulated': 'bool', - 'kernel': 'bool' }, - 'if': 'TARGET_ARM' } - -## -# @query-gic-capabilities: -# -# This command is ARM-only. It will return a list of GICCapability -# objects that describe its capability bits. -# -# Returns: a list of GICCapability objects. -# -# Since: 2.6 -# -# .. qmp-example:: -# -# -> { "execute": "query-gic-capabilities" } -# <- { "return": [{ "version": 2, "emulated": true, "kernel": false }, -# { "version": 3, "emulated": false, "kernel": true } ] } -## -{ 'command': 'query-gic-capabilities', 'returns': ['GICCapability'], - 'if': 'TARGET_ARM' } + 'returns': 'SevAttestationReport' } ## # @SgxEpcSection: @@ -355,13 +338,12 @@ 'sgx1': 'bool', 'sgx2': 'bool', 'flc': 'bool', - 'sections': ['SgxEpcSection']}, - 'if': 'TARGET_I386' } + 'sections': ['SgxEpcSection']} } ## # @query-sgx: # -# Returns information about configured SGX capabilities of guest +# Return information about configured SGX capabilities of guest # # Returns: @SgxInfo # @@ -375,12 +357,12 @@ # "sections": [{"node": 0, "size": 67108864}, # {"node": 1, "size": 29360128}]} } ## -{ 'command': 'query-sgx', 'returns': 'SgxInfo', 'if': 'TARGET_I386' } +{ 'command': 'query-sgx', 'returns': 'SgxInfo' } ## # @query-sgx-capabilities: # -# Returns information about SGX capabilities of host +# Return information about SGX capabilities of host # # Returns: @SgxInfo # @@ -394,8 +376,7 @@ # "section" : [{"node": 0, "size": 67108864}, # {"node": 1, "size": 29360128}]} } ## -{ 'command': 'query-sgx-capabilities', 'returns': 'SgxInfo', 'if': 'TARGET_I386' } - +{ 'command': 'query-sgx-capabilities', 'returns': 'SgxInfo' } ## # @EvtchnPortType: @@ -417,8 +398,7 @@ # Since: 8.0 ## { 'enum': 'EvtchnPortType', - 'data': ['closed', 'unbound', 'interdomain', 'pirq', 'virq', 'ipi'], - 'if': 'TARGET_I386' } + 'data': ['closed', 'unbound', 'interdomain', 'pirq', 'virq', 'ipi'] } ## # @EvtchnInfo: @@ -448,8 +428,7 @@ 'remote-domain': 'str', 'target': 'uint16', 'pending': 'bool', - 'masked': 'bool'}, - 'if': 'TARGET_I386' } + 'masked': 'bool'} } ## @@ -487,8 +466,7 @@ # } ## { 'command': 'xen-event-list', - 'returns': ['EvtchnInfo'], - 'if': 'TARGET_I386' } + 'returns': ['EvtchnInfo'] } ## # @xen-event-inject: @@ -505,5 +483,4 @@ # <- { "return": { } } ## { 'command': 'xen-event-inject', - 'data': { 'port': 'uint32' }, - 'if': 'TARGET_I386' } + 'data': { 'port': 'uint32' } } diff --git a/qapi/misc.json b/qapi/misc.json index 559b66f..4b9e601 100644 --- a/qapi/misc.json +++ b/qapi/misc.json @@ -101,7 +101,7 @@ ## # @query-iothreads: # -# Returns a list of information about each iothread. +# Return a list of information about each iothread. # # .. note:: This list excludes the QEMU main loop thread, which is not # declared using the ``-object iothread`` command-line option. It @@ -222,8 +222,8 @@ # .. note:: This command only exists as a stop-gap. Its use is highly # discouraged. The semantics of this command are not guaranteed: # this means that command names, arguments and responses can change -# or be removed at ANY time. Applications that rely on long term -# stability guarantees should NOT use this command. +# or be removed at **any** time. Applications that rely on long +# term stability guarantees should **not** use this command. # # Known limitations: # diff --git a/qapi/net.json b/qapi/net.json index 310cc4f..97ea183 100644 --- a/qapi/net.json +++ b/qapi/net.json @@ -150,12 +150,12 @@ # @domainname: guest-visible domain name of the virtual nameserver # (since 3.0) # -# @ipv6-prefix: IPv6 network prefix (default is fec0::) (since 2.6). -# The network prefix is given in the usual hexadecimal IPv6 -# address notation. +# @ipv6-prefix: IPv6 network prefix (default is fec0::). The network +# prefix is given in the usual hexadecimal IPv6 address notation. +# (since 2.6) # -# @ipv6-prefixlen: IPv6 network prefix length (default is 64) (since -# 2.6) +# @ipv6-prefixlen: IPv6 network prefix length (default is 64) +# (since 2.6) # # @ipv6-host: guest-visible IPv6 address of the host (since 2.6) # @@ -387,8 +387,8 @@ # # @hubid: hub identifier number # -# @netdev: used to connect hub to a netdev instead of a device (since -# 2.12) +# @netdev: used to connect hub to a netdev instead of a device +# (since 2.12) # # Since: 1.2 ## @@ -510,8 +510,8 @@ # @queues: number of queues to be created for multiqueue vhost-vdpa # (default: 1) # -# @x-svq: Start device with (experimental) shadow virtqueue. (Since -# 7.1) (default: false) +# @x-svq: Start device with (experimental) shadow virtqueue. +# (Since 7.1) (default: false) # # Features: # diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json index 7bc600b..a8f6616 100644 --- a/qapi/qapi-schema.json +++ b/qapi/qapi-schema.json @@ -57,11 +57,12 @@ { 'include': 'qdev.json' } { 'include': 'machine-common.json' } { 'include': 'machine.json' } -{ 'include': 'machine-target.json' } +{ 'include': 'machine-s390x.json' } { 'include': 'replay.json' } { 'include': 'yank.json' } { 'include': 'misc.json' } -{ 'include': 'misc-target.json' } +{ 'include': 'misc-arm.json' } +{ 'include': 'misc-i386.json' } { 'include': 'audio.json' } { 'include': 'acpi.json' } { 'include': 'pci.json' } diff --git a/qapi/qom.json b/qapi/qom.json index 04c118e..b133b06 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -870,7 +870,7 @@ # information read from devices and switches in conjunction with # link characteristics read from PCIe Configuration space. # To get the full path latency from CPU to CXL attached DRAM -# CXL device: Add the latency from CPU to Generic Port (from +# CXL device: Add the latency from CPU to Generic Port (from # HMAT indexed via the node ID in this SRAT structure) to # that for CXL bus links, the latency across intermediate switches # and from the EP port to the actual memory. Bandwidth is more @@ -1048,6 +1048,45 @@ '*vcek-disabled': 'bool' } } ## +# @TdxGuestProperties: +# +# Properties for tdx-guest objects. +# +# @attributes: The 'attributes' of a TD guest that is passed to +# KVM_TDX_INIT_VM +# +# @sept-ve-disable: toggle bit 28 of TD attributes to control disabling +# of EPT violation conversion to #VE on guest TD access of PENDING +# pages. Some guest OS (e.g., Linux TD guest) may require this to +# be set, otherwise they refuse to boot. +# +# @mrconfigid: ID for non-owner-defined configuration of the guest TD, +# e.g., run-time or OS configuration (base64 encoded SHA384 digest). +# Defaults to all zeros. +# +# @mrowner: ID for the guest TD’s owner (base64 encoded SHA384 digest). +# Defaults to all zeros. +# +# @mrownerconfig: ID for owner-defined configuration of the guest TD, +# e.g., specific to the workload rather than the run-time or OS +# (base64 encoded SHA384 digest). Defaults to all zeros. +# +# @quote-generation-socket: socket address for Quote Generation +# Service (QGS). QGS is a daemon running on the host. Without +# it, the guest will not be able to get a TD quote for +# attestation. +# +# Since: 10.1 +## +{ 'struct': 'TdxGuestProperties', + 'data': { '*attributes': 'uint64', + '*sept-ve-disable': 'bool', + '*mrconfigid': 'str', + '*mrowner': 'str', + '*mrownerconfig': 'str', + '*quote-generation-socket': 'SocketAddress' } } + +## # @ThreadContextProperties: # # Properties for thread context objects. @@ -1132,6 +1171,7 @@ 'sev-snp-guest', 'thread-context', 's390-pv-guest', + 'tdx-guest', 'throttle-group', 'tls-creds-anon', 'tls-creds-psk', @@ -1204,6 +1244,7 @@ 'if': 'CONFIG_SECRET_KEYRING' }, 'sev-guest': 'SevGuestProperties', 'sev-snp-guest': 'SevSnpGuestProperties', + 'tdx-guest': 'TdxGuestProperties', 'thread-context': 'ThreadContextProperties', 'throttle-group': 'ThrottleGroupProperties', 'tls-creds-anon': 'TlsCredsAnonProperties', diff --git a/qapi/run-state.json b/qapi/run-state.json index ce95cfa..fd09beb 100644 --- a/qapi/run-state.json +++ b/qapi/run-state.json @@ -62,7 +62,7 @@ ## # @ShutdownCause: # -# An enumeration of reasons for a Shutdown. +# An enumeration of reasons for a shutdown. # # @none: No shutdown request pending # @@ -135,19 +135,19 @@ ## # @SHUTDOWN: # -# Emitted when the virtual machine has shut down, indicating that qemu +# Emitted when the virtual machine has shut down, indicating that QEMU # is about to exit. # # @guest: If true, the shutdown was triggered by a guest request (such # as a guest-initiated ACPI shutdown request or other # hardware-specific action) rather than a host request (such as -# sending qemu a SIGINT). (since 2.10) +# sending QEMU a SIGINT). (since 2.10) # # @reason: The @ShutdownCause which resulted in the SHUTDOWN. # (since 4.0) # # .. note:: If the command-line option ``-no-shutdown`` has been -# specified, qemu will not exit, and a STOP event will eventually +# specified, QEMU will not exit, and a STOP event will eventually # follow the SHUTDOWN event. # # Since: 0.12 @@ -365,8 +365,8 @@ # @shutdown: Shutdown the VM and exit, according to the shutdown # action # -# @exit-failure: Shutdown the VM and exit with nonzero status (since -# 7.1) +# @exit-failure: Shutdown the VM and exit with nonzero status +# (since 7.1) # # Since: 6.0 ## @@ -501,10 +501,12 @@ # # @s390: s390 guest panic information type (Since: 2.12) # +# @tdx: tdx guest panic information type (Since: 10.1) +# # Since: 2.9 ## { 'enum': 'GuestPanicInformationType', - 'data': [ 'hyper-v', 's390' ] } + 'data': [ 'hyper-v', 's390', 'tdx' ] } ## # @GuestPanicInformation: @@ -519,7 +521,8 @@ 'base': {'type': 'GuestPanicInformationType'}, 'discriminator': 'type', 'data': {'hyper-v': 'GuestPanicInformationHyperV', - 's390': 'GuestPanicInformationS390'}} + 's390': 'GuestPanicInformationS390', + 'tdx' : 'GuestPanicInformationTdx'}} ## # @GuestPanicInformationHyperV: @@ -599,6 +602,30 @@ 'reason': 'S390CrashReason'}} ## +# @GuestPanicInformationTdx: +# +# TDX Guest panic information specific to TDX, as specified in the +# "Guest-Hypervisor Communication Interface (GHCI) Specification", +# section TDG.VP.VMCALL<ReportFatalError>. +# +# @error-code: TD-specific error code +# +# @message: Human-readable error message provided by the guest. Not +# to be trusted. +# +# @gpa: guest-physical address of a page that contains more verbose +# error information, as zero-terminated string. Present when the +# "GPA valid" bit (bit 63) is set in @error-code. +# +# +# Since: 10.1 +## +{'struct': 'GuestPanicInformationTdx', + 'data': {'error-code': 'uint32', + 'message': 'str', + '*gpa': 'uint64'}} + +## # @MEMORY_FAILURE: # # Emitted when a memory failure occurs on host side. diff --git a/qapi/transaction.json b/qapi/transaction.json index 021e383..9d9e7af 100644 --- a/qapi/transaction.json +++ b/qapi/transaction.json @@ -21,7 +21,7 @@ ## # @ActionCompletionMode: # -# An enumeration of Transactional completion modes. +# An enumeration of transactional completion modes. # # @individual: Do not attempt to cancel any other Actions if any # Actions fail after the Transaction request succeeds. All @@ -223,7 +223,7 @@ # exists, the request will be rejected. Only some image formats # support it, for example, qcow2, and rbd, # -# On failure, qemu will try delete the newly created internal snapshot +# On failure, QEMU will try delete the newly created internal snapshot # in the transaction. When an I/O error occurs during deletion, the # user needs to fix it later with qemu-img or other command. # diff --git a/qapi/uefi.json b/qapi/uefi.json index bdfcabe..6592183 100644 --- a/qapi/uefi.json +++ b/qapi/uefi.json @@ -5,7 +5,7 @@ ## # = UEFI Variable Store # -# The qemu efi variable store implementation (hw/uefi/) uses this to +# The QEMU efi variable store implementation (hw/uefi/) uses this to # store non-volatile variables in json format on disk. # # This is an existing format already supported by (at least) two other diff --git a/qapi/ui.json b/qapi/ui.json index c536d4e..514fa15 100644 --- a/qapi/ui.json +++ b/qapi/ui.json @@ -175,8 +175,8 @@ # @filename: the path of a new file to store the image # # @device: ID of the display device that should be dumped. If this -# parameter is missing, the primary display will be used. (Since -# 2.12) +# parameter is missing, the primary display will be used. +# (Since 2.12) # # @head: head to use in case the device supports multiple heads. If # this parameter is missing, head #0 will be used. Also note that @@ -323,7 +323,7 @@ ## # @query-spice: # -# Returns information about the current SPICE server +# Return information about the current SPICE server # # Returns: @SpiceInfo # @@ -654,7 +654,7 @@ ## # @query-vnc: # -# Returns information about the current VNC server +# Return information about the current VNC server # # Returns: @VncInfo # @@ -685,7 +685,7 @@ ## # @query-vnc-servers: # -# Returns a list of vnc servers. The list can be empty. +# Return a list of vnc servers. The list can be empty. # # Returns: a list of @VncInfo2 # @@ -820,7 +820,7 @@ ## # @query-mice: # -# Returns information about each active mouse device +# Return information about each active mouse device # # Returns: a list of @MouseInfo for each device # @@ -1526,12 +1526,12 @@ # # Display (user interface) options. # -# @type: Which DisplayType qemu should use. +# @type: Which DisplayType QEMU should use. # # @full-screen: Start user interface in fullscreen mode # (default: off). # -# @window-close: Allow to quit qemu with window close button +# @window-close: Allow to quit QEMU with window close button # (default: on). # # @show-cursor: Force showing the mouse cursor (default: off). @@ -1562,7 +1562,7 @@ ## # @query-display-options: # -# Returns information about display configuration +# Return information about display configuration # # Returns: @DisplayOptions # diff --git a/qapi/virtio.json b/qapi/virtio.json index d351d21..73df718 100644 --- a/qapi/virtio.json +++ b/qapi/virtio.json @@ -24,7 +24,7 @@ ## # @x-query-virtio: # -# Returns a list of all realized VirtIODevices +# Return a list of all realized VirtIODevices # # Features: # @@ -3505,6 +3505,7 @@ static int img_snapshot(int argc, char **argv) break; case SNAPSHOT_DELETE: + bdrv_drain_all_begin(); bdrv_graph_rdlock_main_loop(); ret = bdrv_snapshot_find(bs, &sn, snapshot_name); if (ret < 0) { @@ -3520,6 +3521,7 @@ static int img_snapshot(int argc, char **argv) } } bdrv_graph_rdunlock_main_loop(); + bdrv_drain_all_end(); break; } @@ -4488,10 +4490,10 @@ static void bench_cb(void *opaque, int ret) */ b->in_flight++; b->offset += b->step; - if (b->image_size == 0) { + if (b->image_size <= b->bufsize) { b->offset = 0; } else { - b->offset %= b->image_size; + b->offset %= b->image_size - b->bufsize; } if (b->write) { acb = blk_aio_pwritev(b->blk, offset, b->qiov, 0, bench_cb, b); diff --git a/qemu-options.hx b/qemu-options.hx index aab53bc..1f862b1 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -965,7 +965,7 @@ SRST Sets the period length in microseconds. ``in|out.try-poll=on|off`` - Attempt to use poll mode with the device. Default is on. + Attempt to use poll mode with the device. Default is off. ``threshold=threshold`` Threshold (in microseconds) when playback starts. Default is 0. @@ -1002,7 +1002,7 @@ SRST ``in|out.buffer-count=count`` Sets the count of the buffers. - ``in|out.try-poll=on|of`` + ``in|out.try-poll=on|off`` Attempt to use poll mode with the device. Default is on. ``try-mmap=on|off`` @@ -4936,13 +4936,13 @@ SRST with actual performance. When the virtual cpu is sleeping, the virtual time will advance at - default speed unless ``sleep=on`` is specified. With - ``sleep=on``, the virtual time will jump to the next timer + default speed unless ``sleep=off`` is specified. With + ``sleep=off``, the virtual time will jump to the next timer deadline instantly whenever the virtual cpu goes to sleep mode and will not advance if no timer is enabled. This behavior gives deterministic execution times from the guest point of view. - The default if icount is enabled is ``sleep=off``. - ``sleep=on`` cannot be used together with either ``shift=auto`` + The default if icount is enabled is ``sleep=on``. + ``sleep=off`` cannot be used together with either ``shift=auto`` or ``align=on``. ``align=on`` will activate the delay algorithm which will try to diff --git a/qga/commands-win32.c b/qga/commands-win32.c index d448253..8227480 100644 --- a/qga/commands-win32.c +++ b/qga/commands-win32.c @@ -27,6 +27,7 @@ #include <lm.h> #include <wtsapi32.h> #include <wininet.h> +#include <pdh.h> #include "guest-agent-core.h" #include "vss-win32.h" @@ -119,6 +120,28 @@ static OpenFlags guest_file_open_modes[] = { {"a+b", FILE_GENERIC_APPEND | GENERIC_READ, OPEN_ALWAYS } }; +/* + * We use an exponentially weighted moving average, just like Unix systems do + * https://en.wikipedia.org/wiki/Load_(computing)#Unix-style_load_calculation + * + * These constants serve as the damping factor and are calculated with + * 1 / exp(sampling interval in seconds / window size in seconds) + * + * This formula comes from linux's include/linux/sched/loadavg.h + * https://github.com/torvalds/linux/blob/345671ea0f9258f410eb057b9ced9cefbbe5dc78/include/linux/sched/loadavg.h#L20-L23 + */ +#define LOADAVG_FACTOR_1F 0.9200444146293232478931553241 +#define LOADAVG_FACTOR_5F 0.9834714538216174894737477501 +#define LOADAVG_FACTOR_15F 0.9944598480048967508795473394 +/* + * The time interval in seconds between taking load counts, same as Linux + */ +#define LOADAVG_SAMPLING_INTERVAL 5 + +double load_avg_1m; +double load_avg_5m; +double load_avg_15m; + #define debug_error(msg) do { \ char *suffix = g_win32_error_message(GetLastError()); \ g_debug("%s: %s", (msg), suffix); \ @@ -2444,3 +2467,128 @@ char *qga_get_host_name(Error **errp) return g_utf16_to_utf8(tmp, size, NULL, NULL, NULL); } + + +static VOID CALLBACK load_avg_callback(PVOID hCounter, BOOLEAN timedOut) +{ + PDH_FMT_COUNTERVALUE displayValue; + double currentLoad; + PDH_STATUS err; + + err = PdhGetFormattedCounterValue( + (PDH_HCOUNTER)hCounter, PDH_FMT_DOUBLE, 0, &displayValue); + /* Skip updating the load if we can't get the value successfully */ + if (err != ERROR_SUCCESS) { + slog("PdhGetFormattedCounterValue failed to get load value with 0x%lx", + err); + return; + } + currentLoad = displayValue.doubleValue; + + load_avg_1m = load_avg_1m * LOADAVG_FACTOR_1F + currentLoad * \ + (1.0 - LOADAVG_FACTOR_1F); + load_avg_5m = load_avg_5m * LOADAVG_FACTOR_5F + currentLoad * \ + (1.0 - LOADAVG_FACTOR_5F); + load_avg_15m = load_avg_15m * LOADAVG_FACTOR_15F + currentLoad * \ + (1.0 - LOADAVG_FACTOR_15F); +} + +static BOOL init_load_avg_counter(Error **errp) +{ + CONST WCHAR *szCounterPath = L"\\System\\Processor Queue Length"; + PDH_STATUS status; + BOOL ret; + HQUERY hQuery; + HCOUNTER hCounter; + HANDLE event; + HANDLE waitHandle; + + status = PdhOpenQueryW(NULL, 0, &hQuery); + if (status != ERROR_SUCCESS) { + /* + * If the function fails, the return value is a system error code or + * a PDH error code. error_setg_win32 cant translate PDH error code + * properly, so just report it as is. + */ + error_setg_win32(errp, (DWORD)status, + "PdhOpenQueryW failed with 0x%lx", status); + return FALSE; + } + + status = PdhAddEnglishCounterW(hQuery, szCounterPath, 0, &hCounter); + if (status != ERROR_SUCCESS) { + error_setg_win32(errp, (DWORD)status, + "PdhAddEnglishCounterW failed with 0x%lx. Performance counters may be disabled.", + status); + PdhCloseQuery(hQuery); + return FALSE; + } + + event = CreateEventW(NULL, FALSE, FALSE, L"LoadUpdateEvent"); + if (event == NULL) { + error_setg_win32(errp, GetLastError(), "Create LoadUpdateEvent failed"); + PdhCloseQuery(hQuery); + return FALSE; + } + + status = PdhCollectQueryDataEx(hQuery, LOADAVG_SAMPLING_INTERVAL, event); + if (status != ERROR_SUCCESS) { + error_setg_win32(errp, (DWORD)status, + "PdhCollectQueryDataEx failed with 0x%lx", status); + CloseHandle(event); + PdhCloseQuery(hQuery); + return FALSE; + } + + ret = RegisterWaitForSingleObject( + &waitHandle, + event, + (WAITORTIMERCALLBACK)load_avg_callback, + (PVOID)hCounter, + INFINITE, + WT_EXECUTEDEFAULT); + + if (ret == 0) { + error_setg_win32(errp, GetLastError(), + "RegisterWaitForSingleObject failed"); + CloseHandle(event); + PdhCloseQuery(hQuery); + return FALSE; + } + + ga_set_load_avg_wait_handle(ga_state, waitHandle); + ga_set_load_avg_event(ga_state, event); + ga_set_load_avg_pdh_query(ga_state, hQuery); + + return TRUE; +} + +GuestLoadAverage *qmp_guest_get_load(Error **errp) +{ + /* + * The load average logic calls PerformaceCounterAPI, which can result + * in a performance penalty. This avoids running the load average logic + * until a management application actually requests it. The load average + * will not initially be very accurate, but assuming that any interested + * management application will request it repeatedly throughout the lifetime + * of the VM, this seems like a good mitigation. + */ + if (ga_get_load_avg_pdh_query(ga_state) == NULL) { + /* set initial values */ + load_avg_1m = 0; + load_avg_5m = 0; + load_avg_15m = 0; + + if (init_load_avg_counter(errp) == false) { + return NULL; + } + } + + GuestLoadAverage *ret = NULL; + + ret = g_new0(GuestLoadAverage, 1); + ret->load1m = load_avg_1m; + ret->load5m = load_avg_5m; + ret->load15m = load_avg_15m; + return ret; +} diff --git a/qga/guest-agent-core.h b/qga/guest-agent-core.h index a536d07..d9f3922 100644 --- a/qga/guest-agent-core.h +++ b/qga/guest-agent-core.h @@ -13,6 +13,10 @@ #ifndef GUEST_AGENT_CORE_H #define GUEST_AGENT_CORE_H +#ifdef _WIN32 +#include <pdh.h> +#endif + #include "qapi/qmp-registry.h" #include "qga-qapi-types.h" @@ -41,6 +45,12 @@ void ga_set_response_delimited(GAState *s); bool ga_is_frozen(GAState *s); void ga_set_frozen(GAState *s); void ga_unset_frozen(GAState *s); +#ifdef _WIN32 +void ga_set_load_avg_event(GAState *s, HANDLE event); +void ga_set_load_avg_wait_handle(GAState *s, HANDLE wait_handle); +void ga_set_load_avg_pdh_query(GAState *s, HQUERY query); +HQUERY ga_get_load_avg_pdh_query(GAState *s); +#endif const char *ga_fsfreeze_hook(GAState *s); int64_t ga_get_fd_handle(GAState *s, Error **errp); int ga_parse_whence(GuestFileWhence *whence, Error **errp); @@ -33,6 +33,7 @@ #include "qemu-version.h" #ifdef _WIN32 #include <dbt.h> +#include <pdh.h> #include "qga/service-win32.h" #include "qga/vss-win32.h" #endif @@ -105,6 +106,9 @@ struct GAState { GAService service; HANDLE wakeup_event; HANDLE event_log; + HANDLE load_avg_wait_handle; + HANDLE load_avg_event; + HQUERY load_avg_pdh_query; #endif bool delimit_response; bool frozen; @@ -582,6 +586,25 @@ const char *ga_fsfreeze_hook(GAState *s) } #endif +#ifdef _WIN32 +void ga_set_load_avg_wait_handle(GAState *s, HANDLE wait_handle) +{ + s->load_avg_wait_handle = wait_handle; +} +void ga_set_load_avg_event(GAState *s, HANDLE event) +{ + s->load_avg_event = event; +} +void ga_set_load_avg_pdh_query(GAState *s, HQUERY query) +{ + s->load_avg_pdh_query = query; +} +HQUERY ga_get_load_avg_pdh_query(GAState *s) +{ + return s->load_avg_pdh_query; +} +#endif + static void become_daemon(const char *pidfile) { #ifndef _WIN32 @@ -1402,6 +1425,10 @@ static GAState *initialize_agent(GAConfig *config, int socket_activation) g_debug("Guest agent version %s started", QEMU_FULL_VERSION); #ifdef _WIN32 + s->load_avg_wait_handle = INVALID_HANDLE_VALUE; + s->load_avg_event = INVALID_HANDLE_VALUE; + s->load_avg_pdh_query = NULL; + s->event_log = RegisterEventSource(NULL, "qemu-ga"); if (!s->event_log) { g_autofree gchar *errmsg = g_win32_error_message(GetLastError()); @@ -1506,6 +1533,18 @@ static void cleanup_agent(GAState *s) #ifdef _WIN32 CloseHandle(s->wakeup_event); CloseHandle(s->event_log); + + if (s->load_avg_wait_handle != INVALID_HANDLE_VALUE) { + UnregisterWait(s->load_avg_wait_handle); + } + + if (s->load_avg_event != INVALID_HANDLE_VALUE) { + CloseHandle(s->load_avg_event); + } + + if (s->load_avg_pdh_query) { + PdhCloseQuery(s->load_avg_pdh_query); + } #endif if (s->command_state) { ga_command_state_cleanup_all(s->command_state); diff --git a/qga/meson.build b/qga/meson.build index 587ec4e..89a4a8f 100644 --- a/qga/meson.build +++ b/qga/meson.build @@ -95,7 +95,7 @@ gen_tlb = [] qga_libs = [] if host_os == 'windows' qga_libs += ['-lws2_32', '-lwinmm', '-lpowrprof', '-lwtsapi32', '-lwininet', '-liphlpapi', '-lnetapi32', - '-lsetupapi', '-lcfgmgr32', '-luserenv'] + '-lsetupapi', '-lcfgmgr32', '-luserenv', '-lpdh' ] if have_qga_vss qga_libs += ['-lole32', '-loleaut32', '-lshlwapi', '-lstdc++', '-Wl,--enable-stdcall-fixup'] subdir('vss-win32') diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json index 5316bfa..6d770f7 100644 --- a/qga/qapi-schema.json +++ b/qga/qapi-schema.json @@ -1880,7 +1880,7 @@ 'load5m': 'number', 'load15m': 'number' }, - 'if': 'CONFIG_GETLOADAVG' + 'if': { 'any': ['CONFIG_WIN32', 'CONFIG_GETLOADAVG'] } } ## @@ -1888,13 +1888,18 @@ # # Retrieve CPU process load information # +# .. note:: Windows does not have load average API, so QGA emulates it by +# calculating the average CPU usage in the last 1, 5, 15 minutes +# similar as Linux does this. +# Calculation starts from the first time this command is called. +# # Returns: load information # # Since: 10.0 ## { 'command': 'guest-get-load', 'returns': 'GuestLoadAverage', - 'if': 'CONFIG_GETLOADAVG' + 'if': { 'any': ['CONFIG_WIN32', 'CONFIG_GETLOADAVG'] } } ## diff --git a/roms/seabios b/roms/seabios -Subproject a6ed6b701f0a57db0569ab98b0661c12a6ec3ff +Subproject b52ca86e094d19b58e2304417787e96b940e39c diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 13d580c..b785c71 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -3,6 +3,12 @@ version = 3 [[package]] +name = "anyhow" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" + +[[package]] name = "arbitrary-int" version = "1.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -32,12 +38,28 @@ dependencies = [ ] [[package]] +name = "bits" +version = "0.1.0" +dependencies = [ + "qemu_api_macros", +] + +[[package]] name = "either" version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" [[package]] +name = "foreign" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17ca1b5be8c9d320daf386f1809c7acc0cb09accbae795c2001953fa50585846" +dependencies = [ + "libc", +] + +[[package]] name = "hpet" version = "0.1.0" dependencies = [ @@ -66,6 +88,7 @@ version = "0.1.0" dependencies = [ "bilge", "bilge-impl", + "bits", "qemu_api", "qemu_api_macros", ] @@ -106,6 +129,8 @@ dependencies = [ name = "qemu_api" version = "0.1.0" dependencies = [ + "anyhow", + "foreign", "libc", "qemu_api_macros", ] diff --git a/rust/Cargo.toml b/rust/Cargo.toml index d9faeec..0868e1b 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -1,6 +1,7 @@ [workspace] resolver = "2" members = [ + "bits", "qemu-api-macros", "qemu-api", "hw/char/pl011", @@ -63,10 +64,10 @@ ignored_unit_patterns = "deny" implicit_clone = "deny" macro_use_imports = "deny" missing_safety_doc = "deny" -multiple_crate_versions = "deny" mut_mut = "deny" needless_bitwise_bool = "deny" needless_pass_by_ref_mut = "deny" +needless_update = "deny" no_effect_underscore_binding = "deny" option_option = "deny" or_fun_call = "deny" diff --git a/rust/bits/Cargo.toml b/rust/bits/Cargo.toml new file mode 100644 index 0000000..1ff38a4 --- /dev/null +++ b/rust/bits/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "bits" +version = "0.1.0" +authors = ["Paolo Bonzini <pbonzini@redhat.com>"] +description = "const-friendly bit flags" +resolver = "2" +publish = false + +edition.workspace = true +homepage.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true + +[dependencies] +qemu_api_macros = { path = "../qemu-api-macros" } + +[lints] +workspace = true diff --git a/rust/bits/meson.build b/rust/bits/meson.build new file mode 100644 index 0000000..2a41e13 --- /dev/null +++ b/rust/bits/meson.build @@ -0,0 +1,16 @@ +_bits_rs = static_library( + 'bits', + 'src/lib.rs', + override_options: ['rust_std=2021', 'build.rust_std=2021'], + rust_abi: 'rust', + dependencies: [qemu_api_macros], +) + +bits_rs = declare_dependency(link_with: _bits_rs) + +rust.test('rust-bits-tests', _bits_rs, + suite: ['unit', 'rust']) + +rust.doctest('rust-bits-doctests', _bits_rs, + dependencies: bits_rs, + suite: ['doc', 'rust']) diff --git a/rust/bits/src/lib.rs b/rust/bits/src/lib.rs new file mode 100644 index 0000000..d485d6b --- /dev/null +++ b/rust/bits/src/lib.rs @@ -0,0 +1,443 @@ +// SPDX-License-Identifier: MIT or Apache-2.0 or GPL-2.0-or-later + +/// # Definition entry point +/// +/// Define a struct with a single field of type $type. Include public constants +/// for each element listed in braces. +/// +/// The unnamed element at the end, if present, can be used to enlarge the set +/// of valid bits. Bits that are valid but not listed are treated normally for +/// the purpose of arithmetic operations, and are printed with their hexadecimal +/// value. +/// +/// The struct implements the following traits: [`BitAnd`](std::ops::BitAnd), +/// [`BitOr`](std::ops::BitOr), [`BitXor`](std::ops::BitXor), +/// [`Not`](std::ops::Not), [`Sub`](std::ops::Sub); [`Debug`](std::fmt::Debug), +/// [`Display`](std::fmt::Display), [`Binary`](std::fmt::Binary), +/// [`Octal`](std::fmt::Octal), [`LowerHex`](std::fmt::LowerHex), +/// [`UpperHex`](std::fmt::UpperHex); [`From`]`<type>`/[`Into`]`<type>` where +/// type is the type specified in the definition. +/// +/// ## Example +/// +/// ``` +/// # use bits::bits; +/// bits! { +/// pub struct Colors(u8) { +/// BLACK = 0, +/// RED = 1, +/// GREEN = 1 << 1, +/// BLUE = 1 << 2, +/// WHITE = (1 << 0) | (1 << 1) | (1 << 2), +/// } +/// } +/// ``` +/// +/// ``` +/// # use bits::bits; +/// # bits! { pub struct Colors(u8) { BLACK = 0, RED = 1, GREEN = 1 << 1, BLUE = 1 << 2, } } +/// +/// bits! { +/// pub struct Colors8(u8) { +/// BLACK = 0, +/// RED = 1, +/// GREEN = 1 << 1, +/// BLUE = 1 << 2, +/// WHITE = (1 << 0) | (1 << 1) | (1 << 2), +/// +/// _ = 255, +/// } +/// } +/// +/// // The previously defined struct ignores bits not explicitly defined. +/// assert_eq!( +/// Colors::from(255).into_bits(), +/// (Colors::RED | Colors::GREEN | Colors::BLUE).into_bits() +/// ); +/// +/// // Adding "_ = 255" makes it retain other bits as well. +/// assert_eq!(Colors8::from(255).into_bits(), 255); +/// +/// // all() does not include the additional bits, valid_bits() does +/// assert_eq!(Colors8::all().into_bits(), Colors::all().into_bits()); +/// assert_eq!(Colors8::valid_bits().into_bits(), 255); +/// ``` +/// +/// # Evaluation entry point +/// +/// Return a constant corresponding to the boolean expression `$expr`. +/// Identifiers in the expression correspond to values defined for the +/// type `$type`. Supported operators are `!` (unary), `-`, `&`, `^`, `|`. +/// +/// ## Examples +/// +/// ``` +/// # use bits::bits; +/// bits! { +/// pub struct Colors(u8) { +/// BLACK = 0, +/// RED = 1, +/// GREEN = 1 << 1, +/// BLUE = 1 << 2, +/// // same as "WHITE = 7", +/// WHITE = bits!(Self as u8: RED | GREEN | BLUE), +/// } +/// } +/// +/// let rgb = bits! { Colors: RED | GREEN | BLUE }; +/// assert_eq!(rgb, Colors::WHITE); +/// ``` +#[macro_export] +macro_rules! bits { + { + $(#[$struct_meta:meta])* + $struct_vis:vis struct $struct_name:ident($field_vis:vis $type:ty) { + $($(#[$const_meta:meta])* $const:ident = $val:expr),+ + $(,_ = $mask:expr)? + $(,)? + } + } => { + $(#[$struct_meta])* + #[derive(Clone, Copy, PartialEq, Eq)] + #[repr(transparent)] + $struct_vis struct $struct_name($field_vis $type); + + impl $struct_name { + $( #[allow(dead_code)] $(#[$const_meta])* + pub const $const: $struct_name = $struct_name($val); )+ + + #[doc(hidden)] + const VALID__: $type = $( Self::$const.0 )|+ $(|$mask)?; + + #[allow(dead_code)] + #[inline(always)] + pub const fn empty() -> Self { + Self(0) + } + + #[allow(dead_code)] + #[inline(always)] + pub const fn all() -> Self { + Self($( Self::$const.0 )|+) + } + + #[allow(dead_code)] + #[inline(always)] + pub const fn valid_bits() -> Self { + Self(Self::VALID__) + } + + #[allow(dead_code)] + #[inline(always)] + pub const fn valid(val: $type) -> bool { + (val & !Self::VALID__) == 0 + } + + #[allow(dead_code)] + #[inline(always)] + pub const fn any_set(self, mask: Self) -> bool { + (self.0 & mask.0) != 0 + } + + #[allow(dead_code)] + #[inline(always)] + pub const fn all_set(self, mask: Self) -> bool { + (self.0 & mask.0) == mask.0 + } + + #[allow(dead_code)] + #[inline(always)] + pub const fn none_set(self, mask: Self) -> bool { + (self.0 & mask.0) == 0 + } + + #[allow(dead_code)] + #[inline(always)] + pub const fn from_bits(value: $type) -> Self { + $struct_name(value) + } + + #[allow(dead_code)] + #[inline(always)] + pub const fn into_bits(self) -> $type { + self.0 + } + + #[allow(dead_code)] + #[inline(always)] + pub fn set(&mut self, rhs: Self) { + self.0 |= rhs.0; + } + + #[allow(dead_code)] + #[inline(always)] + pub fn clear(&mut self, rhs: Self) { + self.0 &= !rhs.0; + } + + #[allow(dead_code)] + #[inline(always)] + pub fn toggle(&mut self, rhs: Self) { + self.0 ^= rhs.0; + } + + #[allow(dead_code)] + #[inline(always)] + pub const fn intersection(self, rhs: Self) -> Self { + $struct_name(self.0 & rhs.0) + } + + #[allow(dead_code)] + #[inline(always)] + pub const fn difference(self, rhs: Self) -> Self { + $struct_name(self.0 & !rhs.0) + } + + #[allow(dead_code)] + #[inline(always)] + pub const fn symmetric_difference(self, rhs: Self) -> Self { + $struct_name(self.0 ^ rhs.0) + } + + #[allow(dead_code)] + #[inline(always)] + pub const fn union(self, rhs: Self) -> Self { + $struct_name(self.0 | rhs.0) + } + + #[allow(dead_code)] + #[inline(always)] + pub const fn invert(self) -> Self { + $struct_name(self.0 ^ Self::VALID__) + } + } + + impl ::std::fmt::Binary for $struct_name { + fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + // If no width, use the highest valid bit + let width = f.width().unwrap_or((Self::VALID__.ilog2() + 1) as usize); + write!(f, "{:0>width$.precision$b}", self.0, + width = width, + precision = f.precision().unwrap_or(width)) + } + } + + impl ::std::fmt::LowerHex for $struct_name { + fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + <$type as ::std::fmt::LowerHex>::fmt(&self.0, f) + } + } + + impl ::std::fmt::Octal for $struct_name { + fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + <$type as ::std::fmt::Octal>::fmt(&self.0, f) + } + } + + impl ::std::fmt::UpperHex for $struct_name { + fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + <$type as ::std::fmt::UpperHex>::fmt(&self.0, f) + } + } + + impl ::std::fmt::Debug for $struct_name { + fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + write!(f, "{}({})", stringify!($struct_name), self) + } + } + + impl ::std::fmt::Display for $struct_name { + fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + use ::std::fmt::Display; + let mut first = true; + let mut left = self.0; + $(if Self::$const.0.is_power_of_two() && (self & Self::$const).0 != 0 { + if first { first = false } else { Display::fmt(&'|', f)?; } + Display::fmt(stringify!($const), f)?; + left -= Self::$const.0; + })+ + if first { + Display::fmt(&'0', f) + } else if left != 0 { + write!(f, "|{left:#x}") + } else { + Ok(()) + } + } + } + + impl ::std::cmp::PartialEq<$type> for $struct_name { + fn eq(&self, rhs: &$type) -> bool { + self.0 == *rhs + } + } + + impl ::std::ops::BitAnd<$struct_name> for &$struct_name { + type Output = $struct_name; + fn bitand(self, rhs: $struct_name) -> Self::Output { + $struct_name(self.0 & rhs.0) + } + } + + impl ::std::ops::BitAndAssign<$struct_name> for $struct_name { + fn bitand_assign(&mut self, rhs: $struct_name) { + self.0 = self.0 & rhs.0 + } + } + + impl ::std::ops::BitXor<$struct_name> for &$struct_name { + type Output = $struct_name; + fn bitxor(self, rhs: $struct_name) -> Self::Output { + $struct_name(self.0 ^ rhs.0) + } + } + + impl ::std::ops::BitXorAssign<$struct_name> for $struct_name { + fn bitxor_assign(&mut self, rhs: $struct_name) { + self.0 = self.0 ^ rhs.0 + } + } + + impl ::std::ops::BitOr<$struct_name> for &$struct_name { + type Output = $struct_name; + fn bitor(self, rhs: $struct_name) -> Self::Output { + $struct_name(self.0 | rhs.0) + } + } + + impl ::std::ops::BitOrAssign<$struct_name> for $struct_name { + fn bitor_assign(&mut self, rhs: $struct_name) { + self.0 = self.0 | rhs.0 + } + } + + impl ::std::ops::Sub<$struct_name> for &$struct_name { + type Output = $struct_name; + fn sub(self, rhs: $struct_name) -> Self::Output { + $struct_name(self.0 & !rhs.0) + } + } + + impl ::std::ops::SubAssign<$struct_name> for $struct_name { + fn sub_assign(&mut self, rhs: $struct_name) { + self.0 = self.0 - rhs.0 + } + } + + impl ::std::ops::Not for &$struct_name { + type Output = $struct_name; + fn not(self) -> Self::Output { + $struct_name(self.0 ^ $struct_name::VALID__) + } + } + + impl ::std::ops::BitAnd<$struct_name> for $struct_name { + type Output = Self; + fn bitand(self, rhs: Self) -> Self::Output { + $struct_name(self.0 & rhs.0) + } + } + + impl ::std::ops::BitXor<$struct_name> for $struct_name { + type Output = Self; + fn bitxor(self, rhs: Self) -> Self::Output { + $struct_name(self.0 ^ rhs.0) + } + } + + impl ::std::ops::BitOr<$struct_name> for $struct_name { + type Output = Self; + fn bitor(self, rhs: Self) -> Self::Output { + $struct_name(self.0 | rhs.0) + } + } + + impl ::std::ops::Sub<$struct_name> for $struct_name { + type Output = Self; + fn sub(self, rhs: Self) -> Self::Output { + $struct_name(self.0 & !rhs.0) + } + } + + impl ::std::ops::Not for $struct_name { + type Output = Self; + fn not(self) -> Self::Output { + $struct_name(self.0 ^ Self::VALID__) + } + } + + impl From<$struct_name> for $type { + fn from(x: $struct_name) -> $type { + x.0 + } + } + + impl From<$type> for $struct_name { + fn from(x: $type) -> Self { + $struct_name(x & Self::VALID__) + } + } + }; + + { $type:ty: $expr:expr } => { + ::qemu_api_macros::bits_const_internal! { $type @ ($expr) } + }; + + { $type:ty as $int_type:ty: $expr:expr } => { + (::qemu_api_macros::bits_const_internal! { $type @ ($expr) }.into_bits()) as $int_type + }; +} + +#[cfg(test)] +mod test { + bits! { + pub struct InterruptMask(u32) { + OE = 1 << 10, + BE = 1 << 9, + PE = 1 << 8, + FE = 1 << 7, + RT = 1 << 6, + TX = 1 << 5, + RX = 1 << 4, + DSR = 1 << 3, + DCD = 1 << 2, + CTS = 1 << 1, + RI = 1 << 0, + + E = bits!(Self as u32: OE | BE | PE | FE), + MS = bits!(Self as u32: RI | DSR | DCD | CTS), + } + } + + #[test] + pub fn test_not() { + assert_eq!( + !InterruptMask::from(InterruptMask::RT.0), + InterruptMask::E | InterruptMask::MS | InterruptMask::TX | InterruptMask::RX + ); + } + + #[test] + pub fn test_and() { + assert_eq!( + InterruptMask::from(0), + InterruptMask::MS & InterruptMask::OE + ) + } + + #[test] + pub fn test_or() { + assert_eq!( + InterruptMask::E, + InterruptMask::OE | InterruptMask::BE | InterruptMask::PE | InterruptMask::FE + ); + } + + #[test] + pub fn test_xor() { + assert_eq!( + InterruptMask::E ^ InterruptMask::BE, + InterruptMask::OE | InterruptMask::PE | InterruptMask::FE + ); + } +} diff --git a/rust/hw/char/pl011/Cargo.toml b/rust/hw/char/pl011/Cargo.toml index a1f431a..003ef96 100644 --- a/rust/hw/char/pl011/Cargo.toml +++ b/rust/hw/char/pl011/Cargo.toml @@ -18,6 +18,7 @@ crate-type = ["staticlib"] [dependencies] bilge = { version = "0.2.0" } bilge-impl = { version = "0.2.0" } +bits = { path = "../../../bits" } qemu_api = { path = "../../../qemu-api" } qemu_api_macros = { path = "../../../qemu-api-macros" } diff --git a/rust/hw/char/pl011/meson.build b/rust/hw/char/pl011/meson.build index 547cca5..2a1be32 100644 --- a/rust/hw/char/pl011/meson.build +++ b/rust/hw/char/pl011/meson.build @@ -1,17 +1,12 @@ -subproject('bilge-0.2-rs', required: true) -subproject('bilge-impl-0.2-rs', required: true) - -bilge_dep = dependency('bilge-0.2-rs') -bilge_impl_dep = dependency('bilge-impl-0.2-rs') - _libpl011_rs = static_library( 'pl011', files('src/lib.rs'), override_options: ['rust_std=2021', 'build.rust_std=2021'], rust_abi: 'rust', dependencies: [ - bilge_dep, - bilge_impl_dep, + bilge_rs, + bilge_impl_rs, + bits_rs, qemu_api, qemu_api_macros, ], @@ -21,6 +16,6 @@ rust_devices_ss.add(when: 'CONFIG_X_PL011_RUST', if_true: [declare_dependency( link_whole: [_libpl011_rs], # Putting proc macro crates in `dependencies` is necessary for Meson to find # them when compiling the root per-target static rust lib. - dependencies: [bilge_impl_dep, qemu_api_macros], + dependencies: [bilge_impl_rs, qemu_api_macros], variables: {'crate': 'pl011'}, )]) diff --git a/rust/hw/char/pl011/src/device.rs b/rust/hw/char/pl011/src/device.rs index bde3be6..5b53f26 100644 --- a/rust/hw/char/pl011/src/device.rs +++ b/rust/hw/char/pl011/src/device.rs @@ -2,18 +2,21 @@ // Author(s): Manos Pitsidianakis <manos.pitsidianakis@linaro.org> // SPDX-License-Identifier: GPL-2.0-or-later -use std::{ffi::CStr, mem::size_of, ptr::addr_of_mut}; +use std::{ffi::CStr, mem::size_of}; use qemu_api::{ chardev::{CharBackend, Chardev, Event}, impl_vmstate_forward, irq::{IRQState, InterruptSource}, + log::Log, + log_mask_ln, memory::{hwaddr, MemoryRegion, MemoryRegionOps, MemoryRegionOpsBuilder}, prelude::*, qdev::{Clock, ClockEvent, DeviceImpl, DeviceState, Property, ResetType, ResettablePhasesImpl}, - qom::{ObjectImpl, Owned, ParentField}, + qom::{ObjectImpl, Owned, ParentField, ParentInit}, static_assert, sysbus::{SysBusDevice, SysBusDeviceImpl}, + uninit_field_mut, vmstate::VMStateDescription, }; @@ -85,8 +88,8 @@ pub struct PL011Registers { #[doc(alias = "cr")] pub control: registers::Control, pub dmacr: u32, - pub int_enabled: u32, - pub int_level: u32, + pub int_enabled: Interrupt, + pub int_level: Interrupt, pub read_fifo: Fifo, pub ilpr: u32, pub ibrd: u32, @@ -163,7 +166,7 @@ impl PL011Impl for PL011State { impl ObjectImpl for PL011State { type ParentType = SysBusDevice; - const INSTANCE_INIT: Option<unsafe fn(&mut Self)> = Some(Self::init); + const INSTANCE_INIT: Option<unsafe fn(ParentInit<Self>)> = Some(Self::init); const INSTANCE_POST_INIT: Option<fn(&Self)> = Some(Self::post_init); const CLASS_INIT: fn(&mut Self::Class) = Self::Class::class_init::<Self>; } @@ -175,7 +178,7 @@ impl DeviceImpl for PL011State { fn vmsd() -> Option<&'static VMStateDescription> { Some(&device_class::VMSTATE_PL011) } - const REALIZE: Option<fn(&Self)> = Some(Self::realize); + const REALIZE: Option<fn(&Self) -> qemu_api::Result<()>> = Some(Self::realize); } impl ResettablePhasesImpl for PL011State { @@ -199,9 +202,9 @@ impl PL011Registers { LCR_H => u32::from(self.line_control), CR => u32::from(self.control), FLS => self.ifl, - IMSC => self.int_enabled, - RIS => self.int_level, - MIS => self.int_level & self.int_enabled, + IMSC => u32::from(self.int_enabled), + RIS => u32::from(self.int_level), + MIS => u32::from(self.int_level & self.int_enabled), ICR => { // "The UARTICR Register is the interrupt clear register and is write-only" // Source: ARM DDI 0183G 3.3.13 Interrupt Clear Register, UARTICR @@ -263,20 +266,19 @@ impl PL011Registers { self.set_read_trigger(); } IMSC => { - self.int_enabled = value; + self.int_enabled = Interrupt::from(value); return true; } RIS => {} MIS => {} ICR => { - self.int_level &= !value; + self.int_level &= !Interrupt::from(value); return true; } DMACR => { self.dmacr = value; if value & 3 > 0 { - // qemu_log_mask(LOG_UNIMP, "pl011: DMA not implemented\n"); - eprintln!("pl011: DMA not implemented"); + log_mask_ln!(Log::Unimp, "pl011: DMA not implemented"); } } } @@ -295,7 +297,7 @@ impl PL011Registers { self.flags.set_receive_fifo_empty(true); } if self.read_count + 1 == self.read_trigger { - self.int_level &= !Interrupt::RX.0; + self.int_level &= !Interrupt::RX; } self.receive_status_error_clear.set_from_data(c); *update = true; @@ -303,9 +305,15 @@ impl PL011Registers { } fn write_data_register(&mut self, value: u32) -> bool { + if !self.control.enable_uart() { + log_mask_ln!(Log::GuestError, "PL011 data written to disabled UART"); + } + if !self.control.enable_transmit() { + log_mask_ln!(Log::GuestError, "PL011 data written to disabled TX UART"); + } // interrupts always checked let _ = self.loopback_tx(value.into()); - self.int_level |= Interrupt::TX.0; + self.int_level |= Interrupt::TX; true } @@ -361,19 +369,19 @@ impl PL011Registers { // Change interrupts based on updated FR let mut il = self.int_level; - il &= !Interrupt::MS.0; + il &= !Interrupt::MS; if self.flags.data_set_ready() { - il |= Interrupt::DSR.0; + il |= Interrupt::DSR; } if self.flags.data_carrier_detect() { - il |= Interrupt::DCD.0; + il |= Interrupt::DCD; } if self.flags.clear_to_send() { - il |= Interrupt::CTS.0; + il |= Interrupt::CTS; } if self.flags.ring_indicator() { - il |= Interrupt::RI.0; + il |= Interrupt::RI; } self.int_level = il; true @@ -391,8 +399,8 @@ impl PL011Registers { self.line_control.reset(); self.receive_status_error_clear.reset(); self.dmacr = 0; - self.int_enabled = 0; - self.int_level = 0; + self.int_enabled = 0.into(); + self.int_level = 0.into(); self.ilpr = 0; self.ibrd = 0; self.fbrd = 0; @@ -451,7 +459,7 @@ impl PL011Registers { } if self.read_count == self.read_trigger { - self.int_level |= Interrupt::RX.0; + self.int_level |= Interrupt::RX; return true; } false @@ -480,15 +488,15 @@ impl PL011Registers { } impl PL011State { - /// Initializes a pre-allocated, unitialized instance of `PL011State`. + /// Initializes a pre-allocated, uninitialized instance of `PL011State`. /// /// # Safety /// /// `self` must point to a correctly sized and aligned location for the /// `PL011State` type. It must not be called more than once on the same - /// location/instance. All its fields are expected to hold unitialized + /// location/instance. All its fields are expected to hold uninitialized /// values with the sole exception of `parent_obj`. - unsafe fn init(&mut self) { + unsafe fn init(mut this: ParentInit<Self>) { static PL011_OPS: MemoryRegionOps<PL011State> = MemoryRegionOpsBuilder::<PL011State>::new() .read(&PL011State::read) .write(&PL011State::write) @@ -496,28 +504,23 @@ impl PL011State { .impl_sizes(4, 4) .build(); - // SAFETY: - // - // self and self.iomem are guaranteed to be valid at this point since callers - // must make sure the `self` reference is valid. + // SAFETY: this and this.iomem are guaranteed to be valid at this point MemoryRegion::init_io( - unsafe { &mut *addr_of_mut!(self.iomem) }, - addr_of_mut!(*self), + &mut uninit_field_mut!(*this, iomem), &PL011_OPS, "pl011", 0x1000, ); - self.regs = Default::default(); + uninit_field_mut!(*this, regs).write(Default::default()); - // SAFETY: - // - // self.clock is not initialized at this point; but since `Owned<_>` is - // not Drop, we can overwrite the undefined value without side effects; - // it's not sound but, because for all PL011State instances are created - // by QOM code which calls this function to initialize the fields, at - // leastno code is able to access an invalid self.clock value. - self.clock = self.init_clock_in("clk", &Self::clock_update, ClockEvent::ClockUpdate); + let clock = DeviceState::init_clock_in( + &mut this, + "clk", + &Self::clock_update, + ClockEvent::ClockUpdate, + ); + uninit_field_mut!(*this, clock).write(clock); } const fn clock_update(&self, _event: ClockEvent) { @@ -538,7 +541,7 @@ impl PL011State { u64::from(device_id[(offset - 0xfe0) >> 2]) } Err(_) => { - // qemu_log_mask(LOG_GUEST_ERROR, "pl011_read: Bad offset 0x%x\n", (int)offset); + log_mask_ln!(Log::GuestError, "PL011State::read: Bad offset {offset}"); 0 } Ok(field) => { @@ -570,7 +573,10 @@ impl PL011State { .borrow_mut() .write(field, value as u32, &self.char_backend); } else { - eprintln!("write bad offset {offset} value {value}"); + log_mask_ln!( + Log::GuestError, + "PL011State::write: Bad offset {offset} value {value}" + ); } if update_irq { self.update(); @@ -619,9 +625,10 @@ impl PL011State { } } - fn realize(&self) { + fn realize(&self) -> qemu_api::Result<()> { self.char_backend .enable_handlers(self, Self::can_receive, Self::receive, Self::event); + Ok(()) } fn reset_hold(&self, _type: ResetType) { @@ -632,7 +639,7 @@ impl PL011State { let regs = self.regs.borrow(); let flags = regs.int_level & regs.int_enabled; for (irq, i) in self.interrupts.iter().zip(IRQMASK) { - irq.set(flags & i != 0); + irq.set(flags.any_set(i)); } } @@ -642,14 +649,13 @@ impl PL011State { } /// Which bits in the interrupt status matter for each outbound IRQ line ? -const IRQMASK: [u32; 6] = [ - /* combined IRQ */ - Interrupt::E.0 | Interrupt::MS.0 | Interrupt::RT.0 | Interrupt::TX.0 | Interrupt::RX.0, - Interrupt::RX.0, - Interrupt::TX.0, - Interrupt::RT.0, - Interrupt::MS.0, - Interrupt::E.0, +const IRQMASK: [Interrupt; 6] = [ + Interrupt::all(), + Interrupt::RX, + Interrupt::TX, + Interrupt::RT, + Interrupt::MS, + Interrupt::E, ]; /// # Safety diff --git a/rust/hw/char/pl011/src/registers.rs b/rust/hw/char/pl011/src/registers.rs index 690feb6..7ececd3 100644 --- a/rust/hw/char/pl011/src/registers.rs +++ b/rust/hw/char/pl011/src/registers.rs @@ -9,7 +9,8 @@ // https://developer.arm.com/documentation/ddi0183/latest/ use bilge::prelude::*; -use qemu_api::impl_vmstate_bitsized; +use bits::bits; +use qemu_api::{impl_vmstate_bitsized, impl_vmstate_forward}; /// Offset of each register from the base memory address of the device. #[doc(alias = "offset")] @@ -326,22 +327,24 @@ impl Default for Control { } } -/// Interrupt status bits in UARTRIS, UARTMIS, UARTIMSC -pub struct Interrupt(pub u32); +bits! { + /// Interrupt status bits in UARTRIS, UARTMIS, UARTIMSC + #[derive(Default)] + pub struct Interrupt(u32) { + OE = 1 << 10, + BE = 1 << 9, + PE = 1 << 8, + FE = 1 << 7, + RT = 1 << 6, + TX = 1 << 5, + RX = 1 << 4, + DSR = 1 << 3, + DCD = 1 << 2, + CTS = 1 << 1, + RI = 1 << 0, -impl Interrupt { - pub const OE: Self = Self(1 << 10); - pub const BE: Self = Self(1 << 9); - pub const PE: Self = Self(1 << 8); - pub const FE: Self = Self(1 << 7); - pub const RT: Self = Self(1 << 6); - pub const TX: Self = Self(1 << 5); - pub const RX: Self = Self(1 << 4); - pub const DSR: Self = Self(1 << 3); - pub const DCD: Self = Self(1 << 2); - pub const CTS: Self = Self(1 << 1); - pub const RI: Self = Self(1 << 0); - - pub const E: Self = Self(Self::OE.0 | Self::BE.0 | Self::PE.0 | Self::FE.0); - pub const MS: Self = Self(Self::RI.0 | Self::DSR.0 | Self::DCD.0 | Self::CTS.0); + E = bits!(Self as u32: OE | BE | PE | FE), + MS = bits!(Self as u32: RI | DSR | DCD | CTS), + } } +impl_vmstate_forward!(Interrupt); diff --git a/rust/hw/timer/hpet/src/hpet.rs b/rust/hw/timer/hpet/src/device.rs index 779681d..acf7251 100644 --- a/rust/hw/timer/hpet/src/hpet.rs +++ b/rust/hw/timer/hpet/src/device.rs @@ -1,9 +1,10 @@ // Copyright (C) 2024 Intel Corporation. -// Author(s): Zhao Liu <zhai1.liu@intel.com> +// Author(s): Zhao Liu <zhao1.liu@intel.com> // SPDX-License-Identifier: GPL-2.0-or-later use std::{ ffi::{c_int, c_void, CStr}, + mem::MaybeUninit, pin::Pin, ptr::{addr_of_mut, null_mut, NonNull}, slice::from_ref, @@ -12,7 +13,7 @@ use std::{ use qemu_api::{ bindings::{ address_space_memory, address_space_stl_le, qdev_prop_bit, qdev_prop_bool, - qdev_prop_uint32, qdev_prop_uint8, + qdev_prop_uint32, qdev_prop_usize, }, cell::{BqlCell, BqlRefCell}, irq::InterruptSource, @@ -20,11 +21,12 @@ use qemu_api::{ hwaddr, MemoryRegion, MemoryRegionOps, MemoryRegionOpsBuilder, MEMTXATTRS_UNSPECIFIED, }, prelude::*, - qdev::{DeviceImpl, DeviceMethods, DeviceState, Property, ResetType, ResettablePhasesImpl}, - qom::{ObjectImpl, ObjectType, ParentField}, + qdev::{DeviceImpl, DeviceState, Property, ResetType, ResettablePhasesImpl}, + qom::{ObjectImpl, ObjectType, ParentField, ParentInit}, qom_isa, sysbus::{SysBusDevice, SysBusDeviceImpl}, timer::{Timer, CLOCK_VIRTUAL, NANOSECONDS_PER_SECOND}, + uninit_field_mut, vmstate::VMStateDescription, vmstate_fields, vmstate_of, vmstate_struct, vmstate_subsections, vmstate_validate, zeroable::Zeroable, @@ -36,9 +38,9 @@ use crate::fw_cfg::HPETFwConfig; const HPET_REG_SPACE_LEN: u64 = 0x400; // 1024 bytes /// Minimum recommended hardware implementation. -const HPET_MIN_TIMERS: u8 = 3; +const HPET_MIN_TIMERS: usize = 3; /// Maximum timers in each timer block. -const HPET_MAX_TIMERS: u8 = 32; +const HPET_MAX_TIMERS: usize = 32; /// Flags that HPETState.flags supports. const HPET_FLAG_MSI_SUPPORT_SHIFT: usize = 0; @@ -212,13 +214,13 @@ pub struct HPETTimer { } impl HPETTimer { - fn init(&mut self, index: u8, state: &HPETState) { - *self = HPETTimer { + fn new(index: u8, state: *const HPETState) -> HPETTimer { + HPETTimer { index, // SAFETY: the HPETTimer will only be used after the timer // is initialized below. qemu_timer: unsafe { Timer::new() }, - state: NonNull::new((state as *const HPETState).cast_mut()).unwrap(), + state: NonNull::new(state.cast_mut()).unwrap(), config: 0, cmp: 0, fsb: 0, @@ -226,19 +228,15 @@ impl HPETTimer { period: 0, wrap_flag: 0, last: 0, - }; + } + } + fn init_timer_with_cell(cell: &BqlRefCell<Self>) { + let mut timer = cell.borrow_mut(); // SAFETY: HPETTimer is only used as part of HPETState, which is // always pinned. - let qemu_timer = unsafe { Pin::new_unchecked(&mut self.qemu_timer) }; - qemu_timer.init_full( - None, - CLOCK_VIRTUAL, - Timer::NS, - 0, - timer_handler, - &state.timers[self.index as usize], - ) + let qemu_timer = unsafe { Pin::new_unchecked(&mut timer.qemu_timer) }; + qemu_timer.init_full(None, CLOCK_VIRTUAL, Timer::NS, 0, timer_handler, cell); } fn get_state(&self) -> &HPETState { @@ -561,8 +559,8 @@ pub struct HPETState { /// HPET timer array managed by this timer block. #[doc(alias = "timer")] - timers: [BqlRefCell<HPETTimer>; HPET_MAX_TIMERS as usize], - num_timers: BqlCell<u8>, + timers: [BqlRefCell<HPETTimer>; HPET_MAX_TIMERS], + num_timers: usize, num_timers_save: BqlCell<u8>, /// Instance id (HPET timer block ID). @@ -570,11 +568,6 @@ pub struct HPETState { } impl HPETState { - // Get num_timers with `usize` type, which is useful to play with array index. - fn get_num_timers(&self) -> usize { - self.num_timers.get().into() - } - const fn has_msi_flag(&self) -> bool { self.flags & (1 << HPET_FLAG_MSI_SUPPORT_SHIFT) != 0 } @@ -612,9 +605,18 @@ impl HPETState { } } - fn init_timer(&self) { - for (index, timer) in self.timers.iter().enumerate() { - timer.borrow_mut().init(index.try_into().unwrap(), self); + fn init_timers(this: &mut MaybeUninit<Self>) { + let state = this.as_ptr(); + for index in 0..HPET_MAX_TIMERS { + let mut timer = uninit_field_mut!(*this, timers[index]); + + // Initialize in two steps, to avoid calling Timer::init_full on a + // temporary that can be moved. + let timer = timer.write(BqlRefCell::new(HPETTimer::new( + index.try_into().unwrap(), + state, + ))); + HPETTimer::init_timer_with_cell(timer); } } @@ -636,7 +638,7 @@ impl HPETState { self.hpet_offset .set(ticks_to_ns(self.counter.get()) - CLOCK_VIRTUAL.get_ns()); - for timer in self.timers.iter().take(self.get_num_timers()) { + for timer in self.timers.iter().take(self.num_timers) { let mut t = timer.borrow_mut(); if t.is_int_enabled() && t.is_int_active() { @@ -648,7 +650,7 @@ impl HPETState { // Halt main counter and disable interrupt generation. self.counter.set(self.get_ticks()); - for timer in self.timers.iter().take(self.get_num_timers()) { + for timer in self.timers.iter().take(self.num_timers) { timer.borrow_mut().del_timer(); } } @@ -671,7 +673,7 @@ impl HPETState { let new_val = val << shift; let cleared = new_val & self.int_status.get(); - for (index, timer) in self.timers.iter().take(self.get_num_timers()).enumerate() { + for (index, timer) in self.timers.iter().take(self.num_timers).enumerate() { if cleared & (1 << index) != 0 { timer.borrow_mut().update_irq(false); } @@ -695,7 +697,7 @@ impl HPETState { .set(self.counter.get().deposit(shift, len, val)); } - unsafe fn init(&mut self) { + unsafe fn init(mut this: ParentInit<Self>) { static HPET_RAM_OPS: MemoryRegionOps<HPETState> = MemoryRegionOpsBuilder::<HPETState>::new() .read(&HPETState::read) @@ -705,16 +707,14 @@ impl HPETState { .impl_sizes(4, 8) .build(); - // SAFETY: - // self and self.iomem are guaranteed to be valid at this point since callers - // must make sure the `self` reference is valid. MemoryRegion::init_io( - unsafe { &mut *addr_of_mut!(self.iomem) }, - addr_of_mut!(*self), + &mut uninit_field_mut!(*this, iomem), &HPET_RAM_OPS, "hpet", HPET_REG_SPACE_LEN, ); + + Self::init_timers(&mut this); } fn post_init(&self) { @@ -724,37 +724,35 @@ impl HPETState { } } - fn realize(&self) { + fn realize(&self) -> qemu_api::Result<()> { + if self.num_timers < HPET_MIN_TIMERS || self.num_timers > HPET_MAX_TIMERS { + Err(format!( + "hpet.num_timers must be between {HPET_MIN_TIMERS} and {HPET_MAX_TIMERS}" + ))?; + } if self.int_route_cap == 0 { - // TODO: Add error binding: warn_report() - println!("Hpet's hpet-intcap property not initialized"); + Err("hpet.hpet-intcap property not initialized")?; } - self.hpet_id.set(HPETFwConfig::assign_hpet_id()); + self.hpet_id.set(HPETFwConfig::assign_hpet_id()?); - if self.num_timers.get() < HPET_MIN_TIMERS { - self.num_timers.set(HPET_MIN_TIMERS); - } else if self.num_timers.get() > HPET_MAX_TIMERS { - self.num_timers.set(HPET_MAX_TIMERS); - } - - self.init_timer(); // 64-bit General Capabilities and ID Register; LegacyReplacementRoute. self.capability.set( HPET_CAP_REV_ID_VALUE << HPET_CAP_REV_ID_SHIFT | 1 << HPET_CAP_COUNT_SIZE_CAP_SHIFT | 1 << HPET_CAP_LEG_RT_CAP_SHIFT | HPET_CAP_VENDER_ID_VALUE << HPET_CAP_VENDER_ID_SHIFT | - ((self.get_num_timers() - 1) as u64) << HPET_CAP_NUM_TIM_SHIFT | // indicate the last timer + ((self.num_timers - 1) as u64) << HPET_CAP_NUM_TIM_SHIFT | // indicate the last timer (HPET_CLK_PERIOD * FS_PER_NS) << HPET_CAP_CNT_CLK_PERIOD_SHIFT, // 10 ns ); self.init_gpio_in(2, HPETState::handle_legacy_irq); self.init_gpio_out(from_ref(&self.pit_enabled)); + Ok(()) } fn reset_hold(&self, _type: ResetType) { - for timer in self.timers.iter().take(self.get_num_timers()) { + for timer in self.timers.iter().take(self.num_timers) { timer.borrow_mut().reset(); } @@ -773,7 +771,7 @@ impl HPETState { self.rtc_irq_level.set(0); } - fn decode(&self, mut addr: hwaddr, size: u32) -> HPETAddrDecode { + fn decode(&self, mut addr: hwaddr, size: u32) -> HPETAddrDecode<'_> { let shift = ((addr & 4) * 8) as u32; let len = std::cmp::min(size * 8, 64 - shift); @@ -782,7 +780,7 @@ impl HPETState { GlobalRegister::try_from(addr).map(HPETRegister::Global) } else { let timer_id: usize = ((addr - 0x100) / 0x20) as usize; - if timer_id <= self.get_num_timers() { + if timer_id < self.num_timers { // TODO: Add trace point - trace_hpet_ram_[read|write]_timer_id(timer_id) TimerRegister::try_from(addr & 0x18) .map(|reg| HPETRegister::Timer(&self.timers[timer_id], reg)) @@ -853,12 +851,12 @@ impl HPETState { * also added to the migration stream. Check that it matches the value * that was configured. */ - self.num_timers_save.set(self.num_timers.get()); + self.num_timers_save.set(self.num_timers as u8); 0 } fn post_load(&self, _version_id: u8) -> i32 { - for timer in self.timers.iter().take(self.get_num_timers()) { + for timer in self.timers.iter().take(self.num_timers) { let mut t = timer.borrow_mut(); t.cmp64 = t.calculate_cmp64(t.get_state().counter.get(), t.cmp); @@ -883,7 +881,7 @@ impl HPETState { } fn validate_num_timers(&self, _version_id: u8) -> bool { - self.num_timers.get() == self.num_timers_save.get() + self.num_timers == self.num_timers_save.get().into() } } @@ -898,7 +896,7 @@ unsafe impl ObjectType for HPETState { impl ObjectImpl for HPETState { type ParentType = SysBusDevice; - const INSTANCE_INIT: Option<unsafe fn(&mut Self)> = Some(Self::init); + const INSTANCE_INIT: Option<unsafe fn(ParentInit<Self>)> = Some(Self::init); const INSTANCE_POST_INIT: Option<fn(&Self)> = Some(Self::post_init); const CLASS_INIT: fn(&mut Self::Class) = Self::Class::class_init::<Self>; } @@ -910,7 +908,7 @@ qemu_api::declare_properties! { c"timers", HPETState, num_timers, - unsafe { &qdev_prop_uint8 }, + unsafe { &qdev_prop_usize }, u8, default = HPET_MIN_TIMERS ), @@ -1015,16 +1013,16 @@ const VALIDATE_TIMERS_NAME: &CStr = c"num_timers must match"; static VMSTATE_HPET: VMStateDescription = VMStateDescription { name: c"hpet".as_ptr(), version_id: 2, - minimum_version_id: 1, + minimum_version_id: 2, pre_save: Some(hpet_pre_save), post_load: Some(hpet_post_load), fields: vmstate_fields! { vmstate_of!(HPETState, config), vmstate_of!(HPETState, int_status), vmstate_of!(HPETState, counter), - vmstate_of!(HPETState, num_timers_save).with_version_id(2), + vmstate_of!(HPETState, num_timers_save), vmstate_validate!(HPETState, VALIDATE_TIMERS_NAME, HPETState::validate_num_timers), - vmstate_struct!(HPETState, timers[0 .. num_timers], &VMSTATE_HPET_TIMER, BqlRefCell<HPETTimer>, HPETState::validate_num_timers).with_version_id(0), + vmstate_struct!(HPETState, timers[0 .. num_timers_save], &VMSTATE_HPET_TIMER, BqlRefCell<HPETTimer>, HPETState::validate_num_timers).with_version_id(0), }, subsections: vmstate_subsections! { VMSTATE_HPET_RTC_IRQ_LEVEL, @@ -1042,7 +1040,7 @@ impl DeviceImpl for HPETState { Some(&VMSTATE_HPET) } - const REALIZE: Option<fn(&Self)> = Some(Self::realize); + const REALIZE: Option<fn(&Self) -> qemu_api::Result<()>> = Some(Self::realize); } impl ResettablePhasesImpl for HPETState { diff --git a/rust/hw/timer/hpet/src/fw_cfg.rs b/rust/hw/timer/hpet/src/fw_cfg.rs index aa08d28..619d662 100644 --- a/rust/hw/timer/hpet/src/fw_cfg.rs +++ b/rust/hw/timer/hpet/src/fw_cfg.rs @@ -1,5 +1,5 @@ // Copyright (C) 2024 Intel Corporation. -// Author(s): Zhao Liu <zhai1.liu@intel.com> +// Author(s): Zhao Liu <zhao1.liu@intel.com> // SPDX-License-Identifier: GPL-2.0-or-later use std::ptr::addr_of_mut; @@ -36,7 +36,7 @@ pub static mut hpet_fw_cfg: HPETFwConfig = HPETFwConfig { }; impl HPETFwConfig { - pub(crate) fn assign_hpet_id() -> usize { + pub(crate) fn assign_hpet_id() -> Result<usize, &'static str> { assert!(bql_locked()); // SAFETY: all accesses go through these methods, which guarantee // that the accesses are protected by the BQL. @@ -48,13 +48,12 @@ impl HPETFwConfig { } if fw_cfg.count == 8 { - // TODO: Add error binding: error_setg() - panic!("Only 8 instances of HPET is allowed"); + Err("Only 8 instances of HPET are allowed")?; } let id: usize = fw_cfg.count.into(); fw_cfg.count += 1; - id + Ok(id) } pub(crate) fn update_hpet_cfg(hpet_id: usize, timer_block_id: u32, address: u64) { diff --git a/rust/hw/timer/hpet/src/lib.rs b/rust/hw/timer/hpet/src/lib.rs index 1954584..a95cf14 100644 --- a/rust/hw/timer/hpet/src/lib.rs +++ b/rust/hw/timer/hpet/src/lib.rs @@ -1,5 +1,5 @@ // Copyright (C) 2024 Intel Corporation. -// Author(s): Zhao Liu <zhai1.liu@intel.com> +// Author(s): Zhao Liu <zhao1.liu@intel.com> // SPDX-License-Identifier: GPL-2.0-or-later //! # HPET QEMU Device Model @@ -7,7 +7,7 @@ //! This library implements a device model for the IA-PC HPET (High //! Precision Event Timers) device in QEMU. +pub mod device; pub mod fw_cfg; -pub mod hpet; pub const TYPE_HPET: &::std::ffi::CStr = c"hpet"; diff --git a/rust/meson.build b/rust/meson.build index 91e52b8..331f11b 100644 --- a/rust/meson.build +++ b/rust/meson.build @@ -1,4 +1,29 @@ +subproject('anyhow-1-rs', required: true) +subproject('bilge-0.2-rs', required: true) +subproject('bilge-impl-0.2-rs', required: true) +subproject('foreign-0.3-rs', required: true) +subproject('libc-0.2-rs', required: true) + +anyhow_rs = dependency('anyhow-1-rs') +bilge_rs = dependency('bilge-0.2-rs') +bilge_impl_rs = dependency('bilge-impl-0.2-rs') +foreign_rs = dependency('foreign-0.3-rs') +libc_rs = dependency('libc-0.2-rs') + +subproject('proc-macro2-1-rs', required: true) +subproject('quote-1-rs', required: true) +subproject('syn-2-rs', required: true) + +quote_rs_native = dependency('quote-1-rs', native: true) +syn_rs_native = dependency('syn-2-rs', native: true) +proc_macro2_rs_native = dependency('proc-macro2-1-rs', native: true) + +qemuutil_rs = qemuutil.partial_dependency(link_args: true, links: true) + +genrs = [] + subdir('qemu-api-macros') +subdir('bits') subdir('qemu-api') subdir('hw') @@ -6,21 +31,9 @@ subdir('hw') cargo = find_program('cargo', required: false) if cargo.found() - run_target('clippy', - command: [config_host['MESON'], 'devenv', - '--workdir', '@CURRENT_SOURCE_DIR@', - cargo, 'clippy', '--tests'], - depends: bindings_rs) - run_target('rustfmt', command: [config_host['MESON'], 'devenv', '--workdir', '@CURRENT_SOURCE_DIR@', cargo, 'fmt'], - depends: bindings_rs) - - run_target('rustdoc', - command: [config_host['MESON'], 'devenv', - '--workdir', '@CURRENT_SOURCE_DIR@', - cargo, 'doc', '--no-deps', '--document-private-items'], - depends: bindings_rs) + depends: genrs) endif diff --git a/rust/qemu-api-macros/meson.build b/rust/qemu-api-macros/meson.build index 6f94a4b..8610ce1 100644 --- a/rust/qemu-api-macros/meson.build +++ b/rust/qemu-api-macros/meson.build @@ -1,11 +1,3 @@ -subproject('proc-macro2-1-rs', required: true) -subproject('quote-1-rs', required: true) -subproject('syn-2-rs', required: true) - -quote_dep = dependency('quote-1-rs', native: true) -syn_dep = dependency('syn-2-rs', native: true) -proc_macro2_dep = dependency('proc-macro2-1-rs', native: true) - _qemu_api_macros_rs = rust.proc_macro( 'qemu_api_macros', files('src/lib.rs'), @@ -16,9 +8,9 @@ _qemu_api_macros_rs = rust.proc_macro( '--cfg', 'feature="proc-macro"', ], dependencies: [ - proc_macro2_dep, - quote_dep, - syn_dep, + proc_macro2_rs_native, + quote_rs_native, + syn_rs_native, ], ) diff --git a/rust/qemu-api-macros/src/bits.rs b/rust/qemu-api-macros/src/bits.rs new file mode 100644 index 0000000..5ba8475 --- /dev/null +++ b/rust/qemu-api-macros/src/bits.rs @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: MIT or Apache-2.0 or GPL-2.0-or-later + +// shadowing is useful together with "if let" +#![allow(clippy::shadow_unrelated)] + +use proc_macro2::{ + Delimiter, Group, Ident, Punct, Spacing, Span, TokenStream, TokenTree, TokenTree as TT, +}; + +use crate::utils::MacroError; + +pub struct BitsConstInternal { + typ: TokenTree, +} + +fn paren(ts: TokenStream) -> TokenTree { + TT::Group(Group::new(Delimiter::Parenthesis, ts)) +} + +fn ident(s: &'static str) -> TokenTree { + TT::Ident(Ident::new(s, Span::call_site())) +} + +fn punct(ch: char) -> TokenTree { + TT::Punct(Punct::new(ch, Spacing::Alone)) +} + +/// Implements a recursive-descent parser that translates Boolean expressions on +/// bitmasks to invocations of `const` functions defined by the `bits!` macro. +impl BitsConstInternal { + // primary ::= '(' or ')' + // | ident + // | '!' ident + fn parse_primary( + &self, + tok: TokenTree, + it: &mut dyn Iterator<Item = TokenTree>, + out: &mut TokenStream, + ) -> Result<Option<TokenTree>, MacroError> { + let next = match tok { + TT::Group(ref g) => { + if g.delimiter() != Delimiter::Parenthesis && g.delimiter() != Delimiter::None { + return Err(MacroError::Message("expected parenthesis".into(), g.span())); + } + let mut stream = g.stream().into_iter(); + let Some(first_tok) = stream.next() else { + return Err(MacroError::Message( + "expected operand, found ')'".into(), + g.span(), + )); + }; + let mut output = TokenStream::new(); + // start from the lowest precedence + let next = self.parse_or(first_tok, &mut stream, &mut output)?; + if let Some(tok) = next { + return Err(MacroError::Message( + format!("unexpected token {tok}"), + tok.span(), + )); + } + out.extend(Some(paren(output))); + it.next() + } + TT::Ident(_) => { + let mut output = TokenStream::new(); + output.extend([ + self.typ.clone(), + TT::Punct(Punct::new(':', Spacing::Joint)), + TT::Punct(Punct::new(':', Spacing::Joint)), + tok, + ]); + out.extend(Some(paren(output))); + it.next() + } + TT::Punct(ref p) => { + if p.as_char() != '!' { + return Err(MacroError::Message("expected operand".into(), p.span())); + } + let Some(rhs_tok) = it.next() else { + return Err(MacroError::Message( + "expected operand at end of input".into(), + p.span(), + )); + }; + let next = self.parse_primary(rhs_tok, it, out)?; + out.extend([punct('.'), ident("invert"), paren(TokenStream::new())]); + next + } + _ => { + return Err(MacroError::Message("unexpected literal".into(), tok.span())); + } + }; + Ok(next) + } + + fn parse_binop< + F: Fn( + &Self, + TokenTree, + &mut dyn Iterator<Item = TokenTree>, + &mut TokenStream, + ) -> Result<Option<TokenTree>, MacroError>, + >( + &self, + tok: TokenTree, + it: &mut dyn Iterator<Item = TokenTree>, + out: &mut TokenStream, + ch: char, + f: F, + method: &'static str, + ) -> Result<Option<TokenTree>, MacroError> { + let mut next = f(self, tok, it, out)?; + while next.is_some() { + let op = next.as_ref().unwrap(); + let TT::Punct(ref p) = op else { break }; + if p.as_char() != ch { + break; + } + + let Some(rhs_tok) = it.next() else { + return Err(MacroError::Message( + "expected operand at end of input".into(), + p.span(), + )); + }; + let mut rhs = TokenStream::new(); + next = f(self, rhs_tok, it, &mut rhs)?; + out.extend([punct('.'), ident(method), paren(rhs)]); + } + Ok(next) + } + + // sub ::= primary ('-' primary)* + pub fn parse_sub( + &self, + tok: TokenTree, + it: &mut dyn Iterator<Item = TokenTree>, + out: &mut TokenStream, + ) -> Result<Option<TokenTree>, MacroError> { + self.parse_binop(tok, it, out, '-', Self::parse_primary, "difference") + } + + // and ::= sub ('&' sub)* + fn parse_and( + &self, + tok: TokenTree, + it: &mut dyn Iterator<Item = TokenTree>, + out: &mut TokenStream, + ) -> Result<Option<TokenTree>, MacroError> { + self.parse_binop(tok, it, out, '&', Self::parse_sub, "intersection") + } + + // xor ::= and ('&' and)* + fn parse_xor( + &self, + tok: TokenTree, + it: &mut dyn Iterator<Item = TokenTree>, + out: &mut TokenStream, + ) -> Result<Option<TokenTree>, MacroError> { + self.parse_binop(tok, it, out, '^', Self::parse_and, "symmetric_difference") + } + + // or ::= xor ('|' xor)* + pub fn parse_or( + &self, + tok: TokenTree, + it: &mut dyn Iterator<Item = TokenTree>, + out: &mut TokenStream, + ) -> Result<Option<TokenTree>, MacroError> { + self.parse_binop(tok, it, out, '|', Self::parse_xor, "union") + } + + pub fn parse( + it: &mut dyn Iterator<Item = TokenTree>, + ) -> Result<proc_macro2::TokenStream, MacroError> { + let mut pos = Span::call_site(); + let mut typ = proc_macro2::TokenStream::new(); + + // Gobble everything up to an `@` sign, which is followed by a + // parenthesized expression; that is, all token trees except the + // last two form the type. + let next = loop { + let tok = it.next(); + if let Some(ref t) = tok { + pos = t.span(); + } + match tok { + None => break None, + Some(TT::Punct(ref p)) if p.as_char() == '@' => { + let tok = it.next(); + if let Some(ref t) = tok { + pos = t.span(); + } + break tok; + } + Some(x) => typ.extend(Some(x)), + } + }; + + let Some(tok) = next else { + return Err(MacroError::Message( + "expected expression, do not call this macro directly".into(), + pos, + )); + }; + let TT::Group(ref _group) = tok else { + return Err(MacroError::Message( + "expected parenthesis, do not call this macro directly".into(), + tok.span(), + )); + }; + let mut out = TokenStream::new(); + let state = Self { + typ: TT::Group(Group::new(Delimiter::None, typ)), + }; + + let next = state.parse_primary(tok, it, &mut out)?; + + // A parenthesized expression is a single production of the grammar, + // so the input must have reached the last token. + if let Some(tok) = next { + return Err(MacroError::Message( + format!("unexpected token {tok}"), + tok.span(), + )); + } + Ok(out) + } +} diff --git a/rust/qemu-api-macros/src/lib.rs b/rust/qemu-api-macros/src/lib.rs index f97449b..c18bb4e 100644 --- a/rust/qemu-api-macros/src/lib.rs +++ b/rust/qemu-api-macros/src/lib.rs @@ -12,6 +12,9 @@ use syn::{ mod utils; use utils::MacroError; +mod bits; +use bits::BitsConstInternal; + fn get_fields<'a>( input: &'a DeriveInput, msg: &str, @@ -190,23 +193,52 @@ fn get_variants(input: &DeriveInput) -> Result<&Punctuated<Variant, Comma>, Macr } #[rustfmt::skip::macros(quote)] +fn derive_tryinto_body( + name: &Ident, + variants: &Punctuated<Variant, Comma>, + repr: &Path, +) -> Result<proc_macro2::TokenStream, MacroError> { + let discriminants: Vec<&Ident> = variants.iter().map(|f| &f.ident).collect(); + + Ok(quote! { + #(const #discriminants: #repr = #name::#discriminants as #repr;)*; + match value { + #(#discriminants => core::result::Result::Ok(#name::#discriminants),)* + _ => core::result::Result::Err(value), + } + }) +} + +#[rustfmt::skip::macros(quote)] fn derive_tryinto_or_error(input: DeriveInput) -> Result<proc_macro2::TokenStream, MacroError> { let repr = get_repr_uN(&input, "#[derive(TryInto)]")?; - let name = &input.ident; - let variants = get_variants(&input)?; - let discriminants: Vec<&Ident> = variants.iter().map(|f| &f.ident).collect(); + let body = derive_tryinto_body(name, get_variants(&input)?, &repr)?; + let errmsg = format!("invalid value for {name}"); Ok(quote! { + impl #name { + #[allow(dead_code)] + pub const fn into_bits(self) -> #repr { + self as #repr + } + + #[allow(dead_code)] + pub const fn from_bits(value: #repr) -> Self { + match ({ + #body + }) { + Ok(x) => x, + Err(_) => panic!(#errmsg) + } + } + } impl core::convert::TryFrom<#repr> for #name { type Error = #repr; - fn try_from(value: #repr) -> Result<Self, Self::Error> { - #(const #discriminants: #repr = #name::#discriminants as #repr;)*; - match value { - #(#discriminants => Ok(Self::#discriminants),)* - _ => Err(value), - } + #[allow(ambiguous_associated_items)] + fn try_from(value: #repr) -> Result<Self, #repr> { + #body } } }) @@ -219,3 +251,12 @@ pub fn derive_tryinto(input: TokenStream) -> TokenStream { TokenStream::from(expanded) } + +#[proc_macro] +pub fn bits_const_internal(ts: TokenStream) -> TokenStream { + let ts = proc_macro2::TokenStream::from(ts); + let mut it = ts.into_iter(); + + let expanded = BitsConstInternal::parse(&mut it).unwrap_or_else(Into::into); + TokenStream::from(expanded) +} diff --git a/rust/qemu-api/Cargo.toml b/rust/qemu-api/Cargo.toml index c96cf50..db7000d 100644 --- a/rust/qemu-api/Cargo.toml +++ b/rust/qemu-api/Cargo.toml @@ -15,7 +15,9 @@ rust-version.workspace = true [dependencies] qemu_api_macros = { path = "../qemu-api-macros" } +anyhow = "~1.0" libc = "0.2.162" +foreign = "~0.3.1" [features] default = ["debug_cell"] diff --git a/rust/qemu-api/build.rs b/rust/qemu-api/build.rs index 1e72064..7849486 100644 --- a/rust/qemu-api/build.rs +++ b/rust/qemu-api/build.rs @@ -14,7 +14,7 @@ fn main() -> Result<()> { let path = env::var("MESON_BUILD_ROOT") .unwrap_or_else(|_| format!("{}/src", env!("CARGO_MANIFEST_DIR"))); - let file = format!("{path}/bindings.inc.rs"); + let file = format!("{path}/rust/qemu-api/bindings.inc.rs"); let file = Path::new(&file); if !Path::new(&file).exists() { panic!(concat!( diff --git a/rust/qemu-api/meson.build b/rust/qemu-api/meson.build index 1696df7..a090297 100644 --- a/rust/qemu-api/meson.build +++ b/rust/qemu-api/meson.build @@ -2,13 +2,52 @@ _qemu_api_cfg = run_command(rustc_args, '--config-headers', config_host_h, '--features', files('Cargo.toml'), capture: true, check: true).stdout().strip().splitlines() -libc_dep = dependency('libc-0.2-rs') - # _qemu_api_cfg += ['--cfg', 'feature="allocator"'] if get_option('debug_mutex') _qemu_api_cfg += ['--cfg', 'feature="debug_cell"'] endif +c_enums = [ + 'DeviceCategory', + 'GpioPolarity', + 'MachineInitPhase', + 'MemoryDeviceInfoKind', + 'MigrationPolicy', + 'MigrationPriority', + 'QEMUChrEvent', + 'QEMUClockType', + 'ResetType', + 'device_endian', + 'module_init_type', +] +_qemu_api_bindgen_args = [] +foreach enum : c_enums + _qemu_api_bindgen_args += ['--rustified-enum', enum] +endforeach +c_bitfields = [ + 'ClockEvent', + 'VMStateFlags', +] +foreach enum : c_bitfields + _qemu_api_bindgen_args += ['--bitfield-enum', enum] +endforeach + +# TODO: Remove this comment when the clang/libclang mismatch issue is solved. +# +# Rust bindings generation with `bindgen` might fail in some cases where the +# detected `libclang` does not match the expected `clang` version/target. In +# this case you must pass the path to `clang` and `libclang` to your build +# command invocation using the environment variables CLANG_PATH and +# LIBCLANG_PATH +_qemu_api_bindings_inc_rs = rust.bindgen( + input: 'wrapper.h', + dependencies: common_ss.all_dependencies(), + output: 'bindings.inc.rs', + include_directories: bindings_incdir, + bindgen_version: ['>=0.60.0'], + args: bindgen_args_common + _qemu_api_bindgen_args, + ) + _qemu_api_rs = static_library( 'qemu_api', structured_sources( @@ -21,7 +60,9 @@ _qemu_api_rs = static_library( 'src/cell.rs', 'src/chardev.rs', 'src/errno.rs', + 'src/error.rs', 'src/irq.rs', + 'src/log.rs', 'src/memory.rs', 'src/module.rs', 'src/prelude.rs', @@ -29,40 +70,33 @@ _qemu_api_rs = static_library( 'src/qom.rs', 'src/sysbus.rs', 'src/timer.rs', + 'src/uninit.rs', 'src/vmstate.rs', 'src/zeroable.rs', ], - {'.' : bindings_rs}, + {'.' : _qemu_api_bindings_inc_rs}, ), override_options: ['rust_std=2021', 'build.rust_std=2021'], rust_abi: 'rust', rust_args: _qemu_api_cfg, - dependencies: [libc_dep, qemu_api_macros], + dependencies: [anyhow_rs, foreign_rs, libc_rs, qemu_api_macros, qemuutil_rs, + qom, hwcore, chardev, migration], ) rust.test('rust-qemu-api-tests', _qemu_api_rs, suite: ['unit', 'rust']) -qemu_api = declare_dependency(link_with: _qemu_api_rs) +qemu_api = declare_dependency(link_with: [_qemu_api_rs], + dependencies: [qemu_api_macros, qom, hwcore, chardev, migration]) -# Rust executables do not support objects, so add an intermediate step. -rust_qemu_api_objs = static_library( - 'rust_qemu_api_objs', - objects: [libqom.extract_all_objects(recursive: false), - libhwcore.extract_all_objects(recursive: false), - libchardev.extract_all_objects(recursive: false), - libcrypto.extract_all_objects(recursive: false), - libauthz.extract_all_objects(recursive: false), - libio.extract_all_objects(recursive: false), - libmigration.extract_all_objects(recursive: false)]) -rust_qemu_api_deps = declare_dependency( - dependencies: [ - qom_ss.dependencies(), - chardev_ss.dependencies(), - crypto_ss.dependencies(), - authz_ss.dependencies(), - io_ss.dependencies()], - link_whole: [rust_qemu_api_objs, libqemuutil]) +# Doctests are essentially integration tests, so they need the same dependencies. +# Note that running them requires the object files for C code, so place them +# in a separate suite that is run by the "build" CI jobs rather than "check". +rust.doctest('rust-qemu-api-doctests', + _qemu_api_rs, + protocol: 'rust', + dependencies: qemu_api, + suite: ['doc', 'rust']) test('rust-qemu-api-integration', executable( @@ -71,7 +105,7 @@ test('rust-qemu-api-integration', override_options: ['rust_std=2021', 'build.rust_std=2021'], rust_args: ['--test'], install: false, - dependencies: [qemu_api, qemu_api_macros, rust_qemu_api_deps]), + dependencies: [qemu_api]), args: [ '--test', '--test-threads', '1', '--format', 'pretty', diff --git a/rust/qemu-api/src/bindings.rs b/rust/qemu-api/src/bindings.rs index 3c1d297..057de4b 100644 --- a/rust/qemu-api/src/bindings.rs +++ b/rust/qemu-api/src/bindings.rs @@ -11,6 +11,7 @@ clippy::restriction, clippy::style, clippy::missing_const_for_fn, + clippy::ptr_offset_with_cast, clippy::useless_transmute, clippy::missing_safety_doc )] diff --git a/rust/qemu-api/src/bitops.rs b/rust/qemu-api/src/bitops.rs index 023ec1a..b1e3a53 100644 --- a/rust/qemu-api/src/bitops.rs +++ b/rust/qemu-api/src/bitops.rs @@ -1,5 +1,5 @@ // Copyright (C) 2024 Intel Corporation. -// Author(s): Zhao Liu <zhai1.liu@intel.com> +// Author(s): Zhao Liu <zhao1.liu@intel.com> // SPDX-License-Identifier: GPL-2.0-or-later //! This module provides bit operation extensions to integer types. diff --git a/rust/qemu-api/src/cell.rs b/rust/qemu-api/src/cell.rs index 05ce09f..27063b0 100644 --- a/rust/qemu-api/src/cell.rs +++ b/rust/qemu-api/src/cell.rs @@ -225,27 +225,23 @@ use crate::bindings; /// An internal function that is used by doctests. pub fn bql_start_test() { - if cfg!(MESON) { - // SAFETY: integration tests are run with --test-threads=1, while - // unit tests and doctests are not multithreaded and do not have - // any BQL-protected data. Just set bql_locked to true. - unsafe { - bindings::rust_bql_mock_lock(); - } + // SAFETY: integration tests are run with --test-threads=1, while + // unit tests and doctests are not multithreaded and do not have + // any BQL-protected data. Just set bql_locked to true. + unsafe { + bindings::rust_bql_mock_lock(); } } pub fn bql_locked() -> bool { // SAFETY: the function does nothing but return a thread-local bool - !cfg!(MESON) || unsafe { bindings::bql_locked() } + unsafe { bindings::bql_locked() } } fn bql_block_unlock(increase: bool) { - if cfg!(MESON) { - // SAFETY: this only adjusts a counter - unsafe { - bindings::bql_block_unlock(increase); - } + // SAFETY: this only adjusts a counter + unsafe { + bindings::bql_block_unlock(increase); } } diff --git a/rust/qemu-api/src/error.rs b/rust/qemu-api/src/error.rs new file mode 100644 index 0000000..e114fc4 --- /dev/null +++ b/rust/qemu-api/src/error.rs @@ -0,0 +1,416 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +//! Error propagation for QEMU Rust code +//! +//! This module contains [`Error`], the bridge between Rust errors and +//! [`Result`](std::result::Result)s and QEMU's C [`Error`](bindings::Error) +//! struct. +//! +//! For FFI code, [`Error`] provides functions to simplify conversion between +//! the Rust ([`Result<>`](std::result::Result)) and C (`Error**`) conventions: +//! +//! * [`ok_or_propagate`](crate::Error::ok_or_propagate), +//! [`bool_or_propagate`](crate::Error::bool_or_propagate), +//! [`ptr_or_propagate`](crate::Error::ptr_or_propagate) can be used to build +//! a C return value while also propagating an error condition +//! +//! * [`err_or_else`](crate::Error::err_or_else) and +//! [`err_or_unit`](crate::Error::err_or_unit) can be used to build a `Result` +//! +//! This module is most commonly used at the boundary between C and Rust code; +//! other code will usually access it through the +//! [`qemu_api::Result`](crate::Result) type alias, and will use the +//! [`std::error::Error`] interface to let C errors participate in Rust's error +//! handling functionality. +//! +//! Rust code can also create use this module to create an error object that +//! will be passed up to C code, though in most cases this will be done +//! transparently through the `?` operator. Errors can be constructed from a +//! simple error string, from an [`anyhow::Error`] to pass any other Rust error +//! type up to C code, or from a combination of the two. +//! +//! The third case, corresponding to [`Error::with_error`], is the only one that +//! requires mentioning [`qemu_api::Error`](crate::Error) explicitly. Similar +//! to how QEMU's C code handles errno values, the string and the +//! `anyhow::Error` object will be concatenated with `:` as the separator. + +use std::{ + borrow::Cow, + ffi::{c_char, c_int, c_void, CStr}, + fmt::{self, Display}, + panic, ptr, +}; + +use foreign::{prelude::*, OwnedPointer}; + +use crate::bindings; + +pub type Result<T> = std::result::Result<T, Error>; + +#[derive(Debug)] +pub struct Error { + msg: Option<Cow<'static, str>>, + /// Appends the print string of the error to the msg if not None + cause: Option<anyhow::Error>, + file: &'static str, + line: u32, +} + +impl std::error::Error for Error { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + self.cause.as_ref().map(AsRef::as_ref) + } + + #[allow(deprecated)] + fn description(&self) -> &str { + self.msg + .as_deref() + .or_else(|| self.cause.as_deref().map(std::error::Error::description)) + .expect("no message nor cause?") + } +} + +impl Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut prefix = ""; + if let Some(ref msg) = self.msg { + write!(f, "{msg}")?; + prefix = ": "; + } + if let Some(ref cause) = self.cause { + write!(f, "{prefix}{cause}")?; + } else if prefix.is_empty() { + panic!("no message nor cause?"); + } + Ok(()) + } +} + +impl From<String> for Error { + #[track_caller] + fn from(msg: String) -> Self { + let location = panic::Location::caller(); + Error { + msg: Some(Cow::Owned(msg)), + cause: None, + file: location.file(), + line: location.line(), + } + } +} + +impl From<&'static str> for Error { + #[track_caller] + fn from(msg: &'static str) -> Self { + let location = panic::Location::caller(); + Error { + msg: Some(Cow::Borrowed(msg)), + cause: None, + file: location.file(), + line: location.line(), + } + } +} + +impl From<anyhow::Error> for Error { + #[track_caller] + fn from(error: anyhow::Error) -> Self { + let location = panic::Location::caller(); + Error { + msg: None, + cause: Some(error), + file: location.file(), + line: location.line(), + } + } +} + +impl Error { + /// Create a new error, prepending `msg` to the + /// description of `cause` + #[track_caller] + pub fn with_error(msg: impl Into<Cow<'static, str>>, cause: impl Into<anyhow::Error>) -> Self { + let location = panic::Location::caller(); + Error { + msg: Some(msg.into()), + cause: Some(cause.into()), + file: location.file(), + line: location.line(), + } + } + + /// Consume a result, returning `false` if it is an error and + /// `true` if it is successful. The error is propagated into + /// `errp` like the C API `error_propagate` would do. + /// + /// # Safety + /// + /// `errp` must be a valid argument to `error_propagate`; + /// typically it is received from C code and need not be + /// checked further at the Rust↔C boundary. + pub unsafe fn bool_or_propagate(result: Result<()>, errp: *mut *mut bindings::Error) -> bool { + // SAFETY: caller guarantees errp is valid + unsafe { Self::ok_or_propagate(result, errp) }.is_some() + } + + /// Consume a result, returning a `NULL` pointer if it is an error and + /// a C representation of the contents if it is successful. This is + /// similar to the C API `error_propagate`, but it panics if `*errp` + /// is not `NULL`. + /// + /// # Safety + /// + /// `errp` must be a valid argument to `error_propagate`; + /// typically it is received from C code and need not be + /// checked further at the Rust↔C boundary. + /// + /// See [`propagate`](Error::propagate) for more information. + #[must_use] + pub unsafe fn ptr_or_propagate<T: CloneToForeign>( + result: Result<T>, + errp: *mut *mut bindings::Error, + ) -> *mut T::Foreign { + // SAFETY: caller guarantees errp is valid + unsafe { Self::ok_or_propagate(result, errp) }.clone_to_foreign_ptr() + } + + /// Consume a result in the same way as `self.ok()`, but also propagate + /// a possible error into `errp`. This is similar to the C API + /// `error_propagate`, but it panics if `*errp` is not `NULL`. + /// + /// # Safety + /// + /// `errp` must be a valid argument to `error_propagate`; + /// typically it is received from C code and need not be + /// checked further at the Rust↔C boundary. + /// + /// See [`propagate`](Error::propagate) for more information. + pub unsafe fn ok_or_propagate<T>( + result: Result<T>, + errp: *mut *mut bindings::Error, + ) -> Option<T> { + result.map_err(|err| unsafe { err.propagate(errp) }).ok() + } + + /// Equivalent of the C function `error_propagate`. Fill `*errp` + /// with the information container in `self` if `errp` is not NULL; + /// then consume it. + /// + /// This is similar to the C API `error_propagate`, but it panics if + /// `*errp` is not `NULL`. + /// + /// # Safety + /// + /// `errp` must be a valid argument to `error_propagate`; it can be + /// `NULL` or it can point to any of: + /// * `error_abort` + /// * `error_fatal` + /// * a local variable of (C) type `Error *` + /// + /// Typically `errp` is received from C code and need not be + /// checked further at the Rust↔C boundary. + pub unsafe fn propagate(self, errp: *mut *mut bindings::Error) { + if errp.is_null() { + return; + } + + // SAFETY: caller guarantees that errp and *errp are valid + unsafe { + assert_eq!(*errp, ptr::null_mut()); + bindings::error_propagate(errp, self.clone_to_foreign_ptr()); + } + } + + /// Convert a C `Error*` into a Rust `Result`, using + /// `Ok(())` if `c_error` is NULL. Free the `Error*`. + /// + /// # Safety + /// + /// `c_error` must be `NULL` or valid; typically it was initialized + /// with `ptr::null_mut()` and passed by reference to a C function. + pub unsafe fn err_or_unit(c_error: *mut bindings::Error) -> Result<()> { + // SAFETY: caller guarantees c_error is valid + unsafe { Self::err_or_else(c_error, || ()) } + } + + /// Convert a C `Error*` into a Rust `Result`, calling `f()` to + /// obtain an `Ok` value if `c_error` is NULL. Free the `Error*`. + /// + /// # Safety + /// + /// `c_error` must be `NULL` or point to a valid C [`struct + /// Error`](bindings::Error); typically it was initialized with + /// `ptr::null_mut()` and passed by reference to a C function. + pub unsafe fn err_or_else<T, F: FnOnce() -> T>( + c_error: *mut bindings::Error, + f: F, + ) -> Result<T> { + // SAFETY: caller guarantees c_error is valid + let err = unsafe { Option::<Self>::from_foreign(c_error) }; + match err { + None => Ok(f()), + Some(err) => Err(err), + } + } +} + +impl FreeForeign for Error { + type Foreign = bindings::Error; + + unsafe fn free_foreign(p: *mut bindings::Error) { + // SAFETY: caller guarantees p is valid + unsafe { + bindings::error_free(p); + } + } +} + +impl CloneToForeign for Error { + fn clone_to_foreign(&self) -> OwnedPointer<Self> { + // SAFETY: all arguments are controlled by this function + unsafe { + let err: *mut c_void = libc::malloc(std::mem::size_of::<bindings::Error>()); + let err: &mut bindings::Error = &mut *err.cast(); + *err = bindings::Error { + msg: format!("{self}").clone_to_foreign_ptr(), + err_class: bindings::ERROR_CLASS_GENERIC_ERROR, + src_len: self.file.len() as c_int, + src: self.file.as_ptr().cast::<c_char>(), + line: self.line as c_int, + func: ptr::null_mut(), + hint: ptr::null_mut(), + }; + OwnedPointer::new(err) + } + } +} + +impl FromForeign for Error { + unsafe fn cloned_from_foreign(c_error: *const bindings::Error) -> Self { + // SAFETY: caller guarantees c_error is valid + unsafe { + let error = &*c_error; + let file = if error.src_len < 0 { + // NUL-terminated + CStr::from_ptr(error.src).to_str() + } else { + // Can become str::from_utf8 with Rust 1.87.0 + std::str::from_utf8(std::slice::from_raw_parts( + &*error.src.cast::<u8>(), + error.src_len as usize, + )) + }; + + Error { + msg: FromForeign::cloned_from_foreign(error.msg), + cause: None, + file: file.unwrap(), + line: error.line as u32, + } + } + } +} + +#[cfg(test)] +mod tests { + use std::ffi::CStr; + + use anyhow::anyhow; + use foreign::OwnedPointer; + + use super::*; + use crate::{assert_match, bindings}; + + #[track_caller] + fn error_for_test(msg: &CStr) -> OwnedPointer<Error> { + // SAFETY: all arguments are controlled by this function + let location = panic::Location::caller(); + unsafe { + let err: *mut c_void = libc::malloc(std::mem::size_of::<bindings::Error>()); + let err: &mut bindings::Error = &mut *err.cast(); + *err = bindings::Error { + msg: msg.clone_to_foreign_ptr(), + err_class: bindings::ERROR_CLASS_GENERIC_ERROR, + src_len: location.file().len() as c_int, + src: location.file().as_ptr().cast::<c_char>(), + line: location.line() as c_int, + func: ptr::null_mut(), + hint: ptr::null_mut(), + }; + OwnedPointer::new(err) + } + } + + unsafe fn error_get_pretty<'a>(local_err: *mut bindings::Error) -> &'a CStr { + unsafe { CStr::from_ptr(bindings::error_get_pretty(local_err)) } + } + + #[test] + #[allow(deprecated)] + fn test_description() { + use std::error::Error; + + assert_eq!(super::Error::from("msg").description(), "msg"); + assert_eq!(super::Error::from("msg".to_owned()).description(), "msg"); + } + + #[test] + fn test_display() { + assert_eq!(&*format!("{}", Error::from("msg")), "msg"); + assert_eq!(&*format!("{}", Error::from("msg".to_owned())), "msg"); + assert_eq!(&*format!("{}", Error::from(anyhow!("msg"))), "msg"); + + assert_eq!( + &*format!("{}", Error::with_error("msg", anyhow!("cause"))), + "msg: cause" + ); + } + + #[test] + fn test_bool_or_propagate() { + unsafe { + let mut local_err: *mut bindings::Error = ptr::null_mut(); + + assert!(Error::bool_or_propagate(Ok(()), &mut local_err)); + assert_eq!(local_err, ptr::null_mut()); + + let my_err = Error::from("msg"); + assert!(!Error::bool_or_propagate(Err(my_err), &mut local_err)); + assert_ne!(local_err, ptr::null_mut()); + assert_eq!(error_get_pretty(local_err), c"msg"); + bindings::error_free(local_err); + } + } + + #[test] + fn test_ptr_or_propagate() { + unsafe { + let mut local_err: *mut bindings::Error = ptr::null_mut(); + + let ret = Error::ptr_or_propagate(Ok("abc".to_owned()), &mut local_err); + assert_eq!(String::from_foreign(ret), "abc"); + assert_eq!(local_err, ptr::null_mut()); + + let my_err = Error::from("msg"); + assert_eq!( + Error::ptr_or_propagate(Err::<String, _>(my_err), &mut local_err), + ptr::null_mut() + ); + assert_ne!(local_err, ptr::null_mut()); + assert_eq!(error_get_pretty(local_err), c"msg"); + bindings::error_free(local_err); + } + } + + #[test] + fn test_err_or_unit() { + unsafe { + let result = Error::err_or_unit(ptr::null_mut()); + assert_match!(result, Ok(())); + + let err = error_for_test(c"msg"); + let err = Error::err_or_unit(err.into_inner()).unwrap_err(); + assert_eq!(&*format!("{err}"), "msg"); + } + } +} diff --git a/rust/qemu-api/src/lib.rs b/rust/qemu-api/src/lib.rs index 234a94e..86dcd8e 100644 --- a/rust/qemu-api/src/lib.rs +++ b/rust/qemu-api/src/lib.rs @@ -19,13 +19,16 @@ pub mod callbacks; pub mod cell; pub mod chardev; pub mod errno; +pub mod error; pub mod irq; +pub mod log; pub mod memory; pub mod module; pub mod qdev; pub mod qom; pub mod sysbus; pub mod timer; +pub mod uninit; pub mod vmstate; pub mod zeroable; @@ -34,6 +37,8 @@ use std::{ ffi::c_void, }; +pub use error::{Error, Result}; + #[cfg(HAVE_GLIB_WITH_ALIGNED_ALLOC)] extern "C" { fn g_aligned_alloc0( diff --git a/rust/qemu-api/src/log.rs b/rust/qemu-api/src/log.rs new file mode 100644 index 0000000..d6c3d6c --- /dev/null +++ b/rust/qemu-api/src/log.rs @@ -0,0 +1,73 @@ +// Copyright 2025 Bernhard Beschow <shentey@gmail.com> +// SPDX-License-Identifier: GPL-2.0-or-later + +//! Bindings for QEMU's logging infrastructure + +#[repr(u32)] +/// Represents specific error categories within QEMU's logging system. +/// +/// The `Log` enum provides a Rust abstraction for logging errors, corresponding +/// to a subset of the error categories defined in the C implementation. +pub enum Log { + /// Log invalid access caused by the guest. + /// Corresponds to `LOG_GUEST_ERROR` in the C implementation. + GuestError = crate::bindings::LOG_GUEST_ERROR, + + /// Log guest access of unimplemented functionality. + /// Corresponds to `LOG_UNIMP` in the C implementation. + Unimp = crate::bindings::LOG_UNIMP, +} + +/// A macro to log messages conditionally based on a provided mask. +/// +/// The `log_mask_ln` macro checks whether the given mask matches the current +/// log level and, if so, formats and logs the message. It is the Rust +/// counterpart of the `qemu_log_mask()` macro in the C implementation. +/// +/// # Parameters +/// +/// - `$mask`: A log level mask. This should be a variant of the `Log` enum. +/// - `$fmt`: A format string following the syntax and rules of the `format!` +/// macro. It specifies the structure of the log message. +/// - `$args`: Optional arguments to be interpolated into the format string. +/// +/// # Example +/// +/// ``` +/// use qemu_api::{log::Log, log_mask_ln}; +/// +/// let error_address = 0xbad; +/// log_mask_ln!(Log::GuestError, "Address 0x{error_address:x} out of range"); +/// ``` +/// +/// It is also possible to use printf-style formatting, as well as having a +/// trailing `,`: +/// +/// ``` +/// use qemu_api::{log::Log, log_mask_ln}; +/// +/// let error_address = 0xbad; +/// log_mask_ln!( +/// Log::GuestError, +/// "Address 0x{:x} out of range", +/// error_address, +/// ); +/// ``` +#[macro_export] +macro_rules! log_mask_ln { + ($mask:expr, $fmt:tt $($args:tt)*) => {{ + // Type assertion to enforce type `Log` for $mask + let _: Log = $mask; + + if unsafe { + (::qemu_api::bindings::qemu_loglevel & ($mask as std::os::raw::c_int)) != 0 + } { + let formatted_string = format!("{}\n", format_args!($fmt $($args)*)); + let c_string = std::ffi::CString::new(formatted_string).unwrap(); + + unsafe { + ::qemu_api::bindings::qemu_log(c_string.as_ptr()); + } + } + }}; +} diff --git a/rust/qemu-api/src/memory.rs b/rust/qemu-api/src/memory.rs index 9ef2694..e40fad6 100644 --- a/rust/qemu-api/src/memory.rs +++ b/rust/qemu-api/src/memory.rs @@ -16,6 +16,7 @@ use crate::{ callbacks::FnCall, cell::Opaque, prelude::*, + uninit::MaybeUninitField, zeroable::Zeroable, }; @@ -147,7 +148,7 @@ impl MemoryRegion { #[inline(always)] unsafe fn do_init_io( slot: *mut bindings::MemoryRegion, - owner: *mut Object, + owner: *mut bindings::Object, ops: &'static bindings::MemoryRegionOps, name: &'static str, size: u64, @@ -156,7 +157,7 @@ impl MemoryRegion { let cstr = CString::new(name).unwrap(); memory_region_init_io( slot, - owner.cast::<bindings::Object>(), + owner, ops, owner.cast::<c_void>(), cstr.as_ptr(), @@ -166,16 +167,15 @@ impl MemoryRegion { } pub fn init_io<T: IsA<Object>>( - &mut self, - owner: *mut T, + this: &mut MaybeUninitField<'_, T, Self>, ops: &'static MemoryRegionOps<T>, name: &'static str, size: u64, ) { unsafe { Self::do_init_io( - self.0.as_mut_ptr(), - owner.cast::<Object>(), + this.as_mut_ptr().cast(), + MaybeUninitField::parent_mut(this).cast(), &ops.0, name, size, diff --git a/rust/qemu-api/src/prelude.rs b/rust/qemu-api/src/prelude.rs index 43bfcd5..8f9e23e 100644 --- a/rust/qemu-api/src/prelude.rs +++ b/rust/qemu-api/src/prelude.rs @@ -11,6 +11,8 @@ pub use crate::cell::BqlRefCell; pub use crate::errno; +pub use crate::log_mask_ln; + pub use crate::qdev::DeviceMethods; pub use crate::qom::InterfaceType; diff --git a/rust/qemu-api/src/qdev.rs b/rust/qemu-api/src/qdev.rs index 1279d7a..36f02fb 100644 --- a/rust/qemu-api/src/qdev.rs +++ b/rust/qemu-api/src/qdev.rs @@ -12,13 +12,14 @@ use std::{ pub use bindings::{ClockEvent, DeviceClass, Property, ResetType}; use crate::{ - bindings::{self, qdev_init_gpio_in, qdev_init_gpio_out, Error, ResettableClass}, + bindings::{self, qdev_init_gpio_in, qdev_init_gpio_out, ResettableClass}, callbacks::FnCall, cell::{bql_locked, Opaque}, chardev::Chardev, + error::{Error, Result}, irq::InterruptSource, prelude::*, - qom::{ObjectClass, ObjectImpl, Owned}, + qom::{ObjectClass, ObjectImpl, Owned, ParentInit}, vmstate::VMStateDescription, }; @@ -108,7 +109,7 @@ pub trait DeviceImpl: ObjectImpl + ResettablePhasesImpl + IsA<DeviceState> { /// /// If not `None`, the parent class's `realize` method is overridden /// with the function pointed to by `REALIZE`. - const REALIZE: Option<fn(&Self)> = None; + const REALIZE: Option<fn(&Self) -> Result<()>> = None; /// An array providing the properties that the user can set on the /// device. Not a `const` because referencing statics in constants @@ -134,10 +135,13 @@ pub trait DeviceImpl: ObjectImpl + ResettablePhasesImpl + IsA<DeviceState> { /// readable/writeable from one thread at any time. unsafe extern "C" fn rust_realize_fn<T: DeviceImpl>( dev: *mut bindings::DeviceState, - _errp: *mut *mut Error, + errp: *mut *mut bindings::Error, ) { let state = NonNull::new(dev).unwrap().cast::<T>(); - T::REALIZE.unwrap()(unsafe { state.as_ref() }); + let result = T::REALIZE.unwrap()(unsafe { state.as_ref() }); + unsafe { + Error::ok_or_propagate(result, errp); + } } unsafe impl InterfaceType for ResettableClass { @@ -243,15 +247,9 @@ unsafe impl ObjectType for DeviceState { } qom_isa!(DeviceState: Object); -/// Trait for methods exposed by the [`DeviceState`] class. The methods can be -/// called on all objects that have the trait `IsA<DeviceState>`. -/// -/// The trait should only be used through the blanket implementation, -/// which guarantees safety via `IsA`. -pub trait DeviceMethods: ObjectDeref -where - Self::Target: IsA<DeviceState>, -{ +/// Initialization methods take a [`ParentInit`] and can be called as +/// associated functions. +impl DeviceState { /// Add an input clock named `name`. Invoke the callback with /// `self` as the first parameter for the events that are requested. /// @@ -262,12 +260,15 @@ where /// which Rust code has a reference to a child object) it would be /// possible for this function to return a `&Clock` too. #[inline] - fn init_clock_in<F: for<'a> FnCall<(&'a Self::Target, ClockEvent)>>( - &self, + pub fn init_clock_in<T: DeviceImpl, F: for<'a> FnCall<(&'a T, ClockEvent)>>( + this: &mut ParentInit<T>, name: &str, _cb: &F, events: ClockEvent, - ) -> Owned<Clock> { + ) -> Owned<Clock> + where + T::ParentType: IsA<DeviceState>, + { fn do_init_clock_in( dev: &DeviceState, name: &str, @@ -283,10 +284,10 @@ where unsafe { let cstr = CString::new(name).unwrap(); let clk = bindings::qdev_init_clock_in( - dev.as_mut_ptr(), + dev.0.as_mut_ptr(), cstr.as_ptr(), cb, - dev.as_void_ptr(), + dev.0.as_void_ptr(), events.0, ); @@ -303,12 +304,12 @@ where // SAFETY: the opaque is "this", which is indeed a pointer to T F::call((unsafe { &*(opaque.cast::<T>()) }, event)) } - Some(rust_clock_cb::<Self::Target, F>) + Some(rust_clock_cb::<T, F>) } else { None }; - do_init_clock_in(self.upcast(), name, cb, events) + do_init_clock_in(unsafe { this.upcast_mut() }, name, cb, events) } /// Add an output clock named `name`. @@ -320,16 +321,30 @@ where /// which Rust code has a reference to a child object) it would be /// possible for this function to return a `&Clock` too. #[inline] - fn init_clock_out(&self, name: &str) -> Owned<Clock> { + pub fn init_clock_out<T: DeviceImpl>(this: &mut ParentInit<T>, name: &str) -> Owned<Clock> + where + T::ParentType: IsA<DeviceState>, + { unsafe { let cstr = CString::new(name).unwrap(); - let clk = bindings::qdev_init_clock_out(self.upcast().as_mut_ptr(), cstr.as_ptr()); + let dev: &mut DeviceState = this.upcast_mut(); + let clk = bindings::qdev_init_clock_out(dev.0.as_mut_ptr(), cstr.as_ptr()); let clk: &Clock = Clock::from_raw(clk); Owned::from(clk) } } +} +/// Trait for methods exposed by the [`DeviceState`] class. The methods can be +/// called on all objects that have the trait `IsA<DeviceState>`. +/// +/// The trait should only be used through the blanket implementation, +/// which guarantees safety via `IsA`. +pub trait DeviceMethods: ObjectDeref +where + Self::Target: IsA<DeviceState>, +{ fn prop_set_chr(&self, propname: &str, chr: &Owned<Chardev>) { assert!(bql_locked()); let c_propname = CString::new(propname).unwrap(); diff --git a/rust/qemu-api/src/qom.rs b/rust/qemu-api/src/qom.rs index 41e5a5e..e20ee01 100644 --- a/rust/qemu-api/src/qom.rs +++ b/rust/qemu-api/src/qom.rs @@ -95,7 +95,8 @@ use std::{ ffi::{c_void, CStr}, fmt, - mem::ManuallyDrop, + marker::PhantomData, + mem::{ManuallyDrop, MaybeUninit}, ops::{Deref, DerefMut}, ptr::NonNull, }; @@ -206,13 +207,190 @@ impl<T: fmt::Display + ObjectType> fmt::Display for ParentField<T> { } } +/// This struct knows that the superclasses of the object have already been +/// initialized. +/// +/// The declaration of `ParentInit` is.. *"a kind of magic"*. It uses a +/// technique that is found in several crates, the main ones probably being +/// `ghost-cell` (in fact it was introduced by the [`GhostCell` paper](https://plv.mpi-sws.org/rustbelt/ghostcell/)) +/// and `generativity`. +/// +/// The `PhantomData` makes the `ParentInit` type *invariant* with respect to +/// the lifetime argument `'init`. This, together with the `for<'...>` in +/// `[ParentInit::with]`, block any attempt of the compiler to be creative when +/// operating on types of type `ParentInit` and to extend their lifetimes. In +/// particular, it ensures that the `ParentInit` cannot be made to outlive the +/// `rust_instance_init()` function that creates it, and therefore that the +/// `&'init T` reference is valid. +/// +/// This implementation of the same concept, without the QOM baggage, can help +/// understanding the effect: +/// +/// ``` +/// use std::marker::PhantomData; +/// +/// #[derive(PartialEq, Eq)] +/// pub struct Jail<'closure, T: Copy>(&'closure T, PhantomData<fn(&'closure ()) -> &'closure ()>); +/// +/// impl<'closure, T: Copy> Jail<'closure, T> { +/// fn get(&self) -> T { +/// *self.0 +/// } +/// +/// #[inline] +/// fn with<U>(v: T, f: impl for<'id> FnOnce(Jail<'id, T>) -> U) -> U { +/// let parent_init = Jail(&v, PhantomData); +/// f(parent_init) +/// } +/// } +/// ``` +/// +/// It's impossible to escape the `Jail`; `token1` cannot be moved out of the +/// closure: +/// +/// ```ignore +/// let x = 42; +/// let escape = Jail::with(&x, |token1| { +/// println!("{}", token1.get()); +/// // fails to compile... +/// token1 +/// }); +/// // ... so you cannot do this: +/// println!("{}", escape.get()); +/// ``` +/// +/// Likewise, in the QOM case the `ParentInit` cannot be moved out of +/// `instance_init()`. Without this trick it would be possible to stash a +/// `ParentInit` and use it later to access uninitialized memory. +/// +/// Here is another example, showing how separately-created "identities" stay +/// isolated: +/// +/// ```ignore +/// impl<'closure, T: Copy> Clone for Jail<'closure, T> { +/// fn clone(&self) -> Jail<'closure, T> { +/// Jail(self.0, PhantomData) +/// } +/// } +/// +/// fn main() { +/// Jail::with(42, |token1| { +/// // this works and returns true: the clone has the same "identity" +/// println!("{}", token1 == token1.clone()); +/// Jail::with(42, |token2| { +/// // here the outer token remains accessible... +/// println!("{}", token1.get()); +/// // ... but the two are separate: this fails to compile: +/// println!("{}", token1 == token2); +/// }); +/// }); +/// } +/// ``` +pub struct ParentInit<'init, T>( + &'init mut MaybeUninit<T>, + PhantomData<fn(&'init ()) -> &'init ()>, +); + +impl<'init, T> ParentInit<'init, T> { + #[inline] + pub fn with(obj: &'init mut MaybeUninit<T>, f: impl for<'id> FnOnce(ParentInit<'id, T>)) { + let parent_init = ParentInit(obj, PhantomData); + f(parent_init) + } +} + +impl<T: ObjectType> ParentInit<'_, T> { + /// Return the receiver as a mutable raw pointer to Object. + /// + /// # Safety + /// + /// Fields beyond `Object` could be uninitialized and it's your + /// responsibility to avoid that they're used when the pointer is + /// dereferenced, either directly or through a cast. + pub fn as_object_mut_ptr(&self) -> *mut bindings::Object { + self.as_object_ptr().cast_mut() + } + + /// Return the receiver as a mutable raw pointer to Object. + /// + /// # Safety + /// + /// Fields beyond `Object` could be uninitialized and it's your + /// responsibility to avoid that they're used when the pointer is + /// dereferenced, either directly or through a cast. + pub fn as_object_ptr(&self) -> *const bindings::Object { + self.0.as_ptr().cast() + } +} + +impl<'a, T: ObjectImpl> ParentInit<'a, T> { + /// Convert from a derived type to one of its parent types, which + /// have already been initialized. + /// + /// # Safety + /// + /// Structurally this is always a safe operation; the [`IsA`] trait + /// provides static verification trait that `Self` dereferences to `U` or + /// a child of `U`, and only parent types of `T` are allowed. + /// + /// However, while the fields of the resulting reference are initialized, + /// calls might use uninitialized fields of the subclass. It is your + /// responsibility to avoid this. + pub unsafe fn upcast<U: ObjectType>(&self) -> &'a U + where + T::ParentType: IsA<U>, + { + // SAFETY: soundness is declared via IsA<U>, which is an unsafe trait; + // the parent has been initialized before `instance_init `is called + unsafe { &*(self.0.as_ptr().cast::<U>()) } + } + + /// Convert from a derived type to one of its parent types, which + /// have already been initialized. + /// + /// # Safety + /// + /// Structurally this is always a safe operation; the [`IsA`] trait + /// provides static verification trait that `Self` dereferences to `U` or + /// a child of `U`, and only parent types of `T` are allowed. + /// + /// However, while the fields of the resulting reference are initialized, + /// calls might use uninitialized fields of the subclass. It is your + /// responsibility to avoid this. + pub unsafe fn upcast_mut<U: ObjectType>(&mut self) -> &'a mut U + where + T::ParentType: IsA<U>, + { + // SAFETY: soundness is declared via IsA<U>, which is an unsafe trait; + // the parent has been initialized before `instance_init `is called + unsafe { &mut *(self.0.as_mut_ptr().cast::<U>()) } + } +} + +impl<T> Deref for ParentInit<'_, T> { + type Target = MaybeUninit<T>; + + fn deref(&self) -> &Self::Target { + self.0 + } +} + +impl<T> DerefMut for ParentInit<'_, T> { + fn deref_mut(&mut self) -> &mut Self::Target { + self.0 + } +} + unsafe extern "C" fn rust_instance_init<T: ObjectImpl>(obj: *mut bindings::Object) { - let mut state = NonNull::new(obj).unwrap().cast::<T>(); + let mut state = NonNull::new(obj).unwrap().cast::<MaybeUninit<T>>(); + // SAFETY: obj is an instance of T, since rust_instance_init<T> // is called from QOM core as the instance_init function // for class T unsafe { - T::INSTANCE_INIT.unwrap()(state.as_mut()); + ParentInit::with(state.as_mut(), |parent_init| { + T::INSTANCE_INIT.unwrap()(parent_init); + }); } } @@ -291,7 +469,7 @@ pub unsafe trait ObjectType: Sized { } /// Return the receiver as a const raw pointer to Object. - /// This is preferrable to `as_object_mut_ptr()` if a C + /// This is preferable to `as_object_mut_ptr()` if a C /// function only needs a `const Object *`. fn as_object_ptr(&self) -> *const bindings::Object { self.as_object().as_ptr() @@ -479,13 +657,13 @@ pub trait ObjectImpl: ObjectType + IsA<Object> { /// /// FIXME: The argument is not really a valid reference. `&mut /// MaybeUninit<Self>` would be a better description. - const INSTANCE_INIT: Option<unsafe fn(&mut Self)> = None; + const INSTANCE_INIT: Option<unsafe fn(ParentInit<Self>)> = None; /// Function that is called to finish initialization of an object, once /// `INSTANCE_INIT` functions have been called. const INSTANCE_POST_INIT: Option<fn(&Self)> = None; - /// Called on descendent classes after all parent class initialization + /// Called on descendant classes after all parent class initialization /// has occurred, but before the class itself is initialized. This /// is only useful if a class is not a leaf, and can be used to undo /// the effects of copying the contents of the parent's class struct diff --git a/rust/qemu-api/src/timer.rs b/rust/qemu-api/src/timer.rs index 868bd88..0a2d111 100644 --- a/rust/qemu-api/src/timer.rs +++ b/rust/qemu-api/src/timer.rs @@ -1,5 +1,5 @@ // Copyright (C) 2024 Intel Corporation. -// Author(s): Zhao Liu <zhai1.liu@intel.com> +// Author(s): Zhao Liu <zhao1.liu@intel.com> // SPDX-License-Identifier: GPL-2.0-or-later use std::{ diff --git a/rust/qemu-api/src/uninit.rs b/rust/qemu-api/src/uninit.rs new file mode 100644 index 0000000..04123b4 --- /dev/null +++ b/rust/qemu-api/src/uninit.rs @@ -0,0 +1,85 @@ +//! Access fields of a [`MaybeUninit`] + +use std::{ + mem::MaybeUninit, + ops::{Deref, DerefMut}, +}; + +pub struct MaybeUninitField<'a, T, U> { + parent: &'a mut MaybeUninit<T>, + child: *mut U, +} + +impl<'a, T, U> MaybeUninitField<'a, T, U> { + #[doc(hidden)] + pub fn new(parent: &'a mut MaybeUninit<T>, child: *mut U) -> Self { + MaybeUninitField { parent, child } + } + + /// Return a constant pointer to the containing object of the field. + /// + /// Because the `MaybeUninitField` remembers the containing object, + /// it is possible to use it in foreign APIs that initialize the + /// child. + pub fn parent(f: &Self) -> *const T { + f.parent.as_ptr() + } + + /// Return a mutable pointer to the containing object. + /// + /// Because the `MaybeUninitField` remembers the containing object, + /// it is possible to use it in foreign APIs that initialize the + /// child. + pub fn parent_mut(f: &mut Self) -> *mut T { + f.parent.as_mut_ptr() + } +} + +impl<'a, T, U> Deref for MaybeUninitField<'a, T, U> { + type Target = MaybeUninit<U>; + + fn deref(&self) -> &MaybeUninit<U> { + // SAFETY: self.child was obtained by dereferencing a valid mutable + // reference; the content of the memory may be invalid or uninitialized + // but MaybeUninit<_> makes no assumption on it + unsafe { &*(self.child.cast()) } + } +} + +impl<'a, T, U> DerefMut for MaybeUninitField<'a, T, U> { + fn deref_mut(&mut self) -> &mut MaybeUninit<U> { + // SAFETY: self.child was obtained by dereferencing a valid mutable + // reference; the content of the memory may be invalid or uninitialized + // but MaybeUninit<_> makes no assumption on it + unsafe { &mut *(self.child.cast()) } + } +} + +/// ``` +/// #[derive(Debug)] +/// struct S { +/// x: u32, +/// y: u32, +/// } +/// +/// # use std::mem::MaybeUninit; +/// # use qemu_api::{assert_match, uninit_field_mut}; +/// +/// let mut s: MaybeUninit<S> = MaybeUninit::zeroed(); +/// uninit_field_mut!(s, x).write(5); +/// let s = unsafe { s.assume_init() }; +/// assert_match!(s, S { x: 5, y: 0 }); +/// ``` +#[macro_export] +macro_rules! uninit_field_mut { + ($container:expr, $($field:tt)+) => {{ + let container__: &mut ::std::mem::MaybeUninit<_> = &mut $container; + let container_ptr__ = container__.as_mut_ptr(); + + // SAFETY: the container is not used directly, only through a MaybeUninit<>, + // so the safety is delegated to the caller and to final invocation of + // assume_init() + let target__ = unsafe { std::ptr::addr_of_mut!((*container_ptr__).$($field)+) }; + $crate::uninit::MaybeUninitField::new(container__, target__) + }}; +} diff --git a/rust/qemu-api/src/vmstate.rs b/rust/qemu-api/src/vmstate.rs index 9c8b239..812f390 100644 --- a/rust/qemu-api/src/vmstate.rs +++ b/rust/qemu-api/src/vmstate.rs @@ -9,7 +9,7 @@ //! * [`vmstate_unused!`](crate::vmstate_unused) and //! [`vmstate_of!`](crate::vmstate_of), which are used to express the //! migration format for a struct. This is based on the [`VMState`] trait, -//! which is defined by all migrateable types. +//! which is defined by all migratable types. //! //! * [`impl_vmstate_forward`](crate::impl_vmstate_forward) and //! [`impl_vmstate_bitsized`](crate::impl_vmstate_bitsized), which help with diff --git a/rust/qemu-api/tests/vmstate_tests.rs b/rust/qemu-api/tests/vmstate_tests.rs index ad0fc5c..bded836 100644 --- a/rust/qemu-api/tests/vmstate_tests.rs +++ b/rust/qemu-api/tests/vmstate_tests.rs @@ -1,5 +1,5 @@ // Copyright (C) 2025 Intel Corporation. -// Author(s): Zhao Liu <zhai1.liu@intel.com> +// Author(s): Zhao Liu <zhao1.liu@intel.com> // SPDX-License-Identifier: GPL-2.0-or-later use std::{ diff --git a/rust/wrapper.h b/rust/qemu-api/wrapper.h index beddd9a..15a1b19 100644 --- a/rust/wrapper.h +++ b/rust/qemu-api/wrapper.h @@ -48,6 +48,8 @@ typedef enum memory_order { #endif /* __CLANG_STDATOMIC_H */ #include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/log-for-trace.h" #include "qemu/module.h" #include "qemu-io.h" #include "system/system.h" @@ -60,6 +62,7 @@ typedef enum memory_order { #include "hw/qdev-properties-system.h" #include "hw/irq.h" #include "qapi/error.h" +#include "qapi/error-internal.h" #include "migration/vmstate.h" #include "chardev/char-serial.h" #include "exec/memattrs.h" diff --git a/scripts/archive-source.sh b/scripts/archive-source.sh index e461c15..035828c 100755 --- a/scripts/archive-source.sh +++ b/scripts/archive-source.sh @@ -27,8 +27,9 @@ sub_file="${sub_tdir}/submodule.tar" # in their checkout, because the build environment is completely # different to the host OS. subprojects="keycodemapdb libvfio-user berkeley-softfloat-3 - berkeley-testfloat-3 arbitrary-int-1-rs bilge-0.2-rs - bilge-impl-0.2-rs either-1-rs itertools-0.11-rs libc-0.2-rs proc-macro2-1-rs + berkeley-testfloat-3 anyhow-1-rs arbitrary-int-1-rs bilge-0.2-rs + bilge-impl-0.2-rs either-1-rs foreign-0.3-rs itertools-0.11-rs + libc-0.2-rs proc-macro2-1-rs proc-macro-error-1-rs proc-macro-error-attr-1-rs quote-1-rs syn-2-rs unicode-ident-1-rs" sub_deinit="" diff --git a/scripts/make-release b/scripts/make-release index 8c3594a..4509a9f 100755 --- a/scripts/make-release +++ b/scripts/make-release @@ -40,8 +40,9 @@ fi # Only include wraps that are invoked with subproject() SUBPROJECTS="libvfio-user keycodemapdb berkeley-softfloat-3 - berkeley-testfloat-3 arbitrary-int-1-rs bilge-0.2-rs - bilge-impl-0.2-rs either-1-rs itertools-0.11-rs libc-0.2-rs proc-macro2-1-rs + berkeley-testfloat-3 anyhow-1-rs arbitrary-int-1-rs bilge-0.2-rs + bilge-impl-0.2-rs either-1-rs foreign-0.3-rs itertools-0.11-rs + libc-0.2-rs proc-macro2-1-rs proc-macro-error-1-rs proc-macro-error-attr-1-rs quote-1-rs syn-2-rs unicode-ident-1-rs" diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index f09ef96..73e0770 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -80,7 +80,7 @@ meson_options_help() { printf "%s\n" ' --tls-priority=VALUE Default TLS protocol/cipher priority string' printf "%s\n" ' [NORMAL]' printf "%s\n" ' --with-coroutine=CHOICE coroutine backend to use (choices:' - printf "%s\n" ' auto/sigaltstack/ucontext/windows/wasm)' + printf "%s\n" ' auto/sigaltstack/ucontext/wasm/windows)' printf "%s\n" ' --with-pkgversion=VALUE use specified string as sub-version of the' printf "%s\n" ' package' printf "%s\n" ' --with-suffix=VALUE Suffix for QEMU data/modules/config directories' diff --git a/scripts/qapi/.flake8 b/scripts/qapi/.flake8 deleted file mode 100644 index a873ff6..0000000 --- a/scripts/qapi/.flake8 +++ /dev/null @@ -1,3 +0,0 @@ -[flake8] -# Prefer pylint's bare-except checks to flake8's -extend-ignore = E722 diff --git a/scripts/qapi/.isort.cfg b/scripts/qapi/.isort.cfg deleted file mode 100644 index 643caa1..0000000 --- a/scripts/qapi/.isort.cfg +++ /dev/null @@ -1,7 +0,0 @@ -[settings] -force_grid_wrap=4 -force_sort_within_sections=True -include_trailing_comma=True -line_length=72 -lines_after_imports=2 -multi_line_output=3 diff --git a/scripts/qapi/backend.py b/scripts/qapi/backend.py index 14e60aa6..49ae6ec 100644 --- a/scripts/qapi/backend.py +++ b/scripts/qapi/backend.py @@ -13,6 +13,7 @@ from .visit import gen_visit class QAPIBackend(ABC): + # pylint: disable=too-few-public-methods @abstractmethod def generate(self, @@ -36,6 +37,7 @@ class QAPIBackend(ABC): class QAPICBackend(QAPIBackend): + # pylint: disable=too-few-public-methods def generate(self, schema: QAPISchema, diff --git a/scripts/qapi/mypy.ini b/scripts/qapi/mypy.ini deleted file mode 100644 index c9dbcec..0000000 --- a/scripts/qapi/mypy.ini +++ /dev/null @@ -1,4 +0,0 @@ -[mypy] -strict = True -disallow_untyped_calls = False -python_version = 3.9 diff --git a/scripts/qapi/pylintrc b/scripts/qapi/pylintrc index d24eece..e16283a 100644 --- a/scripts/qapi/pylintrc +++ b/scripts/qapi/pylintrc @@ -19,6 +19,7 @@ disable=consider-using-f-string, too-many-instance-attributes, too-many-positional-arguments, too-many-statements, + unknown-option-value, useless-option-value, [REPORTS] diff --git a/scripts/rdma-migration-helper.sh b/scripts/rdma-migration-helper.sh index a39f2fb..d784d15 100755 --- a/scripts/rdma-migration-helper.sh +++ b/scripts/rdma-migration-helper.sh @@ -8,23 +8,44 @@ get_ipv4_addr() head -1 | tr -d '\n' } +get_ipv6_addr() { + ipv6=$(ip -6 -o addr show dev "$1" | + sed -n 's/.*[[:blank:]]inet6[[:blank:]]*\([^[:blank:]/]*\).*/\1/p' | + head -1 | tr -d '\n') + + [ $? -eq 0 ] || return + + if [[ "$ipv6" =~ ^fe80: ]]; then + echo -n "[$ipv6%$1]" + else + echo -n "[$ipv6]" + fi +} + # existing rdma interfaces rdma_interfaces() { - rdma link show | sed -nE 's/^link .* netdev ([^ ]+).*$/\1 /p' + rdma link show | sed -nE 's/^link .* netdev ([^ ]+).*$/\1 /p' | + grep -Ev '^(lo|tun|tap)' } # existing valid ipv4 interfaces ipv4_interfaces() { - ip -o addr show | awk '/inet / {print $2}' | grep -v -w lo + ip -o addr show | awk '/inet / {print $2}' | grep -Ev '^(lo|tun|tap)' +} + +ipv6_interfaces() +{ + ip -o addr show | awk '/inet6 / {print $2}' | grep -Ev '^(lo|tun|tap)' } rdma_rxe_detect() { + family=$1 for r in $(rdma_interfaces) do - ipv4_interfaces | grep -qw $r && get_ipv4_addr $r && return + "$family"_interfaces | grep -qw $r && get_"$family"_addr $r && return done return 1 @@ -32,16 +53,23 @@ rdma_rxe_detect() rdma_rxe_setup() { - for i in $(ipv4_interfaces) + family=$1 + for i in $("$family"_interfaces) do - rdma_interfaces | grep -qw $i && continue + if rdma_interfaces | grep -qw $i; then + echo "$family: Reuse the existing rdma/rxe ${i}_rxe" \ + "for $i with $(get_"$family"_addr $i)" + return + fi + rdma link add "${i}_rxe" type rxe netdev "$i" && { - echo "Setup new rdma/rxe ${i}_rxe for $i with $(get_ipv4_addr $i)" + echo "$family: Setup new rdma/rxe ${i}_rxe" \ + "for $i with $(get_"$family"_addr $i)" return } done - echo "Failed to setup any new rdma/rxe link" >&2 + echo "$family: Failed to setup any new rdma/rxe link" >&2 return 1 } @@ -50,6 +78,12 @@ rdma_rxe_clean() modprobe -r rdma_rxe } +IP_FAMILY=${IP_FAMILY:-ipv4} +if [ "$IP_FAMILY" != "ipv6" ] && [ "$IP_FAMILY" != "ipv4" ]; then + echo "Unknown ip family '$IP_FAMILY', only ipv4 or ipv6 is supported." >&2 + exit 1 +fi + operation=${1:-detect} command -v rdma >/dev/null || { @@ -62,9 +96,14 @@ if [ "$operation" == "setup" ] || [ "$operation" == "clean" ]; then echo "Root privilege is required to setup/clean a rdma/rxe link" >&2 exit 1 } - rdma_rxe_"$operation" + if [ "$operation" == "setup" ]; then + rdma_rxe_setup ipv4 + rdma_rxe_setup ipv6 + else + rdma_rxe_clean + fi elif [ "$operation" == "detect" ]; then - rdma_rxe_detect + rdma_rxe_detect "$IP_FAMILY" else echo "Usage: $0 [setup | detect | clean]" fi diff --git a/scripts/rust/rustc_args.py b/scripts/rust/rustc_args.py index 2633157..63b0748 100644 --- a/scripts/rust/rustc_args.py +++ b/scripts/rust/rustc_args.py @@ -104,10 +104,7 @@ def generate_lint_flags(cargo_toml: CargoTOML, strict_lints: bool) -> Iterable[s else: raise Exception(f"invalid level {level} for {prefix}{lint}") - # This may change if QEMU ever invokes clippy-driver or rustdoc by - # hand. For now, check the syntax but do not add non-rustc lints to - # the command line. - if k == "rust" and not (strict_lints and lint in STRICT_LINTS): + if not (strict_lints and lint in STRICT_LINTS): lint_list.append(LintFlag(flags=[flag, prefix + lint], priority=priority)) if strict_lints: diff --git a/scripts/tracetool/__init__.py b/scripts/tracetool/__init__.py index bc03238..6dfcbf7 100644 --- a/scripts/tracetool/__init__.py +++ b/scripts/tracetool/__init__.py @@ -12,12 +12,14 @@ __maintainer__ = "Stefan Hajnoczi" __email__ = "stefanha@redhat.com" +import os import re import sys import weakref +from pathlib import PurePath -import tracetool.format import tracetool.backend +import tracetool.format def error_write(*lines): @@ -36,7 +38,7 @@ out_fobj = sys.stdout def out_open(filename): global out_filename, out_fobj - out_filename = filename + out_filename = posix_relpath(filename) out_fobj = open(filename, 'wt') def out(*lines, **kwargs): @@ -308,7 +310,7 @@ class Event(object): fmt = [fmt_trans, fmt] args = Arguments.build(groups["args"]) - return Event(name, props, fmt, args, lineno, filename) + return Event(name, props, fmt, args, lineno, posix_relpath(filename)) def __repr__(self): """Evaluable string representation for this object.""" @@ -447,3 +449,10 @@ def generate(events, group, format, backends, tracetool.backend.dtrace.PROBEPREFIX = probe_prefix tracetool.format.generate(events, format, backend, group) + +def posix_relpath(path, start=None): + try: + path = os.path.relpath(path, start) + except ValueError: + pass + return PurePath(path).as_posix() diff --git a/scripts/tracetool/backend/ftrace.py b/scripts/tracetool/backend/ftrace.py index baed2ae..5fa30cc 100644 --- a/scripts/tracetool/backend/ftrace.py +++ b/scripts/tracetool/backend/ftrace.py @@ -12,8 +12,6 @@ __maintainer__ = "Stefan Hajnoczi" __email__ = "stefanha@redhat.com" -import os.path - from tracetool import out @@ -47,7 +45,7 @@ def generate_h(event, group): args=event.args, event_id="TRACE_" + event.name.upper(), event_lineno=event.lineno, - event_filename=os.path.relpath(event.filename), + event_filename=event.filename, fmt=event.fmt.rstrip("\n"), argnames=argnames) diff --git a/scripts/tracetool/backend/log.py b/scripts/tracetool/backend/log.py index de27b7e..17ba1cd 100644 --- a/scripts/tracetool/backend/log.py +++ b/scripts/tracetool/backend/log.py @@ -12,8 +12,6 @@ __maintainer__ = "Stefan Hajnoczi" __email__ = "stefanha@redhat.com" -import os.path - from tracetool import out @@ -55,7 +53,7 @@ def generate_h(event, group): ' }', cond=cond, event_lineno=event.lineno, - event_filename=os.path.relpath(event.filename), + event_filename=event.filename, name=event.name, fmt=event.fmt.rstrip("\n"), argnames=argnames) diff --git a/scripts/tracetool/backend/simple.py b/scripts/tracetool/backend/simple.py index a74d61f..2688d4b 100644 --- a/scripts/tracetool/backend/simple.py +++ b/scripts/tracetool/backend/simple.py @@ -36,8 +36,17 @@ def generate_h_begin(events, group): def generate_h(event, group): - out(' _simple_%(api)s(%(args)s);', + event_id = 'TRACE_' + event.name.upper() + if "vcpu" in event.properties: + # already checked on the generic format code + cond = "true" + else: + cond = "trace_event_get_state(%s)" % event_id + out(' if (%(cond)s) {', + ' _simple_%(api)s(%(args)s);', + ' }', api=event.api(), + cond=cond, args=", ".join(event.args.names())) @@ -72,22 +81,10 @@ def generate_c(event, group): if len(event.args) == 0: sizestr = '0' - event_id = 'TRACE_' + event.name.upper() - if "vcpu" in event.properties: - # already checked on the generic format code - cond = "true" - else: - cond = "trace_event_get_state(%s)" % event_id - out('', - ' if (!%(cond)s) {', - ' return;', - ' }', - '', ' if (trace_record_start(&rec, %(event_obj)s.id, %(size_str)s)) {', ' return; /* Trace Buffer Full, Event Dropped ! */', ' }', - cond=cond, event_obj=event.api(event.QEMU_EVENT), size_str=sizestr) diff --git a/scripts/tracetool/backend/syslog.py b/scripts/tracetool/backend/syslog.py index 012970f..5a3a00f 100644 --- a/scripts/tracetool/backend/syslog.py +++ b/scripts/tracetool/backend/syslog.py @@ -12,8 +12,6 @@ __maintainer__ = "Stefan Hajnoczi" __email__ = "stefanha@redhat.com" -import os.path - from tracetool import out @@ -43,7 +41,7 @@ def generate_h(event, group): ' }', cond=cond, event_lineno=event.lineno, - event_filename=os.path.relpath(event.filename), + event_filename=event.filename, name=event.name, fmt=event.fmt.rstrip("\n"), argnames=argnames) diff --git a/scripts/vmstate-static-checker.py b/scripts/vmstate-static-checker.py index 25aca83..2335e25 100755 --- a/scripts/vmstate-static-checker.py +++ b/scripts/vmstate-static-checker.py @@ -91,6 +91,7 @@ def check_fields_match(name, s_field, d_field): 'mem_win_size', 'mig_mem_win_size', 'io_win_addr', 'mig_io_win_addr', 'io_win_size', 'mig_io_win_size'], + 'hpet': ['num_timers', 'num_timers_save'], } if not name in changed_names: diff --git a/stubs/meson.build b/stubs/meson.build index 63392f5..cef046e 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -77,6 +77,14 @@ if have_system stub_ss.add(files('target-monitor-defs.c')) stub_ss.add(files('win32-kbd-hook.c')) stub_ss.add(files('xen-hw-stub.c')) + stub_ss.add(files('monitor-arm-gic.c')) + stub_ss.add(files('monitor-i386-rtc.c')) + stub_ss.add(files('monitor-i386-sev.c')) + stub_ss.add(files('monitor-i386-sgx.c')) + stub_ss.add(files('monitor-i386-xen.c')) + stub_ss.add(files('monitor-cpu.c')) + stub_ss.add(files('monitor-cpu-s390x.c')) + stub_ss.add(files('monitor-cpu-s390x-kvm.c')) endif if have_system or have_user diff --git a/stubs/monitor-arm-gic.c b/stubs/monitor-arm-gic.c new file mode 100644 index 0000000..b342924 --- /dev/null +++ b/stubs/monitor-arm-gic.c @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-misc-arm.h" + + +GICCapabilityList *qmp_query_gic_capabilities(Error **errp) +{ + error_setg(errp, "GIC hardware is not available on this target"); + return NULL; +} diff --git a/stubs/monitor-cpu-s390x-kvm.c b/stubs/monitor-cpu-s390x-kvm.c new file mode 100644 index 0000000..8683dd2 --- /dev/null +++ b/stubs/monitor-cpu-s390x-kvm.c @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-machine-s390x.h" + +void qmp_set_cpu_topology(uint16_t core, + bool has_socket, uint16_t socket, + bool has_book, uint16_t book, + bool has_drawer, uint16_t drawer, + bool has_entitlement, S390CpuEntitlement entitlement, + bool has_dedicated, bool dedicated, + Error **errp) +{ + error_setg(errp, "CPU topology change is not supported on this target"); +} + +CpuPolarizationInfo *qmp_query_s390x_cpu_polarization(Error **errp) +{ + error_setg(errp, "CPU polarization is not supported on this target"); + return NULL; +} diff --git a/stubs/monitor-cpu-s390x.c b/stubs/monitor-cpu-s390x.c new file mode 100644 index 0000000..71e7944 --- /dev/null +++ b/stubs/monitor-cpu-s390x.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-machine.h" + +CpuModelCompareInfo * +qmp_query_cpu_model_comparison(CpuModelInfo *infoa, + CpuModelInfo *infob, + Error **errp) +{ + error_setg(errp, "CPU model comparison is not supported on this target"); + return NULL; +} + +CpuModelBaselineInfo * +qmp_query_cpu_model_baseline(CpuModelInfo *infoa, + CpuModelInfo *infob, + Error **errp) +{ + error_setg(errp, "CPU model baseline is not supported on this target"); + return NULL; +} diff --git a/stubs/monitor-cpu.c b/stubs/monitor-cpu.c new file mode 100644 index 0000000..a8c7ee8 --- /dev/null +++ b/stubs/monitor-cpu.c @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-machine.h" + +CpuModelExpansionInfo * +qmp_query_cpu_model_expansion(CpuModelExpansionType type, + CpuModelInfo *model, + Error **errp) +{ + error_setg(errp, "CPU model expansion is not supported on this target"); + return NULL; +} + +CpuDefinitionInfoList * +qmp_query_cpu_definitions(Error **errp) +{ + error_setg(errp, "CPU model definitions are not supported on this target"); + return NULL; +} diff --git a/stubs/monitor-i386-rtc.c b/stubs/monitor-i386-rtc.c new file mode 100644 index 0000000..8420d7c --- /dev/null +++ b/stubs/monitor-i386-rtc.c @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-misc-i386.h" + +void qmp_rtc_reset_reinjection(Error **errp) +{ + error_setg(errp, + "RTC interrupt reinjection backlog reset is not available for" + "this machine"); +} diff --git a/stubs/monitor-i386-sev.c b/stubs/monitor-i386-sev.c new file mode 100644 index 0000000..d4f0241 --- /dev/null +++ b/stubs/monitor-i386-sev.c @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-misc-i386.h" + +SevInfo *qmp_query_sev(Error **errp) +{ + error_setg(errp, "SEV is not available in this QEMU"); + return NULL; +} + +SevLaunchMeasureInfo *qmp_query_sev_launch_measure(Error **errp) +{ + error_setg(errp, "SEV is not available in this QEMU"); + return NULL; +} + +SevCapability *qmp_query_sev_capabilities(Error **errp) +{ + error_setg(errp, "SEV is not available in this QEMU"); + return NULL; +} + +void qmp_sev_inject_launch_secret(const char *packet_header, const char *secret, + bool has_gpa, uint64_t gpa, Error **errp) +{ + error_setg(errp, "SEV is not available in this QEMU"); +} + +SevAttestationReport *qmp_query_sev_attestation_report(const char *mnonce, + Error **errp) +{ + error_setg(errp, "SEV is not available in this QEMU"); + return NULL; +} diff --git a/stubs/monitor-i386-sgx.c b/stubs/monitor-i386-sgx.c new file mode 100644 index 0000000..00e081d --- /dev/null +++ b/stubs/monitor-i386-sgx.c @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-misc-i386.h" + +SgxInfo *qmp_query_sgx(Error **errp) +{ + error_setg(errp, "SGX support is not compiled in"); + return NULL; +} + +SgxInfo *qmp_query_sgx_capabilities(Error **errp) +{ + error_setg(errp, "SGX support is not compiled in"); + return NULL; +} diff --git a/stubs/monitor-i386-xen.c b/stubs/monitor-i386-xen.c new file mode 100644 index 0000000..95b826f --- /dev/null +++ b/stubs/monitor-i386-xen.c @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-misc-i386.h" + +EvtchnInfoList *qmp_xen_event_list(Error **errp) +{ + error_setg(errp, "Xen event channel emulation not enabled"); + return NULL; +} + +void qmp_xen_event_inject(uint32_t port, Error **errp) +{ + error_setg(errp, "Xen event channel emulation not enabled"); +} diff --git a/subprojects/.gitignore b/subprojects/.gitignore index d12d346..f428193 100644 --- a/subprojects/.gitignore +++ b/subprojects/.gitignore @@ -6,10 +6,12 @@ /keycodemapdb /libvfio-user /slirp +/anyhow-1.0.98 /arbitrary-int-1.2.7 /bilge-0.2.0 /bilge-impl-0.2.0 /either-1.12.0 +/foreign-0.3.1 /itertools-0.11.0 /libc-0.2.162 /proc-macro-error-1.0.4 diff --git a/subprojects/anyhow-1-rs.wrap b/subprojects/anyhow-1-rs.wrap new file mode 100644 index 0000000..a69a364 --- /dev/null +++ b/subprojects/anyhow-1-rs.wrap @@ -0,0 +1,7 @@ +[wrap-file] +directory = anyhow-1.0.98 +source_url = https://crates.io/api/v1/crates/anyhow/1.0.98/download +source_filename = anyhow-1.0.98.tar.gz +source_hash = e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487 +#method = cargo +patch_directory = anyhow-1-rs diff --git a/subprojects/foreign-0.3-rs.wrap b/subprojects/foreign-0.3-rs.wrap new file mode 100644 index 0000000..0d218ec --- /dev/null +++ b/subprojects/foreign-0.3-rs.wrap @@ -0,0 +1,7 @@ +[wrap-file] +directory = foreign-0.3.1 +source_url = https://crates.io/api/v1/crates/foreign/0.3.1/download +source_filename = foreign-0.3.1.tar.gz +source_hash = 17ca1b5be8c9d320daf386f1809c7acc0cb09accbae795c2001953fa50585846 +#method = cargo +patch_directory = foreign-0.3-rs diff --git a/subprojects/packagefiles/anyhow-1-rs/meson.build b/subprojects/packagefiles/anyhow-1-rs/meson.build new file mode 100644 index 0000000..348bab9 --- /dev/null +++ b/subprojects/packagefiles/anyhow-1-rs/meson.build @@ -0,0 +1,33 @@ +project('anyhow-1-rs', 'rust', + meson_version: '>=1.5.0', + version: '1.0.98', + license: 'MIT OR Apache-2.0', + default_options: []) + +rustc = meson.get_compiler('rust') + +rust_args = ['--cap-lints', 'allow'] +rust_args += ['--cfg', 'feature="std"'] +if rustc.version().version_compare('<1.65.0') + error('rustc version ' + rustc.version() + ' is unsupported. Please upgrade to at least 1.65.0') +endif +rust_args += [ '--cfg', 'std_backtrace' ] # >= 1.65.0 +if rustc.version().version_compare('<1.81.0') + rust_args += [ '--cfg', 'anyhow_no_core_error' ] +endif + +_anyhow_rs = static_library( + 'anyhow', + files('src/lib.rs'), + gnu_symbol_visibility: 'hidden', + override_options: ['rust_std=2018', 'build.rust_std=2018'], + rust_abi: 'rust', + rust_args: rust_args, + dependencies: [], +) + +anyhow_dep = declare_dependency( + link_with: _anyhow_rs, +) + +meson.override_dependency('anyhow-1-rs', anyhow_dep) diff --git a/subprojects/packagefiles/foreign-0.3-rs/meson.build b/subprojects/packagefiles/foreign-0.3-rs/meson.build new file mode 100644 index 0000000..0901c02 --- /dev/null +++ b/subprojects/packagefiles/foreign-0.3-rs/meson.build @@ -0,0 +1,26 @@ +project('foreign-0.3-rs', 'rust', + meson_version: '>=1.5.0', + version: '0.2.0', + license: 'MIT OR Apache-2.0', + default_options: []) + +subproject('libc-0.2-rs', required: true) +libc_rs = dependency('libc-0.2-rs') + +_foreign_rs = static_library( + 'foreign', + files('src/lib.rs'), + gnu_symbol_visibility: 'hidden', + override_options: ['rust_std=2021', 'build.rust_std=2021'], + rust_abi: 'rust', + rust_args: [ + '--cap-lints', 'allow', + ], + dependencies: [libc_rs], +) + +foreign_dep = declare_dependency( + link_with: _foreign_rs, +) + +meson.override_dependency('foreign-0.3-rs', foreign_dep) diff --git a/system/main.c b/system/main.c index 1c02206..b8f7157 100644 --- a/system/main.c +++ b/system/main.c @@ -69,8 +69,21 @@ int (*qemu_main)(void) = os_darwin_cfrunloop_main; int main(int argc, char **argv) { qemu_init(argc, argv); + + /* + * qemu_init acquires the BQL and replay mutex lock. BQL is acquired when + * initializing cpus, to block associated threads until initialization is + * complete. Replay_mutex lock is acquired on initialization, because it + * must be held when configuring icount_mode. + * + * On MacOS, qemu main event loop runs in a background thread, as main + * thread must be reserved for UI. Thus, we need to transfer lock ownership, + * and the simplest way to do that is to release them, and reacquire them + * from qemu_default_main. + */ bql_unlock(); replay_mutex_unlock(); + if (qemu_main) { QemuThread main_loop_thread; qemu_thread_create(&main_loop_thread, "qemu_main", diff --git a/system/memory.c b/system/memory.c index 63b983e..306e9ff 100644 --- a/system/memory.c +++ b/system/memory.c @@ -2174,18 +2174,14 @@ void ram_discard_manager_unregister_listener(RamDiscardManager *rdm, } /* Called with rcu_read_lock held. */ -bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, - ram_addr_t *ram_addr, bool *read_only, - bool *mr_has_discard_manager, Error **errp) +MemoryRegion *memory_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p, + Error **errp) { MemoryRegion *mr; hwaddr xlat; hwaddr len = iotlb->addr_mask + 1; bool writable = iotlb->perm & IOMMU_WO; - if (mr_has_discard_manager) { - *mr_has_discard_manager = false; - } /* * The IOMMU TLB entry we have just covers translation through * this IOMMU to its immediate target. We need to translate @@ -2195,7 +2191,7 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, &xlat, &len, writable, MEMTXATTRS_UNSPECIFIED); if (!memory_region_is_ram(mr)) { error_setg(errp, "iommu map to non memory area %" HWADDR_PRIx "", xlat); - return false; + return NULL; } else if (memory_region_has_ram_discard_manager(mr)) { RamDiscardManager *rdm = memory_region_get_ram_discard_manager(mr); MemoryRegionSection tmp = { @@ -2203,9 +2199,6 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, .offset_within_region = xlat, .size = int128_make64(len), }; - if (mr_has_discard_manager) { - *mr_has_discard_manager = true; - } /* * Malicious VMs can map memory into the IOMMU, which is expected * to remain discarded. vfio will pin all pages, populating memory. @@ -2216,7 +2209,7 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, error_setg(errp, "iommu map to discarded memory (e.g., unplugged" " via virtio-mem): %" HWADDR_PRIx "", iotlb->translated_addr); - return false; + return NULL; } } @@ -2226,22 +2219,11 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, */ if (len & iotlb->addr_mask) { error_setg(errp, "iommu has granularity incompatible with target AS"); - return false; - } - - if (vaddr) { - *vaddr = memory_region_get_ram_ptr(mr) + xlat; - } - - if (ram_addr) { - *ram_addr = memory_region_get_ram_addr(mr) + xlat; - } - - if (read_only) { - *read_only = !writable || mr->readonly; + return NULL; } - return true; + *xlat_p = xlat; + return mr; } void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client) diff --git a/system/meson.build b/system/meson.build index c2f0082..7514bf3 100644 --- a/system/meson.build +++ b/system/meson.build @@ -7,7 +7,7 @@ system_ss.add(files( 'vl.c', ), sdl, libpmem, libdaxctl) -libsystem_ss.add(files( +system_ss.add(files( 'balloon.c', 'bootdevice.c', 'cpus.c', diff --git a/system/runstate.c b/system/runstate.c index de74d96..38900c9 100644 --- a/system/runstate.c +++ b/system/runstate.c @@ -590,6 +590,58 @@ static void qemu_system_wakeup(void) } } +static char *tdx_parse_panic_message(char *message) +{ + bool printable = false; + char *buf = NULL; + int len = 0, i; + + /* + * Although message is defined as a json string, we shouldn't + * unconditionally treat it as is because the guest generated it and + * it's not necessarily trustable. + */ + if (message) { + /* The caller guarantees the NULL-terminated string. */ + len = strlen(message); + + printable = len > 0; + for (i = 0; i < len; i++) { + if (!(0x20 <= message[i] && message[i] <= 0x7e)) { + printable = false; + break; + } + } + } + + if (len == 0) { + buf = g_malloc(1); + buf[0] = '\0'; + } else { + if (!printable) { + /* 3 = length of "%02x " */ + buf = g_malloc(len * 3); + for (i = 0; i < len; i++) { + if (message[i] == '\0') { + break; + } else { + sprintf(buf + 3 * i, "%02x ", message[i]); + } + } + if (i > 0) { + /* replace the last ' '(space) to NULL */ + buf[i * 3 - 1] = '\0'; + } else { + buf[0] = '\0'; + } + } else { + buf = g_strdup(message); + } + } + + return buf; +} + void qemu_system_guest_panicked(GuestPanicInformation *info) { qemu_log_mask(LOG_GUEST_ERROR, "Guest crashed"); @@ -631,7 +683,20 @@ void qemu_system_guest_panicked(GuestPanicInformation *info) S390CrashReason_str(info->u.s390.reason), info->u.s390.psw_mask, info->u.s390.psw_addr); + } else if (info->type == GUEST_PANIC_INFORMATION_TYPE_TDX) { + char *message = tdx_parse_panic_message(info->u.tdx.message); + qemu_log_mask(LOG_GUEST_ERROR, + "\nTDX guest reports fatal error." + " error code: 0x%" PRIx32 " error message:\"%s\"\n", + info->u.tdx.error_code, message); + g_free(message); + if (info->u.tdx.gpa != -1ull) { + qemu_log_mask(LOG_GUEST_ERROR, "Additional error information " + "can be found at gpa page: 0x%" PRIx64 "\n", + info->u.tdx.gpa); + } } + qapi_free_GuestPanicInformation(info); } } diff --git a/system/vl.c b/system/vl.c index fd402b8..3b7057e 100644 --- a/system/vl.c +++ b/system/vl.c @@ -1192,10 +1192,7 @@ static int parse_fw_cfg(void *opaque, QemuOpts *opts, Error **errp) return -1; } } - /* For legacy, keep user files in a specific global order. */ - fw_cfg_set_order_override(fw_cfg, FW_CFG_ORDER_OVERRIDE_USER); fw_cfg_add_file(fw_cfg, name, buf, size); - fw_cfg_reset_order_override(fw_cfg); return 0; } @@ -2745,7 +2742,6 @@ static void qemu_create_cli_devices(void) } /* init generic devices */ - rom_set_order_override(FW_CFG_ORDER_OVERRIDE_DEVICE); qemu_opts_foreach(qemu_find_opts("device"), device_init_func, NULL, &error_fatal); QTAILQ_FOREACH(opt, &device_opts, next) { @@ -2756,7 +2752,6 @@ static void qemu_create_cli_devices(void) assert(ret_data == NULL); /* error_fatal aborts */ loc_pop(&opt->loc); } - rom_reset_order_override(); } static bool qemu_machine_creation_done(Error **errp) diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c index 890b84c..2082db4 100644 --- a/target/alpha/cpu.c +++ b/target/alpha/cpu.c @@ -261,6 +261,7 @@ static const TCGCPUOps alpha_tcg_ops = { .record_sigbus = alpha_cpu_record_sigbus, #else .tlb_fill = alpha_cpu_tlb_fill, + .pointer_wrap = cpu_pointer_wrap_notreached, .cpu_exec_interrupt = alpha_cpu_exec_interrupt, .cpu_exec_halt = alpha_cpu_has_work, .cpu_exec_reset = cpu_reset, diff --git a/target/arm/arm-qmp-cmds.c b/target/arm/arm-qmp-cmds.c index a1a944a..cefd235 100644 --- a/target/arm/arm-qmp-cmds.c +++ b/target/arm/arm-qmp-cmds.c @@ -26,10 +26,11 @@ #include "qapi/error.h" #include "qapi/visitor.h" #include "qapi/qobject-input-visitor.h" -#include "qapi/qapi-commands-machine-target.h" -#include "qapi/qapi-commands-misc-target.h" +#include "qapi/qapi-commands-machine.h" +#include "qapi/qapi-commands-misc-arm.h" #include "qobject/qdict.h" #include "qom/qom-qobject.h" +#include "cpu.h" static GICCapability *gic_cap_new(int version) { diff --git a/target/arm/cpregs.h b/target/arm/cpregs.h index 2183de8..c1a7ae3 100644 --- a/target/arm/cpregs.h +++ b/target/arm/cpregs.h @@ -23,6 +23,7 @@ #include "hw/registerfields.h" #include "target/arm/kvm-consts.h" +#include "cpu.h" /* * ARMCPRegInfo type field bits: diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h index 525e4ce..4452e7c 100644 --- a/target/arm/cpu-features.h +++ b/target/arm/cpu-features.h @@ -22,6 +22,7 @@ #include "hw/registerfields.h" #include "qemu/host-utils.h" +#include "cpu.h" /* * Naming convention for isar_feature functions: diff --git a/target/arm/cpu.c b/target/arm/cpu.c index ca5ed78..e025e24 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -2703,6 +2703,29 @@ static const struct SysemuCPUOps arm_sysemu_ops = { #endif #ifdef CONFIG_TCG +#ifndef CONFIG_USER_ONLY +static vaddr aprofile_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ + /* + * The Stage2 and Phys indexes are only used for ptw on arm32, + * and all pte's are aligned, so we never produce a wrap for these. + * Double check that we're not truncating a 40-bit physical address. + */ + assert((unsigned)mmu_idx < (ARMMMUIdx_Stage2_S & ARM_MMU_IDX_COREIDX_MASK)); + + if (!is_a64(cpu_env(cs))) { + return (uint32_t)result; + } + + /* + * TODO: For FEAT_CPA2, decide how to we want to resolve + * Unpredictable_CPACHECK in AddressIncrement. + */ + return result; +} +#endif /* !CONFIG_USER_ONLY */ + static const TCGCPUOps arm_tcg_ops = { .mttcg_supported = true, /* ARM processors have a weak memory model */ @@ -2722,6 +2745,7 @@ static const TCGCPUOps arm_tcg_ops = { .untagged_addr = aarch64_untagged_addr, #else .tlb_fill_align = arm_cpu_tlb_fill_align, + .pointer_wrap = aprofile_pointer_wrap, .cpu_exec_interrupt = arm_cpu_exec_interrupt, .cpu_exec_halt = arm_cpu_exec_halt, .cpu_exec_reset = cpu_reset, diff --git a/target/arm/helper.c b/target/arm/helper.c index 7631210..889d308 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -1904,7 +1904,7 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmcnten), .accessfn = pmreg_access, .fgt = FGT_PMCNTEN, - .writefn = pmcntenclr_write, + .writefn = pmcntenclr_write, .raw_writefn = raw_write, .type = ARM_CP_ALIAS | ARM_CP_IO }, { .name = "PMCNTENCLR_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .crn = 9, .crm = 12, .opc2 = 2, @@ -1912,7 +1912,7 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { .fgt = FGT_PMCNTEN, .type = ARM_CP_ALIAS | ARM_CP_IO, .fieldoffset = offsetof(CPUARMState, cp15.c9_pmcnten), - .writefn = pmcntenclr_write }, + .writefn = pmcntenclr_write, .raw_writefn = raw_write }, { .name = "PMOVSR", .cp = 15, .crn = 9, .crm = 12, .opc1 = 0, .opc2 = 3, .access = PL0_RW, .type = ARM_CP_IO, .fieldoffset = offsetoflow32(CPUARMState, cp15.c9_pmovsr), @@ -2029,16 +2029,16 @@ static const ARMCPRegInfo v7_cp_reginfo[] = { { .name = "PMINTENCLR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 2, .access = PL1_RW, .accessfn = access_tpm, .fgt = FGT_PMINTEN, - .type = ARM_CP_ALIAS | ARM_CP_IO | ARM_CP_NO_RAW, + .type = ARM_CP_ALIAS | ARM_CP_IO, .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), - .writefn = pmintenclr_write, }, + .writefn = pmintenclr_write, .raw_writefn = raw_write }, { .name = "PMINTENCLR_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 2, .access = PL1_RW, .accessfn = access_tpm, .fgt = FGT_PMINTEN, - .type = ARM_CP_ALIAS | ARM_CP_IO | ARM_CP_NO_RAW, + .type = ARM_CP_ALIAS | ARM_CP_IO, .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten), - .writefn = pmintenclr_write }, + .writefn = pmintenclr_write, .raw_writefn = raw_write }, { .name = "CCSIDR", .state = ARM_CP_STATE_BOTH, .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 1, .opc2 = 0, .access = PL1_R, diff --git a/target/arm/hvf-stub.c b/target/arm/hvf-stub.c new file mode 100644 index 0000000..ff13726 --- /dev/null +++ b/target/arm/hvf-stub.c @@ -0,0 +1,20 @@ +/* + * QEMU Hypervisor.framework (HVF) stubs for ARM + * + * Copyright (c) Linaro + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "hvf_arm.h" + +uint32_t hvf_arm_get_default_ipa_bit_size(void) +{ + g_assert_not_reached(); +} + +uint32_t hvf_arm_get_max_ipa_bit_size(void) +{ + g_assert_not_reached(); +} diff --git a/target/arm/hvf_arm.h b/target/arm/hvf_arm.h index 26c717b..ea82f26 100644 --- a/target/arm/hvf_arm.h +++ b/target/arm/hvf_arm.h @@ -11,7 +11,7 @@ #ifndef QEMU_HVF_ARM_H #define QEMU_HVF_ARM_H -#include "cpu.h" +#include "target/arm/cpu-qom.h" /** * hvf_arm_init_debug() - initialize guest debug capabilities @@ -22,23 +22,7 @@ void hvf_arm_init_debug(void); void hvf_arm_set_cpu_features_from_host(ARMCPU *cpu); -#ifdef CONFIG_HVF - uint32_t hvf_arm_get_default_ipa_bit_size(void); uint32_t hvf_arm_get_max_ipa_bit_size(void); -#else - -static inline uint32_t hvf_arm_get_default_ipa_bit_size(void) -{ - return 0; -} - -static inline uint32_t hvf_arm_get_max_ipa_bit_size(void) -{ - return 0; -} - -#endif - #endif diff --git a/target/arm/kvm.c b/target/arm/kvm.c index a2791aa..74fda8b 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -1846,6 +1846,11 @@ static int kvm_arm_sve_set_vls(ARMCPU *cpu) #define ARM_CPU_ID_MPIDR 3, 0, 0, 0, 5 +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { int ret; diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h index c4178d1..7dc83ca 100644 --- a/target/arm/kvm_arm.h +++ b/target/arm/kvm_arm.h @@ -12,6 +12,7 @@ #define QEMU_KVM_ARM_H #include "system/kvm.h" +#include "target/arm/cpu-qom.h" #define KVM_ARM_VGIC_V2 (1 << 0) #define KVM_ARM_VGIC_V3 (1 << 1) diff --git a/target/arm/meson.build b/target/arm/meson.build index b404fa5..7aa81e3 100644 --- a/target/arm/meson.build +++ b/target/arm/meson.build @@ -3,7 +3,6 @@ arm_common_ss = ss.source_set() arm_ss.add(files( 'gdbstub.c', )) -arm_ss.add(zlib) arm_ss.add(when: 'TARGET_AARCH64', if_true: files( 'cpu64.c', @@ -28,10 +27,11 @@ arm_user_ss.add(files( 'vfp_fpscr.c', )) -arm_common_system_ss.add(files('cpu.c'), capstone) +arm_common_system_ss.add(files('cpu.c')) arm_common_system_ss.add(when: 'TARGET_AARCH64', if_false: files( 'cpu32-stubs.c')) arm_common_system_ss.add(when: 'CONFIG_KVM', if_false: files('kvm-stub.c')) +arm_common_system_ss.add(when: 'CONFIG_HVF', if_false: files('hvf-stub.c')) arm_common_system_ss.add(files( 'arch_dump.c', 'arm-powerctl.c', @@ -48,7 +48,7 @@ subdir('hvf') if 'CONFIG_TCG' in config_all_accel subdir('tcg') else - arm_ss.add(files('tcg-stubs.c')) + arm_common_system_ss.add(files('tcg-stubs.c')) endif target_arch += {'arm': arm_ss} diff --git a/target/arm/tcg/cpu-v7m.c b/target/arm/tcg/cpu-v7m.c index 95b23d9..8e1a083 100644 --- a/target/arm/tcg/cpu-v7m.c +++ b/target/arm/tcg/cpu-v7m.c @@ -249,6 +249,7 @@ static const TCGCPUOps arm_v7m_tcg_ops = { .record_sigbus = arm_cpu_record_sigbus, #else .tlb_fill_align = arm_cpu_tlb_fill_align, + .pointer_wrap = cpu_pointer_wrap_uint32, .cpu_exec_interrupt = arm_v7m_cpu_exec_interrupt, .cpu_exec_halt = arm_cpu_exec_halt, .cpu_exec_reset = cpu_reset, diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build index 2d1502b..c59f0f0 100644 --- a/target/arm/tcg/meson.build +++ b/target/arm/tcg/meson.build @@ -56,6 +56,8 @@ arm_system_ss.add(files( arm_system_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('cpu-v7m.c')) arm_user_ss.add(when: 'TARGET_AARCH64', if_false: files('cpu-v7m.c')) +arm_common_ss.add(zlib) + arm_common_ss.add(files( 'arith_helper.c', 'crypto_helper.c', diff --git a/target/avr/cpu.c b/target/avr/cpu.c index 2502415..6995de6 100644 --- a/target/avr/cpu.c +++ b/target/avr/cpu.c @@ -250,6 +250,12 @@ static const TCGCPUOps avr_tcg_ops = { .cpu_exec_reset = cpu_reset, .tlb_fill = avr_cpu_tlb_fill, .do_interrupt = avr_cpu_do_interrupt, + /* + * TODO: code and data wrapping are different, but for the most part + * AVR only references bytes or aligned code fetches. But we use + * non-aligned MO_16 accesses for stack push/pop. + */ + .pointer_wrap = cpu_pointer_wrap_uint32, }; static void avr_cpu_class_init(ObjectClass *oc, const void *data) diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c index 6465181..2477772 100644 --- a/target/hppa/cpu.c +++ b/target/hppa/cpu.c @@ -269,6 +269,7 @@ static const TCGCPUOps hppa_tcg_ops = { #ifndef CONFIG_USER_ONLY .tlb_fill_align = hppa_cpu_tlb_fill_align, + .pointer_wrap = cpu_pointer_wrap_notreached, .cpu_exec_interrupt = hppa_cpu_exec_interrupt, .cpu_exec_halt = hppa_cpu_has_work, .cpu_exec_reset = cpu_reset, diff --git a/target/hppa/fpu_helper.c b/target/hppa/fpu_helper.c index ddd0a34..4535320 100644 --- a/target/hppa/fpu_helper.c +++ b/target/hppa/fpu_helper.c @@ -94,7 +94,8 @@ static void update_fr0_op(CPUHPPAState *env, uintptr_t ra) { uint32_t soft_exp = get_float_exception_flags(&env->fp_status); uint32_t hard_exp = 0; - uint32_t shadow = env->fr0_shadow; + uint32_t shadow = env->fr0_shadow & 0x3ffffff; + uint32_t fr1 = 0; if (likely(soft_exp == 0)) { env->fr[0] = (uint64_t)shadow << 32; @@ -107,9 +108,22 @@ static void update_fr0_op(CPUHPPAState *env, uintptr_t ra) hard_exp |= CONVERT_BIT(soft_exp, float_flag_overflow, R_FPSR_ENA_O_MASK); hard_exp |= CONVERT_BIT(soft_exp, float_flag_divbyzero, R_FPSR_ENA_Z_MASK); hard_exp |= CONVERT_BIT(soft_exp, float_flag_invalid, R_FPSR_ENA_V_MASK); - shadow |= hard_exp << (R_FPSR_FLAGS_SHIFT - R_FPSR_ENABLES_SHIFT); + if (hard_exp & shadow) { + shadow = FIELD_DP32(shadow, FPSR, T, 1); + /* fill exception register #1, which is lower 32-bits of fr[0] */ +#if !defined(CONFIG_USER_ONLY) + if (hard_exp & (R_FPSR_ENA_O_MASK | R_FPSR_ENA_U_MASK)) { + /* over- and underflow both set overflow flag only */ + fr1 = FIELD_DP32(fr1, FPSR, C, 1); + fr1 = FIELD_DP32(fr1, FPSR, FLG_O, 1); + } else +#endif + { + fr1 |= hard_exp << (R_FPSR_FLAGS_SHIFT - R_FPSR_ENABLES_SHIFT); + } + } env->fr0_shadow = shadow; - env->fr[0] = (uint64_t)shadow << 32; + env->fr[0] = (uint64_t)shadow << 32 | fr1; if (hard_exp & shadow) { hppa_dynamic_excp(env, EXCP_ASSIST, ra); diff --git a/target/hppa/int_helper.c b/target/hppa/int_helper.c index 7d48643..191ae19 100644 --- a/target/hppa/int_helper.c +++ b/target/hppa/int_helper.c @@ -177,6 +177,10 @@ void hppa_cpu_do_interrupt(CPUState *cs) } } env->cr[CR_IIR] = ldl_phys(cs->as, paddr); + if (i == EXCP_ASSIST) { + /* stuff insn code into bits of FP exception register #1 */ + env->fr[0] |= (env->cr[CR_IIR] & 0x03ffffff); + } } break; diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h index 164be76..48b88db 100644 --- a/target/i386/confidential-guest.h +++ b/target/i386/confidential-guest.h @@ -39,8 +39,10 @@ struct X86ConfidentialGuestClass { /* <public> */ int (*kvm_type)(X86ConfidentialGuest *cg); - uint32_t (*mask_cpuid_features)(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, - int reg, uint32_t value); + void (*cpu_instance_init)(X86ConfidentialGuest *cg, CPUState *cpu); + uint32_t (*adjust_cpuid_features)(X86ConfidentialGuest *cg, uint32_t feature, + uint32_t index, int reg, uint32_t value); + int (*check_features)(X86ConfidentialGuest *cg, CPUState *cs); }; /** @@ -59,25 +61,47 @@ static inline int x86_confidential_guest_kvm_type(X86ConfidentialGuest *cg) } } +static inline void x86_confidential_guest_cpu_instance_init(X86ConfidentialGuest *cg, + CPUState *cpu) +{ + X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); + + if (klass->cpu_instance_init) { + klass->cpu_instance_init(cg, cpu); + } +} + /** - * x86_confidential_guest_mask_cpuid_features: + * x86_confidential_guest_adjust_cpuid_features: * - * Removes unsupported features from a confidential guest's CPUID values, returns - * the value with the bits removed. The bits removed should be those that KVM - * provides independent of host-supported CPUID features, but are not supported by - * the confidential computing firmware. + * Adjust the supported features from a confidential guest's CPUID values, + * returns the adjusted value. There are bits being removed that are not + * supported by the confidential computing firmware or bits being added that + * are forcibly exposed to guest by the confidential computing firmware. */ -static inline int x86_confidential_guest_mask_cpuid_features(X86ConfidentialGuest *cg, +static inline int x86_confidential_guest_adjust_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, int reg, uint32_t value) { X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); - if (klass->mask_cpuid_features) { - return klass->mask_cpuid_features(cg, feature, index, reg, value); + if (klass->adjust_cpuid_features) { + return klass->adjust_cpuid_features(cg, feature, index, reg, value); } else { return value; } } +static inline int x86_confidential_guest_check_features(X86ConfidentialGuest *cg, + CPUState *cs) +{ + X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); + + if (klass->check_features) { + return klass->check_features(cg, cs); + } + + return 0; +} + #endif diff --git a/target/i386/cpu-system.c b/target/i386/cpu-system.c index 55f192e..b1494aa 100644 --- a/target/i386/cpu-system.c +++ b/target/i386/cpu-system.c @@ -24,7 +24,7 @@ #include "qobject/qdict.h" #include "qapi/qobject-input-visitor.h" #include "qom/qom-qobject.h" -#include "qapi/qapi-commands-machine-target.h" +#include "qapi/qapi-commands-machine.h" #include "cpu-internal.h" diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 9689f63..0d35e95 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -37,8 +37,9 @@ #include "hw/i386/topology.h" #include "exec/watchpoint.h" #ifndef CONFIG_USER_ONLY +#include "confidential-guest.h" #include "system/reset.h" -#include "qapi/qapi-commands-machine-target.h" +#include "qapi/qapi-commands-machine.h" #include "system/address-spaces.h" #include "hw/boards.h" #include "hw/i386/sgx-epc.h" @@ -899,6 +900,7 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, #define TCG_7_1_EAX_FEATURES (CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS | \ CPUID_7_1_EAX_FSRC | CPUID_7_1_EAX_CMPCCXADD) +#define TCG_7_1_ECX_FEATURES 0 #define TCG_7_1_EDX_FEATURES 0 #define TCG_7_2_EDX_FEATURES 0 #define TCG_APM_FEATURES 0 @@ -1149,6 +1151,25 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, .tcg_features = TCG_7_1_EAX_FEATURES, }, + [FEAT_7_1_ECX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL, NULL, NULL, NULL, + NULL, "msr-imm", NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid = { + .eax = 7, + .needs_ecx = true, .ecx = 1, + .reg = R_ECX, + }, + .tcg_features = TCG_7_1_ECX_FEATURES, + }, [FEAT_7_1_EDX] = { .type = CPUID_FEATURE_WORD, .feat_names = { @@ -1252,12 +1273,12 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { [FEAT_8000_0021_EAX] = { .type = CPUID_FEATURE_WORD, .feat_names = { - "no-nested-data-bp", NULL, "lfence-always-serializing", NULL, + "no-nested-data-bp", "fs-gs-base-ns", "lfence-always-serializing", NULL, NULL, NULL, "null-sel-clr-base", NULL, "auto-ibrs", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, + "prefetchi", NULL, NULL, NULL, "eraps", NULL, NULL, "sbpb", "ibpb-brtype", "srso-no", "srso-user-kernel-no", NULL, }, @@ -1677,14 +1698,21 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, }; -typedef struct FeatureMask { - FeatureWord index; - uint64_t mask; -} FeatureMask; +bool is_feature_word_cpuid(uint32_t feature, uint32_t index, int reg) +{ + FeatureWordInfo *wi; + FeatureWord w; -typedef struct FeatureDep { - FeatureMask from, to; -} FeatureDep; + for (w = 0; w < FEATURE_WORDS; w++) { + wi = &feature_word_info[w]; + if (wi->type == CPUID_FEATURE_WORD && wi->cpuid.eax == feature && + (!wi->cpuid.needs_ecx || wi->cpuid.ecx == index) && + wi->cpuid.reg == reg) { + return true; + } + } + return false; +} static FeatureDep feature_dependencies[] = { { @@ -1796,10 +1824,6 @@ static FeatureDep feature_dependencies[] = { .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED }, }, { - .from = { FEAT_7_1_EAX, CPUID_7_1_EAX_WRMSRNS }, - .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED }, - }, - { .from = { FEAT_7_0_EBX, CPUID_7_0_EBX_SGX }, .to = { FEAT_7_0_ECX, CPUID_7_0_ECX_SGX_LC }, }, @@ -1854,9 +1878,6 @@ static const X86RegisterInfo32 x86_reg_info_32[CPU_NB_REGS32] = { }; #undef REGISTER -/* CPUID feature bits available in XSS */ -#define CPUID_XSTATE_XSS_MASK (XSTATE_ARCH_LBR_MASK) - ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = { [XSTATE_FP_BIT] = { /* x87 FP state component is always enabled if XSAVE is supported */ @@ -2206,6 +2227,60 @@ static CPUCaches epyc_v4_cache_info = { }, }; +static CPUCaches epyc_v5_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 64 * KiB, + .line_size = 64, + .associativity = 4, + .partitions = 1, + .sets = 256, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 8 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 8192, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + static const CPUCaches epyc_rome_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, @@ -2314,6 +2389,60 @@ static const CPUCaches epyc_rome_v3_cache_info = { }, }; +static const CPUCaches epyc_rome_v5_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 16 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 16384, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + static const CPUCaches epyc_milan_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, @@ -2422,6 +2551,60 @@ static const CPUCaches epyc_milan_v2_cache_info = { }, }; +static const CPUCaches epyc_milan_v3_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + static const CPUCaches epyc_genoa_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, @@ -2476,6 +2659,114 @@ static const CPUCaches epyc_genoa_cache_info = { }, }; +static const CPUCaches epyc_genoa_v2_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 1 * MiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 2048, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + +static const CPUCaches epyc_turin_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 48 * KiB, + .line_size = 64, + .associativity = 12, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 1 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .share_level = CPU_TOPOLOGY_LEVEL_CORE, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .no_invd_sharing = true, + .complex_indexing = false, + .share_level = CPU_TOPOLOGY_LEVEL_DIE, + }, +}; + /* The following VMX features are not supported by KVM and are left out in the * CPU definitions: * @@ -5233,6 +5524,25 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, .cache_info = &epyc_v4_cache_info }, + { + .version = 5, + .props = (PropValue[]) { + { "overflow-recov", "on" }, + { "succor", "on" }, + { "lbrv", "on" }, + { "tsc-scale", "on" }, + { "vmcb-clean", "on" }, + { "flushbyasid", "on" }, + { "pause-filter", "on" }, + { "pfthreshold", "on" }, + { "v-vmsave-vmload", "on" }, + { "vgif", "on" }, + { "model-id", + "AMD EPYC-v5 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_v5_cache_info + }, { /* end of list */ } } }, @@ -5371,6 +5681,25 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } }, }, + { + .version = 5, + .props = (PropValue[]) { + { "overflow-recov", "on" }, + { "succor", "on" }, + { "lbrv", "on" }, + { "tsc-scale", "on" }, + { "vmcb-clean", "on" }, + { "flushbyasid", "on" }, + { "pause-filter", "on" }, + { "pfthreshold", "on" }, + { "v-vmsave-vmload", "on" }, + { "vgif", "on" }, + { "model-id", + "AMD EPYC-Rome-v5 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_rome_v5_cache_info + }, { /* end of list */ } } }, @@ -5446,6 +5775,25 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, .cache_info = &epyc_milan_v2_cache_info }, + { + .version = 3, + .props = (PropValue[]) { + { "overflow-recov", "on" }, + { "succor", "on" }, + { "lbrv", "on" }, + { "tsc-scale", "on" }, + { "vmcb-clean", "on" }, + { "flushbyasid", "on" }, + { "pause-filter", "on" }, + { "pfthreshold", "on" }, + { "v-vmsave-vmload", "on" }, + { "vgif", "on" }, + { "model-id", + "AMD EPYC-Milan-v3 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_milan_v3_cache_info + }, { /* end of list */ } } }, @@ -5520,6 +5868,31 @@ static const X86CPUDefinition builtin_x86_defs[] = { .xlevel = 0x80000022, .model_id = "AMD EPYC-Genoa Processor", .cache_info = &epyc_genoa_cache_info, + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + .props = (PropValue[]) { + { "overflow-recov", "on" }, + { "succor", "on" }, + { "lbrv", "on" }, + { "tsc-scale", "on" }, + { "vmcb-clean", "on" }, + { "flushbyasid", "on" }, + { "pause-filter", "on" }, + { "pfthreshold", "on" }, + { "v-vmsave-vmload", "on" }, + { "vgif", "on" }, + { "fs-gs-base-ns", "on" }, + { "perfmon-v2", "on" }, + { "model-id", + "AMD EPYC-Genoa-v2 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_genoa_v2_cache_info + }, + { /* end of list */ } + } }, { .name = "YongFeng", @@ -5657,6 +6030,89 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .name = "EPYC-Turin", + .level = 0xd, + .vendor = CPUID_VENDOR_AMD, + .family = 26, + .model = 0, + .stepping = 0, + .features[FEAT_1_ECX] = + CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | + CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | + CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | + CPUID_EXT_PCID | CPUID_EXT_CX16 | CPUID_EXT_FMA | + CPUID_EXT_SSSE3 | CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | + CPUID_EXT_SSE3, + .features[FEAT_1_EDX] = + CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | + CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | + CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | + CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | + CPUID_VME | CPUID_FP87, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, + .features[FEAT_7_0_EBX] = + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | + CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | + CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_AVX512F | + CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_AVX512IFMA | + CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI | + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | + CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI | + CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | + CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG | + CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 | + CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_MOVDIRI | + CPUID_7_0_ECX_MOVDIR64B, + .features[FEAT_7_0_EDX] = + CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_AVX512_VP2INTERSECT, + .features[FEAT_7_1_EAX] = + CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_AVX512_BF16, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | + CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | + CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | + CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, + .features[FEAT_8000_0001_EDX] = + CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | + CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | + CPUID_EXT2_SYSCALL, + .features[FEAT_8000_0007_EBX] = + CPUID_8000_0007_EBX_OVERFLOW_RECOV | CPUID_8000_0007_EBX_SUCCOR, + .features[FEAT_8000_0008_EBX] = + CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | + CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB | + CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP | + CPUID_8000_0008_EBX_STIBP_ALWAYS_ON | + CPUID_8000_0008_EBX_AMD_SSBD | CPUID_8000_0008_EBX_AMD_PSFD, + .features[FEAT_8000_0021_EAX] = + CPUID_8000_0021_EAX_NO_NESTED_DATA_BP | + CPUID_8000_0021_EAX_FS_GS_BASE_NS | + CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING | + CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE | + CPUID_8000_0021_EAX_AUTO_IBRS | CPUID_8000_0021_EAX_PREFETCHI | + CPUID_8000_0021_EAX_SBPB | CPUID_8000_0021_EAX_IBPB_BRTYPE | + CPUID_8000_0021_EAX_SRSO_USER_KERNEL_NO, + .features[FEAT_8000_0022_EAX] = + CPUID_8000_0022_EAX_PERFMON_V2, + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, + .features[FEAT_SVM] = + CPUID_SVM_NPT | CPUID_SVM_LBRV | CPUID_SVM_NRIPSAVE | + CPUID_SVM_TSCSCALE | CPUID_SVM_VMCBCLEAN | CPUID_SVM_FLUSHASID | + CPUID_SVM_PAUSEFILTER | CPUID_SVM_PFTHRESHOLD | + CPUID_SVM_V_VMSAVE_VMLOAD | CPUID_SVM_VGIF | + CPUID_SVM_VNMI | CPUID_SVM_SVME_ADDR_CHK, + .xlevel = 0x80000022, + .model_id = "AMD EPYC-Turin Processor", + .cache_info = &epyc_turin_cache_info, + }, }; /* @@ -5766,7 +6222,7 @@ static const TypeInfo max_x86_cpu_type_info = { .class_init = max_x86_cpu_class_init, }; -static char *feature_word_description(FeatureWordInfo *f, uint32_t bit) +static char *feature_word_description(FeatureWordInfo *f) { assert(f->type == CPUID_FEATURE_WORD || f->type == MSR_FEATURE_WORD); @@ -5775,11 +6231,15 @@ static char *feature_word_description(FeatureWordInfo *f, uint32_t bit) { const char *reg = get_register_name_32(f->cpuid.reg); assert(reg); - return g_strdup_printf("CPUID.%02XH:%s", - f->cpuid.eax, reg); + if (!f->cpuid.needs_ecx) { + return g_strdup_printf("CPUID[eax=%02Xh].%s", f->cpuid.eax, reg); + } else { + return g_strdup_printf("CPUID[eax=%02Xh,ecx=%02Xh].%s", + f->cpuid.eax, f->cpuid.ecx, reg); + } } case MSR_FEATURE_WORD: - return g_strdup_printf("MSR(%02XH)", + return g_strdup_printf("MSR(%02Xh)", f->msr.index); } @@ -5799,12 +6259,13 @@ static bool x86_cpu_have_filtered_features(X86CPU *cpu) return false; } -static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, - const char *verbose_prefix) +void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix) { CPUX86State *env = &cpu->env; FeatureWordInfo *f = &feature_word_info[w]; int i; + g_autofree char *feat_word_str = feature_word_description(f); if (!cpu->force_features) { env->features[w] &= ~mask; @@ -5817,7 +6278,35 @@ static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, for (i = 0; i < 64; ++i) { if ((1ULL << i) & mask) { - g_autofree char *feat_word_str = feature_word_description(f, i); + warn_report("%s: %s%s%s [bit %d]", + verbose_prefix, + feat_word_str, + f->feat_names[i] ? "." : "", + f->feat_names[i] ? f->feat_names[i] : "", i); + } + } +} + +void mark_forced_on_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix) +{ + CPUX86State *env = &cpu->env; + FeatureWordInfo *f = &feature_word_info[w]; + int i; + + if (!cpu->force_features) { + env->features[w] |= mask; + } + + cpu->forced_on_features[w] |= mask; + + if (!verbose_prefix) { + return; + } + + for (i = 0; i < 64; ++i) { + if ((1ULL << i) & mask) { + g_autofree char *feat_word_str = feature_word_description(f); warn_report("%s: %s%s%s [bit %d]", verbose_prefix, feat_word_str, @@ -6860,9 +7349,6 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, if (threads_per_pkg > 1) { *ebx |= threads_per_pkg << 16; } - if (!cpu->enable_pmu) { - *ecx &= ~CPUID_EXT_PDCM; - } break; case 2: /* cache info: needed for Pentium Pro compatibility */ @@ -6973,9 +7459,9 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *edx = env->features[FEAT_7_0_EDX]; /* Feature flags */ } else if (count == 1) { *eax = env->features[FEAT_7_1_EAX]; + *ecx = env->features[FEAT_7_1_ECX]; *edx = env->features[FEAT_7_1_EDX]; *ebx = 0; - *ecx = 0; } else if (count == 2) { *edx = env->features[FEAT_7_2_EDX]; *eax = 0; @@ -7044,7 +7530,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, break; case 0x1F: /* V2 Extended Topology Enumeration Leaf */ - if (!x86_has_extended_topo(env->avail_cpu_topo)) { + if (!x86_has_cpuid_0x1f(cpu)) { *eax = *ebx = *ecx = *edx = 0; break; } @@ -7852,6 +8338,13 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) } } + if (!cpu->enable_pmu) { + mark_unavailable_features(cpu, FEAT_1_ECX, + env->user_features[FEAT_1_ECX] & CPUID_EXT_PDCM, + "This feature is not available due to PMU being disabled"); + env->features[FEAT_1_ECX] &= ~CPUID_EXT_PDCM; + } + for (i = 0; i < ARRAY_SIZE(feature_dependencies); i++) { FeatureDep *d = &feature_dependencies[i]; if (!(env->features[d->from.index] & d->from.mask)) { @@ -7880,6 +8373,7 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) x86_cpu_adjust_feat_level(cpu, FEAT_6_EAX); x86_cpu_adjust_feat_level(cpu, FEAT_7_0_ECX); x86_cpu_adjust_feat_level(cpu, FEAT_7_1_EAX); + x86_cpu_adjust_feat_level(cpu, FEAT_7_1_ECX); x86_cpu_adjust_feat_level(cpu, FEAT_7_1_EDX); x86_cpu_adjust_feat_level(cpu, FEAT_7_2_EDX); x86_cpu_adjust_feat_level(cpu, FEAT_8000_0001_EDX); @@ -7908,7 +8402,7 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) * cpu->vendor_cpuid_only has been unset for compatibility with older * machine types. */ - if (x86_has_extended_topo(env->avail_cpu_topo) && + if (x86_has_cpuid_0x1f(cpu) && (IS_INTEL_CPU(env) || !cpu->vendor_cpuid_only)) { x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x1F); } @@ -8543,6 +9037,13 @@ static void x86_cpu_post_initfn(Object *obj) } accel_cpu_instance_init(CPU(obj)); + +#ifndef CONFIG_USER_ONLY + if (current_machine && current_machine->cgs) { + x86_confidential_guest_cpu_instance_init( + X86_CONFIDENTIAL_GUEST(current_machine->cgs), (CPU(obj))); + } +#endif } static void x86_cpu_init_default_topo(X86CPU *cpu) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index c51e0a4..51e1013 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -584,6 +584,7 @@ typedef enum X86Seg { #define XSTATE_OPMASK_BIT 5 #define XSTATE_ZMM_Hi256_BIT 6 #define XSTATE_Hi16_ZMM_BIT 7 +#define XSTATE_PT_BIT 8 #define XSTATE_PKRU_BIT 9 #define XSTATE_ARCH_LBR_BIT 15 #define XSTATE_XTILE_CFG_BIT 17 @@ -597,6 +598,7 @@ typedef enum X86Seg { #define XSTATE_OPMASK_MASK (1ULL << XSTATE_OPMASK_BIT) #define XSTATE_ZMM_Hi256_MASK (1ULL << XSTATE_ZMM_Hi256_BIT) #define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT) +#define XSTATE_PT_MASK (1ULL << XSTATE_PT_BIT) #define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT) #define XSTATE_ARCH_LBR_MASK (1ULL << XSTATE_ARCH_LBR_BIT) #define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT) @@ -619,6 +621,11 @@ typedef enum X86Seg { XSTATE_Hi16_ZMM_MASK | XSTATE_PKRU_MASK | \ XSTATE_XTILE_CFG_MASK | XSTATE_XTILE_DATA_MASK) +/* CPUID feature bits available in XSS */ +#define CPUID_XSTATE_XSS_MASK (XSTATE_ARCH_LBR_MASK) + +#define CPUID_XSTATE_MASK (CPUID_XSTATE_XCR0_MASK | CPUID_XSTATE_XSS_MASK) + /* CPUID feature words */ typedef enum FeatureWord { FEAT_1_EDX, /* CPUID[1].EDX */ @@ -661,12 +668,22 @@ typedef enum FeatureWord { FEAT_SGX_12_1_EAX, /* CPUID[EAX=0x12,ECX=1].EAX (SGX ATTRIBUTES[31:0]) */ FEAT_XSAVE_XSS_LO, /* CPUID[EAX=0xd,ECX=1].ECX */ FEAT_XSAVE_XSS_HI, /* CPUID[EAX=0xd,ECX=1].EDX */ + FEAT_7_1_ECX, /* CPUID[EAX=7,ECX=1].ECX */ FEAT_7_1_EDX, /* CPUID[EAX=7,ECX=1].EDX */ FEAT_7_2_EDX, /* CPUID[EAX=7,ECX=2].EDX */ FEAT_24_0_EBX, /* CPUID[EAX=0x24,ECX=0].EBX */ FEATURE_WORDS, } FeatureWord; +typedef struct FeatureMask { + FeatureWord index; + uint64_t mask; +} FeatureMask; + +typedef struct FeatureDep { + FeatureMask from, to; +} FeatureDep; + typedef uint64_t FeatureWordArray[FEATURE_WORDS]; uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); @@ -899,6 +916,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_0_ECX_LA57 (1U << 16) /* Read Processor ID */ #define CPUID_7_0_ECX_RDPID (1U << 22) +/* KeyLocker */ +#define CPUID_7_0_ECX_KeyLocker (1U << 23) /* Bus Lock Debug Exception */ #define CPUID_7_0_ECX_BUS_LOCK_DETECT (1U << 24) /* Cache Line Demote Instruction */ @@ -920,6 +939,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_0_EDX_FSRM (1U << 4) /* AVX512 Vector Pair Intersection to a Pair of Mask Registers */ #define CPUID_7_0_EDX_AVX512_VP2INTERSECT (1U << 8) + /* "md_clear" VERW clears CPU buffers */ +#define CPUID_7_0_EDX_MD_CLEAR (1U << 10) /* SERIALIZE instruction */ #define CPUID_7_0_EDX_SERIALIZE (1U << 14) /* TSX Suspend Load Address Tracking instruction */ @@ -957,6 +978,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_1_EAX_AVX_VNNI (1U << 4) /* AVX512 BFloat16 Instruction */ #define CPUID_7_1_EAX_AVX512_BF16 (1U << 5) +/* Linear address space separation */ +#define CPUID_7_1_EAX_LASS (1U << 6) /* CMPCCXADD Instructions */ #define CPUID_7_1_EAX_CMPCCXADD (1U << 7) /* Fast Zero REP MOVS */ @@ -978,6 +1001,9 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); /* Linear Address Masking */ #define CPUID_7_1_EAX_LAM (1U << 26) +/* The immediate form of MSR access instructions */ +#define CPUID_7_1_ECX_MSR_IMM (1U << 5) + /* Support for VPDPB[SU,UU,SS]D[,S] */ #define CPUID_7_1_EDX_AVX_VNNI_INT8 (1U << 4) /* AVX NE CONVERT Instructions */ @@ -1001,6 +1027,7 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_7_2_EDX_DDPD_U (1U << 3) /* Indicate bit 10 of the IA32_SPEC_CTRL MSR is supported */ #define CPUID_7_2_EDX_BHI_CTRL (1U << 4) + /* Do not exhibit MXCSR Configuration Dependent Timing (MCDT) behavior */ #define CPUID_7_2_EDX_MCDT_NO (1U << 5) @@ -1070,12 +1097,16 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); /* Processor ignores nested data breakpoints */ #define CPUID_8000_0021_EAX_NO_NESTED_DATA_BP (1U << 0) +/* WRMSR to FS_BASE, GS_BASE, or KERNEL_GS_BASE is non-serializing */ +#define CPUID_8000_0021_EAX_FS_GS_BASE_NS (1U << 1) /* LFENCE is always serializing */ #define CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2) /* Null Selector Clears Base */ #define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6) /* Automatic IBRS */ #define CPUID_8000_0021_EAX_AUTO_IBRS (1U << 8) +/* Indicates support for IC prefetch */ +#define CPUID_8000_0021_EAX_PREFETCHI (1U << 20) /* Enhanced Return Address Predictor Scurity */ #define CPUID_8000_0021_EAX_ERAPS (1U << 24) /* Selective Branch Predictor Barrier */ @@ -1100,6 +1131,7 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_XSAVE_XSAVEC (1U << 1) #define CPUID_XSAVE_XGETBV1 (1U << 2) #define CPUID_XSAVE_XSAVES (1U << 3) +#define CPUID_XSAVE_XFD (1U << 4) #define CPUID_6_EAX_ARAT (1U << 2) @@ -1741,12 +1773,6 @@ typedef enum TPRAccess { /* Cache information data structures: */ -enum CacheType { - DATA_CACHE, - INSTRUCTION_CACHE, - UNIFIED_CACHE -}; - typedef struct CPUCacheInfo { enum CacheType type; uint8_t level; @@ -2192,6 +2218,9 @@ struct ArchCPU { /* Features that were filtered out because of missing host capabilities */ FeatureWordArray filtered_features; + /* Features that are forced enabled by underlying hypervisor, e.g., TDX */ + FeatureWordArray forced_on_features; + /* Enable PMU CPUID bits. This can't be enabled by default yet because * it doesn't have ABI stability guarantees, as it passes all PMU CPUID * bits returned by GET_SUPPORTED_CPUID (that depend on host CPU and kernel @@ -2239,6 +2268,9 @@ struct ArchCPU { /* Compatibility bits for old machine types: */ bool enable_cpuid_0xb; + /* Force to enable cpuid 0x1f */ + bool force_cpuid_0x1f; + /* Enable auto level-increase for all CPUID leaves */ bool full_cpuid_auto_level; @@ -2499,6 +2531,17 @@ void cpu_set_apic_feature(CPUX86State *env); void host_cpuid(uint32_t function, uint32_t count, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); bool cpu_has_x2apic_feature(CPUX86State *env); +bool is_feature_word_cpuid(uint32_t feature, uint32_t index, int reg); +void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix); +void mark_forced_on_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix); + +static inline bool x86_has_cpuid_0x1f(X86CPU *cpu) +{ + return cpu->force_cpuid_0x1f || + x86_has_extended_topo(cpu->env.avail_cpu_topo); +} /* helper.c */ void x86_cpu_set_a20(X86CPU *cpu, int a20_state); diff --git a/target/i386/emulate/x86_flags.c b/target/i386/emulate/x86_flags.c index 47bc197..cc138c7 100644 --- a/target/i386/emulate/x86_flags.c +++ b/target/i386/emulate/x86_flags.c @@ -255,19 +255,19 @@ void lflags_to_rflags(CPUX86State *env) void rflags_to_lflags(CPUX86State *env) { - target_ulong cf_xor_of; + target_ulong cf_af, cf_xor_of; + /* Leave the low byte zero so that parity is always even... */ + env->cc_dst = !(env->eflags & CC_Z) << 8; + + /* ... and therefore cc_src always uses opposite polarity. */ env->cc_src = CC_P; env->cc_src ^= env->eflags & (CC_S | CC_P); /* rotate right by one to move CF and AF into the carry-out positions */ - env->cc_src |= ( - (env->eflags >> 1) | - (env->eflags << (TARGET_LONG_BITS - 1))) & (CC_C | CC_A); + cf_af = env->eflags & (CC_C | CC_A); + env->cc_src |= ((cf_af >> 1) | (cf_af << (TARGET_LONG_BITS - 1))); - cf_xor_of = (env->eflags & (CC_C | CC_O)) + (CC_O - CC_C); + cf_xor_of = ((env->eflags & (CC_C | CC_O)) + (CC_O - CC_C)) & CC_O; env->cc_src |= -cf_xor_of & LF_MASK_PO; - - /* Leave the low byte zero so that parity is not affected. */ - env->cc_dst = !(env->eflags & CC_Z) << 8; } diff --git a/target/i386/host-cpu.c b/target/i386/host-cpu.c index a2d3830..7512567 100644 --- a/target/i386/host-cpu.c +++ b/target/i386/host-cpu.c @@ -15,7 +15,7 @@ #include "system/system.h" /* Note: Only safe for use on x86(-64) hosts */ -static uint32_t host_cpu_phys_bits(void) +uint32_t host_cpu_phys_bits(void) { uint32_t eax; uint32_t host_phys_bits; diff --git a/target/i386/host-cpu.h b/target/i386/host-cpu.h index 6a9bc91..b97ec01 100644 --- a/target/i386/host-cpu.h +++ b/target/i386/host-cpu.h @@ -10,6 +10,7 @@ #ifndef HOST_CPU_H #define HOST_CPU_H +uint32_t host_cpu_phys_bits(void); void host_cpu_instance_init(X86CPU *cpu); void host_cpu_max_instance_init(X86CPU *cpu); bool host_cpu_realizefn(CPUState *cs, Error **errp); diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index c9a3c02..234878c 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -38,6 +38,7 @@ #include "kvm_i386.h" #include "../confidential-guest.h" #include "sev.h" +#include "tdx.h" #include "xen-emu.h" #include "hyperv.h" #include "hyperv-proto.h" @@ -192,6 +193,7 @@ static const char *vm_type_name[] = { [KVM_X86_SEV_VM] = "SEV", [KVM_X86_SEV_ES_VM] = "SEV-ES", [KVM_X86_SNP_VM] = "SEV-SNP", + [KVM_X86_TDX_VM] = "TDX", }; bool kvm_is_vm_type_supported(int type) @@ -326,7 +328,7 @@ void kvm_synchronize_all_tsc(void) { CPUState *cpu; - if (kvm_enabled()) { + if (kvm_enabled() && !is_tdx_vm()) { CPU_FOREACH(cpu) { run_on_cpu(cpu, do_kvm_synchronize_tsc, RUN_ON_CPU_NULL); } @@ -392,7 +394,7 @@ static bool host_tsx_broken(void) /* Returns the value for a specific register on the cpuid entry */ -static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg) +uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg) { uint32_t ret = 0; switch (reg) { @@ -414,9 +416,9 @@ static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg) /* Find matching entry for function/index on kvm_cpuid2 struct */ -static struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid, - uint32_t function, - uint32_t index) +struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid, + uint32_t function, + uint32_t index) { int i; for (i = 0; i < cpuid->nent; ++i) { @@ -572,7 +574,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, } if (current_machine->cgs) { - ret = x86_confidential_guest_mask_cpuid_features( + ret = x86_confidential_guest_adjust_cpuid_features( X86_CONFIDENTIAL_GUEST(current_machine->cgs), function, index, reg, ret); } @@ -868,6 +870,15 @@ static int kvm_arch_set_tsc_khz(CPUState *cs) int r, cur_freq; bool set_ioctl = false; + /* + * TSC of TD vcpu is immutable, it cannot be set/changed via vcpu scope + * VM_SET_TSC_KHZ, but only be initialized via VM scope VM_SET_TSC_KHZ + * before ioctl KVM_TDX_INIT_VM in tdx_pre_create_vcpu() + */ + if (is_tdx_vm()) { + return 0; + } + if (!env->tsc_khz) { return 0; } @@ -1779,8 +1790,6 @@ static int hyperv_init_vcpu(X86CPU *cpu) static Error *invtsc_mig_blocker; -#define KVM_MAX_CPUID_ENTRIES 100 - static void kvm_init_xsave(CPUX86State *env) { if (has_xsave2) { @@ -1823,9 +1832,8 @@ static void kvm_init_nested_state(CPUX86State *env) } } -static uint32_t kvm_x86_build_cpuid(CPUX86State *env, - struct kvm_cpuid_entry2 *entries, - uint32_t cpuid_i) +uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries, + uint32_t cpuid_i) { uint32_t limit, i, j; uint32_t unused; @@ -1864,7 +1872,7 @@ static uint32_t kvm_x86_build_cpuid(CPUX86State *env, break; } case 0x1f: - if (!x86_has_extended_topo(env->avail_cpu_topo)) { + if (!x86_has_cpuid_0x1f(env_archcpu(env))) { cpuid_i--; break; } @@ -2052,6 +2060,15 @@ full: abort(); } +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + if (is_tdx_vm()) { + return tdx_pre_create_vcpu(cpu, errp); + } + + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { struct { @@ -2076,6 +2093,14 @@ int kvm_arch_init_vcpu(CPUState *cs) int r; Error *local_err = NULL; + if (current_machine->cgs) { + r = x86_confidential_guest_check_features( + X86_CONFIDENTIAL_GUEST(current_machine->cgs), cs); + if (r < 0) { + return r; + } + } + memset(&cpuid_data, 0, sizeof(cpuid_data)); cpuid_i = 0; @@ -3206,16 +3231,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) Error *local_err = NULL; /* - * Initialize SEV context, if required - * - * If no memory encryption is requested (ms->cgs == NULL) this is - * a no-op. - * - * It's also a no-op if a non-SEV confidential guest support - * mechanism is selected. SEV is the only mechanism available to - * select on x86 at present, so this doesn't arise, but if new - * mechanisms are supported in future (e.g. TDX), they'll need - * their own initialization either here or elsewhere. + * Initialize confidential guest (SEV/TDX) context, if required */ if (ms->cgs) { ret = confidential_guest_kvm_init(ms->cgs, &local_err); @@ -3856,32 +3872,34 @@ static void kvm_init_msrs(X86CPU *cpu) CPUX86State *env = &cpu->env; kvm_msr_buf_reset(cpu); - if (has_msr_arch_capabs) { - kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, - env->features[FEAT_ARCH_CAPABILITIES]); - } - if (has_msr_core_capabs) { - kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, - env->features[FEAT_CORE_CAPABILITY]); - } + if (!is_tdx_vm()) { + if (has_msr_arch_capabs) { + kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, + env->features[FEAT_ARCH_CAPABILITIES]); + } - if (has_msr_perf_capabs && cpu->enable_pmu) { - kvm_msr_entry_add_perf(cpu, env->features); + if (has_msr_core_capabs) { + kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, + env->features[FEAT_CORE_CAPABILITY]); + } + + if (has_msr_perf_capabs && cpu->enable_pmu) { + kvm_msr_entry_add_perf(cpu, env->features); + } + + /* + * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but + * all kernels with MSR features should have them. + */ + if (kvm_feature_msrs && cpu_has_vmx(env)) { + kvm_msr_entry_add_vmx(cpu, env->features); + } } if (has_msr_ucode_rev) { kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev); } - - /* - * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but - * all kernels with MSR features should have them. - */ - if (kvm_feature_msrs && cpu_has_vmx(env)) { - kvm_msr_entry_add_vmx(cpu, env->features); - } - assert(kvm_buf_set_msrs(cpu) == 0); } @@ -6000,9 +6018,11 @@ static bool host_supports_vmx(void) * because private/shared page tracking is already provided through other * means, these 2 use-cases should be treated as being mutually-exclusive. */ -static int kvm_handle_hc_map_gpa_range(struct kvm_run *run) +static int kvm_handle_hc_map_gpa_range(X86CPU *cpu, struct kvm_run *run) { + struct kvm_pre_fault_memory mem; uint64_t gpa, size, attributes; + int ret; if (!machine_require_guest_memfd(current_machine)) return -EINVAL; @@ -6013,13 +6033,32 @@ static int kvm_handle_hc_map_gpa_range(struct kvm_run *run) trace_kvm_hc_map_gpa_range(gpa, size, attributes, run->hypercall.flags); - return kvm_convert_memory(gpa, size, attributes & KVM_MAP_GPA_RANGE_ENCRYPTED); + ret = kvm_convert_memory(gpa, size, attributes & KVM_MAP_GPA_RANGE_ENCRYPTED); + if (ret || !kvm_pre_fault_memory_supported) { + return ret; + } + + /* + * Opportunistically pre-fault memory in. Failures are ignored so that any + * errors in faulting in the memory will get captured in KVM page fault + * path when the guest first accesses the page. + */ + memset(&mem, 0, sizeof(mem)); + mem.gpa = gpa; + mem.size = size; + while (mem.size) { + if (kvm_vcpu_ioctl(CPU(cpu), KVM_PRE_FAULT_MEMORY, &mem)) { + break; + } + } + + return 0; } -static int kvm_handle_hypercall(struct kvm_run *run) +static int kvm_handle_hypercall(X86CPU *cpu, struct kvm_run *run) { if (run->hypercall.nr == KVM_HC_MAP_GPA_RANGE) - return kvm_handle_hc_map_gpa_range(run); + return kvm_handle_hc_map_gpa_range(cpu, run); return -EINVAL; } @@ -6119,7 +6158,32 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) break; #endif case KVM_EXIT_HYPERCALL: - ret = kvm_handle_hypercall(run); + ret = kvm_handle_hypercall(cpu, run); + break; + case KVM_EXIT_SYSTEM_EVENT: + switch (run->system_event.type) { + case KVM_SYSTEM_EVENT_TDX_FATAL: + ret = tdx_handle_report_fatal_error(cpu, run); + break; + default: + ret = -1; + break; + } + break; + case KVM_EXIT_TDX: + /* + * run->tdx is already set up for the case where userspace + * does not handle the TDVMCALL. + */ + switch (run->tdx.nr) { + case TDVMCALL_GET_QUOTE: + tdx_handle_get_quote(cpu, run); + break; + case TDVMCALL_GET_TD_VM_CALL_INFO: + tdx_handle_get_tdvmcall_info(cpu, run); + break; + } + ret = 0; break; default: fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h index 88565e8..5f83e88 100644 --- a/target/i386/kvm/kvm_i386.h +++ b/target/i386/kvm/kvm_i386.h @@ -13,6 +13,8 @@ #include "system/kvm.h" +#define KVM_MAX_CPUID_ENTRIES 100 + /* always false if !CONFIG_KVM */ #define kvm_pit_in_kernel() \ (kvm_irqchip_in_kernel() && !kvm_irqchip_is_split()) @@ -42,6 +44,13 @@ void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask); #ifdef CONFIG_KVM +#include <linux/kvm.h> + +typedef struct KvmCpuidInfo { + struct kvm_cpuid2 cpuid; + struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; +} KvmCpuidInfo; + bool kvm_is_vm_type_supported(int type); bool kvm_has_adjust_clock_stable(void); bool kvm_has_exception_payload(void); @@ -57,6 +66,12 @@ uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address); void kvm_update_msi_routes_all(void *private, bool global, uint32_t index, uint32_t mask); +struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid, + uint32_t function, + uint32_t index); +uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg); +uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries, + uint32_t cpuid_i); #endif /* CONFIG_KVM */ void kvm_pc_setup_irq_routing(bool pci_enabled); diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build index 3996caf..2675bf8 100644 --- a/target/i386/kvm/meson.build +++ b/target/i386/kvm/meson.build @@ -8,6 +8,8 @@ i386_kvm_ss.add(files( i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c')) +i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c', 'tdx-quote-generator.c'), if_false: files('tdx-stub.c')) + i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c')) i386_system_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss) diff --git a/target/i386/kvm/tdx-quote-generator.c b/target/i386/kvm/tdx-quote-generator.c new file mode 100644 index 0000000..f59715f --- /dev/null +++ b/target/i386/kvm/tdx-quote-generator.c @@ -0,0 +1,300 @@ +/* + * QEMU TDX Quote Generation Support + * + * Copyright (c) 2025 Intel Corporation + * + * Author: + * Xiaoyao Li <xiaoyao.li@intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "qapi/qapi-visit-sockets.h" + +#include "tdx-quote-generator.h" + +#define QGS_MSG_LIB_MAJOR_VER 1 +#define QGS_MSG_LIB_MINOR_VER 1 + +typedef enum _qgs_msg_type_t { + GET_QUOTE_REQ = 0, + GET_QUOTE_RESP = 1, + GET_COLLATERAL_REQ = 2, + GET_COLLATERAL_RESP = 3, + GET_PLATFORM_INFO_REQ = 4, + GET_PLATFORM_INFO_RESP = 5, + QGS_MSG_TYPE_MAX +} qgs_msg_type_t; + +typedef struct _qgs_msg_header_t { + uint16_t major_version; + uint16_t minor_version; + uint32_t type; + uint32_t size; // size of the whole message, include this header, in byte + uint32_t error_code; // used in response only +} qgs_msg_header_t; + +typedef struct _qgs_msg_get_quote_req_t { + qgs_msg_header_t header; // header.type = GET_QUOTE_REQ + uint32_t report_size; // cannot be 0 + uint32_t id_list_size; // length of id_list, in byte, can be 0 +} qgs_msg_get_quote_req_t; + +typedef struct _qgs_msg_get_quote_resp_s { + qgs_msg_header_t header; // header.type = GET_QUOTE_RESP + uint32_t selected_id_size; // can be 0 in case only one id is sent in request + uint32_t quote_size; // length of quote_data, in byte + uint8_t id_quote[]; // selected id followed by quote +} qgs_msg_get_quote_resp_t; + +#define HEADER_SIZE 4 + +static uint32_t decode_header(const char *buf, size_t len) { + if (len < HEADER_SIZE) { + return 0; + } + uint32_t msg_size = 0; + for (uint32_t i = 0; i < HEADER_SIZE; ++i) { + msg_size = msg_size * 256 + (buf[i] & 0xFF); + } + return msg_size; +} + +static void encode_header(char *buf, size_t len, uint32_t size) { + assert(len >= HEADER_SIZE); + buf[0] = ((size >> 24) & 0xFF); + buf[1] = ((size >> 16) & 0xFF); + buf[2] = ((size >> 8) & 0xFF); + buf[3] = (size & 0xFF); +} + +static void tdx_generate_quote_cleanup(TdxGenerateQuoteTask *task) +{ + timer_del(&task->timer); + + g_source_remove(task->watch); + qio_channel_close(QIO_CHANNEL(task->sioc), NULL); + object_unref(OBJECT(task->sioc)); + + task->completion(task); +} + +static gboolean tdx_get_quote_read(QIOChannel *ioc, GIOCondition condition, + gpointer opaque) +{ + TdxGenerateQuoteTask *task = opaque; + Error *err = NULL; + int ret; + + ret = qio_channel_read(ioc, task->receive_buf + task->receive_buf_received, + task->payload_len - task->receive_buf_received, &err); + if (ret < 0) { + if (ret == QIO_CHANNEL_ERR_BLOCK) { + return G_SOURCE_CONTINUE; + } else { + error_report_err(err); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + } + + if (ret == 0) { + error_report("End of file before reply received"); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + + task->receive_buf_received += ret; + if (task->receive_buf_received >= HEADER_SIZE) { + uint32_t len = decode_header(task->receive_buf, + task->receive_buf_received); + if (len == 0 || + len > (task->payload_len - HEADER_SIZE)) { + error_report("Message len %u must be non-zero & less than %zu", + len, (task->payload_len - HEADER_SIZE)); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + + /* Now we know the size, shrink to fit */ + task->payload_len = HEADER_SIZE + len; + task->receive_buf = g_renew(char, + task->receive_buf, + task->payload_len); + } + + if (task->receive_buf_received >= (sizeof(qgs_msg_header_t) + HEADER_SIZE)) { + qgs_msg_header_t *hdr = (qgs_msg_header_t *)(task->receive_buf + HEADER_SIZE); + if (hdr->major_version != QGS_MSG_LIB_MAJOR_VER || + hdr->minor_version != QGS_MSG_LIB_MINOR_VER) { + error_report("Invalid QGS message header version %d.%d", + hdr->major_version, + hdr->minor_version); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + if (hdr->type != GET_QUOTE_RESP) { + error_report("Invalid QGS message type %d", + hdr->type); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + if (hdr->size > (task->payload_len - HEADER_SIZE)) { + error_report("QGS message size %d exceeds payload capacity %zu", + hdr->size, task->payload_len); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + if (hdr->error_code != 0) { + error_report("QGS message error code %d", + hdr->error_code); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + } + if (task->receive_buf_received >= (sizeof(qgs_msg_get_quote_resp_t) + HEADER_SIZE)) { + qgs_msg_get_quote_resp_t *msg = (qgs_msg_get_quote_resp_t *)(task->receive_buf + HEADER_SIZE); + if (msg->selected_id_size != 0) { + error_report("QGS message selected ID was %d not 0", + msg->selected_id_size); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + + if ((task->payload_len - HEADER_SIZE - sizeof(qgs_msg_get_quote_resp_t)) != + msg->quote_size) { + error_report("QGS quote size %d should be %zu", + msg->quote_size, + (task->payload_len - sizeof(qgs_msg_get_quote_resp_t))); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + goto end; + } + } + + if (task->receive_buf_received == task->payload_len) { + size_t strip = HEADER_SIZE + sizeof(qgs_msg_get_quote_resp_t); + memmove(task->receive_buf, + task->receive_buf + strip, + task->receive_buf_received - strip); + task->receive_buf_received -= strip; + task->status_code = TDX_VP_GET_QUOTE_SUCCESS; + goto end; + } + + return G_SOURCE_CONTINUE; + +end: + tdx_generate_quote_cleanup(task); + return G_SOURCE_REMOVE; +} + +static gboolean tdx_send_report(QIOChannel *ioc, GIOCondition condition, + gpointer opaque) +{ + TdxGenerateQuoteTask *task = opaque; + Error *err = NULL; + int ret; + + ret = qio_channel_write(ioc, task->send_data + task->send_data_sent, + task->send_data_size - task->send_data_sent, &err); + if (ret < 0) { + if (ret == QIO_CHANNEL_ERR_BLOCK) { + ret = 0; + } else { + error_report_err(err); + task->status_code = TDX_VP_GET_QUOTE_ERROR; + tdx_generate_quote_cleanup(task); + goto end; + } + } + task->send_data_sent += ret; + + if (task->send_data_sent == task->send_data_size) { + task->watch = qio_channel_add_watch(QIO_CHANNEL(task->sioc), G_IO_IN, + tdx_get_quote_read, task, NULL); + goto end; + } + + return G_SOURCE_CONTINUE; + +end: + return G_SOURCE_REMOVE; +} + +static void tdx_quote_generator_connected(QIOTask *qio_task, gpointer opaque) +{ + TdxGenerateQuoteTask *task = opaque; + Error *err = NULL; + int ret; + + ret = qio_task_propagate_error(qio_task, &err); + if (ret) { + error_report_err(err); + task->status_code = TDX_VP_GET_QUOTE_QGS_UNAVAILABLE; + tdx_generate_quote_cleanup(task); + return; + } + + task->watch = qio_channel_add_watch(QIO_CHANNEL(task->sioc), G_IO_OUT, + tdx_send_report, task, NULL); +} + +#define TRANSACTION_TIMEOUT 30000 + +static void getquote_expired(void *opaque) +{ + TdxGenerateQuoteTask *task = opaque; + + task->status_code = TDX_VP_GET_QUOTE_ERROR; + tdx_generate_quote_cleanup(task); +} + +static void setup_get_quote_timer(TdxGenerateQuoteTask *task) +{ + int64_t time; + + timer_init_ms(&task->timer, QEMU_CLOCK_VIRTUAL, getquote_expired, task); + time = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); + timer_mod(&task->timer, time + TRANSACTION_TIMEOUT); +} + +void tdx_generate_quote(TdxGenerateQuoteTask *task, + SocketAddress *qg_sock_addr) +{ + QIOChannelSocket *sioc; + qgs_msg_get_quote_req_t msg; + + /* Prepare a QGS message prelude */ + msg.header.major_version = QGS_MSG_LIB_MAJOR_VER; + msg.header.minor_version = QGS_MSG_LIB_MINOR_VER; + msg.header.type = GET_QUOTE_REQ; + msg.header.size = sizeof(msg) + task->send_data_size; + msg.header.error_code = 0; + msg.report_size = task->send_data_size; + msg.id_list_size = 0; + + /* Make room to add the QGS message prelude */ + task->send_data = g_renew(char, + task->send_data, + task->send_data_size + sizeof(msg) + HEADER_SIZE); + memmove(task->send_data + sizeof(msg) + HEADER_SIZE, + task->send_data, + task->send_data_size); + memcpy(task->send_data + HEADER_SIZE, + &msg, + sizeof(msg)); + encode_header(task->send_data, HEADER_SIZE, task->send_data_size + sizeof(msg)); + task->send_data_size += sizeof(msg) + HEADER_SIZE; + + sioc = qio_channel_socket_new(); + task->sioc = sioc; + + setup_get_quote_timer(task); + + qio_channel_socket_connect_async(sioc, qg_sock_addr, + tdx_quote_generator_connected, task, + NULL, NULL); +} diff --git a/target/i386/kvm/tdx-quote-generator.h b/target/i386/kvm/tdx-quote-generator.h new file mode 100644 index 0000000..3bd9b8e --- /dev/null +++ b/target/i386/kvm/tdx-quote-generator.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef QEMU_I386_TDX_QUOTE_GENERATOR_H +#define QEMU_I386_TDX_QUOTE_GENERATOR_H + +#include "qom/object_interfaces.h" +#include "io/channel-socket.h" +#include "exec/hwaddr.h" + +#define TDX_GET_QUOTE_STRUCTURE_VERSION 1ULL + +#define TDX_VP_GET_QUOTE_SUCCESS 0ULL +#define TDX_VP_GET_QUOTE_IN_FLIGHT (-1ULL) +#define TDX_VP_GET_QUOTE_ERROR 0x8000000000000000ULL +#define TDX_VP_GET_QUOTE_QGS_UNAVAILABLE 0x8000000000000001ULL + +/* Limit to avoid resource starvation. */ +#define TDX_GET_QUOTE_MAX_BUF_LEN (128 * 1024) +#define TDX_MAX_GET_QUOTE_REQUEST 16 + +#define TDX_GET_QUOTE_HDR_SIZE 24 + +/* Format of pages shared with guest. */ +struct tdx_get_quote_header { + /* Format version: must be 1 in little endian. */ + uint64_t structure_version; + + /* + * GetQuote status code in little endian: + * Guest must set error_code to 0 to avoid information leak. + * Qemu sets this before interrupting guest. + */ + uint64_t error_code; + + /* + * in-message size in little endian: The message will follow this header. + * The in-message will be send to QGS. + */ + uint32_t in_len; + + /* + * out-message size in little endian: + * On request, out_len must be zero to avoid information leak. + * On return, message size from QGS. Qemu overwrites this field. + * The message will follows this header. The in-message is overwritten. + */ + uint32_t out_len; + + /* + * Message buffer follows. + * Guest sets message that will be send to QGS. If out_len > in_len, guest + * should zero remaining buffer to avoid information leak. + * Qemu overwrites this buffer with a message returned from QGS. + */ +}; + +typedef struct TdxGenerateQuoteTask { + hwaddr buf_gpa; + hwaddr payload_gpa; + uint64_t payload_len; + + char *send_data; + uint64_t send_data_size; + uint64_t send_data_sent; + + char *receive_buf; + uint64_t receive_buf_received; + + uint64_t status_code; + struct tdx_get_quote_header hdr; + + QIOChannelSocket *sioc; + guint watch; + QEMUTimer timer; + + void (*completion)(struct TdxGenerateQuoteTask *task); + void *opaque; +} TdxGenerateQuoteTask; + +void tdx_generate_quote(TdxGenerateQuoteTask *task, SocketAddress *qg_sock_addr); + +#endif /* QEMU_I386_TDX_QUOTE_GENERATOR_H */ diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c new file mode 100644 index 0000000..76fee49 --- /dev/null +++ b/target/i386/kvm/tdx-stub.c @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "qemu/osdep.h" + +#include "tdx.h" + +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return -EINVAL; +} + +int tdx_parse_tdvf(void *flash_ptr, int size) +{ + return -EINVAL; +} + +int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run) +{ + return -EINVAL; +} + +void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run) +{ +} + +void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run) +{ +} diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c new file mode 100644 index 0000000..e809e4b --- /dev/null +++ b/target/i386/kvm/tdx.c @@ -0,0 +1,1487 @@ +/* + * QEMU TDX support + * + * Copyright (c) 2025 Intel Corporation + * + * Author: + * Xiaoyao Li <xiaoyao.li@intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qemu/base64.h" +#include "qemu/mmap-alloc.h" +#include "qapi/error.h" +#include "qapi/qapi-visit-sockets.h" +#include "qom/object_interfaces.h" +#include "crypto/hash.h" +#include "system/kvm_int.h" +#include "system/runstate.h" +#include "system/system.h" +#include "system/ramblock.h" +#include "system/address-spaces.h" + +#include <linux/kvm_para.h> + +#include "cpu.h" +#include "cpu-internal.h" +#include "host-cpu.h" +#include "hw/i386/e820_memory_layout.h" +#include "hw/i386/tdvf.h" +#include "hw/i386/x86.h" +#include "hw/i386/tdvf-hob.h" +#include "kvm_i386.h" +#include "tdx.h" +#include "tdx-quote-generator.h" + +#include "standard-headers/asm-x86/kvm_para.h" + +#define TDX_MIN_TSC_FREQUENCY_KHZ (100 * 1000) +#define TDX_MAX_TSC_FREQUENCY_KHZ (10 * 1000 * 1000) + +#define TDX_TD_ATTRIBUTES_DEBUG BIT_ULL(0) +#define TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE BIT_ULL(28) +#define TDX_TD_ATTRIBUTES_PKS BIT_ULL(30) +#define TDX_TD_ATTRIBUTES_PERFMON BIT_ULL(63) + +#define TDX_SUPPORTED_TD_ATTRS (TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE |\ + TDX_TD_ATTRIBUTES_PKS | \ + TDX_TD_ATTRIBUTES_PERFMON) + +#define TDX_SUPPORTED_KVM_FEATURES ((1U << KVM_FEATURE_NOP_IO_DELAY) | \ + (1U << KVM_FEATURE_PV_UNHALT) | \ + (1U << KVM_FEATURE_PV_TLB_FLUSH) | \ + (1U << KVM_FEATURE_PV_SEND_IPI) | \ + (1U << KVM_FEATURE_POLL_CONTROL) | \ + (1U << KVM_FEATURE_PV_SCHED_YIELD) | \ + (1U << KVM_FEATURE_MSI_EXT_DEST_ID)) + +static TdxGuest *tdx_guest; + +static struct kvm_tdx_capabilities *tdx_caps; +static struct kvm_cpuid2 *tdx_supported_cpuid; + +/* Valid after kvm_arch_init()->confidential_guest_kvm_init()->tdx_kvm_init() */ +bool is_tdx_vm(void) +{ + return !!tdx_guest; +} + +enum tdx_ioctl_level { + TDX_VM_IOCTL, + TDX_VCPU_IOCTL, +}; + +static int tdx_ioctl_internal(enum tdx_ioctl_level level, void *state, + int cmd_id, __u32 flags, void *data, + Error **errp) +{ + struct kvm_tdx_cmd tdx_cmd = {}; + int r; + + const char *tdx_ioctl_name[] = { + [KVM_TDX_CAPABILITIES] = "KVM_TDX_CAPABILITIES", + [KVM_TDX_INIT_VM] = "KVM_TDX_INIT_VM", + [KVM_TDX_INIT_VCPU] = "KVM_TDX_INIT_VCPU", + [KVM_TDX_INIT_MEM_REGION] = "KVM_TDX_INIT_MEM_REGION", + [KVM_TDX_FINALIZE_VM] = "KVM_TDX_FINALIZE_VM", + [KVM_TDX_GET_CPUID] = "KVM_TDX_GET_CPUID", + }; + + tdx_cmd.id = cmd_id; + tdx_cmd.flags = flags; + tdx_cmd.data = (__u64)(unsigned long)data; + + switch (level) { + case TDX_VM_IOCTL: + r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd); + break; + case TDX_VCPU_IOCTL: + r = kvm_vcpu_ioctl(state, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd); + break; + default: + error_setg(errp, "Invalid tdx_ioctl_level %d", level); + return -EINVAL; + } + + if (r < 0) { + error_setg_errno(errp, -r, "TDX ioctl %s failed, hw_errors: 0x%llx", + tdx_ioctl_name[cmd_id], tdx_cmd.hw_error); + } + return r; +} + +static inline int tdx_vm_ioctl(int cmd_id, __u32 flags, void *data, + Error **errp) +{ + return tdx_ioctl_internal(TDX_VM_IOCTL, NULL, cmd_id, flags, data, errp); +} + +static inline int tdx_vcpu_ioctl(CPUState *cpu, int cmd_id, __u32 flags, + void *data, Error **errp) +{ + return tdx_ioctl_internal(TDX_VCPU_IOCTL, cpu, cmd_id, flags, data, errp); +} + +static int get_tdx_capabilities(Error **errp) +{ + struct kvm_tdx_capabilities *caps; + /* 1st generation of TDX reports 6 cpuid configs */ + int nr_cpuid_configs = 6; + size_t size; + int r; + + do { + Error *local_err = NULL; + size = sizeof(struct kvm_tdx_capabilities) + + nr_cpuid_configs * sizeof(struct kvm_cpuid_entry2); + caps = g_malloc0(size); + caps->cpuid.nent = nr_cpuid_configs; + + r = tdx_vm_ioctl(KVM_TDX_CAPABILITIES, 0, caps, &local_err); + if (r == -E2BIG) { + g_free(caps); + nr_cpuid_configs *= 2; + if (nr_cpuid_configs > KVM_MAX_CPUID_ENTRIES) { + error_report("KVM TDX seems broken that number of CPUID entries" + " in kvm_tdx_capabilities exceeds limit: %d", + KVM_MAX_CPUID_ENTRIES); + error_propagate(errp, local_err); + return r; + } + error_free(local_err); + } else if (r < 0) { + g_free(caps); + error_propagate(errp, local_err); + return r; + } + } while (r == -E2BIG); + + tdx_caps = caps; + + return 0; +} + +void tdx_set_tdvf_region(MemoryRegion *tdvf_mr) +{ + assert(!tdx_guest->tdvf_mr); + tdx_guest->tdvf_mr = tdvf_mr; +} + +static TdxFirmwareEntry *tdx_get_hob_entry(TdxGuest *tdx) +{ + TdxFirmwareEntry *entry; + + for_each_tdx_fw_entry(&tdx->tdvf, entry) { + if (entry->type == TDVF_SECTION_TYPE_TD_HOB) { + return entry; + } + } + error_report("TDVF metadata doesn't specify TD_HOB location."); + exit(1); +} + +static void tdx_add_ram_entry(uint64_t address, uint64_t length, + enum TdxRamType type) +{ + uint32_t nr_entries = tdx_guest->nr_ram_entries; + tdx_guest->ram_entries = g_renew(TdxRamEntry, tdx_guest->ram_entries, + nr_entries + 1); + + tdx_guest->ram_entries[nr_entries].address = address; + tdx_guest->ram_entries[nr_entries].length = length; + tdx_guest->ram_entries[nr_entries].type = type; + tdx_guest->nr_ram_entries++; +} + +static int tdx_accept_ram_range(uint64_t address, uint64_t length) +{ + uint64_t head_start, tail_start, head_length, tail_length; + uint64_t tmp_address, tmp_length; + TdxRamEntry *e; + int i = 0; + + do { + if (i == tdx_guest->nr_ram_entries) { + return -1; + } + + e = &tdx_guest->ram_entries[i++]; + } while (address + length <= e->address || address >= e->address + e->length); + + /* + * The to-be-accepted ram range must be fully contained by one + * RAM entry. + */ + if (e->address > address || + e->address + e->length < address + length) { + return -1; + } + + if (e->type == TDX_RAM_ADDED) { + return 0; + } + + tmp_address = e->address; + tmp_length = e->length; + + e->address = address; + e->length = length; + e->type = TDX_RAM_ADDED; + + head_length = address - tmp_address; + if (head_length > 0) { + head_start = tmp_address; + tdx_add_ram_entry(head_start, head_length, TDX_RAM_UNACCEPTED); + } + + tail_start = address + length; + if (tail_start < tmp_address + tmp_length) { + tail_length = tmp_address + tmp_length - tail_start; + tdx_add_ram_entry(tail_start, tail_length, TDX_RAM_UNACCEPTED); + } + + return 0; +} + +static int tdx_ram_entry_compare(const void *lhs_, const void* rhs_) +{ + const TdxRamEntry *lhs = lhs_; + const TdxRamEntry *rhs = rhs_; + + if (lhs->address == rhs->address) { + return 0; + } + if (le64_to_cpu(lhs->address) > le64_to_cpu(rhs->address)) { + return 1; + } + return -1; +} + +static void tdx_init_ram_entries(void) +{ + unsigned i, j, nr_e820_entries; + + nr_e820_entries = e820_get_table(NULL); + tdx_guest->ram_entries = g_new(TdxRamEntry, nr_e820_entries); + + for (i = 0, j = 0; i < nr_e820_entries; i++) { + uint64_t addr, len; + + if (e820_get_entry(i, E820_RAM, &addr, &len)) { + tdx_guest->ram_entries[j].address = addr; + tdx_guest->ram_entries[j].length = len; + tdx_guest->ram_entries[j].type = TDX_RAM_UNACCEPTED; + j++; + } + } + tdx_guest->nr_ram_entries = j; +} + +static void tdx_post_init_vcpus(void) +{ + TdxFirmwareEntry *hob; + CPUState *cpu; + + hob = tdx_get_hob_entry(tdx_guest); + CPU_FOREACH(cpu) { + tdx_vcpu_ioctl(cpu, KVM_TDX_INIT_VCPU, 0, (void *)(uintptr_t)hob->address, + &error_fatal); + } +} + +static void tdx_finalize_vm(Notifier *notifier, void *unused) +{ + TdxFirmware *tdvf = &tdx_guest->tdvf; + TdxFirmwareEntry *entry; + RAMBlock *ram_block; + Error *local_err = NULL; + int r; + + tdx_init_ram_entries(); + + for_each_tdx_fw_entry(tdvf, entry) { + switch (entry->type) { + case TDVF_SECTION_TYPE_BFV: + case TDVF_SECTION_TYPE_CFV: + entry->mem_ptr = tdvf->mem_ptr + entry->data_offset; + break; + case TDVF_SECTION_TYPE_TD_HOB: + case TDVF_SECTION_TYPE_TEMP_MEM: + entry->mem_ptr = qemu_ram_mmap(-1, entry->size, + qemu_real_host_page_size(), 0, 0); + if (entry->mem_ptr == MAP_FAILED) { + error_report("Failed to mmap memory for TDVF section %d", + entry->type); + exit(1); + } + if (tdx_accept_ram_range(entry->address, entry->size)) { + error_report("Failed to accept memory for TDVF section %d", + entry->type); + qemu_ram_munmap(-1, entry->mem_ptr, entry->size); + exit(1); + } + break; + default: + error_report("Unsupported TDVF section %d", entry->type); + exit(1); + } + } + + qsort(tdx_guest->ram_entries, tdx_guest->nr_ram_entries, + sizeof(TdxRamEntry), &tdx_ram_entry_compare); + + tdvf_hob_create(tdx_guest, tdx_get_hob_entry(tdx_guest)); + + tdx_post_init_vcpus(); + + for_each_tdx_fw_entry(tdvf, entry) { + struct kvm_tdx_init_mem_region region; + uint32_t flags; + + region = (struct kvm_tdx_init_mem_region) { + .source_addr = (uintptr_t)entry->mem_ptr, + .gpa = entry->address, + .nr_pages = entry->size >> 12, + }; + + flags = entry->attributes & TDVF_SECTION_ATTRIBUTES_MR_EXTEND ? + KVM_TDX_MEASURE_MEMORY_REGION : 0; + + do { + error_free(local_err); + local_err = NULL; + r = tdx_vcpu_ioctl(first_cpu, KVM_TDX_INIT_MEM_REGION, flags, + ®ion, &local_err); + } while (r == -EAGAIN || r == -EINTR); + if (r < 0) { + error_report_err(local_err); + exit(1); + } + + if (entry->type == TDVF_SECTION_TYPE_TD_HOB || + entry->type == TDVF_SECTION_TYPE_TEMP_MEM) { + qemu_ram_munmap(-1, entry->mem_ptr, entry->size); + entry->mem_ptr = NULL; + } + } + + /* + * TDVF image has been copied into private region above via + * KVM_MEMORY_MAPPING. It becomes useless. + */ + ram_block = tdx_guest->tdvf_mr->ram_block; + ram_block_discard_range(ram_block, 0, ram_block->max_length); + + tdx_vm_ioctl(KVM_TDX_FINALIZE_VM, 0, NULL, &error_fatal); + CONFIDENTIAL_GUEST_SUPPORT(tdx_guest)->ready = true; +} + +static Notifier tdx_machine_done_notify = { + .notify = tdx_finalize_vm, +}; + +/* + * Some CPUID bits change from fixed1 to configurable bits when TDX module + * supports TDX_FEATURES0.VE_REDUCTION. e.g., MCA/MCE/MTRR/CORE_CAPABILITY. + * + * To make QEMU work with all the versions of TDX module, keep the fixed1 bits + * here if they are ever fixed1 bits in any of the version though not fixed1 in + * the latest version. Otherwise, with the older version of TDX module, QEMU may + * treat the fixed1 bit as unsupported. + * + * For newer TDX module, it does no harm to keep them in tdx_fixed1_bits even + * though they changed to configurable bits. Because tdx_fixed1_bits is used to + * setup the supported bits. + */ +KvmCpuidInfo tdx_fixed1_bits = { + .cpuid.nent = 8, + .entries[0] = { + .function = 0x1, + .index = 0, + .ecx = CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_DTES64 | + CPUID_EXT_DSCPL | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | + CPUID_EXT_PDCM | CPUID_EXT_PCID | CPUID_EXT_SSE41 | + CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE | + CPUID_EXT_POPCNT | CPUID_EXT_AES | CPUID_EXT_XSAVE | + CPUID_EXT_RDRAND | CPUID_EXT_HYPERVISOR, + .edx = CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC | + CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | + CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | + CPUID_PAT | CPUID_CLFLUSH | CPUID_DTS | CPUID_MMX | CPUID_FXSR | + CPUID_SSE | CPUID_SSE2, + }, + .entries[1] = { + .function = 0x6, + .index = 0, + .eax = CPUID_6_EAX_ARAT, + }, + .entries[2] = { + .function = 0x7, + .index = 0, + .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX, + .ebx = CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_FDP_EXCPTN_ONLY | + CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_INVPCID | + CPUID_7_0_EBX_ZERO_FCS_FDS | CPUID_7_0_EBX_RDSEED | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | + CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_SHA_NI, + .ecx = CPUID_7_0_ECX_BUS_LOCK_DETECT | CPUID_7_0_ECX_MOVDIRI | + CPUID_7_0_ECX_MOVDIR64B, + .edx = CPUID_7_0_EDX_MD_CLEAR | CPUID_7_0_EDX_SPEC_CTRL | + CPUID_7_0_EDX_STIBP | CPUID_7_0_EDX_FLUSH_L1D | + CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_CORE_CAPABILITY | + CPUID_7_0_EDX_SPEC_CTRL_SSBD, + }, + .entries[3] = { + .function = 0x7, + .index = 2, + .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX, + .edx = CPUID_7_2_EDX_PSFD | CPUID_7_2_EDX_IPRED_CTRL | + CPUID_7_2_EDX_RRSBA_CTRL | CPUID_7_2_EDX_BHI_CTRL, + }, + .entries[4] = { + .function = 0xD, + .index = 0, + .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX, + .eax = XSTATE_FP_MASK | XSTATE_SSE_MASK, + }, + .entries[5] = { + .function = 0xD, + .index = 1, + .flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX, + .eax = CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC| + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, + }, + .entries[6] = { + .function = 0x80000001, + .index = 0, + .ecx = CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH, + /* + * Strictly speaking, SYSCALL is not fixed1 bit since it depends on + * the CPU to be in 64-bit mode. But here fixed1 is used to serve the + * purpose of supported bits for TDX. In this sense, SYACALL is always + * supported. + */ + .edx = CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB | + CPUID_EXT2_RDTSCP | CPUID_EXT2_LM, + }, + .entries[7] = { + .function = 0x80000007, + .index = 0, + .edx = CPUID_APM_INVTSC, + }, +}; + +typedef struct TdxAttrsMap { + uint32_t attr_index; + uint32_t cpuid_leaf; + uint32_t cpuid_subleaf; + int cpuid_reg; + uint32_t feat_mask; +} TdxAttrsMap; + +static TdxAttrsMap tdx_attrs_maps[] = { + {.attr_index = 27, + .cpuid_leaf = 7, + .cpuid_subleaf = 1, + .cpuid_reg = R_EAX, + .feat_mask = CPUID_7_1_EAX_LASS,}, + + {.attr_index = 30, + .cpuid_leaf = 7, + .cpuid_subleaf = 0, + .cpuid_reg = R_ECX, + .feat_mask = CPUID_7_0_ECX_PKS,}, + + {.attr_index = 31, + .cpuid_leaf = 7, + .cpuid_subleaf = 0, + .cpuid_reg = R_ECX, + .feat_mask = CPUID_7_0_ECX_KeyLocker,}, +}; + +typedef struct TdxXFAMDep { + int xfam_bit; + FeatureMask feat_mask; +} TdxXFAMDep; + +/* + * Note, only the CPUID bits whose virtualization type are "XFAM & Native" are + * defiend here. + * + * For those whose virtualization type are "XFAM & Configured & Native", they + * are reported as configurable bits. And they are not supported if not in the + * configureable bits list from KVM even if the corresponding XFAM bit is + * supported. + */ +TdxXFAMDep tdx_xfam_deps[] = { + { XSTATE_YMM_BIT, { FEAT_1_ECX, CPUID_EXT_FMA }}, + { XSTATE_YMM_BIT, { FEAT_7_0_EBX, CPUID_7_0_EBX_AVX2 }}, + { XSTATE_OPMASK_BIT, { FEAT_7_0_ECX, CPUID_7_0_ECX_AVX512_VBMI}}, + { XSTATE_OPMASK_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AVX512_FP16}}, + { XSTATE_PT_BIT, { FEAT_7_0_EBX, CPUID_7_0_EBX_INTEL_PT}}, + { XSTATE_PKRU_BIT, { FEAT_7_0_ECX, CPUID_7_0_ECX_PKU}}, + { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_BF16 }}, + { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_TILE }}, + { XSTATE_XTILE_CFG_BIT, { FEAT_7_0_EDX, CPUID_7_0_EDX_AMX_INT8 }}, +}; + +static struct kvm_cpuid_entry2 *find_in_supported_entry(uint32_t function, + uint32_t index) +{ + struct kvm_cpuid_entry2 *e; + + e = cpuid_find_entry(tdx_supported_cpuid, function, index); + if (!e) { + if (tdx_supported_cpuid->nent >= KVM_MAX_CPUID_ENTRIES) { + error_report("tdx_supported_cpuid requries more space than %d entries", + KVM_MAX_CPUID_ENTRIES); + exit(1); + } + e = &tdx_supported_cpuid->entries[tdx_supported_cpuid->nent++]; + e->function = function; + e->index = index; + } + + return e; +} + +static void tdx_add_supported_cpuid_by_fixed1_bits(void) +{ + struct kvm_cpuid_entry2 *e, *e1; + int i; + + for (i = 0; i < tdx_fixed1_bits.cpuid.nent; i++) { + e = &tdx_fixed1_bits.entries[i]; + + e1 = find_in_supported_entry(e->function, e->index); + e1->eax |= e->eax; + e1->ebx |= e->ebx; + e1->ecx |= e->ecx; + e1->edx |= e->edx; + } +} + +static void tdx_add_supported_cpuid_by_attrs(void) +{ + struct kvm_cpuid_entry2 *e; + TdxAttrsMap *map; + int i; + + for (i = 0; i < ARRAY_SIZE(tdx_attrs_maps); i++) { + map = &tdx_attrs_maps[i]; + if (!((1ULL << map->attr_index) & tdx_caps->supported_attrs)) { + continue; + } + + e = find_in_supported_entry(map->cpuid_leaf, map->cpuid_subleaf); + + switch(map->cpuid_reg) { + case R_EAX: + e->eax |= map->feat_mask; + break; + case R_EBX: + e->ebx |= map->feat_mask; + break; + case R_ECX: + e->ecx |= map->feat_mask; + break; + case R_EDX: + e->edx |= map->feat_mask; + break; + } + } +} + +static void tdx_add_supported_cpuid_by_xfam(void) +{ + struct kvm_cpuid_entry2 *e; + int i; + + const TdxXFAMDep *xfam_dep; + const FeatureWordInfo *f; + for (i = 0; i < ARRAY_SIZE(tdx_xfam_deps); i++) { + xfam_dep = &tdx_xfam_deps[i]; + if (!((1ULL << xfam_dep->xfam_bit) & tdx_caps->supported_xfam)) { + continue; + } + + f = &feature_word_info[xfam_dep->feat_mask.index]; + if (f->type != CPUID_FEATURE_WORD) { + continue; + } + + e = find_in_supported_entry(f->cpuid.eax, f->cpuid.ecx); + switch(f->cpuid.reg) { + case R_EAX: + e->eax |= xfam_dep->feat_mask.mask; + break; + case R_EBX: + e->ebx |= xfam_dep->feat_mask.mask; + break; + case R_ECX: + e->ecx |= xfam_dep->feat_mask.mask; + break; + case R_EDX: + e->edx |= xfam_dep->feat_mask.mask; + break; + } + } + + e = find_in_supported_entry(0xd, 0); + e->eax |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK); + e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XCR0_MASK) >> 32; + + e = find_in_supported_entry(0xd, 1); + /* + * Mark XFD always support for TDX, it will be cleared finally in + * tdx_adjust_cpuid_features() if XFD is unavailable on the hardware + * because in this case the original data has it as 0. + */ + e->eax |= CPUID_XSAVE_XFD; + e->ecx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK); + e->edx |= (tdx_caps->supported_xfam & CPUID_XSTATE_XSS_MASK) >> 32; +} + +static void tdx_add_supported_kvm_features(void) +{ + struct kvm_cpuid_entry2 *e; + + e = find_in_supported_entry(0x40000001, 0); + e->eax = TDX_SUPPORTED_KVM_FEATURES; +} + +static void tdx_setup_supported_cpuid(void) +{ + if (tdx_supported_cpuid) { + return; + } + + tdx_supported_cpuid = g_malloc0(sizeof(*tdx_supported_cpuid) + + KVM_MAX_CPUID_ENTRIES * sizeof(struct kvm_cpuid_entry2)); + + memcpy(tdx_supported_cpuid->entries, tdx_caps->cpuid.entries, + tdx_caps->cpuid.nent * sizeof(struct kvm_cpuid_entry2)); + tdx_supported_cpuid->nent = tdx_caps->cpuid.nent; + + tdx_add_supported_cpuid_by_fixed1_bits(); + tdx_add_supported_cpuid_by_attrs(); + tdx_add_supported_cpuid_by_xfam(); + + tdx_add_supported_kvm_features(); +} + +static int tdx_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + X86MachineState *x86ms = X86_MACHINE(ms); + TdxGuest *tdx = TDX_GUEST(cgs); + int r = 0; + + kvm_mark_guest_state_protected(); + + if (x86ms->smm == ON_OFF_AUTO_AUTO) { + x86ms->smm = ON_OFF_AUTO_OFF; + } else if (x86ms->smm == ON_OFF_AUTO_ON) { + error_setg(errp, "TDX VM doesn't support SMM"); + return -EINVAL; + } + + if (x86ms->pic == ON_OFF_AUTO_AUTO) { + x86ms->pic = ON_OFF_AUTO_OFF; + } else if (x86ms->pic == ON_OFF_AUTO_ON) { + error_setg(errp, "TDX VM doesn't support PIC"); + return -EINVAL; + } + + if (kvm_state->kernel_irqchip_split == ON_OFF_AUTO_AUTO) { + kvm_state->kernel_irqchip_split = ON_OFF_AUTO_ON; + } else if (kvm_state->kernel_irqchip_split != ON_OFF_AUTO_ON) { + error_setg(errp, "TDX VM requires kernel_irqchip to be split"); + return -EINVAL; + } + + if (!tdx_caps) { + r = get_tdx_capabilities(errp); + if (r) { + return r; + } + } + + tdx_setup_supported_cpuid(); + + /* TDX relies on KVM_HC_MAP_GPA_RANGE to handle TDG.VP.VMCALL<MapGPA> */ + if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) { + return -EOPNOTSUPP; + } + + /* + * Set kvm_readonly_mem_allowed to false, because TDX only supports readonly + * memory for shared memory but not for private memory. Besides, whether a + * memslot is private or shared is not determined by QEMU. + * + * Thus, just mark readonly memory not supported for simplicity. + */ + kvm_readonly_mem_allowed = false; + + qemu_add_machine_init_done_notifier(&tdx_machine_done_notify); + + tdx_guest = tdx; + return 0; +} + +static int tdx_kvm_type(X86ConfidentialGuest *cg) +{ + /* Do the object check */ + TDX_GUEST(cg); + + return KVM_X86_TDX_VM; +} + +static void tdx_cpu_instance_init(X86ConfidentialGuest *cg, CPUState *cpu) +{ + X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu); + X86CPU *x86cpu = X86_CPU(cpu); + + if (xcc->model) { + error_report("Named cpu model is not supported for TDX yet!"); + exit(1); + } + + object_property_set_bool(OBJECT(cpu), "pmu", false, &error_abort); + + /* invtsc is fixed1 for TD guest */ + object_property_set_bool(OBJECT(cpu), "invtsc", true, &error_abort); + + x86cpu->force_cpuid_0x1f = true; +} + +static uint32_t tdx_adjust_cpuid_features(X86ConfidentialGuest *cg, + uint32_t feature, uint32_t index, + int reg, uint32_t value) +{ + struct kvm_cpuid_entry2 *e; + + e = cpuid_find_entry(&tdx_fixed1_bits.cpuid, feature, index); + if (e) { + value |= cpuid_entry_get_reg(e, reg); + } + + if (is_feature_word_cpuid(feature, index, reg)) { + e = cpuid_find_entry(tdx_supported_cpuid, feature, index); + if (e) { + value &= cpuid_entry_get_reg(e, reg); + } + } + + return value; +} + +static struct kvm_cpuid2 *tdx_fetch_cpuid(CPUState *cpu, int *ret) +{ + struct kvm_cpuid2 *fetch_cpuid; + int size = KVM_MAX_CPUID_ENTRIES; + Error *local_err = NULL; + int r; + + do { + error_free(local_err); + local_err = NULL; + + fetch_cpuid = g_malloc0(sizeof(*fetch_cpuid) + + sizeof(struct kvm_cpuid_entry2) * size); + fetch_cpuid->nent = size; + r = tdx_vcpu_ioctl(cpu, KVM_TDX_GET_CPUID, 0, fetch_cpuid, &local_err); + if (r == -E2BIG) { + g_free(fetch_cpuid); + size = fetch_cpuid->nent; + } + } while (r == -E2BIG); + + if (r < 0) { + error_report_err(local_err); + *ret = r; + return NULL; + } + + return fetch_cpuid; +} + +static int tdx_check_features(X86ConfidentialGuest *cg, CPUState *cs) +{ + uint64_t actual, requested, unavailable, forced_on; + g_autofree struct kvm_cpuid2 *fetch_cpuid; + const char *forced_on_prefix = NULL; + const char *unav_prefix = NULL; + struct kvm_cpuid_entry2 *entry; + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + FeatureWordInfo *wi; + FeatureWord w; + bool mismatch = false; + int r; + + fetch_cpuid = tdx_fetch_cpuid(cs, &r); + if (!fetch_cpuid) { + return r; + } + + if (cpu->check_cpuid || cpu->enforce_cpuid) { + unav_prefix = "TDX doesn't support requested feature"; + forced_on_prefix = "TDX forcibly sets the feature"; + } + + for (w = 0; w < FEATURE_WORDS; w++) { + wi = &feature_word_info[w]; + actual = 0; + + switch (wi->type) { + case CPUID_FEATURE_WORD: + entry = cpuid_find_entry(fetch_cpuid, wi->cpuid.eax, wi->cpuid.ecx); + if (!entry) { + /* + * If KVM doesn't report it means it's totally configurable + * by QEMU + */ + continue; + } + + actual = cpuid_entry_get_reg(entry, wi->cpuid.reg); + break; + case MSR_FEATURE_WORD: + /* + * TODO: + * validate MSR features when KVM has interface report them. + */ + continue; + } + + /* Fixup for special cases */ + switch (w) { + case FEAT_8000_0001_EDX: + /* + * Intel enumerates SYSCALL bit as 1 only when processor in 64-bit + * mode and before vcpu running it's not in 64-bit mode. + */ + actual |= CPUID_EXT2_SYSCALL; + break; + default: + break; + } + + requested = env->features[w]; + unavailable = requested & ~actual; + mark_unavailable_features(cpu, w, unavailable, unav_prefix); + if (unavailable) { + mismatch = true; + } + + forced_on = actual & ~requested; + mark_forced_on_features(cpu, w, forced_on, forced_on_prefix); + if (forced_on) { + mismatch = true; + } + } + + if (cpu->enforce_cpuid && mismatch) { + return -EINVAL; + } + + if (cpu->phys_bits != host_cpu_phys_bits()) { + error_report("TDX requires guest CPU physical bits (%u) " + "to match host CPU physical bits (%u)", + cpu->phys_bits, host_cpu_phys_bits()); + return -EINVAL; + } + + return 0; +} + +static int tdx_validate_attributes(TdxGuest *tdx, Error **errp) +{ + if ((tdx->attributes & ~tdx_caps->supported_attrs)) { + error_setg(errp, "Invalid attributes 0x%"PRIx64" for TDX VM " + "(KVM supported: 0x%"PRIx64")", tdx->attributes, + (uint64_t)tdx_caps->supported_attrs); + return -1; + } + + if (tdx->attributes & ~TDX_SUPPORTED_TD_ATTRS) { + error_setg(errp, "Some QEMU unsupported TD attribute bits being " + "requested: 0x%"PRIx64" (QEMU supported: 0x%"PRIx64")", + tdx->attributes, (uint64_t)TDX_SUPPORTED_TD_ATTRS); + return -1; + } + + return 0; +} + +static int setup_td_guest_attributes(X86CPU *x86cpu, Error **errp) +{ + CPUX86State *env = &x86cpu->env; + + tdx_guest->attributes |= (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS) ? + TDX_TD_ATTRIBUTES_PKS : 0; + tdx_guest->attributes |= x86cpu->enable_pmu ? TDX_TD_ATTRIBUTES_PERFMON : 0; + + return tdx_validate_attributes(tdx_guest, errp); +} + +static int setup_td_xfam(X86CPU *x86cpu, Error **errp) +{ + CPUX86State *env = &x86cpu->env; + uint64_t xfam; + + xfam = env->features[FEAT_XSAVE_XCR0_LO] | + env->features[FEAT_XSAVE_XCR0_HI] | + env->features[FEAT_XSAVE_XSS_LO] | + env->features[FEAT_XSAVE_XSS_HI]; + + if (xfam & ~tdx_caps->supported_xfam) { + error_setg(errp, "Invalid XFAM 0x%"PRIx64" for TDX VM (supported: 0x%"PRIx64"))", + xfam, (uint64_t)tdx_caps->supported_xfam); + return -1; + } + + tdx_guest->xfam = xfam; + return 0; +} + +static void tdx_filter_cpuid(struct kvm_cpuid2 *cpuids) +{ + int i, dest_cnt = 0; + struct kvm_cpuid_entry2 *src, *dest, *conf; + + for (i = 0; i < cpuids->nent; i++) { + src = cpuids->entries + i; + conf = cpuid_find_entry(&tdx_caps->cpuid, src->function, src->index); + if (!conf) { + continue; + } + dest = cpuids->entries + dest_cnt; + + dest->function = src->function; + dest->index = src->index; + dest->flags = src->flags; + dest->eax = src->eax & conf->eax; + dest->ebx = src->ebx & conf->ebx; + dest->ecx = src->ecx & conf->ecx; + dest->edx = src->edx & conf->edx; + + dest_cnt++; + } + cpuids->nent = dest_cnt++; +} + +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + X86CPU *x86cpu = X86_CPU(cpu); + CPUX86State *env = &x86cpu->env; + g_autofree struct kvm_tdx_init_vm *init_vm = NULL; + Error *local_err = NULL; + size_t data_len; + int retry = 10000; + int r = 0; + + QEMU_LOCK_GUARD(&tdx_guest->lock); + if (tdx_guest->initialized) { + return r; + } + + init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) + + sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); + + if (!kvm_check_extension(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS)) { + error_setg(errp, "KVM doesn't support KVM_CAP_X86_APIC_BUS_CYCLES_NS"); + return -EOPNOTSUPP; + } + + r = kvm_vm_enable_cap(kvm_state, KVM_CAP_X86_APIC_BUS_CYCLES_NS, + 0, TDX_APIC_BUS_CYCLES_NS); + if (r < 0) { + error_setg_errno(errp, -r, + "Unable to set core crystal clock frequency to 25MHz"); + return r; + } + + if (env->tsc_khz && (env->tsc_khz < TDX_MIN_TSC_FREQUENCY_KHZ || + env->tsc_khz > TDX_MAX_TSC_FREQUENCY_KHZ)) { + error_setg(errp, "Invalid TSC %"PRId64" KHz, must specify cpu_frequency " + "between [%d, %d] kHz", env->tsc_khz, + TDX_MIN_TSC_FREQUENCY_KHZ, TDX_MAX_TSC_FREQUENCY_KHZ); + return -EINVAL; + } + + if (env->tsc_khz % (25 * 1000)) { + error_setg(errp, "Invalid TSC %"PRId64" KHz, it must be multiple of 25MHz", + env->tsc_khz); + return -EINVAL; + } + + /* it's safe even env->tsc_khz is 0. KVM uses host's tsc_khz in this case */ + r = kvm_vm_ioctl(kvm_state, KVM_SET_TSC_KHZ, env->tsc_khz); + if (r < 0) { + error_setg_errno(errp, -r, "Unable to set TSC frequency to %"PRId64" kHz", + env->tsc_khz); + return r; + } + + if (tdx_guest->mrconfigid) { + g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrconfigid, + strlen(tdx_guest->mrconfigid), &data_len, errp); + if (!data) { + return -1; + } + if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { + error_setg(errp, "TDX 'mrconfigid' sha384 digest was %ld bytes, " + "expected %d bytes", data_len, + QCRYPTO_HASH_DIGEST_LEN_SHA384); + return -1; + } + memcpy(init_vm->mrconfigid, data, data_len); + } + + if (tdx_guest->mrowner) { + g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrowner, + strlen(tdx_guest->mrowner), &data_len, errp); + if (!data) { + return -1; + } + if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { + error_setg(errp, "TDX 'mrowner' sha384 digest was %ld bytes, " + "expected %d bytes", data_len, + QCRYPTO_HASH_DIGEST_LEN_SHA384); + return -1; + } + memcpy(init_vm->mrowner, data, data_len); + } + + if (tdx_guest->mrownerconfig) { + g_autofree uint8_t *data = qbase64_decode(tdx_guest->mrownerconfig, + strlen(tdx_guest->mrownerconfig), &data_len, errp); + if (!data) { + return -1; + } + if (data_len != QCRYPTO_HASH_DIGEST_LEN_SHA384) { + error_setg(errp, "TDX 'mrownerconfig' sha384 digest was %ld bytes, " + "expected %d bytes", data_len, + QCRYPTO_HASH_DIGEST_LEN_SHA384); + return -1; + } + memcpy(init_vm->mrownerconfig, data, data_len); + } + + r = setup_td_guest_attributes(x86cpu, errp); + if (r) { + return r; + } + + r = setup_td_xfam(x86cpu, errp); + if (r) { + return r; + } + + init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0); + tdx_filter_cpuid(&init_vm->cpuid); + + init_vm->attributes = tdx_guest->attributes; + init_vm->xfam = tdx_guest->xfam; + + /* + * KVM_TDX_INIT_VM gets -EAGAIN when KVM side SEAMCALL(TDH_MNG_CREATE) + * gets TDX_RND_NO_ENTROPY due to Random number generation (e.g., RDRAND or + * RDSEED) is busy. + * + * Retry for the case. + */ + do { + error_free(local_err); + local_err = NULL; + r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm, &local_err); + } while (r == -EAGAIN && --retry); + + if (r < 0) { + if (!retry) { + error_append_hint(&local_err, "Hardware RNG (Random Number " + "Generator) is busy occupied by someone (via RDRAND/RDSEED) " + "maliciously, which leads to KVM_TDX_INIT_VM keeping failure " + "due to lack of entropy.\n"); + } + error_propagate(errp, local_err); + return r; + } + + tdx_guest->initialized = true; + + return 0; +} + +int tdx_parse_tdvf(void *flash_ptr, int size) +{ + return tdvf_parse_metadata(&tdx_guest->tdvf, flash_ptr, size); +} + +static void tdx_get_quote_completion(TdxGenerateQuoteTask *task) +{ + TdxGuest *tdx = task->opaque; + int ret; + + /* Maintain the number of in-flight requests. */ + qemu_mutex_lock(&tdx->lock); + tdx->num--; + qemu_mutex_unlock(&tdx->lock); + + if (task->status_code == TDX_VP_GET_QUOTE_SUCCESS) { + ret = address_space_write(&address_space_memory, task->payload_gpa, + MEMTXATTRS_UNSPECIFIED, task->receive_buf, + task->receive_buf_received); + if (ret != MEMTX_OK) { + error_report("TDX: get-quote: failed to write quote data."); + } else { + task->hdr.out_len = cpu_to_le64(task->receive_buf_received); + } + } + task->hdr.error_code = cpu_to_le64(task->status_code); + + /* Publish the response contents before marking this request completed. */ + smp_wmb(); + ret = address_space_write(&address_space_memory, task->buf_gpa, + MEMTXATTRS_UNSPECIFIED, &task->hdr, + TDX_GET_QUOTE_HDR_SIZE); + if (ret != MEMTX_OK) { + error_report("TDX: get-quote: failed to update GetQuote header."); + } + + g_free(task->send_data); + g_free(task->receive_buf); + g_free(task); + object_unref(tdx); +} + +void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run) +{ + TdxGenerateQuoteTask *task; + struct tdx_get_quote_header hdr; + hwaddr buf_gpa = run->tdx.get_quote.gpa; + uint64_t buf_len = run->tdx.get_quote.size; + + QEMU_BUILD_BUG_ON(sizeof(struct tdx_get_quote_header) != TDX_GET_QUOTE_HDR_SIZE); + + run->tdx.get_quote.ret = TDG_VP_VMCALL_INVALID_OPERAND; + + if (buf_len == 0) { + return; + } + + if (!QEMU_IS_ALIGNED(buf_gpa, 4096) || !QEMU_IS_ALIGNED(buf_len, 4096)) { + run->tdx.get_quote.ret = TDG_VP_VMCALL_ALIGN_ERROR; + return; + } + + if (address_space_read(&address_space_memory, buf_gpa, MEMTXATTRS_UNSPECIFIED, + &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) { + error_report("TDX: get-quote: failed to read GetQuote header."); + return; + } + + if (le64_to_cpu(hdr.structure_version) != TDX_GET_QUOTE_STRUCTURE_VERSION) { + return; + } + + /* Only safe-guard check to avoid too large buffer size. */ + if (buf_len > TDX_GET_QUOTE_MAX_BUF_LEN || + le32_to_cpu(hdr.in_len) > buf_len - TDX_GET_QUOTE_HDR_SIZE) { + return; + } + + if (!tdx_guest->qg_sock_addr) { + hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_QGS_UNAVAILABLE); + if (address_space_write(&address_space_memory, buf_gpa, + MEMTXATTRS_UNSPECIFIED, + &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) { + error_report("TDX: failed to update GetQuote header."); + return; + } + run->tdx.get_quote.ret = TDG_VP_VMCALL_SUCCESS; + return; + } + + qemu_mutex_lock(&tdx_guest->lock); + if (tdx_guest->num >= TDX_MAX_GET_QUOTE_REQUEST) { + qemu_mutex_unlock(&tdx_guest->lock); + run->tdx.get_quote.ret = TDG_VP_VMCALL_RETRY; + return; + } + tdx_guest->num++; + qemu_mutex_unlock(&tdx_guest->lock); + + task = g_new(TdxGenerateQuoteTask, 1); + task->buf_gpa = buf_gpa; + task->payload_gpa = buf_gpa + TDX_GET_QUOTE_HDR_SIZE; + task->payload_len = buf_len - TDX_GET_QUOTE_HDR_SIZE; + task->hdr = hdr; + task->completion = tdx_get_quote_completion; + + task->send_data_size = le32_to_cpu(hdr.in_len); + task->send_data = g_malloc(task->send_data_size); + task->send_data_sent = 0; + + if (address_space_read(&address_space_memory, task->payload_gpa, + MEMTXATTRS_UNSPECIFIED, task->send_data, + task->send_data_size) != MEMTX_OK) { + goto out_free; + } + + /* Mark the buffer in-flight. */ + hdr.error_code = cpu_to_le64(TDX_VP_GET_QUOTE_IN_FLIGHT); + if (address_space_write(&address_space_memory, buf_gpa, + MEMTXATTRS_UNSPECIFIED, + &hdr, TDX_GET_QUOTE_HDR_SIZE) != MEMTX_OK) { + goto out_free; + } + + task->receive_buf = g_malloc0(task->payload_len); + task->receive_buf_received = 0; + task->opaque = tdx_guest; + + object_ref(tdx_guest); + tdx_generate_quote(task, tdx_guest->qg_sock_addr); + run->tdx.get_quote.ret = TDG_VP_VMCALL_SUCCESS; + return; + +out_free: + g_free(task->send_data); + g_free(task); +} + +void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run) +{ + if (run->tdx.get_tdvmcall_info.leaf != 1) { + return; + } + + run->tdx.get_tdvmcall_info.r11 = TDG_VP_VMCALL_SUBFUNC_GET_QUOTE; + run->tdx.get_tdvmcall_info.r12 = 0; + run->tdx.get_tdvmcall_info.r13 = 0; + run->tdx.get_tdvmcall_info.r14 = 0; +} + +static void tdx_panicked_on_fatal_error(X86CPU *cpu, uint64_t error_code, + char *message, uint64_t gpa) +{ + GuestPanicInformation *panic_info; + + panic_info = g_new0(GuestPanicInformation, 1); + panic_info->type = GUEST_PANIC_INFORMATION_TYPE_TDX; + panic_info->u.tdx.error_code = (uint32_t) error_code; + panic_info->u.tdx.message = message; + panic_info->u.tdx.gpa = gpa; + + qemu_system_guest_panicked(panic_info); +} + +/* + * Only 8 registers can contain valid ASCII byte stream to form the fatal + * message, and their sequence is: R14, R15, RBX, RDI, RSI, R8, R9, RDX + */ +#define TDX_FATAL_MESSAGE_MAX 64 + +#define TDX_REPORT_FATAL_ERROR_GPA_VALID BIT_ULL(63) + +int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run) +{ + uint64_t error_code = run->system_event.data[R_R12]; + uint64_t reg_mask = run->system_event.data[R_ECX]; + char *message = NULL; + uint64_t *tmp; + uint64_t gpa = -1ull; + + if (error_code & 0xffff) { + error_report("TDX: REPORT_FATAL_ERROR: invalid error code: 0x%"PRIx64, + error_code); + return -1; + } + + if (reg_mask) { + message = g_malloc0(TDX_FATAL_MESSAGE_MAX + 1); + tmp = (uint64_t *)message; + +#define COPY_REG(REG) \ + do { \ + if (reg_mask & BIT_ULL(REG)) { \ + *(tmp++) = run->system_event.data[REG]; \ + } \ + } while (0) + + COPY_REG(R_R14); + COPY_REG(R_R15); + COPY_REG(R_EBX); + COPY_REG(R_EDI); + COPY_REG(R_ESI); + COPY_REG(R_R8); + COPY_REG(R_R9); + COPY_REG(R_EDX); + *((char *)tmp) = '\0'; + } +#undef COPY_REG + + if (error_code & TDX_REPORT_FATAL_ERROR_GPA_VALID) { + gpa = run->system_event.data[R_R13]; + } + + tdx_panicked_on_fatal_error(cpu, error_code, message, gpa); + + return -1; +} + +static bool tdx_guest_get_sept_ve_disable(Object *obj, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + return !!(tdx->attributes & TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE); +} + +static void tdx_guest_set_sept_ve_disable(Object *obj, bool value, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + if (value) { + tdx->attributes |= TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; + } else { + tdx->attributes &= ~TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; + } +} + +static char *tdx_guest_get_mrconfigid(Object *obj, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + return g_strdup(tdx->mrconfigid); +} + +static void tdx_guest_set_mrconfigid(Object *obj, const char *value, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + g_free(tdx->mrconfigid); + tdx->mrconfigid = g_strdup(value); +} + +static char *tdx_guest_get_mrowner(Object *obj, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + return g_strdup(tdx->mrowner); +} + +static void tdx_guest_set_mrowner(Object *obj, const char *value, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + g_free(tdx->mrowner); + tdx->mrowner = g_strdup(value); +} + +static char *tdx_guest_get_mrownerconfig(Object *obj, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + return g_strdup(tdx->mrownerconfig); +} + +static void tdx_guest_set_mrownerconfig(Object *obj, const char *value, Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + g_free(tdx->mrownerconfig); + tdx->mrownerconfig = g_strdup(value); +} + +static void tdx_guest_get_qgs(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + + if (!tdx->qg_sock_addr) { + error_setg(errp, "quote-generation-socket is not set"); + return; + } + visit_type_SocketAddress(v, name, &tdx->qg_sock_addr, errp); +} + +static void tdx_guest_set_qgs(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + TdxGuest *tdx = TDX_GUEST(obj); + SocketAddress *sock = NULL; + + if (!visit_type_SocketAddress(v, name, &sock, errp)) { + return; + } + + if (tdx->qg_sock_addr) { + qapi_free_SocketAddress(tdx->qg_sock_addr); + } + + tdx->qg_sock_addr = sock; +} + +/* tdx guest */ +OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest, + tdx_guest, + TDX_GUEST, + X86_CONFIDENTIAL_GUEST, + { TYPE_USER_CREATABLE }, + { NULL }) + +static void tdx_guest_init(Object *obj) +{ + ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj); + TdxGuest *tdx = TDX_GUEST(obj); + + qemu_mutex_init(&tdx->lock); + + cgs->require_guest_memfd = true; + tdx->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE; + + object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes, + OBJ_PROP_FLAG_READWRITE); + object_property_add_bool(obj, "sept-ve-disable", + tdx_guest_get_sept_ve_disable, + tdx_guest_set_sept_ve_disable); + object_property_add_str(obj, "mrconfigid", + tdx_guest_get_mrconfigid, + tdx_guest_set_mrconfigid); + object_property_add_str(obj, "mrowner", + tdx_guest_get_mrowner, tdx_guest_set_mrowner); + object_property_add_str(obj, "mrownerconfig", + tdx_guest_get_mrownerconfig, + tdx_guest_set_mrownerconfig); + + object_property_add(obj, "quote-generation-socket", "SocketAddress", + tdx_guest_get_qgs, + tdx_guest_set_qgs, + NULL, NULL); + + qemu_mutex_init(&tdx->lock); +} + +static void tdx_guest_finalize(Object *obj) +{ +} + +static void tdx_guest_class_init(ObjectClass *oc, const void *data) +{ + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + + klass->kvm_init = tdx_kvm_init; + x86_klass->kvm_type = tdx_kvm_type; + x86_klass->cpu_instance_init = tdx_cpu_instance_init; + x86_klass->adjust_cpuid_features = tdx_adjust_cpuid_features; + x86_klass->check_features = tdx_check_features; +} diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h new file mode 100644 index 0000000..35a09c1 --- /dev/null +++ b/target/i386/kvm/tdx.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef QEMU_I386_TDX_H +#define QEMU_I386_TDX_H + +#ifndef CONFIG_USER_ONLY +#include CONFIG_DEVICES /* CONFIG_TDX */ +#endif + +#include "confidential-guest.h" +#include "cpu.h" +#include "hw/i386/tdvf.h" + +#include "tdx-quote-generator.h" + +#define TYPE_TDX_GUEST "tdx-guest" +#define TDX_GUEST(obj) OBJECT_CHECK(TdxGuest, (obj), TYPE_TDX_GUEST) + +typedef struct TdxGuestClass { + X86ConfidentialGuestClass parent_class; +} TdxGuestClass; + +/* TDX requires bus frequency 25MHz */ +#define TDX_APIC_BUS_CYCLES_NS 40 + +#define TDVMCALL_GET_TD_VM_CALL_INFO 0x10000 +#define TDVMCALL_GET_QUOTE 0x10002 + +#define TDG_VP_VMCALL_SUCCESS 0x0000000000000000ULL +#define TDG_VP_VMCALL_RETRY 0x0000000000000001ULL +#define TDG_VP_VMCALL_INVALID_OPERAND 0x8000000000000000ULL +#define TDG_VP_VMCALL_GPA_INUSE 0x8000000000000001ULL +#define TDG_VP_VMCALL_ALIGN_ERROR 0x8000000000000002ULL + +#define TDG_VP_VMCALL_SUBFUNC_GET_QUOTE 0x0000000000000001ULL + +enum TdxRamType { + TDX_RAM_UNACCEPTED, + TDX_RAM_ADDED, +}; + +typedef struct TdxRamEntry { + uint64_t address; + uint64_t length; + enum TdxRamType type; +} TdxRamEntry; + +typedef struct TdxGuest { + X86ConfidentialGuest parent_obj; + + QemuMutex lock; + + bool initialized; + uint64_t attributes; /* TD attributes */ + uint64_t xfam; + char *mrconfigid; /* base64 encoded sha384 digest */ + char *mrowner; /* base64 encoded sha384 digest */ + char *mrownerconfig; /* base64 encoded sha384 digest */ + + MemoryRegion *tdvf_mr; + TdxFirmware tdvf; + + uint32_t nr_ram_entries; + TdxRamEntry *ram_entries; + + /* GetQuote */ + SocketAddress *qg_sock_addr; + int num; +} TdxGuest; + +#ifdef CONFIG_TDX +bool is_tdx_vm(void); +#else +#define is_tdx_vm() 0 +#endif /* CONFIG_TDX */ + +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp); +void tdx_set_tdvf_region(MemoryRegion *tdvf_mr); +int tdx_parse_tdvf(void *flash_ptr, int size); +int tdx_handle_report_fatal_error(X86CPU *cpu, struct kvm_run *run); +void tdx_handle_get_quote(X86CPU *cpu, struct kvm_run *run); +void tdx_handle_get_tdvmcall_info(X86CPU *cpu, struct kvm_run *run); + +#endif /* QEMU_I386_TDX_H */ diff --git a/target/i386/machine.c b/target/i386/machine.c index 6cb561c..dd2dac1 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -1060,9 +1060,8 @@ static bool tsc_khz_needed(void *opaque) { X86CPU *cpu = opaque; CPUX86State *env = &cpu->env; - MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); - X86MachineClass *x86mc = X86_MACHINE_CLASS(mc); - return env->tsc_khz && x86mc->save_tsc_khz; + + return env->tsc_khz; } static const VMStateDescription vmstate_tsc_khz = { diff --git a/target/i386/monitor.c b/target/i386/monitor.c index 3ea92b0..3c9b6ca 100644 --- a/target/i386/monitor.c +++ b/target/i386/monitor.c @@ -29,7 +29,6 @@ #include "monitor/hmp.h" #include "qobject/qdict.h" #include "qapi/error.h" -#include "qapi/qapi-commands-misc-target.h" #include "qapi/qapi-commands-misc.h" /* Perform linear address sign extension */ diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index f0aa189..a2e4d48 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -842,7 +842,7 @@ int64_t helper_cvttsd2sq(CPUX86State *env, ZMMReg *s) void glue(helper_rsqrtps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); int i; for (i = 0; i < 2 << SHIFT; i++) { d->ZMM_S(i) = float32_div(float32_one, @@ -855,7 +855,7 @@ void glue(helper_rsqrtps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s) #if SHIFT == 1 void helper_rsqrtss(CPUX86State *env, ZMMReg *d, ZMMReg *v, ZMMReg *s) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); int i; d->ZMM_S(0) = float32_div(float32_one, float32_sqrt(s->ZMM_S(0), &env->sse_status), @@ -869,7 +869,7 @@ void helper_rsqrtss(CPUX86State *env, ZMMReg *d, ZMMReg *v, ZMMReg *s) void glue(helper_rcpps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); int i; for (i = 0; i < 2 << SHIFT; i++) { d->ZMM_S(i) = float32_div(float32_one, s->ZMM_S(i), &env->sse_status); @@ -880,7 +880,7 @@ void glue(helper_rcpps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s) #if SHIFT == 1 void helper_rcpss(CPUX86State *env, ZMMReg *d, ZMMReg *v, ZMMReg *s) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); int i; d->ZMM_S(0) = float32_div(float32_one, s->ZMM_S(0), &env->sse_status); for (i = 1; i < 2 << SHIFT; i++) { @@ -1714,7 +1714,7 @@ void glue(helper_phminposuw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mode) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; int i; @@ -1738,7 +1738,7 @@ void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mode) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; int i; @@ -1763,7 +1763,7 @@ void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, uint32_t mode) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; int i; @@ -1788,7 +1788,7 @@ void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, void glue(helper_roundsd, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, uint32_t mode) { - uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; int i; diff --git a/target/i386/sev-system-stub.c b/target/i386/sev-system-stub.c index d5bf886..7c5c02a 100644 --- a/target/i386/sev-system-stub.c +++ b/target/i386/sev-system-stub.c @@ -14,34 +14,9 @@ #include "qemu/osdep.h" #include "monitor/monitor.h" #include "monitor/hmp-target.h" -#include "qapi/qapi-commands-misc-target.h" #include "qapi/error.h" #include "sev.h" -SevInfo *qmp_query_sev(Error **errp) -{ - error_setg(errp, "SEV is not available in this QEMU"); - return NULL; -} - -SevLaunchMeasureInfo *qmp_query_sev_launch_measure(Error **errp) -{ - error_setg(errp, "SEV is not available in this QEMU"); - return NULL; -} - -SevCapability *qmp_query_sev_capabilities(Error **errp) -{ - error_setg(errp, "SEV is not available in this QEMU"); - return NULL; -} - -void qmp_sev_inject_launch_secret(const char *packet_header, const char *secret, - bool has_gpa, uint64_t gpa, Error **errp) -{ - error_setg(errp, "SEV is not available in this QEMU"); -} - int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) { g_assert_not_reached(); @@ -56,13 +31,6 @@ int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) g_assert_not_reached(); } -SevAttestationReport *qmp_query_sev_attestation_report(const char *mnonce, - Error **errp) -{ - error_setg(errp, "SEV is not available in this QEMU"); - return NULL; -} - void hmp_info_sev(Monitor *mon, const QDict *qdict) { monitor_printf(mon, "SEV is not available in this QEMU\n"); diff --git a/target/i386/sev.c b/target/i386/sev.c index 7ee700d..1a12f06 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -37,7 +37,7 @@ #include "qom/object.h" #include "monitor/monitor.h" #include "monitor/hmp-target.h" -#include "qapi/qapi-commands-misc-target.h" +#include "qapi/qapi-commands-misc-i386.h" #include "confidential-guest.h" #include "hw/i386/pc.h" #include "system/address-spaces.h" @@ -212,14 +212,6 @@ static const char *const sev_fw_errlist[] = { #define SEV_FW_MAX_ERROR ARRAY_SIZE(sev_fw_errlist) -/* <linux/kvm.h> doesn't expose this, so re-use the max from kvm.c */ -#define KVM_MAX_CPUID_ENTRIES 100 - -typedef struct KvmCpuidInfo { - struct kvm_cpuid2 cpuid; - struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; -} KvmCpuidInfo; - #define SNP_CPUID_FUNCTION_MAXCOUNT 64 #define SNP_CPUID_FUNCTION_UNKNOWN 0xFFFFFFFF @@ -947,7 +939,7 @@ out: } static uint32_t -sev_snp_mask_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, +sev_snp_adjust_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, int reg, uint32_t value) { switch (feature) { @@ -2405,7 +2397,7 @@ sev_snp_guest_class_init(ObjectClass *oc, const void *data) klass->launch_finish = sev_snp_launch_finish; klass->launch_update_data = sev_snp_launch_update_data; klass->kvm_init = sev_snp_kvm_init; - x86_klass->mask_cpuid_features = sev_snp_mask_cpuid_features; + x86_klass->adjust_cpuid_features = sev_snp_adjust_cpuid_features; x86_klass->kvm_type = sev_snp_kvm_type; object_class_property_add(oc, "policy", "uint64", diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index 1cbadb1..b3b2382 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -189,25 +189,25 @@ void cpu_init_fp_statuses(CPUX86State *env) set_float_default_nan_pattern(0b11000000, &env->mmx_status); set_float_default_nan_pattern(0b11000000, &env->sse_status); /* - * TODO: x86 does flush-to-zero detection after rounding (the SDM + * x86 does flush-to-zero detection after rounding (the SDM * section 10.2.3.3 on the FTZ bit of MXCSR says that we flush * when we detect underflow, which x86 does after rounding). */ - set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status); - set_float_ftz_detection(float_ftz_before_rounding, &env->mmx_status); - set_float_ftz_detection(float_ftz_before_rounding, &env->sse_status); + set_float_ftz_detection(float_ftz_after_rounding, &env->fp_status); + set_float_ftz_detection(float_ftz_after_rounding, &env->mmx_status); + set_float_ftz_detection(float_ftz_after_rounding, &env->sse_status); } -static inline uint8_t save_exception_flags(CPUX86State *env) +static inline int save_exception_flags(CPUX86State *env) { - uint8_t old_flags = get_float_exception_flags(&env->fp_status); + int old_flags = get_float_exception_flags(&env->fp_status); set_float_exception_flags(0, &env->fp_status); return old_flags; } -static void merge_exception_flags(CPUX86State *env, uint8_t old_flags) +static void merge_exception_flags(CPUX86State *env, int old_flags) { - uint8_t new_flags = get_float_exception_flags(&env->fp_status); + int new_flags = get_float_exception_flags(&env->fp_status); float_raise(old_flags, &env->fp_status); fpu_set_exception(env, ((new_flags & float_flag_invalid ? FPUS_IE : 0) | @@ -215,12 +215,12 @@ static void merge_exception_flags(CPUX86State *env, uint8_t old_flags) (new_flags & float_flag_overflow ? FPUS_OE : 0) | (new_flags & float_flag_underflow ? FPUS_UE : 0) | (new_flags & float_flag_inexact ? FPUS_PE : 0) | - (new_flags & float_flag_input_denormal_flushed ? FPUS_DE : 0))); + (new_flags & float_flag_input_denormal_used ? FPUS_DE : 0))); } static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); floatx80 ret = floatx80_div(a, b, &env->fp_status); merge_exception_flags(env, old_flags); return ret; @@ -240,7 +240,7 @@ static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr) void helper_flds_FT0(CPUX86State *env, uint32_t val) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); union { float32 f; uint32_t i; @@ -253,7 +253,7 @@ void helper_flds_FT0(CPUX86State *env, uint32_t val) void helper_fldl_FT0(CPUX86State *env, uint64_t val) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); union { float64 f; uint64_t i; @@ -271,7 +271,7 @@ void helper_fildl_FT0(CPUX86State *env, int32_t val) void helper_flds_ST0(CPUX86State *env, uint32_t val) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int new_fpstt; union { float32 f; @@ -288,7 +288,7 @@ void helper_flds_ST0(CPUX86State *env, uint32_t val) void helper_fldl_ST0(CPUX86State *env, uint64_t val) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int new_fpstt; union { float64 f; @@ -338,7 +338,7 @@ void helper_fildll_ST0(CPUX86State *env, int64_t val) uint32_t helper_fsts_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); union { float32 f; uint32_t i; @@ -351,7 +351,7 @@ uint32_t helper_fsts_ST0(CPUX86State *env) uint64_t helper_fstl_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); union { float64 f; uint64_t i; @@ -364,7 +364,7 @@ uint64_t helper_fstl_ST0(CPUX86State *env) int32_t helper_fist_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32(ST0, &env->fp_status); @@ -378,7 +378,7 @@ int32_t helper_fist_ST0(CPUX86State *env) int32_t helper_fistl_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32(ST0, &env->fp_status); @@ -391,7 +391,7 @@ int32_t helper_fistl_ST0(CPUX86State *env) int64_t helper_fistll_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int64_t val; val = floatx80_to_int64(ST0, &env->fp_status); @@ -404,7 +404,7 @@ int64_t helper_fistll_ST0(CPUX86State *env) int32_t helper_fistt_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); @@ -418,7 +418,7 @@ int32_t helper_fistt_ST0(CPUX86State *env) int32_t helper_fisttl_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int32_t val; val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); @@ -431,7 +431,7 @@ int32_t helper_fisttl_ST0(CPUX86State *env) int64_t helper_fisttll_ST0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int64_t val; val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status); @@ -527,7 +527,7 @@ static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; void helper_fcom_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); FloatRelation ret; ret = floatx80_compare(ST0, FT0, &env->fp_status); @@ -537,7 +537,7 @@ void helper_fcom_ST0_FT0(CPUX86State *env) void helper_fucom_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); FloatRelation ret; ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); @@ -549,7 +549,7 @@ static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; void helper_fcomi_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int eflags; FloatRelation ret; @@ -562,7 +562,7 @@ void helper_fcomi_ST0_FT0(CPUX86State *env) void helper_fucomi_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int eflags; FloatRelation ret; @@ -575,28 +575,28 @@ void helper_fucomi_ST0_FT0(CPUX86State *env) void helper_fadd_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST0 = floatx80_add(ST0, FT0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fmul_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST0 = floatx80_mul(ST0, FT0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fsub_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST0 = floatx80_sub(ST0, FT0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fsubr_ST0_FT0(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST0 = floatx80_sub(FT0, ST0, &env->fp_status); merge_exception_flags(env, old_flags); } @@ -615,28 +615,28 @@ void helper_fdivr_ST0_FT0(CPUX86State *env) void helper_fadd_STN_ST0(CPUX86State *env, int st_index) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fmul_STN_ST0(CPUX86State *env, int st_index) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fsub_STN_ST0(CPUX86State *env, int st_index) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fsubr_STN_ST0(CPUX86State *env, int st_index) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status); merge_exception_flags(env, old_flags); } @@ -861,7 +861,7 @@ void helper_fbld_ST0(CPUX86State *env, target_ulong ptr) void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); int v; target_ulong mem_ref, mem_end; int64_t val; @@ -1136,7 +1136,7 @@ static const struct f2xm1_data f2xm1_table[65] = { void helper_f2xm1(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); uint64_t sig = extractFloatx80Frac(ST0); int32_t exp = extractFloatx80Exp(ST0); bool sign = extractFloatx80Sign(ST0); @@ -1369,7 +1369,7 @@ static const struct fpatan_data fpatan_table[9] = { void helper_fpatan(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); uint64_t arg0_sig = extractFloatx80Frac(ST0); int32_t arg0_exp = extractFloatx80Exp(ST0); bool arg0_sign = extractFloatx80Sign(ST0); @@ -1808,7 +1808,7 @@ void helper_fpatan(CPUX86State *env) void helper_fxtract(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); CPU_LDoubleU temp; temp.d = ST0; @@ -1857,7 +1857,7 @@ void helper_fxtract(CPUX86State *env) static void helper_fprem_common(CPUX86State *env, bool mod) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); uint64_t quotient; CPU_LDoubleU temp0, temp1; int exp0, exp1, expdiff; @@ -2053,7 +2053,7 @@ static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp, void helper_fyl2xp1(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); uint64_t arg0_sig = extractFloatx80Frac(ST0); int32_t arg0_exp = extractFloatx80Exp(ST0); bool arg0_sign = extractFloatx80Sign(ST0); @@ -2151,7 +2151,7 @@ void helper_fyl2xp1(CPUX86State *env) void helper_fyl2x(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); uint64_t arg0_sig = extractFloatx80Frac(ST0); int32_t arg0_exp = extractFloatx80Exp(ST0); bool arg0_sign = extractFloatx80Sign(ST0); @@ -2298,7 +2298,7 @@ void helper_fyl2x(CPUX86State *env) void helper_fsqrt(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); if (floatx80_is_neg(ST0)) { env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ env->fpus |= 0x400; @@ -2324,14 +2324,14 @@ void helper_fsincos(CPUX86State *env) void helper_frndint(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); ST0 = floatx80_round_to_int(ST0, &env->fp_status); merge_exception_flags(env, old_flags); } void helper_fscale(CPUX86State *env) { - uint8_t old_flags = save_exception_flags(env); + int old_flags = save_exception_flags(env); if (floatx80_invalid_encoding(ST1, &env->fp_status) || floatx80_invalid_encoding(ST0, &env->fp_status)) { float_raise(float_flag_invalid, &env->fp_status); @@ -2369,7 +2369,7 @@ void helper_fscale(CPUX86State *env) } else { int n; FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision; - uint8_t save_flags = get_float_exception_flags(&env->fp_status); + int save_flags = get_float_exception_flags(&env->fp_status); set_float_exception_flags(0, &env->fp_status); n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); set_float_exception_flags(save_flags, &env->fp_status); @@ -3254,6 +3254,7 @@ void update_mxcsr_status(CPUX86State *env) /* Set exception flags. */ set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) | + (mxcsr & FPUS_DE ? float_flag_input_denormal_used : 0) | (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) | (mxcsr & FPUS_OE ? float_flag_overflow : 0) | (mxcsr & FPUS_UE ? float_flag_underflow : 0) | @@ -3269,15 +3270,9 @@ void update_mxcsr_status(CPUX86State *env) void update_mxcsr_from_sse_status(CPUX86State *env) { - uint8_t flags = get_float_exception_flags(&env->sse_status); - /* - * The MXCSR denormal flag has opposite semantics to - * float_flag_input_denormal_flushed (the softfloat code sets that flag - * only when flushing input denormals to zero, but SSE sets it - * only when not flushing them to zero), so is not converted - * here. - */ + int flags = get_float_exception_flags(&env->sse_status); env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) | + (flags & float_flag_input_denormal_used ? FPUS_DE : 0) | (flags & float_flag_divbyzero ? FPUS_ZE : 0) | (flags & float_flag_overflow ? FPUS_OE : 0) | (flags & float_flag_underflow ? FPUS_UE : 0) | diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h index 6b3f198..be011b0 100644 --- a/target/i386/tcg/helper-tcg.h +++ b/target/i386/tcg/helper-tcg.h @@ -97,7 +97,7 @@ static inline unsigned int compute_pf(uint8_t x) /* misc_helper.c */ void cpu_load_eflags(CPUX86State *env, int eflags, int update_mask); -/* sysemu/svm_helper.c */ +/* system/svm_helper.c */ #ifndef CONFIG_USER_ONLY G_NORETURN void cpu_vmexit(CPUX86State *nenv, uint32_t exit_code, uint64_t exit_info_1, uintptr_t retaddr); @@ -115,7 +115,7 @@ int exception_has_error_code(int intno); /* smm_helper.c */ void do_smm_enter(X86CPU *cpu); -/* sysemu/bpt_helper.c */ +/* system/bpt_helper.c */ bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update); /* diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c index 179dfdf..6f5dc06 100644 --- a/target/i386/tcg/tcg-cpu.c +++ b/target/i386/tcg/tcg-cpu.c @@ -149,6 +149,12 @@ static void x86_cpu_exec_reset(CPUState *cs) do_cpu_init(env_archcpu(env)); cs->exception_index = EXCP_HALTED; } + +static vaddr x86_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ + return cpu_env(cs)->hflags & HF_CS64_MASK ? result : (uint32_t)result; +} #endif const TCGCPUOps x86_tcg_ops = { @@ -172,6 +178,7 @@ const TCGCPUOps x86_tcg_ops = { .record_sigbus = x86_cpu_record_sigbus, #else .tlb_fill = x86_cpu_tlb_fill, + .pointer_wrap = x86_pointer_wrap, .do_interrupt = x86_cpu_do_interrupt, .cpu_exec_halt = x86_cpu_exec_halt, .cpu_exec_interrupt = x86_cpu_exec_interrupt, diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 0fcddc2..0cb87d0 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -2033,8 +2033,11 @@ static void gen_movl_seg(DisasContext *s, X86Seg seg_reg, TCGv src, bool inhibit tcg_gen_trunc_tl_i32(sel, src); gen_helper_load_seg(tcg_env, tcg_constant_i32(seg_reg), sel); - /* For move to DS/ES/SS, the addseg or ss32 flags may change. */ - if (CODE32(s) && seg_reg < R_FS) { + /* + * For moves to SS, the SS32 flag may change. For CODE32 only, changes + * to SS, DS and ES may change the ADDSEG flags. + */ + if (seg_reg == R_SS || (CODE32(s) && seg_reg < R_FS)) { s->base.is_jmp = DISAS_EOB_NEXT; } } else { diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c index f7535d1..abad84c 100644 --- a/target/loongarch/cpu.c +++ b/target/loongarch/cpu.c @@ -334,6 +334,12 @@ static bool loongarch_cpu_exec_interrupt(CPUState *cs, int interrupt_request) } return false; } + +static vaddr loongarch_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ + return is_va32(cpu_env(cs)) ? (uint32_t)result : result; +} #endif static TCGTBCPUState loongarch_get_tb_cpu_state(CPUState *cs) @@ -889,6 +895,7 @@ static const TCGCPUOps loongarch_tcg_ops = { #ifndef CONFIG_USER_ONLY .tlb_fill = loongarch_cpu_tlb_fill, + .pointer_wrap = loongarch_pointer_wrap, .cpu_exec_interrupt = loongarch_cpu_exec_interrupt, .cpu_exec_halt = loongarch_cpu_has_work, .cpu_exec_reset = cpu_reset, diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c index 1bda570..c66bdd5 100644 --- a/target/loongarch/kvm/kvm.c +++ b/target/loongarch/kvm/kvm.c @@ -1071,7 +1071,11 @@ static int kvm_cpu_check_pv_features(CPUState *cs, Error **errp) env->pv_features |= BIT(KVM_FEATURE_VIRT_EXTIOI); } } + return 0; +} +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ return 0; } diff --git a/target/loongarch/loongarch-qmp-cmds.c b/target/loongarch/loongarch-qmp-cmds.c index 6f732d8..f5f1cd0 100644 --- a/target/loongarch/loongarch-qmp-cmds.c +++ b/target/loongarch/loongarch-qmp-cmds.c @@ -8,7 +8,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "qapi/qapi-commands-machine-target.h" +#include "qapi/qapi-commands-machine.h" #include "cpu.h" #include "qobject/qdict.h" #include "qapi/qobject-input-visitor.h" diff --git a/target/loongarch/tcg/insn_trans/trans_fcmp.c.inc b/target/loongarch/tcg/insn_trans/trans_fcmp.c.inc index 3babf69..6a2c030 100644 --- a/target/loongarch/tcg/insn_trans/trans_fcmp.c.inc +++ b/target/loongarch/tcg/insn_trans/trans_fcmp.c.inc @@ -4,10 +4,15 @@ */ /* bit0(signaling/quiet) bit1(lt) bit2(eq) bit3(un) bit4(neq) */ -static uint32_t get_fcmp_flags(int cond) +static uint32_t get_fcmp_flags(DisasContext *ctx, int cond) { uint32_t flags = 0; + /*check cond , cond =[0-8,10,12] */ + if ((cond > 8) &&(cond != 10) && (cond != 12)) { + return -1; + } + if (cond & 0x1) { flags |= FCMP_LT; } @@ -26,9 +31,14 @@ static uint32_t get_fcmp_flags(int cond) static bool trans_fcmp_cond_s(DisasContext *ctx, arg_fcmp_cond_s *a) { TCGv var, src1, src2; - uint32_t flags; + uint32_t flags = get_fcmp_flags(ctx, a->fcond >>1); void (*fn)(TCGv, TCGv_env, TCGv, TCGv, TCGv_i32); + if (flags == -1) { + generate_exception(ctx, EXCCODE_INE); + return true; + } + if (!avail_FP_SP(ctx)) { return false; } @@ -39,8 +49,6 @@ static bool trans_fcmp_cond_s(DisasContext *ctx, arg_fcmp_cond_s *a) src1 = get_fpr(ctx, a->fj); src2 = get_fpr(ctx, a->fk); fn = (a->fcond & 1 ? gen_helper_fcmp_s_s : gen_helper_fcmp_c_s); - flags = get_fcmp_flags(a->fcond >> 1); - fn(var, tcg_env, src1, src2, tcg_constant_i32(flags)); tcg_gen_st8_tl(var, tcg_env, offsetof(CPULoongArchState, cf[a->cd])); @@ -50,9 +58,14 @@ static bool trans_fcmp_cond_s(DisasContext *ctx, arg_fcmp_cond_s *a) static bool trans_fcmp_cond_d(DisasContext *ctx, arg_fcmp_cond_d *a) { TCGv var, src1, src2; - uint32_t flags; + uint32_t flags = get_fcmp_flags(ctx, a->fcond >> 1); void (*fn)(TCGv, TCGv_env, TCGv, TCGv, TCGv_i32); + if (flags == -1) { + generate_exception(ctx, EXCCODE_INE); + return true; + } + if (!avail_FP_DP(ctx)) { return false; } @@ -63,8 +76,6 @@ static bool trans_fcmp_cond_d(DisasContext *ctx, arg_fcmp_cond_d *a) src1 = get_fpr(ctx, a->fj); src2 = get_fpr(ctx, a->fk); fn = (a->fcond & 1 ? gen_helper_fcmp_s_d : gen_helper_fcmp_c_d); - flags = get_fcmp_flags(a->fcond >> 1); - fn(var, tcg_env, src1, src2, tcg_constant_i32(flags)); tcg_gen_st8_tl(var, tcg_env, offsetof(CPULoongArchState, cf[a->cd])); diff --git a/target/loongarch/tcg/insn_trans/trans_vec.c.inc b/target/loongarch/tcg/insn_trans/trans_vec.c.inc index dff9277..d6f0560 100644 --- a/target/loongarch/tcg/insn_trans/trans_vec.c.inc +++ b/target/loongarch/tcg/insn_trans/trans_vec.c.inc @@ -4655,19 +4655,23 @@ TRANS(xvslti_du, LASX, do_xcmpi, MO_64, TCG_COND_LTU) static bool do_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz) { - uint32_t flags; + uint32_t flags = get_fcmp_flags(ctx, a->fcond >> 1); void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32); TCGv_i32 vd = tcg_constant_i32(a->vd); TCGv_i32 vj = tcg_constant_i32(a->vj); TCGv_i32 vk = tcg_constant_i32(a->vk); TCGv_i32 oprsz = tcg_constant_i32(sz); + if(flags == -1){ + generate_exception(ctx, EXCCODE_INE); + return true; + } + if (!check_vec(ctx, sz)) { return true; } fn = (a->fcond & 1 ? gen_helper_vfcmp_s_s : gen_helper_vfcmp_c_s); - flags = get_fcmp_flags(a->fcond >> 1); fn(tcg_env, oprsz, vd, vj, vk, tcg_constant_i32(flags)); return true; @@ -4675,19 +4679,23 @@ static bool do_vfcmp_cond_s(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz) static bool do_vfcmp_cond_d(DisasContext *ctx, arg_vvv_fcond *a, uint32_t sz) { - uint32_t flags; + uint32_t flags = get_fcmp_flags(ctx, a->fcond >> 1); void (*fn)(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32); TCGv_i32 vd = tcg_constant_i32(a->vd); TCGv_i32 vj = tcg_constant_i32(a->vj); TCGv_i32 vk = tcg_constant_i32(a->vk); TCGv_i32 oprsz = tcg_constant_i32(sz); + if (flags == -1) { + generate_exception(ctx, EXCCODE_INE); + return true; + } + if (!check_vec(ctx, sz)) { return true; } fn = (a->fcond & 1 ? gen_helper_vfcmp_s_d : gen_helper_vfcmp_c_d); - flags = get_fcmp_flags(a->fcond >> 1); fn(tcg_env, oprsz, vd, vj, vk, tcg_constant_i32(flags)); return true; diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c index c5196a6..6a09db3 100644 --- a/target/m68k/cpu.c +++ b/target/m68k/cpu.c @@ -619,6 +619,7 @@ static const TCGCPUOps m68k_tcg_ops = { #ifndef CONFIG_USER_ONLY .tlb_fill = m68k_cpu_tlb_fill, + .pointer_wrap = cpu_pointer_wrap_uint32, .cpu_exec_interrupt = m68k_cpu_exec_interrupt, .cpu_exec_halt = m68k_cpu_has_work, .cpu_exec_reset = cpu_reset, diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c index 615a959..ee0a869 100644 --- a/target/microblaze/cpu.c +++ b/target/microblaze/cpu.c @@ -447,6 +447,7 @@ static const TCGCPUOps mb_tcg_ops = { #ifndef CONFIG_USER_ONLY .tlb_fill = mb_cpu_tlb_fill, + .pointer_wrap = cpu_pointer_wrap_uint32, .cpu_exec_interrupt = mb_cpu_exec_interrupt, .cpu_exec_halt = mb_cpu_has_work, .cpu_exec_reset = cpu_reset, diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h index 6ad8643..3ce28b3 100644 --- a/target/microblaze/cpu.h +++ b/target/microblaze/cpu.h @@ -248,7 +248,7 @@ struct CPUArchState { uint32_t pc; uint32_t msr; /* All bits of MSR except MSR[C] and MSR[CC] */ uint32_t msr_c; /* MSR[C], in low bit; other bits must be 0 */ - target_ulong ear; + uint64_t ear; uint32_t esr; uint32_t fsr; uint32_t btr; diff --git a/target/microblaze/helper.c b/target/microblaze/helper.c index 9203192..ef0e2f9 100644 --- a/target/microblaze/helper.c +++ b/target/microblaze/helper.c @@ -26,8 +26,51 @@ #include "exec/target_page.h" #include "qemu/host-utils.h" #include "exec/log.h" +#include "exec/helper-proto.h" + + +G_NORETURN +static void mb_unaligned_access_internal(CPUState *cs, uint64_t addr, + uintptr_t retaddr) +{ + CPUMBState *env = cpu_env(cs); + uint32_t esr, iflags; + + /* Recover the pc and iflags from the corresponding insn_start. */ + cpu_restore_state(cs, retaddr); + iflags = env->iflags; + + qemu_log_mask(CPU_LOG_INT, + "Unaligned access addr=0x%" PRIx64 " pc=%x iflags=%x\n", + addr, env->pc, iflags); + + esr = ESR_EC_UNALIGNED_DATA; + if (likely(iflags & ESR_ESS_FLAG)) { + esr |= iflags & ESR_ESS_MASK; + } else { + qemu_log_mask(LOG_UNIMP, "Unaligned access without ESR_ESS_FLAG\n"); + } + + env->ear = addr; + env->esr = esr; + cs->exception_index = EXCP_HW_EXCP; + cpu_loop_exit(cs); +} + +void mb_cpu_do_unaligned_access(CPUState *cs, vaddr addr, + MMUAccessType access_type, + int mmu_idx, uintptr_t retaddr) +{ + mb_unaligned_access_internal(cs, addr, retaddr); +} #ifndef CONFIG_USER_ONLY + +void HELPER(unaligned_access)(CPUMBState *env, uint64_t addr) +{ + mb_unaligned_access_internal(env_cpu(env), addr, GETPC()); +} + static bool mb_cpu_access_is_secure(MicroBlazeCPU *cpu, MMUAccessType access_type) { @@ -269,31 +312,3 @@ bool mb_cpu_exec_interrupt(CPUState *cs, int interrupt_request) } #endif /* !CONFIG_USER_ONLY */ - -void mb_cpu_do_unaligned_access(CPUState *cs, vaddr addr, - MMUAccessType access_type, - int mmu_idx, uintptr_t retaddr) -{ - MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs); - uint32_t esr, iflags; - - /* Recover the pc and iflags from the corresponding insn_start. */ - cpu_restore_state(cs, retaddr); - iflags = cpu->env.iflags; - - qemu_log_mask(CPU_LOG_INT, - "Unaligned access addr=" TARGET_FMT_lx " pc=%x iflags=%x\n", - (target_ulong)addr, cpu->env.pc, iflags); - - esr = ESR_EC_UNALIGNED_DATA; - if (likely(iflags & ESR_ESS_FLAG)) { - esr |= iflags & ESR_ESS_MASK; - } else { - qemu_log_mask(LOG_UNIMP, "Unaligned access without ESR_ESS_FLAG\n"); - } - - cpu->env.ear = addr; - cpu->env.esr = esr; - cs->exception_index = EXCP_HW_EXCP; - cpu_loop_exit(cs); -} diff --git a/target/microblaze/helper.h b/target/microblaze/helper.h index f740835..ef4fad9 100644 --- a/target/microblaze/helper.h +++ b/target/microblaze/helper.h @@ -20,12 +20,22 @@ DEF_HELPER_FLAGS_3(fcmp_ne, TCG_CALL_NO_WG, i32, env, i32, i32) DEF_HELPER_FLAGS_3(fcmp_ge, TCG_CALL_NO_WG, i32, env, i32, i32) DEF_HELPER_FLAGS_2(pcmpbf, TCG_CALL_NO_RWG_SE, i32, i32, i32) -#if !defined(CONFIG_USER_ONLY) -DEF_HELPER_FLAGS_3(mmu_read, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_4(mmu_write, TCG_CALL_NO_RWG, void, env, i32, i32, i32) -#endif - DEF_HELPER_FLAGS_2(stackprot, TCG_CALL_NO_WG, void, env, tl) - DEF_HELPER_FLAGS_2(get, TCG_CALL_NO_RWG, i32, i32, i32) DEF_HELPER_FLAGS_3(put, TCG_CALL_NO_RWG, void, i32, i32, i32) + +#ifndef CONFIG_USER_ONLY +DEF_HELPER_FLAGS_3(mmu_read, TCG_CALL_NO_RWG, i32, env, i32, i32) +DEF_HELPER_FLAGS_4(mmu_write, TCG_CALL_NO_RWG, void, env, i32, i32, i32) +DEF_HELPER_FLAGS_2(unaligned_access, TCG_CALL_NO_WG, noreturn, env, i64) +DEF_HELPER_FLAGS_2(lbuea, TCG_CALL_NO_WG, i32, env, i64) +DEF_HELPER_FLAGS_2(lhuea_be, TCG_CALL_NO_WG, i32, env, i64) +DEF_HELPER_FLAGS_2(lhuea_le, TCG_CALL_NO_WG, i32, env, i64) +DEF_HELPER_FLAGS_2(lwea_be, TCG_CALL_NO_WG, i32, env, i64) +DEF_HELPER_FLAGS_2(lwea_le, TCG_CALL_NO_WG, i32, env, i64) +DEF_HELPER_FLAGS_3(sbea, TCG_CALL_NO_WG, void, env, i32, i64) +DEF_HELPER_FLAGS_3(shea_be, TCG_CALL_NO_WG, void, env, i32, i64) +DEF_HELPER_FLAGS_3(shea_le, TCG_CALL_NO_WG, void, env, i32, i64) +DEF_HELPER_FLAGS_3(swea_be, TCG_CALL_NO_WG, void, env, i32, i64) +DEF_HELPER_FLAGS_3(swea_le, TCG_CALL_NO_WG, void, env, i32, i64) +#endif diff --git a/target/microblaze/mmu.c b/target/microblaze/mmu.c index 95a12e1..8703ff5 100644 --- a/target/microblaze/mmu.c +++ b/target/microblaze/mmu.c @@ -172,7 +172,8 @@ unsigned int mmu_translate(MicroBlazeCPU *cpu, MicroBlazeMMULookup *lu, } done: qemu_log_mask(CPU_LOG_MMU, - "MMU vaddr=%" PRIx64 " rw=%d tlb_wr=%d tlb_ex=%d hit=%d\n", + "MMU vaddr=0x" TARGET_FMT_lx + " rw=%d tlb_wr=%d tlb_ex=%d hit=%d\n", vaddr, rw, tlb_wr, tlb_ex, hit); return hit; } diff --git a/target/microblaze/op_helper.c b/target/microblaze/op_helper.c index 9e838df..b8365b3 100644 --- a/target/microblaze/op_helper.c +++ b/target/microblaze/op_helper.c @@ -382,6 +382,8 @@ void helper_stackprot(CPUMBState *env, target_ulong addr) } #if !defined(CONFIG_USER_ONLY) +#include "system/memory.h" + /* Writes/reads to the MMU's special regs end up here. */ uint32_t helper_mmu_read(CPUMBState *env, uint32_t ext, uint32_t rn) { @@ -393,38 +395,90 @@ void helper_mmu_write(CPUMBState *env, uint32_t ext, uint32_t rn, uint32_t v) mmu_write(env, ext, rn, v); } +static void mb_transaction_failed_internal(CPUState *cs, hwaddr physaddr, + uint64_t addr, unsigned size, + MMUAccessType access_type, + uintptr_t retaddr) +{ + CPUMBState *env = cpu_env(cs); + MicroBlazeCPU *cpu = env_archcpu(env); + const char *access_name = "INVALID"; + bool take = env->msr & MSR_EE; + uint32_t esr = ESR_EC_DATA_BUS; + + switch (access_type) { + case MMU_INST_FETCH: + access_name = "INST_FETCH"; + esr = ESR_EC_INSN_BUS; + take &= cpu->cfg.iopb_bus_exception; + break; + case MMU_DATA_LOAD: + access_name = "DATA_LOAD"; + take &= cpu->cfg.dopb_bus_exception; + break; + case MMU_DATA_STORE: + access_name = "DATA_STORE"; + take &= cpu->cfg.dopb_bus_exception; + break; + } + + qemu_log_mask(CPU_LOG_INT, "Transaction failed: addr 0x%" PRIx64 + "physaddr 0x" HWADDR_FMT_plx " size %d access-type %s (%s)\n", + addr, physaddr, size, access_name, + take ? "TAKEN" : "DROPPED"); + + if (take) { + env->esr = esr; + env->ear = addr; + cs->exception_index = EXCP_HW_EXCP; + cpu_loop_exit_restore(cs, retaddr); + } +} + void mb_cpu_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr, unsigned size, MMUAccessType access_type, int mmu_idx, MemTxAttrs attrs, MemTxResult response, uintptr_t retaddr) { - MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs); - CPUMBState *env = &cpu->env; + mb_transaction_failed_internal(cs, physaddr, addr, size, + access_type, retaddr); +} - qemu_log_mask(CPU_LOG_INT, "Transaction failed: vaddr 0x%" VADDR_PRIx - " physaddr 0x" HWADDR_FMT_plx " size %d access type %s\n", - addr, physaddr, size, - access_type == MMU_INST_FETCH ? "INST_FETCH" : - (access_type == MMU_DATA_LOAD ? "DATA_LOAD" : "DATA_STORE")); +#define LD_EA(NAME, TYPE, FUNC) \ +uint32_t HELPER(NAME)(CPUMBState *env, uint64_t ea) \ +{ \ + CPUState *cs = env_cpu(env); \ + MemTxResult txres; \ + TYPE ret = FUNC(cs->as, ea, MEMTXATTRS_UNSPECIFIED, &txres); \ + if (unlikely(txres != MEMTX_OK)) { \ + mb_transaction_failed_internal(cs, ea, ea, sizeof(TYPE), \ + MMU_DATA_LOAD, GETPC()); \ + } \ + return ret; \ +} - if (!(env->msr & MSR_EE)) { - return; - } +LD_EA(lbuea, uint8_t, address_space_ldub) +LD_EA(lhuea_be, uint16_t, address_space_lduw_be) +LD_EA(lhuea_le, uint16_t, address_space_lduw_le) +LD_EA(lwea_be, uint32_t, address_space_ldl_be) +LD_EA(lwea_le, uint32_t, address_space_ldl_le) + +#define ST_EA(NAME, TYPE, FUNC) \ +void HELPER(NAME)(CPUMBState *env, uint32_t data, uint64_t ea) \ +{ \ + CPUState *cs = env_cpu(env); \ + MemTxResult txres; \ + FUNC(cs->as, ea, data, MEMTXATTRS_UNSPECIFIED, &txres); \ + if (unlikely(txres != MEMTX_OK)) { \ + mb_transaction_failed_internal(cs, ea, ea, sizeof(TYPE), \ + MMU_DATA_STORE, GETPC()); \ + } \ +} - if (access_type == MMU_INST_FETCH) { - if (!cpu->cfg.iopb_bus_exception) { - return; - } - env->esr = ESR_EC_INSN_BUS; - } else { - if (!cpu->cfg.dopb_bus_exception) { - return; - } - env->esr = ESR_EC_DATA_BUS; - } +ST_EA(sbea, uint8_t, address_space_stb) +ST_EA(shea_be, uint16_t, address_space_stw_be) +ST_EA(shea_le, uint16_t, address_space_stw_le) +ST_EA(swea_be, uint32_t, address_space_stl_be) +ST_EA(swea_le, uint32_t, address_space_stl_le) - env->ear = addr; - cs->exception_index = EXCP_HW_EXCP; - cpu_loop_exit_restore(cs, retaddr); -} #endif diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c index 671b1ae..5098a1d 100644 --- a/target/microblaze/translate.c +++ b/target/microblaze/translate.c @@ -63,9 +63,6 @@ typedef struct DisasContext { DisasContextBase base; const MicroBlazeCPUConfig *cfg; - TCGv_i32 r0; - bool r0_set; - /* Decoder. */ uint32_t ext_imm; unsigned int tb_flags; @@ -179,14 +176,7 @@ static TCGv_i32 reg_for_read(DisasContext *dc, int reg) if (likely(reg != 0)) { return cpu_R[reg]; } - if (!dc->r0_set) { - if (dc->r0 == NULL) { - dc->r0 = tcg_temp_new_i32(); - } - tcg_gen_movi_i32(dc->r0, 0); - dc->r0_set = true; - } - return dc->r0; + return tcg_constant_i32(0); } static TCGv_i32 reg_for_write(DisasContext *dc, int reg) @@ -194,10 +184,7 @@ static TCGv_i32 reg_for_write(DisasContext *dc, int reg) if (likely(reg != 0)) { return cpu_R[reg]; } - if (dc->r0 == NULL) { - dc->r0 = tcg_temp_new_i32(); - } - return dc->r0; + return tcg_temp_new_i32(); } static bool do_typea(DisasContext *dc, arg_typea *arg, bool side_effects, @@ -619,19 +606,18 @@ DO_TYPEBI(xori, false, tcg_gen_xori_i32) static TCGv compute_ldst_addr_typea(DisasContext *dc, int ra, int rb) { - TCGv ret = tcg_temp_new(); + TCGv ret; /* If any of the regs is r0, set t to the value of the other reg. */ if (ra && rb) { - TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_add_i32(tmp, cpu_R[ra], cpu_R[rb]); - tcg_gen_extu_i32_tl(ret, tmp); + ret = tcg_temp_new_i32(); + tcg_gen_add_i32(ret, cpu_R[ra], cpu_R[rb]); } else if (ra) { - tcg_gen_extu_i32_tl(ret, cpu_R[ra]); + ret = cpu_R[ra]; } else if (rb) { - tcg_gen_extu_i32_tl(ret, cpu_R[rb]); + ret = cpu_R[rb]; } else { - tcg_gen_movi_tl(ret, 0); + ret = tcg_constant_i32(0); } if ((ra == 1 || rb == 1) && dc->cfg->stackprot) { @@ -642,15 +628,16 @@ static TCGv compute_ldst_addr_typea(DisasContext *dc, int ra, int rb) static TCGv compute_ldst_addr_typeb(DisasContext *dc, int ra, int imm) { - TCGv ret = tcg_temp_new(); + TCGv ret; /* If any of the regs is r0, set t to the value of the other reg. */ - if (ra) { - TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_addi_i32(tmp, cpu_R[ra], imm); - tcg_gen_extu_i32_tl(ret, tmp); + if (ra && imm) { + ret = tcg_temp_new_i32(); + tcg_gen_addi_i32(ret, cpu_R[ra], imm); + } else if (ra) { + ret = cpu_R[ra]; } else { - tcg_gen_movi_tl(ret, (uint32_t)imm); + ret = tcg_constant_i32(imm); } if (ra == 1 && dc->cfg->stackprot) { @@ -660,23 +647,23 @@ static TCGv compute_ldst_addr_typeb(DisasContext *dc, int ra, int imm) } #ifndef CONFIG_USER_ONLY -static TCGv compute_ldst_addr_ea(DisasContext *dc, int ra, int rb) +static TCGv_i64 compute_ldst_addr_ea(DisasContext *dc, int ra, int rb) { int addr_size = dc->cfg->addr_size; - TCGv ret = tcg_temp_new(); + TCGv_i64 ret = tcg_temp_new_i64(); if (addr_size == 32 || ra == 0) { if (rb) { - tcg_gen_extu_i32_tl(ret, cpu_R[rb]); + tcg_gen_extu_i32_i64(ret, cpu_R[rb]); } else { - tcg_gen_movi_tl(ret, 0); + return tcg_constant_i64(0); } } else { if (rb) { tcg_gen_concat_i32_i64(ret, cpu_R[rb], cpu_R[ra]); } else { - tcg_gen_extu_i32_tl(ret, cpu_R[ra]); - tcg_gen_shli_tl(ret, ret, 32); + tcg_gen_extu_i32_i64(ret, cpu_R[ra]); + tcg_gen_shli_i64(ret, ret, 32); } if (addr_size < 64) { /* Mask off out of range bits. */ @@ -700,6 +687,20 @@ static void record_unaligned_ess(DisasContext *dc, int rd, tcg_set_insn_start_param(dc->base.insn_start, 1, iflags); } + +static void gen_alignment_check_ea(DisasContext *dc, TCGv_i64 ea, int rb, + int rd, MemOp size, bool store) +{ + if (rb && (dc->tb_flags & MSR_EE) && dc->cfg->unaligned_exceptions) { + TCGLabel *over = gen_new_label(); + + record_unaligned_ess(dc, rd, size, store); + + tcg_gen_brcondi_i64(TCG_COND_TSTEQ, ea, (1 << size) - 1, over); + gen_helper_unaligned_access(tcg_env, ea); + gen_set_label(over); + } +} #endif static inline MemOp mo_endian(DisasContext *dc) @@ -765,10 +766,11 @@ static bool trans_lbuea(DisasContext *dc, arg_typea *arg) return true; } #ifdef CONFIG_USER_ONLY - return true; + g_assert_not_reached(); #else - TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); - return do_load(dc, arg->rd, addr, MO_UB, MMU_NOMMU_IDX, false); + TCGv_i64 addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); + gen_helper_lbuea(reg_for_write(dc, arg->rd), tcg_env, addr); + return true; #endif } @@ -796,10 +798,13 @@ static bool trans_lhuea(DisasContext *dc, arg_typea *arg) return true; } #ifdef CONFIG_USER_ONLY - return true; + g_assert_not_reached(); #else - TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); - return do_load(dc, arg->rd, addr, MO_UW, MMU_NOMMU_IDX, false); + TCGv_i64 addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); + gen_alignment_check_ea(dc, addr, arg->rb, arg->rd, MO_16, false); + (mo_endian(dc) == MO_BE ? gen_helper_lhuea_be : gen_helper_lhuea_le) + (reg_for_write(dc, arg->rd), tcg_env, addr); + return true; #endif } @@ -827,10 +832,13 @@ static bool trans_lwea(DisasContext *dc, arg_typea *arg) return true; } #ifdef CONFIG_USER_ONLY - return true; + g_assert_not_reached(); #else - TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); - return do_load(dc, arg->rd, addr, MO_UL, MMU_NOMMU_IDX, false); + TCGv_i64 addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); + gen_alignment_check_ea(dc, addr, arg->rb, arg->rd, MO_32, false); + (mo_endian(dc) == MO_BE ? gen_helper_lwea_be : gen_helper_lwea_le) + (reg_for_write(dc, arg->rd), tcg_env, addr); + return true; #endif } @@ -918,10 +926,11 @@ static bool trans_sbea(DisasContext *dc, arg_typea *arg) return true; } #ifdef CONFIG_USER_ONLY - return true; + g_assert_not_reached(); #else - TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); - return do_store(dc, arg->rd, addr, MO_UB, MMU_NOMMU_IDX, false); + TCGv_i64 addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); + gen_helper_sbea(tcg_env, reg_for_read(dc, arg->rd), addr); + return true; #endif } @@ -949,10 +958,13 @@ static bool trans_shea(DisasContext *dc, arg_typea *arg) return true; } #ifdef CONFIG_USER_ONLY - return true; + g_assert_not_reached(); #else - TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); - return do_store(dc, arg->rd, addr, MO_UW, MMU_NOMMU_IDX, false); + TCGv_i64 addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); + gen_alignment_check_ea(dc, addr, arg->rb, arg->rd, MO_16, true); + (mo_endian(dc) == MO_BE ? gen_helper_shea_be : gen_helper_shea_le) + (tcg_env, reg_for_read(dc, arg->rd), addr); + return true; #endif } @@ -980,10 +992,13 @@ static bool trans_swea(DisasContext *dc, arg_typea *arg) return true; } #ifdef CONFIG_USER_ONLY - return true; + g_assert_not_reached(); #else - TCGv addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); - return do_store(dc, arg->rd, addr, MO_UL, MMU_NOMMU_IDX, false); + TCGv_i64 addr = compute_ldst_addr_ea(dc, arg->ra, arg->rb); + gen_alignment_check_ea(dc, addr, arg->rb, arg->rd, MO_32, true); + (mo_endian(dc) == MO_BE ? gen_helper_swea_be : gen_helper_swea_le) + (tcg_env, reg_for_read(dc, arg->rd), addr); + return true; #endif } @@ -1607,8 +1622,6 @@ static void mb_tr_init_disas_context(DisasContextBase *dcb, CPUState *cs) dc->cfg = &cpu->cfg; dc->tb_flags = dc->base.tb->flags; dc->ext_imm = dc->base.tb->cs_base; - dc->r0 = NULL; - dc->r0_set = false; dc->mem_index = cpu_mmu_index(cs, false); dc->jmp_cond = dc->tb_flags & D_FLAG ? TCG_COND_ALWAYS : TCG_COND_NEVER; dc->jmp_dest = -1; @@ -1647,11 +1660,6 @@ static void mb_tr_translate_insn(DisasContextBase *dcb, CPUState *cs) trap_illegal(dc, true); } - if (dc->r0) { - dc->r0 = NULL; - dc->r0_set = false; - } - /* Discard the imm global when its contents cannot be used. */ if ((dc->tb_flags & ~dc->tb_flags_to_set) & IMM_FLAG) { tcg_gen_discard_i32(cpu_imm); @@ -1829,7 +1837,7 @@ void mb_cpu_dump_state(CPUState *cs, FILE *f, int flags) } qemu_fprintf(f, "\nesr=0x%04x fsr=0x%02x btr=0x%08x edr=0x%x\n" - "ear=0x" TARGET_FMT_lx " slr=0x%x shr=0x%x\n", + "ear=0x%" PRIx64 " slr=0x%x shr=0x%x\n", env->esr, env->fsr, env->btr, env->edr, env->ear, env->slr, env->shr); diff --git a/target/mips/cpu.c b/target/mips/cpu.c index 4cbfb94..1f6c41f 100644 --- a/target/mips/cpu.c +++ b/target/mips/cpu.c @@ -560,6 +560,14 @@ static TCGTBCPUState mips_get_tb_cpu_state(CPUState *cs) }; } +#ifndef CONFIG_USER_ONLY +static vaddr mips_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ + return cpu_env(cs)->hflags & MIPS_HFLAG_AWRAP ? (int32_t)result : result; +} +#endif + static const TCGCPUOps mips_tcg_ops = { .mttcg_supported = TARGET_LONG_BITS == 32, .guest_default_memory_order = 0, @@ -573,6 +581,7 @@ static const TCGCPUOps mips_tcg_ops = { #if !defined(CONFIG_USER_ONLY) .tlb_fill = mips_cpu_tlb_fill, + .pointer_wrap = mips_pointer_wrap, .cpu_exec_interrupt = mips_cpu_exec_interrupt, .cpu_exec_halt = mips_cpu_has_work, .cpu_exec_reset = cpu_reset, diff --git a/target/mips/kvm.c b/target/mips/kvm.c index d67b7c1..ec53acb 100644 --- a/target/mips/kvm.c +++ b/target/mips/kvm.c @@ -61,6 +61,11 @@ int kvm_arch_irqchip_create(KVMState *s) return 0; } +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { CPUMIPSState *env = cpu_env(cs); diff --git a/target/mips/system/mips-qmp-cmds.c b/target/mips/system/mips-qmp-cmds.c index 7340ac7..d98d662 100644 --- a/target/mips/system/mips-qmp-cmds.c +++ b/target/mips/system/mips-qmp-cmds.c @@ -7,9 +7,19 @@ */ #include "qemu/osdep.h" -#include "qapi/qapi-commands-machine-target.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-machine.h" #include "cpu.h" +CpuModelExpansionInfo * +qmp_query_cpu_model_expansion(CpuModelExpansionType type, + CpuModelInfo *model, + Error **errp) +{ + error_setg(errp, "CPU model expansion is not supported on this target"); + return NULL; +} + static void mips_cpu_add_definition(gpointer data, gpointer user_data) { ObjectClass *oc = data; diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c index 054ad33..dfbb2df 100644 --- a/target/openrisc/cpu.c +++ b/target/openrisc/cpu.c @@ -265,6 +265,7 @@ static const TCGCPUOps openrisc_tcg_ops = { #ifndef CONFIG_USER_ONLY .tlb_fill = openrisc_cpu_tlb_fill, + .pointer_wrap = cpu_pointer_wrap_uint32, .cpu_exec_interrupt = openrisc_cpu_exec_interrupt, .cpu_exec_halt = openrisc_cpu_has_work, .cpu_exec_reset = cpu_reset, diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index 9642812..a0e77f2 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -7386,6 +7386,12 @@ static void ppc_cpu_exec_exit(CPUState *cs) cpu->vhyp_class->cpu_exec_exit(cpu->vhyp, cpu); } } + +static vaddr ppc_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ + return (cpu_env(cs)->hflags >> HFLAGS_64) & 1 ? result : (uint32_t)result; +} #endif /* CONFIG_TCG */ #endif /* !CONFIG_USER_ONLY */ @@ -7490,6 +7496,7 @@ static const TCGCPUOps ppc_tcg_ops = { .record_sigsegv = ppc_cpu_record_sigsegv, #else .tlb_fill = ppc_cpu_tlb_fill, + .pointer_wrap = ppc_pointer_wrap, .cpu_exec_interrupt = ppc_cpu_exec_interrupt, .cpu_exec_halt = ppc_cpu_has_work, .cpu_exec_reset = cpu_reset, diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 8a957c3..0156580 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -479,6 +479,11 @@ static void kvmppc_hw_debug_points_init(CPUPPCState *cenv) } } +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); diff --git a/target/ppc/ppc-qmp-cmds.c b/target/ppc/ppc-qmp-cmds.c index a25d86a..7022564 100644 --- a/target/ppc/ppc-qmp-cmds.c +++ b/target/ppc/ppc-qmp-cmds.c @@ -28,7 +28,8 @@ #include "qemu/ctype.h" #include "monitor/hmp-target.h" #include "monitor/hmp.h" -#include "qapi/qapi-commands-machine-target.h" +#include "qapi/error.h" +#include "qapi/qapi-commands-machine.h" #include "cpu-models.h" #include "cpu-qom.h" @@ -175,6 +176,15 @@ int target_get_monitor_def(CPUState *cs, const char *name, uint64_t *pval) return -EINVAL; } +CpuModelExpansionInfo * +qmp_query_cpu_model_expansion(CpuModelExpansionType type, + CpuModelInfo *model, + Error **errp) +{ + error_setg(errp, "CPU model expansion is not supported on this target"); + return NULL; +} + static void ppc_cpu_defs_entry(gpointer data, gpointer user_data) { ObjectClass *oc = data; diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index efb41fa..e1a04be 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -1472,6 +1472,11 @@ static int kvm_vcpu_enable_sbi_dbcn(RISCVCPU *cpu, CPUState *cs) return kvm_set_one_reg(cs, kvm_sbi_dbcn.kvm_reg_id, ®); } +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { int ret = 0; diff --git a/target/riscv/riscv-qmp-cmds.c b/target/riscv/riscv-qmp-cmds.c index d0a3243..8ba8aa0 100644 --- a/target/riscv/riscv-qmp-cmds.c +++ b/target/riscv/riscv-qmp-cmds.c @@ -25,7 +25,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" -#include "qapi/qapi-commands-machine-target.h" +#include "qapi/qapi-commands-machine.h" #include "qobject/qbool.h" #include "qobject/qdict.h" #include "qapi/qobject-input-visitor.h" diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c index 305912b..55fd9e5 100644 --- a/target/riscv/tcg/tcg-cpu.c +++ b/target/riscv/tcg/tcg-cpu.c @@ -237,6 +237,31 @@ static void riscv_restore_state_to_opc(CPUState *cs, env->excp_uw2 = data[2]; } +#ifndef CONFIG_USER_ONLY +static vaddr riscv_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ + CPURISCVState *env = cpu_env(cs); + uint32_t pm_len; + bool pm_signext; + + if (cpu_address_xl(env) == MXL_RV32) { + return (uint32_t)result; + } + + pm_len = riscv_pm_get_pmlen(riscv_pm_get_pmm(env)); + if (pm_len == 0) { + return result; + } + + pm_signext = riscv_cpu_virt_mem_enabled(env); + if (pm_signext) { + return sextract64(result, 0, 64 - pm_len); + } + return extract64(result, 0, 64 - pm_len); +} +#endif + const TCGCPUOps riscv_tcg_ops = { .mttcg_supported = true, .guest_default_memory_order = 0, @@ -250,6 +275,7 @@ const TCGCPUOps riscv_tcg_ops = { #ifndef CONFIG_USER_ONLY .tlb_fill = riscv_cpu_tlb_fill, + .pointer_wrap = riscv_pointer_wrap, .cpu_exec_interrupt = riscv_cpu_exec_interrupt, .cpu_exec_halt = riscv_cpu_has_work, .cpu_exec_reset = cpu_reset, diff --git a/target/rx/cpu.c b/target/rx/cpu.c index 36eba75..c6dd5d6 100644 --- a/target/rx/cpu.c +++ b/target/rx/cpu.c @@ -225,6 +225,7 @@ static const TCGCPUOps rx_tcg_ops = { .restore_state_to_opc = rx_restore_state_to_opc, .mmu_index = rx_cpu_mmu_index, .tlb_fill = rx_cpu_tlb_fill, + .pointer_wrap = cpu_pointer_wrap_uint32, .cpu_exec_interrupt = rx_cpu_exec_interrupt, .cpu_exec_halt = rx_cpu_has_work, diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c index 9c1158e..f05ce31 100644 --- a/target/s390x/cpu.c +++ b/target/s390x/cpu.c @@ -347,6 +347,14 @@ static TCGTBCPUState s390x_get_tb_cpu_state(CPUState *cs) }; } +#ifndef CONFIG_USER_ONLY +static vaddr s390_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ + return wrap_address(cpu_env(cs), result); +} +#endif + static const TCGCPUOps s390_tcg_ops = { .mttcg_supported = true, .precise_smc = true, @@ -367,6 +375,7 @@ static const TCGCPUOps s390_tcg_ops = { .record_sigbus = s390_cpu_record_sigbus, #else .tlb_fill = s390_cpu_tlb_fill, + .pointer_wrap = s390_pointer_wrap, .cpu_exec_interrupt = s390_cpu_exec_interrupt, .cpu_exec_halt = s390_cpu_has_work, .cpu_exec_reset = cpu_reset, diff --git a/target/s390x/cpu_models_system.c b/target/s390x/cpu_models_system.c index 4351182..9d84faa 100644 --- a/target/s390x/cpu_models_system.c +++ b/target/s390x/cpu_models_system.c @@ -19,7 +19,7 @@ #include "qapi/visitor.h" #include "qapi/qobject-input-visitor.h" #include "qobject/qdict.h" -#include "qapi/qapi-commands-machine-target.h" +#include "qapi/qapi-commands-machine.h" static void list_add_feat(const char *name, void *opaque); diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c index a814ece..8218e64 100644 --- a/target/s390x/gen-features.c +++ b/target/s390x/gen-features.c @@ -884,9 +884,6 @@ static uint16_t qemu_MIN[] = { */ S390_FEAT_FLOATING_POINT_EXT, S390_FEAT_ZPCI, -}; - -static uint16_t qemu_V4_1[] = { S390_FEAT_STFLE_53, S390_FEAT_VECTOR, }; @@ -1049,7 +1046,6 @@ static FeatGroupDefSpec FeatGroupDef[] = { *******************************/ static FeatGroupDefSpec QemuFeatDef[] = { QEMU_FEAT_INITIALIZER(MIN), - QEMU_FEAT_INITIALIZER(V4_1), QEMU_FEAT_INITIALIZER(V6_0), QEMU_FEAT_INITIALIZER(V6_2), QEMU_FEAT_INITIALIZER(V7_0), diff --git a/target/s390x/ioinst.c b/target/s390x/ioinst.c index fe62ba5..2320dd4 100644 --- a/target/s390x/ioinst.c +++ b/target/s390x/ioinst.c @@ -18,6 +18,7 @@ #include "trace.h" #include "hw/s390x/s390-pci-bus.h" #include "target/s390x/kvm/pv.h" +#include "hw/s390x/ap-bridge.h" /* All I/O instructions but chsc use the s format */ static uint64_t get_address_from_regs(CPUS390XState *env, uint32_t ipb, @@ -574,13 +575,19 @@ out: static int chsc_sei_nt0_get_event(void *res) { - /* no events yet */ + if (s390_has_feat(S390_FEAT_AP)) { + return ap_chsc_sei_nt0_get_event(res); + } + return 1; } static int chsc_sei_nt0_have_event(void) { - /* no events yet */ + if (s390_has_feat(S390_FEAT_AP)) { + return ap_chsc_sei_nt0_have_event(); + } + return 0; } diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c index 6cd2ebc..67d9a19 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c @@ -398,6 +398,11 @@ unsigned long kvm_arch_vcpu_id(CPUState *cpu) return cpu->cpu_index; } +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp) +{ + return 0; +} + int kvm_arch_init_vcpu(CPUState *cs) { unsigned int max_cpus = MACHINE(qdev_get_machine())->smp.max_cpus; diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c index b35f18e..4f561e8 100644 --- a/target/sh4/cpu.c +++ b/target/sh4/cpu.c @@ -296,6 +296,7 @@ static const TCGCPUOps superh_tcg_ops = { #ifndef CONFIG_USER_ONLY .tlb_fill = superh_cpu_tlb_fill, + .pointer_wrap = cpu_pointer_wrap_notreached, .cpu_exec_interrupt = superh_cpu_exec_interrupt, .cpu_exec_halt = superh_cpu_has_work, .cpu_exec_reset = cpu_reset, diff --git a/target/sh4/translate.c b/target/sh4/translate.c index bf8828f..70fd13a 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -54,7 +54,7 @@ typedef struct DisasContext { #define UNALIGN(C) (ctx->tbflags & TB_FLAG_UNALIGN ? MO_UNALN : MO_ALIGN) #else #define IS_USER(ctx) (!(ctx->tbflags & (1u << SR_MD))) -#define UNALIGN(C) 0 +#define UNALIGN(C) MO_ALIGN #endif /* Target-specific values for ctx->base.is_jmp. */ diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c index 2a3e408..ed7701b 100644 --- a/target/sparc/cpu.c +++ b/target/sparc/cpu.c @@ -1002,6 +1002,18 @@ static const struct SysemuCPUOps sparc_sysemu_ops = { #ifdef CONFIG_TCG #include "accel/tcg/cpu-ops.h" +#ifndef CONFIG_USER_ONLY +static vaddr sparc_pointer_wrap(CPUState *cs, int mmu_idx, + vaddr result, vaddr base) +{ +#ifdef TARGET_SPARC64 + return cpu_env(cs)->pstate & PS_AM ? (uint32_t)result : result; +#else + return (uint32_t)result; +#endif +} +#endif + static const TCGCPUOps sparc_tcg_ops = { /* * From Oracle SPARC Architecture 2015: @@ -1036,6 +1048,7 @@ static const TCGCPUOps sparc_tcg_ops = { #ifndef CONFIG_USER_ONLY .tlb_fill = sparc_cpu_tlb_fill, + .pointer_wrap = sparc_pointer_wrap, .cpu_exec_interrupt = sparc_cpu_exec_interrupt, .cpu_exec_halt = sparc_cpu_has_work, .cpu_exec_reset = cpu_reset, diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c index a493341..29fd166 100644 --- a/target/sparc/fop_helper.c +++ b/target/sparc/fop_helper.c @@ -445,7 +445,6 @@ static uint32_t finish_fcmp(CPUSPARCState *env, FloatRelation r, uintptr_t ra) case float_relation_greater: return 2; case float_relation_unordered: - env->fsr |= FSR_NVA; return 3; } g_assert_not_reached(); diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c index e56f90f..4f035b6 100644 --- a/target/tricore/cpu.c +++ b/target/tricore/cpu.c @@ -190,6 +190,7 @@ static const TCGCPUOps tricore_tcg_ops = { .restore_state_to_opc = tricore_restore_state_to_opc, .mmu_index = tricore_cpu_mmu_index, .tlb_fill = tricore_cpu_tlb_fill, + .pointer_wrap = cpu_pointer_wrap_uint32, .cpu_exec_interrupt = tricore_cpu_exec_interrupt, .cpu_exec_halt = tricore_cpu_has_work, .cpu_exec_reset = cpu_reset, diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c index 91b71b6c..ea9b6df 100644 --- a/target/xtensa/cpu.c +++ b/target/xtensa/cpu.c @@ -318,6 +318,7 @@ static const TCGCPUOps xtensa_tcg_ops = { #ifndef CONFIG_USER_ONLY .tlb_fill = xtensa_cpu_tlb_fill, + .pointer_wrap = cpu_pointer_wrap_uint32, .cpu_exec_interrupt = xtensa_cpu_exec_interrupt, .cpu_exec_halt = xtensa_cpu_has_work, .cpu_exec_reset = cpu_reset, diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 4cb647c..3b088b7 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1661,7 +1661,6 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, unsigned s_mask = (1u << s_bits) - 1; unsigned mem_index = get_mmuidx(oi); TCGReg addr_adj; - TCGType mask_type; uint64_t compare_mask; ldst = new_ldst_label(s); @@ -1669,9 +1668,6 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, ldst->oi = oi; ldst->addr_reg = addr_reg; - mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32 - ? TCG_TYPE_I64 : TCG_TYPE_I32); - /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}. */ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0); QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8); @@ -1679,9 +1675,9 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, tlb_mask_table_ofs(s, mem_index), 1, 0); /* Extract the TLB index from the address into X0. */ - tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64, + tcg_out_insn(s, 3502S, AND_LSR, TCG_TYPE_I64, TCG_REG_TMP0, TCG_REG_TMP0, addr_reg, - s->page_bits - CPU_TLB_ENTRY_BITS); + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); /* Add the tlb_table pointer, forming the CPUTLBEntry address. */ tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0); @@ -1707,7 +1703,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, tcg_out_insn(s, 3401, ADDI, addr_type, addr_adj, addr_reg, s_mask - a_mask); } - compare_mask = (uint64_t)s->page_mask | a_mask; + compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; /* Store the page mask part of the address into TMP2. */ tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2, diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 447e435..836894b 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1427,7 +1427,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, /* Extract the tlb index from the address into R0. */ tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addr, - SHIFT_IMM_LSR(s->page_bits - CPU_TLB_ENTRY_BITS)); + SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)); /* * Add the tlb_table pointer, creating the CPUTLBEntry address in R1. @@ -1463,8 +1463,8 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr, addr, s_mask - a_mask); } - if (use_armv7_instructions && s->page_bits <= 16) { - tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(s->page_mask | a_mask)); + if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) { + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask)); tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP, t_addr, TCG_REG_TMP, 0); tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, @@ -1475,10 +1475,10 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addr, a_mask); } tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr, - SHIFT_IMM_LSR(s->page_bits)); + SHIFT_IMM_LSR(TARGET_PAGE_BITS)); tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, - SHIFT_IMM_LSL(s->page_bits)); + SHIFT_IMM_LSL(TARGET_PAGE_BITS)); } } else if (a_mask) { ldst = new_ldst_label(s); diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 09fce27..088c6c9 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2199,16 +2199,14 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW); if (TCG_TYPE_PTR == TCG_TYPE_I64) { hrexw = P_REXW; - if (s->page_bits + s->tlb_dyn_max_bits > 32) { - tlbtype = TCG_TYPE_I64; - tlbrexw = P_REXW; - } + tlbtype = TCG_TYPE_I64; + tlbrexw = P_REXW; } } tcg_out_mov(s, tlbtype, TCG_REG_L0, addr); tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0, - s->page_bits - CPU_TLB_ENTRY_BITS); + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0, fast_ofs + offsetof(CPUTLBDescFast, mask)); @@ -2227,7 +2225,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1, addr, s_mask - a_mask); } - tlb_mask = s->page_mask | a_mask; + tlb_mask = TARGET_PAGE_MASK | a_mask; tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0); /* cmp 0(TCG_REG_L0), TCG_REG_L1 */ diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index e5580d6..10c6921 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1065,7 +1065,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs); tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg, - s->page_bits - CPU_TLB_ENTRY_BITS); + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0); tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1); @@ -1091,7 +1091,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, tcg_out_mov(s, addr_type, TCG_REG_TMP1, addr_reg); } tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO, - a_bits, s->page_bits - 1); + a_bits, TARGET_PAGE_BITS - 1); /* Compare masked address with the TLB entry. */ ldst->label_ptr[0] = s->code_ptr; diff --git a/tcg/meson.build b/tcg/meson.build index bd2821e..706a6eb 100644 --- a/tcg/meson.build +++ b/tcg/meson.build @@ -27,5 +27,5 @@ if host_os == 'linux' tcg_ss.add(files('perf.c')) endif -libuser_ss.add_all(tcg_ss) -libsystem_ss.add_all(tcg_ss) +user_ss.add_all(tcg_ss) +system_ss.add_all(tcg_ss) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 2c0457e..400eafb 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1199,9 +1199,9 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, /* Extract the TLB index from the address into TMP3. */ if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) { tcg_out_opc_sa(s, OPC_SRL, TCG_TMP3, addr, - s->page_bits - CPU_TLB_ENTRY_BITS); + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); } else { - tcg_out_dsrl(s, TCG_TMP3, addr, s->page_bits - CPU_TLB_ENTRY_BITS); + tcg_out_dsrl(s, TCG_TMP3, addr, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); } tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0); @@ -1224,7 +1224,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, * For unaligned accesses, compare against the end of the access to * verify that it does not cross a page boundary. */ - tcg_out_movi(s, addr_type, TCG_TMP1, s->page_mask | a_mask); + tcg_out_movi(s, addr_type, TCG_TMP1, TARGET_PAGE_MASK | a_mask); if (a_mask < s_mask) { tcg_out_opc_imm(s, (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32 @@ -334,7 +334,7 @@ void perf_report_code(uint64_t guest_pc, TranslationBlock *tb, /* FIXME: This replicates the restore_state_to_opc() logic. */ q[insn].address = gen_insn_data[insn * INSN_START_WORDS + 0]; if (tb_cflags(tb) & CF_PCREL) { - q[insn].address |= (guest_pc & qemu_target_page_mask()); + q[insn].address |= guest_pc & TARGET_PAGE_MASK; } q[insn].flags = DEBUGINFO_SYMBOL | (jitdump ? DEBUGINFO_LINE : 0); } diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 2e94778..b8b23d4 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2440,10 +2440,10 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, /* Extract the page index, shifted into place for tlb index. */ if (TCG_TARGET_REG_BITS == 32) { tcg_out_shri32(s, TCG_REG_R0, addr, - s->page_bits - CPU_TLB_ENTRY_BITS); + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); } else { tcg_out_shri64(s, TCG_REG_R0, addr, - s->page_bits - CPU_TLB_ENTRY_BITS); + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); } tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0)); @@ -2480,7 +2480,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, a_bits = s_bits; } tcg_out_rlw(s, RLWINM, TCG_REG_R0, addr, 0, - (32 - a_bits) & 31, 31 - s->page_bits); + (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS); } else { TCGReg t = addr; @@ -2501,13 +2501,13 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, /* Mask the address for the requested alignment. */ if (addr_type == TCG_TYPE_I32) { tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0, - (32 - a_bits) & 31, 31 - s->page_bits); + (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS); } else if (a_bits == 0) { - tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits); + tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS); } else { tcg_out_rld(s, RLDICL, TCG_REG_R0, t, - 64 - s->page_bits, s->page_bits - a_bits); - tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0); + 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits); + tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0); } } diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index f9417d1..1800fd5 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1706,7 +1706,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase, tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs); tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg, - s->page_bits - CPU_TLB_ENTRY_BITS); + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0); tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1); @@ -1722,7 +1722,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase, tcg_out_opc_imm(s, addr_type == TCG_TYPE_I32 ? OPC_ADDIW : OPC_ADDI, addr_adj, addr_reg, s_mask - a_mask); } - compare_mask = s->page_mask | a_mask; + compare_mask = TARGET_PAGE_MASK | a_mask; if (compare_mask == sextreg(compare_mask, 0, 12)) { tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_adj, compare_mask); } else { diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 7ca0071..84a9e73 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -2004,7 +2004,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, ldst->addr_reg = addr_reg; tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, addr_reg, TCG_REG_NONE, - s->page_bits - CPU_TLB_ENTRY_BITS); + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); tcg_out_insn(s, RXY, NG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, mask_off); tcg_out_insn(s, RXY, AG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, table_off); @@ -2016,7 +2016,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, * byte of the access. */ a_off = (a_mask >= s_mask ? 0 : s_mask - a_mask); - tlb_mask = (uint64_t)s->page_mask | a_mask; + tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; if (a_off == 0) { tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask); } else { diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 9e004fb..5e5c3f1 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1120,7 +1120,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, /* Extract the page index, shifted into place for tlb index. */ tcg_out_arithi(s, TCG_REG_T1, addr_reg, - s->page_bits - CPU_TLB_ENTRY_BITS, SHIFT_SRL); + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS, SHIFT_SRL); tcg_out_arith(s, TCG_REG_T1, TCG_REG_T1, TCG_REG_T2, ARITH_AND); /* Add the tlb_table pointer, creating the CPUTLBEntry address into R2. */ @@ -1136,7 +1136,7 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h, h->base = TCG_REG_T1; /* Mask out the page offset, except for the required alignment. */ - compare_mask = s->page_mask | a_mask; + compare_mask = TARGET_PAGE_MASK | a_mask; if (check_fit_tl(compare_mask, 13)) { tcg_out_arithi(s, TCG_REG_T3, addr_reg, compare_mask, ARITH_AND); } else { diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c index fa9e522..5484960 100644 --- a/tcg/tcg-op-ldst.c +++ b/tcg/tcg-op-ldst.c @@ -27,6 +27,7 @@ #include "tcg/tcg-temp-internal.h" #include "tcg/tcg-op-common.h" #include "tcg/tcg-mo.h" +#include "exec/target_page.h" #include "exec/translation-block.h" #include "exec/plugin-gen.h" #include "tcg-internal.h" @@ -40,7 +41,7 @@ static void check_max_alignment(unsigned a_bits) * FIXME: Must keep the count up-to-date with "exec/tlb-flags.h". */ if (tcg_use_softmmu) { - tcg_debug_assert(a_bits + 5 <= tcg_ctx->page_bits); + tcg_debug_assert(a_bits + 5 <= TARGET_PAGE_BITS); } } @@ -34,6 +34,7 @@ #include "qemu/cacheflush.h" #include "qemu/cacheinfo.h" #include "qemu/timer.h" +#include "exec/target_page.h" #include "exec/translation-block.h" #include "exec/tlb-common.h" #include "tcg/startup.h" diff --git a/tests/data/uefi-boot-images/bios-tables-test.loongarch64.iso.qcow2 b/tests/data/uefi-boot-images/bios-tables-test.loongarch64.iso.qcow2 Binary files differnew file mode 100644 index 0000000..18daee0 --- /dev/null +++ b/tests/data/uefi-boot-images/bios-tables-test.loongarch64.iso.qcow2 diff --git a/tests/docker/Makefile.include b/tests/docker/Makefile.include index fa1cbb6..3959d8a 100644 --- a/tests/docker/Makefile.include +++ b/tests/docker/Makefile.include @@ -185,8 +185,10 @@ docker: docker-help: docker +# Where QEMU caches build artefacts +DOCKER_QEMU_CACHE_DIR := $$HOME/.cache/qemu # Use a global constant ccache directory to speed up repetitive builds -DOCKER_CCACHE_DIR := $$HOME/.cache/qemu-docker-ccache +DOCKER_QEMU_CCACHE_DIR := DOCKER_QEMU_CACHE_DIR/docker-ccache # This rule if for directly running against an arbitrary docker target. # It is called by the expanded docker targets (e.g. make @@ -195,7 +197,7 @@ DOCKER_CCACHE_DIR := $$HOME/.cache/qemu-docker-ccache # For example: make docker-run TEST="test-quick" IMAGE="debian:arm64" EXECUTABLE=./aarch64-linux-user/qemu-aarch64 # docker-run: docker-qemu-src - @mkdir -p "$(DOCKER_CCACHE_DIR)" + @mkdir -p "$(DOCKER_QEMU_CCACHE_DIR)" @if test -z "$(IMAGE)" || test -z "$(TEST)"; \ then echo "Invalid target $(IMAGE)/$(TEST)"; exit 1; \ fi @@ -222,8 +224,8 @@ docker-run: docker-qemu-src -e V=$V -e J=$J -e DEBUG=$(DEBUG) \ -e SHOW_ENV=$(SHOW_ENV) \ $(if $(NOUSER),, \ - -e CCACHE_DIR=/var/tmp/ccache \ - -v $(DOCKER_CCACHE_DIR):/var/tmp/ccache:z \ + -v $(DOCKER_QEMU_CACHE_DIR):$(DOCKER_QEMU_CACHE_DIR) \ + -e CCACHE_DIR=$(DOCKER_QEMU_CCACHE_DIR) \ ) \ -v $$(readlink -e $(DOCKER_SRC_COPY)):/var/tmp/qemu:z$(COMMA)ro \ $(IMAGE) \ diff --git a/tests/docker/dockerfiles/fedora-rust-nightly.docker b/tests/docker/dockerfiles/fedora-rust-nightly.docker index fe4a6ed..4a03330 100644 --- a/tests/docker/dockerfiles/fedora-rust-nightly.docker +++ b/tests/docker/dockerfiles/fedora-rust-nightly.docker @@ -156,6 +156,7 @@ ENV PYTHON "/usr/bin/python3" RUN dnf install -y wget ENV RUSTUP_HOME=/usr/local/rustup CARGO_HOME=/usr/local/cargo ENV RUSTC=/usr/local/rustup/toolchains/nightly-x86_64-unknown-linux-gnu/bin/rustc +ENV RUSTDOC=/usr/local/rustup/toolchains/nightly-x86_64-unknown-linux-gnu/bin/rustdoc ENV CARGO=/usr/local/rustup/toolchains/nightly-x86_64-unknown-linux-gnu/bin/cargo RUN set -eux && \ rustArch='x86_64-unknown-linux-gnu' && \ @@ -170,6 +171,7 @@ RUN set -eux && \ /usr/local/cargo/bin/rustup run nightly cargo --version && \ /usr/local/cargo/bin/rustup run nightly rustc --version && \ test "$CARGO" = "$(/usr/local/cargo/bin/rustup +nightly which cargo)" && \ + test "$RUSTDOC" = "$(/usr/local/cargo/bin/rustup +nightly which rustdoc)" && \ test "$RUSTC" = "$(/usr/local/cargo/bin/rustup +nightly which rustc)" ENV PATH=$CARGO_HOME/bin:$PATH RUN /usr/local/cargo/bin/rustup run nightly cargo install bindgen-cli diff --git a/tests/docker/dockerfiles/ubuntu2204.docker b/tests/docker/dockerfiles/ubuntu2204.docker index 4a1cf2b..28a6f93 100644 --- a/tests/docker/dockerfiles/ubuntu2204.docker +++ b/tests/docker/dockerfiles/ubuntu2204.docker @@ -151,6 +151,7 @@ ENV MAKE "/usr/bin/make" ENV NINJA "/usr/bin/ninja" ENV PYTHON "/usr/bin/python3" ENV RUSTC=/usr/bin/rustc-1.77 +ENV RUSTDOC=/usr/bin/rustdoc-1.77 ENV CARGO_HOME=/usr/local/cargo ENV PATH=$CARGO_HOME/bin:$PATH RUN DEBIAN_FRONTEND=noninteractive eatmydata \ diff --git a/tests/functional/meson.build b/tests/functional/meson.build index 52b4706..e9f19d5 100644 --- a/tests/functional/meson.build +++ b/tests/functional/meson.build @@ -135,8 +135,10 @@ tests_arm_system_thorough = [ 'arm_orangepi', 'arm_quanta_gsj', 'arm_raspi2', + 'arm_realview', 'arm_replay', 'arm_smdkc210', + 'arm_stellaris', 'arm_sx1', 'arm_vexpress', 'arm_virt', @@ -149,6 +151,7 @@ tests_arm_linuxuser_thorough = [ tests_avr_system_thorough = [ 'avr_mega2560', + 'avr_uno', ] tests_hppa_system_quick = [ @@ -311,6 +314,7 @@ tests_x86_64_system_quick = [ 'virtio_version', 'x86_cpu_model_versions', 'vnc', + 'memlock', ] tests_x86_64_system_thorough = [ @@ -413,4 +417,4 @@ endforeach run_target('precache-functional', depends: precache_all, - command: ['true']) + command: [python, '-c', '']) diff --git a/tests/functional/qemu_test/__init__.py b/tests/functional/qemu_test/__init__.py index af41c2c..6e666a0 100644 --- a/tests/functional/qemu_test/__init__.py +++ b/tests/functional/qemu_test/__init__.py @@ -15,6 +15,6 @@ from .testcase import QemuBaseTest, QemuUserTest, QemuSystemTest from .linuxkernel import LinuxKernelTest from .decorators import skipIfMissingCommands, skipIfNotMachine, \ skipFlakyTest, skipUntrustedTest, skipBigDataTest, skipSlowTest, \ - skipIfMissingImports, skipIfOperatingSystem + skipIfMissingImports, skipIfOperatingSystem, skipLockedMemoryTest from .archive import archive_extract from .uncompress import uncompress diff --git a/tests/functional/qemu_test/decorators.py b/tests/functional/qemu_test/decorators.py index 50d29de..c0d1567 100644 --- a/tests/functional/qemu_test/decorators.py +++ b/tests/functional/qemu_test/decorators.py @@ -5,6 +5,7 @@ import importlib import os import platform +import resource from unittest import skipIf, skipUnless from .cmd import which @@ -131,3 +132,20 @@ def skipIfMissingImports(*args): return skipUnless(has_imports, 'required import(s) "%s" not installed' % ", ".join(args)) + +''' +Decorator to skip execution of a test if the system's +locked memory limit is below the required threshold. +Takes required locked memory threshold in kB. +Example: + + @skipLockedMemoryTest(2_097_152) +''' +def skipLockedMemoryTest(locked_memory): + # get memlock hard limit in bytes + _, ulimit_memory = resource.getrlimit(resource.RLIMIT_MEMLOCK) + + return skipUnless( + ulimit_memory == resource.RLIM_INFINITY or ulimit_memory >= locked_memory * 1024, + f'Test required {locked_memory} kB of available locked memory', + ) diff --git a/tests/functional/test_arm_realview.py b/tests/functional/test_arm_realview.py new file mode 100755 index 0000000..82cc964 --- /dev/null +++ b/tests/functional/test_arm_realview.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +# +# Functional test that boots a Linux kernel on a realview arm machine +# and checks the console +# +# SPDX-License-Identifier: GPL-2.0-or-later + +from qemu_test import LinuxKernelTest, exec_command_and_wait_for_pattern +from qemu_test import Asset + + +class RealviewMachine(LinuxKernelTest): + + ASSET_REALVIEW_MPCORE = Asset( + ('https://archive.openwrt.org/chaos_calmer/15.05.1/realview/generic/' + 'openwrt-15.05.1-realview-vmlinux-initramfs.elf'), + 'd3a01037f33e7512d46d50975588d5c3a0e0cbf25f37afab44775c2a2be523e6') + + def test_realview_ep_mpcore(self): + self.require_netdev('user') + self.set_machine('realview-eb-mpcore') + kernel_path = self.ASSET_REALVIEW_MPCORE.fetch() + self.vm.set_console() + kernel_param = 'console=ttyAMA0 mem=128M quiet' + self.vm.add_args('-kernel', kernel_path, + '-append', kernel_param) + self.vm.launch() + self.wait_for_console_pattern('Please press Enter to activate') + prompt = ':/#' + exec_command_and_wait_for_pattern(self, '', prompt) + exec_command_and_wait_for_pattern(self, 'dmesg', kernel_param) + self.wait_for_console_pattern(prompt) + exec_command_and_wait_for_pattern(self, + ('while ! dmesg | grep "br-lan: port 1(eth0) entered" ;' + ' do sleep 1 ; done'), + 'entered forwarding state') + self.wait_for_console_pattern(prompt) + exec_command_and_wait_for_pattern(self, + 'while ! ifconfig | grep "10.0.2.15" ; do sleep 1 ; done', + 'addr:10.0.2.15') + self.wait_for_console_pattern(prompt) + exec_command_and_wait_for_pattern(self, 'ping -c 1 10.0.2.2', + '1 packets received, 0% packet loss') + + +if __name__ == '__main__': + LinuxKernelTest.main() diff --git a/tests/functional/test_arm_stellaris.py b/tests/functional/test_arm_stellaris.py new file mode 100755 index 0000000..cbd21cb --- /dev/null +++ b/tests/functional/test_arm_stellaris.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +# +# Functional test that checks the serial console of the stellaris machines +# +# SPDX-License-Identifier: GPL-2.0-or-later + +from qemu_test import QemuSystemTest, Asset, exec_command_and_wait_for_pattern +from qemu_test import wait_for_console_pattern + + +class StellarisMachine(QemuSystemTest): + + ASSET_DAY22 = Asset( + 'https://www.qemu-advent-calendar.org/2023/download/day22.tar.gz', + 'ae3a63ef4b7a22c21bfc7fc0d85e402fe95e223308ed23ac854405016431ff51') + + def test_lm3s6965evb(self): + self.set_machine('lm3s6965evb') + kernel_path = self.archive_extract(self.ASSET_DAY22, + member='day22/day22.bin') + self.vm.set_console() + self.vm.add_args('-kernel', kernel_path) + self.vm.launch() + + wait_for_console_pattern(self, 'In a one horse open') + + ASSET_NOTMAIN = Asset( + 'https://github.com/Ahelion/QemuArmM4FDemoSw/raw/master/build/notmain.bin', + '6ceda031aa081a420fca2fca9e137fa681d6e3820d820ad1917736cb265e611a') + + def test_lm3s811evb(self): + self.set_machine('lm3s811evb') + kernel_path = self.ASSET_NOTMAIN.fetch() + + self.vm.set_console() + self.vm.add_args('-cpu', 'cortex-m4') + self.vm.add_args('-kernel', kernel_path) + self.vm.launch() + + # The test kernel emits an initial '!' and then waits for input. + # For each character that we send it responds with a certain + # other ASCII character. + wait_for_console_pattern(self, '!') + exec_command_and_wait_for_pattern(self, '789', 'cdf') + + +if __name__ == '__main__': + QemuSystemTest.main() diff --git a/tests/functional/test_avr_mega2560.py b/tests/functional/test_avr_mega2560.py index 8e47b42..6359b72 100755 --- a/tests/functional/test_avr_mega2560.py +++ b/tests/functional/test_avr_mega2560.py @@ -18,12 +18,10 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # -import time +from qemu_test import QemuSystemTest, Asset, wait_for_console_pattern -from qemu_test import QemuSystemTest, Asset class AVR6Machine(QemuSystemTest): - timeout = 5 ASSET_ROM = Asset(('https://github.com/seharris/qemu-avr-tests' '/raw/36c3e67b8755dcf/free-rtos/Demo' @@ -40,13 +38,12 @@ class AVR6Machine(QemuSystemTest): self.set_machine('arduino-mega-2560-v3') self.vm.add_args('-bios', rom_path) self.vm.add_args('-nographic') + self.vm.set_console() self.vm.launch() - time.sleep(2) - self.vm.shutdown() + wait_for_console_pattern(self, + 'XABCDEFGHIJKLMNOPQRSTUVWXABCDEFGHIJKLMNOPQRSTUVWXA') - self.assertIn('ABCDEFGHIJKLMNOPQRSTUVWXABCDEFGHIJKLMNOPQRSTUVWX', - self.vm.get_log()) if __name__ == '__main__': QemuSystemTest.main() diff --git a/tests/functional/test_avr_uno.py b/tests/functional/test_avr_uno.py new file mode 100755 index 0000000..adb3b73 --- /dev/null +++ b/tests/functional/test_avr_uno.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 +# +# QEMU AVR Arduino UNO functional test +# +# SPDX-License-Identifier: GPL-2.0-or-later + +from qemu_test import QemuSystemTest, Asset, wait_for_console_pattern + + +class UnoMachine(QemuSystemTest): + + ASSET_UNO = Asset( + ('https://github.com/RahulRNandan/LED_Blink_AVR/raw/' + 'c6d602cbb974a193/build/main.elf'), + '3009a4e2cf5c5b65142f538abdf66d4dc6bc6beab7e552fff9ae314583761b72') + + def test_uno(self): + """ + The binary constantly prints out 'LED Blink' + """ + self.set_machine('arduino-uno') + rom_path = self.ASSET_UNO.fetch() + + self.vm.add_args('-bios', rom_path) + self.vm.set_console() + self.vm.launch() + + wait_for_console_pattern(self, 'LED Blink') + + +if __name__ == '__main__': + QemuSystemTest.main() diff --git a/tests/functional/test_m68k_q800.py b/tests/functional/test_m68k_q800.py index 400b7ae..b3e6553 100755 --- a/tests/functional/test_m68k_q800.py +++ b/tests/functional/test_m68k_q800.py @@ -25,7 +25,8 @@ class Q800MachineTest(LinuxKernelTest): kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + 'console=ttyS0 vga=off') self.vm.add_args('-kernel', kernel_path, - '-append', kernel_command_line) + '-append', kernel_command_line, + '-audio', 'none') self.vm.launch() console_pattern = 'Kernel command line: %s' % kernel_command_line self.wait_for_console_pattern(console_pattern) diff --git a/tests/functional/test_m68k_replay.py b/tests/functional/test_m68k_replay.py index 18c1db5..213d6ae 100755 --- a/tests/functional/test_m68k_replay.py +++ b/tests/functional/test_m68k_replay.py @@ -24,7 +24,8 @@ class M68kReplay(ReplayKernelBase): kernel_command_line = (self.KERNEL_COMMON_COMMAND_LINE + 'console=ttyS0 vga=off') console_pattern = 'No filesystem could mount root' - self.run_rr(kernel_path, kernel_command_line, console_pattern) + self.run_rr(kernel_path, kernel_command_line, console_pattern, + args=('-audio', 'none')) ASSET_MCF5208 = Asset( 'https://qemu-advcal.gitlab.io/qac-best-of-multiarch/download/day07.tar.xz', diff --git a/tests/functional/test_mem_addr_space.py b/tests/functional/test_mem_addr_space.py index 2d9d31e..61b4a19 100755 --- a/tests/functional/test_mem_addr_space.py +++ b/tests/functional/test_mem_addr_space.py @@ -58,8 +58,8 @@ class MemAddrCheck(QemuSystemTest): should start fine. """ self.ensure_64bit_binary() - self.vm.add_args('-S', '-machine', 'q35', '-m', - '512,slots=1,maxmem=59.6G', + self.set_machine('q35') + self.vm.add_args('-S', '-m', '512,slots=1,maxmem=59.6G', '-cpu', 'pentium,pse36=on', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -76,8 +76,8 @@ class MemAddrCheck(QemuSystemTest): with pse36 above. """ self.ensure_64bit_binary() - self.vm.add_args('-S', '-machine', 'q35', '-m', - '512,slots=1,maxmem=59.6G', + self.set_machine('q35') + self.vm.add_args('-S', '-m', '512,slots=1,maxmem=59.6G', '-cpu', 'pentium,pae=on', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -93,8 +93,8 @@ class MemAddrCheck(QemuSystemTest): same options as the failing case above with pse36 cpu feature. """ self.ensure_64bit_binary() - self.vm.add_args('-machine', 'q35', '-m', - '512,slots=1,maxmem=59.5G', + self.set_machine('q35') + self.vm.add_args('-m', '512,slots=1,maxmem=59.5G', '-cpu', 'pentium,pse36=on', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -111,8 +111,8 @@ class MemAddrCheck(QemuSystemTest): with the same options as the case above. """ self.ensure_64bit_binary() - self.vm.add_args('-machine', 'q35', '-m', - '512,slots=1,maxmem=59.5G', + self.set_machine('q35') + self.vm.add_args('-m', '512,slots=1,maxmem=59.5G', '-cpu', 'pentium,pae=on', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -128,8 +128,8 @@ class MemAddrCheck(QemuSystemTest): with pse36 ON. """ self.ensure_64bit_binary() - self.vm.add_args('-machine', 'q35', '-m', - '512,slots=1,maxmem=59.5G', + self.set_machine('q35') + self.vm.add_args('-m', '512,slots=1,maxmem=59.5G', '-cpu', 'pentium2', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -148,8 +148,8 @@ class MemAddrCheck(QemuSystemTest): above 4 GiB due to the PCI hole and simplicity. """ self.ensure_64bit_binary() - self.vm.add_args('-S', '-machine', 'q35', '-m', - '512,slots=1,maxmem=4G', + self.set_machine('q35') + self.vm.add_args('-S', '-m', '512,slots=1,maxmem=4G', '-cpu', 'pentium', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -176,8 +176,8 @@ class MemAddrCheck(QemuSystemTest): make QEMU fail with the error message. """ self.ensure_64bit_binary() - self.vm.add_args('-S', '-machine', 'pc-q35-7.0', '-m', - '512,slots=1,maxmem=988G', + self.set_machine('pc-q35-7.0') + self.vm.add_args('-S', '-m', '512,slots=1,maxmem=988G', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -197,8 +197,8 @@ class MemAddrCheck(QemuSystemTest): than 988 GiB). """ self.ensure_64bit_binary() - self.vm.add_args('-S', '-machine', 'pc-q35-7.1', '-m', - '512,slots=1,maxmem=976G', + self.set_machine('pc-q35-7.1') + self.vm.add_args('-S', '-m', '512,slots=1,maxmem=976G', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -214,8 +214,8 @@ class MemAddrCheck(QemuSystemTest): successfully start when maxmem is < 988G. """ self.ensure_64bit_binary() - self.vm.add_args('-S', '-machine', 'pc-q35-7.0', '-m', - '512,slots=1,maxmem=987.5G', + self.set_machine('pc-q35-7.0') + self.vm.add_args('-S', '-m', '512,slots=1,maxmem=987.5G', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -231,8 +231,8 @@ class MemAddrCheck(QemuSystemTest): successfully start when maxmem is < 976G. """ self.ensure_64bit_binary() - self.vm.add_args('-S', '-machine', 'pc-q35-7.1', '-m', - '512,slots=1,maxmem=975.5G', + self.set_machine('pc-q35-7.1') + self.vm.add_args('-S', '-m', '512,slots=1,maxmem=975.5G', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -249,9 +249,9 @@ class MemAddrCheck(QemuSystemTest): "above_4G" memory starts at 4G. """ self.ensure_64bit_binary() + self.set_machine('pc-q35-7.1') self.vm.add_args('-S', '-cpu', 'Skylake-Server', - '-machine', 'pc-q35-7.1', '-m', - '512,slots=1,maxmem=976G', + '-m', '512,slots=1,maxmem=976G', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -274,9 +274,9 @@ class MemAddrCheck(QemuSystemTest): fail to start. """ self.ensure_64bit_binary() + self.set_machine('pc-q35-7.1') self.vm.add_args('-S', '-cpu', 'EPYC-v4,phys-bits=41', - '-machine', 'pc-q35-7.1', '-m', - '512,slots=1,maxmem=992G', + '-m', '512,slots=1,maxmem=992G', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -293,9 +293,9 @@ class MemAddrCheck(QemuSystemTest): QEMU should start fine. """ self.ensure_64bit_binary() + self.set_machine('pc-q35-7.1') self.vm.add_args('-S', '-cpu', 'EPYC-v4,phys-bits=41', - '-machine', 'pc-q35-7.1', '-m', - '512,slots=1,maxmem=990G', + '-m', '512,slots=1,maxmem=990G', '-display', 'none', '-object', 'memory-backend-ram,id=mem1,size=1G', '-device', 'pc-dimm,id=vm0,memdev=mem1') @@ -314,12 +314,12 @@ class MemAddrCheck(QemuSystemTest): alignment constraints with 40 bits (1 TiB) of processor physical bits. """ self.ensure_64bit_binary() + self.set_machine('q35') self.vm.add_args('-S', '-cpu', 'Skylake-Server,phys-bits=40', - '-machine', 'q35,cxl=on', '-m', - '512,slots=1,maxmem=987G', + '-m', '512,slots=1,maxmem=987G', '-display', 'none', '-device', 'pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1', - '-M', 'cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=1G') + '-M', 'cxl=on,cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=1G') self.vm.set_qmp_monitor(enabled=False) self.vm.launch() self.vm.wait() @@ -333,9 +333,10 @@ class MemAddrCheck(QemuSystemTest): with cxl enabled. """ self.ensure_64bit_binary() + self.set_machine('q35') self.vm.add_args('-S', '-cpu', 'Skylake-Server,phys-bits=40', - '-machine', 'q35,cxl=on', '-m', - '512,slots=1,maxmem=987G', + '-machine', 'cxl=on', + '-m', '512,slots=1,maxmem=987G', '-display', 'none', '-device', 'pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1') self.vm.set_qmp_monitor(enabled=False) diff --git a/tests/functional/test_memlock.py b/tests/functional/test_memlock.py new file mode 100755 index 0000000..2b515ff --- /dev/null +++ b/tests/functional/test_memlock.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python3 +# +# Functional test that check overcommit memlock options +# +# Copyright (c) Yandex Technologies LLC, 2025 +# +# Author: +# Alexandr Moshkov <dtalexundeer@yandex-team.ru> +# +# SPDX-License-Identifier: GPL-2.0-or-later + +import re + +from typing import Dict + +from qemu_test import QemuSystemTest +from qemu_test import skipLockedMemoryTest + + +STATUS_VALUE_PATTERN = re.compile(r'^(\w+):\s+(\d+) kB', re.MULTILINE) + + +@skipLockedMemoryTest(2_097_152) # 2GB +class MemlockTest(QemuSystemTest): + """ + Runs a guest with memlock options. + Then verify, that this options is working correctly + by checking the status file of the QEMU process. + """ + + def common_vm_setup_with_memlock(self, memlock): + self.vm.add_args('-overcommit', f'mem-lock={memlock}') + self.vm.launch() + + def test_memlock_off(self): + self.common_vm_setup_with_memlock('off') + + status = self.get_process_status_values(self.vm.get_pid()) + + self.assertTrue(status['VmLck'] == 0) + + def test_memlock_on(self): + self.common_vm_setup_with_memlock('on') + + status = self.get_process_status_values(self.vm.get_pid()) + + # VmLck > 0 kB and almost all memory is resident + self.assertTrue(status['VmLck'] > 0) + self.assertTrue(status['VmRSS'] >= status['VmSize'] * 0.70) + + def test_memlock_onfault(self): + self.common_vm_setup_with_memlock('on-fault') + + status = self.get_process_status_values(self.vm.get_pid()) + + # VmLck > 0 kB and only few memory is resident + self.assertTrue(status['VmLck'] > 0) + self.assertTrue(status['VmRSS'] <= status['VmSize'] * 0.30) + + def get_process_status_values(self, pid: int) -> Dict[str, int]: + result = {} + raw_status = self._get_raw_process_status(pid) + + for line in raw_status.split('\n'): + if m := STATUS_VALUE_PATTERN.match(line): + result[m.group(1)] = int(m.group(2)) + + return result + + def _get_raw_process_status(self, pid: int) -> str: + try: + with open(f'/proc/{pid}/status', 'r') as f: + return f.read() + except FileNotFoundError: + self.skipTest("Can't open status file of the process") + + +if __name__ == '__main__': + MemlockTest.main() diff --git a/tests/functional/test_microblaze_s3adsp1800.py b/tests/functional/test_microblaze_s3adsp1800.py index c93fa14..f093b16 100755 --- a/tests/functional/test_microblaze_s3adsp1800.py +++ b/tests/functional/test_microblaze_s3adsp1800.py @@ -25,12 +25,14 @@ class MicroblazeMachine(QemuSystemTest): ('http://www.qemu-advent-calendar.org/2023/download/day13.tar.gz'), 'b9b3d43c5dd79db88ada495cc6e0d1f591153fe41355e925d791fbf44de50c22') - def do_ballerina_be_test(self, machine): - self.set_machine(machine) + def do_ballerina_be_test(self, force_endianness=False): + self.set_machine('petalogix-s3adsp1800') self.archive_extract(self.ASSET_IMAGE_BE) self.vm.set_console() self.vm.add_args('-kernel', self.scratch_file('day17', 'ballerina.bin')) + if force_endianness: + self.vm.add_args('-M', 'endianness=big') self.vm.launch() wait_for_console_pattern(self, 'This architecture does not have ' 'kernel memory protection') @@ -39,12 +41,14 @@ class MicroblazeMachine(QemuSystemTest): # message, that's why we don't test for a later string here. This # needs some investigation by a microblaze wizard one day... - def do_xmaton_le_test(self, machine): + def do_xmaton_le_test(self, force_endianness=False): self.require_netdev('user') - self.set_machine(machine) + self.set_machine('petalogix-s3adsp1800') self.archive_extract(self.ASSET_IMAGE_LE) self.vm.set_console() self.vm.add_args('-kernel', self.scratch_file('day13', 'xmaton.bin')) + if force_endianness: + self.vm.add_args('-M', 'endianness=little') tftproot = self.scratch_file('day13') self.vm.add_args('-nic', f'user,tftp={tftproot}') self.vm.launch() @@ -59,9 +63,13 @@ class MicroblazeMachine(QemuSystemTest): class MicroblazeBigEndianMachine(MicroblazeMachine): ASSET_IMAGE_BE = MicroblazeMachine.ASSET_IMAGE_BE + ASSET_IMAGE_LE = MicroblazeMachine.ASSET_IMAGE_LE def test_microblaze_s3adsp1800_legacy_be(self): - self.do_ballerina_be_test('petalogix-s3adsp1800') + self.do_ballerina_be_test() + + def test_microblaze_s3adsp1800_legacy_le(self): + self.do_xmaton_le_test(force_endianness=True) if __name__ == '__main__': diff --git a/tests/functional/test_microblazeel_s3adsp1800.py b/tests/functional/test_microblazeel_s3adsp1800.py index ab59941..915902d 100755 --- a/tests/functional/test_microblazeel_s3adsp1800.py +++ b/tests/functional/test_microblazeel_s3adsp1800.py @@ -13,9 +13,13 @@ from test_microblaze_s3adsp1800 import MicroblazeMachine class MicroblazeLittleEndianMachine(MicroblazeMachine): ASSET_IMAGE_LE = MicroblazeMachine.ASSET_IMAGE_LE + ASSET_IMAGE_BE = MicroblazeMachine.ASSET_IMAGE_BE def test_microblaze_s3adsp1800_legacy_le(self): - self.do_xmaton_le_test('petalogix-s3adsp1800') + self.do_xmaton_le_test() + + def test_microblaze_s3adsp1800_legacy_be(self): + self.do_ballerina_be_test(force_endianness=True) if __name__ == '__main__': diff --git a/tests/functional/test_mips_malta.py b/tests/functional/test_mips_malta.py index 89b9556..30279f0 100755 --- a/tests/functional/test_mips_malta.py +++ b/tests/functional/test_mips_malta.py @@ -80,10 +80,8 @@ def mips_check_wheezy(test, kernel_path, image_path, kernel_command_line, exec_command_and_wait_for_pattern(test, 'cat /proc/devices', 'usb') exec_command_and_wait_for_pattern(test, 'cat /proc/ioports', ' : piix4_smbus') - # lspci for the host bridge does not work on big endian targets: - # https://gitlab.com/qemu-project/qemu/-/issues/2826 - # exec_command_and_wait_for_pattern(test, 'lspci -d 11ab:4620', - # 'GT-64120') + exec_command_and_wait_for_pattern(test, 'lspci -d 11ab:4620', + 'GT-64120') exec_command_and_wait_for_pattern(test, 'cat /sys/bus/i2c/devices/i2c-0/name', 'SMBus PIIX4 adapter') diff --git a/tests/functional/test_sparc64_tuxrun.py b/tests/functional/test_sparc64_tuxrun.py index 3be08d6..0d7b43d 100755 --- a/tests/functional/test_sparc64_tuxrun.py +++ b/tests/functional/test_sparc64_tuxrun.py @@ -24,6 +24,7 @@ class TuxRunSparc64Test(TuxRunBaselineTest): '479c3dc104c82b68be55e2c0c5c38cd473d0b37ad4badccde4775bb88ce34611') def test_sparc64(self): + self.set_machine('sun4u') self.root='sda' self.wait_for_shutdown=False self.common_tuxrun(kernel_asset=self.ASSET_SPARC64_KERNEL, diff --git a/tests/functional/test_vnc.py b/tests/functional/test_vnc.py index 5c0ee5f..f1dd159 100755 --- a/tests/functional/test_vnc.py +++ b/tests/functional/test_vnc.py @@ -31,6 +31,7 @@ def check_connect(port: int) -> bool: class Vnc(QemuSystemTest): def test_no_vnc_change_password(self): + self.set_machine('none') self.vm.add_args('-nodefaults', '-S') self.vm.launch() @@ -62,6 +63,7 @@ class Vnc(QemuSystemTest): raise excp def test_change_password_requires_a_password(self): + self.set_machine('none') self.vm.add_args('-nodefaults', '-S', '-vnc', ':1,to=999') self.launch_guarded() self.assertTrue(self.vm.qmp('query-vnc')['return']['enabled']) @@ -74,6 +76,7 @@ class Vnc(QemuSystemTest): 'Could not set password') def test_change_password(self): + self.set_machine('none') self.vm.add_args('-nodefaults', '-S', '-vnc', ':1,to=999,password=on') self.launch_guarded() self.assertTrue(self.vm.qmp('query-vnc')['return']['enabled']) @@ -103,6 +106,7 @@ class Vnc(QemuSystemTest): self.assertTrue(check_connect(c)) def test_change_listen(self): + self.set_machine('none') with Ports() as ports: a, b, c = ports.find_free_ports(3) self.do_test_change_listen(a, b, c) diff --git a/tests/include/meson.build b/tests/include/meson.build index 9abba30..8e8d1ec 100644 --- a/tests/include/meson.build +++ b/tests/include/meson.build @@ -13,4 +13,4 @@ test_qapi_outputs_extra = [ test_qapi_files_extra = custom_target('QAPI test (include)', output: test_qapi_outputs_extra, input: test_qapi_files, - command: 'true') + command: [python, '-c', '']) diff --git a/tests/lcitool/mappings.yml b/tests/lcitool/mappings.yml index 673baf3..8f0e95e 100644 --- a/tests/lcitool/mappings.yml +++ b/tests/lcitool/mappings.yml @@ -8,6 +8,10 @@ mappings: meson: OpenSUSELeap15: + # Use Meson from PyPI wherever Rust is enabled + Debian: + Fedora: + Ubuntu: python3: OpenSUSELeap15: python311-base @@ -72,7 +76,7 @@ mappings: pypi_mappings: # Request more recent version meson: - default: meson==1.5.0 + default: meson==1.8.1 # Drop packages that need devel headers python3-numpy: diff --git a/tests/lcitool/refresh b/tests/lcitool/refresh index 8474ea8..d3488b2 100755 --- a/tests/lcitool/refresh +++ b/tests/lcitool/refresh @@ -121,6 +121,7 @@ fedora_rustup_nightly_extras = [ "RUN dnf install -y wget\n", "ENV RUSTUP_HOME=/usr/local/rustup CARGO_HOME=/usr/local/cargo\n", "ENV RUSTC=/usr/local/rustup/toolchains/nightly-x86_64-unknown-linux-gnu/bin/rustc\n", + "ENV RUSTDOC=/usr/local/rustup/toolchains/nightly-x86_64-unknown-linux-gnu/bin/rustdoc\n", "ENV CARGO=/usr/local/rustup/toolchains/nightly-x86_64-unknown-linux-gnu/bin/cargo\n", "RUN set -eux && \\\n", " rustArch='x86_64-unknown-linux-gnu' && \\\n", @@ -135,6 +136,7 @@ fedora_rustup_nightly_extras = [ " /usr/local/cargo/bin/rustup run nightly cargo --version && \\\n", " /usr/local/cargo/bin/rustup run nightly rustc --version && \\\n", ' test "$CARGO" = "$(/usr/local/cargo/bin/rustup +nightly which cargo)" && \\\n', + ' test "$RUSTDOC" = "$(/usr/local/cargo/bin/rustup +nightly which rustdoc)" && \\\n', ' test "$RUSTC" = "$(/usr/local/cargo/bin/rustup +nightly which rustc)"\n', 'ENV PATH=$CARGO_HOME/bin:$PATH\n', 'RUN /usr/local/cargo/bin/rustup run nightly cargo install bindgen-cli\n', @@ -143,6 +145,7 @@ fedora_rustup_nightly_extras = [ ubuntu2204_rust_extras = [ "ENV RUSTC=/usr/bin/rustc-1.77\n", + "ENV RUSTDOC=/usr/bin/rustdoc-1.77\n", "ENV CARGO_HOME=/usr/local/cargo\n", 'ENV PATH=$CARGO_HOME/bin:$PATH\n', "RUN DEBIAN_FRONTEND=noninteractive eatmydata \\\n", diff --git a/tests/qemu-iotests/106 b/tests/qemu-iotests/106 index ae0fc46..5554843 100755 --- a/tests/qemu-iotests/106 +++ b/tests/qemu-iotests/106 @@ -40,6 +40,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt raw _supported_proto file fuse _supported_os Linux +_require_disk_usage # in kB CREATION_SIZE=128 diff --git a/tests/qemu-iotests/125 b/tests/qemu-iotests/125 index 46279d6..708e7c5 100755 --- a/tests/qemu-iotests/125 +++ b/tests/qemu-iotests/125 @@ -35,7 +35,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 get_image_size_on_host() { - echo $(($(stat -c '%b * %B' "$TEST_IMG_FILE"))) + disk_usage "$TEST_IMG_FILE" } # get standard environment and filters diff --git a/tests/qemu-iotests/175 b/tests/qemu-iotests/175 index f74f053..bbbf550 100755 --- a/tests/qemu-iotests/175 +++ b/tests/qemu-iotests/175 @@ -77,6 +77,7 @@ _supported_os Linux _default_cache_mode none _supported_cache_modes none directsync +_require_disk_usage size=$((1 * 1024 * 1024)) diff --git a/tests/qemu-iotests/221 b/tests/qemu-iotests/221 index c463fd4..eba00b8 100755 --- a/tests/qemu-iotests/221 +++ b/tests/qemu-iotests/221 @@ -41,6 +41,7 @@ _supported_os Linux _default_cache_mode writeback _supported_cache_modes writeback writethrough unsafe +_require_disk_usage echo echo "=== Check mapping of unaligned raw image ===" diff --git a/tests/qemu-iotests/240 b/tests/qemu-iotests/240 index 9b281e1..f8af9ff 100755 --- a/tests/qemu-iotests/240 +++ b/tests/qemu-iotests/240 @@ -81,8 +81,6 @@ class TestCase(iotests.QMPTestCase): self.vm.qmp_log('device_del', id='scsi-hd0') self.vm.event_wait('DEVICE_DELETED') - self.vm.qmp_log('device_add', id='scsi-hd1', driver='scsi-hd', drive='hd0', bus="scsi1.0") - self.vm.qmp_log('device_del', id='scsi-hd1') self.vm.event_wait('DEVICE_DELETED') self.vm.qmp_log('blockdev-del', node_name='hd0') diff --git a/tests/qemu-iotests/240.out b/tests/qemu-iotests/240.out index 89ed25e..10dcc42 100644 --- a/tests/qemu-iotests/240.out +++ b/tests/qemu-iotests/240.out @@ -46,10 +46,8 @@ {"execute": "device_add", "arguments": {"bus": "scsi0.0", "drive": "hd0", "driver": "scsi-hd", "id": "scsi-hd0"}} {"return": {}} {"execute": "device_add", "arguments": {"bus": "scsi1.0", "drive": "hd0", "driver": "scsi-hd", "id": "scsi-hd1"}} -{"error": {"class": "GenericError", "desc": "Cannot change iothread of active block backend"}} -{"execute": "device_del", "arguments": {"id": "scsi-hd0"}} {"return": {}} -{"execute": "device_add", "arguments": {"bus": "scsi1.0", "drive": "hd0", "driver": "scsi-hd", "id": "scsi-hd1"}} +{"execute": "device_del", "arguments": {"id": "scsi-hd0"}} {"return": {}} {"execute": "device_del", "arguments": {"id": "scsi-hd1"}} {"return": {}} diff --git a/tests/qemu-iotests/253 b/tests/qemu-iotests/253 index 35039d2..6da85e6 100755 --- a/tests/qemu-iotests/253 +++ b/tests/qemu-iotests/253 @@ -41,6 +41,7 @@ _supported_os Linux _default_cache_mode none _supported_cache_modes none directsync +_require_disk_usage echo echo "=== Check mapping of unaligned raw image ===" diff --git a/tests/qemu-iotests/308 b/tests/qemu-iotests/308 index ea81dc4..6eced3a 100755 --- a/tests/qemu-iotests/308 +++ b/tests/qemu-iotests/308 @@ -51,6 +51,7 @@ _unsupported_fmt vpc _supported_proto file # We create the FUSE export manually _supported_os Linux # We need /dev/urandom +_require_disk_usage # $1: Export ID # $2: Options (beyond the node-name and ID) @@ -290,7 +291,7 @@ echo '--- Try growing non-growable export ---' # Get the current size so we can write beyond the EOF orig_len=$(get_proto_len "$EXT_MP" "$TEST_IMG") -orig_disk_usage=$(stat -c '%b' "$TEST_IMG") +orig_disk_usage=$(disk_usage "$TEST_IMG") # Should fail (exports are non-growable by default) # (Note that qemu-io can never write beyond the EOF, so we have to use @@ -312,7 +313,7 @@ else echo 'OK: Post-truncate image size is as expected' fi -new_disk_usage=$(stat -c '%b' "$TEST_IMG") +new_disk_usage=$(disk_usage "$TEST_IMG") if [ "$new_disk_usage" -gt "$orig_disk_usage" ]; then echo 'OK: Disk usage grew with fallocate' else diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 237f746..e977cb4 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -996,6 +996,36 @@ _require_large_file() rm "$FILENAME" } +# Check whether disk_usage can be reliably used. +_require_disk_usage() +{ + local unusable=false + # ZFS triggers known failures on this front; it does not immediately + # allocate files, and then aggressively compresses writes even when full + # allocation was requested. + if [ -z "$TEST_IMG_FILE" ]; then + FILENAME="$TEST_IMG" + else + FILENAME="$TEST_IMG_FILE" + fi + if [ -e "FILENAME" ]; then + echo "unwilling to overwrite existing file" + exit 1 + fi + $QEMU_IMG create -f raw "$FILENAME" 5M > /dev/null + if [ $(disk_usage "$FILENAME") -gt $((1024*1024)) ]; then + unusable=true + fi + $QEMU_IMG create -f raw -o preallocation=full "$FILENAME" 5M > /dev/null + if [ $(disk_usage "$FILENAME") -lt $((4*1024*1024)) ]; then + unusable=true + fi + rm -f "$FILENAME" + if $unusable; then + _notrun "file system on $TEST_DIR does not handle sparse files nicely" + fi +} + # Check that a set of devices is available in the QEMU binary # _require_devices() diff --git a/tests/qemu-iotests/tests/graph-changes-while-io b/tests/qemu-iotests/tests/graph-changes-while-io index 194fda5..dca1167 100755 --- a/tests/qemu-iotests/tests/graph-changes-while-io +++ b/tests/qemu-iotests/tests/graph-changes-while-io @@ -27,6 +27,7 @@ from iotests import imgfmt, qemu_img, qemu_img_create, qemu_io, \ top = os.path.join(iotests.test_dir, 'top.img') +mid = os.path.join(iotests.test_dir, 'mid.img') nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') @@ -57,6 +58,16 @@ class TestGraphChangesWhileIO(QMPTestCase): def tearDown(self) -> None: self.qsd.stop() + os.remove(top) + + def _wait_for_blockjob(self, status: str) -> None: + done = False + while not done: + for event in self.qsd.get_qmp().get_events(wait=10.0): + if event['event'] != 'JOB_STATUS_CHANGE': + continue + if event['data']['status'] == status: + done = True def test_blockdev_add_while_io(self) -> None: # Run qemu-img bench in the background @@ -116,15 +127,92 @@ class TestGraphChangesWhileIO(QMPTestCase): 'device': 'job0', }) - cancelled = False - while not cancelled: - for event in self.qsd.get_qmp().get_events(wait=10.0): - if event['event'] != 'JOB_STATUS_CHANGE': - continue - if event['data']['status'] == 'null': - cancelled = True + self._wait_for_blockjob('null') + + bench_thr.join() + + def test_remove_lower_snapshot_while_io(self) -> None: + # Run qemu-img bench in the background + bench_thr = Thread(target=do_qemu_img_bench, args=(100000, )) + bench_thr.start() + + # While I/O is performed on 'node0' node, consequently add 2 snapshots + # on top of it, then remove (commit) them starting from lower one. + while bench_thr.is_alive(): + # Recreate snapshot images on every iteration + qemu_img_create('-f', imgfmt, mid, '1G') + qemu_img_create('-f', imgfmt, top, '1G') + + self.qsd.cmd('blockdev-add', { + 'driver': imgfmt, + 'node-name': 'mid', + 'file': { + 'driver': 'file', + 'filename': mid + } + }) + + self.qsd.cmd('blockdev-snapshot', { + 'node': 'node0', + 'overlay': 'mid', + }) + + self.qsd.cmd('blockdev-add', { + 'driver': imgfmt, + 'node-name': 'top', + 'file': { + 'driver': 'file', + 'filename': top + } + }) + + self.qsd.cmd('blockdev-snapshot', { + 'node': 'mid', + 'overlay': 'top', + }) + + self.qsd.cmd('block-commit', { + 'job-id': 'commit-mid', + 'device': 'top', + 'top-node': 'mid', + 'base-node': 'node0', + 'auto-finalize': True, + 'auto-dismiss': False, + }) + + self._wait_for_blockjob('concluded') + self.qsd.cmd('job-dismiss', { + 'id': 'commit-mid', + }) + + self.qsd.cmd('block-commit', { + 'job-id': 'commit-top', + 'device': 'top', + 'top-node': 'top', + 'base-node': 'node0', + 'auto-finalize': True, + 'auto-dismiss': False, + }) + + self._wait_for_blockjob('ready') + self.qsd.cmd('job-complete', { + 'id': 'commit-top', + }) + + self._wait_for_blockjob('concluded') + self.qsd.cmd('job-dismiss', { + 'id': 'commit-top', + }) + + self.qsd.cmd('blockdev-del', { + 'node-name': 'mid' + }) + self.qsd.cmd('blockdev-del', { + 'node-name': 'top' + }) bench_thr.join() + os.remove(mid) if __name__ == '__main__': # Format must support raw backing files diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out b/tests/qemu-iotests/tests/graph-changes-while-io.out index fbc63e6..8d7e9967 100644 --- a/tests/qemu-iotests/tests/graph-changes-while-io.out +++ b/tests/qemu-iotests/tests/graph-changes-while-io.out @@ -1,5 +1,5 @@ -.. +... ---------------------------------------------------------------------- -Ran 2 tests +Ran 3 tests OK diff --git a/tests/qemu-iotests/tests/mirror-sparse b/tests/qemu-iotests/tests/mirror-sparse index 8c52a4e..cfcaa60 100755 --- a/tests/qemu-iotests/tests/mirror-sparse +++ b/tests/qemu-iotests/tests/mirror-sparse @@ -40,6 +40,7 @@ cd .. _supported_fmt qcow2 raw # Format of the source. dst is always raw file _supported_proto file _supported_os Linux +_require_disk_usage echo echo "=== Initial image setup ===" @@ -96,13 +97,15 @@ _send_qemu_cmd $h1 '{"execute": "blockdev-del", "arguments": {"node-name": "dst"}}' 'return' \ | _filter_block_job_offset | _filter_block_job_len $QEMU_IMG compare -U -f $IMGFMT -F raw $TEST_IMG.base $TEST_IMG +# Some filesystems can fudge allocations for various reasons; rather +# than expecting precise 2M and 20M images, it is better to allow for slop. result=$(disk_usage $TEST_IMG) -if test $result -lt $((3*1024*1024)); then +if test $result -lt $((4*1024*1024)); then actual=sparse -elif test $result = $((20*1024*1024)); then +elif test $result -gt $((19*1024*1024)); then actual=full else - actual=unknown + actual="unexpected size ($result)" fi echo "Destination is $actual; expected $expected" } diff --git a/tests/qemu-iotests/tests/write-zeroes-unmap b/tests/qemu-iotests/tests/write-zeroes-unmap index 7cfeeaf..f90fb8e 100755 --- a/tests/qemu-iotests/tests/write-zeroes-unmap +++ b/tests/qemu-iotests/tests/write-zeroes-unmap @@ -32,6 +32,7 @@ cd .. _supported_fmt raw _supported_proto file _supported_os Linux +_require_disk_usage create_test_image() { _make_test_img -f $IMGFMT 1m diff --git a/tests/qtest/aspeed-hace-utils.c b/tests/qtest/aspeed-hace-utils.c new file mode 100644 index 0000000..0f7f911 --- /dev/null +++ b/tests/qtest/aspeed-hace-utils.c @@ -0,0 +1,646 @@ +/* + * QTest testcase for the ASPEED Hash and Crypto Engine + * + * SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 IBM Corp. + */ + +#include "qemu/osdep.h" +#include "libqtest.h" +#include "qemu/bitops.h" +#include "aspeed-hace-utils.h" + +/* + * Test vector is the ascii "abc" + * + * Expected results were generated using command line utitiles: + * + * echo -n -e 'abc' | dd of=/tmp/test + * for hash in sha512sum sha384sum sha256sum md5sum; do $hash /tmp/test; done + * + */ +static const uint8_t test_vector[3] = {0x61, 0x62, 0x63}; + +static const uint8_t test_result_sha512[64] = { + 0xdd, 0xaf, 0x35, 0xa1, 0x93, 0x61, 0x7a, 0xba, 0xcc, 0x41, 0x73, 0x49, + 0xae, 0x20, 0x41, 0x31, 0x12, 0xe6, 0xfa, 0x4e, 0x89, 0xa9, 0x7e, 0xa2, + 0x0a, 0x9e, 0xee, 0xe6, 0x4b, 0x55, 0xd3, 0x9a, 0x21, 0x92, 0x99, 0x2a, + 0x27, 0x4f, 0xc1, 0xa8, 0x36, 0xba, 0x3c, 0x23, 0xa3, 0xfe, 0xeb, 0xbd, + 0x45, 0x4d, 0x44, 0x23, 0x64, 0x3c, 0xe8, 0x0e, 0x2a, 0x9a, 0xc9, 0x4f, + 0xa5, 0x4c, 0xa4, 0x9f}; + +static const uint8_t test_result_sha384[48] = { + 0xcb, 0x00, 0x75, 0x3f, 0x45, 0xa3, 0x5e, 0x8b, 0xb5, 0xa0, 0x3d, 0x69, + 0x9a, 0xc6, 0x50, 0x07, 0x27, 0x2c, 0x32, 0xab, 0x0e, 0xde, 0xd1, 0x63, + 0x1a, 0x8b, 0x60, 0x5a, 0x43, 0xff, 0x5b, 0xed, 0x80, 0x86, 0x07, 0x2b, + 0xa1, 0xe7, 0xcc, 0x23, 0x58, 0xba, 0xec, 0xa1, 0x34, 0xc8, 0x25, 0xa7}; + +static const uint8_t test_result_sha256[32] = { + 0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea, 0x41, 0x41, 0x40, 0xde, + 0x5d, 0xae, 0x22, 0x23, 0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c, + 0xb4, 0x10, 0xff, 0x61, 0xf2, 0x00, 0x15, 0xad}; + +static const uint8_t test_result_md5[16] = { + 0x90, 0x01, 0x50, 0x98, 0x3c, 0xd2, 0x4f, 0xb0, 0xd6, 0x96, 0x3f, 0x7d, + 0x28, 0xe1, 0x7f, 0x72}; + +/* + * The Scatter-Gather Test vector is the ascii "abc" "def" "ghi", broken + * into blocks of 3 characters as shown + * + * Expected results were generated using command line utitiles: + * + * echo -n -e 'abcdefghijkl' | dd of=/tmp/test + * for hash in sha512sum sha384sum sha256sum; do $hash /tmp/test; done + * + */ +static const uint8_t test_vector_sg1[6] = {0x61, 0x62, 0x63, 0x64, 0x65, 0x66}; +static const uint8_t test_vector_sg2[3] = {0x67, 0x68, 0x69}; +static const uint8_t test_vector_sg3[3] = {0x6a, 0x6b, 0x6c}; + +static const uint8_t test_result_sg_sha512[64] = { + 0x17, 0x80, 0x7c, 0x72, 0x8e, 0xe3, 0xba, 0x35, 0xe7, 0xcf, 0x7a, 0xf8, + 0x23, 0x11, 0x6d, 0x26, 0xe4, 0x1e, 0x5d, 0x4d, 0x6c, 0x2f, 0xf1, 0xf3, + 0x72, 0x0d, 0x3d, 0x96, 0xaa, 0xcb, 0x6f, 0x69, 0xde, 0x64, 0x2e, 0x63, + 0xd5, 0xb7, 0x3f, 0xc3, 0x96, 0xc1, 0x2b, 0xe3, 0x8b, 0x2b, 0xd5, 0xd8, + 0x84, 0x25, 0x7c, 0x32, 0xc8, 0xf6, 0xd0, 0x85, 0x4a, 0xe6, 0xb5, 0x40, + 0xf8, 0x6d, 0xda, 0x2e}; + +static const uint8_t test_result_sg_sha384[48] = { + 0x10, 0x3c, 0xa9, 0x6c, 0x06, 0xa1, 0xce, 0x79, 0x8f, 0x08, 0xf8, 0xef, + 0xf0, 0xdf, 0xb0, 0xcc, 0xdb, 0x56, 0x7d, 0x48, 0xb2, 0x85, 0xb2, 0x3d, + 0x0c, 0xd7, 0x73, 0x45, 0x46, 0x67, 0xa3, 0xc2, 0xfa, 0x5f, 0x1b, 0x58, + 0xd9, 0xcd, 0xf2, 0x32, 0x9b, 0xd9, 0x97, 0x97, 0x30, 0xbf, 0xaa, 0xff}; + +static const uint8_t test_result_sg_sha256[32] = { + 0xd6, 0x82, 0xed, 0x4c, 0xa4, 0xd9, 0x89, 0xc1, 0x34, 0xec, 0x94, 0xf1, + 0x55, 0x1e, 0x1e, 0xc5, 0x80, 0xdd, 0x6d, 0x5a, 0x6e, 0xcd, 0xe9, 0xf3, + 0xd3, 0x5e, 0x6e, 0x4a, 0x71, 0x7f, 0xbd, 0xe4}; + +/* + * The accumulative mode requires firmware to provide internal initial state + * and message padding (including length L at the end of padding). + * + * This test vector is a ascii text "abc" with padding message. + * + * Expected results were generated using command line utitiles: + * + * echo -n -e 'abc' | dd of=/tmp/test + * for hash in sha512sum sha384sum sha256sum; do $hash /tmp/test; done + */ +static const uint8_t test_vector_accum_512[128] = { + 0x61, 0x62, 0x63, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18}; + +static const uint8_t test_vector_accum_384[128] = { + 0x61, 0x62, 0x63, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18}; + +static const uint8_t test_vector_accum_256[64] = { + 0x61, 0x62, 0x63, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18}; + +static const uint8_t test_result_accum_sha512[64] = { + 0xdd, 0xaf, 0x35, 0xa1, 0x93, 0x61, 0x7a, 0xba, 0xcc, 0x41, 0x73, 0x49, + 0xae, 0x20, 0x41, 0x31, 0x12, 0xe6, 0xfa, 0x4e, 0x89, 0xa9, 0x7e, 0xa2, + 0x0a, 0x9e, 0xee, 0xe6, 0x4b, 0x55, 0xd3, 0x9a, 0x21, 0x92, 0x99, 0x2a, + 0x27, 0x4f, 0xc1, 0xa8, 0x36, 0xba, 0x3c, 0x23, 0xa3, 0xfe, 0xeb, 0xbd, + 0x45, 0x4d, 0x44, 0x23, 0x64, 0x3c, 0xe8, 0x0e, 0x2a, 0x9a, 0xc9, 0x4f, + 0xa5, 0x4c, 0xa4, 0x9f}; + +static const uint8_t test_result_accum_sha384[48] = { + 0xcb, 0x00, 0x75, 0x3f, 0x45, 0xa3, 0x5e, 0x8b, 0xb5, 0xa0, 0x3d, 0x69, + 0x9a, 0xc6, 0x50, 0x07, 0x27, 0x2c, 0x32, 0xab, 0x0e, 0xde, 0xd1, 0x63, + 0x1a, 0x8b, 0x60, 0x5a, 0x43, 0xff, 0x5b, 0xed, 0x80, 0x86, 0x07, 0x2b, + 0xa1, 0xe7, 0xcc, 0x23, 0x58, 0xba, 0xec, 0xa1, 0x34, 0xc8, 0x25, 0xa7}; + +static const uint8_t test_result_accum_sha256[32] = { + 0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea, 0x41, 0x41, 0x40, 0xde, + 0x5d, 0xae, 0x22, 0x23, 0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c, + 0xb4, 0x10, 0xff, 0x61, 0xf2, 0x00, 0x15, 0xad}; + +static void write_regs(QTestState *s, uint32_t base, uint64_t src, + uint32_t length, uint64_t out, uint32_t method) +{ + qtest_writel(s, base + HACE_HASH_SRC, extract64(src, 0, 32)); + qtest_writel(s, base + HACE_HASH_SRC_HI, extract64(src, 32, 32)); + qtest_writel(s, base + HACE_HASH_DIGEST, extract64(out, 0, 32)); + qtest_writel(s, base + HACE_HASH_DIGEST_HI, extract64(out, 32, 32)); + qtest_writel(s, base + HACE_HASH_DATA_LEN, length); + qtest_writel(s, base + HACE_HASH_CMD, HACE_SHA_BE_EN | method); +} + +void aspeed_test_md5(const char *machine, const uint32_t base, + const uint64_t src_addr) + +{ + QTestState *s = qtest_init(machine); + + uint64_t digest_addr = src_addr + 0x010000; + uint8_t digest[16] = {0}; + + /* Check engine is idle, no busy or irq bits set */ + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); + + /* Write test vector into memory */ + qtest_memwrite(s, src_addr, test_vector, sizeof(test_vector)); + + write_regs(s, base, src_addr, sizeof(test_vector), + digest_addr, HACE_ALGO_MD5); + + /* Check hash IRQ status is asserted */ + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0x00000200); + + /* Clear IRQ status and check status is deasserted */ + qtest_writel(s, base + HACE_STS, 0x00000200); + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); + + /* Read computed digest from memory */ + qtest_memread(s, digest_addr, digest, sizeof(digest)); + + /* Check result of computation */ + g_assert_cmpmem(digest, sizeof(digest), + test_result_md5, sizeof(digest)); + + qtest_quit(s); +} + +void aspeed_test_sha256(const char *machine, const uint32_t base, + const uint64_t src_addr) +{ + QTestState *s = qtest_init(machine); + + const uint64_t digest_addr = src_addr + 0x10000; + uint8_t digest[32] = {0}; + + /* Check engine is idle, no busy or irq bits set */ + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); + + /* Write test vector into memory */ + qtest_memwrite(s, src_addr, test_vector, sizeof(test_vector)); + + write_regs(s, base, src_addr, sizeof(test_vector), digest_addr, + HACE_ALGO_SHA256); + + /* Check hash IRQ status is asserted */ + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0x00000200); + + /* Clear IRQ status and check status is deasserted */ + qtest_writel(s, base + HACE_STS, 0x00000200); + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); + + /* Read computed digest from memory */ + qtest_memread(s, digest_addr, digest, sizeof(digest)); + + /* Check result of computation */ + g_assert_cmpmem(digest, sizeof(digest), + test_result_sha256, sizeof(digest)); + + qtest_quit(s); +} + +void aspeed_test_sha384(const char *machine, const uint32_t base, + const uint64_t src_addr) +{ + QTestState *s = qtest_init(machine); + + const uint64_t digest_addr = src_addr + 0x10000; + uint8_t digest[48] = {0}; + + /* Check engine is idle, no busy or irq bits set */ + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); + + /* Write test vector into memory */ + qtest_memwrite(s, src_addr, test_vector, sizeof(test_vector)); + + write_regs(s, base, src_addr, sizeof(test_vector), digest_addr, + HACE_ALGO_SHA384); + + /* Check hash IRQ status is asserted */ + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0x00000200); + + /* Clear IRQ status and check status is deasserted */ + qtest_writel(s, base + HACE_STS, 0x00000200); + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); + + /* Read computed digest from memory */ + qtest_memread(s, digest_addr, digest, sizeof(digest)); + + /* Check result of computation */ + g_assert_cmpmem(digest, sizeof(digest), + test_result_sha384, sizeof(digest)); + + qtest_quit(s); +} + +void aspeed_test_sha512(const char *machine, const uint32_t base, + const uint64_t src_addr) +{ + QTestState *s = qtest_init(machine); + + const uint64_t digest_addr = src_addr + 0x10000; + uint8_t digest[64] = {0}; + + /* Check engine is idle, no busy or irq bits set */ + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); + + /* Write test vector into memory */ + qtest_memwrite(s, src_addr, test_vector, sizeof(test_vector)); + + write_regs(s, base, src_addr, sizeof(test_vector), digest_addr, + HACE_ALGO_SHA512); + + /* Check hash IRQ status is asserted */ + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0x00000200); + + /* Clear IRQ status and check status is deasserted */ + qtest_writel(s, base + HACE_STS, 0x00000200); + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); + + /* Read computed digest from memory */ + qtest_memread(s, digest_addr, digest, sizeof(digest)); + + /* Check result of computation */ + g_assert_cmpmem(digest, sizeof(digest), + test_result_sha512, sizeof(digest)); + + qtest_quit(s); +} + +void aspeed_test_sha256_sg(const char *machine, const uint32_t base, + const uint64_t src_addr) +{ + QTestState *s = qtest_init(machine); + + const uint64_t src_addr_1 = src_addr + 0x10000; + const uint64_t src_addr_2 = src_addr + 0x20000; + const uint64_t src_addr_3 = src_addr + 0x30000; + const uint64_t digest_addr = src_addr + 0x40000; + uint8_t digest[32] = {0}; + struct AspeedSgList array[] = { + { cpu_to_le32(sizeof(test_vector_sg1)), + cpu_to_le32(src_addr_1) }, + { cpu_to_le32(sizeof(test_vector_sg2)), + cpu_to_le32(src_addr_2) }, + { cpu_to_le32(sizeof(test_vector_sg3) | SG_LIST_LEN_LAST), + cpu_to_le32(src_addr_3) }, + }; + + /* Check engine is idle, no busy or irq bits set */ + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); + + /* Write test vector into memory */ + qtest_memwrite(s, src_addr_1, test_vector_sg1, sizeof(test_vector_sg1)); + qtest_memwrite(s, src_addr_2, test_vector_sg2, sizeof(test_vector_sg2)); + qtest_memwrite(s, src_addr_3, test_vector_sg3, sizeof(test_vector_sg3)); + qtest_memwrite(s, src_addr, array, sizeof(array)); + + write_regs(s, base, src_addr, + (sizeof(test_vector_sg1) + + sizeof(test_vector_sg2) + + sizeof(test_vector_sg3)), + digest_addr, HACE_ALGO_SHA256 | HACE_SG_EN); + + /* Check hash IRQ status is asserted */ + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0x00000200); + + /* Clear IRQ status and check status is deasserted */ + qtest_writel(s, base + HACE_STS, 0x00000200); + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); + + /* Read computed digest from memory */ + qtest_memread(s, digest_addr, digest, sizeof(digest)); + + /* Check result of computation */ + g_assert_cmpmem(digest, sizeof(digest), + test_result_sg_sha256, sizeof(digest)); + + qtest_quit(s); +} + +void aspeed_test_sha384_sg(const char *machine, const uint32_t base, + const uint64_t src_addr) +{ + QTestState *s = qtest_init(machine); + + const uint64_t src_addr_1 = src_addr + 0x10000; + const uint64_t src_addr_2 = src_addr + 0x20000; + const uint64_t src_addr_3 = src_addr + 0x30000; + const uint64_t digest_addr = src_addr + 0x40000; + uint8_t digest[48] = {0}; + struct AspeedSgList array[] = { + { cpu_to_le32(sizeof(test_vector_sg1)), + cpu_to_le32(src_addr_1) }, + { cpu_to_le32(sizeof(test_vector_sg2)), + cpu_to_le32(src_addr_2) }, + { cpu_to_le32(sizeof(test_vector_sg3) | SG_LIST_LEN_LAST), + cpu_to_le32(src_addr_3) }, + }; + + /* Check engine is idle, no busy or irq bits set */ + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); + + /* Write test vector into memory */ + qtest_memwrite(s, src_addr_1, test_vector_sg1, sizeof(test_vector_sg1)); + qtest_memwrite(s, src_addr_2, test_vector_sg2, sizeof(test_vector_sg2)); + qtest_memwrite(s, src_addr_3, test_vector_sg3, sizeof(test_vector_sg3)); + qtest_memwrite(s, src_addr, array, sizeof(array)); + + write_regs(s, base, src_addr, + (sizeof(test_vector_sg1) + + sizeof(test_vector_sg2) + + sizeof(test_vector_sg3)), + digest_addr, HACE_ALGO_SHA384 | HACE_SG_EN); + + /* Check hash IRQ status is asserted */ + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0x00000200); + + /* Clear IRQ status and check status is deasserted */ + qtest_writel(s, base + HACE_STS, 0x00000200); + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); + + /* Read computed digest from memory */ + qtest_memread(s, digest_addr, digest, sizeof(digest)); + + /* Check result of computation */ + g_assert_cmpmem(digest, sizeof(digest), + test_result_sg_sha384, sizeof(digest)); + + qtest_quit(s); +} + +void aspeed_test_sha512_sg(const char *machine, const uint32_t base, + const uint64_t src_addr) +{ + QTestState *s = qtest_init(machine); + + const uint64_t src_addr_1 = src_addr + 0x10000; + const uint64_t src_addr_2 = src_addr + 0x20000; + const uint64_t src_addr_3 = src_addr + 0x30000; + const uint64_t digest_addr = src_addr + 0x40000; + uint8_t digest[64] = {0}; + struct AspeedSgList array[] = { + { cpu_to_le32(sizeof(test_vector_sg1)), + cpu_to_le32(src_addr_1) }, + { cpu_to_le32(sizeof(test_vector_sg2)), + cpu_to_le32(src_addr_2) }, + { cpu_to_le32(sizeof(test_vector_sg3) | SG_LIST_LEN_LAST), + cpu_to_le32(src_addr_3) }, + }; + + /* Check engine is idle, no busy or irq bits set */ + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); + + /* Write test vector into memory */ + qtest_memwrite(s, src_addr_1, test_vector_sg1, sizeof(test_vector_sg1)); + qtest_memwrite(s, src_addr_2, test_vector_sg2, sizeof(test_vector_sg2)); + qtest_memwrite(s, src_addr_3, test_vector_sg3, sizeof(test_vector_sg3)); + qtest_memwrite(s, src_addr, array, sizeof(array)); + + write_regs(s, base, src_addr, + (sizeof(test_vector_sg1) + + sizeof(test_vector_sg2) + + sizeof(test_vector_sg3)), + digest_addr, HACE_ALGO_SHA512 | HACE_SG_EN); + + /* Check hash IRQ status is asserted */ + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0x00000200); + + /* Clear IRQ status and check status is deasserted */ + qtest_writel(s, base + HACE_STS, 0x00000200); + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); + + /* Read computed digest from memory */ + qtest_memread(s, digest_addr, digest, sizeof(digest)); + + /* Check result of computation */ + g_assert_cmpmem(digest, sizeof(digest), + test_result_sg_sha512, sizeof(digest)); + + qtest_quit(s); +} + +void aspeed_test_sha256_accum(const char *machine, const uint32_t base, + const uint64_t src_addr) +{ + QTestState *s = qtest_init(machine); + + const uint64_t buffer_addr = src_addr + 0x10000; + const uint64_t digest_addr = src_addr + 0x40000; + uint8_t digest[32] = {0}; + struct AspeedSgList array[] = { + { cpu_to_le32(sizeof(test_vector_accum_256) | SG_LIST_LEN_LAST), + cpu_to_le32(buffer_addr) }, + }; + + /* Check engine is idle, no busy or irq bits set */ + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); + + /* Write test vector into memory */ + qtest_memwrite(s, buffer_addr, test_vector_accum_256, + sizeof(test_vector_accum_256)); + qtest_memwrite(s, src_addr, array, sizeof(array)); + + write_regs(s, base, src_addr, sizeof(test_vector_accum_256), + digest_addr, HACE_ALGO_SHA256 | HACE_SG_EN | HACE_ACCUM_EN); + + /* Check hash IRQ status is asserted */ + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0x00000200); + + /* Clear IRQ status and check status is deasserted */ + qtest_writel(s, base + HACE_STS, 0x00000200); + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); + + /* Read computed digest from memory */ + qtest_memread(s, digest_addr, digest, sizeof(digest)); + + /* Check result of computation */ + g_assert_cmpmem(digest, sizeof(digest), + test_result_accum_sha256, sizeof(digest)); + + qtest_quit(s); +} + +void aspeed_test_sha384_accum(const char *machine, const uint32_t base, + const uint64_t src_addr) +{ + QTestState *s = qtest_init(machine); + + const uint64_t buffer_addr = src_addr + 0x10000; + const uint64_t digest_addr = src_addr + 0x40000; + uint8_t digest[48] = {0}; + struct AspeedSgList array[] = { + { cpu_to_le32(sizeof(test_vector_accum_384) | SG_LIST_LEN_LAST), + cpu_to_le32(buffer_addr) }, + }; + + /* Check engine is idle, no busy or irq bits set */ + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); + + /* Write test vector into memory */ + qtest_memwrite(s, buffer_addr, test_vector_accum_384, + sizeof(test_vector_accum_384)); + qtest_memwrite(s, src_addr, array, sizeof(array)); + + write_regs(s, base, src_addr, sizeof(test_vector_accum_384), + digest_addr, HACE_ALGO_SHA384 | HACE_SG_EN | HACE_ACCUM_EN); + + /* Check hash IRQ status is asserted */ + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0x00000200); + + /* Clear IRQ status and check status is deasserted */ + qtest_writel(s, base + HACE_STS, 0x00000200); + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); + + /* Read computed digest from memory */ + qtest_memread(s, digest_addr, digest, sizeof(digest)); + + /* Check result of computation */ + g_assert_cmpmem(digest, sizeof(digest), + test_result_accum_sha384, sizeof(digest)); + + qtest_quit(s); +} + +void aspeed_test_sha512_accum(const char *machine, const uint32_t base, + const uint64_t src_addr) +{ + QTestState *s = qtest_init(machine); + + const uint64_t buffer_addr = src_addr + 0x10000; + const uint64_t digest_addr = src_addr + 0x40000; + uint8_t digest[64] = {0}; + struct AspeedSgList array[] = { + { cpu_to_le32(sizeof(test_vector_accum_512) | SG_LIST_LEN_LAST), + cpu_to_le32(buffer_addr) }, + }; + + /* Check engine is idle, no busy or irq bits set */ + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); + + /* Write test vector into memory */ + qtest_memwrite(s, buffer_addr, test_vector_accum_512, + sizeof(test_vector_accum_512)); + qtest_memwrite(s, src_addr, array, sizeof(array)); + + write_regs(s, base, src_addr, sizeof(test_vector_accum_512), + digest_addr, HACE_ALGO_SHA512 | HACE_SG_EN | HACE_ACCUM_EN); + + /* Check hash IRQ status is asserted */ + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0x00000200); + + /* Clear IRQ status and check status is deasserted */ + qtest_writel(s, base + HACE_STS, 0x00000200); + g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); + + /* Read computed digest from memory */ + qtest_memread(s, digest_addr, digest, sizeof(digest)); + + /* Check result of computation */ + g_assert_cmpmem(digest, sizeof(digest), + test_result_accum_sha512, sizeof(digest)); + + qtest_quit(s); +} + +void aspeed_test_addresses(const char *machine, const uint32_t base, + const struct AspeedMasks *expected) +{ + QTestState *s = qtest_init(machine); + + /* + * Check command mode is zero, meaning engine is in direct access mode, + * as this affects the masking behavior of the HASH_SRC register. + */ + g_assert_cmphex(qtest_readl(s, base + HACE_CMD), ==, 0); + g_assert_cmphex(qtest_readl(s, base + HACE_HASH_SRC), ==, 0); + g_assert_cmphex(qtest_readl(s, base + HACE_HASH_SRC_HI), ==, 0); + g_assert_cmphex(qtest_readl(s, base + HACE_HASH_DIGEST), ==, 0); + g_assert_cmphex(qtest_readl(s, base + HACE_HASH_DIGEST_HI), ==, 0); + g_assert_cmphex(qtest_readl(s, base + HACE_HASH_KEY_BUFF), ==, 0); + g_assert_cmphex(qtest_readl(s, base + HACE_HASH_KEY_BUFF_HI), ==, 0); + g_assert_cmphex(qtest_readl(s, base + HACE_HASH_DATA_LEN), ==, 0); + + /* Check that the address masking is correct */ + qtest_writel(s, base + HACE_HASH_SRC, 0xffffffff); + g_assert_cmphex(qtest_readl(s, base + HACE_HASH_SRC), ==, expected->src); + + qtest_writel(s, base + HACE_HASH_SRC_HI, 0xffffffff); + g_assert_cmphex(qtest_readl(s, base + HACE_HASH_SRC_HI), + ==, expected->src_hi); + + qtest_writel(s, base + HACE_HASH_DIGEST, 0xffffffff); + g_assert_cmphex(qtest_readl(s, base + HACE_HASH_DIGEST), ==, + expected->dest); + + qtest_writel(s, base + HACE_HASH_DIGEST_HI, 0xffffffff); + g_assert_cmphex(qtest_readl(s, base + HACE_HASH_DIGEST_HI), ==, + expected->dest_hi); + + qtest_writel(s, base + HACE_HASH_KEY_BUFF, 0xffffffff); + g_assert_cmphex(qtest_readl(s, base + HACE_HASH_KEY_BUFF), ==, + expected->key); + + qtest_writel(s, base + HACE_HASH_KEY_BUFF_HI, 0xffffffff); + g_assert_cmphex(qtest_readl(s, base + HACE_HASH_KEY_BUFF_HI), ==, + expected->key_hi); + + qtest_writel(s, base + HACE_HASH_DATA_LEN, 0xffffffff); + g_assert_cmphex(qtest_readl(s, base + HACE_HASH_DATA_LEN), ==, + expected->len); + + /* Reset to zero */ + qtest_writel(s, base + HACE_HASH_SRC, 0); + qtest_writel(s, base + HACE_HASH_SRC_HI, 0); + qtest_writel(s, base + HACE_HASH_DIGEST, 0); + qtest_writel(s, base + HACE_HASH_DIGEST_HI, 0); + qtest_writel(s, base + HACE_HASH_KEY_BUFF, 0); + qtest_writel(s, base + HACE_HASH_KEY_BUFF_HI, 0); + qtest_writel(s, base + HACE_HASH_DATA_LEN, 0); + + /* Check that all bits are now zero */ + g_assert_cmphex(qtest_readl(s, base + HACE_HASH_SRC), ==, 0); + g_assert_cmphex(qtest_readl(s, base + HACE_HASH_SRC_HI), ==, 0); + g_assert_cmphex(qtest_readl(s, base + HACE_HASH_DIGEST), ==, 0); + g_assert_cmphex(qtest_readl(s, base + HACE_HASH_DIGEST_HI), ==, 0); + g_assert_cmphex(qtest_readl(s, base + HACE_HASH_KEY_BUFF), ==, 0); + g_assert_cmphex(qtest_readl(s, base + HACE_HASH_KEY_BUFF_HI), ==, 0); + g_assert_cmphex(qtest_readl(s, base + HACE_HASH_DATA_LEN), ==, 0); + + qtest_quit(s); +} + diff --git a/tests/qtest/aspeed-hace-utils.h b/tests/qtest/aspeed-hace-utils.h new file mode 100644 index 0000000..c8b2ec4 --- /dev/null +++ b/tests/qtest/aspeed-hace-utils.h @@ -0,0 +1,84 @@ +/* + * QTest testcase for the ASPEED Hash and Crypto Engine + * + * SPDX-License-Identifier: GPL-2.0-or-later + * Copyright 2021 IBM Corp. + */ + +#ifndef TESTS_ASPEED_HACE_UTILS_H +#define TESTS_ASPEED_HACE_UTILS_H + +#include "qemu/osdep.h" +#include "libqtest.h" +#include "qemu/bitops.h" + +#define HACE_CMD 0x10 +#define HACE_SHA_BE_EN BIT(3) +#define HACE_MD5_LE_EN BIT(2) +#define HACE_ALGO_MD5 0 +#define HACE_ALGO_SHA1 BIT(5) +#define HACE_ALGO_SHA224 BIT(6) +#define HACE_ALGO_SHA256 (BIT(4) | BIT(6)) +#define HACE_ALGO_SHA512 (BIT(5) | BIT(6)) +#define HACE_ALGO_SHA384 (BIT(5) | BIT(6) | BIT(10)) +#define HACE_SG_EN BIT(18) +#define HACE_ACCUM_EN BIT(8) + +#define HACE_STS 0x1c +#define HACE_RSA_ISR BIT(13) +#define HACE_CRYPTO_ISR BIT(12) +#define HACE_HASH_ISR BIT(9) +#define HACE_RSA_BUSY BIT(2) +#define HACE_CRYPTO_BUSY BIT(1) +#define HACE_HASH_BUSY BIT(0) +#define HACE_HASH_SRC 0x20 +#define HACE_HASH_DIGEST 0x24 +#define HACE_HASH_KEY_BUFF 0x28 +#define HACE_HASH_DATA_LEN 0x2c +#define HACE_HASH_CMD 0x30 +#define HACE_HASH_SRC_HI 0x90 +#define HACE_HASH_DIGEST_HI 0x94 +#define HACE_HASH_KEY_BUFF_HI 0x98 + +/* Scatter-Gather Hash */ +#define SG_LIST_LEN_LAST BIT(31) +struct AspeedSgList { + uint32_t len; + uint32_t addr; +} __attribute__ ((__packed__)); + +struct AspeedMasks { + uint32_t src; + uint32_t dest; + uint32_t key; + uint32_t len; + uint32_t src_hi; + uint32_t dest_hi; + uint32_t key_hi; +}; + +void aspeed_test_md5(const char *machine, const uint32_t base, + const uint64_t src_addr); +void aspeed_test_sha256(const char *machine, const uint32_t base, + const uint64_t src_addr); +void aspeed_test_sha384(const char *machine, const uint32_t base, + const uint64_t src_addr); +void aspeed_test_sha512(const char *machine, const uint32_t base, + const uint64_t src_addr); +void aspeed_test_sha256_sg(const char *machine, const uint32_t base, + const uint64_t src_addr); +void aspeed_test_sha384_sg(const char *machine, const uint32_t base, + const uint64_t src_addr); +void aspeed_test_sha512_sg(const char *machine, const uint32_t base, + const uint64_t src_addr); +void aspeed_test_sha256_accum(const char *machine, const uint32_t base, + const uint64_t src_addr); +void aspeed_test_sha384_accum(const char *machine, const uint32_t base, + const uint64_t src_addr); +void aspeed_test_sha512_accum(const char *machine, const uint32_t base, + const uint64_t src_addr); +void aspeed_test_addresses(const char *machine, const uint32_t base, + const struct AspeedMasks *expected); + +#endif /* TESTS_ASPEED_HACE_UTILS_H */ + diff --git a/tests/qtest/aspeed_hace-test.c b/tests/qtest/aspeed_hace-test.c index ce86a44..3877702 100644 --- a/tests/qtest/aspeed_hace-test.c +++ b/tests/qtest/aspeed_hace-test.c @@ -6,599 +6,222 @@ */ #include "qemu/osdep.h" - #include "libqtest.h" #include "qemu/bitops.h" +#include "aspeed-hace-utils.h" -#define HACE_CMD 0x10 -#define HACE_SHA_BE_EN BIT(3) -#define HACE_MD5_LE_EN BIT(2) -#define HACE_ALGO_MD5 0 -#define HACE_ALGO_SHA1 BIT(5) -#define HACE_ALGO_SHA224 BIT(6) -#define HACE_ALGO_SHA256 (BIT(4) | BIT(6)) -#define HACE_ALGO_SHA512 (BIT(5) | BIT(6)) -#define HACE_ALGO_SHA384 (BIT(5) | BIT(6) | BIT(10)) -#define HACE_SG_EN BIT(18) -#define HACE_ACCUM_EN BIT(8) - -#define HACE_STS 0x1c -#define HACE_RSA_ISR BIT(13) -#define HACE_CRYPTO_ISR BIT(12) -#define HACE_HASH_ISR BIT(9) -#define HACE_RSA_BUSY BIT(2) -#define HACE_CRYPTO_BUSY BIT(1) -#define HACE_HASH_BUSY BIT(0) -#define HACE_HASH_SRC 0x20 -#define HACE_HASH_DIGEST 0x24 -#define HACE_HASH_KEY_BUFF 0x28 -#define HACE_HASH_DATA_LEN 0x2c -#define HACE_HASH_CMD 0x30 -/* Scatter-Gather Hash */ -#define SG_LIST_LEN_LAST BIT(31) -struct AspeedSgList { - uint32_t len; - uint32_t addr; -} __attribute__ ((__packed__)); - -/* - * Test vector is the ascii "abc" - * - * Expected results were generated using command line utitiles: - * - * echo -n -e 'abc' | dd of=/tmp/test - * for hash in sha512sum sha256sum md5sum; do $hash /tmp/test; done - * - */ -static const uint8_t test_vector[] = {0x61, 0x62, 0x63}; - -static const uint8_t test_result_sha512[] = { - 0xdd, 0xaf, 0x35, 0xa1, 0x93, 0x61, 0x7a, 0xba, 0xcc, 0x41, 0x73, 0x49, - 0xae, 0x20, 0x41, 0x31, 0x12, 0xe6, 0xfa, 0x4e, 0x89, 0xa9, 0x7e, 0xa2, - 0x0a, 0x9e, 0xee, 0xe6, 0x4b, 0x55, 0xd3, 0x9a, 0x21, 0x92, 0x99, 0x2a, - 0x27, 0x4f, 0xc1, 0xa8, 0x36, 0xba, 0x3c, 0x23, 0xa3, 0xfe, 0xeb, 0xbd, - 0x45, 0x4d, 0x44, 0x23, 0x64, 0x3c, 0xe8, 0x0e, 0x2a, 0x9a, 0xc9, 0x4f, - 0xa5, 0x4c, 0xa4, 0x9f}; +static const struct AspeedMasks ast1030_masks = { + .src = 0x7fffffff, + .dest = 0x7ffffff8, + .key = 0x7ffffff8, + .len = 0x0fffffff, +}; -static const uint8_t test_result_sha256[] = { - 0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea, 0x41, 0x41, 0x40, 0xde, - 0x5d, 0xae, 0x22, 0x23, 0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c, - 0xb4, 0x10, 0xff, 0x61, 0xf2, 0x00, 0x15, 0xad}; +static const struct AspeedMasks ast2600_masks = { + .src = 0x7fffffff, + .dest = 0x7ffffff8, + .key = 0x7ffffff8, + .len = 0x0fffffff, +}; -static const uint8_t test_result_md5[] = { - 0x90, 0x01, 0x50, 0x98, 0x3c, 0xd2, 0x4f, 0xb0, 0xd6, 0x96, 0x3f, 0x7d, - 0x28, 0xe1, 0x7f, 0x72}; +static const struct AspeedMasks ast2500_masks = { + .src = 0x3fffffff, + .dest = 0x3ffffff8, + .key = 0x3fffffc0, + .len = 0x0fffffff, +}; -/* - * The Scatter-Gather Test vector is the ascii "abc" "def" "ghi", broken - * into blocks of 3 characters as shown - * - * Expected results were generated using command line utitiles: - * - * echo -n -e 'abcdefghijkl' | dd of=/tmp/test - * for hash in sha512sum sha256sum; do $hash /tmp/test; done - * - */ -static const uint8_t test_vector_sg1[] = {0x61, 0x62, 0x63, 0x64, 0x65, 0x66}; -static const uint8_t test_vector_sg2[] = {0x67, 0x68, 0x69}; -static const uint8_t test_vector_sg3[] = {0x6a, 0x6b, 0x6c}; - -static const uint8_t test_result_sg_sha512[] = { - 0x17, 0x80, 0x7c, 0x72, 0x8e, 0xe3, 0xba, 0x35, 0xe7, 0xcf, 0x7a, 0xf8, - 0x23, 0x11, 0x6d, 0x26, 0xe4, 0x1e, 0x5d, 0x4d, 0x6c, 0x2f, 0xf1, 0xf3, - 0x72, 0x0d, 0x3d, 0x96, 0xaa, 0xcb, 0x6f, 0x69, 0xde, 0x64, 0x2e, 0x63, - 0xd5, 0xb7, 0x3f, 0xc3, 0x96, 0xc1, 0x2b, 0xe3, 0x8b, 0x2b, 0xd5, 0xd8, - 0x84, 0x25, 0x7c, 0x32, 0xc8, 0xf6, 0xd0, 0x85, 0x4a, 0xe6, 0xb5, 0x40, - 0xf8, 0x6d, 0xda, 0x2e}; - -static const uint8_t test_result_sg_sha256[] = { - 0xd6, 0x82, 0xed, 0x4c, 0xa4, 0xd9, 0x89, 0xc1, 0x34, 0xec, 0x94, 0xf1, - 0x55, 0x1e, 0x1e, 0xc5, 0x80, 0xdd, 0x6d, 0x5a, 0x6e, 0xcd, 0xe9, 0xf3, - 0xd3, 0x5e, 0x6e, 0x4a, 0x71, 0x7f, 0xbd, 0xe4}; +static const struct AspeedMasks ast2400_masks = { + .src = 0x0fffffff, + .dest = 0x0ffffff8, + .key = 0x0fffffc0, + .len = 0x0fffffff, +}; -/* - * The accumulative mode requires firmware to provide internal initial state - * and message padding (including length L at the end of padding). - * - * This test vector is a ascii text "abc" with padding message. - * - * Expected results were generated using command line utitiles: - * - * echo -n -e 'abc' | dd of=/tmp/test - * for hash in sha512sum sha256sum; do $hash /tmp/test; done - */ -static const uint8_t test_vector_accum_512[] = { - 0x61, 0x62, 0x63, 0x80, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18}; - -static const uint8_t test_vector_accum_256[] = { - 0x61, 0x62, 0x63, 0x80, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18}; - -static const uint8_t test_result_accum_sha512[] = { - 0xdd, 0xaf, 0x35, 0xa1, 0x93, 0x61, 0x7a, 0xba, 0xcc, 0x41, 0x73, 0x49, - 0xae, 0x20, 0x41, 0x31, 0x12, 0xe6, 0xfa, 0x4e, 0x89, 0xa9, 0x7e, 0xa2, - 0x0a, 0x9e, 0xee, 0xe6, 0x4b, 0x55, 0xd3, 0x9a, 0x21, 0x92, 0x99, 0x2a, - 0x27, 0x4f, 0xc1, 0xa8, 0x36, 0xba, 0x3c, 0x23, 0xa3, 0xfe, 0xeb, 0xbd, - 0x45, 0x4d, 0x44, 0x23, 0x64, 0x3c, 0xe8, 0x0e, 0x2a, 0x9a, 0xc9, 0x4f, - 0xa5, 0x4c, 0xa4, 0x9f}; - -static const uint8_t test_result_accum_sha256[] = { - 0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea, 0x41, 0x41, 0x40, 0xde, - 0x5d, 0xae, 0x22, 0x23, 0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c, - 0xb4, 0x10, 0xff, 0x61, 0xf2, 0x00, 0x15, 0xad}; - -static void write_regs(QTestState *s, uint32_t base, uint32_t src, - uint32_t length, uint32_t out, uint32_t method) +/* ast1030 */ +static void test_md5_ast1030(void) { - qtest_writel(s, base + HACE_HASH_SRC, src); - qtest_writel(s, base + HACE_HASH_DIGEST, out); - qtest_writel(s, base + HACE_HASH_DATA_LEN, length); - qtest_writel(s, base + HACE_HASH_CMD, HACE_SHA_BE_EN | method); + aspeed_test_md5("-machine ast1030-evb", 0x7e6d0000, 0x00000000); } -static void test_md5(const char *machine, const uint32_t base, - const uint32_t src_addr) - +static void test_sha256_ast1030(void) { - QTestState *s = qtest_init(machine); - - uint32_t digest_addr = src_addr + 0x01000000; - uint8_t digest[16] = {0}; - - /* Check engine is idle, no busy or irq bits set */ - g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); - - /* Write test vector into memory */ - qtest_memwrite(s, src_addr, test_vector, sizeof(test_vector)); - - write_regs(s, base, src_addr, sizeof(test_vector), digest_addr, HACE_ALGO_MD5); - - /* Check hash IRQ status is asserted */ - g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0x00000200); - - /* Clear IRQ status and check status is deasserted */ - qtest_writel(s, base + HACE_STS, 0x00000200); - g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); - - /* Read computed digest from memory */ - qtest_memread(s, digest_addr, digest, sizeof(digest)); - - /* Check result of computation */ - g_assert_cmpmem(digest, sizeof(digest), - test_result_md5, sizeof(digest)); - - qtest_quit(s); + aspeed_test_sha256("-machine ast1030-evb", 0x7e6d0000, 0x00000000); } -static void test_sha256(const char *machine, const uint32_t base, - const uint32_t src_addr) +static void test_sha256_sg_ast1030(void) { - QTestState *s = qtest_init(machine); - - const uint32_t digest_addr = src_addr + 0x1000000; - uint8_t digest[32] = {0}; - - /* Check engine is idle, no busy or irq bits set */ - g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); - - /* Write test vector into memory */ - qtest_memwrite(s, src_addr, test_vector, sizeof(test_vector)); - - write_regs(s, base, src_addr, sizeof(test_vector), digest_addr, HACE_ALGO_SHA256); - - /* Check hash IRQ status is asserted */ - g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0x00000200); - - /* Clear IRQ status and check status is deasserted */ - qtest_writel(s, base + HACE_STS, 0x00000200); - g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); - - /* Read computed digest from memory */ - qtest_memread(s, digest_addr, digest, sizeof(digest)); - - /* Check result of computation */ - g_assert_cmpmem(digest, sizeof(digest), - test_result_sha256, sizeof(digest)); - - qtest_quit(s); + aspeed_test_sha256_sg("-machine ast1030-evb", 0x7e6d0000, 0x00000000); } -static void test_sha512(const char *machine, const uint32_t base, - const uint32_t src_addr) +static void test_sha384_ast1030(void) { - QTestState *s = qtest_init(machine); - - const uint32_t digest_addr = src_addr + 0x1000000; - uint8_t digest[64] = {0}; - - /* Check engine is idle, no busy or irq bits set */ - g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); - - /* Write test vector into memory */ - qtest_memwrite(s, src_addr, test_vector, sizeof(test_vector)); - - write_regs(s, base, src_addr, sizeof(test_vector), digest_addr, HACE_ALGO_SHA512); - - /* Check hash IRQ status is asserted */ - g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0x00000200); - - /* Clear IRQ status and check status is deasserted */ - qtest_writel(s, base + HACE_STS, 0x00000200); - g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); - - /* Read computed digest from memory */ - qtest_memread(s, digest_addr, digest, sizeof(digest)); - - /* Check result of computation */ - g_assert_cmpmem(digest, sizeof(digest), - test_result_sha512, sizeof(digest)); - - qtest_quit(s); + aspeed_test_sha384("-machine ast1030-evb", 0x7e6d0000, 0x00000000); } -static void test_sha256_sg(const char *machine, const uint32_t base, - const uint32_t src_addr) +static void test_sha384_sg_ast1030(void) { - QTestState *s = qtest_init(machine); - - const uint32_t src_addr_1 = src_addr + 0x1000000; - const uint32_t src_addr_2 = src_addr + 0x2000000; - const uint32_t src_addr_3 = src_addr + 0x3000000; - const uint32_t digest_addr = src_addr + 0x4000000; - uint8_t digest[32] = {0}; - struct AspeedSgList array[] = { - { cpu_to_le32(sizeof(test_vector_sg1)), - cpu_to_le32(src_addr_1) }, - { cpu_to_le32(sizeof(test_vector_sg2)), - cpu_to_le32(src_addr_2) }, - { cpu_to_le32(sizeof(test_vector_sg3) | SG_LIST_LEN_LAST), - cpu_to_le32(src_addr_3) }, - }; - - /* Check engine is idle, no busy or irq bits set */ - g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); - - /* Write test vector into memory */ - qtest_memwrite(s, src_addr_1, test_vector_sg1, sizeof(test_vector_sg1)); - qtest_memwrite(s, src_addr_2, test_vector_sg2, sizeof(test_vector_sg2)); - qtest_memwrite(s, src_addr_3, test_vector_sg3, sizeof(test_vector_sg3)); - qtest_memwrite(s, src_addr, array, sizeof(array)); - - write_regs(s, base, src_addr, - (sizeof(test_vector_sg1) - + sizeof(test_vector_sg2) - + sizeof(test_vector_sg3)), - digest_addr, HACE_ALGO_SHA256 | HACE_SG_EN); - - /* Check hash IRQ status is asserted */ - g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0x00000200); - - /* Clear IRQ status and check status is deasserted */ - qtest_writel(s, base + HACE_STS, 0x00000200); - g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); - - /* Read computed digest from memory */ - qtest_memread(s, digest_addr, digest, sizeof(digest)); - - /* Check result of computation */ - g_assert_cmpmem(digest, sizeof(digest), - test_result_sg_sha256, sizeof(digest)); - - qtest_quit(s); + aspeed_test_sha384_sg("-machine ast1030-evb", 0x7e6d0000, 0x00000000); } -static void test_sha512_sg(const char *machine, const uint32_t base, - const uint32_t src_addr) +static void test_sha512_ast1030(void) { - QTestState *s = qtest_init(machine); - - const uint32_t src_addr_1 = src_addr + 0x1000000; - const uint32_t src_addr_2 = src_addr + 0x2000000; - const uint32_t src_addr_3 = src_addr + 0x3000000; - const uint32_t digest_addr = src_addr + 0x4000000; - uint8_t digest[64] = {0}; - struct AspeedSgList array[] = { - { cpu_to_le32(sizeof(test_vector_sg1)), - cpu_to_le32(src_addr_1) }, - { cpu_to_le32(sizeof(test_vector_sg2)), - cpu_to_le32(src_addr_2) }, - { cpu_to_le32(sizeof(test_vector_sg3) | SG_LIST_LEN_LAST), - cpu_to_le32(src_addr_3) }, - }; - - /* Check engine is idle, no busy or irq bits set */ - g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); - - /* Write test vector into memory */ - qtest_memwrite(s, src_addr_1, test_vector_sg1, sizeof(test_vector_sg1)); - qtest_memwrite(s, src_addr_2, test_vector_sg2, sizeof(test_vector_sg2)); - qtest_memwrite(s, src_addr_3, test_vector_sg3, sizeof(test_vector_sg3)); - qtest_memwrite(s, src_addr, array, sizeof(array)); - - write_regs(s, base, src_addr, - (sizeof(test_vector_sg1) - + sizeof(test_vector_sg2) - + sizeof(test_vector_sg3)), - digest_addr, HACE_ALGO_SHA512 | HACE_SG_EN); - - /* Check hash IRQ status is asserted */ - g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0x00000200); - - /* Clear IRQ status and check status is deasserted */ - qtest_writel(s, base + HACE_STS, 0x00000200); - g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); - - /* Read computed digest from memory */ - qtest_memread(s, digest_addr, digest, sizeof(digest)); - - /* Check result of computation */ - g_assert_cmpmem(digest, sizeof(digest), - test_result_sg_sha512, sizeof(digest)); - - qtest_quit(s); + aspeed_test_sha512("-machine ast1030-evb", 0x7e6d0000, 0x00000000); } -static void test_sha256_accum(const char *machine, const uint32_t base, - const uint32_t src_addr) +static void test_sha512_sg_ast1030(void) { - QTestState *s = qtest_init(machine); - - const uint32_t buffer_addr = src_addr + 0x1000000; - const uint32_t digest_addr = src_addr + 0x4000000; - uint8_t digest[32] = {0}; - struct AspeedSgList array[] = { - { cpu_to_le32(sizeof(test_vector_accum_256) | SG_LIST_LEN_LAST), - cpu_to_le32(buffer_addr) }, - }; - - /* Check engine is idle, no busy or irq bits set */ - g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); - - /* Write test vector into memory */ - qtest_memwrite(s, buffer_addr, test_vector_accum_256, - sizeof(test_vector_accum_256)); - qtest_memwrite(s, src_addr, array, sizeof(array)); - - write_regs(s, base, src_addr, sizeof(test_vector_accum_256), - digest_addr, HACE_ALGO_SHA256 | HACE_SG_EN | HACE_ACCUM_EN); - - /* Check hash IRQ status is asserted */ - g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0x00000200); - - /* Clear IRQ status and check status is deasserted */ - qtest_writel(s, base + HACE_STS, 0x00000200); - g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); - - /* Read computed digest from memory */ - qtest_memread(s, digest_addr, digest, sizeof(digest)); - - /* Check result of computation */ - g_assert_cmpmem(digest, sizeof(digest), - test_result_accum_sha256, sizeof(digest)); - - qtest_quit(s); + aspeed_test_sha512_sg("-machine ast1030-evb", 0x7e6d0000, 0x00000000); } -static void test_sha512_accum(const char *machine, const uint32_t base, - const uint32_t src_addr) +static void test_sha256_accum_ast1030(void) { - QTestState *s = qtest_init(machine); - - const uint32_t buffer_addr = src_addr + 0x1000000; - const uint32_t digest_addr = src_addr + 0x4000000; - uint8_t digest[64] = {0}; - struct AspeedSgList array[] = { - { cpu_to_le32(sizeof(test_vector_accum_512) | SG_LIST_LEN_LAST), - cpu_to_le32(buffer_addr) }, - }; - - /* Check engine is idle, no busy or irq bits set */ - g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); - - /* Write test vector into memory */ - qtest_memwrite(s, buffer_addr, test_vector_accum_512, - sizeof(test_vector_accum_512)); - qtest_memwrite(s, src_addr, array, sizeof(array)); - - write_regs(s, base, src_addr, sizeof(test_vector_accum_512), - digest_addr, HACE_ALGO_SHA512 | HACE_SG_EN | HACE_ACCUM_EN); - - /* Check hash IRQ status is asserted */ - g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0x00000200); - - /* Clear IRQ status and check status is deasserted */ - qtest_writel(s, base + HACE_STS, 0x00000200); - g_assert_cmphex(qtest_readl(s, base + HACE_STS), ==, 0); - - /* Read computed digest from memory */ - qtest_memread(s, digest_addr, digest, sizeof(digest)); - - /* Check result of computation */ - g_assert_cmpmem(digest, sizeof(digest), - test_result_accum_sha512, sizeof(digest)); - - qtest_quit(s); + aspeed_test_sha256_accum("-machine ast1030-evb", 0x7e6d0000, 0x00000000); } -struct masks { - uint32_t src; - uint32_t dest; - uint32_t len; -}; - -static const struct masks ast2600_masks = { - .src = 0x7fffffff, - .dest = 0x7ffffff8, - .len = 0x0fffffff, -}; - -static const struct masks ast2500_masks = { - .src = 0x3fffffff, - .dest = 0x3ffffff8, - .len = 0x0fffffff, -}; - -static const struct masks ast2400_masks = { - .src = 0x0fffffff, - .dest = 0x0ffffff8, - .len = 0x0fffffff, -}; - -static void test_addresses(const char *machine, const uint32_t base, - const struct masks *expected) +static void test_sha384_accum_ast1030(void) { - QTestState *s = qtest_init(machine); - - /* - * Check command mode is zero, meaning engine is in direct access mode, - * as this affects the masking behavior of the HASH_SRC register. - */ - g_assert_cmphex(qtest_readl(s, base + HACE_CMD), ==, 0); - g_assert_cmphex(qtest_readl(s, base + HACE_HASH_SRC), ==, 0); - g_assert_cmphex(qtest_readl(s, base + HACE_HASH_DIGEST), ==, 0); - g_assert_cmphex(qtest_readl(s, base + HACE_HASH_DATA_LEN), ==, 0); - - - /* Check that the address masking is correct */ - qtest_writel(s, base + HACE_HASH_SRC, 0xffffffff); - g_assert_cmphex(qtest_readl(s, base + HACE_HASH_SRC), ==, expected->src); - - qtest_writel(s, base + HACE_HASH_DIGEST, 0xffffffff); - g_assert_cmphex(qtest_readl(s, base + HACE_HASH_DIGEST), ==, expected->dest); - - qtest_writel(s, base + HACE_HASH_DATA_LEN, 0xffffffff); - g_assert_cmphex(qtest_readl(s, base + HACE_HASH_DATA_LEN), ==, expected->len); - - /* Reset to zero */ - qtest_writel(s, base + HACE_HASH_SRC, 0); - qtest_writel(s, base + HACE_HASH_DIGEST, 0); - qtest_writel(s, base + HACE_HASH_DATA_LEN, 0); + aspeed_test_sha384_accum("-machine ast1030-evb", 0x7e6d0000, 0x00000000); +} - /* Check that all bits are now zero */ - g_assert_cmphex(qtest_readl(s, base + HACE_HASH_SRC), ==, 0); - g_assert_cmphex(qtest_readl(s, base + HACE_HASH_DIGEST), ==, 0); - g_assert_cmphex(qtest_readl(s, base + HACE_HASH_DATA_LEN), ==, 0); +static void test_sha512_accum_ast1030(void) +{ + aspeed_test_sha512_accum("-machine ast1030-evb", 0x7e6d0000, 0x00000000); +} - qtest_quit(s); +static void test_addresses_ast1030(void) +{ + aspeed_test_addresses("-machine ast1030-evb", 0x7e6d0000, &ast1030_masks); } /* ast2600 */ static void test_md5_ast2600(void) { - test_md5("-machine ast2600-evb", 0x1e6d0000, 0x80000000); + aspeed_test_md5("-machine ast2600-evb", 0x1e6d0000, 0x80000000); } static void test_sha256_ast2600(void) { - test_sha256("-machine ast2600-evb", 0x1e6d0000, 0x80000000); + aspeed_test_sha256("-machine ast2600-evb", 0x1e6d0000, 0x80000000); } static void test_sha256_sg_ast2600(void) { - test_sha256_sg("-machine ast2600-evb", 0x1e6d0000, 0x80000000); + aspeed_test_sha256_sg("-machine ast2600-evb", 0x1e6d0000, 0x80000000); +} + +static void test_sha384_ast2600(void) +{ + aspeed_test_sha384("-machine ast2600-evb", 0x1e6d0000, 0x80000000); +} + +static void test_sha384_sg_ast2600(void) +{ + aspeed_test_sha384_sg("-machine ast2600-evb", 0x1e6d0000, 0x80000000); } static void test_sha512_ast2600(void) { - test_sha512("-machine ast2600-evb", 0x1e6d0000, 0x80000000); + aspeed_test_sha512("-machine ast2600-evb", 0x1e6d0000, 0x80000000); } static void test_sha512_sg_ast2600(void) { - test_sha512_sg("-machine ast2600-evb", 0x1e6d0000, 0x80000000); + aspeed_test_sha512_sg("-machine ast2600-evb", 0x1e6d0000, 0x80000000); } static void test_sha256_accum_ast2600(void) { - test_sha256_accum("-machine ast2600-evb", 0x1e6d0000, 0x80000000); + aspeed_test_sha256_accum("-machine ast2600-evb", 0x1e6d0000, 0x80000000); +} + +static void test_sha384_accum_ast2600(void) +{ + aspeed_test_sha384_accum("-machine ast2600-evb", 0x1e6d0000, 0x80000000); } static void test_sha512_accum_ast2600(void) { - test_sha512_accum("-machine ast2600-evb", 0x1e6d0000, 0x80000000); + aspeed_test_sha512_accum("-machine ast2600-evb", 0x1e6d0000, 0x80000000); } static void test_addresses_ast2600(void) { - test_addresses("-machine ast2600-evb", 0x1e6d0000, &ast2600_masks); + aspeed_test_addresses("-machine ast2600-evb", 0x1e6d0000, &ast2600_masks); } /* ast2500 */ static void test_md5_ast2500(void) { - test_md5("-machine ast2500-evb", 0x1e6e3000, 0x80000000); + aspeed_test_md5("-machine ast2500-evb", 0x1e6e3000, 0x80000000); } static void test_sha256_ast2500(void) { - test_sha256("-machine ast2500-evb", 0x1e6e3000, 0x80000000); + aspeed_test_sha256("-machine ast2500-evb", 0x1e6e3000, 0x80000000); } static void test_sha512_ast2500(void) { - test_sha512("-machine ast2500-evb", 0x1e6e3000, 0x80000000); + aspeed_test_sha512("-machine ast2500-evb", 0x1e6e3000, 0x80000000); } static void test_addresses_ast2500(void) { - test_addresses("-machine ast2500-evb", 0x1e6e3000, &ast2500_masks); + aspeed_test_addresses("-machine ast2500-evb", 0x1e6e3000, &ast2500_masks); } /* ast2400 */ static void test_md5_ast2400(void) { - test_md5("-machine palmetto-bmc", 0x1e6e3000, 0x40000000); + aspeed_test_md5("-machine palmetto-bmc", 0x1e6e3000, 0x40000000); } static void test_sha256_ast2400(void) { - test_sha256("-machine palmetto-bmc", 0x1e6e3000, 0x40000000); + aspeed_test_sha256("-machine palmetto-bmc", 0x1e6e3000, 0x40000000); } static void test_sha512_ast2400(void) { - test_sha512("-machine palmetto-bmc", 0x1e6e3000, 0x40000000); + aspeed_test_sha512("-machine palmetto-bmc", 0x1e6e3000, 0x40000000); } static void test_addresses_ast2400(void) { - test_addresses("-machine palmetto-bmc", 0x1e6e3000, &ast2400_masks); + aspeed_test_addresses("-machine palmetto-bmc", 0x1e6e3000, &ast2400_masks); } int main(int argc, char **argv) { g_test_init(&argc, &argv, NULL); + qtest_add_func("ast1030/hace/addresses", test_addresses_ast1030); + qtest_add_func("ast1030/hace/sha512", test_sha512_ast1030); + qtest_add_func("ast1030/hace/sha384", test_sha384_ast1030); + qtest_add_func("ast1030/hace/sha256", test_sha256_ast1030); + qtest_add_func("ast1030/hace/md5", test_md5_ast1030); + + qtest_add_func("ast1030/hace/sha512_sg", test_sha512_sg_ast1030); + qtest_add_func("ast1030/hace/sha384_sg", test_sha384_sg_ast1030); + qtest_add_func("ast1030/hace/sha256_sg", test_sha256_sg_ast1030); + + qtest_add_func("ast1030/hace/sha512_accum", test_sha512_accum_ast1030); + qtest_add_func("ast1030/hace/sha384_accum", test_sha384_accum_ast1030); + qtest_add_func("ast1030/hace/sha256_accum", test_sha256_accum_ast1030); + qtest_add_func("ast2600/hace/addresses", test_addresses_ast2600); qtest_add_func("ast2600/hace/sha512", test_sha512_ast2600); + qtest_add_func("ast2600/hace/sha384", test_sha384_ast2600); qtest_add_func("ast2600/hace/sha256", test_sha256_ast2600); qtest_add_func("ast2600/hace/md5", test_md5_ast2600); qtest_add_func("ast2600/hace/sha512_sg", test_sha512_sg_ast2600); + qtest_add_func("ast2600/hace/sha384_sg", test_sha384_sg_ast2600); qtest_add_func("ast2600/hace/sha256_sg", test_sha256_sg_ast2600); qtest_add_func("ast2600/hace/sha512_accum", test_sha512_accum_ast2600); + qtest_add_func("ast2600/hace/sha384_accum", test_sha384_accum_ast2600); qtest_add_func("ast2600/hace/sha256_accum", test_sha256_accum_ast2600); qtest_add_func("ast2500/hace/addresses", test_addresses_ast2500); diff --git a/tests/qtest/aspeed_smc-test.c b/tests/qtest/aspeed_smc-test.c index 4e13893..52a00e6 100644 --- a/tests/qtest/aspeed_smc-test.c +++ b/tests/qtest/aspeed_smc-test.c @@ -228,5 +228,10 @@ int main(int argc, char **argv) unlink(ast2500_evb_data.tmp_path); unlink(ast2600_evb_data.tmp_path); unlink(ast1030_evb_data.tmp_path); + g_free(palmetto_data.tmp_path); + g_free(ast2500_evb_data.tmp_path); + g_free(ast2600_evb_data.tmp_path); + g_free(ast1030_evb_data.tmp_path); + return ret; } diff --git a/tests/qtest/ast2700-hace-test.c b/tests/qtest/ast2700-hace-test.c new file mode 100644 index 0000000..a400e29 --- /dev/null +++ b/tests/qtest/ast2700-hace-test.c @@ -0,0 +1,98 @@ +/* + * QTest testcase for the ASPEED Hash and Crypto Engine + * + * SPDX-License-Identifier: GPL-2.0-or-later + * Copyright (C) 2025 ASPEED Technology Inc. + */ + +#include "qemu/osdep.h" +#include "libqtest.h" +#include "qemu/bitops.h" +#include "aspeed-hace-utils.h" + +static const struct AspeedMasks as2700_masks = { + .src = 0x7fffffff, + .dest = 0x7ffffff8, + .key = 0x7ffffff8, + .len = 0x0fffffff, + .src_hi = 0x00000003, + .dest_hi = 0x00000003, + .key_hi = 0x00000003, +}; + +/* ast2700 */ +static void test_md5_ast2700(void) +{ + aspeed_test_md5("-machine ast2700a1-evb", 0x12070000, 0x400000000); +} + +static void test_sha256_ast2700(void) +{ + aspeed_test_sha256("-machine ast2700a1-evb", 0x12070000, 0x400000000); +} + +static void test_sha256_sg_ast2700(void) +{ + aspeed_test_sha256_sg("-machine ast2700a1-evb", 0x12070000, 0x400000000); +} + +static void test_sha384_ast2700(void) +{ + aspeed_test_sha384("-machine ast2700a1-evb", 0x12070000, 0x400000000); +} + +static void test_sha384_sg_ast2700(void) +{ + aspeed_test_sha384_sg("-machine ast2700a1-evb", 0x12070000, 0x400000000); +} + +static void test_sha512_ast2700(void) +{ + aspeed_test_sha512("-machine ast2700a1-evb", 0x12070000, 0x400000000); +} + +static void test_sha512_sg_ast2700(void) +{ + aspeed_test_sha512_sg("-machine ast2700a1-evb", 0x12070000, 0x400000000); +} + +static void test_sha256_accum_ast2700(void) +{ + aspeed_test_sha256_accum("-machine ast2700a1-evb", 0x12070000, 0x400000000); +} + +static void test_sha384_accum_ast2700(void) +{ + aspeed_test_sha384_accum("-machine ast2700a1-evb", 0x12070000, 0x400000000); +} + +static void test_sha512_accum_ast2700(void) +{ + aspeed_test_sha512_accum("-machine ast2700a1-evb", 0x12070000, 0x400000000); +} + +static void test_addresses_ast2700(void) +{ + aspeed_test_addresses("-machine ast2700a1-evb", 0x12070000, &as2700_masks); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + + qtest_add_func("ast2700/hace/addresses", test_addresses_ast2700); + qtest_add_func("ast2700/hace/sha512", test_sha512_ast2700); + qtest_add_func("ast2700/hace/sha384", test_sha384_ast2700); + qtest_add_func("ast2700/hace/sha256", test_sha256_ast2700); + qtest_add_func("ast2700/hace/md5", test_md5_ast2700); + + qtest_add_func("ast2700/hace/sha512_sg", test_sha512_sg_ast2700); + qtest_add_func("ast2700/hace/sha384_sg", test_sha384_sg_ast2700); + qtest_add_func("ast2700/hace/sha256_sg", test_sha256_sg_ast2700); + + qtest_add_func("ast2700/hace/sha512_accum", test_sha512_accum_ast2700); + qtest_add_func("ast2700/hace/sha384_accum", test_sha384_accum_ast2700); + qtest_add_func("ast2700/hace/sha256_accum", test_sha256_accum_ast2700); + + return g_test_run(); +} diff --git a/tests/qtest/ast2700-smc-test.c b/tests/qtest/ast2700-smc-test.c index d1c4856..62d538d 100644 --- a/tests/qtest/ast2700-smc-test.c +++ b/tests/qtest/ast2700-smc-test.c @@ -67,5 +67,6 @@ int main(int argc, char **argv) qtest_quit(ast2700_evb_data.s); unlink(ast2700_evb_data.tmp_path); + g_free(ast2700_evb_data.tmp_path); return ret; } diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c index 0a333ec..0b2bdf9 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c @@ -1622,7 +1622,7 @@ static void test_acpi_aarch64_virt_tcg_memhp(void) .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", .cd = "tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2", .ram_start = 0x40000000ULL, - .scan_len = 256ULL * 1024 * 1024, + .scan_len = 256ULL * MiB, }; data.variant = ".memhp"; @@ -1717,7 +1717,7 @@ static void test_acpi_riscv64_virt_tcg_numamem(void) .uefi_fl2 = "pc-bios/edk2-riscv-vars.fd", .cd = "tests/data/uefi-boot-images/bios-tables-test.riscv64.iso.qcow2", .ram_start = 0x80000000ULL, - .scan_len = 128ULL * 1024 * 1024, + .scan_len = 128ULL * MiB, }; data.variant = ".numamem"; @@ -1743,7 +1743,7 @@ static void test_acpi_aarch64_virt_tcg_numamem(void) .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", .cd = "tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2", .ram_start = 0x40000000ULL, - .scan_len = 128ULL * 1024 * 1024, + .scan_len = 128ULL * MiB, }; data.variant = ".numamem"; @@ -1765,7 +1765,7 @@ static void test_acpi_aarch64_virt_tcg_pxb(void) .uefi_fl1 = "pc-bios/edk2-aarch64-code.fd", .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", .ram_start = 0x40000000ULL, - .scan_len = 128ULL * 1024 * 1024, + .scan_len = 128ULL * MiB, }; /* * While using -cdrom, the cdrom would auto plugged into pxb-pcie, @@ -1841,7 +1841,7 @@ static void test_acpi_aarch64_virt_tcg_acpi_hmat(void) .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", .cd = "tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2", .ram_start = 0x40000000ULL, - .scan_len = 128ULL * 1024 * 1024, + .scan_len = 128ULL * MiB, }; data.variant = ".acpihmatvirt"; @@ -2095,7 +2095,7 @@ static void test_acpi_riscv64_virt_tcg(void) .uefi_fl2 = "pc-bios/edk2-riscv-vars.fd", .cd = "tests/data/uefi-boot-images/bios-tables-test.riscv64.iso.qcow2", .ram_start = 0x80000000ULL, - .scan_len = 128ULL * 1024 * 1024, + .scan_len = 128ULL * MiB, }; /* @@ -2117,7 +2117,7 @@ static void test_acpi_aarch64_virt_tcg(void) .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", .cd = "tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2", .ram_start = 0x40000000ULL, - .scan_len = 128ULL * 1024 * 1024, + .scan_len = 128ULL * MiB, }; data.smbios_cpu_max_speed = 2900; @@ -2138,7 +2138,7 @@ static void test_acpi_aarch64_virt_tcg_topology(void) .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", .cd = "tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2", .ram_start = 0x40000000ULL, - .scan_len = 128ULL * 1024 * 1024, + .scan_len = 128ULL * MiB, }; test_acpi_one("-cpu cortex-a57 " @@ -2223,7 +2223,7 @@ static void test_acpi_aarch64_virt_viot(void) .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", .cd = "tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2", .ram_start = 0x40000000ULL, - .scan_len = 128ULL * 1024 * 1024, + .scan_len = 128ULL * MiB, }; test_acpi_one("-cpu cortex-a57 " @@ -2407,7 +2407,7 @@ static void test_acpi_aarch64_virt_oem_fields(void) .uefi_fl2 = "pc-bios/edk2-arm-vars.fd", .cd = "tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2", .ram_start = 0x40000000ULL, - .scan_len = 128ULL * 1024 * 1024, + .scan_len = 128ULL * MiB, }; char *args; diff --git a/tests/qtest/libqos/igb.c b/tests/qtest/libqos/igb.c index f40c4ec..ab3ef6f 100644 --- a/tests/qtest/libqos/igb.c +++ b/tests/qtest/libqos/igb.c @@ -104,10 +104,10 @@ static void igb_pci_start_hw(QOSGraphObject *obj) e1000e_macreg_write(&d->e1000e, E1000_RDT(0), 0); e1000e_macreg_write(&d->e1000e, E1000_RDH(0), 0); e1000e_macreg_write(&d->e1000e, E1000_RA, - le32_to_cpu(*(uint32_t *)address)); + ldl_le_p(address)); e1000e_macreg_write(&d->e1000e, E1000_RA + 4, E1000_RAH_AV | E1000_RAH_POOL_1 | - le16_to_cpu(*(uint16_t *)(address + 4))); + lduw_le_p(address + 4)); /* Set supported receive descriptor mode */ e1000e_macreg_write(&d->e1000e, diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build index 7daf619..8ad8490 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build @@ -208,15 +208,17 @@ qtests_npcm7xx = \ 'npcm7xx_sdhci-test', 'npcm7xx_smbus-test', 'npcm7xx_timer-test', - 'npcm7xx_watchdog_timer-test', - 'npcm_gmac-test'] + \ + 'npcm7xx_watchdog_timer-test'] + \ (slirp.found() ? ['npcm7xx_emc-test'] : []) +qtests_npcm8xx = \ + ['npcm_gmac-test'] qtests_aspeed = \ - ['aspeed_hace-test', - 'aspeed_smc-test', - 'aspeed_gpio-test'] + ['aspeed_gpio-test', + 'aspeed_hace-test', + 'aspeed_smc-test'] qtests_aspeed64 = \ ['ast2700-gpio-test', + 'ast2700-hace-test', 'ast2700-smc-test'] qtests_stm32l4x5 = \ @@ -258,6 +260,7 @@ qtests_aarch64 = \ (config_all_accel.has_key('CONFIG_TCG') and \ config_all_devices.has_key('CONFIG_TPM_TIS_I2C') ? ['tpm-tis-i2c-test'] : []) + \ (config_all_devices.has_key('CONFIG_ASPEED_SOC') ? qtests_aspeed64 : []) + \ + (config_all_devices.has_key('CONFIG_NPCM8XX') ? qtests_npcm8xx : []) + \ ['arm-cpu-features', 'numa-test', 'boot-serial-test', @@ -361,6 +364,10 @@ if gnutls.found() endif qtests = { + 'aspeed_hace-test': files('aspeed-hace-utils.c', 'aspeed_hace-test.c'), + 'aspeed_smc-test': files('aspeed-smc-utils.c', 'aspeed_smc-test.c'), + 'ast2700-hace-test': files('aspeed-hace-utils.c', 'ast2700-hace-test.c'), + 'ast2700-smc-test': files('aspeed-smc-utils.c', 'ast2700-smc-test.c'), 'bios-tables-test': [io, 'boot-sector.c', 'acpi-utils.c', 'tpm-emu.c'], 'cdrom-test': files('boot-sector.c'), 'dbus-vmstate-test': files('migration/migration-qmp.c', @@ -382,8 +389,6 @@ qtests = { 'virtio-net-failover': migration_files, 'vmgenid-test': files('boot-sector.c', 'acpi-utils.c'), 'netdev-socket': files('netdev-socket.c', '../unit/socket-helpers.c'), - 'aspeed_smc-test': files('aspeed-smc-utils.c', 'aspeed_smc-test.c'), - 'ast2700-smc-test': files('aspeed-smc-utils.c', 'ast2700-smc-test.c'), } if vnc.found() diff --git a/tests/qtest/migration/compression-tests.c b/tests/qtest/migration/compression-tests.c index 41e79f0..b827665 100644 --- a/tests/qtest/migration/compression-tests.c +++ b/tests/qtest/migration/compression-tests.c @@ -42,6 +42,20 @@ static void test_multifd_tcp_zstd(void) }; test_precopy_common(&args); } + +static void test_multifd_postcopy_tcp_zstd(void) +{ + MigrateCommon args = { + .listen_uri = "defer", + .start = { + .caps[MIGRATION_CAPABILITY_MULTIFD] = true, + .caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] = true, + }, + .start_hook = migrate_hook_start_precopy_tcp_multifd_zstd, + }; + + test_precopy_common(&args); +} #endif /* CONFIG_ZSTD */ #ifdef CONFIG_QATZIP @@ -184,6 +198,10 @@ void migration_test_add_compression(MigrationTestEnv *env) #ifdef CONFIG_ZSTD migration_test_add("/migration/multifd/tcp/plain/zstd", test_multifd_tcp_zstd); + if (env->has_uffd) { + migration_test_add("/migration/multifd+postcopy/tcp/plain/zstd", + test_multifd_postcopy_tcp_zstd); + } #endif #ifdef CONFIG_QATZIP diff --git a/tests/qtest/migration/postcopy-tests.c b/tests/qtest/migration/postcopy-tests.c index 483e3ff..3773525 100644 --- a/tests/qtest/migration/postcopy-tests.c +++ b/tests/qtest/migration/postcopy-tests.c @@ -94,6 +94,29 @@ static void migration_test_add_postcopy_smoke(MigrationTestEnv *env) } } +static void test_multifd_postcopy(void) +{ + MigrateCommon args = { + .start = { + .caps[MIGRATION_CAPABILITY_MULTIFD] = true, + }, + }; + + test_postcopy_common(&args); +} + +static void test_multifd_postcopy_preempt(void) +{ + MigrateCommon args = { + .start = { + .caps[MIGRATION_CAPABILITY_MULTIFD] = true, + .caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT] = true, + }, + }; + + test_postcopy_common(&args); +} + void migration_test_add_postcopy(MigrationTestEnv *env) { migration_test_add_postcopy_smoke(env); @@ -114,6 +137,10 @@ void migration_test_add_postcopy(MigrationTestEnv *env) "/migration/postcopy/recovery/double-failures/reconnect", test_postcopy_recovery_fail_reconnect); + migration_test_add("/migration/multifd+postcopy/plain", + test_multifd_postcopy); + migration_test_add("/migration/multifd+postcopy/preempt/plain", + test_multifd_postcopy_preempt); if (env->is_x86) { migration_test_add("/migration/postcopy/suspend", test_postcopy_suspend); diff --git a/tests/qtest/migration/precopy-tests.c b/tests/qtest/migration/precopy-tests.c index 87b0a7e..bb38292 100644 --- a/tests/qtest/migration/precopy-tests.c +++ b/tests/qtest/migration/precopy-tests.c @@ -101,13 +101,43 @@ static void test_precopy_unix_dirty_ring(void) #ifdef CONFIG_RDMA +#include <sys/resource.h> + +/* + * During migration over RDMA, it will try to pin portions of guest memory, + * typically exceeding 100MB in this test, while the remainder will be + * transmitted as compressed zero pages. + * + * REQUIRED_MEMLOCK_SZ indicates the minimal mlock size in the current context. + */ +#define REQUIRED_MEMLOCK_SZ (128 << 20) /* 128MB */ + +/* check 'ulimit -l' */ +static bool mlock_check(void) +{ + uid_t uid; + struct rlimit rlim; + + uid = getuid(); + if (uid == 0) { + return true; + } + + if (getrlimit(RLIMIT_MEMLOCK, &rlim) != 0) { + return false; + } + + return rlim.rlim_cur >= REQUIRED_MEMLOCK_SZ; +} + #define RDMA_MIGRATION_HELPER "scripts/rdma-migration-helper.sh" -static int new_rdma_link(char *buffer) +static int new_rdma_link(char *buffer, bool ipv6) { char cmd[256]; bool verbose = g_getenv("QTEST_LOG"); - snprintf(cmd, sizeof(cmd), "%s detect %s", RDMA_MIGRATION_HELPER, + snprintf(cmd, sizeof(cmd), "IP_FAMILY=%s %s detect %s", + ipv6 ? "ipv6" : "ipv4", RDMA_MIGRATION_HELPER, verbose ? "" : "2>/dev/null"); FILE *pipe = popen(cmd, "r"); @@ -132,11 +162,16 @@ static int new_rdma_link(char *buffer) return -1; } -static void test_precopy_rdma_plain(void) +static void __test_precopy_rdma_plain(bool ipv6) { char buffer[128] = {}; - if (new_rdma_link(buffer)) { + if (!mlock_check()) { + g_test_skip("'ulimit -l' is too small, require >=128M"); + return; + } + + if (new_rdma_link(buffer, ipv6)) { g_test_skip("No rdma link available\n" "# To enable the test:\n" "# Run \'" RDMA_MIGRATION_HELPER " setup\' with root to " @@ -159,6 +194,16 @@ static void test_precopy_rdma_plain(void) test_precopy_common(&args); } + +static void test_precopy_rdma_plain(void) +{ + __test_precopy_rdma_plain(false); +} + +static void test_precopy_rdma_plain_ipv6(void) +{ + __test_precopy_rdma_plain(true); +} #endif static void test_precopy_tcp_plain(void) @@ -524,7 +569,7 @@ static void test_multifd_tcp_channels_none(void) * * And see that it works */ -static void test_multifd_tcp_cancel(void) +static void test_multifd_tcp_cancel(bool postcopy_ram) { MigrateStart args = { .hide_stderr = true, @@ -538,6 +583,11 @@ static void test_multifd_tcp_cancel(void) migrate_ensure_non_converge(from); migrate_prepare_for_dirty_mem(from); + if (postcopy_ram) { + migrate_set_capability(from, "postcopy-ram", true); + migrate_set_capability(to, "postcopy-ram", true); + } + migrate_set_parameter_int(from, "multifd-channels", 16); migrate_set_parameter_int(to, "multifd-channels", 16); @@ -579,6 +629,10 @@ static void test_multifd_tcp_cancel(void) return; } + if (postcopy_ram) { + migrate_set_capability(to2, "postcopy-ram", true); + } + migrate_set_parameter_int(to2, "multifd-channels", 16); migrate_set_capability(to2, "multifd", true); @@ -602,6 +656,16 @@ static void test_multifd_tcp_cancel(void) migrate_end(from, to2, true); } +static void test_multifd_precopy_tcp_cancel(void) +{ + test_multifd_tcp_cancel(false); +} + +static void test_multifd_postcopy_tcp_cancel(void) +{ + test_multifd_tcp_cancel(true); +} + static void test_cancel_src_after_failed(QTestState *from, QTestState *to, const char *uri, const char *phase) { @@ -1188,10 +1252,17 @@ static void migration_test_add_precopy_smoke(MigrationTestEnv *env) migration_test_add("/migration/multifd/tcp/uri/plain/none", test_multifd_tcp_uri_none); migration_test_add("/migration/multifd/tcp/plain/cancel", - test_multifd_tcp_cancel); + test_multifd_precopy_tcp_cancel); + if (env->has_uffd) { + migration_test_add("/migration/multifd+postcopy/tcp/plain/cancel", + test_multifd_postcopy_tcp_cancel); + } + #ifdef CONFIG_RDMA migration_test_add("/migration/precopy/rdma/plain", test_precopy_rdma_plain); + migration_test_add("/migration/precopy/rdma/plain/ipv6", + test_precopy_rdma_plain_ipv6); #endif } diff --git a/tests/qtest/migration/tls-tests.c b/tests/qtest/migration/tls-tests.c index 72f44de..21e9fec 100644 --- a/tests/qtest/migration/tls-tests.c +++ b/tests/qtest/migration/tls-tests.c @@ -395,6 +395,19 @@ static void test_postcopy_recovery_tls_psk(void) test_postcopy_recovery_common(&args); } +static void test_multifd_postcopy_recovery_tls_psk(void) +{ + MigrateCommon args = { + .start_hook = migrate_hook_start_tls_psk_match, + .end_hook = migrate_hook_end_tls_psk, + .start = { + .caps[MIGRATION_CAPABILITY_MULTIFD] = true, + }, + }; + + test_postcopy_recovery_common(&args); +} + /* This contains preempt+recovery+tls test altogether */ static void test_postcopy_preempt_all(void) { @@ -409,6 +422,20 @@ static void test_postcopy_preempt_all(void) test_postcopy_recovery_common(&args); } +static void test_multifd_postcopy_preempt_recovery_tls_psk(void) +{ + MigrateCommon args = { + .start_hook = migrate_hook_start_tls_psk_match, + .end_hook = migrate_hook_end_tls_psk, + .start = { + .caps[MIGRATION_CAPABILITY_MULTIFD] = true, + .caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT] = true, + }, + }; + + test_postcopy_recovery_common(&args); +} + static void test_precopy_unix_tls_psk(void) { g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); @@ -657,6 +684,21 @@ static void test_multifd_tcp_tls_psk_mismatch(void) test_precopy_common(&args); } +static void test_multifd_postcopy_tcp_tls_psk_match(void) +{ + MigrateCommon args = { + .start = { + .caps[MIGRATION_CAPABILITY_MULTIFD] = true, + .caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] = true, + }, + .listen_uri = "defer", + .start_hook = migrate_hook_start_multifd_tcp_tls_psk_match, + .end_hook = migrate_hook_end_tls_psk, + }; + + test_precopy_common(&args); +} + #ifdef CONFIG_TASN1 static void test_multifd_tcp_tls_x509_default_host(void) { @@ -774,6 +816,11 @@ void migration_test_add_tls(MigrationTestEnv *env) test_postcopy_preempt_tls_psk); migration_test_add("/migration/postcopy/preempt/recovery/tls/psk", test_postcopy_preempt_all); + migration_test_add("/migration/multifd+postcopy/recovery/tls/psk", + test_multifd_postcopy_recovery_tls_psk); + migration_test_add( + "/migration/multifd+postcopy/preempt/recovery/tls/psk", + test_multifd_postcopy_preempt_recovery_tls_psk); } #ifdef CONFIG_TASN1 migration_test_add("/migration/precopy/unix/tls/x509/default-host", @@ -805,6 +852,10 @@ void migration_test_add_tls(MigrationTestEnv *env) test_multifd_tcp_tls_psk_match); migration_test_add("/migration/multifd/tcp/tls/psk/mismatch", test_multifd_tcp_tls_psk_mismatch); + if (env->has_uffd) { + migration_test_add("/migration/multifd+postcopy/tcp/tls/psk/match", + test_multifd_postcopy_tcp_tls_psk_match); + } #ifdef CONFIG_TASN1 migration_test_add("/migration/multifd/tcp/tls/x509/default-host", test_multifd_tcp_tls_x509_default_host); diff --git a/tests/qtest/npcm_gmac-test.c b/tests/qtest/npcm_gmac-test.c index c28b471..1317da2 100644 --- a/tests/qtest/npcm_gmac-test.c +++ b/tests/qtest/npcm_gmac-test.c @@ -36,7 +36,7 @@ typedef struct TestData { const GMACModule *module; } TestData; -/* Values extracted from hw/arm/npcm7xx.c */ +/* Values extracted from hw/arm/npcm8xx.c */ static const GMACModule gmac_module_list[] = { { .irq = 14, @@ -46,6 +46,14 @@ static const GMACModule gmac_module_list[] = { .irq = 15, .base_addr = 0xf0804000 }, + { + .irq = 16, + .base_addr = 0xf0806000 + }, + { + .irq = 17, + .base_addr = 0xf0808000 + } }; /* Returns the index of the GMAC module. */ @@ -174,18 +182,32 @@ static uint32_t gmac_read(QTestState *qts, const GMACModule *mod, return qtest_readl(qts, mod->base_addr + regno); } +static uint16_t pcs_read(QTestState *qts, const GMACModule *mod, + NPCMRegister regno) +{ + uint32_t write_value = (regno & 0x3ffe00) >> 9; + qtest_writel(qts, PCS_BASE_ADDRESS + NPCM_PCS_IND_AC_BA, write_value); + uint32_t read_offset = regno & 0x1ff; + return qtest_readl(qts, PCS_BASE_ADDRESS + read_offset); +} + /* Check that GMAC registers are reset to default value */ static void test_init(gconstpointer test_data) { const TestData *td = test_data; const GMACModule *mod = td->module; - QTestState *qts = qtest_init("-machine npcm750-evb"); + QTestState *qts = qtest_init("-machine npcm845-evb"); #define CHECK_REG32(regno, value) \ do { \ g_assert_cmphex(gmac_read(qts, mod, (regno)), ==, (value)); \ } while (0) +#define CHECK_REG_PCS(regno, value) \ + do { \ + g_assert_cmphex(pcs_read(qts, mod, (regno)), ==, (value)); \ + } while (0) + CHECK_REG32(NPCM_DMA_BUS_MODE, 0x00020100); CHECK_REG32(NPCM_DMA_XMT_POLL_DEMAND, 0); CHECK_REG32(NPCM_DMA_RCV_POLL_DEMAND, 0); @@ -235,6 +257,63 @@ static void test_init(gconstpointer test_data) CHECK_REG32(NPCM_GMAC_PTP_TAR, 0); CHECK_REG32(NPCM_GMAC_PTP_TTSR, 0); + if (mod->base_addr == 0xf0802000) { + CHECK_REG_PCS(NPCM_PCS_SR_CTL_ID1, 0x699e); + CHECK_REG_PCS(NPCM_PCS_SR_CTL_ID2, 0); + CHECK_REG_PCS(NPCM_PCS_SR_CTL_STS, 0x8000); + + CHECK_REG_PCS(NPCM_PCS_SR_MII_CTRL, 0x1140); + CHECK_REG_PCS(NPCM_PCS_SR_MII_STS, 0x0109); + CHECK_REG_PCS(NPCM_PCS_SR_MII_DEV_ID1, 0x699e); + CHECK_REG_PCS(NPCM_PCS_SR_MII_DEV_ID2, 0x0ced0); + CHECK_REG_PCS(NPCM_PCS_SR_MII_AN_ADV, 0x0020); + CHECK_REG_PCS(NPCM_PCS_SR_MII_LP_BABL, 0); + CHECK_REG_PCS(NPCM_PCS_SR_MII_AN_EXPN, 0); + CHECK_REG_PCS(NPCM_PCS_SR_MII_EXT_STS, 0xc000); + + CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_ABL, 0x0003); + CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_TX_MAX_DLY_LWR, 0x0038); + CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_TX_MAX_DLY_UPR, 0); + CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_TX_MIN_DLY_LWR, 0x0038); + CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_TX_MIN_DLY_UPR, 0); + CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_RX_MAX_DLY_LWR, 0x0058); + CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_RX_MAX_DLY_UPR, 0); + CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_RX_MIN_DLY_LWR, 0x0048); + CHECK_REG_PCS(NPCM_PCS_SR_TIM_SYNC_RX_MIN_DLY_UPR, 0); + + CHECK_REG_PCS(NPCM_PCS_VR_MII_MMD_DIG_CTRL1, 0x2400); + CHECK_REG_PCS(NPCM_PCS_VR_MII_AN_CTRL, 0); + CHECK_REG_PCS(NPCM_PCS_VR_MII_AN_INTR_STS, 0x000a); + CHECK_REG_PCS(NPCM_PCS_VR_MII_TC, 0); + CHECK_REG_PCS(NPCM_PCS_VR_MII_DBG_CTRL, 0); + CHECK_REG_PCS(NPCM_PCS_VR_MII_EEE_MCTRL0, 0x899c); + CHECK_REG_PCS(NPCM_PCS_VR_MII_EEE_TXTIMER, 0); + CHECK_REG_PCS(NPCM_PCS_VR_MII_EEE_RXTIMER, 0); + CHECK_REG_PCS(NPCM_PCS_VR_MII_LINK_TIMER_CTRL, 0); + CHECK_REG_PCS(NPCM_PCS_VR_MII_EEE_MCTRL1, 0); + CHECK_REG_PCS(NPCM_PCS_VR_MII_DIG_STS, 0x0010); + CHECK_REG_PCS(NPCM_PCS_VR_MII_ICG_ERRCNT1, 0); + CHECK_REG_PCS(NPCM_PCS_VR_MII_MISC_STS, 0); + CHECK_REG_PCS(NPCM_PCS_VR_MII_RX_LSTS, 0); + CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_TX_BSTCTRL0, 0x00a); + CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_TX_LVLCTRL0, 0x007f); + CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_TX_GENCTRL0, 0x0001); + CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_TX_GENCTRL1, 0); + CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_TX_STS, 0); + CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_RX_GENCTRL0, 0x0100); + CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_RX_GENCTRL1, 0x1100); + CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_RX_LOS_CTRL0, 0x000e); + CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_MPLL_CTRL0, 0x0100); + CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_MPLL_CTRL1, 0x0032); + CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_MPLL_STS, 0x0001); + CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_MISC_CTRL2, 0); + CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_LVL_CTRL, 0x0019); + CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_MISC_CTRL0, 0); + CHECK_REG_PCS(NPCM_PCS_VR_MII_MP_MISC_CTRL1, 0); + CHECK_REG_PCS(NPCM_PCS_VR_MII_DIG_CTRL2, 0); + CHECK_REG_PCS(NPCM_PCS_VR_MII_DIG_ERRCNT_SEL, 0); + } + qtest_quit(qts); } @@ -242,7 +321,7 @@ static void gmac_add_test(const char *name, const TestData* td, GTestDataFunc fn) { g_autofree char *full_name = g_strdup_printf( - "npcm7xx_gmac/gmac[%d]/%s", gmac_module_index(td->module), name); + "npcm8xx_gmac/gmac[%d]/%s", gmac_module_index(td->module), name); qtest_add_data_func(full_name, td, fn); } diff --git a/tests/qtest/qmp-cmd-test.c b/tests/qtest/qmp-cmd-test.c index 15c8824..040d042 100644 --- a/tests/qtest/qmp-cmd-test.c +++ b/tests/qtest/qmp-cmd-test.c @@ -100,6 +100,7 @@ static bool query_is_ignored(const char *cmd) /* Success depends on target arch: */ "query-cpu-definitions", /* arm, i386, ppc, s390x */ "query-gic-capabilities", /* arm */ + "query-s390x-cpu-polarization", /* s390x */ /* Success depends on target-specific build configuration: */ "query-pci", /* CONFIG_PCI */ "x-query-virtio", /* CONFIG_VIRTIO */ diff --git a/tests/qtest/test-x86-cpuid-compat.c b/tests/qtest/test-x86-cpuid-compat.c index c9de47b..456e2af 100644 --- a/tests/qtest/test-x86-cpuid-compat.c +++ b/tests/qtest/test-x86-cpuid-compat.c @@ -365,20 +365,6 @@ int main(int argc, char **argv) "level", 10); } - /* - * xlevel doesn't have any feature that triggers auto-level - * code on old machine-types. Just check that the compat code - * is working correctly: - */ - if (qtest_has_machine("pc-i440fx-2.4")) { - add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-off", - "SandyBridge", NULL, "pc-i440fx-2.4", - "xlevel", 0x80000008); - add_cpuid_test("x86/cpuid/xlevel-compat/pc-i440fx-2.4/npt-on", - "SandyBridge", "svm=on,npt=on", "pc-i440fx-2.4", - "xlevel", 0x80000008); - } - /* Test feature parsing */ add_feature_test("x86/cpuid/features/plus", "486", "+arat", diff --git a/tests/tcg/aarch64/Makefile.softmmu-target b/tests/tcg/aarch64/Makefile.softmmu-target index 9c52475..f7a7d2b 100644 --- a/tests/tcg/aarch64/Makefile.softmmu-target +++ b/tests/tcg/aarch64/Makefile.softmmu-target @@ -68,7 +68,8 @@ run-plugin-semiconsole-with-%: semiconsole # vtimer test needs EL2 QEMU_EL2_MACHINE=-machine virt,virtualization=on,gic-version=2 -cpu cortex-a57 -smp 4 -run-vtimer: QEMU_OPTS=$(QEMU_EL2_MACHINE) $(QEMU_BASE_ARGS) -kernel +QEMU_EL2_BASE_ARGS=-semihosting-config enable=on,target=native,chardev=output,arg="2" +run-vtimer: QEMU_OPTS=$(QEMU_EL2_MACHINE) $(QEMU_EL2_BASE_ARGS) -kernel # Simple Record/Replay Test .PHONY: memory-record diff --git a/tests/tcg/aarch64/system/boot.S b/tests/tcg/aarch64/system/boot.S index a5df9c1..8bfa4e4 100644 --- a/tests/tcg/aarch64/system/boot.S +++ b/tests/tcg/aarch64/system/boot.S @@ -16,6 +16,7 @@ #define semihosting_call hlt 0xf000 #define SYS_WRITEC 0x03 /* character to debug channel */ #define SYS_WRITE0 0x04 /* string to debug channel */ +#define SYS_GET_CMDLINE 0x15 /* get command line */ #define SYS_EXIT 0x18 .align 12 @@ -70,21 +71,172 @@ lower_a32_sync: lower_a32_irq: lower_a32_fiq: lower_a32_serror: + adr x1, .unexp_excp +exit_msg: mov x0, SYS_WRITE0 - adr x1, .error semihosting_call mov x0, 1 /* EXIT_FAILURE */ bl _exit /* never returns */ .section .rodata -.error: - .string "Terminated by exception.\n" +.unexp_excp: + .string "Unexpected exception.\n" +.high_el_msg: + .string "Started in lower EL than requested.\n" +.unexp_el0: + .string "Started in invalid EL.\n" + + .align 8 +.get_cmd: + .quad cmdline + .quad 128 .text .align 4 .global __start __start: + /* + * Initialise the stack for whatever EL we are in before + * anything else, we need it to be able to _exit cleanly. + * It's smaller than the stack we pass to the C code but we + * don't need much. + */ + adrp x0, system_stack_end + add x0, x0, :lo12:system_stack_end + mov sp, x0 + + /* + * The test can set the semihosting command line to the target + * EL needed for the test. However if no semihosting args are set we will + * end up with -kernel/-append data (see semihosting_arg_fallback). + * Keep the normalised target in w11. + */ + mov x0, SYS_GET_CMDLINE + adr x1, .get_cmd + semihosting_call + adrp x10, cmdline + add x10, x10, :lo12:cmdline + ldrb w11, [x10] + + /* sanity check, normalise char to EL, clamp to 1 if outside range */ + subs w11, w11, #'0' + b.lt el_default + cmp w11, #3 + b.gt el_default + b 1f + +el_high: + adr x1, .high_el_msg + b exit_msg + +el_default: + mov w11, #1 + +1: + /* Determine current Exception Level */ + mrs x0, CurrentEL + lsr x0, x0, #2 /* CurrentEL[3:2] contains the current EL */ + + /* Are we already in a lower EL than we want? */ + cmp w11, w0 + bgt el_high + + /* Branch based on current EL */ + cmp x0, #3 + b.eq setup_el3 + cmp x0, #2 + b.eq setup_el2 + cmp x0, #1 + b.eq at_testel /* Already at EL1, skip transition */ + + /* Should not be at EL0 - error out */ + adr x1, .unexp_el0 + b exit_msg + +setup_el3: + /* Ensure we trap if we get anything wrong */ + adr x0, vector_table + msr vbar_el3, x0 + + /* Does the test want to be at EL3? */ + cmp w11, #3 + beq at_testel + + /* Configure EL3 to for lower states (EL2 or EL1) */ + mrs x0, scr_el3 + orr x0, x0, #(1 << 10) /* RW = 1: EL2/EL1 execution state is AArch64 */ + orr x0, x0, #(1 << 0) /* NS = 1: Non-secure state */ + msr scr_el3, x0 + + /* + * We need to check if EL2 is actually enabled via ID_AA64PFR0_EL1, + * otherwise we should just jump straight to EL1. + */ + mrs x0, id_aa64pfr0_el1 + ubfx x0, x0, #8, #4 /* Extract EL2 field (bits 11:8) */ + cbz x0, el2_not_present /* If field is 0 no EL2 */ + + + /* Prepare SPSR for exception return to EL2 */ + mov x0, #0x3c9 /* DAIF bits and EL2h mode (9) */ + msr spsr_el3, x0 + + /* Set EL2 entry point */ + adr x0, setup_el2 + msr elr_el3, x0 + + /* Return to EL2 */ + eret + +el2_not_present: + /* Initialize SCTLR_EL1 with reset value */ + msr sctlr_el1, xzr + + /* Set EL1 entry point */ + adr x0, at_testel + msr elr_el3, x0 + + /* Prepare SPSR for exception return to EL1h with interrupts masked */ + mov x0, #0x3c5 /* DAIF bits and EL1h mode (5) */ + msr spsr_el3, x0 + + isb /* Synchronization barrier */ + eret /* Jump to EL1 */ + +setup_el2: + /* Ensure we trap if we get anything wrong */ + adr x0, vector_table + msr vbar_el2, x0 + + /* Does the test want to be at EL2? */ + cmp w11, #2 + beq at_testel + + /* Configure EL2 to allow transition to EL1 */ + mrs x0, hcr_el2 + orr x0, x0, #(1 << 31) /* RW = 1: EL1 execution state is AArch64 */ + msr hcr_el2, x0 + + /* Initialize SCTLR_EL1 with reset value */ + msr sctlr_el1, xzr + + /* Set EL1 entry point */ + adr x0, at_testel + msr elr_el2, x0 + + /* Prepare SPSR for exception return to EL1 */ + mov x0, #(0x5 << 0) /* EL1h (SPx), with interrupts disabled */ + msr spsr_el2, x0 + + /* Return to EL1 */ + eret + + /* + * At the target EL for the test, usually EL1. Note we still + * set everything up as if we were at EL1. + */ +at_testel: /* Installs a table of exception vectors to catch and handle all exceptions by terminating the process with a diagnostic. */ adr x0, vector_table @@ -100,7 +252,7 @@ __start: * maps RAM to the first Gb. The stage2 tables have two 2mb * translation block entries covering a series of adjacent * 4k pages. - */ + */ /* Stage 1 entry: indexed by IA[38:30] */ adr x1, . /* phys address */ @@ -198,7 +350,8 @@ __start: orr x0, x0, #(3 << 16) msr cpacr_el1, x0 - /* Setup some stack space and enter the test code. + /* + * Setup some stack space before we enter the test code. * Assume everything except the return value is garbage when we * return, we won't need it. */ @@ -233,6 +386,11 @@ __sys_outc: ret .data + + .align 8 +cmdline: + .space 128, 0 + .align 12 /* Translation table @@ -246,6 +404,10 @@ ttb_stage2: .space 4096, 0 .align 12 +system_stack: + .space 4096, 0 +system_stack_end: + stack: .space 65536, 0 stack_end: diff --git a/tests/tcg/plugins/meson.build b/tests/tcg/plugins/meson.build index 41f02f2..0293422 100644 --- a/tests/tcg/plugins/meson.build +++ b/tests/tcg/plugins/meson.build @@ -17,7 +17,7 @@ endif if t.length() > 0 alias_target('test-plugins', t) else - run_target('test-plugins', command: find_program('true')) + run_target('test-plugins', command: [python, '-c', '']) endif plugin_modules += t diff --git a/tests/tcg/x86_64/fma.c b/tests/tcg/x86_64/fma.c index 09c622e..3421961 100644 --- a/tests/tcg/x86_64/fma.c +++ b/tests/tcg/x86_64/fma.c @@ -79,14 +79,21 @@ static testdata tests[] = { /* * Flushing of denormal outputs to zero should also happen after * rounding, so setting FTZ should not affect the result or the flags. - * QEMU currently does not emulate this correctly because we do the - * flush-to-zero check before rounding, so we incorrectly produce a - * zero result and set Underflow as well as Precision. */ -#ifdef ENABLE_FAILING_TESTS { 0x3fdfffffffffffff, 0x001fffffffffffff, 0x801fffffffffffff, true, 0x8010000000000000, 0x20 }, /* Enabling FTZ shouldn't change flags */ -#endif + /* + * normal * 0 + a denormal. With FTZ disabled this gives an exact + * result (equal to the input denormal) that has consumed the denormal. + */ + { 0x3cc8000000000000, 0x0000000000000000, 0x8008000000000000, false, + 0x8008000000000000, 0x2 }, /* Denormal */ + /* + * With FTZ enabled, this consumes the denormal, returns zero (because + * flushed) and indicates also Underflow and Precision. + */ + { 0x3cc8000000000000, 0x0000000000000000, 0x8008000000000000, true, + 0x8000000000000000, 0x32 }, /* Precision, Underflow, Denormal */ }; int main(void) diff --git a/tests/uefi-test-tools/Makefile b/tests/uefi-test-tools/Makefile index f4eaebd..8ee6fb3 100644 --- a/tests/uefi-test-tools/Makefile +++ b/tests/uefi-test-tools/Makefile @@ -12,7 +12,7 @@ edk2_dir := ../../roms/edk2 images_dir := ../data/uefi-boot-images -emulation_targets := arm aarch64 i386 x86_64 riscv64 +emulation_targets := arm aarch64 i386 x86_64 riscv64 loongarch64 uefi_binaries := bios-tables-test intermediate_suffixes := .efi .fat .iso.raw @@ -56,7 +56,8 @@ Build/%.iso.raw: Build/%.fat # stripped from, the argument. map_arm_to_uefi = $(subst arm,ARM,$(1)) map_aarch64_to_uefi = $(subst aarch64,AA64,$(call map_arm_to_uefi,$(1))) -map_riscv64_to_uefi = $(subst riscv64,RISCV64,$(call map_aarch64_to_uefi,$(1))) +map_loongarch64_to_uefi = $(subst loongarch64,LOONGARCH64,$(call map_aarch64_to_uefi,$(1))) +map_riscv64_to_uefi = $(subst riscv64,RISCV64,$(call map_loongarch64_to_uefi,$(1))) map_i386_to_uefi = $(subst i386,IA32,$(call map_riscv64_to_uefi,$(1))) map_x86_64_to_uefi = $(subst x86_64,X64,$(call map_i386_to_uefi,$(1))) map_to_uefi = $(subst .,,$(call map_x86_64_to_uefi,$(1))) diff --git a/tests/uefi-test-tools/UefiTestToolsPkg/UefiTestToolsPkg.dsc b/tests/uefi-test-tools/UefiTestToolsPkg/UefiTestToolsPkg.dsc index 0902fd3..facf8df 100644 --- a/tests/uefi-test-tools/UefiTestToolsPkg/UefiTestToolsPkg.dsc +++ b/tests/uefi-test-tools/UefiTestToolsPkg/UefiTestToolsPkg.dsc @@ -19,7 +19,7 @@ PLATFORM_VERSION = 0.1 PLATFORM_NAME = UefiTestTools SKUID_IDENTIFIER = DEFAULT - SUPPORTED_ARCHITECTURES = ARM|AARCH64|IA32|X64|RISCV64 + SUPPORTED_ARCHITECTURES = ARM|AARCH64|IA32|X64|RISCV64|LOONGARCH64 BUILD_TARGETS = DEBUG [BuildOptions.IA32] @@ -65,6 +65,10 @@ [LibraryClasses.RISCV64] BaseMemoryLib|MdePkg/Library/BaseMemoryLib/BaseMemoryLib.inf +[LibraryClasses.LOONGARCH64] + BaseMemoryLib|MdePkg/Library/BaseMemoryLib/BaseMemoryLib.inf + StackCheckLib|MdePkg/Library/StackCheckLibNull/StackCheckLibNull.inf + [PcdsFixedAtBuild] gEfiMdePkgTokenSpaceGuid.PcdDebugPrintErrorLevel|0x8040004F gEfiMdePkgTokenSpaceGuid.PcdDebugPropertyMask|0x2F diff --git a/tests/uefi-test-tools/uefi-test-build.config b/tests/uefi-test-tools/uefi-test-build.config index a4c61fc..8bf4826 100644 --- a/tests/uefi-test-tools/uefi-test-build.config +++ b/tests/uefi-test-tools/uefi-test-build.config @@ -22,6 +22,16 @@ arch = AARCH64 cpy1 = AARCH64/BiosTablesTest.efi bios-tables-test.aarch64.efi #################################################################################### +# loongarch64 + +[build.loongarch64] +conf = UefiTestToolsPkg/UefiTestToolsPkg.dsc +plat = UefiTestTools +dest = ./Build +arch = LOONGARCH64 +cpy1 = LOONGARCH64/BiosTablesTest.efi bios-tables-test.loongarch64.efi + +#################################################################################### # riscv64 [build.riscv64] diff --git a/tests/unit/test-aio-multithread.c b/tests/unit/test-aio-multithread.c index 08d4570..0ead6bf 100644 --- a/tests/unit/test-aio-multithread.c +++ b/tests/unit/test-aio-multithread.c @@ -305,7 +305,9 @@ static void mcs_mutex_lock(void) prev = qatomic_xchg(&mutex_head, id); if (prev != -1) { qatomic_set(&nodes[prev].next, id); - qemu_futex_wait(&nodes[id].locked, 1); + while (qatomic_read(&nodes[id].locked) == 1) { + qemu_futex_wait(&nodes[id].locked, 1); + } } } @@ -328,7 +330,7 @@ static void mcs_mutex_unlock(void) /* Wake up the next in line. */ next = qatomic_read(&nodes[id].next); nodes[next].locked = 0; - qemu_futex_wake(&nodes[next].locked, 1); + qemu_futex_wake_single(&nodes[next].locked); } static void test_multi_fair_mutex_entry(void *opaque) diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c index 290cd2a..59c2793 100644 --- a/tests/unit/test-bdrv-drain.c +++ b/tests/unit/test-bdrv-drain.c @@ -772,9 +772,11 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, tjob->bs = src; job = &tjob->common; + bdrv_drain_all_begin(); bdrv_graph_wrlock(); block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); switch (result) { case TEST_JOB_SUCCESS: @@ -953,11 +955,13 @@ static void bdrv_test_top_close(BlockDriverState *bs) { BdrvChild *c, *next_c; + bdrv_drain_all_begin(); bdrv_graph_wrlock(); QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) { bdrv_unref_child(bs, c); } bdrv_graph_wrunlock(); + bdrv_drain_all_end(); } static int coroutine_fn GRAPH_RDLOCK @@ -1014,7 +1018,9 @@ static void coroutine_fn test_co_delete_by_drain(void *opaque) bdrv_graph_co_rdlock(); QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) { bdrv_graph_co_rdunlock(); + bdrv_drain_all_begin(); bdrv_co_unref_child(bs, c); + bdrv_drain_all_end(); bdrv_graph_co_rdlock(); } bdrv_graph_co_rdunlock(); @@ -1047,10 +1053,12 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, &error_abort); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_attach_child(bs, null_bs, "null-child", &child_of_bds, BDRV_CHILD_DATA, &error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); /* This child will be the one to pass to requests through to, and * it will stall until a drain occurs */ @@ -1058,21 +1066,25 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, &error_abort); child_bs->total_sectors = 65536 >> BDRV_SECTOR_BITS; /* Takes our reference to child_bs */ + bdrv_drain_all_begin(); bdrv_graph_wrlock(); tts->wait_child = bdrv_attach_child(bs, child_bs, "wait-child", &child_of_bds, BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY, &error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); /* This child is just there to be deleted * (for detach_instead_of_delete == true) */ null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, &error_abort); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_attach_child(bs, null_bs, "null-child", &child_of_bds, BDRV_CHILD_DATA, &error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); blk_insert_bs(blk, bs, &error_abort); @@ -1155,6 +1167,7 @@ static void no_coroutine_fn detach_indirect_bh(void *opaque) bdrv_dec_in_flight(data->child_b->bs); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_unref_child(data->parent_b, data->child_b); @@ -1163,6 +1176,7 @@ static void no_coroutine_fn detach_indirect_bh(void *opaque) &child_of_bds, BDRV_CHILD_DATA, &error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); } static void coroutine_mixed_fn detach_by_parent_aio_cb(void *opaque, int ret) @@ -1260,6 +1274,7 @@ static void TSA_NO_TSA test_detach_indirect(bool by_parent_cb) /* Set child relationships */ bdrv_ref(b); bdrv_ref(a); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); child_b = bdrv_attach_child(parent_b, b, "PB-B", &child_of_bds, BDRV_CHILD_DATA, &error_abort); @@ -1271,6 +1286,7 @@ static void TSA_NO_TSA test_detach_indirect(bool by_parent_cb) by_parent_cb ? &child_of_bds : &detach_by_driver_cb_class, BDRV_CHILD_DATA, &error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); g_assert_cmpint(parent_a->refcnt, ==, 1); g_assert_cmpint(parent_b->refcnt, ==, 1); @@ -1396,14 +1412,10 @@ static void test_set_aio_context(void) bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, &error_abort); - bdrv_drained_begin(bs); bdrv_try_change_aio_context(bs, ctx_a, NULL, &error_abort); - bdrv_drained_end(bs); - bdrv_drained_begin(bs); bdrv_try_change_aio_context(bs, ctx_b, NULL, &error_abort); bdrv_try_change_aio_context(bs, qemu_get_aio_context(), NULL, &error_abort); - bdrv_drained_end(bs); bdrv_unref(bs); iothread_join(a); @@ -1687,6 +1699,7 @@ static void test_drop_intermediate_poll(void) * Establish the chain last, so the chain links are the first * elements in the BDS.parents lists */ + bdrv_drain_all_begin(); bdrv_graph_wrlock(); for (i = 0; i < 3; i++) { if (i) { @@ -1696,6 +1709,7 @@ static void test_drop_intermediate_poll(void) } } bdrv_graph_wrunlock(); + bdrv_drain_all_end(); job = block_job_create("job", &test_simple_job_driver, NULL, job_node, 0, BLK_PERM_ALL, 0, 0, NULL, NULL, &error_abort); @@ -1942,10 +1956,12 @@ static void do_test_replace_child_mid_drain(int old_drain_count, new_child_bs->total_sectors = 1; bdrv_ref(old_child_bs); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds, BDRV_CHILD_COW, &error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); parent_s->setup_completed = true; for (i = 0; i < old_drain_count; i++) { diff --git a/tests/unit/test-bdrv-graph-mod.c b/tests/unit/test-bdrv-graph-mod.c index d743abb..7b03ebe 100644 --- a/tests/unit/test-bdrv-graph-mod.c +++ b/tests/unit/test-bdrv-graph-mod.c @@ -137,10 +137,12 @@ static void test_update_perm_tree(void) blk_insert_bs(root, bs, &error_abort); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_attach_child(filter, bs, "child", &child_of_bds, BDRV_CHILD_DATA, &error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); ret = bdrv_append(filter, bs, NULL); g_assert_cmpint(ret, <, 0); @@ -204,11 +206,13 @@ static void test_should_update_child(void) bdrv_set_backing_hd(target, bs, &error_abort); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); g_assert(target->backing->bs == bs); bdrv_attach_child(filter, target, "target", &child_of_bds, BDRV_CHILD_DATA, &error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); bdrv_append(filter, bs, &error_abort); bdrv_graph_rdlock_main_loop(); @@ -244,6 +248,7 @@ static void test_parallel_exclusive_write(void) bdrv_ref(base); bdrv_ref(fl1); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_attach_child(top, fl1, "backing", &child_of_bds, BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, @@ -257,6 +262,7 @@ static void test_parallel_exclusive_write(void) bdrv_replace_node(fl1, fl2, &error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); bdrv_drained_end(fl2); bdrv_drained_end(fl1); @@ -363,6 +369,7 @@ static void test_parallel_perm_update(void) */ bdrv_ref(base); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_attach_child(top, ws, "file", &child_of_bds, BDRV_CHILD_DATA, &error_abort); @@ -377,6 +384,7 @@ static void test_parallel_perm_update(void) BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, &error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); /* Select fl1 as first child to be active */ s->selected = c_fl1; @@ -430,11 +438,13 @@ static void test_append_greedy_filter(void) BlockDriverState *base = no_perm_node("base"); BlockDriverState *fl = exclusive_writer_node("fl1"); + bdrv_drain_all_begin(); bdrv_graph_wrlock(); bdrv_attach_child(top, base, "backing", &child_of_bds, BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, &error_abort); bdrv_graph_wrunlock(); + bdrv_drain_all_end(); bdrv_append(fl, base, &error_abort); bdrv_unref(fl); diff --git a/tests/unit/test-char.c b/tests/unit/test-char.c index 60a843b..f30a39f 100644 --- a/tests/unit/test-char.c +++ b/tests/unit/test-char.c @@ -993,7 +993,7 @@ static void char_udp_test_internal(Chardev *reuse_chr, int sock) struct sockaddr_in other; SocketIdleData d = { 0, }; Chardev *chr; - CharBackend *be; + CharBackend stack_be, *be = &stack_be; socklen_t alen = sizeof(other); int ret; char buf[10]; @@ -1009,7 +1009,6 @@ static void char_udp_test_internal(Chardev *reuse_chr, int sock) chr = qemu_chr_new("client", tmp, NULL); g_assert_nonnull(chr); - be = g_alloca(sizeof(CharBackend)); qemu_chr_fe_init(be, chr, &error_abort); } diff --git a/tests/unit/test-qga.c b/tests/unit/test-qga.c index 541b08a..587e30c 100644 --- a/tests/unit/test-qga.c +++ b/tests/unit/test-qga.c @@ -332,6 +332,22 @@ static void test_qga_get_fsinfo(gconstpointer fix) } } +static void test_qga_get_load(gconstpointer fix) +{ + const TestFixture *fixture = fix; + g_autoptr(QDict) ret = NULL; + QDict *load; + + ret = qmp_fd(fixture->fd, "{'execute': 'guest-get-load'}"); + g_assert_nonnull(ret); + qmp_assert_no_error(ret); + + load = qdict_get_qdict(ret, "return"); + g_assert(qdict_haskey(load, "load1m")); + g_assert(qdict_haskey(load, "load5m")); + g_assert(qdict_haskey(load, "load15m")); +} + static void test_qga_get_memory_block_info(gconstpointer fix) { const TestFixture *fixture = fix; @@ -1105,6 +1121,7 @@ int main(int argc, char **argv) g_test_add_data_func("/qga/get-vcpus", &fix, test_qga_get_vcpus); } g_test_add_data_func("/qga/get-fsinfo", &fix, test_qga_get_fsinfo); + g_test_add_data_func("/qga/get-load", &fix, test_qga_get_load); g_test_add_data_func("/qga/get-memory-block-info", &fix, test_qga_get_memory_block_info); g_test_add_data_func("/qga/get-memory-blocks", &fix, diff --git a/tests/unit/test-util-sockets.c b/tests/unit/test-util-sockets.c index 8492f4d..ee66d72 100644 --- a/tests/unit/test-util-sockets.c +++ b/tests/unit/test-util-sockets.c @@ -341,8 +341,12 @@ static void inet_parse_test_helper(const char *str, int rc = inet_parse(&addr, str, &error); if (success) { + if (error) { + error_report_err(error); + } g_assert_cmpint(rc, ==, 0); } else { + error_free(error); g_assert_cmpint(rc, <, 0); } if (exp_addr != NULL) { diff --git a/tests/vm/README b/tests/vm/README index f9c04cc..14ac323 100644 --- a/tests/vm/README +++ b/tests/vm/README @@ -1 +1 @@ -See docs/devel/testing.rst for help. +See docs/devel/testing/main.rst for help. diff --git a/trace/meson.build b/trace/meson.build index 3df4549..9c42a57 100644 --- a/trace/meson.build +++ b/trace/meson.build @@ -4,7 +4,7 @@ trace_events_files = [] foreach item : [ '.' ] + trace_events_subdirs + qapi_trace_events if item in qapi_trace_events trace_events_file = item - group_name = item.full_path().split('/')[-1].underscorify() + group_name = fs.name(item).underscorify() else trace_events_file = meson.project_source_root() / item / 'trace-events' group_name = item == '.' ? 'root' : item.underscorify() @@ -57,10 +57,11 @@ foreach item : [ '.' ] + trace_events_subdirs + qapi_trace_events endif endforeach +cat = [ python, '-c', 'import fileinput; [print(line, end="") for line in fileinput.input()]', '@INPUT@' ] trace_events_all = custom_target('trace-events-all', output: 'trace-events-all', input: trace_events_files, - command: [ 'cat', '@INPUT@' ], + command: cat, capture: true, install: get_option('trace_backends') != [ 'nop' ], install_dir: qemu_datadir) diff --git a/ui/clipboard.c b/ui/clipboard.c index 132086e..ec00a0b 100644 --- a/ui/clipboard.c +++ b/ui/clipboard.c @@ -1,4 +1,5 @@ #include "qemu/osdep.h" +#include "system/runstate.h" #include "ui/clipboard.h" #include "trace.h" @@ -7,8 +8,62 @@ static NotifierList clipboard_notifiers = static QemuClipboardInfo *cbinfo[QEMU_CLIPBOARD_SELECTION__COUNT]; +static VMChangeStateEntry *cb_change_state_entry = NULL; + +static bool cb_reset_serial_on_resume = false; + +static const VMStateDescription vmstate_cbcontent = { + .name = "clipboard/content", + .version_id = 0, + .minimum_version_id = 0, + .fields = (const VMStateField[]) { + VMSTATE_BOOL(available, QemuClipboardContent), + VMSTATE_BOOL(requested, QemuClipboardContent), + VMSTATE_UINT32(size, QemuClipboardContent), + VMSTATE_VBUFFER_ALLOC_UINT32(data, QemuClipboardContent, 0, 0, size), + VMSTATE_END_OF_LIST() + } +}; + +const VMStateDescription vmstate_cbinfo = { + .name = "clipboard", + .version_id = 0, + .minimum_version_id = 0, + .fields = (const VMStateField[]) { + VMSTATE_INT32(selection, QemuClipboardInfo), + VMSTATE_BOOL(has_serial, QemuClipboardInfo), + VMSTATE_UINT32(serial, QemuClipboardInfo), + VMSTATE_STRUCT_ARRAY(types, QemuClipboardInfo, QEMU_CLIPBOARD_TYPE__COUNT, 0, vmstate_cbcontent, QemuClipboardContent), + VMSTATE_END_OF_LIST() + } +}; + +static void qemu_clipboard_change_state(void *opaque, bool running, RunState state) +{ + int i; + + if (!running) { + return; + } + + if (cb_reset_serial_on_resume) { + qemu_clipboard_reset_serial(); + } + + for (i = 0; i < QEMU_CLIPBOARD_SELECTION__COUNT; i++) { + if (cbinfo[i]) { + qemu_clipboard_update(cbinfo[i]); + } + } + +} + void qemu_clipboard_peer_register(QemuClipboardPeer *peer) { + if (cb_change_state_entry == NULL) { + cb_change_state_entry = qemu_add_vm_change_state_handler(qemu_clipboard_change_state, NULL); + } + notifier_list_add(&clipboard_notifiers, &peer->notifier); } @@ -83,7 +138,9 @@ void qemu_clipboard_update(QemuClipboardInfo *info) } } - notifier_list_notify(&clipboard_notifiers, ¬ify); + if (runstate_is_running() || runstate_check(RUN_STATE_SUSPENDED)) { + notifier_list_notify(&clipboard_notifiers, ¬ify); + } if (cbinfo[info->selection] != info) { qemu_clipboard_info_unref(cbinfo[info->selection]); @@ -163,7 +220,12 @@ void qemu_clipboard_reset_serial(void) info->serial = 0; } } - notifier_list_notify(&clipboard_notifiers, ¬ify); + + if (runstate_is_running() || runstate_check(RUN_STATE_SUSPENDED)) { + notifier_list_notify(&clipboard_notifiers, ¬ify); + } else { + cb_reset_serial_on_resume = true; + } } void qemu_clipboard_set_data(QemuClipboardPeer *peer, diff --git a/ui/egl-helpers.c b/ui/egl-helpers.c index 9cda2bb..5503a79 100644 --- a/ui/egl-helpers.c +++ b/ui/egl-helpers.c @@ -93,14 +93,18 @@ void egl_fb_destroy(egl_fb *fb) fb->width = 0; fb->height = 0; + fb->x = 0; + fb->y = 0; fb->texture = 0; fb->framebuffer = 0; } -void egl_fb_setup_default(egl_fb *fb, int width, int height) +void egl_fb_setup_default(egl_fb *fb, int width, int height, int x, int y) { fb->width = width; fb->height = height; + fb->x = x; + fb->y = y; fb->framebuffer = 0; /* default framebuffer */ } @@ -145,6 +149,7 @@ void egl_fb_blit(egl_fb *dst, egl_fb *src, bool flip) glBindFramebuffer(GL_READ_FRAMEBUFFER, src->framebuffer); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst->framebuffer); glViewport(0, 0, dst->width, dst->height); + glClear(GL_COLOR_BUFFER_BIT); if (src->dmabuf) { x1 = qemu_dmabuf_get_x(src->dmabuf); @@ -161,7 +166,8 @@ void egl_fb_blit(egl_fb *dst, egl_fb *src, bool flip) x2 = x1 + w; glBlitFramebuffer(x1, y1, x2, y2, - 0, 0, dst->width, dst->height, + dst->x, dst->y, + dst->x + dst->width, dst->y + dst->height, GL_COLOR_BUFFER_BIT, GL_LINEAR); } diff --git a/ui/gtk-clipboard.c b/ui/gtk-clipboard.c index 8d8a636..65d89ec 100644 --- a/ui/gtk-clipboard.c +++ b/ui/gtk-clipboard.c @@ -19,6 +19,7 @@ */ #include "qemu/osdep.h" +#include "qemu/error-report.h" #include "qemu/main-loop.h" #include "ui/gtk.h" @@ -95,11 +96,13 @@ static void gd_clipboard_update_info(GtkDisplayState *gd, gtk_clipboard_clear(gd->gtkcb[s]); if (targets) { gd->cbowner[s] = true; - gtk_clipboard_set_with_data(gd->gtkcb[s], - targets, n_targets, - gd_clipboard_get_data, - gd_clipboard_clear, - gd); + if (!gtk_clipboard_set_with_data(gd->gtkcb[s], + targets, n_targets, + gd_clipboard_get_data, + gd_clipboard_clear, + gd)) { + warn_report("Failed to set GTK clipboard"); + } gtk_target_table_free(targets, n_targets); } diff --git a/ui/gtk-egl.c b/ui/gtk-egl.c index f7a428c..0b787be 100644 --- a/ui/gtk-egl.c +++ b/ui/gtk-egl.c @@ -70,16 +70,18 @@ void gd_egl_draw(VirtualConsole *vc) QemuDmaBuf *dmabuf = vc->gfx.guest_fb.dmabuf; int fence_fd; #endif - int ww, wh, ws; + int ww, wh, pw, ph, gs; if (!vc->gfx.gls) { return; } window = gtk_widget_get_window(vc->gfx.drawing_area); - ws = gdk_window_get_scale_factor(window); - ww = gdk_window_get_width(window) * ws; - wh = gdk_window_get_height(window) * ws; + gs = gdk_window_get_scale_factor(window); + ww = gdk_window_get_width(window); + wh = gdk_window_get_height(window); + pw = ww * gs; + ph = wh * gs; if (vc->gfx.scanout_mode) { #ifdef CONFIG_GBM @@ -93,8 +95,9 @@ void gd_egl_draw(VirtualConsole *vc) #endif gd_egl_scanout_flush(&vc->gfx.dcl, 0, 0, vc->gfx.w, vc->gfx.h); - vc->gfx.scale_x = (double)ww / surface_width(vc->gfx.ds); - vc->gfx.scale_y = (double)wh / surface_height(vc->gfx.ds); + gd_update_scale(vc, ww, wh, + surface_width(vc->gfx.ds), + surface_height(vc->gfx.ds)); glFlush(); #ifdef CONFIG_GBM @@ -115,13 +118,14 @@ void gd_egl_draw(VirtualConsole *vc) eglMakeCurrent(qemu_egl_display, vc->gfx.esurface, vc->gfx.esurface, vc->gfx.ectx); - surface_gl_setup_viewport(vc->gfx.gls, vc->gfx.ds, ww, wh); + surface_gl_setup_viewport(vc->gfx.gls, vc->gfx.ds, pw, ph); surface_gl_render_texture(vc->gfx.gls, vc->gfx.ds); eglSwapBuffers(qemu_egl_display, vc->gfx.esurface); - vc->gfx.scale_x = (double)ww / surface_width(vc->gfx.ds); - vc->gfx.scale_y = (double)wh / surface_height(vc->gfx.ds); + gd_update_scale(vc, ww, wh, + surface_width(vc->gfx.ds), + surface_height(vc->gfx.ds)); glFlush(); } @@ -336,7 +340,11 @@ void gd_egl_scanout_flush(DisplayChangeListener *dcl, { VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl); GdkWindow *window; - int ww, wh, ws; + int px_offset, py_offset; + int gs; + int pw_widget, ph_widget, pw_surface, ph_surface; + int ww_widget, wh_widget, ww_surface, wh_surface; + int fbw, fbh; if (!vc->gfx.scanout_mode) { return; @@ -349,10 +357,32 @@ void gd_egl_scanout_flush(DisplayChangeListener *dcl, vc->gfx.esurface, vc->gfx.ectx); window = gtk_widget_get_window(vc->gfx.drawing_area); - ws = gdk_window_get_scale_factor(window); - ww = gdk_window_get_width(window) * ws; - wh = gdk_window_get_height(window) * ws; - egl_fb_setup_default(&vc->gfx.win_fb, ww, wh); + gs = gdk_window_get_scale_factor(window); + ww_widget = gdk_window_get_width(window); + wh_widget = gdk_window_get_height(window); + fbw = surface_width(vc->gfx.ds); + fbh = surface_height(vc->gfx.ds); + + gd_update_scale(vc, ww_widget, wh_widget, fbw, fbh); + + ww_surface = fbw * vc->gfx.scale_x; + wh_surface = fbh * vc->gfx.scale_y; + pw_widget = ww_widget * gs; + ph_widget = wh_widget * gs; + pw_surface = ww_surface * gs; + ph_surface = wh_surface * gs; + + px_offset = 0; + py_offset = 0; + if (pw_widget > pw_surface) { + px_offset = (pw_widget - pw_surface) / 2; + } + if (ph_widget > ph_surface) { + py_offset = (ph_widget - ph_surface) / 2; + } + + egl_fb_setup_default(&vc->gfx.win_fb, pw_surface, ph_surface, + px_offset, py_offset); if (vc->gfx.cursor_fb.texture) { egl_texture_blit(vc->gfx.gls, &vc->gfx.win_fb, &vc->gfx.guest_fb, vc->gfx.y0_top); diff --git a/ui/gtk-gl-area.c b/ui/gtk-gl-area.c index 2c9a0db..8151cc4 100644 --- a/ui/gtk-gl-area.c +++ b/ui/gtk-gl-area.c @@ -42,16 +42,37 @@ void gd_gl_area_draw(VirtualConsole *vc) #ifdef CONFIG_GBM QemuDmaBuf *dmabuf = vc->gfx.guest_fb.dmabuf; #endif - int ww, wh, ws, y1, y2; + int pw, ph, gs, y1, y2; + int ww, wh; + int ww_surface, wh_surface; + int fbw, fbh; + int wx_offset, wy_offset; if (!vc->gfx.gls) { return; } gtk_gl_area_make_current(GTK_GL_AREA(vc->gfx.drawing_area)); - ws = gdk_window_get_scale_factor(gtk_widget_get_window(vc->gfx.drawing_area)); - ww = gtk_widget_get_allocated_width(vc->gfx.drawing_area) * ws; - wh = gtk_widget_get_allocated_height(vc->gfx.drawing_area) * ws; + gs = gdk_window_get_scale_factor(gtk_widget_get_window(vc->gfx.drawing_area)); + fbw = surface_width(vc->gfx.ds); + fbh = surface_height(vc->gfx.ds); + ww = gtk_widget_get_allocated_width(vc->gfx.drawing_area); + wh = gtk_widget_get_allocated_height(vc->gfx.drawing_area); + pw = ww * gs; + ph = wh * gs; + + gd_update_scale(vc, ww, wh, fbw, fbh); + + ww_surface = fbw * vc->gfx.scale_x; + wh_surface = fbh * vc->gfx.scale_y; + + wx_offset = wy_offset = 0; + if (ww > ww_surface) { + wx_offset = (ww - ww_surface) / 2; + } + if (wh > wh_surface) { + wy_offset = (wh - wh_surface) / 2; + } if (vc->gfx.scanout_mode) { if (!vc->gfx.guest_fb.framebuffer) { @@ -71,11 +92,29 @@ void gd_gl_area_draw(VirtualConsole *vc) glBindFramebuffer(GL_READ_FRAMEBUFFER, vc->gfx.guest_fb.framebuffer); /* GtkGLArea sets GL_DRAW_FRAMEBUFFER for us */ - glViewport(0, 0, ww, wh); + if (wx_offset > 0) { + glEnable(GL_SCISSOR_TEST); + glScissor(0, 0, wx_offset * gs, wh * gs); + glClear(GL_COLOR_BUFFER_BIT); + glScissor((ww - wx_offset) * gs, 0, wx_offset * gs, wh * gs); + glClear(GL_COLOR_BUFFER_BIT); + glDisable(GL_SCISSOR_TEST); + } + if (wy_offset > 0) { + glEnable(GL_SCISSOR_TEST); + glScissor(0, 0, ww * gs, wy_offset * gs); + glClear(GL_COLOR_BUFFER_BIT); + glScissor(0, (wh - wy_offset) * gs, ww * gs, wy_offset * gs); + glClear(GL_COLOR_BUFFER_BIT); + glDisable(GL_SCISSOR_TEST); + } + + glViewport(0, 0, pw, ph); y1 = vc->gfx.y0_top ? 0 : vc->gfx.h; y2 = vc->gfx.y0_top ? vc->gfx.h : 0; glBlitFramebuffer(0, y1, vc->gfx.w, y2, - 0, 0, ww, wh, + wx_offset * gs, wy_offset * gs, + (ww - wx_offset) * gs, (wh - wy_offset) * gs, GL_COLOR_BUFFER_BIT, GL_NEAREST); #ifdef CONFIG_GBM if (dmabuf) { @@ -101,7 +140,7 @@ void gd_gl_area_draw(VirtualConsole *vc) } gtk_gl_area_make_current(GTK_GL_AREA(vc->gfx.drawing_area)); - surface_gl_setup_viewport(vc->gfx.gls, vc->gfx.ds, ww, wh); + surface_gl_setup_viewport(vc->gfx.gls, vc->gfx.ds, pw, ph); surface_gl_render_texture(vc->gfx.gls, vc->gfx.ds); } } @@ -387,16 +387,16 @@ static void *gd_win32_get_hwnd(VirtualConsole *vc) /** DisplayState Callbacks **/ static void gd_update(DisplayChangeListener *dcl, - int x, int y, int w, int h) + int fbx, int fby, int fbw, int fbh) { VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl); GdkWindow *win; - int x1, x2, y1, y2; - int mx, my; - int fbw, fbh; - int ww, wh; + int wx1, wx2, wy1, wy2; + int wx_offset, wy_offset; + int ww_surface, wh_surface; + int ww_widget, wh_widget; - trace_gd_update(vc->label, x, y, w, h); + trace_gd_update(vc->label, fbx, fby, fbw, fbh); if (!gtk_widget_get_realized(vc->gfx.drawing_area)) { return; @@ -405,35 +405,36 @@ static void gd_update(DisplayChangeListener *dcl, if (vc->gfx.convert) { pixman_image_composite(PIXMAN_OP_SRC, vc->gfx.ds->image, NULL, vc->gfx.convert, - x, y, 0, 0, x, y, w, h); + fbx, fby, 0, 0, fbx, fby, fbw, fbh); } - x1 = floor(x * vc->gfx.scale_x); - y1 = floor(y * vc->gfx.scale_y); + wx1 = floor(fbx * vc->gfx.scale_x); + wy1 = floor(fby * vc->gfx.scale_y); - x2 = ceil(x * vc->gfx.scale_x + w * vc->gfx.scale_x); - y2 = ceil(y * vc->gfx.scale_y + h * vc->gfx.scale_y); + wx2 = ceil(fbx * vc->gfx.scale_x + fbw * vc->gfx.scale_x); + wy2 = ceil(fby * vc->gfx.scale_y + fbh * vc->gfx.scale_y); - fbw = surface_width(vc->gfx.ds) * vc->gfx.scale_x; - fbh = surface_height(vc->gfx.ds) * vc->gfx.scale_y; + ww_surface = surface_width(vc->gfx.ds) * vc->gfx.scale_x; + wh_surface = surface_height(vc->gfx.ds) * vc->gfx.scale_y; win = gtk_widget_get_window(vc->gfx.drawing_area); if (!win) { return; } - ww = gdk_window_get_width(win); - wh = gdk_window_get_height(win); + ww_widget = gdk_window_get_width(win); + wh_widget = gdk_window_get_height(win); - mx = my = 0; - if (ww > fbw) { - mx = (ww - fbw) / 2; + wx_offset = wy_offset = 0; + if (ww_widget > ww_surface) { + wx_offset = (ww_widget - ww_surface) / 2; } - if (wh > fbh) { - my = (wh - fbh) / 2; + if (wh_widget > wh_surface) { + wy_offset = (wh_widget - wh_surface) / 2; } gtk_widget_queue_draw_area(vc->gfx.drawing_area, - mx + x1, my + y1, (x2 - x1), (y2 - y1)); + wx_offset + wx1, wy_offset + wy1, + (wx2 - wx1), (wy2 - wy1)); } static void gd_refresh(DisplayChangeListener *dcl) @@ -771,8 +772,21 @@ static void gd_resize_event(GtkGLArea *area, gint width, gint height, gpointer *opaque) { VirtualConsole *vc = (void *)opaque; + double pw = width, ph = height; + double sx = vc->gfx.scale_x, sy = vc->gfx.scale_y; + GdkWindow *window = gtk_widget_get_window(GTK_WIDGET(area)); + const int gs = gdk_window_get_scale_factor(window); - gd_set_ui_size(vc, width, height); + if (!vc->s->free_scale && !vc->s->full_screen) { + pw /= sx; + ph /= sy; + } + + /** + * width and height here are in pixel coordinate, so we must divide it + * by global window scale (gs) + */ + gd_set_ui_size(vc, pw / gs, ph / gs); } #endif @@ -800,12 +814,95 @@ void gd_update_monitor_refresh_rate(VirtualConsole *vc, GtkWidget *widget) #endif } +void gd_update_scale(VirtualConsole *vc, int ww, int wh, int fbw, int fbh) +{ + if (!vc) { + return; + } + + if (vc->s->full_screen) { + vc->gfx.scale_x = (double)ww / fbw; + vc->gfx.scale_y = (double)wh / fbh; + } else if (vc->s->free_scale) { + double sx, sy; + + sx = (double)ww / fbw; + sy = (double)wh / fbh; + + vc->gfx.scale_x = vc->gfx.scale_y = MIN(sx, sy); + } +} +/** + * DOC: Coordinate handling. + * + * We are coping with sizes and positions in various coordinates and the + * handling of these coordinates is somewhat confusing. It would benefit us + * all if we define these coordinates explicitly and clearly. Besides, it's + * also helpful to follow the same naming convention for variables + * representing values in different coordinates. + * + * I. Definitions + * + * - (guest) buffer coordinate: this is the coordinates that the guest will + * see. The x/y offsets and width/height specified in commands sent by + * guest is basically in buffer coordinate. + * + * - (host) pixel coordinate: this is the coordinate in pixel level on the + * host destop. A window/widget of width 300 in pixel coordinate means it + * occupies 300 pixels horizontally. + * + * - (host) logical window coordinate: the existence of global scaling + * factor in desktop level makes this kind of coordinate play a role. It + * always holds that (logical window size) * (global scale factor) = + * (pixel size). + * + * - global scale factor: this is specified in desktop level and is + * typically invariant during the life cycle of the process. Users with + * high-DPI monitors might set this scale, for example, to 2, in order to + * make the UI look larger. + * + * - zooming scale: this can be freely controlled by the QEMU user to zoom + * in/out the guest content. + * + * II. Representation + * + * We'd like to use consistent representation for variables in different + * coordinates: + * - buffer coordinate: prefix fb + * - pixel coordinate: prefix p + * - logical window coordinate: prefix w + * + * For scales: + * - global scale factor: prefix gs + * - zooming scale: prefix scale/s + * + * Example: fbw, pw, ww for width in different coordinates + * + * III. Equation + * + * - fbw * gs * scale_x = pw + * - pw = gs * ww + * + * Consequently we have + * + * - fbw * scale_x = ww + * + * Example: assuming we are running QEMU on a 3840x2160 screen and have set + * global scaling factor to 2, if the guest buffer size is 1920x1080 and the + * zooming scale is 0.5, then we have: + * - fbw = 1920, fbh = 1080 + * - pw = 1920, ph = 1080 + * - ww = 960, wh = 540 + * A bonus of this configuration is that we can achieve pixel to pixel + * presentation of the guest content. + */ + static gboolean gd_draw_event(GtkWidget *widget, cairo_t *cr, void *opaque) { VirtualConsole *vc = opaque; GtkDisplayState *s = vc->s; - int mx, my; - int ww, wh; + int wx_offset, wy_offset; + int ww_widget, wh_widget, ww_surface, wh_surface; int fbw, fbh; #if defined(CONFIG_OPENGL) @@ -839,46 +936,37 @@ static gboolean gd_draw_event(GtkWidget *widget, cairo_t *cr, void *opaque) fbw = surface_width(vc->gfx.ds); fbh = surface_height(vc->gfx.ds); - ww = gdk_window_get_width(gtk_widget_get_window(widget)); - wh = gdk_window_get_height(gtk_widget_get_window(widget)); + ww_widget = gdk_window_get_width(gtk_widget_get_window(widget)); + wh_widget = gdk_window_get_height(gtk_widget_get_window(widget)); - if (s->full_screen) { - vc->gfx.scale_x = (double)ww / fbw; - vc->gfx.scale_y = (double)wh / fbh; - } else if (s->free_scale) { - double sx, sy; + gd_update_scale(vc, ww_widget, wh_widget, fbw, fbh); - sx = (double)ww / fbw; - sy = (double)wh / fbh; + ww_surface = fbw * vc->gfx.scale_x; + wh_surface = fbh * vc->gfx.scale_y; - vc->gfx.scale_x = vc->gfx.scale_y = MIN(sx, sy); + wx_offset = wy_offset = 0; + if (ww_widget > ww_surface) { + wx_offset = (ww_widget - ww_surface) / 2; } - - fbw *= vc->gfx.scale_x; - fbh *= vc->gfx.scale_y; - - mx = my = 0; - if (ww > fbw) { - mx = (ww - fbw) / 2; - } - if (wh > fbh) { - my = (wh - fbh) / 2; + if (wh_widget > wh_surface) { + wy_offset = (wh_widget - wh_surface) / 2; } - cairo_rectangle(cr, 0, 0, ww, wh); + cairo_rectangle(cr, 0, 0, ww_widget, wh_widget); /* Optionally cut out the inner area where the pixmap will be drawn. This avoids 'flashing' since we're not double-buffering. Note we're using the undocumented behaviour of drawing the rectangle from right to left to cut out the whole */ - cairo_rectangle(cr, mx + fbw, my, - -1 * fbw, fbh); + cairo_rectangle(cr, wx_offset + ww_surface, wy_offset, + -1 * ww_surface, wh_surface); cairo_fill(cr); cairo_scale(cr, vc->gfx.scale_x, vc->gfx.scale_y); cairo_set_source_surface(cr, vc->gfx.surface, - mx / vc->gfx.scale_x, my / vc->gfx.scale_y); + wx_offset / vc->gfx.scale_x, + wy_offset / vc->gfx.scale_y); cairo_paint(cr); return TRUE; @@ -889,19 +977,19 @@ static gboolean gd_motion_event(GtkWidget *widget, GdkEventMotion *motion, { VirtualConsole *vc = opaque; GtkDisplayState *s = vc->s; - int x, y; - int mx, my; - int fbh, fbw; - int ww, wh; + int fbx, fby; + int wx_offset, wy_offset; + int wh_surface, ww_surface; + int ww_widget, wh_widget; if (!vc->gfx.ds) { return TRUE; } - fbw = surface_width(vc->gfx.ds) * vc->gfx.scale_x; - fbh = surface_height(vc->gfx.ds) * vc->gfx.scale_y; - ww = gtk_widget_get_allocated_width(widget); - wh = gtk_widget_get_allocated_height(widget); + ww_surface = surface_width(vc->gfx.ds) * vc->gfx.scale_x; + wh_surface = surface_height(vc->gfx.ds) * vc->gfx.scale_y; + ww_widget = gtk_widget_get_allocated_width(widget); + wh_widget = gtk_widget_get_allocated_height(widget); /* * `widget` may not have the same size with the frame buffer. @@ -909,41 +997,42 @@ static gboolean gd_motion_event(GtkWidget *widget, GdkEventMotion *motion, * To achieve that, `vc` will be displayed at (mx, my) * so that it is displayed at the center of the widget. */ - mx = my = 0; - if (ww > fbw) { - mx = (ww - fbw) / 2; + wx_offset = wy_offset = 0; + if (ww_widget > ww_surface) { + wx_offset = (ww_widget - ww_surface) / 2; } - if (wh > fbh) { - my = (wh - fbh) / 2; + if (wh_widget > wh_surface) { + wy_offset = (wh_widget - wh_surface) / 2; } /* * `motion` is reported in `widget` coordinates * so translating it to the coordinates in `vc`. */ - x = (motion->x - mx) / vc->gfx.scale_x; - y = (motion->y - my) / vc->gfx.scale_y; + fbx = (motion->x - wx_offset) / vc->gfx.scale_x; + fby = (motion->y - wy_offset) / vc->gfx.scale_y; - trace_gd_motion_event(ww, wh, gtk_widget_get_scale_factor(widget), x, y); + trace_gd_motion_event(ww_widget, wh_widget, + gtk_widget_get_scale_factor(widget), fbx, fby); if (qemu_input_is_absolute(vc->gfx.dcl.con)) { - if (x < 0 || y < 0 || - x >= surface_width(vc->gfx.ds) || - y >= surface_height(vc->gfx.ds)) { + if (fbx < 0 || fby < 0 || + fbx >= surface_width(vc->gfx.ds) || + fby >= surface_height(vc->gfx.ds)) { return TRUE; } - qemu_input_queue_abs(vc->gfx.dcl.con, INPUT_AXIS_X, x, + qemu_input_queue_abs(vc->gfx.dcl.con, INPUT_AXIS_X, fbx, 0, surface_width(vc->gfx.ds)); - qemu_input_queue_abs(vc->gfx.dcl.con, INPUT_AXIS_Y, y, + qemu_input_queue_abs(vc->gfx.dcl.con, INPUT_AXIS_Y, fby, 0, surface_height(vc->gfx.ds)); qemu_input_event_sync(); } else if (s->last_set && s->ptr_owner == vc) { - qemu_input_queue_rel(vc->gfx.dcl.con, INPUT_AXIS_X, x - s->last_x); - qemu_input_queue_rel(vc->gfx.dcl.con, INPUT_AXIS_Y, y - s->last_y); + qemu_input_queue_rel(vc->gfx.dcl.con, INPUT_AXIS_X, fbx - s->last_x); + qemu_input_queue_rel(vc->gfx.dcl.con, INPUT_AXIS_Y, fby - s->last_y); qemu_input_event_sync(); } - s->last_x = x; - s->last_y = y; + s->last_x = fbx; + s->last_y = fby; s->last_set = TRUE; if (!qemu_input_is_absolute(vc->gfx.dcl.con) && s->ptr_owner == vc) { @@ -1760,8 +1849,16 @@ static gboolean gd_configure(GtkWidget *widget, GdkEventConfigure *cfg, gpointer opaque) { VirtualConsole *vc = opaque; + const double sx = vc->gfx.scale_x, sy = vc->gfx.scale_y; + double width = cfg->width, height = cfg->height; + + if (!vc->s->free_scale && !vc->s->full_screen) { + width /= sx; + height /= sy; + } + + gd_set_ui_size(vc, width, height); - gd_set_ui_size(vc, cfg->width, cfg->height); return FALSE; } diff --git a/ui/sdl2-gl.c b/ui/sdl2-gl.c index e01d9ab..3be17d1 100644 --- a/ui/sdl2-gl.c +++ b/ui/sdl2-gl.c @@ -241,7 +241,7 @@ void sdl2_gl_scanout_flush(DisplayChangeListener *dcl, SDL_GL_MakeCurrent(scon->real_window, scon->winctx); SDL_GetWindowSize(scon->real_window, &ww, &wh); - egl_fb_setup_default(&scon->win_fb, ww, wh); + egl_fb_setup_default(&scon->win_fb, ww, wh, 0, 0); egl_fb_blit(&scon->win_fb, &scon->guest_fb, !scon->y0_top); SDL_GL_SwapWindow(scon->real_window); @@ -488,14 +488,14 @@ static void handle_mousemotion(SDL_Event *ev) { int max_x, max_y; struct sdl2_console *scon = get_scon_from_window(ev->motion.windowID); + int scr_w, scr_h, surf_w, surf_h, x, y, dx, dy; if (!scon || !qemu_console_is_graphic(scon->dcl.con)) { return; } + SDL_GetWindowSize(scon->real_window, &scr_w, &scr_h); if (qemu_input_is_absolute(scon->dcl.con) || absolute_enabled) { - int scr_w, scr_h; - SDL_GetWindowSize(scon->real_window, &scr_w, &scr_h); max_x = scr_w - 1; max_y = scr_h - 1; if (gui_grab && !gui_fullscreen @@ -509,9 +509,14 @@ static void handle_mousemotion(SDL_Event *ev) sdl_grab_start(scon); } } + surf_w = surface_width(scon->surface); + surf_h = surface_height(scon->surface); + x = (int64_t)ev->motion.x * surf_w / scr_w; + y = (int64_t)ev->motion.y * surf_h / scr_h; + dx = (int64_t)ev->motion.xrel * surf_w / scr_w; + dy = (int64_t)ev->motion.yrel * surf_h / scr_h; if (gui_grab || qemu_input_is_absolute(scon->dcl.con) || absolute_enabled) { - sdl_send_mouse_event(scon, ev->motion.xrel, ev->motion.yrel, - ev->motion.x, ev->motion.y, ev->motion.state); + sdl_send_mouse_event(scon, dx, dy, x, y, ev->motion.state); } } @@ -520,12 +525,17 @@ static void handle_mousebutton(SDL_Event *ev) int buttonstate = SDL_GetMouseState(NULL, NULL); SDL_MouseButtonEvent *bev; struct sdl2_console *scon = get_scon_from_window(ev->button.windowID); + int scr_w, scr_h, x, y; if (!scon || !qemu_console_is_graphic(scon->dcl.con)) { return; } bev = &ev->button; + SDL_GetWindowSize(scon->real_window, &scr_w, &scr_h); + x = (int64_t)bev->x * surface_width(scon->surface) / scr_w; + y = (int64_t)bev->y * surface_height(scon->surface) / scr_h; + if (!gui_grab && !qemu_input_is_absolute(scon->dcl.con)) { if (ev->type == SDL_MOUSEBUTTONUP && bev->button == SDL_BUTTON_LEFT) { /* start grabbing all events */ @@ -537,7 +547,7 @@ static void handle_mousebutton(SDL_Event *ev) } else { buttonstate &= ~SDL_BUTTON(bev->button); } - sdl_send_mouse_event(scon, 0, 0, bev->x, bev->y, buttonstate); + sdl_send_mouse_event(scon, 0, 0, x, y, buttonstate); } } diff --git a/ui/vdagent.c b/ui/vdagent.c index 04513de..c0746fe 100644 --- a/ui/vdagent.c +++ b/ui/vdagent.c @@ -6,10 +6,10 @@ #include "qemu/option.h" #include "qemu/units.h" #include "hw/qdev-core.h" -#include "migration/blocker.h" #include "ui/clipboard.h" #include "ui/console.h" #include "ui/input.h" +#include "migration/vmstate.h" #include "trace.h" #include "qapi/qapi-types-char.h" @@ -32,14 +32,12 @@ struct VDAgentChardev { Chardev parent; - /* TODO: migration isn't yet supported */ - Error *migration_blocker; - /* config */ bool mouse; bool clipboard; /* guest vdagent */ + bool connected; uint32_t caps; VDIChunkHeader chunk; uint32_t chunksize; @@ -47,7 +45,7 @@ struct VDAgentChardev { uint32_t msgsize; uint8_t *xbuf; uint32_t xoff, xsize; - Buffer outbuf; + GByteArray *outbuf; /* mouse */ DeviceState mouse_dev; @@ -142,16 +140,16 @@ static void vdagent_send_buf(VDAgentChardev *vd) { uint32_t len; - while (!buffer_empty(&vd->outbuf)) { + while (vd->outbuf->len) { len = qemu_chr_be_can_write(CHARDEV(vd)); if (len == 0) { return; } - if (len > vd->outbuf.offset) { - len = vd->outbuf.offset; + if (len > vd->outbuf->len) { + len = vd->outbuf->len; } - qemu_chr_be_write(CHARDEV(vd), vd->outbuf.buffer, len); - buffer_advance(&vd->outbuf, len); + qemu_chr_be_write(CHARDEV(vd), vd->outbuf->data, len); + g_byte_array_remove_range(vd->outbuf, 0, len); } } @@ -166,7 +164,7 @@ static void vdagent_send_msg(VDAgentChardev *vd, VDAgentMessage *msg) msg->protocol = VD_AGENT_PROTOCOL; - if (vd->outbuf.offset + msgsize > VDAGENT_BUFFER_LIMIT) { + if (vd->outbuf->len + msgsize > VDAGENT_BUFFER_LIMIT) { error_report("buffer full, dropping message"); return; } @@ -177,9 +175,8 @@ static void vdagent_send_msg(VDAgentChardev *vd, VDAgentMessage *msg) if (chunk.size > 1024) { chunk.size = 1024; } - buffer_reserve(&vd->outbuf, sizeof(chunk) + chunk.size); - buffer_append(&vd->outbuf, &chunk, sizeof(chunk)); - buffer_append(&vd->outbuf, msgbuf + msgoff, chunk.size); + g_byte_array_append(vd->outbuf, (void *)&chunk, sizeof(chunk)); + g_byte_array_append(vd->outbuf, msgbuf + msgoff, chunk.size); msgoff += chunk.size; } vdagent_send_buf(vd); @@ -672,10 +669,6 @@ static void vdagent_chr_open(Chardev *chr, return; #endif - if (migrate_add_blocker(&vd->migration_blocker, errp) != 0) { - return; - } - vd->mouse = VDAGENT_MOUSE_DEFAULT; if (cfg->has_mouse) { vd->mouse = cfg->mouse; @@ -694,6 +687,18 @@ static void vdagent_chr_open(Chardev *chr, *be_opened = true; } +static void vdagent_clipboard_peer_register(VDAgentChardev *vd) +{ + if (vd->cbpeer.notifier.notify != NULL) { + return; + } + + vd->cbpeer.name = "vdagent"; + vd->cbpeer.notifier.notify = vdagent_clipboard_notify; + vd->cbpeer.request = vdagent_clipboard_request; + qemu_clipboard_peer_register(&vd->cbpeer); +} + static void vdagent_chr_recv_caps(VDAgentChardev *vd, VDAgentMessage *msg) { VDAgentAnnounceCapabilities *caps = (void *)msg->data; @@ -720,13 +725,9 @@ static void vdagent_chr_recv_caps(VDAgentChardev *vd, VDAgentMessage *msg) memset(vd->last_serial, 0, sizeof(vd->last_serial)); - if (have_clipboard(vd) && vd->cbpeer.notifier.notify == NULL) { + if (have_clipboard(vd)) { qemu_clipboard_reset_serial(); - - vd->cbpeer.name = "vdagent"; - vd->cbpeer.notifier.notify = vdagent_clipboard_notify; - vd->cbpeer.request = vdagent_clipboard_request; - qemu_clipboard_peer_register(&vd->cbpeer); + vdagent_clipboard_peer_register(vd); } } @@ -859,7 +860,8 @@ static void vdagent_disconnect(VDAgentChardev *vd) { trace_vdagent_disconnect(); - buffer_reset(&vd->outbuf); + vd->connected = false; + g_byte_array_set_size(vd->outbuf, 0); vdagent_reset_bufs(vd); vd->caps = 0; if (vd->mouse_hs) { @@ -877,6 +879,10 @@ static void vdagent_chr_set_fe_open(struct Chardev *chr, int fe_open) trace_vdagent_fe_open(fe_open); + if (vd->connected == fe_open) { + return; + } + if (!fe_open) { trace_vdagent_close(); vdagent_disconnect(vd); @@ -886,6 +892,7 @@ static void vdagent_chr_set_fe_open(struct Chardev *chr, int fe_open) return; } + vd->connected = true; vdagent_send_caps(vd, true); } @@ -916,25 +923,163 @@ static void vdagent_chr_class_init(ObjectClass *oc, const void *data) cc->chr_accept_input = vdagent_chr_accept_input; } +static int post_load(void *opaque, int version_id) +{ + VDAgentChardev *vd = QEMU_VDAGENT_CHARDEV(opaque); + + if (have_mouse(vd) && vd->mouse_hs) { + qemu_input_handler_activate(vd->mouse_hs); + } + + if (have_clipboard(vd)) { + vdagent_clipboard_peer_register(vd); + } + + return 0; +} + +static const VMStateDescription vmstate_chunk = { + .name = "vdagent/chunk", + .version_id = 0, + .minimum_version_id = 0, + .fields = (const VMStateField[]) { + VMSTATE_UINT32(port, VDIChunkHeader), + VMSTATE_UINT32(size, VDIChunkHeader), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_vdba = { + .name = "vdagent/bytearray", + .version_id = 0, + .minimum_version_id = 0, + .fields = (const VMStateField[]) { + VMSTATE_UINT32(len, GByteArray), + VMSTATE_VBUFFER_ALLOC_UINT32(data, GByteArray, 0, 0, len), + VMSTATE_END_OF_LIST() + } +}; + +struct CBInfoArray { + uint32_t n; + QemuClipboardInfo cbinfo[QEMU_CLIPBOARD_SELECTION__COUNT]; +}; + +static const VMStateDescription vmstate_cbinfo_array = { + .name = "cbinfoarray", + .fields = (const VMStateField[]) { + VMSTATE_UINT32(n, struct CBInfoArray), + VMSTATE_STRUCT_VARRAY_UINT32(cbinfo, struct CBInfoArray, n, + 0, vmstate_cbinfo, QemuClipboardInfo), + VMSTATE_END_OF_LIST() + } +}; + +static int put_cbinfo(QEMUFile *f, void *pv, size_t size, + const VMStateField *field, JSONWriter *vmdesc) +{ + VDAgentChardev *vd = QEMU_VDAGENT_CHARDEV(pv); + struct CBInfoArray cbinfo = {}; + int i; + + if (!have_clipboard(vd)) { + return 0; + } + + for (i = 0; i < QEMU_CLIPBOARD_SELECTION__COUNT; i++) { + if (qemu_clipboard_peer_owns(&vd->cbpeer, i)) { + cbinfo.cbinfo[cbinfo.n++] = *qemu_clipboard_info(i); + } + } + + return vmstate_save_state(f, &vmstate_cbinfo_array, &cbinfo, vmdesc); +} + +static int get_cbinfo(QEMUFile *f, void *pv, size_t size, + const VMStateField *field) +{ + VDAgentChardev *vd = QEMU_VDAGENT_CHARDEV(pv); + struct CBInfoArray cbinfo = {}; + int i, ret; + + if (!have_clipboard(vd)) { + return 0; + } + + vdagent_clipboard_peer_register(vd); + + ret = vmstate_load_state(f, &vmstate_cbinfo_array, &cbinfo, 0); + if (ret) { + return ret; + } + + for (i = 0; i < cbinfo.n; i++) { + g_autoptr(QemuClipboardInfo) info = + qemu_clipboard_info_new(&vd->cbpeer, cbinfo.cbinfo[i].selection); + /* this will steal clipboard data pointer from cbinfo.types */ + memcpy(info->types, cbinfo.cbinfo[i].types, sizeof(cbinfo.cbinfo[i].types)); + qemu_clipboard_update(info); + } + + return 0; +} + +static const VMStateInfo vmstate_cbinfos = { + .name = "vdagent/cbinfos", + .get = get_cbinfo, + .put = put_cbinfo, +}; + +static const VMStateDescription vmstate_vdagent = { + .name = "vdagent", + .version_id = 0, + .minimum_version_id = 0, + .post_load = post_load, + .fields = (const VMStateField[]) { + VMSTATE_BOOL(connected, VDAgentChardev), + VMSTATE_UINT32(caps, VDAgentChardev), + VMSTATE_STRUCT(chunk, VDAgentChardev, 0, vmstate_chunk, VDIChunkHeader), + VMSTATE_UINT32(chunksize, VDAgentChardev), + VMSTATE_UINT32(msgsize, VDAgentChardev), + VMSTATE_VBUFFER_ALLOC_UINT32(msgbuf, VDAgentChardev, 0, 0, msgsize), + VMSTATE_UINT32(xsize, VDAgentChardev), + VMSTATE_UINT32(xoff, VDAgentChardev), + VMSTATE_VBUFFER_ALLOC_UINT32(xbuf, VDAgentChardev, 0, 0, xsize), + VMSTATE_STRUCT_POINTER(outbuf, VDAgentChardev, vmstate_vdba, GByteArray), + VMSTATE_UINT32(mouse_x, VDAgentChardev), + VMSTATE_UINT32(mouse_y, VDAgentChardev), + VMSTATE_UINT32(mouse_btn, VDAgentChardev), + VMSTATE_UINT32(mouse_display, VDAgentChardev), + VMSTATE_UINT32_ARRAY(last_serial, VDAgentChardev, + QEMU_CLIPBOARD_SELECTION__COUNT), + VMSTATE_UINT32_ARRAY(cbpending, VDAgentChardev, + QEMU_CLIPBOARD_SELECTION__COUNT), + { + .name = "cbinfos", + .info = &vmstate_cbinfos, + .flags = VMS_SINGLE, + }, + VMSTATE_END_OF_LIST() + } +}; + static void vdagent_chr_init(Object *obj) { VDAgentChardev *vd = QEMU_VDAGENT_CHARDEV(obj); - buffer_init(&vd->outbuf, "vdagent-outbuf"); - error_setg(&vd->migration_blocker, - "The vdagent chardev doesn't yet support migration"); + vd->outbuf = g_byte_array_new(); + vmstate_register_any(NULL, &vmstate_vdagent, vd); } static void vdagent_chr_fini(Object *obj) { VDAgentChardev *vd = QEMU_VDAGENT_CHARDEV(obj); - migrate_del_blocker(&vd->migration_blocker); vdagent_disconnect(vd); if (vd->mouse_hs) { qemu_input_handler_unregister(vd->mouse_hs); } - buffer_free(&vd->outbuf); + g_clear_pointer(&vd->outbuf, g_byte_array_unref); } static const TypeInfo vdagent_chr_type_info = { diff --git a/util/error.c b/util/error.c index 673011b..daea214 100644 --- a/util/error.c +++ b/util/error.c @@ -15,15 +15,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu/error-report.h" - -struct Error -{ - char *msg; - ErrorClass err_class; - const char *src, *func; - int line; - GString *hint; -}; +#include "qapi/error-internal.h" Error *error_abort; Error *error_fatal; @@ -32,8 +24,13 @@ Error *error_warn; static void error_handle(Error **errp, Error *err) { if (errp == &error_abort) { - fprintf(stderr, "Unexpected error in %s() at %s:%d:\n", - err->func, err->src, err->line); + if (err->func) { + fprintf(stderr, "Unexpected error in %s() at %.*s:%d:\n", + err->func, err->src_len, err->src, err->line); + } else { + fprintf(stderr, "Unexpected error at %.*s:%d:\n", + err->src_len, err->src, err->line); + } error_report("%s", error_get_pretty(err)); if (err->hint) { error_printf("%s", err->hint->str); @@ -75,6 +72,7 @@ static void error_setv(Error **errp, g_free(msg); } err->err_class = err_class; + err->src_len = -1; err->src = src; err->line = line; err->func = func; diff --git a/util/event.c b/util/event.c new file mode 100644 index 0000000..5a8141c --- /dev/null +++ b/util/event.c @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "qemu/osdep.h" +#include "qemu/thread.h" + +/* + * Valid transitions: + * - FREE -> SET (qemu_event_set) + * - BUSY -> SET (qemu_event_set) + * - SET -> FREE (qemu_event_reset) + * - FREE -> BUSY (qemu_event_wait) + * + * With futex, the waking and blocking operations follow + * BUSY -> SET and FREE -> BUSY, respectively. + * + * Without futex, BUSY -> SET and FREE -> BUSY never happen. Instead, the waking + * operation follows FREE -> SET and the blocking operation will happen in + * qemu_event_wait() if the event is not SET. + * + * SET->BUSY does not happen (it can be observed from the outside but + * it really is SET->FREE->BUSY). + * + * busy->free provably cannot happen; to enforce it, the set->free transition + * is done with an OR, which becomes a no-op if the event has concurrently + * transitioned to free or busy. + */ + +#define EV_SET 0 +#define EV_FREE 1 +#define EV_BUSY -1 + +void qemu_event_init(QemuEvent *ev, bool init) +{ +#ifndef HAVE_FUTEX + pthread_mutex_init(&ev->lock, NULL); + pthread_cond_init(&ev->cond, NULL); +#endif + + ev->value = (init ? EV_SET : EV_FREE); + ev->initialized = true; +} + +void qemu_event_destroy(QemuEvent *ev) +{ + assert(ev->initialized); + ev->initialized = false; +#ifndef HAVE_FUTEX + pthread_mutex_destroy(&ev->lock); + pthread_cond_destroy(&ev->cond); +#endif +} + +void qemu_event_set(QemuEvent *ev) +{ + assert(ev->initialized); + +#ifdef HAVE_FUTEX + /* + * Pairs with both qemu_event_reset() and qemu_event_wait(). + * + * qemu_event_set has release semantics, but because it *loads* + * ev->value we need a full memory barrier here. + */ + smp_mb(); + if (qatomic_read(&ev->value) != EV_SET) { + int old = qatomic_xchg(&ev->value, EV_SET); + + /* Pairs with memory barrier in kernel futex_wait system call. */ + smp_mb__after_rmw(); + if (old == EV_BUSY) { + /* There were waiters, wake them up. */ + qemu_futex_wake_all(ev); + } + } +#else + pthread_mutex_lock(&ev->lock); + /* Pairs with qemu_event_reset()'s load acquire. */ + qatomic_store_release(&ev->value, EV_SET); + pthread_cond_broadcast(&ev->cond); + pthread_mutex_unlock(&ev->lock); +#endif +} + +void qemu_event_reset(QemuEvent *ev) +{ + assert(ev->initialized); + +#ifdef HAVE_FUTEX + /* + * If there was a concurrent reset (or even reset+wait), + * do nothing. Otherwise change EV_SET->EV_FREE. + */ + qatomic_or(&ev->value, EV_FREE); + + /* + * Order reset before checking the condition in the caller. + * Pairs with the first memory barrier in qemu_event_set(). + */ + smp_mb__after_rmw(); +#else + /* + * If futexes are not available, there are no EV_FREE->EV_BUSY + * transitions because wakeups are done entirely through the + * condition variable. Since qatomic_set() only writes EV_FREE, + * the load seems useless but in reality, the acquire synchronizes + * with qemu_event_set()'s store release: if qemu_event_reset() + * sees EV_SET here, then the caller will certainly see a + * successful condition and skip qemu_event_wait(): + * + * done = 1; if (done == 0) + * qemu_event_set() { qemu_event_reset() { + * lock(); + * ev->value = EV_SET -----> load ev->value + * ev->value = old value | EV_FREE + * cond_broadcast() + * unlock(); } + * } if (done == 0) + * // qemu_event_wait() not called + */ + qatomic_set(&ev->value, qatomic_load_acquire(&ev->value) | EV_FREE); +#endif +} + +void qemu_event_wait(QemuEvent *ev) +{ + assert(ev->initialized); + +#ifdef HAVE_FUTEX + while (true) { + /* + * qemu_event_wait must synchronize with qemu_event_set even if it does + * not go down the slow path, so this load-acquire is needed that + * synchronizes with the first memory barrier in qemu_event_set(). + */ + unsigned value = qatomic_load_acquire(&ev->value); + if (value == EV_SET) { + break; + } + + if (value == EV_FREE) { + /* + * Leave the event reset and tell qemu_event_set that there are + * waiters. No need to retry, because there cannot be a concurrent + * busy->free transition. After the CAS, the event will be either + * set or busy. + * + * This cmpxchg doesn't have particular ordering requirements if it + * succeeds (moving the store earlier can only cause + * qemu_event_set() to issue _more_ wakeups), the failing case needs + * acquire semantics like the load above. + */ + if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { + break; + } + } + + /* + * This is the final check for a concurrent set, so it does need + * a smp_mb() pairing with the second barrier of qemu_event_set(). + * The barrier is inside the FUTEX_WAIT system call. + */ + qemu_futex_wait(ev, EV_BUSY); + } +#else + pthread_mutex_lock(&ev->lock); + while (qatomic_read(&ev->value) != EV_SET) { + pthread_cond_wait(&ev->cond, &ev->lock); + } + pthread_mutex_unlock(&ev->lock); +#endif +} diff --git a/util/lockcnt.c b/util/lockcnt.c index d07c6cc..92c9f8c 100644 --- a/util/lockcnt.c +++ b/util/lockcnt.c @@ -12,10 +12,11 @@ #include "qemu/atomic.h" #include "trace.h" -#ifdef CONFIG_LINUX -#include "qemu/futex.h" +#ifdef HAVE_FUTEX -/* On Linux, bits 0-1 are a futex-based lock, bits 2-31 are the counter. +/* + * When futex is available, bits 0-1 are a futex-based lock, bits 2-31 are the + * counter. * For the mutex algorithm see Ulrich Drepper's "Futexes Are Tricky" (ok, * this is not the most relaxing citation I could make...). It is similar * to mutex2 in the paper. @@ -106,7 +107,7 @@ static bool qemu_lockcnt_cmpxchg_or_wait(QemuLockCnt *lockcnt, int *val, static void lockcnt_wake(QemuLockCnt *lockcnt) { trace_lockcnt_futex_wake(lockcnt); - qemu_futex_wake(&lockcnt->count, 1); + qemu_futex_wake_single(&lockcnt->count); } void qemu_lockcnt_inc(QemuLockCnt *lockcnt) diff --git a/util/meson.build b/util/meson.build index 1adff96..3502938 100644 --- a/util/meson.build +++ b/util/meson.build @@ -27,7 +27,7 @@ else util_ss.add(files('event_notifier-win32.c')) util_ss.add(files('oslib-win32.c')) util_ss.add(files('qemu-thread-win32.c')) - util_ss.add(winmm, pathcch) + util_ss.add(winmm, pathcch, synchronization) endif util_ss.add(when: linux_io_uring, if_true: files('fdmon-io_uring.c')) if glib_has_gslice @@ -35,6 +35,7 @@ if glib_has_gslice endif util_ss.add(files('defer-call.c')) util_ss.add(files('envlist.c', 'path.c', 'module.c')) +util_ss.add(files('event.c')) util_ss.add(files('host-utils.c')) util_ss.add(files('bitmap.c', 'bitops.c')) util_ss.add(files('fifo8.c')) diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c index b2e26e2..ba72544 100644 --- a/util/qemu-thread-posix.c +++ b/util/qemu-thread-posix.c @@ -317,154 +317,6 @@ void qemu_sem_wait(QemuSemaphore *sem) qemu_mutex_unlock(&sem->mutex); } -#ifdef __linux__ -#include "qemu/futex.h" -#else -static inline void qemu_futex_wake(QemuEvent *ev, int n) -{ - assert(ev->initialized); - pthread_mutex_lock(&ev->lock); - if (n == 1) { - pthread_cond_signal(&ev->cond); - } else { - pthread_cond_broadcast(&ev->cond); - } - pthread_mutex_unlock(&ev->lock); -} - -static inline void qemu_futex_wait(QemuEvent *ev, unsigned val) -{ - assert(ev->initialized); - pthread_mutex_lock(&ev->lock); - if (ev->value == val) { - pthread_cond_wait(&ev->cond, &ev->lock); - } - pthread_mutex_unlock(&ev->lock); -} -#endif - -/* Valid transitions: - * - free->set, when setting the event - * - busy->set, when setting the event, followed by qemu_futex_wake - * - set->free, when resetting the event - * - free->busy, when waiting - * - * set->busy does not happen (it can be observed from the outside but - * it really is set->free->busy). - * - * busy->free provably cannot happen; to enforce it, the set->free transition - * is done with an OR, which becomes a no-op if the event has concurrently - * transitioned to free or busy. - */ - -#define EV_SET 0 -#define EV_FREE 1 -#define EV_BUSY -1 - -void qemu_event_init(QemuEvent *ev, bool init) -{ -#ifndef __linux__ - pthread_mutex_init(&ev->lock, NULL); - pthread_cond_init(&ev->cond, NULL); -#endif - - ev->value = (init ? EV_SET : EV_FREE); - ev->initialized = true; -} - -void qemu_event_destroy(QemuEvent *ev) -{ - assert(ev->initialized); - ev->initialized = false; -#ifndef __linux__ - pthread_mutex_destroy(&ev->lock); - pthread_cond_destroy(&ev->cond); -#endif -} - -void qemu_event_set(QemuEvent *ev) -{ - assert(ev->initialized); - - /* - * Pairs with both qemu_event_reset() and qemu_event_wait(). - * - * qemu_event_set has release semantics, but because it *loads* - * ev->value we need a full memory barrier here. - */ - smp_mb(); - if (qatomic_read(&ev->value) != EV_SET) { - int old = qatomic_xchg(&ev->value, EV_SET); - - /* Pairs with memory barrier in kernel futex_wait system call. */ - smp_mb__after_rmw(); - if (old == EV_BUSY) { - /* There were waiters, wake them up. */ - qemu_futex_wake(ev, INT_MAX); - } - } -} - -void qemu_event_reset(QemuEvent *ev) -{ - assert(ev->initialized); - - /* - * If there was a concurrent reset (or even reset+wait), - * do nothing. Otherwise change EV_SET->EV_FREE. - */ - qatomic_or(&ev->value, EV_FREE); - - /* - * Order reset before checking the condition in the caller. - * Pairs with the first memory barrier in qemu_event_set(). - */ - smp_mb__after_rmw(); -} - -void qemu_event_wait(QemuEvent *ev) -{ - unsigned value; - - assert(ev->initialized); - - /* - * qemu_event_wait must synchronize with qemu_event_set even if it does - * not go down the slow path, so this load-acquire is needed that - * synchronizes with the first memory barrier in qemu_event_set(). - * - * If we do go down the slow path, there is no requirement at all: we - * might miss a qemu_event_set() here but ultimately the memory barrier in - * qemu_futex_wait() will ensure the check is done correctly. - */ - value = qatomic_load_acquire(&ev->value); - if (value != EV_SET) { - if (value == EV_FREE) { - /* - * Leave the event reset and tell qemu_event_set that there are - * waiters. No need to retry, because there cannot be a concurrent - * busy->free transition. After the CAS, the event will be either - * set or busy. - * - * This cmpxchg doesn't have particular ordering requirements if it - * succeeds (moving the store earlier can only cause qemu_event_set() - * to issue _more_ wakeups), the failing case needs acquire semantics - * like the load above. - */ - if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { - return; - } - } - - /* - * This is the final check for a concurrent set, so it does need - * a smp_mb() pairing with the second barrier of qemu_event_set(). - * The barrier is inside the FUTEX_WAIT system call. - */ - qemu_futex_wait(ev, EV_BUSY); - } -} - static __thread NotifierList thread_exit; /* diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c index a7fe3cc..ca2e0b5 100644 --- a/util/qemu-thread-win32.c +++ b/util/qemu-thread-win32.c @@ -231,135 +231,6 @@ void qemu_sem_wait(QemuSemaphore *sem) } } -/* Wrap a Win32 manual-reset event with a fast userspace path. The idea - * is to reset the Win32 event lazily, as part of a test-reset-test-wait - * sequence. Such a sequence is, indeed, how QemuEvents are used by - * RCU and other subsystems! - * - * Valid transitions: - * - free->set, when setting the event - * - busy->set, when setting the event, followed by SetEvent - * - set->free, when resetting the event - * - free->busy, when waiting - * - * set->busy does not happen (it can be observed from the outside but - * it really is set->free->busy). - * - * busy->free provably cannot happen; to enforce it, the set->free transition - * is done with an OR, which becomes a no-op if the event has concurrently - * transitioned to free or busy (and is faster than cmpxchg). - */ - -#define EV_SET 0 -#define EV_FREE 1 -#define EV_BUSY -1 - -void qemu_event_init(QemuEvent *ev, bool init) -{ - /* Manual reset. */ - ev->event = CreateEvent(NULL, TRUE, TRUE, NULL); - ev->value = (init ? EV_SET : EV_FREE); - ev->initialized = true; -} - -void qemu_event_destroy(QemuEvent *ev) -{ - assert(ev->initialized); - ev->initialized = false; - CloseHandle(ev->event); -} - -void qemu_event_set(QemuEvent *ev) -{ - assert(ev->initialized); - - /* - * Pairs with both qemu_event_reset() and qemu_event_wait(). - * - * qemu_event_set has release semantics, but because it *loads* - * ev->value we need a full memory barrier here. - */ - smp_mb(); - if (qatomic_read(&ev->value) != EV_SET) { - int old = qatomic_xchg(&ev->value, EV_SET); - - /* Pairs with memory barrier after ResetEvent. */ - smp_mb__after_rmw(); - if (old == EV_BUSY) { - /* There were waiters, wake them up. */ - SetEvent(ev->event); - } - } -} - -void qemu_event_reset(QemuEvent *ev) -{ - assert(ev->initialized); - - /* - * If there was a concurrent reset (or even reset+wait), - * do nothing. Otherwise change EV_SET->EV_FREE. - */ - qatomic_or(&ev->value, EV_FREE); - - /* - * Order reset before checking the condition in the caller. - * Pairs with the first memory barrier in qemu_event_set(). - */ - smp_mb__after_rmw(); -} - -void qemu_event_wait(QemuEvent *ev) -{ - unsigned value; - - assert(ev->initialized); - - /* - * qemu_event_wait must synchronize with qemu_event_set even if it does - * not go down the slow path, so this load-acquire is needed that - * synchronizes with the first memory barrier in qemu_event_set(). - * - * If we do go down the slow path, there is no requirement at all: we - * might miss a qemu_event_set() here but ultimately the memory barrier in - * qemu_futex_wait() will ensure the check is done correctly. - */ - value = qatomic_load_acquire(&ev->value); - if (value != EV_SET) { - if (value == EV_FREE) { - /* - * Here the underlying kernel event is reset, but qemu_event_set is - * not yet going to call SetEvent. However, there will be another - * check for EV_SET below when setting EV_BUSY. At that point it - * is safe to call WaitForSingleObject. - */ - ResetEvent(ev->event); - - /* - * It is not clear whether ResetEvent provides this barrier; kernel - * APIs (KeResetEvent/KeClearEvent) do not. Better safe than sorry! - */ - smp_mb(); - - /* - * Leave the event reset and tell qemu_event_set that there are - * waiters. No need to retry, because there cannot be a concurrent - * busy->free transition. After the CAS, the event will be either - * set or busy. - */ - if (qatomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { - return; - } - } - - /* - * ev->value is now EV_BUSY. Since we didn't observe EV_SET, - * qemu_event_set() must observe EV_BUSY and call SetEvent(). - */ - WaitForSingleObject(ev->event, INFINITE); - } -} - struct QemuThreadData { /* Passed to win32_start_routine. */ void *(*start_routine)(void *); |