aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
Diffstat (limited to 'hw')
-rw-r--r--hw/9pfs/9p-synth.c2
-rw-r--r--hw/9pfs/9p-util-freebsd.c132
-rw-r--r--hw/9pfs/9p-util-generic.c1
-rw-r--r--hw/9pfs/9p-util.h20
-rw-r--r--hw/9pfs/9p.c19
-rw-r--r--hw/9pfs/9p.h3
-rw-r--r--hw/9pfs/meson.build2
-rw-r--r--hw/Kconfig1
-rw-r--r--hw/acpi/Kconfig5
-rw-r--r--hw/acpi/acpi-pci-hotplug-stub.c2
-rw-r--r--hw/acpi/aml-build.c37
-rw-r--r--hw/acpi/bios-linker-loader.c2
-rw-r--r--hw/acpi/core.c1
-rw-r--r--hw/acpi/cxl.c76
-rw-r--r--hw/acpi/generic_event_device.c121
-rw-r--r--hw/acpi/ghes-stub.c7
-rw-r--r--hw/acpi/ghes.c233
-rw-r--r--hw/acpi/ghes_cper.c40
-rw-r--r--hw/acpi/ghes_cper_stub.c20
-rw-r--r--hw/acpi/ich9.c7
-rw-r--r--hw/acpi/meson.build2
-rw-r--r--hw/acpi/nvdimm.c2
-rw-r--r--hw/acpi/pci-bridge.c54
-rw-r--r--hw/acpi/pci.c50
-rw-r--r--hw/acpi/pcihp.c441
-rw-r--r--hw/acpi/piix4.c5
-rw-r--r--hw/acpi/vmgenid.c6
-rw-r--r--hw/arm/Kconfig25
-rw-r--r--hw/arm/allwinner-r40.c1
-rw-r--r--hw/arm/aspeed.c426
-rw-r--r--hw/arm/aspeed_ast10x0.c91
-rw-r--r--hw/arm/aspeed_ast2400.c94
-rw-r--r--hw/arm/aspeed_ast2600.c193
-rw-r--r--hw/arm/aspeed_ast27x0-fc.c118
-rw-r--r--hw/arm/aspeed_ast27x0-ssp.c74
-rw-r--r--hw/arm/aspeed_ast27x0-tsp.c74
-rw-r--r--hw/arm/aspeed_ast27x0.c189
-rw-r--r--hw/arm/aspeed_coprocessor_common.c49
-rw-r--r--hw/arm/aspeed_eeprom.c21
-rw-r--r--hw/arm/aspeed_eeprom.h3
-rw-r--r--hw/arm/aspeed_soc_common.c159
-rw-r--r--hw/arm/boot.c143
-rw-r--r--hw/arm/fby35.c11
-rw-r--r--hw/arm/fsl-imx8mp.c4
-rw-r--r--hw/arm/highbank.c2
-rw-r--r--hw/arm/max78000_soc.c232
-rw-r--r--hw/arm/max78000fthr.c50
-rw-r--r--hw/arm/meson.build13
-rw-r--r--hw/arm/mps2.c4
-rw-r--r--hw/arm/npcm7xx.c2
-rw-r--r--hw/arm/npcm8xx.c55
-rw-r--r--hw/arm/raspi4b.c22
-rw-r--r--hw/arm/sbsa-ref.c8
-rw-r--r--hw/arm/smmu-common.c39
-rw-r--r--hw/arm/smmuv3.c2
-rw-r--r--hw/arm/stm32f205_soc.c10
-rw-r--r--hw/arm/virt-acpi-build.c433
-rw-r--r--hw/arm/virt.c284
-rw-r--r--hw/arm/xen-pvh.c1
-rw-r--r--hw/arm/xlnx-versal-virt.c743
-rw-r--r--hw/arm/xlnx-versal.c2424
-rw-r--r--hw/arm/xlnx-zynqmp.c103
-rw-r--r--hw/audio/ac97.c4
-rw-r--r--hw/audio/asc.c9
-rw-r--r--hw/audio/cs4231a.c4
-rw-r--r--hw/audio/es1370.c2
-rw-r--r--hw/audio/gus.c2
-rw-r--r--hw/audio/marvell_88w8618.c2
-rw-r--r--hw/audio/sb16.c2
-rw-r--r--hw/audio/via-ac97.c2
-rw-r--r--hw/block/Kconfig3
-rw-r--r--hw/block/hd-geometry.c1
-rw-r--r--hw/block/meson.build7
-rw-r--r--hw/block/nand.c835
-rw-r--r--hw/block/vhost-user-blk.c9
-rw-r--r--hw/block/virtio-blk.c6
-rw-r--r--hw/char/Kconfig3
-rw-r--r--hw/char/max78000_uart.c292
-rw-r--r--hw/char/meson.build1
-rw-r--r--hw/char/riscv_htif.c1
-rw-r--r--hw/char/sclpconsole-lm.c2
-rw-r--r--hw/char/serial-pci-multi.c3
-rw-r--r--hw/char/sh_serial.c24
-rw-r--r--hw/char/sifive_uart.c42
-rw-r--r--hw/char/trace-events14
-rw-r--r--hw/core/cpu-common.c41
-rw-r--r--hw/core/cpu-system.c13
-rw-r--r--hw/core/irq.c8
-rw-r--r--hw/core/loader.c20
-rw-r--r--hw/core/machine-hmp-cmds.c23
-rw-r--r--hw/core/machine-qmp-cmds.c34
-rw-r--r--hw/core/machine.c58
-rw-r--r--hw/core/meson.build4
-rw-r--r--hw/core/qdev-properties-system.c45
-rw-r--r--hw/core/qdev-properties.c67
-rw-r--r--hw/core/qdev.c29
-rw-r--r--hw/core/register.c1
-rw-r--r--hw/core/sysbus-fdt.c386
-rw-r--r--hw/core/sysbus.c11
-rw-r--r--hw/cxl/cxl-events.c40
-rw-r--r--hw/cxl/cxl-host-stubs.c7
-rw-r--r--hw/cxl/cxl-host.c175
-rw-r--r--hw/cxl/cxl-mailbox-utils.c552
-rw-r--r--hw/display/apple-gfx.m10
-rw-r--r--hw/display/artist.c1
-rw-r--r--hw/display/ati.c1
-rw-r--r--hw/display/bcm2835_fb.c1
-rw-r--r--hw/display/framebuffer.c6
-rw-r--r--hw/display/qxl-render.c11
-rw-r--r--hw/display/ramfb-standalone.c5
-rw-r--r--hw/display/ramfb-stubs.c2
-rw-r--r--hw/display/ramfb.c6
-rw-r--r--hw/display/sm501.c1
-rw-r--r--hw/display/vga.c2
-rw-r--r--hw/display/virtio-gpu-base.c27
-rw-r--r--hw/display/virtio-gpu-virgl.c44
-rw-r--r--hw/display/virtio-gpu.c13
-rw-r--r--hw/display/vmware_vga.c2
-rw-r--r--hw/display/xenfb.c3
-rw-r--r--hw/display/xlnx_dp.c10
-rw-r--r--hw/dma/xlnx_csu_dma.c2
-rw-r--r--hw/gpio/pca9552.c2
-rw-r--r--hw/gpio/pca9554.c2
-rw-r--r--hw/gpio/zaurus.c42
-rw-r--r--hw/hppa/machine.c102
-rw-r--r--hw/hyperv/hv-balloon-our_range_memslots.c1
-rw-r--r--hw/hyperv/hv-balloon.c21
-rw-r--r--hw/hyperv/syndbg.c6
-rw-r--r--hw/i386/Kconfig12
-rw-r--r--hw/i386/acpi-build.c538
-rw-r--r--hw/i386/acpi-build.h4
-rw-r--r--hw/i386/amd_iommu.c1183
-rw-r--r--hw/i386/amd_iommu.h112
-rw-r--r--hw/i386/intel_iommu.c539
-rw-r--r--hw/i386/intel_iommu_internal.h53
-rw-r--r--hw/i386/isapc.c189
-rw-r--r--hw/i386/kvm/apic.c5
-rw-r--r--hw/i386/kvm/xen-stubs.c13
-rw-r--r--hw/i386/kvm/xen_evtchn.c2
-rw-r--r--hw/i386/meson.build2
-rw-r--r--hw/i386/microvm.c3
-rw-r--r--hw/i386/monitor.c2
-rw-r--r--hw/i386/pc.c175
-rw-r--r--hw/i386/pc_piix.c311
-rw-r--r--hw/i386/pc_q35.c53
-rw-r--r--hw/i386/pc_sysfw.c38
-rw-r--r--hw/i386/sgx-stub.c6
-rw-r--r--hw/i386/sgx.c34
-rw-r--r--hw/i386/tdvf-hob.c130
-rw-r--r--hw/i386/tdvf-hob.h26
-rw-r--r--hw/i386/tdvf.c189
-rw-r--r--hw/i386/x86-common.c8
-rw-r--r--hw/i386/x86-iommu.c1
-rw-r--r--hw/i386/x86.c1
-rw-r--r--hw/ide/ich.c3
-rw-r--r--hw/input/hid.c1
-rw-r--r--hw/input/virtio-input-host.c3
-rw-r--r--hw/intc/Kconfig3
-rw-r--r--hw/intc/apic.c10
-rw-r--r--hw/intc/apic_common.c1
-rw-r--r--hw/intc/arm_gic.c2
-rw-r--r--hw/intc/arm_gic_common.c1
-rw-r--r--hw/intc/arm_gicv3_common.c4
-rw-r--r--hw/intc/arm_gicv3_cpuif.c12
-rw-r--r--hw/intc/arm_gicv3_kvm.c56
-rw-r--r--hw/intc/armv7m_nvic.c31
-rw-r--r--hw/intc/aspeed_intc.c12
-rw-r--r--hw/intc/ioapic.c20
-rw-r--r--hw/intc/loongarch_dintc.c213
-rw-r--r--hw/intc/loongarch_extioi.c54
-rw-r--r--hw/intc/loongarch_extioi_common.c9
-rw-r--r--hw/intc/loongarch_extioi_kvm.c139
-rw-r--r--hw/intc/loongarch_ipi.c29
-rw-r--r--hw/intc/loongarch_ipi_kvm.c90
-rw-r--r--hw/intc/loongarch_pch_msi.c10
-rw-r--r--hw/intc/loongarch_pch_pic.c62
-rw-r--r--hw/intc/loongarch_pic_kvm.c89
-rw-r--r--hw/intc/loongson_ipi_common.c33
-rw-r--r--hw/intc/meson.build13
-rw-r--r--hw/intc/pnv_xive.c16
-rw-r--r--hw/intc/pnv_xive2.c144
-rw-r--r--hw/intc/pnv_xive2_regs.h1
-rw-r--r--hw/intc/riscv_aclint.c12
-rw-r--r--hw/intc/riscv_aplic.c18
-rw-r--r--hw/intc/riscv_imsic.c10
-rw-r--r--hw/intc/s390_flic.c2
-rw-r--r--hw/intc/spapr_xive.c18
-rw-r--r--hw/intc/trace-events12
-rw-r--r--hw/intc/xics.c2
-rw-r--r--hw/intc/xive.c555
-rw-r--r--hw/intc/xive2.c734
-rw-r--r--hw/isa/isa-superio.c1
-rw-r--r--hw/isa/lpc_ich9.c7
-rw-r--r--hw/loongarch/Kconfig1
-rw-r--r--hw/loongarch/boot.c126
-rw-r--r--hw/loongarch/virt-acpi-build.c8
-rw-r--r--hw/loongarch/virt.c203
-rw-r--r--hw/m68k/virt.c9
-rw-r--r--hw/mem/cxl_type3.c83
-rw-r--r--hw/meson.build46
-rw-r--r--hw/microblaze/Kconfig4
-rw-r--r--hw/microblaze/petalogix_ml605_mmu.c15
-rw-r--r--hw/microblaze/petalogix_s3adsp1800_mmu.c42
-rw-r--r--hw/microblaze/xlnx-zynqmp-pmu.c7
-rw-r--r--hw/mips/Kconfig9
-rw-r--r--hw/mips/cps.c4
-rw-r--r--hw/mips/loongson3_virt.c1
-rw-r--r--hw/mips/malta.c3
-rw-r--r--hw/mips/meson.build1
-rw-r--r--hw/mips/mipssim.c249
-rw-r--r--hw/misc/Kconfig13
-rw-r--r--hw/misc/aspeed_hace.c479
-rw-r--r--hw/misc/aspeed_sbc.c197
-rw-r--r--hw/misc/aspeed_scu.c22
-rw-r--r--hw/misc/aspeed_sdmc.c3
-rw-r--r--hw/misc/ivshmem-flat.c9
-rw-r--r--hw/misc/ivshmem-pci.c16
-rw-r--r--hw/misc/max78000_aes.c229
-rw-r--r--hw/misc/max78000_gcr.c351
-rw-r--r--hw/misc/max78000_icc.c120
-rw-r--r--hw/misc/max78000_trng.c139
-rw-r--r--hw/misc/meson.build4
-rw-r--r--hw/misc/stm32_rcc.c2
-rw-r--r--hw/misc/trace-events14
-rw-r--r--hw/misc/xlnx-versal-cframe-reg.c9
-rw-r--r--hw/misc/xlnx-versal-crl.c602
-rw-r--r--hw/net/Kconfig3
-rw-r--r--hw/net/cadence_gem.c2
-rw-r--r--hw/net/can/can_sja1000.c1
-rw-r--r--hw/net/can/ctucan_core.c1
-rw-r--r--hw/net/can/xlnx-versal-canfd.c4
-rw-r--r--hw/net/e1000.c95
-rw-r--r--hw/net/e1000e_core.c10
-rw-r--r--hw/net/fsl_etsec/etsec.c1
-rw-r--r--hw/net/i82596.c38
-rw-r--r--hw/net/igb_core.c5
-rw-r--r--hw/net/igbvf.c6
-rw-r--r--hw/net/lan9118.c1
-rw-r--r--hw/net/meson.build1
-rw-r--r--hw/net/mipsnet.c297
-rw-r--r--hw/net/npcm_gmac.c26
-rw-r--r--hw/net/rocker/rocker.h14
-rw-r--r--hw/net/rocker/rocker_hw.h20
-rw-r--r--hw/net/rocker/rocker_of_dpa.c40
-rw-r--r--hw/net/rtl8139.c3
-rw-r--r--hw/net/trace-events7
-rw-r--r--hw/net/tulip.c2
-rw-r--r--hw/net/vhost_net-stub.c11
-rw-r--r--hw/net/vhost_net.c184
-rw-r--r--hw/net/virtio-net.c482
-rw-r--r--hw/net/vmxnet3.c58
-rw-r--r--hw/net/vmxnet3.h4
-rw-r--r--hw/net/xgmac.c2
-rw-r--r--hw/nvme/ctrl.c59
-rw-r--r--hw/nvram/aspeed_otp.c190
-rw-r--r--hw/nvram/fw_cfg.c110
-rw-r--r--hw/nvram/meson.build4
-rw-r--r--hw/nvram/trace-events5
-rw-r--r--hw/openrisc/cputimer.c2
-rw-r--r--hw/pci-bridge/pci_expander_bridge.c1
-rw-r--r--hw/pci-host/Kconfig5
-rw-r--r--hw/pci-host/aspeed_pcie.c1015
-rw-r--r--hw/pci-host/astro.c27
-rw-r--r--hw/pci-host/dino.c74
-rw-r--r--hw/pci-host/gpex-acpi.c74
-rw-r--r--hw/pci-host/gt64120.c83
-rw-r--r--hw/pci-host/meson.build1
-rw-r--r--hw/pci-host/pnv_phb3.c1
-rw-r--r--hw/pci-host/pnv_phb4.c1
-rw-r--r--hw/pci-host/ppce500.c9
-rw-r--r--hw/pci-host/raven.c87
-rw-r--r--hw/pci-host/sh_pci.c1
-rw-r--r--hw/pci-host/trace-events11
-rw-r--r--hw/pci/msix.c2
-rw-r--r--hw/pci/pci.c263
-rw-r--r--hw/pci/pci_host.c6
-rw-r--r--hw/pci/pcie.c86
-rw-r--r--hw/pci/pcie_sriov.c59
-rw-r--r--hw/ppc/Kconfig5
-rw-r--r--hw/ppc/e500.c28
-rw-r--r--hw/ppc/e500.h4
-rw-r--r--hw/ppc/e500plat.c2
-rw-r--r--hw/ppc/meson.build2
-rw-r--r--hw/ppc/mpc8544ds.c2
-rw-r--r--hw/ppc/pnv.c690
-rw-r--r--hw/ppc/pnv_chiptod.c59
-rw-r--r--hw/ppc/pnv_core.c17
-rw-r--r--hw/ppc/pnv_occ.c2
-rw-r--r--hw/ppc/ppc.c2
-rw-r--r--hw/ppc/ppc_booke.c7
-rw-r--r--hw/ppc/ppce500_spin.c3
-rw-r--r--hw/ppc/ppe42_machine.c101
-rw-r--r--hw/ppc/prep.c27
-rw-r--r--hw/ppc/spapr.c52
-rw-r--r--hw/ppc/spapr_caps.c1
-rw-r--r--hw/ppc/spapr_hcall.c11
-rw-r--r--hw/ppc/spapr_pci.c1
-rw-r--r--hw/ppc/spapr_pci_vfio.c16
-rw-r--r--hw/ppc/spapr_rtas.c2
-rw-r--r--hw/ppc/spapr_tpm_proxy.c4
-rw-r--r--hw/remote/memory.c1
-rw-r--r--hw/remote/proxy-memory-listener.c1
-rw-r--r--hw/remote/proxy.c6
-rw-r--r--hw/remote/remote-obj.c6
-rw-r--r--hw/remote/vfio-user-obj.c13
-rw-r--r--hw/riscv/Kconfig9
-rw-r--r--hw/riscv/boot.c2
-rw-r--r--hw/riscv/meson.build1
-rw-r--r--hw/riscv/riscv-iommu-bits.h1
-rw-r--r--hw/riscv/riscv-iommu-pci.c6
-rw-r--r--hw/riscv/riscv-iommu-sys.c6
-rw-r--r--hw/riscv/riscv-iommu.c163
-rw-r--r--hw/riscv/virt-acpi-build.c45
-rw-r--r--hw/riscv/virt.c71
-rw-r--r--hw/riscv/xiangshan_kmh.c220
-rw-r--r--hw/s390x/ap-stub.c21
-rw-r--r--hw/s390x/ccw-device.c2
-rw-r--r--hw/s390x/cpu-topology.c4
-rw-r--r--hw/s390x/event-facility.c2
-rw-r--r--hw/s390x/meson.build2
-rw-r--r--hw/s390x/s390-pci-bus.c48
-rw-r--r--hw/s390x/s390-pci-inst.c1
-rw-r--r--hw/s390x/s390-pci-vfio.c16
-rw-r--r--hw/s390x/s390-skeys.c1
-rw-r--r--hw/s390x/s390-stattrib-kvm.c2
-rw-r--r--hw/s390x/s390-stattrib.c4
-rw-r--r--hw/s390x/s390-virtio-ccw.c51
-rw-r--r--hw/s390x/sclp.c24
-rw-r--r--hw/s390x/sclpcpi.c212
-rw-r--r--hw/s390x/virtio-ccw.c4
-rw-r--r--hw/scsi/esp.c94
-rw-r--r--hw/scsi/lsi53c895a.c2
-rw-r--r--hw/scsi/megasas.c7
-rw-r--r--hw/scsi/mptsas.c6
-rw-r--r--hw/scsi/scsi-disk.c55
-rw-r--r--hw/scsi/spapr_vscsi.c6
-rw-r--r--hw/scsi/trace-events1
-rw-r--r--hw/scsi/virtio-scsi.c6
-rw-r--r--hw/scsi/vmw_pvscsi.c67
-rw-r--r--hw/scsi/vmw_pvscsi.h4
-rw-r--r--hw/sd/allwinner-sdhost.c7
-rw-r--r--hw/sd/bcm2835_sdhost.c7
-rw-r--r--hw/sd/core.c5
-rw-r--r--hw/sd/omap_mmc.c5
-rw-r--r--hw/sd/pl181.c6
-rw-r--r--hw/sd/sd.c235
-rw-r--r--hw/sd/sdhci.c10
-rw-r--r--hw/sd/sdmmc-internal.h3
-rw-r--r--hw/sd/ssi-sd.c104
-rw-r--r--hw/sd/trace-events4
-rw-r--r--hw/sensor/lsm303dlhc_mag.c1
-rw-r--r--hw/smbios/smbios.c12
-rw-r--r--hw/sparc/leon3.c2
-rw-r--r--hw/ssi/aspeed_smc.c3
-rw-r--r--hw/timer/hpet.c217
-rw-r--r--hw/uefi/trace.h2
-rw-r--r--hw/uefi/var-service-core.c6
-rw-r--r--hw/uefi/var-service-json.c2
-rw-r--r--hw/uefi/var-service-policy.c2
-rw-r--r--hw/uefi/var-service-utils.c2
-rw-r--r--hw/uefi/var-service-vars.c7
-rw-r--r--hw/ufs/lu.c2
-rw-r--r--hw/usb/dev-hid.c6
-rw-r--r--hw/usb/dev-network.c2
-rw-r--r--hw/usb/hcd-ohci.c2
-rw-r--r--hw/usb/hcd-uhci.c10
-rw-r--r--hw/vfio-user/Kconfig7
-rw-r--r--hw/vfio-user/container.c353
-rw-r--r--hw/vfio-user/container.h24
-rw-r--r--hw/vfio-user/device.c441
-rw-r--r--hw/vfio-user/device.h24
-rw-r--r--hw/vfio-user/meson.build11
-rw-r--r--hw/vfio-user/pci.c480
-rw-r--r--hw/vfio-user/protocol.h242
-rw-r--r--hw/vfio-user/proxy.c1363
-rw-r--r--hw/vfio-user/proxy.h135
-rw-r--r--hw/vfio-user/trace-events20
-rw-r--r--hw/vfio-user/trace.h4
-rw-r--r--hw/vfio/Kconfig18
-rw-r--r--hw/vfio/amd-xgbe.c61
-rw-r--r--hw/vfio/ap.c90
-rw-r--r--hw/vfio/calxeda-xgmac.c61
-rw-r--r--hw/vfio/ccw.c4
-rw-r--r--hw/vfio/container-base.c338
-rw-r--r--hw/vfio/container-legacy.c1260
-rw-r--r--hw/vfio/container.c1272
-rw-r--r--hw/vfio/cpr-iommufd.c226
-rw-r--r--hw/vfio/cpr-legacy.c288
-rw-r--r--hw/vfio/cpr.c276
-rw-r--r--hw/vfio/device.c89
-rw-r--r--hw/vfio/display.c4
-rw-r--r--hw/vfio/helpers.c28
-rw-r--r--hw/vfio/igd.c82
-rw-r--r--hw/vfio/iommufd-stubs.c18
-rw-r--r--hw/vfio/iommufd.c174
-rw-r--r--hw/vfio/listener.c201
-rw-r--r--hw/vfio/meson.build10
-rw-r--r--hw/vfio/migration-multifd.c105
-rw-r--r--hw/vfio/migration-multifd.h5
-rw-r--r--hw/vfio/migration.c14
-rw-r--r--hw/vfio/pci-quirks.c57
-rw-r--r--hw/vfio/pci.c776
-rw-r--r--hw/vfio/pci.h63
-rw-r--r--hw/vfio/platform.c716
-rw-r--r--hw/vfio/region.c12
-rw-r--r--hw/vfio/spapr.c57
-rw-r--r--hw/vfio/trace-events30
-rw-r--r--hw/vfio/trace.h3
-rw-r--r--hw/vfio/types.h23
-rw-r--r--hw/vfio/vfio-cpr.h15
-rw-r--r--hw/vfio/vfio-helpers.h2
-rw-r--r--hw/vfio/vfio-iommufd.h9
-rw-r--r--hw/vfio/vfio-listener.h4
-rw-r--r--hw/vfio/vfio-migration-internal.h2
-rw-r--r--hw/vfio/vfio-region.h48
-rw-r--r--hw/virtio/Kconfig5
-rw-r--r--hw/virtio/meson.build25
-rw-r--r--hw/virtio/trace-events1
-rw-r--r--hw/virtio/vdpa-dev.c7
-rw-r--r--hw/virtio/vhost-backend.c62
-rw-r--r--hw/virtio/vhost-user-test-device-pci.c (renamed from hw/virtio/vhost-user-device-pci.c)17
-rw-r--r--hw/virtio/vhost-user-test-device.c (renamed from hw/virtio/vhost-user-device.c)9
-rw-r--r--hw/virtio/vhost-user.c5
-rw-r--r--hw/virtio/vhost-vdpa.c116
-rw-r--r--hw/virtio/vhost-vsock.c8
-rw-r--r--hw/virtio/vhost.c181
-rw-r--r--hw/virtio/virtio-balloon.c1
-rw-r--r--hw/virtio/virtio-bus.c11
-rw-r--r--hw/virtio/virtio-config-io.c1
-rw-r--r--hw/virtio/virtio-hmp-cmds.c3
-rw-r--r--hw/virtio/virtio-mem.c85
-rw-r--r--hw/virtio/virtio-mmio.c5
-rw-r--r--hw/virtio/virtio-pci.c120
-rw-r--r--hw/virtio/virtio-qmp.c91
-rw-r--r--hw/virtio/virtio-qmp.h3
-rw-r--r--hw/virtio/virtio.c230
-rw-r--r--hw/vmapple/virtio-blk.c1
-rw-r--r--hw/vmapple/vmapple.c2
-rw-r--r--hw/watchdog/wdt_i6300esb.c2
-rw-r--r--hw/xen/xen-hvm-common.c8
-rw-r--r--hw/xen/xen_pt.c1
-rw-r--r--hw/xen/xen_pt_msi.c11
-rw-r--r--hw/xtensa/xtfpga.c2
443 files changed, 25391 insertions, 11137 deletions
diff --git a/hw/9pfs/9p-synth.c b/hw/9pfs/9p-synth.c
index 9cd1884..b3743f6 100644
--- a/hw/9pfs/9p-synth.c
+++ b/hw/9pfs/9p-synth.c
@@ -451,7 +451,7 @@ static int synth_statfs(FsContext *s, V9fsPath *fs_path,
stbuf->f_bsize = 512;
stbuf->f_blocks = 0;
stbuf->f_files = synth_node_count;
-#ifndef CONFIG_DARWIN
+#if !defined(CONFIG_DARWIN) && !defined(CONFIG_FREEBSD)
stbuf->f_namelen = NAME_MAX;
#endif
return 0;
diff --git a/hw/9pfs/9p-util-freebsd.c b/hw/9pfs/9p-util-freebsd.c
new file mode 100644
index 0000000..9dd1d06
--- /dev/null
+++ b/hw/9pfs/9p-util-freebsd.c
@@ -0,0 +1,132 @@
+/*
+ * 9p utilities (FreeBSD Implementation)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+/*
+ * Not so fast! You might want to read the 9p developer docs first:
+ * https://wiki.qemu.org/Documentation/9p
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/xattr.h"
+#include "9p-util.h"
+
+static int mangle_xattr_name(const char **namep)
+{
+ const char *name = *namep;
+
+ /*
+ * ZFS forbids attributes in starting with "user." or "system.".
+ */
+ if (strncmp(name, "system.", 7) == 0) {
+ *namep = name + 7;
+ return EXTATTR_NAMESPACE_SYSTEM;
+ }
+ if (strncmp(name, "user.", 5) == 0) {
+ *namep = name + 5;
+ }
+ return EXTATTR_NAMESPACE_USER;
+}
+
+ssize_t fgetxattr(int fd, const char *name, void *value, size_t size)
+{
+ int namespace;
+
+ namespace = mangle_xattr_name(&name);
+ return extattr_get_fd(fd, namespace, name, value, size);
+}
+
+ssize_t fgetxattrat_nofollow(int dirfd, const char *filename, const char *name,
+ void *value, size_t size)
+{
+ ssize_t ret;
+ int fd, namespace;
+
+ fd = openat_file(dirfd, filename,
+ O_RDONLY | O_PATH_9P_UTIL | O_NOFOLLOW, 0);
+ if (fd == -1) {
+ return -1;
+ }
+ namespace = mangle_xattr_name(&name);
+ ret = extattr_get_fd(fd, namespace, name, value, size);
+ close_preserve_errno(fd);
+ return ret;
+}
+
+ssize_t flistxattrat_nofollow(int dirfd, const char *filename,
+ char *list, size_t size)
+{
+ ssize_t ret;
+ int fd;
+
+ fd = openat_file(dirfd, filename,
+ O_RDONLY | O_PATH_9P_UTIL | O_NOFOLLOW, 0);
+ if (fd == -1) {
+ return -1;
+ }
+ ret = extattr_list_fd(fd, EXTATTR_NAMESPACE_USER, list, size);
+ close_preserve_errno(fd);
+ return ret;
+}
+
+ssize_t fremovexattrat_nofollow(int dirfd, const char *filename,
+ const char *name)
+{
+ int fd, namespace, ret;
+
+ fd = openat_file(dirfd, filename,
+ O_RDONLY | O_PATH_9P_UTIL | O_NOFOLLOW, 0);
+ if (fd == -1) {
+ return -1;
+ }
+ namespace = mangle_xattr_name(&name);
+ ret = extattr_delete_fd(fd, namespace, name);
+ close_preserve_errno(fd);
+ return ret;
+}
+
+int fsetxattrat_nofollow(int dirfd, const char *filename, const char *name,
+ void *value, size_t size, int flags)
+{
+ ssize_t ret;
+ int fd, namespace;
+
+ namespace = mangle_xattr_name(&name);
+ if (flags == (XATTR_CREATE | XATTR_REPLACE)) {
+ errno = EINVAL;
+ return -1;
+ }
+ fd = openat_file(dirfd, filename,
+ O_RDONLY | O_PATH_9P_UTIL | O_NOFOLLOW, 0);
+ if (fd == -1) {
+ return -1;
+ }
+ if (flags & (XATTR_CREATE | XATTR_REPLACE)) {
+ ret = extattr_get_fd(fd, namespace, name, NULL, 0);
+ if (ret == -1 && errno != ENOATTR) {
+ close_preserve_errno(fd);
+ return -1;
+ }
+ if (ret >= 0 && (flags & XATTR_CREATE)) {
+ errno = EEXIST;
+ close_preserve_errno(fd);
+ return -1;
+ }
+ if (ret == -1 && (flags & XATTR_REPLACE)) {
+ errno = ENOATTR;
+ close_preserve_errno(fd);
+ return -1;
+ }
+ }
+ ret = extattr_set_fd(fd, namespace, name, value, size);
+ close_preserve_errno(fd);
+ return ret;
+}
+
+int qemu_mknodat(int dirfd, const char *filename, mode_t mode, dev_t dev)
+{
+ return mknodat(dirfd, filename, mode, dev);
+}
diff --git a/hw/9pfs/9p-util-generic.c b/hw/9pfs/9p-util-generic.c
index 4c1e9c8..b71fa2c 100644
--- a/hw/9pfs/9p-util-generic.c
+++ b/hw/9pfs/9p-util-generic.c
@@ -2,7 +2,6 @@
#include "qemu/osdep.h"
#include "9p-util.h"
-#include <glib/gstrfuncs.h>
char *qemu_open_flags_tostr(int flags)
{
diff --git a/hw/9pfs/9p-util.h b/hw/9pfs/9p-util.h
index a1924fe..8dfa803 100644
--- a/hw/9pfs/9p-util.h
+++ b/hw/9pfs/9p-util.h
@@ -21,6 +21,15 @@
#define O_PATH_9P_UTIL 0
#endif
+#ifdef CONFIG_FREEBSD
+/*
+ * FreeBSD does not have these flags, so we can only emulate their intended
+ * behaviour (racily).
+ */
+#define XATTR_CREATE 0x1
+#define XATTR_REPLACE 0x2
+#endif
+
#if !defined(CONFIG_LINUX)
/*
@@ -64,9 +73,9 @@ static inline uint64_t host_dev_to_dotl_dev(dev_t dev)
static inline int errno_to_dotl(int err) {
#if defined(CONFIG_LINUX)
/* nothing to translate (Linux -> Linux) */
-#elif defined(CONFIG_DARWIN)
+#elif defined(CONFIG_DARWIN) || defined(CONFIG_FREEBSD)
/*
- * translation mandatory for macOS hosts
+ * translation mandatory for non-Linux hosts
*
* FIXME: Only most important errnos translated here yet, this should be
* extended to as many errnos being translated as possible in future.
@@ -155,13 +164,13 @@ static inline int openat_file(int dirfd, const char *name, int flags,
{
int fd, serrno, ret;
-#ifndef CONFIG_DARWIN
+#if !defined(CONFIG_DARWIN) && !defined(CONFIG_FREEBSD)
again:
#endif
fd = qemu_openat(dirfd, name, flags | O_NOFOLLOW | O_NOCTTY | O_NONBLOCK,
mode);
if (fd == -1) {
-#ifndef CONFIG_DARWIN
+#if !defined(CONFIG_DARWIN) && !defined(CONFIG_FREEBSD)
if (errno == EPERM && (flags & O_NOATIME)) {
/*
* The client passed O_NOATIME but we lack permissions to honor it.
@@ -202,6 +211,9 @@ again:
return fd;
}
+#ifdef CONFIG_FREEBSD
+ssize_t fgetxattr(int dirfd, const char *name, void *value, size_t size);
+#endif
ssize_t fgetxattrat_nofollow(int dirfd, const char *path, const char *name,
void *value, size_t size);
int fsetxattrat_nofollow(int dirfd, const char *path, const char *name,
diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index 8b001b9..bc4a016 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -136,8 +136,10 @@ static int dotl_to_open_flags(int flags)
{ P9_DOTL_NONBLOCK, O_NONBLOCK } ,
{ P9_DOTL_DSYNC, O_DSYNC },
{ P9_DOTL_FASYNC, FASYNC },
-#ifndef CONFIG_DARWIN
+#if !defined(CONFIG_DARWIN) && !defined(CONFIG_FREEBSD)
{ P9_DOTL_NOATIME, O_NOATIME },
+#endif
+#ifndef CONFIG_DARWIN
/*
* On Darwin, we could map to F_NOCACHE, which is
* similar, but doesn't quite have the same
@@ -201,8 +203,7 @@ void v9fs_path_free(V9fsPath *path)
}
-void G_GNUC_PRINTF(2, 3)
-v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...)
+void v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...)
{
va_list ap;
@@ -3659,7 +3660,7 @@ static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, struct statfs *stbuf)
f_bavail = stbuf->f_bavail / bsize_factor;
f_files = stbuf->f_files;
f_ffree = stbuf->f_ffree;
-#ifdef CONFIG_DARWIN
+#if defined(CONFIG_DARWIN) || defined(CONFIG_FREEBSD)
fsid_val = (unsigned int)stbuf->f_fsid.val[0] |
(unsigned long long)stbuf->f_fsid.val[1] << 32;
f_namelen = NAME_MAX;
@@ -4051,6 +4052,16 @@ out_nofid:
* Linux guests.
*/
#define P9_XATTR_SIZE_MAX 65536
+#elif defined(CONFIG_FREEBSD)
+/*
+ * FreeBSD similarly doesn't define a maximum xattr size, the limit is
+ * filesystem dependent. On UFS filesystems it's 2 times the filesystem block
+ * size, typically 32KB. On ZFS it depends on the value of the xattr property;
+ * with the default value there is no limit, and with xattr=sa it is 64KB.
+ *
+ * So, a limit of 64k seems reasonable here too.
+ */
+#define P9_XATTR_SIZE_MAX 65536
#else
#error Missing definition for P9_XATTR_SIZE_MAX for this host system
#endif
diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
index 259ad32..65cc45e 100644
--- a/hw/9pfs/9p.h
+++ b/hw/9pfs/9p.h
@@ -456,7 +456,8 @@ static inline uint8_t v9fs_request_cancelled(V9fsPDU *pdu)
void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu);
void v9fs_path_init(V9fsPath *path);
void v9fs_path_free(V9fsPath *path);
-void v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...);
+void G_GNUC_PRINTF(2, 3) v9fs_path_sprintf(V9fsPath *path, const char *fmt,
+ ...);
void v9fs_path_copy(V9fsPath *dst, const V9fsPath *src);
size_t v9fs_readdir_response_size(V9fsString *name);
int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath,
diff --git a/hw/9pfs/meson.build b/hw/9pfs/meson.build
index d35d4f4..7f4d6e3 100644
--- a/hw/9pfs/meson.build
+++ b/hw/9pfs/meson.build
@@ -15,6 +15,8 @@ fs_ss.add(files(
))
if host_os == 'darwin'
fs_ss.add(files('9p-util-darwin.c'))
+elif host_os == 'freebsd'
+ fs_ss.add(files('9p-util-freebsd.c'))
elif host_os == 'linux'
fs_ss.add(files('9p-util-linux.c'))
endif
diff --git a/hw/Kconfig b/hw/Kconfig
index 9a86a6a..9e6c789 100644
--- a/hw/Kconfig
+++ b/hw/Kconfig
@@ -42,6 +42,7 @@ source ufs/Kconfig
source usb/Kconfig
source virtio/Kconfig
source vfio/Kconfig
+source vfio-user/Kconfig
source vmapple/Kconfig
source xen/Kconfig
source watchdog/Kconfig
diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig
index 1d4e9f0..daabbe6 100644
--- a/hw/acpi/Kconfig
+++ b/hw/acpi/Kconfig
@@ -51,6 +51,11 @@ config ACPI_APEI
bool
depends on ACPI
+config GHES_CPER
+ bool
+ depends on ACPI_APEI
+ default y
+
config ACPI_PCI
bool
depends on ACPI && PCI
diff --git a/hw/acpi/acpi-pci-hotplug-stub.c b/hw/acpi/acpi-pci-hotplug-stub.c
index b7bc6e4..d58ea72 100644
--- a/hw/acpi/acpi-pci-hotplug-stub.c
+++ b/hw/acpi/acpi-pci-hotplug-stub.c
@@ -4,7 +4,7 @@
const VMStateDescription vmstate_acpi_pcihp_pci_status;
-void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus,
+void acpi_pcihp_init(Object *owner, AcpiPciHpState *s,
MemoryRegion *address_space_io, uint16_t io_base)
{
}
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index f8f93a9..2d5826a 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -160,7 +160,7 @@ void crs_replace_with_free_ranges(GPtrArray *ranges,
*/
static void crs_range_merge(GPtrArray *range)
{
- GPtrArray *tmp = g_ptr_array_new_with_free_func(crs_range_free);
+ g_autoptr(GPtrArray) tmp = g_ptr_array_new_with_free_func(crs_range_free);
CrsRangeEntry *entry;
uint64_t range_base, range_limit;
int i;
@@ -191,7 +191,6 @@ static void crs_range_merge(GPtrArray *range)
entry = g_ptr_array_index(tmp, i);
crs_range_insert(range, entry->base, entry->limit);
}
- g_ptr_array_free(tmp, true);
}
static void
@@ -2153,6 +2152,7 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
int64_t socket_id = -1, cluster_id = -1, core_id = -1;
uint32_t socket_offset = 0, cluster_offset = 0, core_offset = 0;
uint32_t pptt_start = table_data->len;
+ uint32_t root_offset;
int n;
AcpiTable table = { .sig = "PPTT", .rev = 2,
.oem_id = oem_id, .oem_table_id = oem_table_id };
@@ -2160,6 +2160,18 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
acpi_table_begin(&table, table_data);
/*
+ * Build a root node for all the processor nodes. Otherwise when
+ * building a multi-socket system each socket tree is separated
+ * and will be hard for the OS like Linux to know whether the
+ * system is homogeneous.
+ */
+ root_offset = table_data->len - pptt_start;
+ build_processor_hierarchy_node(table_data,
+ (1 << 0) | /* Physical package */
+ (1 << 4), /* Identical Implementation */
+ 0, 0, NULL, 0);
+
+ /*
* This works with the assumption that cpus[n].props.*_id has been
* sorted from top to down levels in mc->possible_cpu_arch_ids().
* Otherwise, the unexpected and duplicated containers will be
@@ -2173,8 +2185,9 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
core_id = -1;
socket_offset = table_data->len - pptt_start;
build_processor_hierarchy_node(table_data,
- (1 << 0), /* Physical package */
- 0, socket_id, NULL, 0);
+ (1 << 0) | /* Physical package */
+ (1 << 4), /* Identical Implementation */
+ root_offset, socket_id, NULL, 0);
}
if (mc->smp_props.clusters_supported && mc->smp_props.has_clusters) {
@@ -2184,7 +2197,8 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
core_id = -1;
cluster_offset = table_data->len - pptt_start;
build_processor_hierarchy_node(table_data,
- (0 << 0), /* Not a physical package */
+ (0 << 0) | /* Not a physical package */
+ (1 << 4), /* Identical Implementation */
socket_offset, cluster_id, NULL, 0);
}
} else {
@@ -2202,7 +2216,8 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms,
core_id = cpus->cpus[n].props.core_id;
core_offset = table_data->len - pptt_start;
build_processor_hierarchy_node(table_data,
- (0 << 0), /* Not a physical package */
+ (0 << 0) | /* Not a physical package */
+ (1 << 4), /* Identical Implementation */
cluster_offset, core_id, NULL, 0);
}
@@ -2614,3 +2629,13 @@ Aml *aml_i2c_serial_bus_device(uint16_t address, const char *resource_source)
return var;
}
+
+/* ACPI 5.0b: 18.3.2.6.2 Event Notification For Generic Error Sources */
+Aml *aml_error_device(void)
+{
+ Aml *dev = aml_device(ACPI_APEI_ERROR_DEVICE);
+ aml_append(dev, aml_name_decl("_HID", aml_string("PNP0C33")));
+ aml_append(dev, aml_name_decl("_UID", aml_int(0)));
+
+ return dev;
+}
diff --git a/hw/acpi/bios-linker-loader.c b/hw/acpi/bios-linker-loader.c
index 1080618..c9ffe44 100644
--- a/hw/acpi/bios-linker-loader.c
+++ b/hw/acpi/bios-linker-loader.c
@@ -22,8 +22,6 @@
#include "hw/acpi/bios-linker-loader.h"
#include "hw/nvram/fw_cfg.h"
-#include "qemu/bswap.h"
-
/*
* Linker/loader is a paravirtualized interface that passes commands to guest.
* The commands can be used to request guest to
diff --git a/hw/acpi/core.c b/hw/acpi/core.c
index 58f8964..ff16582 100644
--- a/hw/acpi/core.c
+++ b/hw/acpi/core.c
@@ -547,6 +547,7 @@ void acpi_pm_tmr_init(ACPIREGS *ar, acpi_update_sci_fn update_sci,
ar->tmr.timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, acpi_pm_tmr_timer, ar);
memory_region_init_io(&ar->tmr.io, memory_region_owner(parent),
&acpi_pm_tmr_ops, ar, "acpi-tmr", 4);
+ memory_region_enable_lockless_io(&ar->tmr.io);
memory_region_add_subregion(parent, 8, &ar->tmr.io);
}
diff --git a/hw/acpi/cxl.c b/hw/acpi/cxl.c
index 9cd7905..75d5b30 100644
--- a/hw/acpi/cxl.c
+++ b/hw/acpi/cxl.c
@@ -22,6 +22,7 @@
#include "hw/pci/pci_bridge.h"
#include "hw/pci/pci_host.h"
#include "hw/cxl/cxl.h"
+#include "hw/cxl/cxl_host.h"
#include "hw/mem/memory-device.h"
#include "hw/acpi/acpi.h"
#include "hw/acpi/aml-build.h"
@@ -135,55 +136,52 @@ static void cedt_build_chbs(GArray *table_data, PXBCXLDev *cxl)
* Interleave ways encoding in CXL 2.0 ECN: 3, 6, 12 and 16-way memory
* interleaving.
*/
-static void cedt_build_cfmws(GArray *table_data, CXLState *cxls)
+static void cedt_build_cfmws(CXLFixedWindow *fw, Aml *cedt)
{
- GList *it;
+ GArray *table_data = cedt->buf;
+ int i;
- for (it = cxls->fixed_windows; it; it = it->next) {
- CXLFixedWindow *fw = it->data;
- int i;
-
- /* Type */
- build_append_int_noprefix(table_data, 1, 1);
+ /* Type */
+ build_append_int_noprefix(table_data, 1, 1);
- /* Reserved */
- build_append_int_noprefix(table_data, 0, 1);
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 1);
- /* Record Length */
- build_append_int_noprefix(table_data, 36 + 4 * fw->num_targets, 2);
+ /* Record Length */
+ build_append_int_noprefix(table_data, 36 + 4 * fw->num_targets, 2);
- /* Reserved */
- build_append_int_noprefix(table_data, 0, 4);
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 4);
- /* Base HPA */
- build_append_int_noprefix(table_data, fw->mr.addr, 8);
+ /* Base HPA */
+ build_append_int_noprefix(table_data, fw->mr.addr, 8);
- /* Window Size */
- build_append_int_noprefix(table_data, fw->size, 8);
+ /* Window Size */
+ build_append_int_noprefix(table_data, fw->size, 8);
- /* Host Bridge Interleave Ways */
- build_append_int_noprefix(table_data, fw->enc_int_ways, 1);
+ /* Host Bridge Interleave Ways */
+ build_append_int_noprefix(table_data, fw->enc_int_ways, 1);
- /* Host Bridge Interleave Arithmetic */
- build_append_int_noprefix(table_data, 0, 1);
+ /* Host Bridge Interleave Arithmetic */
+ build_append_int_noprefix(table_data, 0, 1);
- /* Reserved */
- build_append_int_noprefix(table_data, 0, 2);
+ /* Reserved */
+ build_append_int_noprefix(table_data, 0, 2);
- /* Host Bridge Interleave Granularity */
- build_append_int_noprefix(table_data, fw->enc_int_gran, 4);
+ /* Host Bridge Interleave Granularity */
+ build_append_int_noprefix(table_data, fw->enc_int_gran, 4);
- /* Window Restrictions */
- build_append_int_noprefix(table_data, 0x0f, 2); /* No restrictions */
+ /* Window Restrictions */
+ build_append_int_noprefix(table_data, 0x0f, 2);
- /* QTG ID */
- build_append_int_noprefix(table_data, 0, 2);
+ /* QTG ID */
+ build_append_int_noprefix(table_data, 0, 2);
- /* Host Bridge List (list of UIDs - currently bus_nr) */
- for (i = 0; i < fw->num_targets; i++) {
- g_assert(fw->target_hbs[i]);
- build_append_int_noprefix(table_data, PXB_DEV(fw->target_hbs[i])->bus_nr, 4);
- }
+ /* Host Bridge List (list of UIDs - currently bus_nr) */
+ for (i = 0; i < fw->num_targets; i++) {
+ g_assert(fw->target_hbs[i]);
+ build_append_int_noprefix(table_data,
+ PXB_DEV(fw->target_hbs[i])->bus_nr, 4);
}
}
@@ -202,6 +200,7 @@ void cxl_build_cedt(GArray *table_offsets, GArray *table_data,
BIOSLinker *linker, const char *oem_id,
const char *oem_table_id, CXLState *cxl_state)
{
+ GSList *cfmws_list, *iter;
Aml *cedt;
AcpiTable table = { .sig = "CEDT", .rev = 1, .oem_id = oem_id,
.oem_table_id = oem_table_id };
@@ -213,7 +212,12 @@ void cxl_build_cedt(GArray *table_offsets, GArray *table_data,
/* reserve space for CEDT header */
object_child_foreach_recursive(object_get_root(), cxl_foreach_pxb_hb, cedt);
- cedt_build_cfmws(cedt->buf, cxl_state);
+
+ cfmws_list = cxl_fmws_get_all_sorted();
+ for (iter = cfmws_list; iter; iter = iter->next) {
+ cedt_build_cfmws(CXL_FMW(iter->data), cedt);
+ }
+ g_slist_free(cfmws_list);
/* copy AML table into ACPI tables blob and patch header there */
g_array_append_vals(table_data, cedt->buf->data, cedt->buf->len);
diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c
index 7a62f8d..e7b773d 100644
--- a/hw/acpi/generic_event_device.c
+++ b/hw/acpi/generic_event_device.c
@@ -12,10 +12,13 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/acpi/acpi.h"
+#include "hw/acpi/pcihp.h"
#include "hw/acpi/generic_event_device.h"
+#include "hw/pci/pci.h"
#include "hw/irq.h"
#include "hw/mem/pc-dimm.h"
#include "hw/mem/nvdimm.h"
+#include "hw/pci/pci_device.h"
#include "hw/qdev-properties.h"
#include "migration/vmstate.h"
#include "qemu/error-report.h"
@@ -26,6 +29,8 @@ static const uint32_t ged_supported_events[] = {
ACPI_GED_PWR_DOWN_EVT,
ACPI_GED_NVDIMM_HOTPLUG_EVT,
ACPI_GED_CPU_HOTPLUG_EVT,
+ ACPI_GED_PCI_HOTPLUG_EVT,
+ ACPI_GED_ERROR_EVT,
};
/*
@@ -116,11 +121,27 @@ void build_ged_aml(Aml *table, const char *name, HotplugHandler *hotplug_dev,
aml_notify(aml_name(ACPI_POWER_BUTTON_DEVICE),
aml_int(0x80)));
break;
+ case ACPI_GED_ERROR_EVT:
+ /*
+ * ACPI 5.0b: 5.6.6 Device Object Notifications
+ * Table 5-135 Error Device Notification Values
+ * Defines 0x80 as the value to be used on notifications
+ */
+ aml_append(if_ctx,
+ aml_notify(aml_name(ACPI_APEI_ERROR_DEVICE),
+ aml_int(0x80)));
+ break;
case ACPI_GED_NVDIMM_HOTPLUG_EVT:
aml_append(if_ctx,
aml_notify(aml_name("\\_SB.NVDR"),
aml_int(0x80)));
break;
+ case ACPI_GED_PCI_HOTPLUG_EVT:
+ aml_append(if_ctx,
+ aml_acquire(aml_name("\\_SB.PCI0.BLCK"), 0xFFFF));
+ aml_append(if_ctx, aml_call0("\\_SB.PCI0.PCNT"));
+ aml_append(if_ctx, aml_release(aml_name("\\_SB.PCI0.BLCK")));
+ break;
default:
/*
* Please make sure all the events in ged_supported_events[]
@@ -227,6 +248,14 @@ static const MemoryRegionOps ged_regs_ops = {
},
};
+static void acpi_ged_device_pre_plug_cb(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
+{
+ if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+ acpi_pcihp_device_pre_plug_cb(hotplug_dev, dev, errp);
+ }
+}
+
static void acpi_ged_device_plug_cb(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
@@ -240,6 +269,8 @@ static void acpi_ged_device_plug_cb(HotplugHandler *hotplug_dev,
}
} else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
acpi_cpu_plug_cb(hotplug_dev, &s->cpuhp_state, dev, errp);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+ acpi_pcihp_device_plug_cb(hotplug_dev, &s->pcihp_state, dev, errp);
} else {
error_setg(errp, "virt: device plug request for unsupported device"
" type: %s", object_get_typename(OBJECT(dev)));
@@ -256,6 +287,9 @@ static void acpi_ged_unplug_request_cb(HotplugHandler *hotplug_dev,
acpi_memory_unplug_request_cb(hotplug_dev, &s->memhp_state, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
acpi_cpu_unplug_request_cb(hotplug_dev, &s->cpuhp_state, dev, errp);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+ acpi_pcihp_device_unplug_request_cb(hotplug_dev, &s->pcihp_state,
+ dev, errp);
} else {
error_setg(errp, "acpi: device unplug request for unsupported device"
" type: %s", object_get_typename(OBJECT(dev)));
@@ -271,6 +305,8 @@ static void acpi_ged_unplug_cb(HotplugHandler *hotplug_dev,
acpi_memory_unplug_cb(&s->memhp_state, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
acpi_cpu_unplug_cb(&s->cpuhp_state, dev, errp);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+ acpi_pcihp_device_unplug_cb(hotplug_dev, &s->pcihp_state, dev, errp);
} else {
error_setg(errp, "acpi: device unplug for unsupported device"
" type: %s", object_get_typename(OBJECT(dev)));
@@ -295,10 +331,14 @@ static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev)
sel = ACPI_GED_MEM_HOTPLUG_EVT;
} else if (ev & ACPI_POWER_DOWN_STATUS) {
sel = ACPI_GED_PWR_DOWN_EVT;
+ } else if (ev & ACPI_GENERIC_ERROR) {
+ sel = ACPI_GED_ERROR_EVT;
} else if (ev & ACPI_NVDIMM_HOTPLUG_STATUS) {
sel = ACPI_GED_NVDIMM_HOTPLUG_EVT;
} else if (ev & ACPI_CPU_HOTPLUG_STATUS) {
sel = ACPI_GED_CPU_HOTPLUG_EVT;
+ } else if (ev & ACPI_PCI_HOTPLUG_STATUS) {
+ sel = ACPI_GED_PCI_HOTPLUG_EVT;
} else {
/* Unknown event. Return without generating interrupt. */
warn_report("GED: Unsupported event %d. No irq injected", ev);
@@ -318,6 +358,12 @@ static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev)
static const Property acpi_ged_properties[] = {
DEFINE_PROP_UINT32("ged-event", AcpiGedState, ged_event_bitmap, 0),
+ DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, AcpiGedState,
+ pcihp_state.use_acpi_hotplug_bridge, 0),
+ DEFINE_PROP_LINK("bus", AcpiGedState, pcihp_state.root,
+ TYPE_PCI_BUS, PCIBus *),
+ DEFINE_PROP_BOOL("x-has-hest-addr", AcpiGedState,
+ ghes_state.use_hest_addr, true),
};
static const VMStateDescription vmstate_memhp_state = {
@@ -386,6 +432,53 @@ static const VMStateDescription vmstate_ghes_state = {
}
};
+static bool pcihp_needed(void *opaque)
+{
+ AcpiGedState *s = opaque;
+ return s->pcihp_state.use_acpi_hotplug_bridge;
+}
+
+static const VMStateDescription vmstate_pcihp_state = {
+ .name = "acpi-ged/pcihp",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = pcihp_needed,
+ .fields = (const VMStateField[]) {
+ VMSTATE_PCI_HOTPLUG(pcihp_state,
+ AcpiGedState,
+ NULL, NULL),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static const VMStateDescription vmstate_hest = {
+ .name = "acpi-hest",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (const VMStateField[]) {
+ VMSTATE_UINT64(hest_addr_le, AcpiGhesState),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+static bool hest_needed(void *opaque)
+{
+ AcpiGedState *s = opaque;
+ return s->ghes_state.hest_addr_le;
+}
+
+static const VMStateDescription vmstate_hest_state = {
+ .name = "acpi-ged/hest",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = hest_needed,
+ .fields = (const VMStateField[]) {
+ VMSTATE_STRUCT(ghes_state, AcpiGedState, 1,
+ vmstate_hest, AcpiGhesState),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
static const VMStateDescription vmstate_acpi_ged = {
.name = "acpi-ged",
.version_id = 1,
@@ -398,6 +491,8 @@ static const VMStateDescription vmstate_acpi_ged = {
&vmstate_memhp_state,
&vmstate_cpuhp_state,
&vmstate_ghes_state,
+ &vmstate_pcihp_state,
+ &vmstate_hest_state,
NULL
}
};
@@ -406,9 +501,13 @@ static void acpi_ged_realize(DeviceState *dev, Error **errp)
{
SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
AcpiGedState *s = ACPI_GED(dev);
+ AcpiPciHpState *pcihp_state = &s->pcihp_state;
uint32_t ged_events;
int i;
+ if (pcihp_state->use_acpi_hotplug_bridge) {
+ s->ged_event_bitmap |= ACPI_GED_PCI_HOTPLUG_EVT;
+ }
ged_events = ctpop32(s->ged_event_bitmap);
for (i = 0; i < ARRAY_SIZE(ged_supported_events) && ged_events; i++) {
@@ -428,6 +527,13 @@ static void acpi_ged_realize(DeviceState *dev, Error **errp)
cpu_hotplug_hw_init(&s->container_cpuhp, OBJECT(dev),
&s->cpuhp_state, 0);
break;
+ case ACPI_GED_PCI_HOTPLUG_EVT:
+ memory_region_init(&s->container_pcihp, OBJECT(dev),
+ ACPI_PCIHP_REGION_NAME, ACPI_PCIHP_SIZE);
+ sysbus_init_mmio(sbd, &s->container_pcihp);
+ acpi_pcihp_init(OBJECT(s), &s->pcihp_state,
+ &s->container_pcihp, 0);
+ qbus_set_hotplug_handler(BUS(s->pcihp_state.root), OBJECT(dev));
}
ged_events--;
}
@@ -469,20 +575,34 @@ static void acpi_ged_initfn(Object *obj)
sysbus_init_mmio(sbd, &ged_st->regs);
}
+static void ged_reset_hold(Object *obj, ResetType type)
+{
+ AcpiGedState *s = ACPI_GED(obj);
+
+ if (s->pcihp_state.use_acpi_hotplug_bridge) {
+ acpi_pcihp_reset(&s->pcihp_state);
+ }
+}
+
static void acpi_ged_class_init(ObjectClass *class, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(class);
HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(class);
AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_CLASS(class);
+ ResettableClass *rc = RESETTABLE_CLASS(class);
+ AcpiGedClass *gedc = ACPI_GED_CLASS(class);
dc->desc = "ACPI Generic Event Device";
device_class_set_props(dc, acpi_ged_properties);
dc->vmsd = &vmstate_acpi_ged;
dc->realize = acpi_ged_realize;
+ hc->pre_plug = acpi_ged_device_pre_plug_cb;
hc->plug = acpi_ged_device_plug_cb;
hc->unplug_request = acpi_ged_unplug_request_cb;
hc->unplug = acpi_ged_unplug_cb;
+ resettable_class_set_parent_phases(rc, NULL, ged_reset_hold, NULL,
+ &gedc->parent_phases);
adevc->ospm_status = acpi_ged_ospm_status;
adevc->send_event = acpi_ged_send_event;
@@ -494,6 +614,7 @@ static const TypeInfo acpi_ged_info = {
.instance_size = sizeof(AcpiGedState),
.instance_init = acpi_ged_initfn,
.class_init = acpi_ged_class_init,
+ .class_size = sizeof(AcpiGedClass),
.interfaces = (const InterfaceInfo[]) {
{ TYPE_HOTPLUG_HANDLER },
{ TYPE_ACPI_DEVICE_IF },
diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c
index 7cec181..40f660c 100644
--- a/hw/acpi/ghes-stub.c
+++ b/hw/acpi/ghes-stub.c
@@ -11,12 +11,13 @@
#include "qemu/osdep.h"
#include "hw/acpi/ghes.h"
-int acpi_ghes_memory_errors(uint16_t source_id, uint64_t physical_address)
+int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
+ uint64_t physical_address)
{
return -1;
}
-bool acpi_ghes_present(void)
+AcpiGhesState *acpi_ghes_get_state(void)
{
- return false;
+ return NULL;
}
diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
index b85bb48..0655590 100644
--- a/hw/acpi/ghes.c
+++ b/hw/acpi/ghes.c
@@ -30,6 +30,7 @@
#define ACPI_HW_ERROR_FW_CFG_FILE "etc/hardware_errors"
#define ACPI_HW_ERROR_ADDR_FW_CFG_FILE "etc/hardware_errors_addr"
+#define ACPI_HEST_ADDR_FW_CFG_FILE "etc/acpi_table_hest_addr"
/* The max size in bytes for one error block */
#define ACPI_GHES_MAX_RAW_DATA_LENGTH (1 * KiB)
@@ -41,6 +42,12 @@
#define GAS_ADDR_OFFSET 4
/*
+ * ACPI spec 1.0b
+ * 5.2.3 System Description Table Header
+ */
+#define ACPI_DESC_HEADER_OFFSET 36
+
+/*
* The total size of Generic Error Data Entry
* ACPI 6.1/6.2: 18.3.2.7.1 Generic Error Data,
* Table 18-343 Generic Error Data Entry
@@ -61,6 +68,30 @@
#define ACPI_GHES_GESB_SIZE 20
/*
+ * See the memory layout map at docs/specs/acpi_hest_ghes.rst.
+ */
+
+/*
+ * ACPI 6.1: 18.3.2.8 Generic Hardware Error Source version 2
+ * Table 18-344 Generic Hardware Error Source version 2 (GHESv2) Structure
+ */
+#define HEST_GHES_V2_ENTRY_SIZE 92
+
+/*
+ * ACPI 6.1: 18.3.2.8 Generic Hardware Error Source version 2
+ * Table 18-344 Generic Hardware Error Source version 2 (GHESv2) Structure
+ * Read Ack Register
+ */
+#define GHES_READ_ACK_ADDR_OFF 64
+
+/*
+ * ACPI 6.1: 18.3.2.7: Generic Hardware Error Source
+ * Table 18-341 Generic Hardware Error Source Structure
+ * Error Status Address
+ */
+#define GHES_ERR_STATUS_ADDR_OFF 20
+
+/*
* Values for error_severity field
*/
enum AcpiGenericErrorSeverity {
@@ -206,17 +237,18 @@ ghes_gen_err_data_uncorrectable_recoverable(GArray *block,
* Initialize "etc/hardware_errors" and "etc/hardware_errors_addr" fw_cfg blobs.
* See docs/specs/acpi_hest_ghes.rst for blobs format.
*/
-static void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker)
+static void build_ghes_error_table(AcpiGhesState *ags, GArray *hardware_errors,
+ BIOSLinker *linker, int num_sources)
{
int i, error_status_block_offset;
/* Build error_block_address */
- for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) {
+ for (i = 0; i < num_sources; i++) {
build_append_int_noprefix(hardware_errors, 0, sizeof(uint64_t));
}
/* Build read_ack_register */
- for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) {
+ for (i = 0; i < num_sources; i++) {
/*
* Initialize the value of read_ack_register to 1, so GHES can be
* writable after (re)boot.
@@ -231,13 +263,13 @@ static void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker)
/* Reserve space for Error Status Data Block */
acpi_data_push(hardware_errors,
- ACPI_GHES_MAX_RAW_DATA_LENGTH * ACPI_GHES_ERROR_SOURCE_COUNT);
+ ACPI_GHES_MAX_RAW_DATA_LENGTH * num_sources);
/* Tell guest firmware to place hardware_errors blob into RAM */
bios_linker_loader_alloc(linker, ACPI_HW_ERROR_FW_CFG_FILE,
hardware_errors, sizeof(uint64_t), false);
- for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) {
+ for (i = 0; i < num_sources; i++) {
/*
* Tell firmware to patch error_block_address entries to point to
* corresponding "Generic Error Status Block"
@@ -251,22 +283,26 @@ static void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker)
i * ACPI_GHES_MAX_RAW_DATA_LENGTH);
}
- /*
- * tell firmware to write hardware_errors GPA into
- * hardware_errors_addr fw_cfg, once the former has been initialized.
- */
- bios_linker_loader_write_pointer(linker, ACPI_HW_ERROR_ADDR_FW_CFG_FILE, 0,
- sizeof(uint64_t),
- ACPI_HW_ERROR_FW_CFG_FILE, 0);
+ if (!ags->use_hest_addr) {
+ /*
+ * Tell firmware to write hardware_errors GPA into
+ * hardware_errors_addr fw_cfg, once the former has been initialized.
+ */
+ bios_linker_loader_write_pointer(linker, ACPI_HW_ERROR_ADDR_FW_CFG_FILE,
+ 0, sizeof(uint64_t),
+ ACPI_HW_ERROR_FW_CFG_FILE, 0);
+ }
}
/* Build Generic Hardware Error Source version 2 (GHESv2) */
-static void build_ghes_v2(GArray *table_data,
- BIOSLinker *linker,
- enum AcpiGhesNotifyType notify,
- uint16_t source_id)
+static void build_ghes_v2_entry(GArray *table_data,
+ BIOSLinker *linker,
+ const AcpiNotificationSourceId *notif_src,
+ uint16_t index, int num_sources)
{
uint64_t address_offset;
+ const uint16_t notify = notif_src->notify;
+ const uint16_t source_id = notif_src->source_id;
/*
* Type:
@@ -297,7 +333,7 @@ static void build_ghes_v2(GArray *table_data,
address_offset + GAS_ADDR_OFFSET,
sizeof(uint64_t),
ACPI_HW_ERROR_FW_CFG_FILE,
- source_id * sizeof(uint64_t));
+ index * sizeof(uint64_t));
/* Notification Structure */
build_ghes_hw_error_notification(table_data, notify);
@@ -317,8 +353,7 @@ static void build_ghes_v2(GArray *table_data,
address_offset + GAS_ADDR_OFFSET,
sizeof(uint64_t),
ACPI_HW_ERROR_FW_CFG_FILE,
- (ACPI_GHES_ERROR_SOURCE_COUNT + source_id)
- * sizeof(uint64_t));
+ (num_sources + index) * sizeof(uint64_t));
/*
* Read Ack Preserve field
@@ -331,23 +366,42 @@ static void build_ghes_v2(GArray *table_data,
}
/* Build Hardware Error Source Table */
-void acpi_build_hest(GArray *table_data, GArray *hardware_errors,
+void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
+ GArray *hardware_errors,
BIOSLinker *linker,
+ const AcpiNotificationSourceId *notif_source,
+ int num_sources,
const char *oem_id, const char *oem_table_id)
{
AcpiTable table = { .sig = "HEST", .rev = 1,
.oem_id = oem_id, .oem_table_id = oem_table_id };
+ uint32_t hest_offset;
+ int i;
- build_ghes_error_table(hardware_errors, linker);
+ hest_offset = table_data->len;
+
+ build_ghes_error_table(ags, hardware_errors, linker, num_sources);
acpi_table_begin(&table, table_data);
/* Error Source Count */
- build_append_int_noprefix(table_data, ACPI_GHES_ERROR_SOURCE_COUNT, 4);
- build_ghes_v2(table_data, linker,
- ACPI_GHES_NOTIFY_SEA, ACPI_HEST_SRC_ID_SEA);
+ build_append_int_noprefix(table_data, num_sources, 4);
+ for (i = 0; i < num_sources; i++) {
+ build_ghes_v2_entry(table_data, linker, &notif_source[i], i, num_sources);
+ }
acpi_table_end(linker, &table);
+
+ if (ags->use_hest_addr) {
+ /*
+ * Tell firmware to write into GPA the address of HEST via fw_cfg,
+ * once initialized.
+ */
+ bios_linker_loader_write_pointer(linker,
+ ACPI_HEST_ADDR_FW_CFG_FILE, 0,
+ sizeof(uint64_t),
+ ACPI_BUILD_TABLE_FILE, hest_offset);
+ }
}
void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s,
@@ -357,21 +411,20 @@ void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s,
fw_cfg_add_file(s, ACPI_HW_ERROR_FW_CFG_FILE, hardware_error->data,
hardware_error->len);
- /* Create a read-write fw_cfg file for Address */
- fw_cfg_add_file_callback(s, ACPI_HW_ERROR_ADDR_FW_CFG_FILE, NULL, NULL,
- NULL, &(ags->hw_error_le), sizeof(ags->hw_error_le), false);
-
- ags->present = true;
+ if (ags->use_hest_addr) {
+ fw_cfg_add_file_callback(s, ACPI_HEST_ADDR_FW_CFG_FILE, NULL, NULL,
+ NULL, &(ags->hest_addr_le), sizeof(ags->hest_addr_le), false);
+ } else {
+ /* Create a read-write fw_cfg file for Address */
+ fw_cfg_add_file_callback(s, ACPI_HW_ERROR_ADDR_FW_CFG_FILE, NULL, NULL,
+ NULL, &(ags->hw_error_le), sizeof(ags->hw_error_le), false);
+ }
}
static void get_hw_error_offsets(uint64_t ghes_addr,
uint64_t *cper_addr,
uint64_t *read_ack_register_addr)
{
- if (!ghes_addr) {
- return;
- }
-
/*
* non-HEST version supports only one source, so no need to change
* the start offset based on the source ID. Also, we can't validate
@@ -390,35 +443,94 @@ static void get_hw_error_offsets(uint64_t ghes_addr,
*read_ack_register_addr = ghes_addr + sizeof(uint64_t);
}
-static void ghes_record_cper_errors(const void *cper, size_t len,
- uint16_t source_id, Error **errp)
+static void get_ghes_source_offsets(uint16_t source_id,
+ uint64_t hest_addr,
+ uint64_t *cper_addr,
+ uint64_t *read_ack_start_addr,
+ Error **errp)
{
- uint64_t cper_addr = 0, read_ack_register_addr = 0, read_ack_register;
- AcpiGedState *acpi_ged_state;
- AcpiGhesState *ags;
+ uint64_t hest_err_block_addr, hest_read_ack_addr;
+ uint64_t err_source_entry, error_block_addr;
+ uint32_t num_sources, i;
- if (len > ACPI_GHES_MAX_RAW_DATA_LENGTH) {
- error_setg(errp, "GHES CPER record is too big: %zd", len);
- return;
- }
+ hest_addr += ACPI_DESC_HEADER_OFFSET;
- acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED,
- NULL));
- if (!acpi_ged_state) {
- error_setg(errp, "Can't find ACPI_GED object");
+ cpu_physical_memory_read(hest_addr, &num_sources,
+ sizeof(num_sources));
+ num_sources = le32_to_cpu(num_sources);
+
+ err_source_entry = hest_addr + sizeof(num_sources);
+
+ /*
+ * Currently, HEST Error source navigates only for GHESv2 tables
+ */
+ for (i = 0; i < num_sources; i++) {
+ uint64_t addr = err_source_entry;
+ uint16_t type, src_id;
+
+ cpu_physical_memory_read(addr, &type, sizeof(type));
+ type = le16_to_cpu(type);
+
+ /* For now, we only know the size of GHESv2 table */
+ if (type != ACPI_GHES_SOURCE_GENERIC_ERROR_V2) {
+ error_setg(errp, "HEST: type %d not supported.", type);
+ return;
+ }
+
+ /* Compare CPER source ID at the GHESv2 structure */
+ addr += sizeof(type);
+ cpu_physical_memory_read(addr, &src_id, sizeof(src_id));
+ if (le16_to_cpu(src_id) == source_id) {
+ break;
+ }
+
+ err_source_entry += HEST_GHES_V2_ENTRY_SIZE;
+ }
+ if (i == num_sources) {
+ error_setg(errp, "HEST: Source %d not found.", source_id);
return;
}
- ags = &acpi_ged_state->ghes_state;
- assert(ACPI_GHES_ERROR_SOURCE_COUNT == 1);
- get_hw_error_offsets(le64_to_cpu(ags->hw_error_le),
- &cper_addr, &read_ack_register_addr);
+ /* Navigate through table address pointers */
+ hest_err_block_addr = err_source_entry + GHES_ERR_STATUS_ADDR_OFF +
+ GAS_ADDR_OFFSET;
+
+ cpu_physical_memory_read(hest_err_block_addr, &error_block_addr,
+ sizeof(error_block_addr));
+ error_block_addr = le64_to_cpu(error_block_addr);
- if (!cper_addr) {
- error_setg(errp, "can not find Generic Error Status Block");
+ cpu_physical_memory_read(error_block_addr, cper_addr,
+ sizeof(*cper_addr));
+ *cper_addr = le64_to_cpu(*cper_addr);
+
+ hest_read_ack_addr = err_source_entry + GHES_READ_ACK_ADDR_OFF +
+ GAS_ADDR_OFFSET;
+ cpu_physical_memory_read(hest_read_ack_addr, read_ack_start_addr,
+ sizeof(*read_ack_start_addr));
+ *read_ack_start_addr = le64_to_cpu(*read_ack_start_addr);
+}
+
+NotifierList acpi_generic_error_notifiers =
+ NOTIFIER_LIST_INITIALIZER(acpi_generic_error_notifiers);
+
+void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t len,
+ uint16_t source_id, Error **errp)
+{
+ uint64_t cper_addr = 0, read_ack_register_addr = 0, read_ack_register;
+
+ if (len > ACPI_GHES_MAX_RAW_DATA_LENGTH) {
+ error_setg(errp, "GHES CPER record is too big: %zd", len);
return;
}
+ if (!ags->use_hest_addr) {
+ get_hw_error_offsets(le64_to_cpu(ags->hw_error_le),
+ &cper_addr, &read_ack_register_addr);
+ } else {
+ get_ghes_source_offsets(source_id, le64_to_cpu(ags->hest_addr_le),
+ &cper_addr, &read_ack_register_addr, errp);
+ }
+
cpu_physical_memory_read(read_ack_register_addr,
&read_ack_register, sizeof(read_ack_register));
@@ -440,9 +552,12 @@ static void ghes_record_cper_errors(const void *cper, size_t len,
/* Write the generic error data entry into guest memory */
cpu_physical_memory_write(cper_addr, cper, len);
+
+ notifier_list_notify(&acpi_generic_error_notifiers, &source_id);
}
-int acpi_ghes_memory_errors(uint16_t source_id, uint64_t physical_address)
+int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
+ uint64_t physical_address)
{
/* Memory Error Section Type */
const uint8_t guid[] =
@@ -468,7 +583,7 @@ int acpi_ghes_memory_errors(uint16_t source_id, uint64_t physical_address)
acpi_ghes_build_append_mem_cper(block, physical_address);
/* Report the error */
- ghes_record_cper_errors(block->data, block->len, source_id, &errp);
+ ghes_record_cper_errors(ags, block->data, block->len, source_id, &errp);
g_array_free(block, true);
@@ -480,7 +595,7 @@ int acpi_ghes_memory_errors(uint16_t source_id, uint64_t physical_address)
return 0;
}
-bool acpi_ghes_present(void)
+AcpiGhesState *acpi_ghes_get_state(void)
{
AcpiGedState *acpi_ged_state;
AcpiGhesState *ags;
@@ -489,8 +604,12 @@ bool acpi_ghes_present(void)
NULL));
if (!acpi_ged_state) {
- return false;
+ return NULL;
}
ags = &acpi_ged_state->ghes_state;
- return ags->present;
+
+ if (!ags->hw_error_le && !ags->hest_addr_le) {
+ return NULL;
+ }
+ return ags;
}
diff --git a/hw/acpi/ghes_cper.c b/hw/acpi/ghes_cper.c
new file mode 100644
index 0000000..31cb2ff
--- /dev/null
+++ b/hw/acpi/ghes_cper.c
@@ -0,0 +1,40 @@
+/*
+ * CPER payload parser for error injection
+ *
+ * Copyright(C) 2024-2025 Huawei LTD.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+
+#include "qemu/base64.h"
+#include "qemu/error-report.h"
+#include "qemu/uuid.h"
+#include "qapi/qapi-commands-acpi-hest.h"
+#include "hw/acpi/ghes.h"
+
+void qmp_inject_ghes_v2_error(const char *qmp_cper, Error **errp)
+{
+ AcpiGhesState *ags;
+ uint8_t *cper;
+ size_t len;
+
+ ags = acpi_ghes_get_state();
+ if (!ags) {
+ return;
+ }
+
+ cper = qbase64_decode(qmp_cper, -1, &len, errp);
+ if (!cper) {
+ error_setg(errp, "missing GHES CPER payload");
+ return;
+ }
+
+ ghes_record_cper_errors(ags, cper, len, ACPI_HEST_SRC_ID_QMP, errp);
+
+ g_free(cper);
+}
diff --git a/hw/acpi/ghes_cper_stub.c b/hw/acpi/ghes_cper_stub.c
new file mode 100644
index 0000000..b16be73
--- /dev/null
+++ b/hw/acpi/ghes_cper_stub.c
@@ -0,0 +1,20 @@
+/*
+ * Stub interface for CPER payload parser for error injection
+ *
+ * Copyright(C) 2024-2025 Huawei LTD.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-acpi-hest.h"
+#include "hw/acpi/ghes.h"
+
+void qmp_inject_ghes_v2_error(const char *cper, Error **errp)
+{
+ error_setg(errp, "GHES QMP error inject is not compiled in");
+}
diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c
index 967b674..2b3b493 100644
--- a/hw/acpi/ich9.c
+++ b/hw/acpi/ich9.c
@@ -322,9 +322,10 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, qemu_irq sci_irq)
}
if (pm->acpi_pci_hotplug.use_acpi_hotplug_bridge) {
+ object_property_set_link(OBJECT(lpc_pci), "bus",
+ OBJECT(pci_get_bus(lpc_pci)), &error_abort);
acpi_pcihp_init(OBJECT(lpc_pci),
&pm->acpi_pci_hotplug,
- pci_get_bus(lpc_pci),
pci_address_space_io(lpc_pci),
ACPI_PCIHP_ADDR_ICH9);
@@ -428,6 +429,10 @@ void ich9_pm_add_properties(Object *obj, ICH9LPCPMRegs *pm)
object_property_add_uint32_ptr(obj, ACPI_PM_PROP_PM_IO_BASE,
&pm->pm_io_base, OBJ_PROP_FLAG_READ);
+ object_property_add_link(obj, "bus", TYPE_PCI_BUS,
+ (Object **)&pm->acpi_pci_hotplug.root,
+ object_property_allow_set_link,
+ OBJ_PROP_LINK_STRONG);
object_property_add(obj, ACPI_PM_PROP_GPE0_BLK, "uint32",
ich9_pm_get_gpe0_blk,
NULL, NULL, pm);
diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build
index 73f02b9..56b5d1e 100644
--- a/hw/acpi/meson.build
+++ b/hw/acpi/meson.build
@@ -34,4 +34,6 @@ endif
system_ss.add(when: 'CONFIG_ACPI', if_false: files('acpi-stub.c', 'aml-build-stub.c', 'ghes-stub.c', 'acpi_interface.c'))
system_ss.add(when: 'CONFIG_ACPI_PCI_BRIDGE', if_false: files('pci-bridge-stub.c'))
system_ss.add_all(when: 'CONFIG_ACPI', if_true: acpi_ss)
+system_ss.add(when: 'CONFIG_GHES_CPER', if_true: files('ghes_cper.c'))
+system_ss.add(when: 'CONFIG_GHES_CPER', if_false: files('ghes_cper_stub.c'))
system_ss.add(files('acpi-qmp-cmds.c'))
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 9ba9080..732d613 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -535,7 +535,7 @@ nvdimm_dsm_no_payload(uint32_t func_ret_status, hwaddr dsm_mem_addr)
#define NVDIMM_QEMU_RSVD_HANDLE_ROOT 0x10000
-/* Read FIT data, defined in docs/specs/acpi_nvdimm.txt. */
+/* Read FIT data, defined in docs/specs/acpi_nvdimm.rst. */
static void nvdimm_dsm_func_read_fit(NVDIMMState *state, NvdimmDsmIn *in,
hwaddr dsm_mem_addr)
{
diff --git a/hw/acpi/pci-bridge.c b/hw/acpi/pci-bridge.c
index 7baa703..394a919 100644
--- a/hw/acpi/pci-bridge.c
+++ b/hw/acpi/pci-bridge.c
@@ -35,3 +35,57 @@ void build_pci_bridge_aml(AcpiDevAmlIf *adev, Aml *scope)
}
}
}
+
+Aml *build_pci_bridge_edsm(void)
+{
+ Aml *method, *ifctx;
+ Aml *zero = aml_int(0);
+ Aml *func = aml_arg(2);
+ Aml *ret = aml_local(0);
+ Aml *aidx = aml_local(1);
+ Aml *params = aml_arg(4);
+
+ method = aml_method("EDSM", 5, AML_SERIALIZED);
+
+ /* get supported functions */
+ ifctx = aml_if(aml_equal(func, zero));
+ {
+ /* 1: have supported functions */
+ /* 7: support for function 7 */
+ const uint8_t caps = 1 | BIT(7);
+ build_append_pci_dsm_func0_common(ifctx, ret);
+ aml_append(ifctx, aml_store(aml_int(caps), aml_index(ret, zero)));
+ aml_append(ifctx, aml_return(ret));
+ }
+ aml_append(method, ifctx);
+
+ /* handle specific functions requests */
+ /*
+ * PCI Firmware Specification 3.1
+ * 4.6.7. _DSM for Naming a PCI or PCI Express Device Under
+ * Operating Systems
+ */
+ ifctx = aml_if(aml_equal(func, aml_int(7)));
+ {
+ Aml *pkg = aml_package(2);
+ aml_append(pkg, zero);
+ /* optional, if not impl. should return null string */
+ aml_append(pkg, aml_string("%s", ""));
+ aml_append(ifctx, aml_store(pkg, ret));
+
+ /*
+ * IASL is fine when initializing Package with computational data,
+ * however it makes guest unhappy /it fails to process such AML/.
+ * So use runtime assignment to set acpi-index after initializer
+ * to make OSPM happy.
+ */
+ aml_append(ifctx,
+ aml_store(aml_derefof(aml_index(params, aml_int(0))), aidx));
+ aml_append(ifctx, aml_store(aidx, aml_index(ret, zero)));
+ aml_append(ifctx, aml_return(ret));
+ }
+ aml_append(method, ifctx);
+
+ return method;
+}
+
diff --git a/hw/acpi/pci.c b/hw/acpi/pci.c
index d511a85..2228f12 100644
--- a/hw/acpi/pci.c
+++ b/hw/acpi/pci.c
@@ -301,3 +301,53 @@ void build_srat_generic_affinity_structures(GArray *table_data)
object_child_foreach_recursive(object_get_root(), build_acpi_generic_port,
table_data);
}
+
+Aml *build_pci_host_bridge_osc_method(bool enable_native_pcie_hotplug)
+{
+ Aml *if_ctx;
+ Aml *if_ctx2;
+ Aml *else_ctx;
+ Aml *method;
+ Aml *a_cwd1 = aml_name("CDW1");
+ Aml *a_ctrl = aml_local(0);
+
+ method = aml_method("_OSC", 4, AML_NOTSERIALIZED);
+ aml_append(method, aml_create_dword_field(aml_arg(3), aml_int(0), "CDW1"));
+
+ if_ctx = aml_if(aml_equal(
+ aml_arg(0), aml_touuid("33DB4D5B-1FF7-401C-9657-7441C03DD766")));
+ aml_append(if_ctx, aml_create_dword_field(aml_arg(3), aml_int(4), "CDW2"));
+ aml_append(if_ctx, aml_create_dword_field(aml_arg(3), aml_int(8), "CDW3"));
+
+ aml_append(if_ctx, aml_store(aml_name("CDW3"), a_ctrl));
+
+ /*
+ * Always allow native PME, AER (no dependencies)
+ * Allow SHPC (PCI bridges can have SHPC controller)
+ * Disable PCIe Native Hot-plug if ACPI PCI Hot-plug is enabled.
+ */
+ aml_append(if_ctx, aml_and(a_ctrl,
+ aml_int(0x1E | (enable_native_pcie_hotplug ? 0x1 : 0x0)), a_ctrl));
+
+ if_ctx2 = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(1))));
+ /* Unknown revision */
+ aml_append(if_ctx2, aml_or(a_cwd1, aml_int(0x08), a_cwd1));
+ aml_append(if_ctx, if_ctx2);
+
+ if_ctx2 = aml_if(aml_lnot(aml_equal(aml_name("CDW3"), a_ctrl)));
+ /* Capabilities bits were masked */
+ aml_append(if_ctx2, aml_or(a_cwd1, aml_int(0x10), a_cwd1));
+ aml_append(if_ctx, if_ctx2);
+
+ /* Update DWORD3 in the buffer */
+ aml_append(if_ctx, aml_store(a_ctrl, aml_name("CDW3")));
+ aml_append(method, if_ctx);
+
+ else_ctx = aml_else();
+ /* Unrecognized UUID */
+ aml_append(else_ctx, aml_or(a_cwd1, aml_int(4), a_cwd1));
+ aml_append(method, else_ctx);
+
+ aml_append(method, aml_return(aml_arg(3)));
+ return method;
+}
diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
index aac9001..4922bbc 100644
--- a/hw/acpi/pcihp.c
+++ b/hw/acpi/pcihp.c
@@ -3,7 +3,7 @@
*
* QEMU supports PCI hotplug via ACPI. This module
* implements the interface between QEMU and the ACPI BIOS.
- * Interface specification - see docs/specs/acpi_pci_hotplug.txt
+ * Interface specification - see docs/specs/acpi_pci_hotplug.rst
*
* Copyright (c) 2013, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
* Copyright (c) 2006 Fabrice Bellard
@@ -26,7 +26,8 @@
#include "qemu/osdep.h"
#include "hw/acpi/pcihp.h"
-
+#include "hw/acpi/aml-build.h"
+#include "hw/acpi/acpi_aml_interface.h"
#include "hw/pci-host/i440fx.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_bridge.h"
@@ -39,9 +40,9 @@
#include "migration/vmstate.h"
#include "qapi/error.h"
#include "qom/qom-qobject.h"
+#include "qobject/qnum.h"
#include "trace.h"
-#define ACPI_PCIHP_SIZE 0x0018
#define PCI_UP_BASE 0x0000
#define PCI_DOWN_BASE 0x0004
#define PCI_EJ_BASE 0x0008
@@ -97,10 +98,10 @@ static void *acpi_set_bsel(PCIBus *bus, void *opaque)
return info;
}
-static void acpi_set_pci_info(bool has_bridge_hotplug)
+static void acpi_set_pci_info(AcpiPciHpState *s)
{
static bool bsel_is_set;
- Object *host = acpi_get_i386_pci_host();
+ bool has_bridge_hotplug = s->use_acpi_hotplug_bridge;
PCIBus *bus;
BSELInfo info = { .bsel_alloc = ACPI_PCIHP_BSEL_DEFAULT,
.has_bridge_hotplug = has_bridge_hotplug };
@@ -110,11 +111,8 @@ static void acpi_set_pci_info(bool has_bridge_hotplug)
}
bsel_is_set = true;
- if (!host) {
- return;
- }
- bus = PCI_HOST_BRIDGE(host)->bus;
+ bus = s->root;
if (bus) {
/* Scan all PCI buses. Set property to enable acpi based hotplug. */
pci_for_each_bus_depth_first(bus, acpi_set_bsel, NULL, &info);
@@ -264,7 +262,7 @@ static void acpi_pcihp_update(AcpiPciHpState *s)
void acpi_pcihp_reset(AcpiPciHpState *s)
{
- acpi_set_pci_info(s->use_acpi_hotplug_bridge);
+ acpi_set_pci_info(s);
acpi_pcihp_update(s);
}
@@ -495,13 +493,13 @@ static const MemoryRegionOps acpi_pcihp_io_ops = {
},
};
-void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus,
+void acpi_pcihp_init(Object *owner, AcpiPciHpState *s,
MemoryRegion *io, uint16_t io_base)
{
s->io_len = ACPI_PCIHP_SIZE;
s->io_base = io_base;
- s->root = root_bus;
+ assert(s->root);
memory_region_init_io(&s->io, owner, &acpi_pcihp_io_ops, s,
"acpi-pci-hotplug", s->io_len);
@@ -513,6 +511,425 @@ void acpi_pcihp_init(Object *owner, AcpiPciHpState *s, PCIBus *root_bus,
OBJ_PROP_FLAG_READ);
}
+void build_append_pci_dsm_func0_common(Aml *ctx, Aml *retvar)
+{
+ Aml *UUID, *ifctx1;
+ uint8_t byte_list[1] = { 0 }; /* nothing supported yet */
+
+ aml_append(ctx, aml_store(aml_buffer(1, byte_list), retvar));
+ /*
+ * PCI Firmware Specification 3.1
+ * 4.6. _DSM Definitions for PCI
+ */
+ UUID = aml_touuid("E5C937D0-3553-4D7A-9117-EA4D19C3434D");
+ ifctx1 = aml_if(aml_lnot(aml_equal(aml_arg(0), UUID)));
+ {
+ /* call is for unsupported UUID, bail out */
+ aml_append(ifctx1, aml_return(retvar));
+ }
+ aml_append(ctx, ifctx1);
+
+ ifctx1 = aml_if(aml_lless(aml_arg(1), aml_int(2)));
+ {
+ /* call is for unsupported REV, bail out */
+ aml_append(ifctx1, aml_return(retvar));
+ }
+ aml_append(ctx, ifctx1);
+}
+
+static Aml *aml_pci_pdsm(void)
+{
+ Aml *method, *ifctx, *ifctx1;
+ Aml *ret = aml_local(0);
+ Aml *caps = aml_local(1);
+ Aml *acpi_index = aml_local(2);
+ Aml *zero = aml_int(0);
+ Aml *one = aml_int(1);
+ Aml *not_supp = aml_int(0xFFFFFFFF);
+ Aml *func = aml_arg(2);
+ Aml *params = aml_arg(4);
+ Aml *bnum = aml_derefof(aml_index(params, aml_int(0)));
+ Aml *sunum = aml_derefof(aml_index(params, aml_int(1)));
+
+ method = aml_method("PDSM", 5, AML_SERIALIZED);
+
+ /* get supported functions */
+ ifctx = aml_if(aml_equal(func, zero));
+ {
+ build_append_pci_dsm_func0_common(ifctx, ret);
+
+ aml_append(ifctx, aml_store(zero, caps));
+ aml_append(ifctx,
+ aml_store(aml_call2("AIDX", bnum, sunum), acpi_index));
+ /*
+ * advertise function 7 if device has acpi-index
+ * acpi_index values:
+ * 0: not present (default value)
+ * FFFFFFFF: not supported (old QEMU without PIDX reg)
+ * other: device's acpi-index
+ */
+ ifctx1 = aml_if(aml_lnot(
+ aml_or(aml_equal(acpi_index, zero),
+ aml_equal(acpi_index, not_supp), NULL)
+ ));
+ {
+ /* have supported functions */
+ aml_append(ifctx1, aml_or(caps, one, caps));
+ /* support for function 7 */
+ aml_append(ifctx1,
+ aml_or(caps, aml_shiftleft(one, aml_int(7)), caps));
+ }
+ aml_append(ifctx, ifctx1);
+
+ aml_append(ifctx, aml_store(caps, aml_index(ret, zero)));
+ aml_append(ifctx, aml_return(ret));
+ }
+ aml_append(method, ifctx);
+
+ /* handle specific functions requests */
+ /*
+ * PCI Firmware Specification 3.1
+ * 4.6.7. _DSM for Naming a PCI or PCI Express Device Under
+ * Operating Systems
+ */
+ ifctx = aml_if(aml_equal(func, aml_int(7)));
+ {
+ Aml *pkg = aml_package(2);
+
+ aml_append(ifctx, aml_store(aml_call2("AIDX", bnum, sunum), acpi_index));
+ aml_append(ifctx, aml_store(pkg, ret));
+ /*
+ * Windows calls func=7 without checking if it's available,
+ * as workaround Microsoft has suggested to return invalid for func7
+ * Package, so return 2 elements package but only initialize elements
+ * when acpi_index is supported and leave them uninitialized, which
+ * leads elements to being Uninitialized ObjectType and should trip
+ * Windows into discarding result as an unexpected and prevent setting
+ * bogus 'PCI Label' on the device.
+ */
+ ifctx1 = aml_if(aml_lnot(aml_lor(
+ aml_equal(acpi_index, zero), aml_equal(acpi_index, not_supp)
+ )));
+ {
+ aml_append(ifctx1, aml_store(acpi_index, aml_index(ret, zero)));
+ /*
+ * optional, if not impl. should return null string
+ */
+ aml_append(ifctx1, aml_store(aml_string("%s", ""),
+ aml_index(ret, one)));
+ }
+ aml_append(ifctx, ifctx1);
+
+ aml_append(ifctx, aml_return(ret));
+ }
+
+ aml_append(method, ifctx);
+ return method;
+}
+
+void build_acpi_pci_hotplug(Aml *table, AmlRegionSpace rs, uint64_t pcihp_addr)
+{
+ Aml *scope;
+ Aml *field;
+ Aml *method;
+
+ scope = aml_scope("_SB.PCI0");
+
+ aml_append(scope,
+ aml_operation_region("PCST", rs, aml_int(pcihp_addr), 0x08));
+ field = aml_field("PCST", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS);
+ aml_append(field, aml_named_field("PCIU", 32));
+ aml_append(field, aml_named_field("PCID", 32));
+ aml_append(scope, field);
+
+ aml_append(scope,
+ aml_operation_region("SEJ", rs,
+ aml_int(pcihp_addr + ACPI_PCIHP_SEJ_BASE), 0x04));
+ field = aml_field("SEJ", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS);
+ aml_append(field, aml_named_field("B0EJ", 32));
+ aml_append(scope, field);
+
+ aml_append(scope,
+ aml_operation_region("BNMR", rs,
+ aml_int(pcihp_addr + ACPI_PCIHP_BNMR_BASE), 0x08));
+ field = aml_field("BNMR", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS);
+ aml_append(field, aml_named_field("BNUM", 32));
+ aml_append(field, aml_named_field("PIDX", 32));
+ aml_append(scope, field);
+
+ aml_append(scope, aml_mutex("BLCK", 0));
+
+ method = aml_method("PCEJ", 2, AML_NOTSERIALIZED);
+ aml_append(method, aml_acquire(aml_name("BLCK"), 0xFFFF));
+ aml_append(method, aml_store(aml_arg(0), aml_name("BNUM")));
+ aml_append(method,
+ aml_store(aml_shiftleft(aml_int(1), aml_arg(1)), aml_name("B0EJ")));
+ aml_append(method, aml_release(aml_name("BLCK")));
+ aml_append(method, aml_return(aml_int(0)));
+ aml_append(scope, method);
+
+ method = aml_method("AIDX", 2, AML_NOTSERIALIZED);
+ aml_append(method, aml_acquire(aml_name("BLCK"), 0xFFFF));
+ aml_append(method, aml_store(aml_arg(0), aml_name("BNUM")));
+ aml_append(method,
+ aml_store(aml_shiftleft(aml_int(1), aml_arg(1)), aml_name("PIDX")));
+ aml_append(method, aml_store(aml_name("PIDX"), aml_local(0)));
+ aml_append(method, aml_release(aml_name("BLCK")));
+ aml_append(method, aml_return(aml_local(0)));
+ aml_append(scope, method);
+
+ aml_append(scope, aml_pci_pdsm());
+
+ aml_append(table, scope);
+}
+
+/* Reserve PCIHP resources */
+void build_append_pcihp_resources(Aml *scope /* \\_SB.PCI0 */,
+ uint64_t io_addr, uint64_t io_len)
+{
+ Aml *dev, *crs;
+
+ dev = aml_device("PHPR");
+ aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A06")));
+ aml_append(dev,
+ aml_name_decl("_UID", aml_string("PCI Hotplug resources")));
+ /* device present, functioning, decoding, not shown in UI */
+ aml_append(dev, aml_name_decl("_STA", aml_int(0xB)));
+ crs = aml_resource_template();
+ aml_append(crs, aml_io(AML_DECODE16, io_addr, io_addr, 1, io_len));
+ aml_append(dev, aml_name_decl("_CRS", crs));
+ aml_append(scope, dev);
+}
+
+bool build_append_notification_callback(Aml *parent_scope, const PCIBus *bus)
+{
+ Aml *method;
+ PCIBus *sec;
+ QObject *bsel;
+ int nr_notifiers = 0;
+ GQueue *pcnt_bus_list = g_queue_new();
+
+ QLIST_FOREACH(sec, &bus->child, sibling) {
+ Aml *br_scope = aml_scope("S%.02X", sec->parent_dev->devfn);
+ if (pci_bus_is_root(sec)) {
+ continue;
+ }
+ nr_notifiers = nr_notifiers +
+ build_append_notification_callback(br_scope, sec);
+ /*
+ * add new child scope to parent
+ * and keep track of bus that have PCNT,
+ * bus list is used later to call children PCNTs from this level PCNT
+ */
+ if (nr_notifiers) {
+ g_queue_push_tail(pcnt_bus_list, sec);
+ aml_append(parent_scope, br_scope);
+ }
+ }
+
+ /*
+ * Append PCNT method to notify about events on local and child buses.
+ * ps: hostbridge might not have hotplug (bsel) enabled but might have
+ * child bridges that do have bsel.
+ */
+ method = aml_method("PCNT", 0, AML_NOTSERIALIZED);
+
+ /* If bus supports hotplug select it and notify about local events */
+ bsel = object_property_get_qobject(OBJECT(bus), ACPI_PCIHP_PROP_BSEL, NULL);
+ if (bsel) {
+ uint64_t bsel_val = qnum_get_uint(qobject_to(QNum, bsel));
+
+ aml_append(method, aml_store(aml_int(bsel_val), aml_name("BNUM")));
+ aml_append(method, aml_call2("DVNT", aml_name("PCIU"),
+ aml_int(1))); /* Device Check */
+ aml_append(method, aml_call2("DVNT", aml_name("PCID"),
+ aml_int(3))); /* Eject Request */
+ nr_notifiers++;
+ }
+
+ /* Notify about child bus events in any case */
+ while ((sec = g_queue_pop_head(pcnt_bus_list))) {
+ aml_append(method, aml_name("^S%.02X.PCNT", sec->parent_dev->devfn));
+ }
+
+ aml_append(parent_scope, method);
+ qobject_unref(bsel);
+ g_queue_free(pcnt_bus_list);
+ return !!nr_notifiers;
+}
+
+static Aml *aml_pci_device_dsm(void)
+{
+ Aml *method;
+
+ method = aml_method("_DSM", 4, AML_SERIALIZED);
+ {
+ Aml *params = aml_local(0);
+ Aml *pkg = aml_package(2);
+ aml_append(pkg, aml_int(0));
+ aml_append(pkg, aml_int(0));
+ aml_append(method, aml_store(pkg, params));
+ aml_append(method,
+ aml_store(aml_name("BSEL"), aml_index(params, aml_int(0))));
+ aml_append(method,
+ aml_store(aml_name("ASUN"), aml_index(params, aml_int(1))));
+ aml_append(method,
+ aml_return(aml_call5("PDSM", aml_arg(0), aml_arg(1),
+ aml_arg(2), aml_arg(3), params))
+ );
+ }
+ return method;
+}
+
+static Aml *aml_pci_static_endpoint_dsm(PCIDevice *pdev)
+{
+ Aml *method;
+
+ g_assert(pdev->acpi_index != 0);
+ method = aml_method("_DSM", 4, AML_SERIALIZED);
+ {
+ Aml *params = aml_local(0);
+ Aml *pkg = aml_package(1);
+ aml_append(pkg, aml_int(pdev->acpi_index));
+ aml_append(method, aml_store(pkg, params));
+ aml_append(method,
+ aml_return(aml_call5("EDSM", aml_arg(0), aml_arg(1),
+ aml_arg(2), aml_arg(3), params))
+ );
+ }
+ return method;
+}
+
+static void build_append_pcihp_notify_entry(Aml *method, int slot)
+{
+ Aml *if_ctx;
+ int32_t devfn = PCI_DEVFN(slot, 0);
+
+ if_ctx = aml_if(aml_and(aml_arg(0), aml_int(0x1U << slot), NULL));
+ aml_append(if_ctx, aml_notify(aml_name("S%.02X", devfn), aml_arg(1)));
+ aml_append(method, if_ctx);
+}
+
+static bool is_devfn_ignored_generic(const int devfn, const PCIBus *bus)
+{
+ const PCIDevice *pdev = bus->devices[devfn];
+
+ if (PCI_FUNC(devfn)) {
+ if (IS_PCI_BRIDGE(pdev)) {
+ /*
+ * Ignore only hotplugged PCI bridges on !0 functions, but
+ * allow describing cold plugged bridges on all functions
+ */
+ if (DEVICE(pdev)->hotplugged) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+static bool is_devfn_ignored_hotplug(const int devfn, const PCIBus *bus)
+{
+ PCIDevice *pdev = bus->devices[devfn];
+ if (pdev) {
+ return is_devfn_ignored_generic(devfn, bus) ||
+ !DEVICE_GET_CLASS(pdev)->hotpluggable ||
+ /* Cold plugged bridges aren't themselves hot-pluggable */
+ (IS_PCI_BRIDGE(pdev) && !DEVICE(pdev)->hotplugged);
+ } else { /* non populated slots */
+ /*
+ * hotplug is supported only for non-multifunction device
+ * so generate device description only for function 0
+ */
+ if (PCI_FUNC(devfn) ||
+ (pci_bus_is_express(bus) && PCI_SLOT(devfn) > 0)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+void build_append_pcihp_slots(Aml *parent_scope, PCIBus *bus)
+{
+ int devfn;
+ Aml *dev, *notify_method = NULL, *method;
+ QObject *bsel = object_property_get_qobject(OBJECT(bus),
+ ACPI_PCIHP_PROP_BSEL, NULL);
+ uint64_t bsel_val = qnum_get_uint(qobject_to(QNum, bsel));
+ qobject_unref(bsel);
+
+ aml_append(parent_scope, aml_name_decl("BSEL", aml_int(bsel_val)));
+ notify_method = aml_method("DVNT", 2, AML_NOTSERIALIZED);
+
+ for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
+ int slot = PCI_SLOT(devfn);
+ int adr = slot << 16 | PCI_FUNC(devfn);
+
+ if (is_devfn_ignored_hotplug(devfn, bus)) {
+ continue;
+ }
+
+ if (bus->devices[devfn]) {
+ dev = aml_scope("S%.02X", devfn);
+ } else {
+ dev = aml_device("S%.02X", devfn);
+ aml_append(dev, aml_name_decl("_ADR", aml_int(adr)));
+ }
+
+ /*
+ * Can't declare _SUN here for every device as it changes 'slot'
+ * enumeration order in linux kernel, so use another variable for it
+ */
+ aml_append(dev, aml_name_decl("ASUN", aml_int(slot)));
+ aml_append(dev, aml_pci_device_dsm());
+
+ aml_append(dev, aml_name_decl("_SUN", aml_int(slot)));
+ /* add _EJ0 to make slot hotpluggable */
+ method = aml_method("_EJ0", 1, AML_NOTSERIALIZED);
+ aml_append(method,
+ aml_call2("PCEJ", aml_name("BSEL"), aml_name("_SUN"))
+ );
+ aml_append(dev, method);
+
+ build_append_pcihp_notify_entry(notify_method, slot);
+
+ /* device descriptor has been composed, add it into parent context */
+ aml_append(parent_scope, dev);
+ }
+ aml_append(parent_scope, notify_method);
+}
+
+void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus)
+{
+ int devfn;
+ Aml *dev;
+
+ for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
+ /* ACPI spec: 1.0b: Table 6-2 _ADR Object Bus Types, PCI type */
+ int adr = PCI_SLOT(devfn) << 16 | PCI_FUNC(devfn);
+ PCIDevice *pdev = bus->devices[devfn];
+
+ if (!pdev || is_devfn_ignored_generic(devfn, bus)) {
+ continue;
+ }
+
+ /* start to compose PCI device descriptor */
+ dev = aml_device("S%.02X", devfn);
+ aml_append(dev, aml_name_decl("_ADR", aml_int(adr)));
+
+ call_dev_aml_func(DEVICE(bus->devices[devfn]), dev);
+ /* add _DSM if device has acpi-index set */
+ if (pdev->acpi_index &&
+ !object_property_get_bool(OBJECT(pdev), "hotpluggable",
+ &error_abort)) {
+ aml_append(dev, aml_pci_static_endpoint_dsm(pdev));
+ }
+
+ /* device descriptor has been composed, add it into parent context */
+ aml_append(parent_scope, dev);
+ }
+}
+
const VMStateDescription vmstate_acpi_pcihp_pci_status = {
.name = "acpi_pcihp_pci_status",
.version_id = 1,
diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c
index d98b80d..7a18f18 100644
--- a/hw/acpi/piix4.c
+++ b/hw/acpi/piix4.c
@@ -567,7 +567,8 @@ static void piix4_acpi_system_hot_add_init(MemoryRegion *parent,
if (s->acpi_pci_hotplug.use_acpi_hotplug_bridge ||
s->acpi_pci_hotplug.use_acpi_root_pci_hotplug) {
- acpi_pcihp_init(OBJECT(s), &s->acpi_pci_hotplug, bus, parent,
+ object_property_set_link(OBJECT(s), "bus", OBJECT(bus), &error_abort);
+ acpi_pcihp_init(OBJECT(s), &s->acpi_pci_hotplug, parent,
ACPI_PCIHP_ADDR_PIIX4);
qbus_set_hotplug_handler(BUS(pci_get_bus(PCI_DEVICE(s))), OBJECT(s));
}
@@ -611,6 +612,8 @@ static const Property piix4_pm_properties[] = {
acpi_pci_hotplug.use_acpi_hotplug_bridge, true),
DEFINE_PROP_BOOL(ACPI_PM_PROP_ACPI_PCI_ROOTHP, PIIX4PMState,
acpi_pci_hotplug.use_acpi_root_pci_hotplug, true),
+ DEFINE_PROP_LINK("bus", PIIX4PMState, acpi_pci_hotplug.root,
+ TYPE_PCI_BUS, PCIBus *),
DEFINE_PROP_BOOL("memory-hotplug-support", PIIX4PMState,
acpi_memory_hotplug.is_enabled, true),
DEFINE_PROP_BOOL("smm-compat", PIIX4PMState, smm_compat, false),
diff --git a/hw/acpi/vmgenid.c b/hw/acpi/vmgenid.c
index fac3d6d..33c35c8 100644
--- a/hw/acpi/vmgenid.c
+++ b/hw/acpi/vmgenid.c
@@ -38,7 +38,7 @@ void vmgenid_build_acpi(VmGenIdState *vms, GArray *table_data, GArray *guid,
guid_le = qemu_uuid_bswap(vms->guid);
/* The GUID is written at a fixed offset into the fw_cfg file
* in order to implement the "OVMF SDT Header probe suppressor"
- * see docs/specs/vmgenid.txt for more details
+ * see docs/specs/vmgenid.rst for more details
*/
g_array_insert_vals(guid, VMGENID_GUID_OFFSET, guid_le.data,
ARRAY_SIZE(guid_le.data));
@@ -101,7 +101,7 @@ void vmgenid_build_acpi(VmGenIdState *vms, GArray *table_data, GArray *guid,
* < 4GB, but write 64 bits anyway.
* The address that is patched in is offset in order to implement
* the "OVMF SDT Header probe suppressor"
- * see docs/specs/vmgenid.txt for more details.
+ * see docs/specs/vmgenid.rst for more details.
*/
bios_linker_loader_write_pointer(linker,
VMGENID_ADDR_FW_CFG_FILE, 0, sizeof(uint64_t),
@@ -153,7 +153,7 @@ static void vmgenid_update_guest(VmGenIdState *vms)
guid_le = qemu_uuid_bswap(vms->guid);
/* The GUID is written at a fixed offset into the fw_cfg file
* in order to implement the "OVMF SDT Header probe suppressor"
- * see docs/specs/vmgenid.txt for more details.
+ * see docs/specs/vmgenid.rst for more details.
*/
cpu_physical_memory_write(vmgenid_addr, guid_le.data,
sizeof(guid_le.data));
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index a55b44d..b44b85f 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -5,9 +5,6 @@ config ARM_VIRT
depends on TCG || KVM || HVF
imply PCI_DEVICES
imply TEST_DEVICES
- imply VFIO_AMD_XGBE
- imply VFIO_PLATFORM
- imply VFIO_XGMAC
imply TPM_TIS_SYSBUS
imply TPM_TIS_I2C
imply NVDIMM
@@ -34,6 +31,8 @@ config ARM_VIRT
select ACPI_HW_REDUCED
select ACPI_APEI
select ACPI_VIOT
+ select ACPI_PCIHP
+ select ACPI_PCI_BRIDGE
select VIRTIO_MEM_SUPPORTED
select ACPI_CXL
select ACPI_HMAT
@@ -95,6 +94,12 @@ config INTEGRATOR
select PL181 # display
select SMC91C111
+config MAX78000FTHR
+ bool
+ default y
+ depends on TCG && ARM
+ select MAX78000_SOC
+
config MPS3R
bool
default y
@@ -147,7 +152,6 @@ config OMAP
bool
select FRAMEBUFFER
select I2C
- select NAND
select PFLASH_CFI01
select SD
select SERIAL_MM
@@ -358,6 +362,15 @@ config ALLWINNER_R40
select USB_EHCI_SYSBUS
select SD
+config MAX78000_SOC
+ bool
+ select ARM_V7M
+ select MAX78000_ICC
+ select MAX78000_UART
+ select MAX78000_GCR
+ select MAX78000_TRNG
+ select MAX78000_AES
+
config RASPI
bool
default y
@@ -528,11 +541,13 @@ config ASPEED_SOC
bool
default y
depends on TCG && ARM
+ imply PCI_DEVICES
select DS1338
select FTGMAC100
select I2C
select DPS310
select PCA9552
+ select PCA9554
select SERIAL_MM
select SMBUS_EEPROM
select PCA954X
@@ -547,6 +562,8 @@ config ASPEED_SOC
select MAX31785
select FSI_APB2OPB_ASPEED
select AT24C
+ select PCI_EXPRESS
+ select PCI_EXPRESS_ASPEED
config MPS2
bool
diff --git a/hw/arm/allwinner-r40.c b/hw/arm/allwinner-r40.c
index 0bf7008..c8eda39 100644
--- a/hw/arm/allwinner-r40.c
+++ b/hw/arm/allwinner-r40.c
@@ -20,7 +20,6 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
-#include "qemu/bswap.h"
#include "qemu/module.h"
#include "qemu/units.h"
#include "hw/boards.h"
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index d0b3336..21ee62f 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -19,15 +19,14 @@
#include "hw/i2c/i2c_mux_pca954x.h"
#include "hw/i2c/smbus_eeprom.h"
#include "hw/gpio/pca9552.h"
+#include "hw/gpio/pca9554.h"
#include "hw/nvram/eeprom_at24c.h"
#include "hw/sensor/tmp105.h"
#include "hw/misc/led.h"
#include "hw/qdev-properties.h"
#include "system/block-backend.h"
#include "system/reset.h"
-#include "hw/loader.h"
#include "qemu/error-report.h"
-#include "qemu/datadir.h"
#include "qemu/units.h"
#include "hw/qdev-clock.h"
#include "system/system.h"
@@ -197,9 +196,12 @@ struct AspeedMachineState {
#define FUJI_BMC_HW_STRAP2 0x00000000
/* Bletchley hardware value */
-/* TODO: Leave same as EVB for now. */
-#define BLETCHLEY_BMC_HW_STRAP1 AST2600_EVB_HW_STRAP1
-#define BLETCHLEY_BMC_HW_STRAP2 AST2600_EVB_HW_STRAP2
+#define BLETCHLEY_BMC_HW_STRAP1 0x00002000
+#define BLETCHLEY_BMC_HW_STRAP2 0x00000801
+
+/* GB200NVL hardware value */
+#define GB200NVL_BMC_HW_STRAP1 AST2600_EVB_HW_STRAP1
+#define GB200NVL_BMC_HW_STRAP2 AST2600_EVB_HW_STRAP2
/* Qualcomm DC-SCM hardware value */
#define QCOM_DC_SCM_V1_BMC_HW_STRAP1 0x00000000
@@ -259,102 +261,6 @@ static void aspeed_reset_secondary(ARMCPU *cpu,
cpu_set_pc(cs, info->smp_loader_start);
}
-static void write_boot_rom(BlockBackend *blk, hwaddr addr, size_t rom_size,
- Error **errp)
-{
- g_autofree void *storage = NULL;
- int64_t size;
-
- /*
- * The block backend size should have already been 'validated' by
- * the creation of the m25p80 object.
- */
- size = blk_getlength(blk);
- if (size <= 0) {
- error_setg(errp, "failed to get flash size");
- return;
- }
-
- if (rom_size > size) {
- rom_size = size;
- }
-
- storage = g_malloc0(rom_size);
- if (blk_pread(blk, 0, rom_size, storage, 0) < 0) {
- error_setg(errp, "failed to read the initial flash content");
- return;
- }
-
- rom_add_blob_fixed("aspeed.boot_rom", storage, rom_size, addr);
-}
-
-/*
- * Create a ROM and copy the flash contents at the expected address
- * (0x0). Boots faster than execute-in-place.
- */
-static void aspeed_install_boot_rom(AspeedMachineState *bmc, BlockBackend *blk,
- uint64_t rom_size)
-{
- AspeedSoCState *soc = bmc->soc;
- AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(soc);
-
- memory_region_init_rom(&bmc->boot_rom, NULL, "aspeed.boot_rom", rom_size,
- &error_abort);
- memory_region_add_subregion_overlap(&soc->spi_boot_container, 0,
- &bmc->boot_rom, 1);
- write_boot_rom(blk, sc->memmap[ASPEED_DEV_SPI_BOOT],
- rom_size, &error_abort);
-}
-
-#define VBOOTROM_FILE_NAME "ast27x0_bootrom.bin"
-
-/*
- * This function locates the vbootrom image file specified via the command line
- * using the -bios option. It loads the specified image into the vbootrom
- * memory region and handles errors if the file cannot be found or loaded.
- */
-static void aspeed_load_vbootrom(AspeedMachineState *bmc, const char *bios_name,
- Error **errp)
-{
- g_autofree char *filename = NULL;
- AspeedSoCState *soc = bmc->soc;
- int ret;
-
- filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
- if (!filename) {
- error_setg(errp, "Could not find vbootrom image '%s'", bios_name);
- return;
- }
-
- ret = load_image_mr(filename, &soc->vbootrom);
- if (ret < 0) {
- error_setg(errp, "Failed to load vbootrom image '%s'", bios_name);
- return;
- }
-}
-
-void aspeed_board_init_flashes(AspeedSMCState *s, const char *flashtype,
- unsigned int count, int unit0)
-{
- int i;
-
- if (!flashtype) {
- return;
- }
-
- for (i = 0; i < count; ++i) {
- DriveInfo *dinfo = drive_get(IF_MTD, 0, unit0 + i);
- DeviceState *dev;
-
- dev = qdev_new(flashtype);
- if (dinfo) {
- qdev_prop_set_drive(dev, "drive", blk_by_legacy_dinfo(dinfo));
- }
- qdev_prop_set_uint8(dev, "cs", i);
- qdev_realize_and_unref(dev, BUS(s->spi), &error_fatal);
- }
-}
-
static void sdhci_attach_drive(SDHCIState *sdhci, DriveInfo *dinfo, bool emmc,
bool boot_emmc)
{
@@ -393,12 +299,14 @@ static void connect_serial_hds_to_uarts(AspeedMachineState *bmc)
AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
int uart_chosen = bmc->uart_chosen ? bmc->uart_chosen : amc->uart_default;
- aspeed_soc_uart_set_chr(s, uart_chosen, serial_hd(0));
+ aspeed_soc_uart_set_chr(s->uart, uart_chosen, sc->uarts_base,
+ sc->uarts_num, serial_hd(0));
for (int i = 1, uart = sc->uarts_base; i < sc->uarts_num; uart++) {
if (uart == uart_chosen) {
continue;
}
- aspeed_soc_uart_set_chr(s, uart, serial_hd(i++));
+ aspeed_soc_uart_set_chr(s->uart, uart, sc->uarts_base, sc->uarts_num,
+ serial_hd(i++));
}
}
@@ -465,6 +373,8 @@ static void aspeed_machine_init(MachineState *machine)
aspeed_board_init_flashes(&bmc->soc->spi[0],
bmc->spi_model ? bmc->spi_model : amc->spi_model,
1, amc->num_cs);
+ aspeed_board_init_flashes(&bmc->soc->spi[1],
+ amc->spi2_model, 1, amc->num_cs2);
}
if (machine->kernel_filename && sc->num_cpus > 1) {
@@ -505,15 +415,16 @@ static void aspeed_machine_init(MachineState *machine)
if (fmc0 && !boot_emmc) {
uint64_t rom_size = memory_region_size(&bmc->soc->spi_boot);
- aspeed_install_boot_rom(bmc, fmc0, rom_size);
+ aspeed_install_boot_rom(bmc->soc, fmc0, &bmc->boot_rom, rom_size);
} else if (emmc0) {
- aspeed_install_boot_rom(bmc, blk_by_legacy_dinfo(emmc0), 64 * KiB);
+ aspeed_install_boot_rom(bmc->soc, blk_by_legacy_dinfo(emmc0),
+ &bmc->boot_rom, 64 * KiB);
}
}
if (amc->vbootrom) {
bios_name = machine->firmware ?: VBOOTROM_FILE_NAME;
- aspeed_load_vbootrom(bmc, bios_name, &error_abort);
+ aspeed_load_vbootrom(bmc->soc, bios_name, &error_abort);
}
arm_load_kernel(ARM_CPU(first_cpu), machine, &aspeed_board_binfo);
@@ -645,6 +556,12 @@ static void create_pca9552(AspeedSoCState *soc, int bus_id, int addr)
TYPE_PCA9552, addr);
}
+static I2CSlave *create_pca9554(AspeedSoCState *soc, int bus_id, int addr)
+{
+ return i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, bus_id),
+ TYPE_PCA9554, addr);
+}
+
static void sonorapass_bmc_i2c_init(AspeedMachineState *bmc)
{
AspeedSoCState *soc = bmc->soc;
@@ -1003,6 +920,180 @@ static void fuji_bmc_i2c_init(AspeedMachineState *bmc)
}
#define TYPE_TMP421 "tmp421"
+#define TYPE_DS1338 "ds1338"
+
+/* Catalina hardware value */
+#define CATALINA_BMC_HW_STRAP1 0x00002002
+#define CATALINA_BMC_HW_STRAP2 0x00000800
+
+#define CATALINA_BMC_RAM_SIZE ASPEED_RAM_SIZE(2 * GiB)
+
+static void catalina_bmc_i2c_init(AspeedMachineState *bmc)
+{
+ /* Reference from v6.16-rc2 aspeed-bmc-facebook-catalina.dts */
+
+ AspeedSoCState *soc = bmc->soc;
+ I2CBus *i2c[16] = {};
+ I2CSlave *i2c_mux;
+
+ /* busses 0-15 are all used. */
+ for (int i = 0; i < ARRAY_SIZE(i2c); i++) {
+ i2c[i] = aspeed_i2c_get_bus(&soc->i2c, i);
+ }
+
+ /* &i2c0 */
+ /* i2c-mux@71 (PCA9546) on i2c0 */
+ i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x71);
+
+ /* i2c-mux@72 (PCA9546) on i2c0 */
+ i2c_mux = i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x72);
+
+ /* i2c0mux1ch1 */
+ /* io_expander7 - pca9535@20 */
+ i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 1),
+ TYPE_PCA9552, 0x20);
+ /* eeprom@50 */
+ at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 1), 0x50, 8 * KiB);
+
+ /* i2c-mux@73 (PCA9546) on i2c0 */
+ i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x73);
+
+ /* i2c-mux@75 (PCA9546) on i2c0 */
+ i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x75);
+
+ /* i2c-mux@76 (PCA9546) on i2c0 */
+ i2c_mux = i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x76);
+
+ /* i2c0mux4ch1 */
+ /* io_expander8 - pca9535@21 */
+ i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 1),
+ TYPE_PCA9552, 0x21);
+ /* eeprom@50 */
+ at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 1), 0x50, 8 * KiB);
+
+ /* i2c-mux@77 (PCA9546) on i2c0 */
+ i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x77);
+
+
+ /* &i2c1 */
+ /* i2c-mux@70 (PCA9548) on i2c1 */
+ i2c_mux = i2c_slave_create_simple(i2c[1], TYPE_PCA9548, 0x70);
+ /* i2c1mux0ch0 */
+ /* ina238@41 - no model */
+ /* ina238@42 - no model */
+ /* ina238@44 - no model */
+ /* i2c1mux0ch1 */
+ /* ina238@41 - no model */
+ /* ina238@43 - no model */
+ /* i2c1mux0ch4 */
+ /* ltc4287@42 - no model */
+ /* ltc4287@43 - no model */
+
+ /* i2c1mux0ch5 */
+ /* eeprom@54 */
+ at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 5), 0x54, 8 * KiB);
+ /* tpm75@4f */
+ i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 5), TYPE_TMP75, 0x4f);
+
+ /* i2c1mux0ch6 */
+ /* io_expander5 - pca9554@27 */
+ i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 6),
+ TYPE_PCA9554, 0x27);
+ /* io_expander6 - pca9555@25 */
+ i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 6),
+ TYPE_PCA9552, 0x25);
+ /* eeprom@51 */
+ at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 6), 0x51, 8 * KiB);
+
+ /* i2c1mux0ch7 */
+ /* eeprom@53 */
+ at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 7), 0x53, 8 * KiB);
+ /* temperature-sensor@4b - tmp75 */
+ i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 7), TYPE_TMP75, 0x4b);
+
+ /* &i2c2 */
+ /* io_expander0 - pca9555@20 */
+ i2c_slave_create_simple(i2c[2], TYPE_PCA9552, 0x20);
+ /* io_expander0 - pca9555@21 */
+ i2c_slave_create_simple(i2c[2], TYPE_PCA9552, 0x21);
+ /* io_expander0 - pca9555@27 */
+ i2c_slave_create_simple(i2c[2], TYPE_PCA9552, 0x27);
+ /* eeprom@50 */
+ at24c_eeprom_init(i2c[2], 0x50, 8 * KiB);
+ /* eeprom@51 */
+ at24c_eeprom_init(i2c[2], 0x51, 8 * KiB);
+
+ /* &i2c5 */
+ /* i2c-mux@70 (PCA9548) on i2c5 */
+ i2c_mux = i2c_slave_create_simple(i2c[5], TYPE_PCA9548, 0x70);
+ /* i2c5mux0ch6 */
+ /* eeprom@52 */
+ at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 6), 0x52, 8 * KiB);
+ /* i2c5mux0ch7 */
+ /* ina230@40 - no model */
+ /* ina230@41 - no model */
+ /* ina230@44 - no model */
+ /* ina230@45 - no model */
+
+ /* &i2c6 */
+ /* io_expander3 - pca9555@21 */
+ i2c_slave_create_simple(i2c[6], TYPE_PCA9552, 0x21);
+ /* rtc@6f - nct3018y */
+ i2c_slave_create_simple(i2c[6], TYPE_DS1338, 0x6f);
+
+ /* &i2c9 */
+ /* io_expander4 - pca9555@4f */
+ i2c_slave_create_simple(i2c[9], TYPE_PCA9552, 0x4f);
+ /* temperature-sensor@4b - tpm75 */
+ i2c_slave_create_simple(i2c[9], TYPE_TMP75, 0x4b);
+ /* eeprom@50 */
+ at24c_eeprom_init(i2c[9], 0x50, 8 * KiB);
+ /* eeprom@56 */
+ at24c_eeprom_init(i2c[9], 0x56, 8 * KiB);
+
+ /* &i2c10 */
+ /* temperature-sensor@1f - tpm421 */
+ i2c_slave_create_simple(i2c[10], TYPE_TMP421, 0x1f);
+ /* eeprom@50 */
+ at24c_eeprom_init(i2c[10], 0x50, 8 * KiB);
+
+ /* &i2c11 */
+ /* ssif-bmc@10 - no model */
+
+ /* &i2c12 */
+ /* eeprom@50 */
+ at24c_eeprom_init(i2c[12], 0x50, 8 * KiB);
+
+ /* &i2c13 */
+ /* eeprom@50 */
+ at24c_eeprom_init(i2c[13], 0x50, 8 * KiB);
+ /* eeprom@54 */
+ at24c_eeprom_init(i2c[13], 0x54, 256);
+ /* eeprom@55 */
+ at24c_eeprom_init(i2c[13], 0x55, 256);
+ /* eeprom@57 */
+ at24c_eeprom_init(i2c[13], 0x57, 256);
+
+ /* &i2c14 */
+ /* io_expander9 - pca9555@10 */
+ i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x10);
+ /* io_expander10 - pca9555@11 */
+ i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x11);
+ /* io_expander11 - pca9555@12 */
+ i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x12);
+ /* io_expander12 - pca9555@13 */
+ i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x13);
+ /* io_expander13 - pca9555@14 */
+ i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x14);
+ /* io_expander14 - pca9555@15 */
+ i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x15);
+
+ /* &i2c15 */
+ /* temperature-sensor@1f - tmp421 */
+ i2c_slave_create_simple(i2c[15], TYPE_TMP421, 0x1f);
+ /* eeprom@52 */
+ at24c_eeprom_init(i2c[15], 0x52, 8 * KiB);
+}
static void bletchley_bmc_i2c_init(AspeedMachineState *bmc)
{
@@ -1050,6 +1141,45 @@ static void bletchley_bmc_i2c_init(AspeedMachineState *bmc)
i2c_slave_create_simple(i2c[12], TYPE_PCA9552, 0x67);
}
+
+static void gb200nvl_bmc_i2c_init(AspeedMachineState *bmc)
+{
+ AspeedSoCState *soc = bmc->soc;
+ I2CBus *i2c[15] = {};
+ DeviceState *dev;
+ for (int i = 0; i < sizeof(i2c) / sizeof(i2c[0]); i++) {
+ if ((i == 11) || (i == 12) || (i == 13)) {
+ continue;
+ }
+ i2c[i] = aspeed_i2c_get_bus(&soc->i2c, i);
+ }
+
+ /* Bus 5 Expander */
+ create_pca9554(soc, 4, 0x21);
+
+ /* Mux I2c Expanders */
+ i2c_slave_create_simple(i2c[5], "pca9546", 0x71);
+ i2c_slave_create_simple(i2c[5], "pca9546", 0x72);
+ i2c_slave_create_simple(i2c[5], "pca9546", 0x73);
+ i2c_slave_create_simple(i2c[5], "pca9546", 0x75);
+ i2c_slave_create_simple(i2c[5], "pca9546", 0x76);
+ i2c_slave_create_simple(i2c[5], "pca9546", 0x77);
+
+ /* Bus 10 */
+ dev = DEVICE(create_pca9554(soc, 9, 0x20));
+
+ /* Set FPGA_READY */
+ object_property_set_str(OBJECT(dev), "pin1", "high", &error_fatal);
+
+ create_pca9554(soc, 9, 0x21);
+ at24c_eeprom_init(i2c[9], 0x50, 64 * KiB);
+ at24c_eeprom_init(i2c[9], 0x51, 64 * KiB);
+
+ /* Bus 11 */
+ at24c_eeprom_init_rom(i2c[10], 0x50, 256, gb200nvl_bmc_fruid,
+ gb200nvl_bmc_fruid_len);
+}
+
static void fby35_i2c_init(AspeedMachineState *bmc)
{
AspeedSoCState *soc = bmc->soc;
@@ -1182,8 +1312,8 @@ static void aspeed_set_bmc_console(Object *obj, const char *value, Error **errp)
AspeedMachineClass *amc = ASPEED_MACHINE_GET_CLASS(bmc);
AspeedSoCClass *sc = ASPEED_SOC_CLASS(object_class_by_name(amc->soc_name));
int val;
- int uart_first = aspeed_uart_first(sc);
- int uart_last = aspeed_uart_last(sc);
+ int uart_first = aspeed_uart_first(sc->uarts_base);
+ int uart_last = aspeed_uart_last(sc->uarts_base, sc->uarts_num);
if (sscanf(value, "uart%u", &val) != 1) {
error_setg(errp, "Bad value for \"uart\" property");
@@ -1290,7 +1420,6 @@ static void aspeed_machine_palmetto_class_init(ObjectClass *oc,
amc->spi_model = "mx25l25635f";
amc->num_cs = 1;
amc->i2c_init = palmetto_bmc_i2c_init;
- mc->auto_create_sdcard = true;
mc->default_ram_size = 256 * MiB;
aspeed_machine_class_init_cpus_defaults(mc);
};
@@ -1308,7 +1437,6 @@ static void aspeed_machine_quanta_q71l_class_init(ObjectClass *oc,
amc->spi_model = "mx25l25635e";
amc->num_cs = 1;
amc->i2c_init = quanta_q71l_bmc_i2c_init;
- mc->auto_create_sdcard = true;
mc->default_ram_size = 128 * MiB;
aspeed_machine_class_init_cpus_defaults(mc);
}
@@ -1327,7 +1455,6 @@ static void aspeed_machine_supermicrox11_bmc_class_init(ObjectClass *oc,
amc->num_cs = 1;
amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON;
amc->i2c_init = palmetto_bmc_i2c_init;
- mc->auto_create_sdcard = true;
mc->default_ram_size = 256 * MiB;
aspeed_machine_class_init_cpus_defaults(mc);
}
@@ -1346,7 +1473,6 @@ static void aspeed_machine_supermicro_x11spi_bmc_class_init(ObjectClass *oc,
amc->num_cs = 1;
amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON;
amc->i2c_init = palmetto_bmc_i2c_init;
- mc->auto_create_sdcard = true;
mc->default_ram_size = 512 * MiB;
aspeed_machine_class_init_cpus_defaults(mc);
}
@@ -1364,7 +1490,6 @@ static void aspeed_machine_ast2500_evb_class_init(ObjectClass *oc,
amc->spi_model = "mx25l25635f";
amc->num_cs = 1;
amc->i2c_init = ast2500_evb_i2c_init;
- mc->auto_create_sdcard = true;
mc->default_ram_size = 512 * MiB;
aspeed_machine_class_init_cpus_defaults(mc);
};
@@ -1383,7 +1508,6 @@ static void aspeed_machine_yosemitev2_class_init(ObjectClass *oc,
amc->spi_model = "mx25l25635e";
amc->num_cs = 2;
amc->i2c_init = yosemitev2_bmc_i2c_init;
- mc->auto_create_sdcard = true;
mc->default_ram_size = 512 * MiB;
aspeed_machine_class_init_cpus_defaults(mc);
};
@@ -1401,7 +1525,6 @@ static void aspeed_machine_romulus_class_init(ObjectClass *oc,
amc->spi_model = "mx66l1g45g";
amc->num_cs = 2;
amc->i2c_init = romulus_bmc_i2c_init;
- mc->auto_create_sdcard = true;
mc->default_ram_size = 512 * MiB;
aspeed_machine_class_init_cpus_defaults(mc);
};
@@ -1420,7 +1543,6 @@ static void aspeed_machine_tiogapass_class_init(ObjectClass *oc,
amc->spi_model = "mx25l25635e";
amc->num_cs = 2;
amc->i2c_init = tiogapass_bmc_i2c_init;
- mc->auto_create_sdcard = true;
mc->default_ram_size = 1 * GiB;
aspeed_machine_class_init_cpus_defaults(mc);
};
@@ -1432,13 +1554,13 @@ static void aspeed_machine_sonorapass_class_init(ObjectClass *oc,
AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
mc->desc = "OCP SonoraPass BMC (ARM1176)";
+ mc->deprecation_reason = "use 'ast2500-evb' instead";
amc->soc_name = "ast2500-a1";
amc->hw_strap1 = SONORAPASS_BMC_HW_STRAP1;
amc->fmc_model = "mx66l1g45g";
amc->spi_model = "mx66l1g45g";
amc->num_cs = 2;
amc->i2c_init = sonorapass_bmc_i2c_init;
- mc->auto_create_sdcard = true;
mc->default_ram_size = 512 * MiB;
aspeed_machine_class_init_cpus_defaults(mc);
};
@@ -1456,7 +1578,6 @@ static void aspeed_machine_witherspoon_class_init(ObjectClass *oc,
amc->spi_model = "mx66l1g45g";
amc->num_cs = 2;
amc->i2c_init = witherspoon_bmc_i2c_init;
- mc->auto_create_sdcard = true;
mc->default_ram_size = 512 * MiB;
aspeed_machine_class_init_cpus_defaults(mc);
};
@@ -1478,7 +1599,6 @@ static void aspeed_machine_ast2600_evb_class_init(ObjectClass *oc,
ASPEED_MAC3_ON;
amc->sdhci_wp_inverted = true;
amc->i2c_init = ast2600_evb_i2c_init;
- mc->auto_create_sdcard = true;
mc->default_ram_size = 1 * GiB;
aspeed_machine_class_init_cpus_defaults(mc);
aspeed_machine_ast2600_class_emmc_init(oc);
@@ -1497,7 +1617,6 @@ static void aspeed_machine_g220a_class_init(ObjectClass *oc, const void *data)
amc->num_cs = 2;
amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON;
amc->i2c_init = g220a_bmc_i2c_init;
- mc->auto_create_sdcard = true;
mc->default_ram_size = 1024 * MiB;
aspeed_machine_class_init_cpus_defaults(mc);
};
@@ -1509,6 +1628,7 @@ static void aspeed_machine_fp5280g2_class_init(ObjectClass *oc,
AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
mc->desc = "Inspur FP5280G2 BMC (ARM1176)";
+ mc->deprecation_reason = "use 'ast2500-evb' instead";
amc->soc_name = "ast2500-a1";
amc->hw_strap1 = FP5280G2_BMC_HW_STRAP1;
amc->fmc_model = "n25q512a";
@@ -1516,7 +1636,6 @@ static void aspeed_machine_fp5280g2_class_init(ObjectClass *oc,
amc->num_cs = 2;
amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON;
amc->i2c_init = fp5280g2_bmc_i2c_init;
- mc->auto_create_sdcard = true;
mc->default_ram_size = 512 * MiB;
aspeed_machine_class_init_cpus_defaults(mc);
};
@@ -1535,7 +1654,6 @@ static void aspeed_machine_rainier_class_init(ObjectClass *oc, const void *data)
amc->num_cs = 2;
amc->macs_mask = ASPEED_MAC2_ON | ASPEED_MAC3_ON;
amc->i2c_init = rainier_bmc_i2c_init;
- mc->auto_create_sdcard = true;
mc->default_ram_size = 1 * GiB;
aspeed_machine_class_init_cpus_defaults(mc);
aspeed_machine_ast2600_class_emmc_init(oc);
@@ -1558,7 +1676,6 @@ static void aspeed_machine_fuji_class_init(ObjectClass *oc, const void *data)
amc->macs_mask = ASPEED_MAC3_ON;
amc->i2c_init = fuji_bmc_i2c_init;
amc->uart_default = ASPEED_DEV_UART1;
- mc->auto_create_sdcard = true;
mc->default_ram_size = FUJI_BMC_RAM_SIZE;
aspeed_machine_class_init_cpus_defaults(mc);
};
@@ -1580,11 +1697,55 @@ static void aspeed_machine_bletchley_class_init(ObjectClass *oc,
amc->num_cs = 2;
amc->macs_mask = ASPEED_MAC2_ON;
amc->i2c_init = bletchley_bmc_i2c_init;
- mc->auto_create_sdcard = true;
mc->default_ram_size = BLETCHLEY_BMC_RAM_SIZE;
aspeed_machine_class_init_cpus_defaults(mc);
}
+static void aspeed_machine_catalina_class_init(ObjectClass *oc,
+ const void *data)
+{
+ MachineClass *mc = MACHINE_CLASS(oc);
+ AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
+
+ mc->desc = "Facebook Catalina BMC (Cortex-A7)";
+ amc->soc_name = "ast2600-a3";
+ amc->hw_strap1 = CATALINA_BMC_HW_STRAP1;
+ amc->hw_strap2 = CATALINA_BMC_HW_STRAP2;
+ amc->fmc_model = "w25q01jvq";
+ amc->spi_model = NULL;
+ amc->num_cs = 2;
+ amc->macs_mask = ASPEED_MAC2_ON;
+ amc->i2c_init = catalina_bmc_i2c_init;
+ mc->default_ram_size = CATALINA_BMC_RAM_SIZE;
+ aspeed_machine_class_init_cpus_defaults(mc);
+ aspeed_machine_ast2600_class_emmc_init(oc);
+}
+
+#define GB200NVL_BMC_RAM_SIZE ASPEED_RAM_SIZE(1 * GiB)
+
+static void aspeed_machine_gb200nvl_class_init(ObjectClass *oc,
+ const void *data)
+{
+ MachineClass *mc = MACHINE_CLASS(oc);
+ AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
+
+ mc->desc = "Nvidia GB200NVL BMC (Cortex-A7)";
+ amc->soc_name = "ast2600-a3";
+ amc->hw_strap1 = GB200NVL_BMC_HW_STRAP1;
+ amc->hw_strap2 = GB200NVL_BMC_HW_STRAP2;
+ amc->fmc_model = "mx66u51235f";
+ amc->spi_model = "mx66u51235f";
+ amc->num_cs = 2;
+
+ amc->spi2_model = "mx66u51235f";
+ amc->num_cs2 = 1;
+ amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON;
+ amc->i2c_init = gb200nvl_bmc_i2c_init;
+ mc->default_ram_size = GB200NVL_BMC_RAM_SIZE;
+ aspeed_machine_class_init_cpus_defaults(mc);
+ aspeed_machine_ast2600_class_emmc_init(oc);
+}
+
static void fby35_reset(MachineState *state, ResetType type)
{
AspeedMachineState *bmc = ASPEED_MACHINE(state);
@@ -1622,7 +1783,6 @@ static void aspeed_machine_fby35_class_init(ObjectClass *oc, const void *data)
amc->num_cs = 2;
amc->macs_mask = ASPEED_MAC3_ON;
amc->i2c_init = fby35_i2c_init;
- mc->auto_create_sdcard = true;
/* FIXME: Replace this macro with something more general */
mc->default_ram_size = FUJI_BMC_RAM_SIZE;
aspeed_machine_class_init_cpus_defaults(mc);
@@ -1724,7 +1884,6 @@ static void aspeed_machine_ast2700a0_evb_class_init(ObjectClass *oc,
MachineClass *mc = MACHINE_CLASS(oc);
AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
- mc->alias = "ast2700-evb";
mc->desc = "Aspeed AST2700 A0 EVB (Cortex-A35)";
amc->soc_name = "ast2700-a0";
amc->hw_strap1 = AST2700_EVB_HW_STRAP1;
@@ -1736,7 +1895,6 @@ static void aspeed_machine_ast2700a0_evb_class_init(ObjectClass *oc,
amc->uart_default = ASPEED_DEV_UART12;
amc->i2c_init = ast2700_evb_i2c_init;
amc->vbootrom = true;
- mc->auto_create_sdcard = true;
mc->default_ram_size = 1 * GiB;
aspeed_machine_class_init_cpus_defaults(mc);
}
@@ -1747,6 +1905,7 @@ static void aspeed_machine_ast2700a1_evb_class_init(ObjectClass *oc,
MachineClass *mc = MACHINE_CLASS(oc);
AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
+ mc->alias = "ast2700-evb";
mc->desc = "Aspeed AST2700 A1 EVB (Cortex-A35)";
amc->soc_name = "ast2700-a1";
amc->hw_strap1 = AST2700_EVB_HW_STRAP1;
@@ -1758,7 +1917,6 @@ static void aspeed_machine_ast2700a1_evb_class_init(ObjectClass *oc,
amc->uart_default = ASPEED_DEV_UART12;
amc->i2c_init = ast2700_evb_i2c_init;
amc->vbootrom = true;
- mc->auto_create_sdcard = true;
mc->default_ram_size = 1 * GiB;
aspeed_machine_class_init_cpus_defaults(mc);
}
@@ -1771,6 +1929,7 @@ static void aspeed_machine_qcom_dc_scm_v1_class_init(ObjectClass *oc,
AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
mc->desc = "Qualcomm DC-SCM V1 BMC (Cortex A7)";
+ mc->deprecation_reason = "use 'ast2600-evb' instead";
amc->soc_name = "ast2600-a3";
amc->hw_strap1 = QCOM_DC_SCM_V1_BMC_HW_STRAP1;
amc->hw_strap2 = QCOM_DC_SCM_V1_BMC_HW_STRAP2;
@@ -1779,7 +1938,6 @@ static void aspeed_machine_qcom_dc_scm_v1_class_init(ObjectClass *oc,
amc->num_cs = 2;
amc->macs_mask = ASPEED_MAC2_ON | ASPEED_MAC3_ON;
amc->i2c_init = qcom_dc_scm_bmc_i2c_init;
- mc->auto_create_sdcard = true;
mc->default_ram_size = 1 * GiB;
aspeed_machine_class_init_cpus_defaults(mc);
};
@@ -1791,6 +1949,7 @@ static void aspeed_machine_qcom_firework_class_init(ObjectClass *oc,
AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
mc->desc = "Qualcomm DC-SCM V1/Firework BMC (Cortex A7)";
+ mc->deprecation_reason = "use 'ast2600-evb' instead";
amc->soc_name = "ast2600-a3";
amc->hw_strap1 = QCOM_DC_SCM_V1_BMC_HW_STRAP1;
amc->hw_strap2 = QCOM_DC_SCM_V1_BMC_HW_STRAP2;
@@ -1799,7 +1958,6 @@ static void aspeed_machine_qcom_firework_class_init(ObjectClass *oc,
amc->num_cs = 2;
amc->macs_mask = ASPEED_MAC2_ON | ASPEED_MAC3_ON;
amc->i2c_init = qcom_dc_scm_firework_i2c_init;
- mc->auto_create_sdcard = true;
mc->default_ram_size = 1 * GiB;
aspeed_machine_class_init_cpus_defaults(mc);
};
@@ -1878,6 +2036,14 @@ static const TypeInfo aspeed_machine_types[] = {
.parent = TYPE_ASPEED_MACHINE,
.class_init = aspeed_machine_bletchley_class_init,
}, {
+ .name = MACHINE_TYPE_NAME("gb200nvl-bmc"),
+ .parent = TYPE_ASPEED_MACHINE,
+ .class_init = aspeed_machine_gb200nvl_class_init,
+ }, {
+ .name = MACHINE_TYPE_NAME("catalina-bmc"),
+ .parent = TYPE_ASPEED_MACHINE,
+ .class_init = aspeed_machine_catalina_class_init,
+ }, {
.name = MACHINE_TYPE_NAME("fby35-bmc"),
.parent = MACHINE_TYPE_NAME("ast2600-evb"),
.class_init = aspeed_machine_fby35_class_init,
diff --git a/hw/arm/aspeed_ast10x0.c b/hw/arm/aspeed_ast10x0.c
index e6e1ee6..7f49c13 100644
--- a/hw/arm/aspeed_ast10x0.c
+++ b/hw/arm/aspeed_ast10x0.c
@@ -154,7 +154,7 @@ static void aspeed_soc_ast1030_init(Object *obj)
object_initialize_child(obj, "peci", &s->peci, TYPE_ASPEED_PECI);
- object_initialize_child(obj, "sbc", &s->sbc, TYPE_ASPEED_SBC);
+ object_initialize_child(obj, "sbc", &s->sbc, TYPE_ASPEED_AST10X0_SBC);
for (i = 0; i < sc->wdts_num; i++) {
snprintf(typename, sizeof(typename), "aspeed.wdt-%s", socname);
@@ -192,6 +192,7 @@ static void aspeed_soc_ast1030_realize(DeviceState *dev_soc, Error **errp)
AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
DeviceState *armv7m;
Error *err = NULL;
+ int uart;
int i;
g_autofree char *sram_name = NULL;
@@ -201,17 +202,20 @@ static void aspeed_soc_ast1030_realize(DeviceState *dev_soc, Error **errp)
}
/* General I/O memory space to catch all unimplemented device */
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&s->iomem), "aspeed.io",
+ aspeed_mmio_map_unimplemented(s->memory, SYS_BUS_DEVICE(&s->iomem),
+ "aspeed.io",
sc->memmap[ASPEED_DEV_IOMEM],
ASPEED_SOC_IOMEM_SIZE);
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&s->sbc_unimplemented),
+ aspeed_mmio_map_unimplemented(s->memory,
+ SYS_BUS_DEVICE(&s->sbc_unimplemented),
"aspeed.sbc", sc->memmap[ASPEED_DEV_SBC],
0x40000);
/* AST1030 CPU Core */
armv7m = DEVICE(&a->armv7m);
qdev_prop_set_uint32(armv7m, "num-irq", 256);
- qdev_prop_set_string(armv7m, "cpu-type", aspeed_soc_cpu_type(sc));
+ qdev_prop_set_string(armv7m, "cpu-type",
+ aspeed_soc_cpu_type(sc->valid_cpu_types));
qdev_connect_clock_in(armv7m, "cpuclk", s->sysclk);
object_property_set_link(OBJECT(&a->armv7m), "memory",
OBJECT(s->memory), &error_abort);
@@ -241,7 +245,8 @@ static void aspeed_soc_ast1030_realize(DeviceState *dev_soc, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->scu), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->scu), 0, sc->memmap[ASPEED_DEV_SCU]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->scu), 0,
+ sc->memmap[ASPEED_DEV_SCU]);
/* I2C */
@@ -250,7 +255,8 @@ static void aspeed_soc_ast1030_realize(DeviceState *dev_soc, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->i2c), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->i2c), 0, sc->memmap[ASPEED_DEV_I2C]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->i2c), 0,
+ sc->memmap[ASPEED_DEV_I2C]);
for (i = 0; i < ASPEED_I2C_GET_CLASS(&s->i2c)->num_busses; i++) {
qemu_irq irq = qdev_get_gpio_in(DEVICE(&a->armv7m),
sc->irqmap[ASPEED_DEV_I2C] + i);
@@ -262,7 +268,8 @@ static void aspeed_soc_ast1030_realize(DeviceState *dev_soc, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->i3c), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->i3c), 0, sc->memmap[ASPEED_DEV_I3C]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->i3c), 0,
+ sc->memmap[ASPEED_DEV_I3C]);
for (i = 0; i < ASPEED_I3C_NR_DEVICES; i++) {
qemu_irq irq = qdev_get_gpio_in(DEVICE(&a->armv7m),
sc->irqmap[ASPEED_DEV_I3C] + i);
@@ -274,20 +281,21 @@ static void aspeed_soc_ast1030_realize(DeviceState *dev_soc, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->peci), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->peci), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->peci), 0,
sc->memmap[ASPEED_DEV_PECI]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->peci), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_PECI));
+ aspeed_soc_ast1030_get_irq(s, ASPEED_DEV_PECI));
/* LPC */
if (!sysbus_realize(SYS_BUS_DEVICE(&s->lpc), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->lpc), 0, sc->memmap[ASPEED_DEV_LPC]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->lpc), 0,
+ sc->memmap[ASPEED_DEV_LPC]);
/* Connect the LPC IRQ to the GIC. It is otherwise unused. */
sysbus_connect_irq(SYS_BUS_DEVICE(&s->lpc), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_LPC));
+ aspeed_soc_ast1030_get_irq(s, ASPEED_DEV_LPC));
/*
* On the AST1030 LPC subdevice IRQs are connected straight to the GIC.
@@ -309,8 +317,13 @@ static void aspeed_soc_ast1030_realize(DeviceState *dev_soc, Error **errp)
sc->irqmap[ASPEED_DEV_KCS] + aspeed_lpc_kcs_4));
/* UART */
- if (!aspeed_soc_uart_realize(s, errp)) {
- return;
+ for (i = 0, uart = sc->uarts_base; i < sc->uarts_num; i++, uart++) {
+ if (!aspeed_soc_uart_realize(s->memory, &s->uart[i],
+ sc->memmap[uart], errp)) {
+ return;
+ }
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->uart[i]), 0,
+ aspeed_soc_ast1030_get_irq(s, uart));
}
/* Timer */
@@ -319,10 +332,10 @@ static void aspeed_soc_ast1030_realize(DeviceState *dev_soc, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->timerctrl), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->timerctrl), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->timerctrl), 0,
sc->memmap[ASPEED_DEV_TIMER1]);
for (i = 0; i < ASPEED_TIMER_NR_TIMERS; i++) {
- qemu_irq irq = aspeed_soc_get_irq(s, ASPEED_DEV_TIMER1 + i);
+ qemu_irq irq = aspeed_soc_ast1030_get_irq(s, ASPEED_DEV_TIMER1 + i);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->timerctrl), i, irq);
}
@@ -330,9 +343,10 @@ static void aspeed_soc_ast1030_realize(DeviceState *dev_soc, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->adc), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->adc), 0, sc->memmap[ASPEED_DEV_ADC]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->adc), 0,
+ sc->memmap[ASPEED_DEV_ADC]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->adc), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_ADC));
+ aspeed_soc_ast1030_get_irq(s, ASPEED_DEV_ADC));
/* FMC, The number of CS is set at the board level */
object_property_set_link(OBJECT(&s->fmc), "dram", OBJECT(&s->sram),
@@ -340,11 +354,12 @@ static void aspeed_soc_ast1030_realize(DeviceState *dev_soc, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->fmc), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->fmc), 0, sc->memmap[ASPEED_DEV_FMC]);
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->fmc), 1,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->fmc), 0,
+ sc->memmap[ASPEED_DEV_FMC]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->fmc), 1,
ASPEED_SMC_GET_CLASS(&s->fmc)->flash_window_base);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->fmc), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_FMC));
+ aspeed_soc_ast1030_get_irq(s, ASPEED_DEV_FMC));
/* SPI */
for (i = 0; i < sc->spis_num; i++) {
@@ -353,9 +368,9 @@ static void aspeed_soc_ast1030_realize(DeviceState *dev_soc, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->spi[i]), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->spi[i]), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->spi[i]), 0,
sc->memmap[ASPEED_DEV_SPI1 + i]);
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->spi[i]), 1,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->spi[i]), 1,
ASPEED_SMC_GET_CLASS(&s->spi[i])->flash_window_base);
}
@@ -363,7 +378,8 @@ static void aspeed_soc_ast1030_realize(DeviceState *dev_soc, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->sbc), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->sbc), 0, sc->memmap[ASPEED_DEV_SBC]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->sbc), 0,
+ sc->memmap[ASPEED_DEV_SBC]);
/* HACE */
object_property_set_link(OBJECT(&s->hace), "dram", OBJECT(&s->sram),
@@ -371,10 +387,10 @@ static void aspeed_soc_ast1030_realize(DeviceState *dev_soc, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->hace), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->hace), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->hace), 0,
sc->memmap[ASPEED_DEV_HACE]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->hace), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_HACE));
+ aspeed_soc_ast1030_get_irq(s, ASPEED_DEV_HACE));
/* Watch dog */
for (i = 0; i < sc->wdts_num; i++) {
@@ -386,32 +402,38 @@ static void aspeed_soc_ast1030_realize(DeviceState *dev_soc, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->wdt[i]), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->wdt[i]), 0, wdt_offset);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->wdt[i]), 0, wdt_offset);
}
/* GPIO */
if (!sysbus_realize(SYS_BUS_DEVICE(&s->gpio), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->gpio), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->gpio), 0,
sc->memmap[ASPEED_DEV_GPIO]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->gpio), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_GPIO));
+ aspeed_soc_ast1030_get_irq(s, ASPEED_DEV_GPIO));
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&s->pwm), "aspeed.pwm",
+ aspeed_mmio_map_unimplemented(s->memory, SYS_BUS_DEVICE(&s->pwm),
+ "aspeed.pwm",
sc->memmap[ASPEED_DEV_PWM], 0x100);
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&s->espi), "aspeed.espi",
+ aspeed_mmio_map_unimplemented(s->memory, SYS_BUS_DEVICE(&s->espi),
+ "aspeed.espi",
sc->memmap[ASPEED_DEV_ESPI], 0x800);
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&s->udc), "aspeed.udc",
+ aspeed_mmio_map_unimplemented(s->memory, SYS_BUS_DEVICE(&s->udc),
+ "aspeed.udc",
sc->memmap[ASPEED_DEV_UDC], 0x1000);
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&s->sgpiom), "aspeed.sgpiom",
+ aspeed_mmio_map_unimplemented(s->memory, SYS_BUS_DEVICE(&s->sgpiom),
+ "aspeed.sgpiom",
sc->memmap[ASPEED_DEV_SGPIOM], 0x100);
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&s->jtag[0]), "aspeed.jtag",
+ aspeed_mmio_map_unimplemented(s->memory, SYS_BUS_DEVICE(&s->jtag[0]),
+ "aspeed.jtag",
sc->memmap[ASPEED_DEV_JTAG0], 0x20);
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&s->jtag[1]), "aspeed.jtag",
+ aspeed_mmio_map_unimplemented(s->memory, SYS_BUS_DEVICE(&s->jtag[1]),
+ "aspeed.jtag",
sc->memmap[ASPEED_DEV_JTAG1], 0x20);
}
@@ -441,7 +463,6 @@ static void aspeed_soc_ast1030_class_init(ObjectClass *klass, const void *data)
sc->irqmap = aspeed_soc_ast1030_irqmap;
sc->memmap = aspeed_soc_ast1030_memmap;
sc->num_cpus = 1;
- sc->get_irq = aspeed_soc_ast1030_get_irq;
}
static const TypeInfo aspeed_soc_ast10x0_types[] = {
diff --git a/hw/arm/aspeed_ast2400.c b/hw/arm/aspeed_ast2400.c
index c7b0f21..b1b826b 100644
--- a/hw/arm/aspeed_ast2400.c
+++ b/hw/arm/aspeed_ast2400.c
@@ -157,7 +157,7 @@ static void aspeed_ast2400_soc_init(Object *obj)
for (i = 0; i < sc->num_cpus; i++) {
object_initialize_child(obj, "cpu[*]", &a->cpu[i],
- aspeed_soc_cpu_type(sc));
+ aspeed_soc_cpu_type(sc->valid_cpu_types));
}
snprintf(typename, sizeof(typename), "aspeed.scu-%s", socname);
@@ -251,6 +251,7 @@ static void aspeed_ast2400_soc_realize(DeviceState *dev, Error **errp)
AspeedSoCState *s = ASPEED_SOC(dev);
AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
g_autofree char *sram_name = NULL;
+ int uart;
/* Default boot region (SPI memory or ROMs) */
memory_region_init(&s->spi_boot_container, OBJECT(s),
@@ -259,12 +260,14 @@ static void aspeed_ast2400_soc_realize(DeviceState *dev, Error **errp)
&s->spi_boot_container);
/* IO space */
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&s->iomem), "aspeed.io",
+ aspeed_mmio_map_unimplemented(s->memory, SYS_BUS_DEVICE(&s->iomem),
+ "aspeed.io",
sc->memmap[ASPEED_DEV_IOMEM],
ASPEED_SOC_IOMEM_SIZE);
/* Video engine stub */
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&s->video), "aspeed.video",
+ aspeed_mmio_map_unimplemented(s->memory, SYS_BUS_DEVICE(&s->video),
+ "aspeed.video",
sc->memmap[ASPEED_DEV_VIDEO], 0x1000);
/* CPU */
@@ -289,13 +292,15 @@ static void aspeed_ast2400_soc_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->scu), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->scu), 0, sc->memmap[ASPEED_DEV_SCU]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->scu), 0,
+ sc->memmap[ASPEED_DEV_SCU]);
/* VIC */
if (!sysbus_realize(SYS_BUS_DEVICE(&a->vic), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&a->vic), 0, sc->memmap[ASPEED_DEV_VIC]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&a->vic), 0,
+ sc->memmap[ASPEED_DEV_VIC]);
sysbus_connect_irq(SYS_BUS_DEVICE(&a->vic), 0,
qdev_get_gpio_in(DEVICE(&a->cpu), ARM_CPU_IRQ));
sysbus_connect_irq(SYS_BUS_DEVICE(&a->vic), 1,
@@ -305,9 +310,10 @@ static void aspeed_ast2400_soc_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->rtc), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->rtc), 0, sc->memmap[ASPEED_DEV_RTC]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->rtc), 0,
+ sc->memmap[ASPEED_DEV_RTC]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->rtc), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_RTC));
+ aspeed_soc_ast2400_get_irq(s, ASPEED_DEV_RTC));
/* Timer */
object_property_set_link(OBJECT(&s->timerctrl), "scu", OBJECT(&s->scu),
@@ -315,10 +321,10 @@ static void aspeed_ast2400_soc_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->timerctrl), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->timerctrl), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->timerctrl), 0,
sc->memmap[ASPEED_DEV_TIMER1]);
for (i = 0; i < ASPEED_TIMER_NR_TIMERS; i++) {
- qemu_irq irq = aspeed_soc_get_irq(s, ASPEED_DEV_TIMER1 + i);
+ qemu_irq irq = aspeed_soc_ast2400_get_irq(s, ASPEED_DEV_TIMER1 + i);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->timerctrl), i, irq);
}
@@ -326,13 +332,19 @@ static void aspeed_ast2400_soc_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->adc), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->adc), 0, sc->memmap[ASPEED_DEV_ADC]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->adc), 0,
+ sc->memmap[ASPEED_DEV_ADC]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->adc), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_ADC));
+ aspeed_soc_ast2400_get_irq(s, ASPEED_DEV_ADC));
/* UART */
- if (!aspeed_soc_uart_realize(s, errp)) {
- return;
+ for (i = 0, uart = sc->uarts_base; i < sc->uarts_num; i++, uart++) {
+ if (!aspeed_soc_uart_realize(s->memory, &s->uart[i],
+ sc->memmap[uart], errp)) {
+ return;
+ }
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->uart[i]), 0,
+ aspeed_soc_ast2400_get_irq(s, uart));
}
/* I2C */
@@ -341,18 +353,19 @@ static void aspeed_ast2400_soc_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->i2c), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->i2c), 0, sc->memmap[ASPEED_DEV_I2C]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->i2c), 0,
+ sc->memmap[ASPEED_DEV_I2C]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->i2c), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_I2C));
+ aspeed_soc_ast2400_get_irq(s, ASPEED_DEV_I2C));
/* PECI */
if (!sysbus_realize(SYS_BUS_DEVICE(&s->peci), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->peci), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->peci), 0,
sc->memmap[ASPEED_DEV_PECI]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->peci), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_PECI));
+ aspeed_soc_ast2400_get_irq(s, ASPEED_DEV_PECI));
/* FMC, The number of CS is set at the board level */
object_property_set_link(OBJECT(&s->fmc), "dram", OBJECT(s->dram_mr),
@@ -360,11 +373,12 @@ static void aspeed_ast2400_soc_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->fmc), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->fmc), 0, sc->memmap[ASPEED_DEV_FMC]);
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->fmc), 1,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->fmc), 0,
+ sc->memmap[ASPEED_DEV_FMC]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->fmc), 1,
ASPEED_SMC_GET_CLASS(&s->fmc)->flash_window_base);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->fmc), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_FMC));
+ aspeed_soc_ast2400_get_irq(s, ASPEED_DEV_FMC));
/* Set up an alias on the FMC CE0 region (boot default) */
MemoryRegion *fmc0_mmio = &s->fmc.flashes[0].mmio;
@@ -377,9 +391,9 @@ static void aspeed_ast2400_soc_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->spi[i]), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->spi[i]), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->spi[i]), 0,
sc->memmap[ASPEED_DEV_SPI1 + i]);
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->spi[i]), 1,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->spi[i]), 1,
ASPEED_SMC_GET_CLASS(&s->spi[i])->flash_window_base);
}
@@ -388,17 +402,18 @@ static void aspeed_ast2400_soc_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->ehci[i]), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->ehci[i]), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->ehci[i]), 0,
sc->memmap[ASPEED_DEV_EHCI1 + i]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->ehci[i]), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_EHCI1 + i));
+ aspeed_soc_ast2400_get_irq(s,
+ ASPEED_DEV_EHCI1 + i));
}
/* SDMC - SDRAM Memory Controller */
if (!sysbus_realize(SYS_BUS_DEVICE(&s->sdmc), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->sdmc), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->sdmc), 0,
sc->memmap[ASPEED_DEV_SDMC]);
/* Watch dog */
@@ -411,7 +426,7 @@ static void aspeed_ast2400_soc_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->wdt[i]), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->wdt[i]), 0, wdt_offset);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->wdt[i]), 0, wdt_offset);
}
/* RAM */
@@ -426,48 +441,49 @@ static void aspeed_ast2400_soc_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->ftgmac100[i]), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->ftgmac100[i]), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->ftgmac100[i]), 0,
sc->memmap[ASPEED_DEV_ETH1 + i]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->ftgmac100[i]), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_ETH1 + i));
+ aspeed_soc_ast2400_get_irq(s, ASPEED_DEV_ETH1 + i));
}
/* XDMA */
if (!sysbus_realize(SYS_BUS_DEVICE(&s->xdma), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->xdma), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->xdma), 0,
sc->memmap[ASPEED_DEV_XDMA]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->xdma), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_XDMA));
+ aspeed_soc_ast2400_get_irq(s, ASPEED_DEV_XDMA));
/* GPIO */
if (!sysbus_realize(SYS_BUS_DEVICE(&s->gpio), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->gpio), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->gpio), 0,
sc->memmap[ASPEED_DEV_GPIO]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->gpio), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_GPIO));
+ aspeed_soc_ast2400_get_irq(s, ASPEED_DEV_GPIO));
/* SDHCI */
if (!sysbus_realize(SYS_BUS_DEVICE(&s->sdhci), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->sdhci), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->sdhci), 0,
sc->memmap[ASPEED_DEV_SDHCI]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->sdhci), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_SDHCI));
+ aspeed_soc_ast2400_get_irq(s, ASPEED_DEV_SDHCI));
/* LPC */
if (!sysbus_realize(SYS_BUS_DEVICE(&s->lpc), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->lpc), 0, sc->memmap[ASPEED_DEV_LPC]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->lpc), 0,
+ sc->memmap[ASPEED_DEV_LPC]);
/* Connect the LPC IRQ to the VIC */
sysbus_connect_irq(SYS_BUS_DEVICE(&s->lpc), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_LPC));
+ aspeed_soc_ast2400_get_irq(s, ASPEED_DEV_LPC));
/*
* On the AST2400 and AST2500 the one LPC IRQ is shared between all of the
@@ -496,10 +512,10 @@ static void aspeed_ast2400_soc_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->hace), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->hace), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->hace), 0,
sc->memmap[ASPEED_DEV_HACE]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->hace), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_HACE));
+ aspeed_soc_ast2400_get_irq(s, ASPEED_DEV_HACE));
}
static void aspeed_soc_ast2400_class_init(ObjectClass *oc, const void *data)
@@ -527,7 +543,6 @@ static void aspeed_soc_ast2400_class_init(ObjectClass *oc, const void *data)
sc->irqmap = aspeed_soc_ast2400_irqmap;
sc->memmap = aspeed_soc_ast2400_memmap;
sc->num_cpus = 1;
- sc->get_irq = aspeed_soc_ast2400_get_irq;
}
static void aspeed_soc_ast2500_class_init(ObjectClass *oc, const void *data)
@@ -555,7 +570,6 @@ static void aspeed_soc_ast2500_class_init(ObjectClass *oc, const void *data)
sc->irqmap = aspeed_soc_ast2500_irqmap;
sc->memmap = aspeed_soc_ast2500_memmap;
sc->num_cpus = 1;
- sc->get_irq = aspeed_soc_ast2400_get_irq;
}
static const TypeInfo aspeed_soc_ast2400_types[] = {
diff --git a/hw/arm/aspeed_ast2600.c b/hw/arm/aspeed_ast2600.c
index d12707f..498d1ec 100644
--- a/hw/arm/aspeed_ast2600.c
+++ b/hw/arm/aspeed_ast2600.c
@@ -48,11 +48,13 @@ static const hwaddr aspeed_soc_ast2600_memmap[] = {
[ASPEED_DEV_XDMA] = 0x1E6E7000,
[ASPEED_DEV_ADC] = 0x1E6E9000,
[ASPEED_DEV_DP] = 0x1E6EB000,
+ [ASPEED_DEV_PCIE_PHY1] = 0x1E6ED200,
[ASPEED_DEV_SBC] = 0x1E6F2000,
[ASPEED_DEV_EMMC_BC] = 0x1E6f5000,
[ASPEED_DEV_VIDEO] = 0x1E700000,
[ASPEED_DEV_SDHCI] = 0x1E740000,
[ASPEED_DEV_EMMC] = 0x1E750000,
+ [ASPEED_DEV_PCIE0] = 0x1E770000,
[ASPEED_DEV_GPIO] = 0x1E780000,
[ASPEED_DEV_GPIO_1_8V] = 0x1E780800,
[ASPEED_DEV_RTC] = 0x1E781000,
@@ -79,6 +81,7 @@ static const hwaddr aspeed_soc_ast2600_memmap[] = {
[ASPEED_DEV_FSI1] = 0x1E79B000,
[ASPEED_DEV_FSI2] = 0x1E79B100,
[ASPEED_DEV_I3C] = 0x1E7A0000,
+ [ASPEED_DEV_PCIE_MMIO1] = 0x70000000,
[ASPEED_DEV_SDRAM] = 0x80000000,
};
@@ -127,6 +130,7 @@ static const int aspeed_soc_ast2600_irqmap[] = {
[ASPEED_DEV_LPC] = 35,
[ASPEED_DEV_IBT] = 143,
[ASPEED_DEV_I2C] = 110, /* 110 -> 125 */
+ [ASPEED_DEV_PCIE0] = 168,
[ASPEED_DEV_PECI] = 38,
[ASPEED_DEV_ETH1] = 2,
[ASPEED_DEV_ETH2] = 3,
@@ -163,7 +167,7 @@ static void aspeed_soc_ast2600_init(Object *obj)
for (i = 0; i < sc->num_cpus; i++) {
object_initialize_child(obj, "cpu[*]", &a->cpu[i],
- aspeed_soc_cpu_type(sc));
+ aspeed_soc_cpu_type(sc->valid_cpu_types));
}
snprintf(typename, sizeof(typename), "aspeed.scu-%s", socname);
@@ -191,6 +195,10 @@ static void aspeed_soc_ast2600_init(Object *obj)
snprintf(typename, sizeof(typename), "aspeed.i2c-%s", socname);
object_initialize_child(obj, "i2c", &s->i2c, typename);
+ object_initialize_child(obj, "pcie-cfg", &s->pcie[0], TYPE_ASPEED_PCIE_CFG);
+ object_initialize_child(obj, "pcie-phy[*]", &s->pcie_phy[0],
+ TYPE_ASPEED_PCIE_PHY);
+
object_initialize_child(obj, "peci", &s->peci, TYPE_ASPEED_PECI);
snprintf(typename, sizeof(typename), "aspeed.fmc-%s", socname);
@@ -261,7 +269,7 @@ static void aspeed_soc_ast2600_init(Object *obj)
object_initialize_child(obj, "i3c", &s->i3c, TYPE_ASPEED_I3C);
- object_initialize_child(obj, "sbc", &s->sbc, TYPE_ASPEED_SBC);
+ object_initialize_child(obj, "sbc", &s->sbc, TYPE_ASPEED_AST2600_SBC);
object_initialize_child(obj, "iomem", &s->iomem, TYPE_UNIMPLEMENTED_DEVICE);
object_initialize_child(obj, "video", &s->video, TYPE_UNIMPLEMENTED_DEVICE);
@@ -285,6 +293,67 @@ static uint64_t aspeed_calc_affinity(int cpu)
return (0xf << ARM_AFF1_SHIFT) | cpu;
}
+/*
+ * PCIe Root Complex (RC)
+ *
+ * H2X register space (single block 0x00-0xFF):
+ * 0x00-0x7F : shared by RC_L (PCIe0) and RC_H (PCIe1)
+ * 0x80-0xBF : RC_L only
+ * 0xC0-0xFF : RC_H only
+ *
+ * Model scope / limitations:
+ * - Firmware supports RC_H only; this QEMU model does not support RC_L.
+ * - RC_H uses PHY1 and the MMIO window [0x70000000, 0x80000000]
+ * (aka MMIO1).
+ *
+ * Indexing convention (this model):
+ * - Expose a single logical instance at index 0.
+ * - pcie[0] -> hardware RC_H (PCIe1)
+ * - phy[0] -> hardware PHY1
+ * - mmio.0 -> guest address range MMIO1: 0x70000000-0x80000000
+ * - RC_L / PCIe0 is not created and mapped.
+ */
+static bool aspeed_soc_ast2600_pcie_realize(DeviceState *dev, Error **errp)
+{
+ Aspeed2600SoCState *a = ASPEED2600_SOC(dev);
+ AspeedSoCState *s = ASPEED_SOC(dev);
+ AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
+ MemoryRegion *mmio_mr = NULL;
+ qemu_irq irq;
+
+ if (!sysbus_realize(SYS_BUS_DEVICE(&s->pcie_phy[0]), errp)) {
+ return false;
+ }
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->pcie_phy[0]), 0,
+ sc->memmap[ASPEED_DEV_PCIE_PHY1]);
+
+ object_property_set_int(OBJECT(&s->pcie[0]), "dram-base",
+ sc->memmap[ASPEED_DEV_SDRAM],
+ &error_abort);
+ object_property_set_link(OBJECT(&s->pcie[0]), "dram", OBJECT(s->dram_mr),
+ &error_abort);
+ if (!sysbus_realize(SYS_BUS_DEVICE(&s->pcie[0]), errp)) {
+ return false;
+ }
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->pcie[0]), 0,
+ sc->memmap[ASPEED_DEV_PCIE0]);
+
+ irq = qdev_get_gpio_in(DEVICE(&a->a7mpcore),
+ sc->irqmap[ASPEED_DEV_PCIE0]);
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie[0].rc), 0, irq);
+
+ mmio_mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->pcie[0].rc), 1);
+ memory_region_init_alias(&s->pcie_mmio_alias[0], OBJECT(&s->pcie[0].rc),
+ "aspeed.pcie-mmio", mmio_mr,
+ sc->memmap[ASPEED_DEV_PCIE_MMIO1],
+ 0x10000000);
+ memory_region_add_subregion(s->memory,
+ sc->memmap[ASPEED_DEV_PCIE_MMIO1],
+ &s->pcie_mmio_alias[0]);
+
+ return true;
+}
+
static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp)
{
int i;
@@ -293,6 +362,7 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp)
AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
qemu_irq irq;
g_autofree char *sram_name = NULL;
+ int uart;
/* Default boot region (SPI memory or ROMs) */
memory_region_init(&s->spi_boot_container, OBJECT(s),
@@ -301,16 +371,19 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp)
&s->spi_boot_container);
/* IO space */
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&s->iomem), "aspeed.io",
+ aspeed_mmio_map_unimplemented(s->memory, SYS_BUS_DEVICE(&s->iomem),
+ "aspeed.io",
sc->memmap[ASPEED_DEV_IOMEM],
ASPEED_SOC_IOMEM_SIZE);
/* Video engine stub */
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&s->video), "aspeed.video",
+ aspeed_mmio_map_unimplemented(s->memory, SYS_BUS_DEVICE(&s->video),
+ "aspeed.video",
sc->memmap[ASPEED_DEV_VIDEO], 0x1000);
/* eMMC Boot Controller stub */
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&s->emmc_boot_controller),
+ aspeed_mmio_map_unimplemented(s->memory,
+ SYS_BUS_DEVICE(&s->emmc_boot_controller),
"aspeed.emmc-boot-controller",
sc->memmap[ASPEED_DEV_EMMC_BC], 0x1000);
@@ -345,7 +418,8 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp)
&error_abort);
sysbus_realize(SYS_BUS_DEVICE(&a->a7mpcore), &error_abort);
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&a->a7mpcore), 0, ASPEED_A7MPCORE_ADDR);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&a->a7mpcore), 0,
+ ASPEED_A7MPCORE_ADDR);
for (i = 0; i < sc->num_cpus; i++) {
SysBusDevice *sbd = SYS_BUS_DEVICE(&a->a7mpcore);
@@ -371,7 +445,8 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp)
sc->memmap[ASPEED_DEV_SRAM], &s->sram);
/* DPMCU */
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&s->dpmcu), "aspeed.dpmcu",
+ aspeed_mmio_map_unimplemented(s->memory, SYS_BUS_DEVICE(&s->dpmcu),
+ "aspeed.dpmcu",
sc->memmap[ASPEED_DEV_DPMCU],
ASPEED_SOC_DPMCU_SIZE);
@@ -379,15 +454,17 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->scu), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->scu), 0, sc->memmap[ASPEED_DEV_SCU]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->scu), 0,
+ sc->memmap[ASPEED_DEV_SCU]);
/* RTC */
if (!sysbus_realize(SYS_BUS_DEVICE(&s->rtc), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->rtc), 0, sc->memmap[ASPEED_DEV_RTC]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->rtc), 0,
+ sc->memmap[ASPEED_DEV_RTC]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->rtc), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_RTC));
+ aspeed_soc_ast2600_get_irq(s, ASPEED_DEV_RTC));
/* Timer */
object_property_set_link(OBJECT(&s->timerctrl), "scu", OBJECT(&s->scu),
@@ -395,10 +472,10 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->timerctrl), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->timerctrl), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->timerctrl), 0,
sc->memmap[ASPEED_DEV_TIMER1]);
for (i = 0; i < ASPEED_TIMER_NR_TIMERS; i++) {
- irq = aspeed_soc_get_irq(s, ASPEED_DEV_TIMER1 + i);
+ irq = aspeed_soc_ast2600_get_irq(s, ASPEED_DEV_TIMER1 + i);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->timerctrl), i, irq);
}
@@ -406,13 +483,19 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->adc), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->adc), 0, sc->memmap[ASPEED_DEV_ADC]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->adc), 0,
+ sc->memmap[ASPEED_DEV_ADC]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->adc), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_ADC));
+ aspeed_soc_ast2600_get_irq(s, ASPEED_DEV_ADC));
/* UART */
- if (!aspeed_soc_uart_realize(s, errp)) {
- return;
+ for (i = 0, uart = sc->uarts_base; i < sc->uarts_num; i++, uart++) {
+ if (!aspeed_soc_uart_realize(s->memory, &s->uart[i],
+ sc->memmap[uart], errp)) {
+ return;
+ }
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->uart[i]), 0,
+ aspeed_soc_ast2600_get_irq(s, uart));
}
/* I2C */
@@ -421,7 +504,8 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->i2c), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->i2c), 0, sc->memmap[ASPEED_DEV_I2C]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->i2c), 0,
+ sc->memmap[ASPEED_DEV_I2C]);
for (i = 0; i < ASPEED_I2C_GET_CLASS(&s->i2c)->num_busses; i++) {
irq = qdev_get_gpio_in(DEVICE(&a->a7mpcore),
sc->irqmap[ASPEED_DEV_I2C] + i);
@@ -433,10 +517,15 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->peci), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->peci), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->peci), 0,
sc->memmap[ASPEED_DEV_PECI]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->peci), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_PECI));
+ aspeed_soc_ast2600_get_irq(s, ASPEED_DEV_PECI));
+
+ /* PCIe Root Complex (RC) */
+ if (!aspeed_soc_ast2600_pcie_realize(dev, errp)) {
+ return;
+ }
/* FMC, The number of CS is set at the board level */
object_property_set_link(OBJECT(&s->fmc), "dram", OBJECT(s->dram_mr),
@@ -444,11 +533,12 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->fmc), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->fmc), 0, sc->memmap[ASPEED_DEV_FMC]);
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->fmc), 1,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->fmc), 0,
+ sc->memmap[ASPEED_DEV_FMC]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->fmc), 1,
ASPEED_SMC_GET_CLASS(&s->fmc)->flash_window_base);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->fmc), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_FMC));
+ aspeed_soc_ast2600_get_irq(s, ASPEED_DEV_FMC));
/* Set up an alias on the FMC CE0 region (boot default) */
MemoryRegion *fmc0_mmio = &s->fmc.flashes[0].mmio;
@@ -463,9 +553,9 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->spi[i]), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->spi[i]), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->spi[i]), 0,
sc->memmap[ASPEED_DEV_SPI1 + i]);
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->spi[i]), 1,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->spi[i]), 1,
ASPEED_SMC_GET_CLASS(&s->spi[i])->flash_window_base);
}
@@ -474,17 +564,18 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->ehci[i]), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->ehci[i]), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->ehci[i]), 0,
sc->memmap[ASPEED_DEV_EHCI1 + i]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->ehci[i]), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_EHCI1 + i));
+ aspeed_soc_ast2600_get_irq(s,
+ ASPEED_DEV_EHCI1 + i));
}
/* SDMC - SDRAM Memory Controller */
if (!sysbus_realize(SYS_BUS_DEVICE(&s->sdmc), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->sdmc), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->sdmc), 0,
sc->memmap[ASPEED_DEV_SDMC]);
/* Watch dog */
@@ -497,7 +588,7 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->wdt[i]), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->wdt[i]), 0, wdt_offset);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->wdt[i]), 0, wdt_offset);
}
/* RAM */
@@ -512,10 +603,10 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->ftgmac100[i]), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->ftgmac100[i]), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->ftgmac100[i]), 0,
sc->memmap[ASPEED_DEV_ETH1 + i]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->ftgmac100[i]), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_ETH1 + i));
+ aspeed_soc_ast2600_get_irq(s, ASPEED_DEV_ETH1 + i));
object_property_set_link(OBJECT(&s->mii[i]), "nic",
OBJECT(&s->ftgmac100[i]), &error_abort);
@@ -523,7 +614,7 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp)
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->mii[i]), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->mii[i]), 0,
sc->memmap[ASPEED_DEV_MII1 + i]);
}
@@ -531,55 +622,56 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->xdma), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->xdma), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->xdma), 0,
sc->memmap[ASPEED_DEV_XDMA]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->xdma), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_XDMA));
+ aspeed_soc_ast2600_get_irq(s, ASPEED_DEV_XDMA));
/* GPIO */
if (!sysbus_realize(SYS_BUS_DEVICE(&s->gpio), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->gpio), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->gpio), 0,
sc->memmap[ASPEED_DEV_GPIO]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->gpio), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_GPIO));
+ aspeed_soc_ast2600_get_irq(s, ASPEED_DEV_GPIO));
if (!sysbus_realize(SYS_BUS_DEVICE(&s->gpio_1_8v), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->gpio_1_8v), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->gpio_1_8v), 0,
sc->memmap[ASPEED_DEV_GPIO_1_8V]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->gpio_1_8v), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_GPIO_1_8V));
+ aspeed_soc_ast2600_get_irq(s, ASPEED_DEV_GPIO_1_8V));
/* SDHCI */
if (!sysbus_realize(SYS_BUS_DEVICE(&s->sdhci), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->sdhci), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->sdhci), 0,
sc->memmap[ASPEED_DEV_SDHCI]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->sdhci), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_SDHCI));
+ aspeed_soc_ast2600_get_irq(s, ASPEED_DEV_SDHCI));
/* eMMC */
if (!sysbus_realize(SYS_BUS_DEVICE(&s->emmc), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->emmc), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->emmc), 0,
sc->memmap[ASPEED_DEV_EMMC]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->emmc), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_EMMC));
+ aspeed_soc_ast2600_get_irq(s, ASPEED_DEV_EMMC));
/* LPC */
if (!sysbus_realize(SYS_BUS_DEVICE(&s->lpc), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->lpc), 0, sc->memmap[ASPEED_DEV_LPC]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->lpc), 0,
+ sc->memmap[ASPEED_DEV_LPC]);
/* Connect the LPC IRQ to the GIC. It is otherwise unused. */
sysbus_connect_irq(SYS_BUS_DEVICE(&s->lpc), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_LPC));
+ aspeed_soc_ast2600_get_irq(s, ASPEED_DEV_LPC));
/*
* On the AST2600 LPC subdevice IRQs are connected straight to the GIC.
@@ -611,16 +703,17 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->hace), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->hace), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->hace), 0,
sc->memmap[ASPEED_DEV_HACE]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->hace), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_HACE));
+ aspeed_soc_ast2600_get_irq(s, ASPEED_DEV_HACE));
/* I3C */
if (!sysbus_realize(SYS_BUS_DEVICE(&s->i3c), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->i3c), 0, sc->memmap[ASPEED_DEV_I3C]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->i3c), 0,
+ sc->memmap[ASPEED_DEV_I3C]);
for (i = 0; i < ASPEED_I3C_NR_DEVICES; i++) {
irq = qdev_get_gpio_in(DEVICE(&a->a7mpcore),
sc->irqmap[ASPEED_DEV_I3C] + i);
@@ -632,17 +725,18 @@ static void aspeed_soc_ast2600_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->sbc), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->sbc), 0, sc->memmap[ASPEED_DEV_SBC]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->sbc), 0,
+ sc->memmap[ASPEED_DEV_SBC]);
/* FSI */
for (i = 0; i < ASPEED_FSI_NUM; i++) {
if (!sysbus_realize(SYS_BUS_DEVICE(&s->fsi[i]), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->fsi[i]), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->fsi[i]), 0,
sc->memmap[ASPEED_DEV_FSI1 + i]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->fsi[i]), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_FSI1 + i));
+ aspeed_soc_ast2600_get_irq(s, ASPEED_DEV_FSI1 + i));
}
}
@@ -678,7 +772,6 @@ static void aspeed_soc_ast2600_class_init(ObjectClass *oc, const void *data)
sc->irqmap = aspeed_soc_ast2600_irqmap;
sc->memmap = aspeed_soc_ast2600_memmap;
sc->num_cpus = 2;
- sc->get_irq = aspeed_soc_ast2600_get_irq;
sc->boot_from_emmc = aspeed_soc_ast2600_boot_from_emmc;
}
diff --git a/hw/arm/aspeed_ast27x0-fc.c b/hw/arm/aspeed_ast27x0-fc.c
index 125a3ad..a61ecff 100644
--- a/hw/arm/aspeed_ast27x0-fc.c
+++ b/hw/arm/aspeed_ast27x0-fc.c
@@ -21,7 +21,7 @@
#include "hw/loader.h"
#include "hw/arm/boot.h"
#include "hw/block/flash.h"
-
+#include "hw/arm/aspeed_coprocessor.h"
#define TYPE_AST2700A1FC MACHINE_TYPE_NAME("ast2700fc")
OBJECT_DECLARE_SIMPLE_TYPE(Ast2700FCState, AST2700A1FC);
@@ -42,13 +42,13 @@ struct Ast2700FCState {
Clock *tsp_sysclk;
Aspeed27x0SoCState ca35;
- Aspeed27x0SSPSoCState ssp;
- Aspeed27x0TSPSoCState tsp;
+ Aspeed27x0CoprocessorState ssp;
+ Aspeed27x0CoprocessorState tsp;
bool mmio_exec;
};
-#define AST2700FC_BMC_RAM_SIZE (2 * GiB)
+#define AST2700FC_BMC_RAM_SIZE (1 * GiB)
#define AST2700FC_CM4_DRAM_SIZE (32 * MiB)
#define AST2700FC_HW_STRAP1 0x000000C0
@@ -56,7 +56,7 @@ struct Ast2700FCState {
#define AST2700FC_FMC_MODEL "w25q01jvq"
#define AST2700FC_SPI_MODEL "w25q512jv"
-static void ast2700fc_ca35_init(MachineState *machine)
+static bool ast2700fc_ca35_init(MachineState *machine, Error **errp)
{
Ast2700FCState *s = AST2700A1FC(machine);
AspeedSoCState *soc;
@@ -68,35 +68,33 @@ static void ast2700fc_ca35_init(MachineState *machine)
memory_region_init(&s->ca35_memory, OBJECT(&s->ca35), "ca35-memory",
UINT64_MAX);
+ memory_region_add_subregion(get_system_memory(), 0, &s->ca35_memory);
if (!memory_region_init_ram(&s->ca35_dram, OBJECT(&s->ca35), "ca35-dram",
- AST2700FC_BMC_RAM_SIZE, &error_abort)) {
- return;
- }
- if (!object_property_set_link(OBJECT(&s->ca35), "memory",
- OBJECT(&s->ca35_memory),
- &error_abort)) {
- return;
- };
- if (!object_property_set_link(OBJECT(&s->ca35), "dram",
- OBJECT(&s->ca35_dram), &error_abort)) {
- return;
- }
- if (!object_property_set_int(OBJECT(&s->ca35), "ram-size",
- AST2700FC_BMC_RAM_SIZE, &error_abort)) {
- return;
+ AST2700FC_BMC_RAM_SIZE, errp)) {
+ return false;
}
- if (!object_property_set_int(OBJECT(&s->ca35), "hw-strap1",
- AST2700FC_HW_STRAP1, &error_abort)) {
- return;
+ object_property_set_link(OBJECT(&s->ca35), "memory",
+ OBJECT(&s->ca35_memory), &error_abort);
+ object_property_set_link(OBJECT(&s->ca35), "dram", OBJECT(&s->ca35_dram),
+ &error_abort);
+ object_property_set_int(OBJECT(&s->ca35), "ram-size",
+ AST2700FC_BMC_RAM_SIZE, &error_abort);
+
+ for (int i = 0; i < sc->macs_num; i++) {
+ if (!qemu_configure_nic_device(DEVICE(&soc->ftgmac100[i]),
+ true, NULL)) {
+ break;
+ }
}
- if (!object_property_set_int(OBJECT(&s->ca35), "hw-strap2",
- AST2700FC_HW_STRAP2, &error_abort)) {
- return;
- }
- aspeed_soc_uart_set_chr(soc, ASPEED_DEV_UART12, serial_hd(0));
- if (!qdev_realize(DEVICE(&s->ca35), NULL, &error_abort)) {
- return;
+ object_property_set_int(OBJECT(&s->ca35), "hw-strap1",
+ AST2700FC_HW_STRAP1, &error_abort);
+ object_property_set_int(OBJECT(&s->ca35), "hw-strap2",
+ AST2700FC_HW_STRAP2, &error_abort);
+ aspeed_soc_uart_set_chr(soc->uart, ASPEED_DEV_UART12, sc->uarts_base,
+ sc->uarts_num, serial_hd(0));
+ if (!qdev_realize(DEVICE(&s->ca35), NULL, errp)) {
+ return false;
}
/*
@@ -111,61 +109,71 @@ static void ast2700fc_ca35_init(MachineState *machine)
ast2700fc_board_info.loader_start = sc->memmap[ASPEED_DEV_SDRAM];
arm_load_kernel(ARM_CPU(first_cpu), machine, &ast2700fc_board_info);
+
+ return true;
}
-static void ast2700fc_ssp_init(MachineState *machine)
+static bool ast2700fc_ssp_init(MachineState *machine, Error **errp)
{
- AspeedSoCState *soc;
+ AspeedCoprocessorState *soc;
+ AspeedCoprocessorClass *sc;
Ast2700FCState *s = AST2700A1FC(machine);
s->ssp_sysclk = clock_new(OBJECT(s), "SSP_SYSCLK");
clock_set_hz(s->ssp_sysclk, 200000000ULL);
- object_initialize_child(OBJECT(s), "ssp", &s->ssp, TYPE_ASPEED27X0SSP_SOC);
+ object_initialize_child(OBJECT(s), "ssp", &s->ssp,
+ TYPE_ASPEED27X0SSP_COPROCESSOR);
memory_region_init(&s->ssp_memory, OBJECT(&s->ssp), "ssp-memory",
UINT64_MAX);
qdev_connect_clock_in(DEVICE(&s->ssp), "sysclk", s->ssp_sysclk);
- if (!object_property_set_link(OBJECT(&s->ssp), "memory",
- OBJECT(&s->ssp_memory), &error_abort)) {
- return;
+ object_property_set_link(OBJECT(&s->ssp), "memory",
+ OBJECT(&s->ssp_memory), &error_abort);
+
+ soc = ASPEED_COPROCESSOR(&s->ssp);
+ sc = ASPEED_COPROCESSOR_GET_CLASS(soc);
+ aspeed_soc_uart_set_chr(soc->uart, ASPEED_DEV_UART4, sc->uarts_base,
+ sc->uarts_num, serial_hd(1));
+ if (!qdev_realize(DEVICE(&s->ssp), NULL, errp)) {
+ return false;
}
- soc = ASPEED_SOC(&s->ssp);
- aspeed_soc_uart_set_chr(soc, ASPEED_DEV_UART4, serial_hd(1));
- if (!qdev_realize(DEVICE(&s->ssp), NULL, &error_abort)) {
- return;
- }
+ return true;
}
-static void ast2700fc_tsp_init(MachineState *machine)
+static bool ast2700fc_tsp_init(MachineState *machine, Error **errp)
{
- AspeedSoCState *soc;
+ AspeedCoprocessorState *soc;
+ AspeedCoprocessorClass *sc;
Ast2700FCState *s = AST2700A1FC(machine);
s->tsp_sysclk = clock_new(OBJECT(s), "TSP_SYSCLK");
clock_set_hz(s->tsp_sysclk, 200000000ULL);
- object_initialize_child(OBJECT(s), "tsp", &s->tsp, TYPE_ASPEED27X0TSP_SOC);
+ object_initialize_child(OBJECT(s), "tsp", &s->tsp,
+ TYPE_ASPEED27X0TSP_COPROCESSOR);
memory_region_init(&s->tsp_memory, OBJECT(&s->tsp), "tsp-memory",
UINT64_MAX);
qdev_connect_clock_in(DEVICE(&s->tsp), "sysclk", s->tsp_sysclk);
- if (!object_property_set_link(OBJECT(&s->tsp), "memory",
- OBJECT(&s->tsp_memory), &error_abort)) {
- return;
+ object_property_set_link(OBJECT(&s->tsp), "memory",
+ OBJECT(&s->tsp_memory), &error_abort);
+
+ soc = ASPEED_COPROCESSOR(&s->tsp);
+ sc = ASPEED_COPROCESSOR_GET_CLASS(soc);
+ aspeed_soc_uart_set_chr(soc->uart, ASPEED_DEV_UART7, sc->uarts_base,
+ sc->uarts_num, serial_hd(2));
+ if (!qdev_realize(DEVICE(&s->tsp), NULL, errp)) {
+ return false;
}
- soc = ASPEED_SOC(&s->tsp);
- aspeed_soc_uart_set_chr(soc, ASPEED_DEV_UART7, serial_hd(2));
- if (!qdev_realize(DEVICE(&s->tsp), NULL, &error_abort)) {
- return;
- }
+ return true;
}
static void ast2700fc_init(MachineState *machine)
{
- ast2700fc_ca35_init(machine);
- ast2700fc_ssp_init(machine);
- ast2700fc_tsp_init(machine);
+ ast2700fc_ca35_init(machine, &error_abort);
+ ast2700fc_ssp_init(machine, &error_abort);
+ ast2700fc_tsp_init(machine, &error_abort);
}
static void ast2700fc_class_init(ObjectClass *oc, const void *data)
diff --git a/hw/arm/aspeed_ast27x0-ssp.c b/hw/arm/aspeed_ast27x0-ssp.c
index 80ec599..936c7c7 100644
--- a/hw/arm/aspeed_ast27x0-ssp.c
+++ b/hw/arm/aspeed_ast27x0-ssp.c
@@ -1,5 +1,5 @@
/*
- * ASPEED Ast27x0 SSP SoC
+ * ASPEED Ast27x0 SSP Coprocessor
*
* Copyright (C) 2025 ASPEED Technology Inc.
*
@@ -14,6 +14,7 @@
#include "hw/qdev-clock.h"
#include "hw/misc/unimp.h"
#include "hw/arm/aspeed_soc.h"
+#include "hw/arm/aspeed_coprocessor.h"
#define AST2700_SSP_RAM_SIZE (32 * MiB)
@@ -104,10 +105,11 @@ static struct nvic_intc_irq_info ast2700_ssp_intcmap[] = {
{136, 0, 9, NULL},
};
-static qemu_irq aspeed_soc_ast27x0ssp_get_irq(AspeedSoCState *s, int dev)
+static qemu_irq aspeed_soc_ast27x0ssp_get_irq(AspeedCoprocessorState *s,
+ int dev)
{
- Aspeed27x0SSPSoCState *a = ASPEED27X0SSP_SOC(s);
- AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
+ Aspeed27x0CoprocessorState *a = ASPEED27X0SSP_COPROCESSOR(s);
+ AspeedCoprocessorClass *sc = ASPEED_COPROCESSOR_GET_CLASS(s);
int or_idx;
int idx;
@@ -128,9 +130,9 @@ static qemu_irq aspeed_soc_ast27x0ssp_get_irq(AspeedSoCState *s, int dev)
static void aspeed_soc_ast27x0ssp_init(Object *obj)
{
- Aspeed27x0SSPSoCState *a = ASPEED27X0SSP_SOC(obj);
- AspeedSoCState *s = ASPEED_SOC(obj);
- AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
+ Aspeed27x0CoprocessorState *a = ASPEED27X0SSP_COPROCESSOR(obj);
+ AspeedCoprocessorState *s = ASPEED_COPROCESSOR(obj);
+ AspeedCoprocessorClass *sc = ASPEED_COPROCESSOR_GET_CLASS(s);
int i;
object_initialize_child(obj, "armv7m", &a->armv7m, TYPE_ARMV7M);
@@ -159,11 +161,12 @@ static void aspeed_soc_ast27x0ssp_init(Object *obj)
static void aspeed_soc_ast27x0ssp_realize(DeviceState *dev_soc, Error **errp)
{
- Aspeed27x0SSPSoCState *a = ASPEED27X0SSP_SOC(dev_soc);
- AspeedSoCState *s = ASPEED_SOC(dev_soc);
- AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
+ Aspeed27x0CoprocessorState *a = ASPEED27X0SSP_COPROCESSOR(dev_soc);
+ AspeedCoprocessorState *s = ASPEED_COPROCESSOR(dev_soc);
+ AspeedCoprocessorClass *sc = ASPEED_COPROCESSOR_GET_CLASS(s);
DeviceState *armv7m;
g_autofree char *sram_name = NULL;
+ int uart;
int i;
if (!clock_has_source(s->sysclk)) {
@@ -174,7 +177,8 @@ static void aspeed_soc_ast27x0ssp_realize(DeviceState *dev_soc, Error **errp)
/* AST27X0 SSP Core */
armv7m = DEVICE(&a->armv7m);
qdev_prop_set_uint32(armv7m, "num-irq", 256);
- qdev_prop_set_string(armv7m, "cpu-type", aspeed_soc_cpu_type(sc));
+ qdev_prop_set_string(armv7m, "cpu-type",
+ aspeed_soc_cpu_type(sc->valid_cpu_types));
qdev_connect_clock_in(armv7m, "cpuclk", s->sysclk);
object_property_set_link(OBJECT(&a->armv7m), "memory",
OBJECT(s->memory), &error_abort);
@@ -183,8 +187,8 @@ static void aspeed_soc_ast27x0ssp_realize(DeviceState *dev_soc, Error **errp)
sram_name = g_strdup_printf("aspeed.dram.%d",
CPU(a->armv7m.cpu)->cpu_index);
- if (!memory_region_init_ram(&s->sram, OBJECT(s), sram_name, sc->sram_size,
- errp)) {
+ if (!memory_region_init_ram(&s->sram, OBJECT(s), sram_name,
+ AST2700_SSP_RAM_SIZE, errp)) {
return;
}
memory_region_add_subregion(s->memory,
@@ -195,14 +199,15 @@ static void aspeed_soc_ast27x0ssp_realize(DeviceState *dev_soc, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->scu), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->scu), 0, sc->memmap[ASPEED_DEV_SCU]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->scu), 0,
+ sc->memmap[ASPEED_DEV_SCU]);
/* INTC */
if (!sysbus_realize(SYS_BUS_DEVICE(&a->intc[0]), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&a->intc[0]), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&a->intc[0]), 0,
sc->memmap[ASPEED_DEV_INTC]);
/* INTCIO */
@@ -210,7 +215,7 @@ static void aspeed_soc_ast27x0ssp_realize(DeviceState *dev_soc, Error **errp)
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&a->intc[1]), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&a->intc[1]), 0,
sc->memmap[ASPEED_DEV_INTCIO]);
/* irq source orgates -> INTC0 */
@@ -235,57 +240,56 @@ static void aspeed_soc_ast27x0ssp_realize(DeviceState *dev_soc, Error **errp)
qdev_get_gpio_in(DEVICE(&a->intc[0].orgates[0]), i));
}
/* UART */
- if (!aspeed_soc_uart_realize(s, errp)) {
- return;
+ for (i = 0, uart = sc->uarts_base; i < sc->uarts_num; i++, uart++) {
+ if (!aspeed_soc_uart_realize(s->memory, &s->uart[i],
+ sc->memmap[uart], errp)) {
+ return;
+ }
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->uart[i]), 0,
+ aspeed_soc_ast27x0ssp_get_irq(s, uart));
}
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&s->timerctrl),
+ aspeed_mmio_map_unimplemented(s->memory, SYS_BUS_DEVICE(&s->timerctrl),
"aspeed.timerctrl",
sc->memmap[ASPEED_DEV_TIMER1], 0x200);
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&a->ipc[0]),
+ aspeed_mmio_map_unimplemented(s->memory, SYS_BUS_DEVICE(&a->ipc[0]),
"aspeed.ipc0",
sc->memmap[ASPEED_DEV_IPC0], 0x1000);
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&a->ipc[1]),
+ aspeed_mmio_map_unimplemented(s->memory, SYS_BUS_DEVICE(&a->ipc[1]),
"aspeed.ipc1",
sc->memmap[ASPEED_DEV_IPC1], 0x1000);
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&a->scuio),
+ aspeed_mmio_map_unimplemented(s->memory, SYS_BUS_DEVICE(&a->scuio),
"aspeed.scuio",
sc->memmap[ASPEED_DEV_SCUIO], 0x1000);
}
-static void aspeed_soc_ast27x0ssp_class_init(ObjectClass *klass, const void *data)
+static void aspeed_soc_ast27x0ssp_class_init(ObjectClass *klass,
+ const void *data)
{
static const char * const valid_cpu_types[] = {
ARM_CPU_TYPE_NAME("cortex-m4"), /* TODO: cortex-m4f */
NULL
};
DeviceClass *dc = DEVICE_CLASS(klass);
- AspeedSoCClass *sc = ASPEED_SOC_CLASS(dc);
+ AspeedCoprocessorClass *sc = ASPEED_COPROCESSOR_CLASS(dc);
- /* Reason: The Aspeed SoC can only be instantiated from a board */
+ /* Reason: The Aspeed Coprocessor can only be instantiated from a board */
dc->user_creatable = false;
dc->realize = aspeed_soc_ast27x0ssp_realize;
sc->valid_cpu_types = valid_cpu_types;
sc->silicon_rev = AST2700_A1_SILICON_REV;
- sc->sram_size = AST2700_SSP_RAM_SIZE;
- sc->spis_num = 0;
- sc->ehcis_num = 0;
- sc->wdts_num = 0;
- sc->macs_num = 0;
sc->uarts_num = 13;
sc->uarts_base = ASPEED_DEV_UART0;
sc->irqmap = aspeed_soc_ast27x0ssp_irqmap;
sc->memmap = aspeed_soc_ast27x0ssp_memmap;
- sc->num_cpus = 1;
- sc->get_irq = aspeed_soc_ast27x0ssp_get_irq;
}
static const TypeInfo aspeed_soc_ast27x0ssp_types[] = {
{
- .name = TYPE_ASPEED27X0SSP_SOC,
- .parent = TYPE_ASPEED_SOC,
- .instance_size = sizeof(Aspeed27x0SSPSoCState),
+ .name = TYPE_ASPEED27X0SSP_COPROCESSOR,
+ .parent = TYPE_ASPEED_COPROCESSOR,
+ .instance_size = sizeof(Aspeed27x0CoprocessorState),
.instance_init = aspeed_soc_ast27x0ssp_init,
.class_init = aspeed_soc_ast27x0ssp_class_init,
},
diff --git a/hw/arm/aspeed_ast27x0-tsp.c b/hw/arm/aspeed_ast27x0-tsp.c
index 4e0efae..9318f8c 100644
--- a/hw/arm/aspeed_ast27x0-tsp.c
+++ b/hw/arm/aspeed_ast27x0-tsp.c
@@ -1,5 +1,5 @@
/*
- * ASPEED Ast27x0 TSP SoC
+ * ASPEED Ast27x0 TSP Coprocessor
*
* Copyright (C) 2025 ASPEED Technology Inc.
*
@@ -14,6 +14,7 @@
#include "hw/qdev-clock.h"
#include "hw/misc/unimp.h"
#include "hw/arm/aspeed_soc.h"
+#include "hw/arm/aspeed_coprocessor.h"
#define AST2700_TSP_RAM_SIZE (32 * MiB)
@@ -104,10 +105,11 @@ static struct nvic_intc_irq_info ast2700_tsp_intcmap[] = {
{136, 0, 9, NULL},
};
-static qemu_irq aspeed_soc_ast27x0tsp_get_irq(AspeedSoCState *s, int dev)
+static qemu_irq aspeed_soc_ast27x0tsp_get_irq(AspeedCoprocessorState *s,
+ int dev)
{
- Aspeed27x0TSPSoCState *a = ASPEED27X0TSP_SOC(s);
- AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
+ Aspeed27x0CoprocessorState *a = ASPEED27X0TSP_COPROCESSOR(s);
+ AspeedCoprocessorClass *sc = ASPEED_COPROCESSOR_GET_CLASS(s);
int or_idx;
int idx;
@@ -128,9 +130,9 @@ static qemu_irq aspeed_soc_ast27x0tsp_get_irq(AspeedSoCState *s, int dev)
static void aspeed_soc_ast27x0tsp_init(Object *obj)
{
- Aspeed27x0TSPSoCState *a = ASPEED27X0TSP_SOC(obj);
- AspeedSoCState *s = ASPEED_SOC(obj);
- AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
+ Aspeed27x0CoprocessorState *a = ASPEED27X0TSP_COPROCESSOR(obj);
+ AspeedCoprocessorState *s = ASPEED_COPROCESSOR(obj);
+ AspeedCoprocessorClass *sc = ASPEED_COPROCESSOR_GET_CLASS(s);
int i;
object_initialize_child(obj, "armv7m", &a->armv7m, TYPE_ARMV7M);
@@ -159,11 +161,12 @@ static void aspeed_soc_ast27x0tsp_init(Object *obj)
static void aspeed_soc_ast27x0tsp_realize(DeviceState *dev_soc, Error **errp)
{
- Aspeed27x0TSPSoCState *a = ASPEED27X0TSP_SOC(dev_soc);
- AspeedSoCState *s = ASPEED_SOC(dev_soc);
- AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
+ Aspeed27x0CoprocessorState *a = ASPEED27X0TSP_COPROCESSOR(dev_soc);
+ AspeedCoprocessorState *s = ASPEED_COPROCESSOR(dev_soc);
+ AspeedCoprocessorClass *sc = ASPEED_COPROCESSOR_GET_CLASS(s);
DeviceState *armv7m;
g_autofree char *sram_name = NULL;
+ int uart;
int i;
if (!clock_has_source(s->sysclk)) {
@@ -174,7 +177,8 @@ static void aspeed_soc_ast27x0tsp_realize(DeviceState *dev_soc, Error **errp)
/* AST27X0 TSP Core */
armv7m = DEVICE(&a->armv7m);
qdev_prop_set_uint32(armv7m, "num-irq", 256);
- qdev_prop_set_string(armv7m, "cpu-type", aspeed_soc_cpu_type(sc));
+ qdev_prop_set_string(armv7m, "cpu-type",
+ aspeed_soc_cpu_type(sc->valid_cpu_types));
qdev_connect_clock_in(armv7m, "cpuclk", s->sysclk);
object_property_set_link(OBJECT(&a->armv7m), "memory",
OBJECT(s->memory), &error_abort);
@@ -183,8 +187,8 @@ static void aspeed_soc_ast27x0tsp_realize(DeviceState *dev_soc, Error **errp)
sram_name = g_strdup_printf("aspeed.dram.%d",
CPU(a->armv7m.cpu)->cpu_index);
- if (!memory_region_init_ram(&s->sram, OBJECT(s), sram_name, sc->sram_size,
- errp)) {
+ if (!memory_region_init_ram(&s->sram, OBJECT(s), sram_name,
+ AST2700_TSP_RAM_SIZE, errp)) {
return;
}
memory_region_add_subregion(s->memory,
@@ -195,14 +199,15 @@ static void aspeed_soc_ast27x0tsp_realize(DeviceState *dev_soc, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->scu), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->scu), 0, sc->memmap[ASPEED_DEV_SCU]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->scu), 0,
+ sc->memmap[ASPEED_DEV_SCU]);
/* INTC */
if (!sysbus_realize(SYS_BUS_DEVICE(&a->intc[0]), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&a->intc[0]), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&a->intc[0]), 0,
sc->memmap[ASPEED_DEV_INTC]);
/* INTCIO */
@@ -210,7 +215,7 @@ static void aspeed_soc_ast27x0tsp_realize(DeviceState *dev_soc, Error **errp)
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&a->intc[1]), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&a->intc[1]), 0,
sc->memmap[ASPEED_DEV_INTCIO]);
/* irq source orgates -> INTC */
@@ -235,57 +240,56 @@ static void aspeed_soc_ast27x0tsp_realize(DeviceState *dev_soc, Error **errp)
qdev_get_gpio_in(DEVICE(&a->intc[0].orgates[0]), i));
}
/* UART */
- if (!aspeed_soc_uart_realize(s, errp)) {
- return;
+ for (i = 0, uart = sc->uarts_base; i < sc->uarts_num; i++, uart++) {
+ if (!aspeed_soc_uart_realize(s->memory, &s->uart[i],
+ sc->memmap[uart], errp)) {
+ return;
+ }
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->uart[i]), 0,
+ aspeed_soc_ast27x0tsp_get_irq(s, uart));
}
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&s->timerctrl),
+ aspeed_mmio_map_unimplemented(s->memory, SYS_BUS_DEVICE(&s->timerctrl),
"aspeed.timerctrl",
sc->memmap[ASPEED_DEV_TIMER1], 0x200);
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&a->ipc[0]),
+ aspeed_mmio_map_unimplemented(s->memory, SYS_BUS_DEVICE(&a->ipc[0]),
"aspeed.ipc0",
sc->memmap[ASPEED_DEV_IPC0], 0x1000);
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&a->ipc[1]),
+ aspeed_mmio_map_unimplemented(s->memory, SYS_BUS_DEVICE(&a->ipc[1]),
"aspeed.ipc1",
sc->memmap[ASPEED_DEV_IPC1], 0x1000);
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&a->scuio),
+ aspeed_mmio_map_unimplemented(s->memory, SYS_BUS_DEVICE(&a->scuio),
"aspeed.scuio",
sc->memmap[ASPEED_DEV_SCUIO], 0x1000);
}
-static void aspeed_soc_ast27x0tsp_class_init(ObjectClass *klass, const void *data)
+static void aspeed_soc_ast27x0tsp_class_init(ObjectClass *klass,
+ const void *data)
{
static const char * const valid_cpu_types[] = {
ARM_CPU_TYPE_NAME("cortex-m4"), /* TODO cortex-m4f */
NULL
};
DeviceClass *dc = DEVICE_CLASS(klass);
- AspeedSoCClass *sc = ASPEED_SOC_CLASS(dc);
+ AspeedCoprocessorClass *sc = ASPEED_COPROCESSOR_CLASS(dc);
- /* Reason: The Aspeed SoC can only be instantiated from a board */
+ /* Reason: The Aspeed Coprocessor can only be instantiated from a board */
dc->user_creatable = false;
dc->realize = aspeed_soc_ast27x0tsp_realize;
sc->valid_cpu_types = valid_cpu_types;
sc->silicon_rev = AST2700_A1_SILICON_REV;
- sc->sram_size = AST2700_TSP_RAM_SIZE;
- sc->spis_num = 0;
- sc->ehcis_num = 0;
- sc->wdts_num = 0;
- sc->macs_num = 0;
sc->uarts_num = 13;
sc->uarts_base = ASPEED_DEV_UART0;
sc->irqmap = aspeed_soc_ast27x0tsp_irqmap;
sc->memmap = aspeed_soc_ast27x0tsp_memmap;
- sc->num_cpus = 1;
- sc->get_irq = aspeed_soc_ast27x0tsp_get_irq;
}
static const TypeInfo aspeed_soc_ast27x0tsp_types[] = {
{
- .name = TYPE_ASPEED27X0TSP_SOC,
- .parent = TYPE_ASPEED_SOC,
- .instance_size = sizeof(Aspeed27x0TSPSoCState),
+ .name = TYPE_ASPEED27X0TSP_COPROCESSOR,
+ .parent = TYPE_ASPEED_COPROCESSOR,
+ .instance_size = sizeof(Aspeed27x0CoprocessorState),
.instance_init = aspeed_soc_ast27x0tsp_init,
.class_init = aspeed_soc_ast27x0tsp_class_init,
},
diff --git a/hw/arm/aspeed_ast27x0.c b/hw/arm/aspeed_ast27x0.c
index 1974a25..c484bcd 100644
--- a/hw/arm/aspeed_ast27x0.c
+++ b/hw/arm/aspeed_ast27x0.c
@@ -23,14 +23,14 @@
#include "qobject/qlist.h"
#include "qemu/log.h"
-#define AST2700_SOC_IO_SIZE 0x01000000
+#define AST2700_SOC_IO_SIZE 0x00FE0000
#define AST2700_SOC_IOMEM_SIZE 0x01000000
#define AST2700_SOC_DPMCU_SIZE 0x00040000
#define AST2700_SOC_LTPI_SIZE 0x01000000
static const hwaddr aspeed_soc_ast2700_memmap[] = {
- [ASPEED_DEV_IOMEM] = 0x00000000,
[ASPEED_DEV_VBOOTROM] = 0x00000000,
+ [ASPEED_DEV_IOMEM] = 0x00020000,
[ASPEED_DEV_SRAM] = 0x10000000,
[ASPEED_DEV_DPMCU] = 0x11000000,
[ASPEED_DEV_IOMEM0] = 0x12000000,
@@ -38,6 +38,8 @@ static const hwaddr aspeed_soc_ast2700_memmap[] = {
[ASPEED_DEV_EHCI2] = 0x12063000,
[ASPEED_DEV_HACE] = 0x12070000,
[ASPEED_DEV_EMMC] = 0x12090000,
+ [ASPEED_DEV_PCIE0] = 0x120E0000,
+ [ASPEED_DEV_PCIE1] = 0x120F0000,
[ASPEED_DEV_INTC] = 0x12100000,
[ASPEED_GIC_DIST] = 0x12200000,
[ASPEED_GIC_REDIST] = 0x12280000,
@@ -45,6 +47,8 @@ static const hwaddr aspeed_soc_ast2700_memmap[] = {
[ASPEED_DEV_SCU] = 0x12C02000,
[ASPEED_DEV_RTC] = 0x12C0F000,
[ASPEED_DEV_TIMER1] = 0x12C10000,
+ [ASPEED_DEV_PCIE_PHY0] = 0x12C15000,
+ [ASPEED_DEV_PCIE_PHY1] = 0x12C15800,
[ASPEED_DEV_SLI] = 0x12C17000,
[ASPEED_DEV_UART4] = 0x12C1A000,
[ASPEED_DEV_IOMEM1] = 0x14000000,
@@ -59,6 +63,7 @@ static const hwaddr aspeed_soc_ast2700_memmap[] = {
[ASPEED_DEV_ETH2] = 0x14060000,
[ASPEED_DEV_ETH3] = 0x14070000,
[ASPEED_DEV_SDHCI] = 0x14080000,
+ [ASPEED_DEV_PCIE2] = 0x140D0000,
[ASPEED_DEV_EHCI3] = 0x14121000,
[ASPEED_DEV_EHCI4] = 0x14123000,
[ASPEED_DEV_ADC] = 0x14C00000,
@@ -66,6 +71,7 @@ static const hwaddr aspeed_soc_ast2700_memmap[] = {
[ASPEED_DEV_GPIO] = 0x14C0B000,
[ASPEED_DEV_I2C] = 0x14C0F000,
[ASPEED_DEV_INTCIO] = 0x14C18000,
+ [ASPEED_DEV_PCIE_PHY2] = 0x14C1C000,
[ASPEED_DEV_SLIIO] = 0x14C1E000,
[ASPEED_DEV_VUART] = 0x14C30000,
[ASPEED_DEV_UART0] = 0x14C33000,
@@ -81,6 +87,9 @@ static const hwaddr aspeed_soc_ast2700_memmap[] = {
[ASPEED_DEV_UART11] = 0x14C33A00,
[ASPEED_DEV_UART12] = 0x14C33B00,
[ASPEED_DEV_WDT] = 0x14C37000,
+ [ASPEED_DEV_PCIE_MMIO0] = 0x60000000,
+ [ASPEED_DEV_PCIE_MMIO1] = 0x80000000,
+ [ASPEED_DEV_PCIE_MMIO2] = 0xA0000000,
[ASPEED_DEV_SPI_BOOT] = 0x100000000,
[ASPEED_DEV_LTPI] = 0x300000000,
[ASPEED_DEV_SDRAM] = 0x400000000,
@@ -156,6 +165,8 @@ static const int aspeed_soc_ast2700a1_irqmap[] = {
[ASPEED_DEV_DP] = 28,
[ASPEED_DEV_EHCI1] = 33,
[ASPEED_DEV_EHCI2] = 37,
+ [ASPEED_DEV_PCIE0] = 56,
+ [ASPEED_DEV_PCIE1] = 57,
[ASPEED_DEV_LPC] = 192,
[ASPEED_DEV_IBT] = 192,
[ASPEED_DEV_KCS] = 192,
@@ -166,6 +177,7 @@ static const int aspeed_soc_ast2700a1_irqmap[] = {
[ASPEED_DEV_WDT] = 195,
[ASPEED_DEV_PWM] = 195,
[ASPEED_DEV_I3C] = 195,
+ [ASPEED_DEV_PCIE2] = 196,
[ASPEED_DEV_UART0] = 196,
[ASPEED_DEV_UART1] = 196,
[ASPEED_DEV_UART2] = 196,
@@ -233,6 +245,7 @@ static const int ast2700_gic132_gic196_intcmap[] = {
[ASPEED_DEV_UART12] = 18,
[ASPEED_DEV_EHCI3] = 28,
[ASPEED_DEV_EHCI4] = 29,
+ [ASPEED_DEV_PCIE2] = 31,
};
/* GICINT 133 */
@@ -346,8 +359,9 @@ static void aspeed_ram_capacity_write(void *opaque, hwaddr addr, uint64_t data,
* If writes the data to the address which is beyond the ram size,
* it would write the data to the "address % ram_size".
*/
- result = address_space_write(&s->dram_as, addr % ram_size,
- MEMTXATTRS_UNSPECIFIED, &data, 4);
+ address_space_stl_le(&s->dram_as, addr % ram_size, data,
+ MEMTXATTRS_UNSPECIFIED, &result);
+
if (result != MEMTX_OK) {
qemu_log_mask(LOG_GUEST_ERROR,
"%s: DRAM write failed, addr:0x%" HWADDR_PRIx
@@ -360,9 +374,10 @@ static const MemoryRegionOps aspeed_ram_capacity_ops = {
.read = aspeed_ram_capacity_read,
.write = aspeed_ram_capacity_write,
.endianness = DEVICE_LITTLE_ENDIAN,
+ .impl.min_access_size = 4,
.valid = {
- .min_access_size = 1,
- .max_access_size = 8,
+ .min_access_size = 4,
+ .max_access_size = 4,
},
};
@@ -421,7 +436,7 @@ static void aspeed_soc_ast2700_init(Object *obj)
for (i = 0; i < sc->num_cpus; i++) {
object_initialize_child(obj, "cpu[*]", &a->cpu[i],
- aspeed_soc_cpu_type(sc));
+ aspeed_soc_cpu_type(sc->valid_cpu_types));
}
object_initialize_child(obj, "gic", &a->gic, gicv3_class_name());
@@ -517,6 +532,17 @@ static void aspeed_soc_ast2700_init(Object *obj)
snprintf(typename, sizeof(typename), "aspeed.hace-%s", socname);
object_initialize_child(obj, "hace", &s->hace, typename);
+
+ for (i = 0; i < sc->pcie_num; i++) {
+ snprintf(typename, sizeof(typename), "aspeed.pcie-phy-%s", socname);
+ object_initialize_child(obj, "pcie-phy[*]", &s->pcie_phy[i], typename);
+ object_property_set_int(OBJECT(&s->pcie_phy[i]), "id", i, &error_abort);
+
+ snprintf(typename, sizeof(typename), "aspeed.pcie-cfg-%s", socname);
+ object_initialize_child(obj, "pcie-cfg[*]", &s->pcie[i], typename);
+ object_property_set_int(OBJECT(&s->pcie[i]), "id", i, &error_abort);
+ }
+
object_initialize_child(obj, "dpmcu", &s->dpmcu,
TYPE_UNIMPLEMENTED_DEVICE);
object_initialize_child(obj, "ltpi", &s->ltpi,
@@ -563,9 +589,9 @@ static bool aspeed_soc_ast2700_gic_realize(DeviceState *dev, Error **errp)
return false;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&a->gic), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&a->gic), 0,
sc->memmap[ASPEED_GIC_DIST]);
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&a->gic), 1,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&a->gic), 1,
sc->memmap[ASPEED_GIC_REDIST]);
for (i = 0; i < sc->num_cpus; i++) {
@@ -608,6 +634,49 @@ static bool aspeed_soc_ast2700_gic_realize(DeviceState *dev, Error **errp)
return true;
}
+static bool aspeed_soc_ast2700_pcie_realize(DeviceState *dev, Error **errp)
+{
+ AspeedSoCState *s = ASPEED_SOC(dev);
+ AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
+ MemoryRegion *mmio_mr = NULL;
+ char name[64];
+ qemu_irq irq;
+ int i;
+
+ for (i = 0; i < sc->pcie_num; i++) {
+ if (!sysbus_realize(SYS_BUS_DEVICE(&s->pcie_phy[i]), errp)) {
+ return false;
+ }
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->pcie_phy[i]), 0,
+ sc->memmap[ASPEED_DEV_PCIE_PHY0 + i]);
+
+ object_property_set_int(OBJECT(&s->pcie[i]), "dram-base",
+ sc->memmap[ASPEED_DEV_SDRAM],
+ &error_abort);
+ object_property_set_link(OBJECT(&s->pcie[i]), "dram",
+ OBJECT(s->dram_mr), &error_abort);
+ if (!sysbus_realize(SYS_BUS_DEVICE(&s->pcie[i]), errp)) {
+ return false;
+ }
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->pcie[i]), 0,
+ sc->memmap[ASPEED_DEV_PCIE0 + i]);
+ irq = aspeed_soc_ast2700_get_irq(s, ASPEED_DEV_PCIE0 + i);
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->pcie[i].rc), 0, irq);
+
+ mmio_mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->pcie[i].rc), 1);
+ snprintf(name, sizeof(name), "aspeed.pcie-mmio.%d", i);
+ memory_region_init_alias(&s->pcie_mmio_alias[i], OBJECT(&s->pcie[i].rc),
+ name, mmio_mr,
+ sc->memmap[ASPEED_DEV_PCIE_MMIO0 + i],
+ 0x20000000);
+ memory_region_add_subregion(s->memory,
+ sc->memmap[ASPEED_DEV_PCIE_MMIO0 + i],
+ &s->pcie_mmio_alias[i]);
+ }
+
+ return true;
+}
+
static void aspeed_soc_ast2700_realize(DeviceState *dev, Error **errp)
{
int i;
@@ -618,6 +687,7 @@ static void aspeed_soc_ast2700_realize(DeviceState *dev, Error **errp)
AspeedINTCClass *icio = ASPEED_INTC_GET_CLASS(&a->intc[1]);
g_autofree char *name = NULL;
qemu_irq irq;
+ int uart;
/* Default boot region (SPI memory or ROMs) */
memory_region_init(&s->spi_boot_container, OBJECT(s),
@@ -650,7 +720,7 @@ static void aspeed_soc_ast2700_realize(DeviceState *dev, Error **errp)
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&a->intc[0]), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&a->intc[0]), 0,
sc->memmap[ASPEED_DEV_INTC]);
/* INTCIO */
@@ -658,7 +728,7 @@ static void aspeed_soc_ast2700_realize(DeviceState *dev, Error **errp)
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&a->intc[1]), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&a->intc[1]), 0,
sc->memmap[ASPEED_DEV_INTCIO]);
/* irq sources -> orgates -> INTC */
@@ -708,18 +778,24 @@ static void aspeed_soc_ast2700_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->scu), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->scu), 0, sc->memmap[ASPEED_DEV_SCU]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->scu), 0,
+ sc->memmap[ASPEED_DEV_SCU]);
/* SCU1 */
if (!sysbus_realize(SYS_BUS_DEVICE(&s->scuio), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->scuio), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->scuio), 0,
sc->memmap[ASPEED_DEV_SCUIO]);
/* UART */
- if (!aspeed_soc_uart_realize(s, errp)) {
- return;
+ for (i = 0, uart = sc->uarts_base; i < sc->uarts_num; i++, uart++) {
+ if (!aspeed_soc_uart_realize(s->memory, &s->uart[i],
+ sc->memmap[uart], errp)) {
+ return;
+ }
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->uart[i]), 0,
+ aspeed_soc_ast2700_get_irq(s, uart));
}
/* FMC, The number of CS is set at the board level */
@@ -731,11 +807,12 @@ static void aspeed_soc_ast2700_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->fmc), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->fmc), 0, sc->memmap[ASPEED_DEV_FMC]);
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->fmc), 1,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->fmc), 0,
+ sc->memmap[ASPEED_DEV_FMC]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->fmc), 1,
ASPEED_SMC_GET_CLASS(&s->fmc)->flash_window_base);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->fmc), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_FMC));
+ aspeed_soc_ast2700_get_irq(s, ASPEED_DEV_FMC));
/* Set up an alias on the FMC CE0 region (boot default) */
MemoryRegion *fmc0_mmio = &s->fmc.flashes[0].mmio;
@@ -750,9 +827,9 @@ static void aspeed_soc_ast2700_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->spi[i]), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->spi[i]), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->spi[i]), 0,
sc->memmap[ASPEED_DEV_SPI0 + i]);
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->spi[i]), 1,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->spi[i]), 1,
ASPEED_SMC_GET_CLASS(&s->spi[i])->flash_window_base);
}
@@ -761,10 +838,11 @@ static void aspeed_soc_ast2700_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->ehci[i]), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->ehci[i]), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->ehci[i]), 0,
sc->memmap[ASPEED_DEV_EHCI1 + i]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->ehci[i]), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_EHCI1 + i));
+ aspeed_soc_ast2700_get_irq(s,
+ ASPEED_DEV_EHCI1 + i));
}
/*
@@ -779,7 +857,7 @@ static void aspeed_soc_ast2700_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->sdmc), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->sdmc), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->sdmc), 0,
sc->memmap[ASPEED_DEV_SDMC]);
/* RAM */
@@ -796,10 +874,10 @@ static void aspeed_soc_ast2700_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->ftgmac100[i]), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->ftgmac100[i]), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->ftgmac100[i]), 0,
sc->memmap[ASPEED_DEV_ETH1 + i]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->ftgmac100[i]), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_ETH1 + i));
+ aspeed_soc_ast2700_get_irq(s, ASPEED_DEV_ETH1 + i));
object_property_set_link(OBJECT(&s->mii[i]), "nic",
OBJECT(&s->ftgmac100[i]), &error_abort);
@@ -807,7 +885,7 @@ static void aspeed_soc_ast2700_realize(DeviceState *dev, Error **errp)
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->mii[i]), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->mii[i]), 0,
sc->memmap[ASPEED_DEV_MII1 + i]);
}
@@ -821,28 +899,30 @@ static void aspeed_soc_ast2700_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->wdt[i]), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->wdt[i]), 0, wdt_offset);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->wdt[i]), 0, wdt_offset);
}
/* SLI */
if (!sysbus_realize(SYS_BUS_DEVICE(&s->sli), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->sli), 0, sc->memmap[ASPEED_DEV_SLI]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->sli), 0,
+ sc->memmap[ASPEED_DEV_SLI]);
if (!sysbus_realize(SYS_BUS_DEVICE(&s->sliio), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->sliio), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->sliio), 0,
sc->memmap[ASPEED_DEV_SLIIO]);
/* ADC */
if (!sysbus_realize(SYS_BUS_DEVICE(&s->adc), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->adc), 0, sc->memmap[ASPEED_DEV_ADC]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->adc), 0,
+ sc->memmap[ASPEED_DEV_ADC]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->adc), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_ADC));
+ aspeed_soc_ast2700_get_irq(s, ASPEED_DEV_ADC));
/* I2C */
object_property_set_link(OBJECT(&s->i2c), "dram", OBJECT(s->dram_mr),
@@ -850,7 +930,8 @@ static void aspeed_soc_ast2700_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->i2c), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->i2c), 0, sc->memmap[ASPEED_DEV_I2C]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->i2c), 0,
+ sc->memmap[ASPEED_DEV_I2C]);
for (i = 0; i < ASPEED_I2C_GET_CLASS(&s->i2c)->num_busses; i++) {
/*
* The AST2700 I2C controller has one source INTC per bus.
@@ -879,36 +960,37 @@ static void aspeed_soc_ast2700_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->gpio), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->gpio), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->gpio), 0,
sc->memmap[ASPEED_DEV_GPIO]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->gpio), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_GPIO));
+ aspeed_soc_ast2700_get_irq(s, ASPEED_DEV_GPIO));
/* RTC */
if (!sysbus_realize(SYS_BUS_DEVICE(&s->rtc), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->rtc), 0, sc->memmap[ASPEED_DEV_RTC]);
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->rtc), 0,
+ sc->memmap[ASPEED_DEV_RTC]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->rtc), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_RTC));
+ aspeed_soc_ast2700_get_irq(s, ASPEED_DEV_RTC));
/* SDHCI */
if (!sysbus_realize(SYS_BUS_DEVICE(&s->sdhci), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->sdhci), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->sdhci), 0,
sc->memmap[ASPEED_DEV_SDHCI]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->sdhci), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_SDHCI));
+ aspeed_soc_ast2700_get_irq(s, ASPEED_DEV_SDHCI));
/* eMMC */
if (!sysbus_realize(SYS_BUS_DEVICE(&s->emmc), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->emmc), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->emmc), 0,
sc->memmap[ASPEED_DEV_EMMC]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->emmc), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_EMMC));
+ aspeed_soc_ast2700_get_irq(s, ASPEED_DEV_EMMC));
/* Timer */
object_property_set_link(OBJECT(&s->timerctrl), "scu", OBJECT(&s->scu),
@@ -916,10 +998,10 @@ static void aspeed_soc_ast2700_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->timerctrl), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->timerctrl), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->timerctrl), 0,
sc->memmap[ASPEED_DEV_TIMER1]);
for (i = 0; i < ASPEED_TIMER_NR_TIMERS; i++) {
- irq = aspeed_soc_get_irq(s, ASPEED_DEV_TIMER1 + i);
+ irq = aspeed_soc_ast2700_get_irq(s, ASPEED_DEV_TIMER1 + i);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->timerctrl), i, irq);
}
@@ -929,28 +1011,33 @@ static void aspeed_soc_ast2700_realize(DeviceState *dev, Error **errp)
if (!sysbus_realize(SYS_BUS_DEVICE(&s->hace), errp)) {
return;
}
- aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->hace), 0,
+ aspeed_mmio_map(s->memory, SYS_BUS_DEVICE(&s->hace), 0,
sc->memmap[ASPEED_DEV_HACE]);
sysbus_connect_irq(SYS_BUS_DEVICE(&s->hace), 0,
- aspeed_soc_get_irq(s, ASPEED_DEV_HACE));
+ aspeed_soc_ast2700_get_irq(s, ASPEED_DEV_HACE));
+
+ /* PCIe Root Complex (RC) */
+ if (!aspeed_soc_ast2700_pcie_realize(dev, errp)) {
+ return;
+ }
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&s->dpmcu),
+ aspeed_mmio_map_unimplemented(s->memory, SYS_BUS_DEVICE(&s->dpmcu),
"aspeed.dpmcu",
sc->memmap[ASPEED_DEV_DPMCU],
AST2700_SOC_DPMCU_SIZE);
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&s->ltpi),
+ aspeed_mmio_map_unimplemented(s->memory, SYS_BUS_DEVICE(&s->ltpi),
"aspeed.ltpi",
sc->memmap[ASPEED_DEV_LTPI],
AST2700_SOC_LTPI_SIZE);
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&s->iomem),
+ aspeed_mmio_map_unimplemented(s->memory, SYS_BUS_DEVICE(&s->iomem),
"aspeed.io",
sc->memmap[ASPEED_DEV_IOMEM],
AST2700_SOC_IO_SIZE);
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&s->iomem0),
+ aspeed_mmio_map_unimplemented(s->memory, SYS_BUS_DEVICE(&s->iomem0),
"aspeed.iomem0",
sc->memmap[ASPEED_DEV_IOMEM0],
AST2700_SOC_IOMEM_SIZE);
- aspeed_mmio_map_unimplemented(s, SYS_BUS_DEVICE(&s->iomem1),
+ aspeed_mmio_map_unimplemented(s->memory, SYS_BUS_DEVICE(&s->iomem1),
"aspeed.iomem1",
sc->memmap[ASPEED_DEV_IOMEM1],
AST2700_SOC_IOMEM_SIZE);
@@ -972,6 +1059,7 @@ static void aspeed_soc_ast2700a0_class_init(ObjectClass *oc, const void *data)
sc->valid_cpu_types = valid_cpu_types;
sc->silicon_rev = AST2700_A0_SILICON_REV;
sc->sram_size = 0x20000;
+ sc->pcie_num = 0;
sc->spis_num = 3;
sc->ehcis_num = 2;
sc->wdts_num = 8;
@@ -981,7 +1069,6 @@ static void aspeed_soc_ast2700a0_class_init(ObjectClass *oc, const void *data)
sc->uarts_base = ASPEED_DEV_UART0;
sc->irqmap = aspeed_soc_ast2700a0_irqmap;
sc->memmap = aspeed_soc_ast2700_memmap;
- sc->get_irq = aspeed_soc_ast2700_get_irq;
}
static void aspeed_soc_ast2700a1_class_init(ObjectClass *oc, const void *data)
@@ -1000,6 +1087,7 @@ static void aspeed_soc_ast2700a1_class_init(ObjectClass *oc, const void *data)
sc->valid_cpu_types = valid_cpu_types;
sc->silicon_rev = AST2700_A1_SILICON_REV;
sc->sram_size = 0x20000;
+ sc->pcie_num = 3;
sc->spis_num = 3;
sc->ehcis_num = 4;
sc->wdts_num = 8;
@@ -1009,7 +1097,6 @@ static void aspeed_soc_ast2700a1_class_init(ObjectClass *oc, const void *data)
sc->uarts_base = ASPEED_DEV_UART0;
sc->irqmap = aspeed_soc_ast2700a1_irqmap;
sc->memmap = aspeed_soc_ast2700_memmap;
- sc->get_irq = aspeed_soc_ast2700_get_irq;
}
static const TypeInfo aspeed_soc_ast27x0_types[] = {
diff --git a/hw/arm/aspeed_coprocessor_common.c b/hw/arm/aspeed_coprocessor_common.c
new file mode 100644
index 0000000..8a94b44
--- /dev/null
+++ b/hw/arm/aspeed_coprocessor_common.c
@@ -0,0 +1,49 @@
+/*
+ * ASPEED Coprocessor
+ *
+ * Copyright (C) 2025 ASPEED Technology Inc.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "system/memory.h"
+#include "hw/qdev-properties.h"
+#include "hw/arm/aspeed_coprocessor.h"
+
+static void aspeed_coprocessor_realize(DeviceState *dev, Error **errp)
+{
+ AspeedCoprocessorState *s = ASPEED_COPROCESSOR(dev);
+
+ if (!s->memory) {
+ error_setg(errp, "'memory' link is not set");
+ return;
+ }
+}
+
+static const Property aspeed_coprocessor_properties[] = {
+ DEFINE_PROP_LINK("memory", AspeedCoprocessorState, memory,
+ TYPE_MEMORY_REGION, MemoryRegion *),
+};
+
+static void aspeed_coprocessor_class_init(ObjectClass *oc, const void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(oc);
+
+ dc->realize = aspeed_coprocessor_realize;
+ device_class_set_props(dc, aspeed_coprocessor_properties);
+}
+
+static const TypeInfo aspeed_coprocessor_types[] = {
+ {
+ .name = TYPE_ASPEED_COPROCESSOR,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(AspeedCoprocessorState),
+ .class_size = sizeof(AspeedCoprocessorClass),
+ .class_init = aspeed_coprocessor_class_init,
+ .abstract = true,
+ },
+};
+
+DEFINE_TYPES(aspeed_coprocessor_types)
diff --git a/hw/arm/aspeed_eeprom.c b/hw/arm/aspeed_eeprom.c
index daa3d32..8bbbdec 100644
--- a/hw/arm/aspeed_eeprom.c
+++ b/hw/arm/aspeed_eeprom.c
@@ -162,6 +162,25 @@ const uint8_t rainier_bmc_fruid[] = {
0x31, 0x50, 0x46, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
};
+const uint8_t gb200nvl_bmc_fruid[] = {
+ 0x01, 0x00, 0x00, 0x01, 0x0b, 0x00, 0x00, 0xf3, 0x01, 0x0a, 0x19, 0x1f,
+ 0x0f, 0xe6, 0xc6, 0x4e, 0x56, 0x49, 0x44, 0x49, 0x41, 0xc5, 0x50, 0x33,
+ 0x38, 0x30, 0x39, 0xcd, 0x31, 0x35, 0x38, 0x33, 0x33, 0x32, 0x34, 0x38,
+ 0x30, 0x30, 0x31, 0x35, 0x30, 0xd2, 0x36, 0x39, 0x39, 0x2d, 0x31, 0x33,
+ 0x38, 0x30, 0x39, 0x2d, 0x30, 0x34, 0x30, 0x34, 0x2d, 0x36, 0x30, 0x30,
+ 0xc0, 0x01, 0x01, 0xd6, 0x4d, 0x41, 0x43, 0x3a, 0x20, 0x33, 0x43, 0x3a,
+ 0x36, 0x44, 0x3a, 0x36, 0x36, 0x3a, 0x31, 0x34, 0x3a, 0x43, 0x38, 0x3a,
+ 0x37, 0x41, 0xc1, 0x3b, 0x01, 0x09, 0x19, 0xc6, 0x4e, 0x56, 0x49, 0x44,
+ 0x49, 0x41, 0xc9, 0x50, 0x33, 0x38, 0x30, 0x39, 0x2d, 0x42, 0x4d, 0x43,
+ 0xd2, 0x36, 0x39, 0x39, 0x2d, 0x31, 0x33, 0x38, 0x30, 0x39, 0x2d, 0x30,
+ 0x34, 0x30, 0x34, 0x2d, 0x36, 0x30, 0x30, 0xc4, 0x41, 0x45, 0x2e, 0x31,
+ 0xcd, 0x31, 0x35, 0x38, 0x33, 0x33, 0x32, 0x34, 0x38, 0x30, 0x30, 0x31,
+ 0x35, 0x30, 0xc0, 0xc4, 0x76, 0x30, 0x2e, 0x31, 0xc1, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0xb4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+
+};
+
const size_t tiogapass_bmc_fruid_len = sizeof(tiogapass_bmc_fruid);
const size_t fby35_nic_fruid_len = sizeof(fby35_nic_fruid);
const size_t fby35_bb_fruid_len = sizeof(fby35_bb_fruid);
@@ -169,3 +188,5 @@ const size_t fby35_bmc_fruid_len = sizeof(fby35_bmc_fruid);
const size_t yosemitev2_bmc_fruid_len = sizeof(yosemitev2_bmc_fruid);
const size_t rainier_bb_fruid_len = sizeof(rainier_bb_fruid);
const size_t rainier_bmc_fruid_len = sizeof(rainier_bmc_fruid);
+const size_t gb200nvl_bmc_fruid_len = sizeof(gb200nvl_bmc_fruid);
+
diff --git a/hw/arm/aspeed_eeprom.h b/hw/arm/aspeed_eeprom.h
index f08c16e..3ed9bc1 100644
--- a/hw/arm/aspeed_eeprom.h
+++ b/hw/arm/aspeed_eeprom.h
@@ -26,4 +26,7 @@ extern const size_t rainier_bb_fruid_len;
extern const uint8_t rainier_bmc_fruid[];
extern const size_t rainier_bmc_fruid_len;
+extern const uint8_t gb200nvl_bmc_fruid[];
+extern const size_t gb200nvl_bmc_fruid_len;
+
#endif
diff --git a/hw/arm/aspeed_soc_common.c b/hw/arm/aspeed_soc_common.c
index 1c4ac93..78b6ae1 100644
--- a/hw/arm/aspeed_soc_common.c
+++ b/hw/arm/aspeed_soc_common.c
@@ -16,54 +16,45 @@
#include "hw/misc/unimp.h"
#include "hw/arm/aspeed_soc.h"
#include "hw/char/serial-mm.h"
+#include "system/blockdev.h"
+#include "system/block-backend.h"
+#include "hw/loader.h"
+#include "qemu/datadir.h"
-const char *aspeed_soc_cpu_type(AspeedSoCClass *sc)
+const char *aspeed_soc_cpu_type(const char * const *valid_cpu_types)
{
- assert(sc->valid_cpu_types);
- assert(sc->valid_cpu_types[0]);
- assert(!sc->valid_cpu_types[1]);
- return sc->valid_cpu_types[0];
+ assert(valid_cpu_types);
+ assert(valid_cpu_types[0]);
+ assert(!valid_cpu_types[1]);
+ return valid_cpu_types[0];
}
-qemu_irq aspeed_soc_get_irq(AspeedSoCState *s, int dev)
+bool aspeed_soc_uart_realize(MemoryRegion *memory, SerialMM *smm,
+ const hwaddr addr, Error **errp)
{
- return ASPEED_SOC_GET_CLASS(s)->get_irq(s, dev);
-}
-
-bool aspeed_soc_uart_realize(AspeedSoCState *s, Error **errp)
-{
- AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
- SerialMM *smm;
-
- for (int i = 0, uart = sc->uarts_base; i < sc->uarts_num; i++, uart++) {
- smm = &s->uart[i];
-
- /* Chardev property is set by the machine. */
- qdev_prop_set_uint8(DEVICE(smm), "regshift", 2);
- qdev_prop_set_uint32(DEVICE(smm), "baudbase", 38400);
- qdev_set_legacy_instance_id(DEVICE(smm), sc->memmap[uart], 2);
- qdev_prop_set_uint8(DEVICE(smm), "endianness", DEVICE_LITTLE_ENDIAN);
- if (!sysbus_realize(SYS_BUS_DEVICE(smm), errp)) {
- return false;
- }
-
- sysbus_connect_irq(SYS_BUS_DEVICE(smm), 0, aspeed_soc_get_irq(s, uart));
- aspeed_mmio_map(s, SYS_BUS_DEVICE(smm), 0, sc->memmap[uart]);
+ /* Chardev property is set by the machine. */
+ qdev_prop_set_uint8(DEVICE(smm), "regshift", 2);
+ qdev_prop_set_uint32(DEVICE(smm), "baudbase", 38400);
+ qdev_set_legacy_instance_id(DEVICE(smm), addr, 2);
+ qdev_prop_set_uint8(DEVICE(smm), "endianness", DEVICE_LITTLE_ENDIAN);
+ if (!sysbus_realize(SYS_BUS_DEVICE(smm), errp)) {
+ return false;
}
+ aspeed_mmio_map(memory, SYS_BUS_DEVICE(smm), 0, addr);
return true;
}
-void aspeed_soc_uart_set_chr(AspeedSoCState *s, int dev, Chardev *chr)
+void aspeed_soc_uart_set_chr(SerialMM *uart, int dev, int uarts_base,
+ int uarts_num, Chardev *chr)
{
- AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
- int uart_first = aspeed_uart_first(sc);
+ int uart_first = aspeed_uart_first(uarts_base);
int uart_index = aspeed_uart_index(dev);
int i = uart_index - uart_first;
- g_assert(0 <= i && i < ARRAY_SIZE(s->uart) && i < sc->uarts_num);
- qdev_prop_set_chr(DEVICE(&s->uart[i]), "chardev", chr);
+ g_assert(0 <= i && i < ASPEED_UARTS_NUM && i < uarts_num);
+ qdev_prop_set_chr(DEVICE(&uart[i]), "chardev", chr);
}
/*
@@ -107,23 +98,115 @@ bool aspeed_soc_dram_init(AspeedSoCState *s, Error **errp)
return true;
}
-void aspeed_mmio_map(AspeedSoCState *s, SysBusDevice *dev, int n, hwaddr addr)
+void aspeed_mmio_map(MemoryRegion *memory, SysBusDevice *dev, int n,
+ hwaddr addr)
{
- memory_region_add_subregion(s->memory, addr,
- sysbus_mmio_get_region(dev, n));
+ memory_region_add_subregion(memory, addr, sysbus_mmio_get_region(dev, n));
}
-void aspeed_mmio_map_unimplemented(AspeedSoCState *s, SysBusDevice *dev,
+void aspeed_mmio_map_unimplemented(MemoryRegion *memory, SysBusDevice *dev,
const char *name, hwaddr addr, uint64_t size)
{
qdev_prop_set_string(DEVICE(dev), "name", name);
qdev_prop_set_uint64(DEVICE(dev), "size", size);
sysbus_realize(dev, &error_abort);
- memory_region_add_subregion_overlap(s->memory, addr,
+ memory_region_add_subregion_overlap(memory, addr,
sysbus_mmio_get_region(dev, 0), -1000);
}
+void aspeed_board_init_flashes(AspeedSMCState *s, const char *flashtype,
+ unsigned int count, int unit0)
+{
+ int i;
+
+ if (!flashtype) {
+ return;
+ }
+
+ for (i = 0; i < count; ++i) {
+ DriveInfo *dinfo = drive_get(IF_MTD, 0, unit0 + i);
+ DeviceState *dev;
+
+ dev = qdev_new(flashtype);
+ if (dinfo) {
+ qdev_prop_set_drive(dev, "drive", blk_by_legacy_dinfo(dinfo));
+ }
+ qdev_prop_set_uint8(dev, "cs", i);
+ qdev_realize_and_unref(dev, BUS(s->spi), &error_fatal);
+ }
+}
+
+void aspeed_write_boot_rom(BlockBackend *blk, hwaddr addr, size_t rom_size,
+ Error **errp)
+{
+ g_autofree void *storage = NULL;
+ int64_t size;
+
+ /*
+ * The block backend size should have already been 'validated' by
+ * the creation of the m25p80 object.
+ */
+ size = blk_getlength(blk);
+ if (size <= 0) {
+ error_setg(errp, "failed to get flash size");
+ return;
+ }
+
+ if (rom_size > size) {
+ rom_size = size;
+ }
+
+ storage = g_malloc0(rom_size);
+ if (blk_pread(blk, 0, rom_size, storage, 0) < 0) {
+ error_setg(errp, "failed to read the initial flash content");
+ return;
+ }
+
+ rom_add_blob_fixed("aspeed.boot_rom", storage, rom_size, addr);
+}
+
+/*
+ * Create a ROM and copy the flash contents at the expected address
+ * (0x0). Boots faster than execute-in-place.
+ */
+void aspeed_install_boot_rom(AspeedSoCState *soc, BlockBackend *blk,
+ MemoryRegion *boot_rom, uint64_t rom_size)
+{
+ AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(soc);
+
+ memory_region_init_rom(boot_rom, NULL, "aspeed.boot_rom", rom_size,
+ &error_abort);
+ memory_region_add_subregion_overlap(&soc->spi_boot_container, 0,
+ boot_rom, 1);
+ aspeed_write_boot_rom(blk, sc->memmap[ASPEED_DEV_SPI_BOOT], rom_size,
+ &error_abort);
+}
+
+/*
+ * This function locates the vbootrom image file specified via the command line
+ * using the -bios option. It loads the specified image into the vbootrom
+ * memory region and handles errors if the file cannot be found or loaded.
+ */
+void aspeed_load_vbootrom(AspeedSoCState *soc, const char *bios_name,
+ Error **errp)
+{
+ g_autofree char *filename = NULL;
+ int ret;
+
+ filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
+ if (!filename) {
+ error_setg(errp, "Could not find vbootrom image '%s'", bios_name);
+ return;
+ }
+
+ ret = load_image_mr(filename, &soc->vbootrom);
+ if (ret < 0) {
+ error_setg(errp, "Failed to load vbootrom image '%s'", bios_name);
+ return;
+ }
+}
+
static void aspeed_soc_realize(DeviceState *dev, Error **errp)
{
AspeedSoCState *s = ASPEED_SOC(dev);
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index f94b940..e77d867 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -15,19 +15,23 @@
#include "hw/arm/boot.h"
#include "hw/arm/linux-boot-if.h"
#include "cpu.h"
+#include "exec/tswap.h"
#include "exec/target_page.h"
#include "system/kvm.h"
#include "system/tcg.h"
#include "system/system.h"
+#include "system/memory.h"
#include "system/numa.h"
#include "hw/boards.h"
#include "system/reset.h"
#include "hw/loader.h"
+#include "hw/mem/memory-device.h"
#include "elf.h"
#include "system/device_tree.h"
#include "qemu/config-file.h"
#include "qemu/option.h"
#include "qemu/units.h"
+#include "qemu/bswap.h"
/* Kernel boot protocol is specified in the kernel docs
* Documentation/arm/Booting and Documentation/arm64/booting.txt
@@ -333,81 +337,6 @@ static void set_kernel_args(const struct arm_boot_info *info, AddressSpace *as)
WRITE_WORD(p, 0);
}
-static void set_kernel_args_old(const struct arm_boot_info *info,
- AddressSpace *as)
-{
- hwaddr p;
- const char *s;
- int initrd_size = info->initrd_size;
- hwaddr base = info->loader_start;
-
- /* see linux/include/asm-arm/setup.h */
- p = base + KERNEL_ARGS_ADDR;
- /* page_size */
- WRITE_WORD(p, 4096);
- /* nr_pages */
- WRITE_WORD(p, info->ram_size / 4096);
- /* ramdisk_size */
- WRITE_WORD(p, 0);
-#define FLAG_READONLY 1
-#define FLAG_RDLOAD 4
-#define FLAG_RDPROMPT 8
- /* flags */
- WRITE_WORD(p, FLAG_READONLY | FLAG_RDLOAD | FLAG_RDPROMPT);
- /* rootdev */
- WRITE_WORD(p, (31 << 8) | 0); /* /dev/mtdblock0 */
- /* video_num_cols */
- WRITE_WORD(p, 0);
- /* video_num_rows */
- WRITE_WORD(p, 0);
- /* video_x */
- WRITE_WORD(p, 0);
- /* video_y */
- WRITE_WORD(p, 0);
- /* memc_control_reg */
- WRITE_WORD(p, 0);
- /* unsigned char sounddefault */
- /* unsigned char adfsdrives */
- /* unsigned char bytes_per_char_h */
- /* unsigned char bytes_per_char_v */
- WRITE_WORD(p, 0);
- /* pages_in_bank[4] */
- WRITE_WORD(p, 0);
- WRITE_WORD(p, 0);
- WRITE_WORD(p, 0);
- WRITE_WORD(p, 0);
- /* pages_in_vram */
- WRITE_WORD(p, 0);
- /* initrd_start */
- if (initrd_size) {
- WRITE_WORD(p, info->initrd_start);
- } else {
- WRITE_WORD(p, 0);
- }
- /* initrd_size */
- WRITE_WORD(p, initrd_size);
- /* rd_start */
- WRITE_WORD(p, 0);
- /* system_rev */
- WRITE_WORD(p, 0);
- /* system_serial_low */
- WRITE_WORD(p, 0);
- /* system_serial_high */
- WRITE_WORD(p, 0);
- /* mem_fclk_21285 */
- WRITE_WORD(p, 0);
- /* zero unused fields */
- while (p < base + KERNEL_ARGS_ADDR + 256 + 1024) {
- WRITE_WORD(p, 0);
- }
- s = info->kernel_cmdline;
- if (s) {
- address_space_write(as, p, MEMTXATTRS_UNSPECIFIED, s, strlen(s) + 1);
- } else {
- WRITE_WORD(p, 0);
- }
-}
-
static int fdt_add_memory_node(void *fdt, uint32_t acells, hwaddr mem_base,
uint32_t scells, hwaddr mem_len,
int numa_node_id)
@@ -512,6 +441,29 @@ static void fdt_add_psci_node(void *fdt, ARMCPU *armcpu)
qemu_fdt_setprop_cell(fdt, "/psci", "migrate", migrate_fn);
}
+static int fdt_add_pmem_node(void *fdt, uint32_t acells, uint32_t scells,
+ int64_t mem_base, int64_t size, int64_t node)
+{
+ int ret;
+
+ g_autofree char *nodename = g_strdup_printf("/pmem@%" PRIx64, mem_base);
+
+ qemu_fdt_add_subnode(fdt, nodename);
+ qemu_fdt_setprop_string(fdt, nodename, "compatible", "pmem-region");
+ ret = qemu_fdt_setprop_sized_cells(fdt, nodename, "reg", acells,
+ mem_base, scells, size);
+ if (ret) {
+ return ret;
+ }
+
+ if (node >= 0) {
+ return qemu_fdt_setprop_cell(fdt, nodename, "numa-node-id",
+ node);
+ }
+
+ return 0;
+}
+
int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
hwaddr addr_limit, AddressSpace *as, MachineState *ms,
ARMCPU *cpu)
@@ -522,11 +474,12 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
unsigned int i;
hwaddr mem_base, mem_len;
char **node_path;
+ g_autoptr(MemoryDeviceInfoList) md_list = NULL;
Error *err = NULL;
if (binfo->dtb_filename) {
char *filename;
- filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, binfo->dtb_filename);
+ filename = qemu_find_file(QEMU_FILE_TYPE_DTB, binfo->dtb_filename);
if (!filename) {
fprintf(stderr, "Couldn't open dtb file %s\n", binfo->dtb_filename);
goto fail;
@@ -625,6 +578,23 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
}
}
+ md_list = qmp_memory_device_list();
+ for (MemoryDeviceInfoList *m = md_list; m != NULL; m = m->next) {
+ MemoryDeviceInfo *mi = m->value;
+
+ if (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM) {
+ PCDIMMDeviceInfo *di = mi->u.nvdimm.data;
+
+ rc = fdt_add_pmem_node(fdt, acells, scells,
+ di->addr, di->size, di->node);
+ if (rc < 0) {
+ fprintf(stderr, "couldn't add NVDIMM /pmem@%"PRIx64" node\n",
+ di->addr);
+ goto fail;
+ }
+ }
+ }
+
rc = fdt_path_offset(fdt, "/chosen");
if (rc < 0) {
qemu_fdt_add_subnode(fdt, "/chosen");
@@ -743,7 +713,7 @@ static void do_cpu_reset(void *opaque)
} else {
if (arm_feature(env, ARM_FEATURE_EL3) &&
(info->secure_boot ||
- (info->secure_board_setup && cs == first_cpu))) {
+ (info->secure_board_setup && cpu == info->primary_cpu))) {
/* Start this CPU in Secure SVC */
target_el = 3;
}
@@ -751,17 +721,13 @@ static void do_cpu_reset(void *opaque)
arm_emulate_firmware_reset(cs, target_el);
- if (cs == first_cpu) {
+ if (cpu == info->primary_cpu) {
AddressSpace *as = arm_boot_address_space(cpu, info);
cpu_set_pc(cs, info->loader_start);
if (!have_dtb(info)) {
- if (old_param) {
- set_kernel_args_old(info, as);
- } else {
- set_kernel_args(info, as);
- }
+ set_kernel_args(info, as);
}
} else if (info->secondary_cpu_reset_hook) {
info->secondary_cpu_reset_hook(cpu, info);
@@ -1238,6 +1204,9 @@ void arm_load_kernel(ARMCPU *cpu, MachineState *ms, struct arm_boot_info *info)
info->dtb_filename = ms->dtb;
info->dtb_limit = 0;
+ /* We assume the CPU passed as argument is the primary CPU. */
+ info->primary_cpu = cpu;
+
/* Load the kernel. */
if (!info->kernel_filename || info->firmware_loaded) {
arm_setup_firmware_boot(cpu, info);
@@ -1287,12 +1256,8 @@ void arm_load_kernel(ARMCPU *cpu, MachineState *ms, struct arm_boot_info *info)
object_property_set_int(cpuobj, "psci-conduit", info->psci_conduit,
&error_abort);
- /*
- * Secondary CPUs start in PSCI powered-down state. Like the
- * code in do_cpu_reset(), we assume first_cpu is the primary
- * CPU.
- */
- if (cs != first_cpu) {
+ /* Secondary CPUs start in PSCI powered-down state. */
+ if (ARM_CPU(cs) != info->primary_cpu) {
object_property_set_bool(cpuobj, "start-powered-off", true,
&error_abort);
}
diff --git a/hw/arm/fby35.c b/hw/arm/fby35.c
index e123fa6..5a94c84 100644
--- a/hw/arm/fby35.c
+++ b/hw/arm/fby35.c
@@ -71,12 +71,15 @@ static void fby35_bmc_write_boot_rom(DriveInfo *dinfo, MemoryRegion *mr,
static void fby35_bmc_init(Fby35State *s)
{
AspeedSoCState *soc;
+ AspeedSoCClass *sc;
object_initialize_child(OBJECT(s), "bmc", &s->bmc, "ast2600-a3");
soc = ASPEED_SOC(&s->bmc);
+ sc = ASPEED_SOC_GET_CLASS(soc);
memory_region_init(&s->bmc_memory, OBJECT(&s->bmc), "bmc-memory",
UINT64_MAX);
+ memory_region_add_subregion(get_system_memory(), 0, &s->bmc_memory);
memory_region_init_ram(&s->bmc_dram, OBJECT(&s->bmc), "bmc-dram",
FBY35_BMC_RAM_SIZE, &error_abort);
@@ -90,7 +93,8 @@ static void fby35_bmc_init(Fby35State *s)
&error_abort);
object_property_set_int(OBJECT(&s->bmc), "hw-strap2", 0x00000003,
&error_abort);
- aspeed_soc_uart_set_chr(soc, ASPEED_DEV_UART5, serial_hd(0));
+ aspeed_soc_uart_set_chr(soc->uart, ASPEED_DEV_UART5, sc->uarts_base,
+ sc->uarts_num, serial_hd(0));
qdev_realize(DEVICE(&s->bmc), NULL, &error_abort);
aspeed_board_init_flashes(&soc->fmc, "n25q00", 2, 0);
@@ -117,12 +121,14 @@ static void fby35_bmc_init(Fby35State *s)
static void fby35_bic_init(Fby35State *s)
{
AspeedSoCState *soc;
+ AspeedSoCClass *sc;
s->bic_sysclk = clock_new(OBJECT(s), "SYSCLK");
clock_set_hz(s->bic_sysclk, 200000000ULL);
object_initialize_child(OBJECT(s), "bic", &s->bic, "ast1030-a1");
soc = ASPEED_SOC(&s->bic);
+ sc = ASPEED_SOC_GET_CLASS(soc);
memory_region_init(&s->bic_memory, OBJECT(&s->bic), "bic-memory",
UINT64_MAX);
@@ -130,7 +136,8 @@ static void fby35_bic_init(Fby35State *s)
qdev_connect_clock_in(DEVICE(&s->bic), "sysclk", s->bic_sysclk);
object_property_set_link(OBJECT(&s->bic), "memory", OBJECT(&s->bic_memory),
&error_abort);
- aspeed_soc_uart_set_chr(soc, ASPEED_DEV_UART5, serial_hd(1));
+ aspeed_soc_uart_set_chr(soc->uart, ASPEED_DEV_UART5, sc->uarts_base,
+ sc->uarts_num, serial_hd(1));
qdev_realize(DEVICE(&s->bic), NULL, &error_abort);
aspeed_board_init_flashes(&soc->fmc, "sst25vf032b", 2, 2);
diff --git a/hw/arm/fsl-imx8mp.c b/hw/arm/fsl-imx8mp.c
index 23e662c..866f4d1 100644
--- a/hw/arm/fsl-imx8mp.c
+++ b/hw/arm/fsl-imx8mp.c
@@ -356,6 +356,10 @@ static void fsl_imx8mp_realize(DeviceState *dev, Error **errp)
qdev_get_gpio_in(cpudev, ARM_CPU_IRQ));
sysbus_connect_irq(gicsbd, i + ms->smp.cpus,
qdev_get_gpio_in(cpudev, ARM_CPU_FIQ));
+ sysbus_connect_irq(gicsbd, i + 2 * ms->smp.cpus,
+ qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ));
+ sysbus_connect_irq(gicsbd, i + 3 * ms->smp.cpus,
+ qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ));
}
}
diff --git a/hw/arm/highbank.c b/hw/arm/highbank.c
index 3ae26eb..165c0b7 100644
--- a/hw/arm/highbank.c
+++ b/hw/arm/highbank.c
@@ -357,6 +357,7 @@ static void highbank_class_init(ObjectClass *oc, const void *data)
mc->max_cpus = 4;
mc->ignore_memory_transaction_failures = true;
mc->default_ram_id = "highbank.dram";
+ mc->deprecation_reason = "no known users left for this machine";
}
static const TypeInfo highbank_type = {
@@ -381,6 +382,7 @@ static void midway_class_init(ObjectClass *oc, const void *data)
mc->max_cpus = 4;
mc->ignore_memory_transaction_failures = true;
mc->default_ram_id = "highbank.dram";
+ mc->deprecation_reason = "no known users left for this machine";
}
static const TypeInfo midway_type = {
diff --git a/hw/arm/max78000_soc.c b/hw/arm/max78000_soc.c
new file mode 100644
index 0000000..7f1856f
--- /dev/null
+++ b/hw/arm/max78000_soc.c
@@ -0,0 +1,232 @@
+/*
+ * MAX78000 SOC
+ *
+ * Copyright (c) 2025 Jackson Donaldson <jcksn@duck.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Implementation based on stm32f205 and Max78000 user guide at
+ * https://www.analog.com/media/en/technical-documentation/user-guides/max78000-user-guide.pdf
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "system/address-spaces.h"
+#include "system/system.h"
+#include "hw/arm/max78000_soc.h"
+#include "hw/qdev-clock.h"
+#include "hw/misc/unimp.h"
+
+static const uint32_t max78000_icc_addr[] = {0x4002a000, 0x4002a800};
+static const uint32_t max78000_uart_addr[] = {0x40042000, 0x40043000,
+ 0x40044000};
+
+static const int max78000_uart_irq[] = {14, 15, 34};
+
+static void max78000_soc_initfn(Object *obj)
+{
+ MAX78000State *s = MAX78000_SOC(obj);
+ int i;
+
+ object_initialize_child(obj, "armv7m", &s->armv7m, TYPE_ARMV7M);
+
+ object_initialize_child(obj, "gcr", &s->gcr, TYPE_MAX78000_GCR);
+
+ for (i = 0; i < MAX78000_NUM_ICC; i++) {
+ g_autofree char *name = g_strdup_printf("icc%d", i);
+ object_initialize_child(obj, name, &s->icc[i], TYPE_MAX78000_ICC);
+ }
+
+ for (i = 0; i < MAX78000_NUM_UART; i++) {
+ g_autofree char *name = g_strdup_printf("uart%d", i);
+ object_initialize_child(obj, name, &s->uart[i],
+ TYPE_MAX78000_UART);
+ }
+
+ object_initialize_child(obj, "trng", &s->trng, TYPE_MAX78000_TRNG);
+
+ object_initialize_child(obj, "aes", &s->aes, TYPE_MAX78000_AES);
+
+ s->sysclk = qdev_init_clock_in(DEVICE(s), "sysclk", NULL, NULL, 0);
+}
+
+static void max78000_soc_realize(DeviceState *dev_soc, Error **errp)
+{
+ MAX78000State *s = MAX78000_SOC(dev_soc);
+ MemoryRegion *system_memory = get_system_memory();
+ DeviceState *dev, *gcrdev, *armv7m;
+ SysBusDevice *busdev;
+ Error *err = NULL;
+ int i;
+
+ if (!clock_has_source(s->sysclk)) {
+ error_setg(errp, "sysclk clock must be wired up by the board code");
+ return;
+ }
+
+ memory_region_init_rom(&s->flash, OBJECT(dev_soc), "MAX78000.flash",
+ FLASH_SIZE, &err);
+ if (err != NULL) {
+ error_propagate(errp, err);
+ return;
+ }
+
+ memory_region_add_subregion(system_memory, FLASH_BASE_ADDRESS, &s->flash);
+
+ memory_region_init_ram(&s->sram, NULL, "MAX78000.sram", SRAM_SIZE,
+ &err);
+
+ gcrdev = DEVICE(&s->gcr);
+ object_property_set_link(OBJECT(gcrdev), "sram", OBJECT(&s->sram),
+ &err);
+
+ if (err != NULL) {
+ error_propagate(errp, err);
+ return;
+ }
+ memory_region_add_subregion(system_memory, SRAM_BASE_ADDRESS, &s->sram);
+
+ armv7m = DEVICE(&s->armv7m);
+
+ /*
+ * The MAX78000 user guide's Interrupt Vector Table section
+ * suggests that there are 120 IRQs in the text, while only listing
+ * 104 in table 5-1. Implement the more generous of the two.
+ * This has not been tested in hardware.
+ */
+ qdev_prop_set_uint32(armv7m, "num-irq", 120);
+ qdev_prop_set_uint8(armv7m, "num-prio-bits", 3);
+ qdev_prop_set_string(armv7m, "cpu-type", ARM_CPU_TYPE_NAME("cortex-m4"));
+ qdev_prop_set_bit(armv7m, "enable-bitband", true);
+ qdev_connect_clock_in(armv7m, "cpuclk", s->sysclk);
+ object_property_set_link(OBJECT(&s->armv7m), "memory",
+ OBJECT(system_memory), &error_abort);
+ if (!sysbus_realize(SYS_BUS_DEVICE(&s->armv7m), errp)) {
+ return;
+ }
+
+ for (i = 0; i < MAX78000_NUM_ICC; i++) {
+ dev = DEVICE(&(s->icc[i]));
+ sysbus_realize(SYS_BUS_DEVICE(dev), errp);
+ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, max78000_icc_addr[i]);
+ }
+
+ for (i = 0; i < MAX78000_NUM_UART; i++) {
+ g_autofree char *link = g_strdup_printf("uart%d", i);
+ dev = DEVICE(&(s->uart[i]));
+ qdev_prop_set_chr(dev, "chardev", serial_hd(i));
+ if (!sysbus_realize(SYS_BUS_DEVICE(&s->uart[i]), errp)) {
+ return;
+ }
+
+ object_property_set_link(OBJECT(gcrdev), link, OBJECT(dev),
+ &err);
+
+ busdev = SYS_BUS_DEVICE(dev);
+ sysbus_mmio_map(busdev, 0, max78000_uart_addr[i]);
+ sysbus_connect_irq(busdev, 0, qdev_get_gpio_in(armv7m,
+ max78000_uart_irq[i]));
+ }
+
+ dev = DEVICE(&s->trng);
+ sysbus_realize(SYS_BUS_DEVICE(dev), errp);
+ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x4004d000);
+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, qdev_get_gpio_in(armv7m, 4));
+
+ object_property_set_link(OBJECT(gcrdev), "trng", OBJECT(dev), &err);
+
+ dev = DEVICE(&s->aes);
+ sysbus_realize(SYS_BUS_DEVICE(dev), errp);
+ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x40007400);
+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, qdev_get_gpio_in(armv7m, 5));
+
+ object_property_set_link(OBJECT(gcrdev), "aes", OBJECT(dev), &err);
+
+ dev = DEVICE(&s->gcr);
+ sysbus_realize(SYS_BUS_DEVICE(dev), errp);
+ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0x40000000);
+
+ create_unimplemented_device("systemInterface", 0x40000400, 0x400);
+ create_unimplemented_device("functionControl", 0x40000800, 0x400);
+ create_unimplemented_device("watchdogTimer0", 0x40003000, 0x400);
+ create_unimplemented_device("dynamicVoltScale", 0x40003c00, 0x40);
+ create_unimplemented_device("SIMO", 0x40004400, 0x400);
+ create_unimplemented_device("trimSystemInit", 0x40005400, 0x400);
+ create_unimplemented_device("generalCtrlFunc", 0x40005800, 0x400);
+ create_unimplemented_device("wakeupTimer", 0x40006400, 0x400);
+ create_unimplemented_device("powerSequencer", 0x40006800, 0x400);
+ create_unimplemented_device("miscControl", 0x40006c00, 0x400);
+
+ create_unimplemented_device("gpio0", 0x40008000, 0x1000);
+ create_unimplemented_device("gpio1", 0x40009000, 0x1000);
+
+ create_unimplemented_device("parallelCamInterface", 0x4000e000, 0x1000);
+ create_unimplemented_device("CRC", 0x4000f000, 0x1000);
+
+ create_unimplemented_device("timer0", 0x40010000, 0x1000);
+ create_unimplemented_device("timer1", 0x40011000, 0x1000);
+ create_unimplemented_device("timer2", 0x40012000, 0x1000);
+ create_unimplemented_device("timer3", 0x40013000, 0x1000);
+
+ create_unimplemented_device("i2c0", 0x4001d000, 0x1000);
+ create_unimplemented_device("i2c1", 0x4001e000, 0x1000);
+ create_unimplemented_device("i2c2", 0x4001f000, 0x1000);
+
+ create_unimplemented_device("standardDMA", 0x40028000, 0x1000);
+ create_unimplemented_device("flashController0", 0x40029000, 0x400);
+
+ create_unimplemented_device("adc", 0x40034000, 0x1000);
+ create_unimplemented_device("pulseTrainEngine", 0x4003c000, 0xa0);
+ create_unimplemented_device("oneWireMaster", 0x4003d000, 0x1000);
+ create_unimplemented_device("semaphore", 0x4003e000, 0x1000);
+
+ create_unimplemented_device("spi1", 0x40046000, 0x2000);
+ create_unimplemented_device("i2s", 0x40060000, 0x1000);
+ create_unimplemented_device("lowPowerControl", 0x40080000, 0x400);
+ create_unimplemented_device("gpio2", 0x40080400, 0x200);
+ create_unimplemented_device("lowPowerWatchdogTimer", 0x40080800, 0x400);
+ create_unimplemented_device("lowPowerTimer4", 0x40080c00, 0x400);
+
+ create_unimplemented_device("lowPowerTimer5", 0x40081000, 0x400);
+ create_unimplemented_device("lowPowerUART0", 0x40081400, 0x400);
+ create_unimplemented_device("lowPowerComparator", 0x40088000, 0x400);
+
+ create_unimplemented_device("spi0", 0x400be000, 0x400);
+
+ /*
+ * The MAX78000 user guide's base address map lists the CNN TX FIFO as
+ * beginning at 0x400c0400 and ending at 0x400c0400. Given that CNN_FIFO
+ * is listed as having data accessible up to offset 0x1000, the user
+ * guide is likely incorrect.
+ */
+ create_unimplemented_device("cnnTxFIFO", 0x400c0400, 0x2000);
+
+ create_unimplemented_device("cnnGlobalControl", 0x50000000, 0x10000);
+ create_unimplemented_device("cnnx16quad0", 0x50100000, 0x40000);
+ create_unimplemented_device("cnnx16quad1", 0x50500000, 0x40000);
+ create_unimplemented_device("cnnx16quad2", 0x50900000, 0x40000);
+ create_unimplemented_device("cnnx16quad3", 0x50d00000, 0x40000);
+
+}
+
+static void max78000_soc_class_init(ObjectClass *klass, const void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->realize = max78000_soc_realize;
+}
+
+static const TypeInfo max78000_soc_info = {
+ .name = TYPE_MAX78000_SOC,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(MAX78000State),
+ .instance_init = max78000_soc_initfn,
+ .class_init = max78000_soc_class_init,
+};
+
+static void max78000_soc_types(void)
+{
+ type_register_static(&max78000_soc_info);
+}
+
+type_init(max78000_soc_types)
diff --git a/hw/arm/max78000fthr.c b/hw/arm/max78000fthr.c
new file mode 100644
index 0000000..c4f6b5b
--- /dev/null
+++ b/hw/arm/max78000fthr.c
@@ -0,0 +1,50 @@
+/*
+ * MAX78000FTHR Evaluation Board
+ *
+ * Copyright (c) 2025 Jackson Donaldson <jcksn@duck.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/boards.h"
+#include "hw/qdev-properties.h"
+#include "hw/qdev-clock.h"
+#include "qemu/error-report.h"
+#include "hw/arm/max78000_soc.h"
+#include "hw/arm/boot.h"
+
+/* 60MHz is the default, but other clocks can be selected. */
+#define SYSCLK_FRQ 60000000ULL
+static void max78000_init(MachineState *machine)
+{
+ DeviceState *dev;
+ Clock *sysclk;
+
+ sysclk = clock_new(OBJECT(machine), "SYSCLK");
+ clock_set_hz(sysclk, SYSCLK_FRQ);
+
+ dev = qdev_new(TYPE_MAX78000_SOC);
+ object_property_add_child(OBJECT(machine), "soc", OBJECT(dev));
+ qdev_connect_clock_in(dev, "sysclk", sysclk);
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+
+ armv7m_load_kernel(ARM_CPU(first_cpu),
+ machine->kernel_filename,
+ 0x00000000, FLASH_SIZE);
+}
+
+static void max78000_machine_init(MachineClass *mc)
+{
+ static const char * const valid_cpu_types[] = {
+ ARM_CPU_TYPE_NAME("cortex-m4"),
+ NULL
+ };
+
+ mc->desc = "MAX78000FTHR Board (Cortex-M4 / (Unimplemented) RISC-V)";
+ mc->init = max78000_init;
+ mc->valid_cpu_types = valid_cpu_types;
+}
+
+DEFINE_MACHINE("max78000fthr", max78000_machine_init)
diff --git a/hw/arm/meson.build b/hw/arm/meson.build
index 5098795..b88b5b0 100644
--- a/hw/arm/meson.build
+++ b/hw/arm/meson.build
@@ -8,7 +8,7 @@ arm_common_ss.add(when: 'CONFIG_HIGHBANK', if_true: files('highbank.c'))
arm_common_ss.add(when: 'CONFIG_INTEGRATOR', if_true: files('integratorcp.c'))
arm_common_ss.add(when: 'CONFIG_MICROBIT', if_true: files('microbit.c'))
arm_common_ss.add(when: 'CONFIG_MPS3R', if_true: files('mps3r.c'))
-arm_common_ss.add(when: 'CONFIG_MUSICPAL', if_true: [pixman, files('musicpal.c')])
+arm_common_ss.add(when: 'CONFIG_MUSICPAL', if_true: [files('musicpal.c')])
arm_common_ss.add(when: 'CONFIG_NETDUINOPLUS2', if_true: files('netduinoplus2.c'))
arm_common_ss.add(when: 'CONFIG_OLIMEX_STM32_H405', if_true: files('olimex-stm32-h405.c'))
arm_common_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx.c', 'npcm7xx_boards.c'))
@@ -27,6 +27,7 @@ arm_common_ss.add(when: 'CONFIG_OMAP', if_true: files('omap1.c'))
arm_common_ss.add(when: 'CONFIG_ALLWINNER_A10', if_true: files('allwinner-a10.c', 'cubieboard.c'))
arm_common_ss.add(when: 'CONFIG_ALLWINNER_H3', if_true: files('allwinner-h3.c', 'orangepi.c'))
arm_common_ss.add(when: 'CONFIG_ALLWINNER_R40', if_true: files('allwinner-r40.c', 'bananapi_m2u.c'))
+arm_common_ss.add(when: 'CONFIG_MAX78000_SOC', if_true: files('max78000_soc.c'))
arm_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2836.c', 'raspi.c'))
arm_common_ss.add(when: ['CONFIG_RASPI', 'TARGET_AARCH64'], if_true: files('bcm2838.c', 'raspi4b.c'))
arm_common_ss.add(when: 'CONFIG_STM32F100_SOC', if_true: files('stm32f100_soc.c'))
@@ -44,14 +45,15 @@ arm_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files(
'aspeed_soc_common.c',
'aspeed_ast2400.c',
'aspeed_ast2600.c',
- 'aspeed_ast27x0-ssp.c',
- 'aspeed_ast27x0-tsp.c',
'aspeed_ast10x0.c',
'aspeed_eeprom.c',
'fby35.c'))
arm_common_ss.add(when: ['CONFIG_ASPEED_SOC', 'TARGET_AARCH64'], if_true: files(
'aspeed_ast27x0.c',
- 'aspeed_ast27x0-fc.c',))
+ 'aspeed_ast27x0-fc.c',
+ 'aspeed_ast27x0-ssp.c',
+ 'aspeed_ast27x0-tsp.c',
+ 'aspeed_coprocessor_common.c'))
arm_common_ss.add(when: 'CONFIG_MPS2', if_true: files('mps2.c'))
arm_common_ss.add(when: 'CONFIG_MPS2', if_true: files('mps2-tz.c'))
arm_common_ss.add(when: 'CONFIG_MSF2', if_true: files('msf2-soc.c'))
@@ -71,6 +73,7 @@ arm_ss.add(when: 'CONFIG_XEN', if_true: files(
arm_common_ss.add(when: 'CONFIG_ARM_SMMUV3', if_true: files('smmu-common.c'))
arm_common_ss.add(when: 'CONFIG_COLLIE', if_true: files('collie.c'))
arm_common_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4_boards.c'))
+arm_common_ss.add(when: 'CONFIG_MAX78000FTHR', if_true: files('max78000fthr.c'))
arm_common_ss.add(when: 'CONFIG_NETDUINO2', if_true: files('netduino2.c'))
arm_common_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2835_peripherals.c'))
arm_common_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2838_peripherals.c'))
@@ -79,7 +82,7 @@ arm_common_ss.add(when: 'CONFIG_SX1', if_true: files('omap_sx1.c'))
arm_common_ss.add(when: 'CONFIG_VERSATILE', if_true: files('versatilepb.c'))
arm_common_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c'))
-arm_common_ss.add(fdt, files('boot.c'))
+arm_common_ss.add(files('boot.c'))
hw_arch += {'arm': arm_ss}
hw_common_arch += {'arm': arm_common_ss}
diff --git a/hw/arm/mps2.c b/hw/arm/mps2.c
index 58efb41..bd378e3 100644
--- a/hw/arm/mps2.c
+++ b/hw/arm/mps2.c
@@ -224,7 +224,11 @@ static void mps2_common_init(MachineState *machine)
switch (mmc->fpga_type) {
case FPGA_AN385:
case FPGA_AN386:
+ qdev_prop_set_uint32(armv7m, "num-irq", 32);
+ break;
case FPGA_AN500:
+ /* The AN500 configures its Cortex-M7 with 16 MPU regions */
+ qdev_prop_set_uint32(armv7m, "mpu-ns-regions", 16);
qdev_prop_set_uint32(armv7m, "num-irq", 32);
break;
case FPGA_AN511:
diff --git a/hw/arm/npcm7xx.c b/hw/arm/npcm7xx.c
index 2f30c49..ecfae32 100644
--- a/hw/arm/npcm7xx.c
+++ b/hw/arm/npcm7xx.c
@@ -24,7 +24,7 @@
#include "hw/qdev-clock.h"
#include "hw/qdev-properties.h"
#include "qapi/error.h"
-#include "qemu/bswap.h"
+#include "exec/tswap.h"
#include "qemu/units.h"
#include "system/system.h"
#include "target/arm/cpu-qom.h"
diff --git a/hw/arm/npcm8xx.c b/hw/arm/npcm8xx.c
index d7ee306..a276fea 100644
--- a/hw/arm/npcm8xx.c
+++ b/hw/arm/npcm8xx.c
@@ -67,6 +67,9 @@
/* SDHCI Modules */
#define NPCM8XX_MMC_BA 0xf0842000
+/* PCS Module */
+#define NPCM8XX_PCS_BA 0xf0780000
+
/* PSPI Modules */
#define NPCM8XX_PSPI_BA 0xf0201000
@@ -85,6 +88,10 @@ enum NPCM8xxInterrupt {
NPCM8XX_ADC_IRQ = 0,
NPCM8XX_PECI_IRQ = 6,
NPCM8XX_KCS_HIB_IRQ = 9,
+ NPCM8XX_GMAC1_IRQ = 14,
+ NPCM8XX_GMAC2_IRQ,
+ NPCM8XX_GMAC3_IRQ,
+ NPCM8XX_GMAC4_IRQ,
NPCM8XX_MMC_IRQ = 26,
NPCM8XX_PSPI_IRQ = 28,
NPCM8XX_TIMER0_IRQ = 32, /* Timer Module 0 */
@@ -260,6 +267,14 @@ static const hwaddr npcm8xx_smbus_addr[] = {
0xfff0a000,
};
+/* Register base address for each GMAC Module */
+static const hwaddr npcm8xx_gmac_addr[] = {
+ 0xf0802000,
+ 0xf0804000,
+ 0xf0806000,
+ 0xf0808000,
+};
+
/* Register base address for each USB host EHCI registers */
static const hwaddr npcm8xx_ehci_addr[] = {
0xf0828100,
@@ -350,6 +365,7 @@ static struct arm_boot_info npcm8xx_binfo = {
.secure_boot = false,
.board_id = -1,
.board_setup_addr = NPCM8XX_BOARD_SETUP_ADDR,
+ .psci_conduit = QEMU_PSCI_CONDUIT_SMC,
};
void npcm8xx_load_kernel(MachineState *machine, NPCM8xxState *soc)
@@ -444,6 +460,11 @@ static void npcm8xx_init(Object *obj)
object_initialize_child(obj, "mft[*]", &s->mft[i], TYPE_NPCM7XX_MFT);
}
+ for (i = 0; i < ARRAY_SIZE(s->gmac); i++) {
+ object_initialize_child(obj, "gmac[*]", &s->gmac[i], TYPE_NPCM_GMAC);
+ }
+ object_initialize_child(obj, "pcs", &s->pcs, TYPE_NPCM_PCS);
+
object_initialize_child(obj, "mmc", &s->mmc, TYPE_NPCM7XX_SDHCI);
object_initialize_child(obj, "pspi", &s->pspi, TYPE_NPCM_PSPI);
}
@@ -669,6 +690,35 @@ static void npcm8xx_realize(DeviceState *dev, Error **errp)
}
/*
+ * GMAC Modules. Cannot fail.
+ */
+ QEMU_BUILD_BUG_ON(ARRAY_SIZE(npcm8xx_gmac_addr) != ARRAY_SIZE(s->gmac));
+ for (i = 0; i < ARRAY_SIZE(s->gmac); i++) {
+ SysBusDevice *sbd = SYS_BUS_DEVICE(&s->gmac[i]);
+
+ /* This is used to make sure that the NIC can create the device */
+ qemu_configure_nic_device(DEVICE(sbd), false, NULL);
+
+ /*
+ * The device exists regardless of whether it's connected to a QEMU
+ * netdev backend. So always instantiate it even if there is no
+ * backend.
+ */
+ sysbus_realize(sbd, &error_abort);
+ sysbus_mmio_map(sbd, 0, npcm8xx_gmac_addr[i]);
+ /*
+ * N.B. The values for the second argument sysbus_connect_irq are
+ * chosen to match the registration order in npcm7xx_emc_realize.
+ */
+ sysbus_connect_irq(sbd, 0, npcm8xx_irq(s, NPCM8XX_GMAC1_IRQ + i));
+ }
+ /*
+ * GMAC Physical Coding Sublayer(PCS) Module. Cannot fail.
+ */
+ sysbus_realize(SYS_BUS_DEVICE(&s->pcs), &error_abort);
+ sysbus_mmio_map(SYS_BUS_DEVICE(&s->pcs), 0, NPCM8XX_PCS_BA);
+
+ /*
* Flash Interface Unit (FIU). Can fail if incorrect number of chip selects
* specified, but this is a programming error.
*/
@@ -741,12 +791,7 @@ static void npcm8xx_realize(DeviceState *dev, Error **errp)
create_unimplemented_device("npcm8xx.ahbpci", 0xf0400000, 1 * MiB);
create_unimplemented_device("npcm8xx.dap", 0xf0500000, 960 * KiB);
create_unimplemented_device("npcm8xx.mcphy", 0xf05f0000, 64 * KiB);
- create_unimplemented_device("npcm8xx.pcs", 0xf0780000, 256 * KiB);
create_unimplemented_device("npcm8xx.tsgen", 0xf07fc000, 8 * KiB);
- create_unimplemented_device("npcm8xx.gmac1", 0xf0802000, 8 * KiB);
- create_unimplemented_device("npcm8xx.gmac2", 0xf0804000, 8 * KiB);
- create_unimplemented_device("npcm8xx.gmac3", 0xf0806000, 8 * KiB);
- create_unimplemented_device("npcm8xx.gmac4", 0xf0808000, 8 * KiB);
create_unimplemented_device("npcm8xx.copctl", 0xf080c000, 4 * KiB);
create_unimplemented_device("npcm8xx.tipctl", 0xf080d000, 4 * KiB);
create_unimplemented_device("npcm8xx.rst", 0xf080e000, 4 * KiB);
diff --git a/hw/arm/raspi4b.c b/hw/arm/raspi4b.c
index 20082d5..4df951a 100644
--- a/hw/arm/raspi4b.c
+++ b/hw/arm/raspi4b.c
@@ -36,9 +36,8 @@ struct Raspi4bMachineState {
* (see https://datasheets.raspberrypi.com/bcm2711/bcm2711-peripherals.pdf
* 1.2 Address Map)
*/
-static int raspi_add_memory_node(void *fdt, hwaddr mem_base, hwaddr mem_len)
+static void raspi_add_memory_node(void *fdt, hwaddr mem_base, hwaddr mem_len)
{
- int ret;
uint32_t acells, scells;
char *nodename = g_strdup_printf("/memory@%" PRIx64, mem_base);
@@ -46,19 +45,16 @@ static int raspi_add_memory_node(void *fdt, hwaddr mem_base, hwaddr mem_len)
NULL, &error_fatal);
scells = qemu_fdt_getprop_cell(fdt, "/", "#size-cells",
NULL, &error_fatal);
- if (acells == 0 || scells == 0) {
- fprintf(stderr, "dtb file invalid (#address-cells or #size-cells 0)\n");
- ret = -1;
- } else {
- qemu_fdt_add_subnode(fdt, nodename);
- qemu_fdt_setprop_string(fdt, nodename, "device_type", "memory");
- ret = qemu_fdt_setprop_sized_cells(fdt, nodename, "reg",
- acells, mem_base,
- scells, mem_len);
- }
+ /* validated by arm_load_dtb */
+ g_assert(acells && scells);
+
+ qemu_fdt_add_subnode(fdt, nodename);
+ qemu_fdt_setprop_string(fdt, nodename, "device_type", "memory");
+ qemu_fdt_setprop_sized_cells(fdt, nodename, "reg",
+ acells, mem_base,
+ scells, mem_len);
g_free(nodename);
- return ret;
}
static void raspi4_modify_dtb(const struct arm_boot_info *info, void *fdt)
diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c
index deae5cf..15c1ff4 100644
--- a/hw/arm/sbsa-ref.c
+++ b/hw/arm/sbsa-ref.c
@@ -19,6 +19,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/cutils.h"
#include "qemu/datadir.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
@@ -53,8 +54,7 @@
#include "target/arm/cpu-qom.h"
#include "target/arm/gtimer.h"
-#define RAMLIMIT_GB 8192
-#define RAMLIMIT_BYTES (RAMLIMIT_GB * GiB)
+#define RAMLIMIT_BYTES (8 * TiB)
#define NUM_IRQS 256
#define NUM_SMMU_IRQS 4
@@ -756,7 +756,9 @@ static void sbsa_ref_init(MachineState *machine)
sms->smp_cpus = smp_cpus;
if (machine->ram_size > sbsa_ref_memmap[SBSA_MEM].size) {
- error_report("sbsa-ref: cannot model more than %dGB RAM", RAMLIMIT_GB);
+ char *size_str = size_to_str(RAMLIMIT_BYTES);
+
+ error_report("sbsa-ref: cannot model more than %s of RAM", size_str);
exit(1);
}
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index f39b99e..62a7612 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -20,6 +20,7 @@
#include "trace.h"
#include "exec/target_page.h"
#include "hw/core/cpu.h"
+#include "hw/pci/pci_bridge.h"
#include "hw/qdev-properties.h"
#include "qapi/error.h"
#include "qemu/jhash.h"
@@ -319,7 +320,7 @@ void smmu_iotlb_inv_vmid(SMMUState *s, int vmid)
g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid, &vmid);
}
-inline void smmu_iotlb_inv_vmid_s1(SMMUState *s, int vmid)
+void smmu_iotlb_inv_vmid_s1(SMMUState *s, int vmid)
{
trace_smmu_iotlb_inv_vmid_s1(vmid);
g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid_s1, &vmid);
@@ -925,6 +926,7 @@ static void smmu_base_realize(DeviceState *dev, Error **errp)
{
SMMUState *s = ARM_SMMU(dev);
SMMUBaseClass *sbc = ARM_SMMU_GET_CLASS(dev);
+ PCIBus *pci_bus = s->primary_bus;
Error *local_err = NULL;
sbc->parent_realize(dev, &local_err);
@@ -937,11 +939,39 @@ static void smmu_base_realize(DeviceState *dev, Error **errp)
g_free, g_free);
s->smmu_pcibus_by_busptr = g_hash_table_new(NULL, NULL);
- if (s->primary_bus) {
- pci_setup_iommu(s->primary_bus, &smmu_ops, s);
- } else {
+ if (!pci_bus) {
error_setg(errp, "SMMU is not attached to any PCI bus!");
+ return;
+ }
+
+ /*
+ * We only allow default PCIe Root Complex(pcie.0) or pxb-pcie based extra
+ * root complexes to be associated with SMMU.
+ */
+ if (pci_bus_is_express(pci_bus) && pci_bus_is_root(pci_bus) &&
+ object_dynamic_cast(OBJECT(pci_bus)->parent, TYPE_PCI_HOST_BRIDGE)) {
+ /*
+ * This condition matches either the default pcie.0, pxb-pcie, or
+ * pxb-cxl. For both pxb-pcie and pxb-cxl, parent_dev will be set.
+ * Currently, we don't allow pxb-cxl as it requires further
+ * verification. Therefore, make sure this is indeed pxb-pcie.
+ */
+ if (pci_bus->parent_dev) {
+ if (!object_dynamic_cast(OBJECT(pci_bus), TYPE_PXB_PCIE_BUS)) {
+ goto out_err;
+ }
+ }
+
+ if (s->smmu_per_bus) {
+ pci_setup_iommu_per_bus(pci_bus, &smmu_ops, s);
+ } else {
+ pci_setup_iommu(pci_bus, &smmu_ops, s);
+ }
+ return;
}
+out_err:
+ error_setg(errp, "SMMU should be attached to a default PCIe root complex"
+ "(pcie.0) or a pxb-pcie based root complex");
}
/*
@@ -961,6 +991,7 @@ static void smmu_base_reset_exit(Object *obj, ResetType type)
static const Property smmu_dev_properties[] = {
DEFINE_PROP_UINT8("bus_num", SMMUState, bus_num, 0),
+ DEFINE_PROP_BOOL("smmu_per_bus", SMMUState, smmu_per_bus, false),
DEFINE_PROP_LINK("primary-bus", SMMUState, primary_bus,
TYPE_PCI_BUS, PCIBus *),
};
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index ab67972..bcf8af8 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1996,6 +1996,8 @@ static void smmuv3_class_init(ObjectClass *klass, const void *data)
device_class_set_parent_realize(dc, smmu_realize,
&c->parent_realize);
device_class_set_props(dc, smmuv3_properties);
+ dc->hotpluggable = false;
+ dc->user_creatable = true;
}
static int smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu,
diff --git a/hw/arm/stm32f205_soc.c b/hw/arm/stm32f205_soc.c
index 229af7f..e3c7203 100644
--- a/hw/arm/stm32f205_soc.c
+++ b/hw/arm/stm32f205_soc.c
@@ -66,7 +66,7 @@ static void stm32f205_soc_initfn(Object *obj)
TYPE_STM32F2XX_TIMER);
}
- s->adc_irqs = OR_IRQ(object_new(TYPE_OR_IRQ));
+ object_initialize_child(obj, "adc-irq-orgate", &s->adc_irqs, TYPE_OR_IRQ);
for (i = 0; i < STM_NUM_ADCS; i++) {
object_initialize_child(obj, "adc[*]", &s->adc[i], TYPE_STM32F2XX_ADC);
@@ -171,12 +171,12 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
}
/* ADC 1 to 3 */
- object_property_set_int(OBJECT(s->adc_irqs), "num-lines", STM_NUM_ADCS,
+ object_property_set_int(OBJECT(&s->adc_irqs), "num-lines", STM_NUM_ADCS,
&error_abort);
- if (!qdev_realize(DEVICE(s->adc_irqs), NULL, errp)) {
+ if (!qdev_realize(DEVICE(&s->adc_irqs), NULL, errp)) {
return;
}
- qdev_connect_gpio_out(DEVICE(s->adc_irqs), 0,
+ qdev_connect_gpio_out(DEVICE(&s->adc_irqs), 0,
qdev_get_gpio_in(armv7m, ADC_IRQ));
for (i = 0; i < STM_NUM_ADCS; i++) {
@@ -187,7 +187,7 @@ static void stm32f205_soc_realize(DeviceState *dev_soc, Error **errp)
busdev = SYS_BUS_DEVICE(dev);
sysbus_mmio_map(busdev, 0, adc_addr[i]);
sysbus_connect_irq(busdev, 0,
- qdev_get_gpio_in(DEVICE(s->adc_irqs), i));
+ qdev_get_gpio_in(DEVICE(&s->adc_irqs), i));
}
/* SPI 1 and 2 */
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 7e8e0f0..8bb6b60 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -34,15 +34,19 @@
#include "hw/core/cpu.h"
#include "hw/acpi/acpi-defs.h"
#include "hw/acpi/acpi.h"
+#include "hw/acpi/pcihp.h"
#include "hw/nvram/fw_cfg_acpi.h"
#include "hw/acpi/bios-linker-loader.h"
#include "hw/acpi/aml-build.h"
#include "hw/acpi/utils.h"
#include "hw/acpi/pci.h"
+#include "hw/acpi/cxl.h"
#include "hw/acpi/memory_hotplug.h"
#include "hw/acpi/generic_event_device.h"
#include "hw/acpi/tpm.h"
#include "hw/acpi/hmat.h"
+#include "hw/arm/smmuv3.h"
+#include "hw/cxl/cxl.h"
#include "hw/pci/pcie_host.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_bus.h"
@@ -119,16 +123,44 @@ static void acpi_dsdt_add_flash(Aml *scope, const MemMapEntry *flash_memmap)
aml_append(scope, dev);
}
+static void build_acpi0017(Aml *table)
+{
+ Aml *dev, *scope, *method;
+
+ scope = aml_scope("_SB");
+ dev = aml_device("CXLM");
+ aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0017")));
+
+ method = aml_method("_STA", 0, AML_NOTSERIALIZED);
+ aml_append(method, aml_return(aml_int(0x0B)));
+ aml_append(dev, method);
+ build_cxl_dsm_method(dev);
+
+ aml_append(scope, dev);
+ aml_append(table, scope);
+}
+
static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap,
uint32_t irq, VirtMachineState *vms)
{
int ecam_id = VIRT_ECAM_ID(vms->highmem_ecam);
+ bool cxl_present = false;
+ PCIBus *bus = vms->bus;
+ bool acpi_pcihp = false;
+
+ if (vms->acpi_dev) {
+ acpi_pcihp = object_property_get_bool(OBJECT(vms->acpi_dev),
+ ACPI_PM_PROP_ACPI_PCIHP_BRIDGE,
+ NULL);
+ }
+
struct GPEXConfig cfg = {
.mmio32 = memmap[VIRT_PCIE_MMIO],
.pio = memmap[VIRT_PCIE_PIO],
.ecam = memmap[ecam_id],
.irq = irq,
.bus = vms->bus,
+ .pci_native_hotplug = !acpi_pcihp,
};
if (vms->highmem_mmio) {
@@ -136,6 +168,14 @@ static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap,
}
acpi_dsdt_add_gpex(scope, &cfg);
+ QLIST_FOREACH(bus, &vms->bus->child, sibling) {
+ if (pci_bus_is_cxl(bus)) {
+ cxl_present = true;
+ }
+ }
+ if (cxl_present) {
+ build_acpi0017(scope);
+ }
}
static void acpi_dsdt_add_gpio(Aml *scope, const MemMapEntry *gpio_memmap,
@@ -266,41 +306,117 @@ static int iort_idmap_compare(gconstpointer a, gconstpointer b)
return idmap_a->input_base - idmap_b->input_base;
}
+typedef struct AcpiIortSMMUv3Dev {
+ int irq;
+ hwaddr base;
+ GArray *rc_smmu_idmaps;
+ /* Offset of the SMMUv3 IORT Node relative to the start of the IORT */
+ size_t offset;
+} AcpiIortSMMUv3Dev;
+
/*
- * Input Output Remapping Table (IORT)
- * Conforms to "IO Remapping Table System Software on ARM Platforms",
- * Document number: ARM DEN 0049E.b, Feb 2021
+ * Populate the struct AcpiIortSMMUv3Dev for the legacy SMMUv3 and
+ * return the total number of associated idmaps.
*/
-static void
-build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
+static int populate_smmuv3_legacy_dev(GArray *sdev_blob)
{
- int i, nb_nodes, rc_mapping_count;
- size_t node_size, smmu_offset = 0;
- AcpiIortIdMapping *idmap;
- uint32_t id = 0;
- GArray *smmu_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping));
- GArray *its_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping));
+ VirtMachineState *vms = VIRT_MACHINE(qdev_get_machine());
+ AcpiIortSMMUv3Dev sdev;
- AcpiTable table = { .sig = "IORT", .rev = 3, .oem_id = vms->oem_id,
- .oem_table_id = vms->oem_table_id };
- /* Table 2 The IORT */
- acpi_table_begin(&table, table_data);
+ sdev.rc_smmu_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping));
+ object_child_foreach_recursive(object_get_root(), iort_host_bridges,
+ sdev.rc_smmu_idmaps);
+ /*
+ * There can be only one legacy SMMUv3("iommu=smmuv3") as it is a machine
+ * wide one. Since it may cover multiple PCIe RCs(based on "bypass_iommu"
+ * property), may have multiple SMMUv3 idmaps. Sort it by input_base.
+ */
+ g_array_sort(sdev.rc_smmu_idmaps, iort_idmap_compare);
+
+ sdev.base = vms->memmap[VIRT_SMMU].base;
+ sdev.irq = vms->irqmap[VIRT_SMMU] + ARM_SPI_BASE;
+ g_array_append_val(sdev_blob, sdev);
+ return sdev.rc_smmu_idmaps->len;
+}
- if (vms->iommu == VIRT_IOMMU_SMMUV3) {
- AcpiIortIdMapping next_range = {0};
+static int smmuv3_dev_idmap_compare(gconstpointer a, gconstpointer b)
+{
+ AcpiIortSMMUv3Dev *sdev_a = (AcpiIortSMMUv3Dev *)a;
+ AcpiIortSMMUv3Dev *sdev_b = (AcpiIortSMMUv3Dev *)b;
+ AcpiIortIdMapping *map_a = &g_array_index(sdev_a->rc_smmu_idmaps,
+ AcpiIortIdMapping, 0);
+ AcpiIortIdMapping *map_b = &g_array_index(sdev_b->rc_smmu_idmaps,
+ AcpiIortIdMapping, 0);
+ return map_a->input_base - map_b->input_base;
+}
+
+static int iort_smmuv3_devices(Object *obj, void *opaque)
+{
+ VirtMachineState *vms = VIRT_MACHINE(qdev_get_machine());
+ GArray *sdev_blob = opaque;
+ AcpiIortIdMapping idmap;
+ PlatformBusDevice *pbus;
+ AcpiIortSMMUv3Dev sdev;
+ int min_bus, max_bus;
+ SysBusDevice *sbdev;
+ PCIBus *bus;
+
+ if (!object_dynamic_cast(obj, TYPE_ARM_SMMUV3)) {
+ return 0;
+ }
+
+ bus = PCI_BUS(object_property_get_link(obj, "primary-bus", &error_abort));
+ pbus = PLATFORM_BUS_DEVICE(vms->platform_bus_dev);
+ sbdev = SYS_BUS_DEVICE(obj);
+ sdev.base = platform_bus_get_mmio_addr(pbus, sbdev, 0);
+ sdev.base += vms->memmap[VIRT_PLATFORM_BUS].base;
+ sdev.irq = platform_bus_get_irqn(pbus, sbdev, 0);
+ sdev.irq += vms->irqmap[VIRT_PLATFORM_BUS];
+ sdev.irq += ARM_SPI_BASE;
+
+ pci_bus_range(bus, &min_bus, &max_bus);
+ sdev.rc_smmu_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping));
+ idmap.input_base = min_bus << 8,
+ idmap.id_count = (max_bus - min_bus + 1) << 8,
+ g_array_append_val(sdev.rc_smmu_idmaps, idmap);
+ g_array_append_val(sdev_blob, sdev);
+ return 0;
+}
- object_child_foreach_recursive(object_get_root(),
- iort_host_bridges, smmu_idmaps);
+/*
+ * Populate the struct AcpiIortSMMUv3Dev for all SMMUv3 devices and
+ * return the total number of idmaps.
+ */
+static int populate_smmuv3_dev(GArray *sdev_blob)
+{
+ object_child_foreach_recursive(object_get_root(),
+ iort_smmuv3_devices, sdev_blob);
+ /* Sort the smmuv3 devices(if any) by smmu idmap input_base */
+ g_array_sort(sdev_blob, smmuv3_dev_idmap_compare);
+ /*
+ * Since each SMMUv3 dev is assocaited with specific host bridge,
+ * total number of idmaps equals to total number of smmuv3 devices.
+ */
+ return sdev_blob->len;
+}
- /* Sort the smmu idmap by input_base */
- g_array_sort(smmu_idmaps, iort_idmap_compare);
+/* Compute ID ranges (RIDs) from RC that are directed to the ITS Group node */
+static void create_rc_its_idmaps(GArray *its_idmaps, GArray *smmuv3_devs)
+{
+ AcpiIortIdMapping *idmap;
+ AcpiIortIdMapping next_range = {0};
+ AcpiIortSMMUv3Dev *sdev;
+ for (int i = 0; i < smmuv3_devs->len; i++) {
+ sdev = &g_array_index(smmuv3_devs, AcpiIortSMMUv3Dev, i);
/*
- * Split the whole RIDs by mapping from RC to SMMU,
- * build the ID mapping from RC to ITS directly.
+ * Based on the RID ranges that are directed to the SMMU, determine the
+ * bypassed RID ranges, i.e., the ones that are directed to the ITS
+ * Group node and do not pass through the SMMU, by subtracting the
+ * SMMU-bound ranges from the full RID range (0x0000–0xFFFF).
*/
- for (i = 0; i < smmu_idmaps->len; i++) {
- idmap = &g_array_index(smmu_idmaps, AcpiIortIdMapping, i);
+ for (int j = 0; j < sdev->rc_smmu_idmaps->len; j++) {
+ idmap = &g_array_index(sdev->rc_smmu_idmaps, AcpiIortIdMapping, j);
if (next_range.input_base < idmap->input_base) {
next_range.id_count = idmap->input_base - next_range.input_base;
@@ -309,18 +425,71 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
next_range.input_base = idmap->input_base + idmap->id_count;
}
+ }
+ /*
+ * Append the last RC -> ITS ID mapping.
+ *
+ * RIDs are 16-bit, according to the PCI Express 2.0 Base Specification, rev
+ * 0.9, section 2.2.6.2, "Transaction Descriptor - Transaction ID Field",
+ * hence the end of the range is 0x10000.
+ */
+ if (next_range.input_base < 0x10000) {
+ next_range.id_count = 0x10000 - next_range.input_base;
+ g_array_append_val(its_idmaps, next_range);
+ }
+}
- /* Append the last RC -> ITS ID mapping */
- if (next_range.input_base < 0x10000) {
- next_range.id_count = 0x10000 - next_range.input_base;
- g_array_append_val(its_idmaps, next_range);
- }
+/*
+ * Input Output Remapping Table (IORT)
+ * Conforms to "IO Remapping Table System Software on ARM Platforms",
+ * Document number: ARM DEN 0049E.b, Feb 2021
+ */
+static void
+build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
+{
+ int i, nb_nodes, rc_mapping_count;
+ AcpiIortSMMUv3Dev *sdev;
+ size_t node_size;
+ int num_smmus = 0;
+ uint32_t id = 0;
+ int rc_smmu_idmaps_len = 0;
+ GArray *smmuv3_devs = g_array_new(false, true, sizeof(AcpiIortSMMUv3Dev));
+ GArray *rc_its_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping));
- nb_nodes = 3; /* RC, ITS, SMMUv3 */
- rc_mapping_count = smmu_idmaps->len + its_idmaps->len;
+ AcpiTable table = { .sig = "IORT", .rev = 3, .oem_id = vms->oem_id,
+ .oem_table_id = vms->oem_table_id };
+ /* Table 2 The IORT */
+ acpi_table_begin(&table, table_data);
+
+ if (vms->legacy_smmuv3_present) {
+ rc_smmu_idmaps_len = populate_smmuv3_legacy_dev(smmuv3_devs);
+ } else {
+ rc_smmu_idmaps_len = populate_smmuv3_dev(smmuv3_devs);
+ }
+
+ num_smmus = smmuv3_devs->len;
+ if (num_smmus) {
+ nb_nodes = num_smmus + 1; /* RC and SMMUv3 */
+ rc_mapping_count = rc_smmu_idmaps_len;
+
+ if (vms->its) {
+ /*
+ * Knowing the ID ranges from the RC to the SMMU, it's possible to
+ * determine the ID ranges from RC that go directly to ITS.
+ */
+ create_rc_its_idmaps(rc_its_idmaps, smmuv3_devs);
+
+ nb_nodes++; /* ITS */
+ rc_mapping_count += rc_its_idmaps->len;
+ }
} else {
- nb_nodes = 2; /* RC, ITS */
- rc_mapping_count = 1;
+ if (vms->its) {
+ nb_nodes = 2; /* RC and ITS */
+ rc_mapping_count = 1; /* Direct map to ITS */
+ } else {
+ nb_nodes = 1; /* RC only */
+ rc_mapping_count = 0; /* No output mapping */
+ }
}
/* Number of IORT Nodes */
build_append_int_noprefix(table_data, nb_nodes, 4);
@@ -329,33 +498,46 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
build_append_int_noprefix(table_data, IORT_NODE_OFFSET, 4);
build_append_int_noprefix(table_data, 0, 4); /* Reserved */
- /* Table 12 ITS Group Format */
- build_append_int_noprefix(table_data, 0 /* ITS Group */, 1); /* Type */
- node_size = 20 /* fixed header size */ + 4 /* 1 GIC ITS Identifier */;
- build_append_int_noprefix(table_data, node_size, 2); /* Length */
- build_append_int_noprefix(table_data, 1, 1); /* Revision */
- build_append_int_noprefix(table_data, id++, 4); /* Identifier */
- build_append_int_noprefix(table_data, 0, 4); /* Number of ID mappings */
- build_append_int_noprefix(table_data, 0, 4); /* Reference to ID Array */
- build_append_int_noprefix(table_data, 1, 4); /* Number of ITSs */
- /* GIC ITS Identifier Array */
- build_append_int_noprefix(table_data, 0 /* MADT translation_id */, 4);
-
- if (vms->iommu == VIRT_IOMMU_SMMUV3) {
- int irq = vms->irqmap[VIRT_SMMU] + ARM_SPI_BASE;
+ if (vms->its) {
+ /* Table 12 ITS Group Format */
+ build_append_int_noprefix(table_data, 0 /* ITS Group */, 1); /* Type */
+ node_size = 20 /* fixed header size */ + 4 /* 1 GIC ITS Identifier */;
+ build_append_int_noprefix(table_data, node_size, 2); /* Length */
+ build_append_int_noprefix(table_data, 1, 1); /* Revision */
+ build_append_int_noprefix(table_data, id++, 4); /* Identifier */
+ build_append_int_noprefix(table_data, 0, 4); /* Number of ID mappings */
+ build_append_int_noprefix(table_data, 0, 4); /* Reference to ID Array */
+ build_append_int_noprefix(table_data, 1, 4); /* Number of ITSs */
+ /* GIC ITS Identifier Array */
+ build_append_int_noprefix(table_data, 0 /* MADT translation_id */, 4);
+ }
- smmu_offset = table_data->len - table.table_offset;
+ for (i = 0; i < num_smmus; i++) {
+ sdev = &g_array_index(smmuv3_devs, AcpiIortSMMUv3Dev, i);
+ int smmu_mapping_count, offset_to_id_array;
+ int irq = sdev->irq;
+
+ if (vms->its) {
+ smmu_mapping_count = 1; /* ITS Group node */
+ offset_to_id_array = SMMU_V3_ENTRY_SIZE; /* Just after the header */
+ } else {
+ smmu_mapping_count = 0; /* No ID mappings */
+ offset_to_id_array = 0; /* No ID mappings array */
+ }
+ sdev->offset = table_data->len - table.table_offset;
/* Table 9 SMMUv3 Format */
build_append_int_noprefix(table_data, 4 /* SMMUv3 */, 1); /* Type */
- node_size = SMMU_V3_ENTRY_SIZE + ID_MAPPING_ENTRY_SIZE;
+ node_size = SMMU_V3_ENTRY_SIZE +
+ (ID_MAPPING_ENTRY_SIZE * smmu_mapping_count);
build_append_int_noprefix(table_data, node_size, 2); /* Length */
build_append_int_noprefix(table_data, 4, 1); /* Revision */
build_append_int_noprefix(table_data, id++, 4); /* Identifier */
- build_append_int_noprefix(table_data, 1, 4); /* Number of ID mappings */
+ /* Number of ID mappings */
+ build_append_int_noprefix(table_data, smmu_mapping_count, 4);
/* Reference to ID Array */
- build_append_int_noprefix(table_data, SMMU_V3_ENTRY_SIZE, 4);
+ build_append_int_noprefix(table_data, offset_to_id_array, 4);
/* Base address */
- build_append_int_noprefix(table_data, vms->memmap[VIRT_SMMU].base, 8);
+ build_append_int_noprefix(table_data, sdev->base, 8);
/* Flags */
build_append_int_noprefix(table_data, 1 /* COHACC Override */, 4);
build_append_int_noprefix(table_data, 0, 4); /* Reserved */
@@ -369,9 +551,11 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
build_append_int_noprefix(table_data, 0, 4); /* Proximity domain */
/* DeviceID mapping index (ignored since interrupts are GSIV based) */
build_append_int_noprefix(table_data, 0, 4);
-
- /* output IORT node is the ITS group node (the first node) */
- build_iort_id_mapping(table_data, 0, 0x10000, IORT_NODE_OFFSET);
+ /* Array of ID mappings */
+ if (smmu_mapping_count) {
+ /* Output IORT node is the ITS Group node (the first node). */
+ build_iort_id_mapping(table_data, 0, 0x10000, IORT_NODE_OFFSET);
+ }
}
/* Table 17 Root Complex Node */
@@ -404,32 +588,56 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
build_append_int_noprefix(table_data, 0, 3); /* Reserved */
/* Output Reference */
- if (vms->iommu == VIRT_IOMMU_SMMUV3) {
+ if (num_smmus) {
AcpiIortIdMapping *range;
- /* translated RIDs connect to SMMUv3 node: RC -> SMMUv3 -> ITS */
- for (i = 0; i < smmu_idmaps->len; i++) {
- range = &g_array_index(smmu_idmaps, AcpiIortIdMapping, i);
- /* output IORT node is the smmuv3 node */
- build_iort_id_mapping(table_data, range->input_base,
- range->id_count, smmu_offset);
+ for (i = 0; i < num_smmus; i++) {
+ sdev = &g_array_index(smmuv3_devs, AcpiIortSMMUv3Dev, i);
+
+ /*
+ * Map RIDs (input) from RC to SMMUv3 nodes: RC -> SMMUv3.
+ *
+ * N.B.: The mapping from SMMUv3 to ITS Group node (SMMUv3 -> ITS)
+ * is defined in the SMMUv3 table, where all SMMUv3 IDs are mapped
+ * to the ITS Group node, if ITS is available.
+ */
+ for (int j = 0; j < sdev->rc_smmu_idmaps->len; j++) {
+ range = &g_array_index(sdev->rc_smmu_idmaps,
+ AcpiIortIdMapping, j);
+ /* Output IORT node is the SMMUv3 node. */
+ build_iort_id_mapping(table_data, range->input_base,
+ range->id_count, sdev->offset);
+ }
}
- /* bypassed RIDs connect to ITS group node directly: RC -> ITS */
- for (i = 0; i < its_idmaps->len; i++) {
- range = &g_array_index(its_idmaps, AcpiIortIdMapping, i);
- /* output IORT node is the ITS group node (the first node) */
- build_iort_id_mapping(table_data, range->input_base,
- range->id_count, IORT_NODE_OFFSET);
+ if (vms->its) {
+ /*
+ * Map bypassed (don't go through the SMMU) RIDs (input) to
+ * ITS Group node directly: RC -> ITS.
+ */
+ for (i = 0; i < rc_its_idmaps->len; i++) {
+ range = &g_array_index(rc_its_idmaps, AcpiIortIdMapping, i);
+ /* Output IORT node is the ITS Group node (the first node). */
+ build_iort_id_mapping(table_data, range->input_base,
+ range->id_count, IORT_NODE_OFFSET);
+ }
}
} else {
- /* output IORT node is the ITS group node (the first node) */
+ /*
+ * Map all RIDs (input) to ITS Group node directly, since there is no
+ * SMMU: RC -> ITS.
+ * Output IORT node is the ITS Group node (the first node).
+ */
build_iort_id_mapping(table_data, 0, 0x10000, IORT_NODE_OFFSET);
}
acpi_table_end(linker, &table);
- g_array_free(smmu_idmaps, true);
- g_array_free(its_idmaps, true);
+ g_array_free(rc_its_idmaps, true);
+ for (i = 0; i < num_smmus; i++) {
+ sdev = &g_array_index(smmuv3_devs, AcpiIortSMMUv3Dev, i);
+ g_array_free(sdev->rc_smmu_idmaps, true);
+ }
+ g_array_free(smmuv3_devs, true);
}
/*
@@ -737,7 +945,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
memmap[VIRT_HIGH_GIC_REDIST2].size);
}
- if (its_class_name()) {
+ if (vms->its) {
/*
* ACPI spec, Revision 6.0 Errata A
* (original 6.0 definition has invalid Length)
@@ -810,6 +1018,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
const int *irqmap = vms->irqmap;
AcpiTable table = { .sig = "DSDT", .rev = 2, .oem_id = vms->oem_id,
.oem_table_id = vms->oem_table_id };
+ Aml *pci0_scope;
acpi_table_begin(&table, table_data);
dsdt = init_aml_allocator();
@@ -857,12 +1066,40 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
}
acpi_dsdt_add_power_button(scope);
+ aml_append(scope, aml_error_device());
#ifdef CONFIG_TPM
acpi_dsdt_add_tpm(scope, vms);
#endif
aml_append(dsdt, scope);
+ pci0_scope = aml_scope("\\_SB.PCI0");
+
+ aml_append(pci0_scope, build_pci_bridge_edsm());
+ build_append_pci_bus_devices(pci0_scope, vms->bus);
+ if (object_property_find(OBJECT(vms->bus), ACPI_PCIHP_PROP_BSEL)) {
+ build_append_pcihp_slots(pci0_scope, vms->bus);
+ }
+
+ if (vms->acpi_dev) {
+ bool acpi_pcihp;
+
+ acpi_pcihp = object_property_get_bool(OBJECT(vms->acpi_dev),
+ ACPI_PM_PROP_ACPI_PCIHP_BRIDGE,
+ NULL);
+
+ if (acpi_pcihp) {
+ build_acpi_pci_hotplug(dsdt, AML_SYSTEM_MEMORY,
+ memmap[VIRT_ACPI_PCIHP].base);
+ build_append_pcihp_resources(pci0_scope,
+ memmap[VIRT_ACPI_PCIHP].base,
+ memmap[VIRT_ACPI_PCIHP].size);
+
+ build_append_notification_callback(pci0_scope, vms->bus);
+ }
+ }
+ aml_append(dsdt, pci0_scope);
+
/* copy AML table into ACPI tables blob */
g_array_append_vals(table_data, dsdt->buf->data, dsdt->buf->len);
@@ -889,6 +1126,15 @@ static void acpi_align_size(GArray *blob, unsigned align)
g_array_set_size(blob, ROUND_UP(acpi_data_len(blob), align));
}
+static const AcpiNotificationSourceId hest_ghes_notify[] = {
+ { ACPI_HEST_SRC_ID_SYNC, ACPI_GHES_NOTIFY_SEA },
+ { ACPI_HEST_SRC_ID_QMP, ACPI_GHES_NOTIFY_GPIO },
+};
+
+static const AcpiNotificationSourceId hest_ghes_notify_10_0[] = {
+ { ACPI_HEST_SRC_ID_SYNC, ACPI_GHES_NOTIFY_SEA },
+};
+
static
void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
{
@@ -936,15 +1182,37 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
}
acpi_add_table(table_offsets, tables_blob);
- spcr_setup(tables_blob, tables->linker, vms);
+
+ if (ms->acpi_spcr_enabled) {
+ spcr_setup(tables_blob, tables->linker, vms);
+ }
acpi_add_table(table_offsets, tables_blob);
build_dbg2(tables_blob, tables->linker, vms);
if (vms->ras) {
- acpi_add_table(table_offsets, tables_blob);
- acpi_build_hest(tables_blob, tables->hardware_errors, tables->linker,
- vms->oem_id, vms->oem_table_id);
+ AcpiGedState *acpi_ged_state;
+ static const AcpiNotificationSourceId *notify;
+ unsigned int notify_sz;
+ AcpiGhesState *ags;
+
+ acpi_ged_state = ACPI_GED(vms->acpi_dev);
+ ags = &acpi_ged_state->ghes_state;
+ if (ags) {
+ acpi_add_table(table_offsets, tables_blob);
+
+ if (!ags->use_hest_addr) {
+ notify = hest_ghes_notify_10_0;
+ notify_sz = ARRAY_SIZE(hest_ghes_notify_10_0);
+ } else {
+ notify = hest_ghes_notify;
+ notify_sz = ARRAY_SIZE(hest_ghes_notify);
+ }
+
+ acpi_build_hest(ags, tables_blob, tables->hardware_errors,
+ tables->linker, notify, notify_sz,
+ vms->oem_id, vms->oem_table_id);
+ }
}
if (ms->numa_state->num_nodes > 0) {
@@ -963,16 +1231,19 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
}
}
+ if (vms->cxl_devices_state.is_enabled) {
+ cxl_build_cedt(table_offsets, tables_blob, tables->linker,
+ vms->oem_id, vms->oem_table_id, &vms->cxl_devices_state);
+ }
+
if (ms->nvdimms_state->is_enabled) {
nvdimm_build_acpi(table_offsets, tables_blob, tables->linker,
ms->nvdimms_state, ms->ram_slots, vms->oem_id,
vms->oem_table_id);
}
- if (its_class_name()) {
- acpi_add_table(table_offsets, tables_blob);
- build_iort(tables_blob, tables->linker, vms);
- }
+ acpi_add_table(table_offsets, tables_blob);
+ build_iort(tables_blob, tables->linker, vms);
#ifdef CONFIG_TPM
if (tpm_get_version(tpm_find()) == TPM_VERSION_2_0) {
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 9a6cd08..1750238 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -38,8 +38,6 @@
#include "hw/arm/primecell.h"
#include "hw/arm/virt.h"
#include "hw/block/flash.h"
-#include "hw/vfio/vfio-calxeda-xgmac.h"
-#include "hw/vfio/vfio-amd-xgbe.h"
#include "hw/display/ramfb.h"
#include "net/net.h"
#include "system/device_tree.h"
@@ -50,13 +48,16 @@
#include "system/kvm.h"
#include "system/hvf.h"
#include "system/qtest.h"
+#include "system/system.h"
#include "hw/loader.h"
#include "qapi/error.h"
#include "qemu/bitops.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
+#include "hw/pci/pci_bus.h"
#include "hw/pci-host/gpex.h"
+#include "hw/pci-bridge/pci_expander_bridge.h"
#include "hw/virtio/virtio-pci.h"
#include "hw/core/sysbus-fdt.h"
#include "hw/platform-bus.h"
@@ -75,6 +76,7 @@
#include "standard-headers/linux/input.h"
#include "hw/arm/smmuv3.h"
#include "hw/acpi/acpi.h"
+#include "hw/acpi/pcihp.h"
#include "target/arm/cpu-qom.h"
#include "target/arm/internals.h"
#include "target/arm/multiprocessing.h"
@@ -86,6 +88,8 @@
#include "hw/virtio/virtio-md-pci.h"
#include "hw/virtio/virtio-iommu.h"
#include "hw/char/pl011.h"
+#include "hw/cxl/cxl.h"
+#include "hw/cxl/cxl_host.h"
#include "qemu/guest-random.h"
static GlobalProperty arm_virt_compat[] = {
@@ -146,6 +150,9 @@ static void arm_virt_compat_set(MachineClass *mc)
#define LEGACY_RAMLIMIT_GB 255
#define LEGACY_RAMLIMIT_BYTES (LEGACY_RAMLIMIT_GB * GiB)
+/* MMIO region size for SMMUv3 */
+#define SMMU_IO_LEN 0x20000
+
/* Addresses and sizes of our components.
* 0..128MB is space for a flash device so we can run bootrom code such as UEFI.
* 128MB..256MB is used for miscellaneous device I/O.
@@ -177,12 +184,13 @@ static const MemMapEntry base_memmap[] = {
[VIRT_FW_CFG] = { 0x09020000, 0x00000018 },
[VIRT_GPIO] = { 0x09030000, 0x00001000 },
[VIRT_UART1] = { 0x09040000, 0x00001000 },
- [VIRT_SMMU] = { 0x09050000, 0x00020000 },
+ [VIRT_SMMU] = { 0x09050000, SMMU_IO_LEN },
[VIRT_PCDIMM_ACPI] = { 0x09070000, MEMORY_HOTPLUG_IO_LEN },
[VIRT_ACPI_GED] = { 0x09080000, ACPI_GED_EVT_SEL_LEN },
[VIRT_NVDIMM_ACPI] = { 0x09090000, NVDIMM_ACPI_IO_LEN},
[VIRT_PVTIME] = { 0x090a0000, 0x00010000 },
[VIRT_SECURE_GPIO] = { 0x090b0000, 0x00001000 },
+ [VIRT_ACPI_PCIHP] = { 0x090c0000, ACPI_PCIHP_SIZE },
[VIRT_MMIO] = { 0x0a000000, 0x00000200 },
/* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
[VIRT_PLATFORM_BUS] = { 0x0c000000, 0x02000000 },
@@ -220,9 +228,11 @@ static const MemMapEntry base_memmap[] = {
static MemMapEntry extended_memmap[] = {
/* Additional 64 MB redist region (can contain up to 512 redistributors) */
[VIRT_HIGH_GIC_REDIST2] = { 0x0, 64 * MiB },
+ [VIRT_CXL_HOST] = { 0x0, 64 * KiB * 16 }, /* 16 UID */
[VIRT_HIGH_PCIE_ECAM] = { 0x0, 256 * MiB },
/* Second PCIe window */
[VIRT_HIGH_PCIE_MMIO] = { 0x0, DEFAULT_HIGH_PCIE_MMIO_SIZE },
+ /* Any CXL Fixed memory windows come here */
};
static const int a15irqmap[] = {
@@ -681,8 +691,10 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms)
{
DeviceState *dev;
MachineState *ms = MACHINE(vms);
+ SysBusDevice *sbdev;
int irq = vms->irqmap[VIRT_ACPI_GED];
- uint32_t event = ACPI_GED_PWR_DOWN_EVT;
+ uint32_t event = ACPI_GED_PWR_DOWN_EVT | ACPI_GED_ERROR_EVT;
+ bool acpi_pcihp;
if (ms->ram_slots) {
event |= ACPI_GED_MEM_HOTPLUG_EVT;
@@ -694,32 +706,44 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms)
dev = qdev_new(TYPE_ACPI_GED);
qdev_prop_set_uint32(dev, "ged-event", event);
- sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+ object_property_set_link(OBJECT(dev), "bus", OBJECT(vms->bus), &error_abort);
+ sbdev = SYS_BUS_DEVICE(dev);
+ sysbus_realize_and_unref(sbdev, &error_fatal);
+
+ sysbus_mmio_map_name(sbdev, TYPE_ACPI_GED, vms->memmap[VIRT_ACPI_GED].base);
+ sysbus_mmio_map_name(sbdev, ACPI_MEMHP_REGION_NAME,
+ vms->memmap[VIRT_PCDIMM_ACPI].base);
+
+ acpi_pcihp = object_property_get_bool(OBJECT(dev),
+ ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, NULL);
- sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_ACPI_GED].base);
- sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, vms->memmap[VIRT_PCDIMM_ACPI].base);
- sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, qdev_get_gpio_in(vms->gic, irq));
+ if (acpi_pcihp) {
+ int pcihp_region_index;
+
+ pcihp_region_index = sysbus_mmio_map_name(sbdev, ACPI_PCIHP_REGION_NAME,
+ vms->memmap[VIRT_ACPI_PCIHP].base);
+ assert(pcihp_region_index >= 0);
+ }
+
+ sysbus_connect_irq(sbdev, 0, qdev_get_gpio_in(vms->gic, irq));
return dev;
}
static void create_its(VirtMachineState *vms)
{
- const char *itsclass = its_class_name();
DeviceState *dev;
- if (!strcmp(itsclass, "arm-gicv3-its")) {
- if (!vms->tcg_its) {
- itsclass = NULL;
- }
- }
-
- if (!itsclass) {
- /* Do nothing if not supported */
+ assert(vms->its);
+ if (!kvm_irqchip_in_kernel() && !vms->tcg_its) {
+ /*
+ * Do nothing if ITS is neither supported by the host nor emulated by
+ * the machine.
+ */
return;
}
- dev = qdev_new(itsclass);
+ dev = qdev_new(its_class_name());
object_property_set_link(OBJECT(dev), "parent-gicv3", OBJECT(vms->gic),
&error_abort);
@@ -795,6 +819,13 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem)
default:
g_assert_not_reached();
}
+
+ if (kvm_enabled() && vms->virt &&
+ (revision != 3 || !kvm_irqchip_in_kernel())) {
+ error_report("KVM EL2 is only supported with in-kernel GICv3");
+ exit(1);
+ }
+
vms->gic = qdev_new(gictype);
qdev_prop_set_uint32(vms->gic, "revision", revision);
qdev_prop_set_uint32(vms->gic, "num-cpu", smp_cpus);
@@ -831,6 +862,9 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem)
OBJECT(mem), &error_fatal);
qdev_prop_set_bit(vms->gic, "has-lpi", true);
}
+ } else if (vms->virt) {
+ qdev_prop_set_uint32(vms->gic, "maintenance-interrupt-id",
+ ARCH_GIC_MAINT_IRQ);
}
} else {
if (!kvm_irqchip_in_kernel()) {
@@ -1016,6 +1050,20 @@ static void virt_powerdown_req(Notifier *n, void *opaque)
}
}
+static void virt_generic_error_req(Notifier *n, void *opaque)
+{
+ uint16_t *source_id = opaque;
+
+ /* Currently, only QMP source ID is async */
+ if (*source_id != ACPI_HEST_SRC_ID_QMP) {
+ return;
+ }
+
+ VirtMachineState *s = container_of(n, VirtMachineState, generic_error_notifier);
+
+ acpi_send_event(s->acpi_dev, ACPI_GENERIC_ERROR);
+}
+
static void create_gpio_keys(char *fdt, DeviceState *pl061_dev,
uint32_t phandle)
{
@@ -1412,19 +1460,66 @@ static void create_pcie_irq_map(const MachineState *ms,
0x7 /* PCI irq */);
}
+static void create_smmuv3_dt_bindings(const VirtMachineState *vms, hwaddr base,
+ hwaddr size, int irq)
+{
+ char *node;
+ const char compat[] = "arm,smmu-v3";
+ const char irq_names[] = "eventq\0priq\0cmdq-sync\0gerror";
+ MachineState *ms = MACHINE(vms);
+
+ node = g_strdup_printf("/smmuv3@%" PRIx64, base);
+ qemu_fdt_add_subnode(ms->fdt, node);
+ qemu_fdt_setprop(ms->fdt, node, "compatible", compat, sizeof(compat));
+ qemu_fdt_setprop_sized_cells(ms->fdt, node, "reg", 2, base, 2, size);
+
+ qemu_fdt_setprop_cells(ms->fdt, node, "interrupts",
+ GIC_FDT_IRQ_TYPE_SPI, irq , GIC_FDT_IRQ_FLAGS_EDGE_LO_HI,
+ GIC_FDT_IRQ_TYPE_SPI, irq + 1, GIC_FDT_IRQ_FLAGS_EDGE_LO_HI,
+ GIC_FDT_IRQ_TYPE_SPI, irq + 2, GIC_FDT_IRQ_FLAGS_EDGE_LO_HI,
+ GIC_FDT_IRQ_TYPE_SPI, irq + 3, GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);
+
+ qemu_fdt_setprop(ms->fdt, node, "interrupt-names", irq_names,
+ sizeof(irq_names));
+
+ qemu_fdt_setprop(ms->fdt, node, "dma-coherent", NULL, 0);
+ qemu_fdt_setprop_cell(ms->fdt, node, "#iommu-cells", 1);
+ qemu_fdt_setprop_cell(ms->fdt, node, "phandle", vms->iommu_phandle);
+ g_free(node);
+}
+
+static void create_smmuv3_dev_dtb(VirtMachineState *vms,
+ DeviceState *dev, PCIBus *bus)
+{
+ PlatformBusDevice *pbus = PLATFORM_BUS_DEVICE(vms->platform_bus_dev);
+ SysBusDevice *sbdev = SYS_BUS_DEVICE(dev);
+ int irq = platform_bus_get_irqn(pbus, sbdev, 0);
+ hwaddr base = platform_bus_get_mmio_addr(pbus, sbdev, 0);
+ MachineState *ms = MACHINE(vms);
+
+ if (!(vms->bootinfo.firmware_loaded && virt_is_acpi_enabled(vms)) &&
+ strcmp("pcie.0", bus->qbus.name)) {
+ warn_report("SMMUv3 device only supported with pcie.0 for DT");
+ return;
+ }
+ base += vms->memmap[VIRT_PLATFORM_BUS].base;
+ irq += vms->irqmap[VIRT_PLATFORM_BUS];
+
+ vms->iommu_phandle = qemu_fdt_alloc_phandle(ms->fdt);
+ create_smmuv3_dt_bindings(vms, base, SMMU_IO_LEN, irq);
+ qemu_fdt_setprop_cells(ms->fdt, vms->pciehb_nodename, "iommu-map",
+ 0x0, vms->iommu_phandle, 0x0, 0x10000);
+}
+
static void create_smmu(const VirtMachineState *vms,
PCIBus *bus)
{
VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms);
- char *node;
- const char compat[] = "arm,smmu-v3";
int irq = vms->irqmap[VIRT_SMMU];
int i;
hwaddr base = vms->memmap[VIRT_SMMU].base;
hwaddr size = vms->memmap[VIRT_SMMU].size;
- const char irq_names[] = "eventq\0priq\0cmdq-sync\0gerror";
DeviceState *dev;
- MachineState *ms = MACHINE(vms);
if (vms->iommu != VIRT_IOMMU_SMMUV3 || !vms->iommu_phandle) {
return;
@@ -1443,27 +1538,7 @@ static void create_smmu(const VirtMachineState *vms,
sysbus_connect_irq(SYS_BUS_DEVICE(dev), i,
qdev_get_gpio_in(vms->gic, irq + i));
}
-
- node = g_strdup_printf("/smmuv3@%" PRIx64, base);
- qemu_fdt_add_subnode(ms->fdt, node);
- qemu_fdt_setprop(ms->fdt, node, "compatible", compat, sizeof(compat));
- qemu_fdt_setprop_sized_cells(ms->fdt, node, "reg", 2, base, 2, size);
-
- qemu_fdt_setprop_cells(ms->fdt, node, "interrupts",
- GIC_FDT_IRQ_TYPE_SPI, irq , GIC_FDT_IRQ_FLAGS_EDGE_LO_HI,
- GIC_FDT_IRQ_TYPE_SPI, irq + 1, GIC_FDT_IRQ_FLAGS_EDGE_LO_HI,
- GIC_FDT_IRQ_TYPE_SPI, irq + 2, GIC_FDT_IRQ_FLAGS_EDGE_LO_HI,
- GIC_FDT_IRQ_TYPE_SPI, irq + 3, GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);
-
- qemu_fdt_setprop(ms->fdt, node, "interrupt-names", irq_names,
- sizeof(irq_names));
-
- qemu_fdt_setprop(ms->fdt, node, "dma-coherent", NULL, 0);
-
- qemu_fdt_setprop_cell(ms->fdt, node, "#iommu-cells", 1);
-
- qemu_fdt_setprop_cell(ms->fdt, node, "phandle", vms->iommu_phandle);
- g_free(node);
+ create_smmuv3_dt_bindings(vms, base, size, irq);
}
static void create_virtio_iommu_dt_bindings(VirtMachineState *vms)
@@ -1487,9 +1562,12 @@ static void create_virtio_iommu_dt_bindings(VirtMachineState *vms)
qemu_fdt_setprop_cell(ms->fdt, node, "phandle", vms->iommu_phandle);
g_free(node);
- qemu_fdt_setprop_cells(ms->fdt, vms->pciehb_nodename, "iommu-map",
- 0x0, vms->iommu_phandle, 0x0, bdf,
- bdf + 1, vms->iommu_phandle, bdf + 1, 0xffff - bdf);
+ if (!vms->default_bus_bypass_iommu) {
+ qemu_fdt_setprop_cells(ms->fdt, vms->pciehb_nodename, "iommu-map",
+ 0x0, vms->iommu_phandle, 0x0, bdf,
+ bdf + 1, vms->iommu_phandle, bdf + 1,
+ 0xffff - bdf);
+ }
}
static void create_pcie(VirtMachineState *vms)
@@ -1612,8 +1690,11 @@ static void create_pcie(VirtMachineState *vms)
switch (vms->iommu) {
case VIRT_IOMMU_SMMUV3:
create_smmu(vms, vms->bus);
- qemu_fdt_setprop_cells(ms->fdt, nodename, "iommu-map",
- 0x0, vms->iommu_phandle, 0x0, 0x10000);
+ if (!vms->default_bus_bypass_iommu) {
+ qemu_fdt_setprop_cells(ms->fdt, nodename, "iommu-map",
+ 0x0, vms->iommu_phandle, 0x0, 0x10000);
+ }
+ vms->legacy_smmuv3_present = true;
break;
default:
g_assert_not_reached();
@@ -1621,6 +1702,17 @@ static void create_pcie(VirtMachineState *vms)
}
}
+static void create_cxl_host_reg_region(VirtMachineState *vms)
+{
+ MemoryRegion *sysmem = get_system_memory();
+ MemoryRegion *mr = &vms->cxl_devices_state.host_mr;
+
+ memory_region_init(mr, OBJECT(vms), "cxl_host_reg",
+ vms->memmap[VIRT_CXL_HOST].size);
+ memory_region_add_subregion(sysmem, vms->memmap[VIRT_CXL_HOST].base, mr);
+ vms->highmem_cxl = true;
+}
+
static void create_platform_bus(VirtMachineState *vms)
{
DeviceState *dev;
@@ -1737,6 +1829,12 @@ void virt_machine_done(Notifier *notifier, void *data)
struct arm_boot_info *info = &vms->bootinfo;
AddressSpace *as = arm_boot_address_space(cpu, info);
+ cxl_hook_up_pxb_registers(vms->bus, &vms->cxl_devices_state,
+ &error_fatal);
+
+ if (vms->cxl_devices_state.is_enabled) {
+ cxl_fmws_link_targets(&error_fatal);
+ }
/*
* If the user provided a dtb, we assume the dynamic sysbus nodes
* already are integrated there. This corresponds to a use case where
@@ -1783,6 +1881,7 @@ static inline bool *virt_get_high_memmap_enabled(VirtMachineState *vms,
{
bool *enabled_array[] = {
&vms->highmem_redists,
+ &vms->highmem_cxl,
&vms->highmem_ecam,
&vms->highmem_mmio,
};
@@ -1890,6 +1989,9 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits)
if (device_memory_size > 0) {
machine_memory_devices_init(ms, device_memory_base, device_memory_size);
}
+ vms->highest_gpa = cxl_fmws_set_memmap(ROUND_UP(vms->highest_gpa + 1,
+ 256 * MiB),
+ BIT_ULL(pa_bits)) - 1;
}
static VirtGICType finalize_gic_version_do(const char *accel_name,
@@ -2024,10 +2126,11 @@ static void finalize_gic_version(VirtMachineState *vms)
}
/*
- * virt_cpu_post_init() must be called after the CPUs have
- * been realized and the GIC has been created.
+ * virt_post_cpus_gic_realized() must be called after the CPUs and
+ * the GIC have both been realized.
*/
-static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem)
+static void virt_post_cpus_gic_realized(VirtMachineState *vms,
+ MemoryRegion *sysmem)
{
int max_cpus = MACHINE(vms)->smp.max_cpus;
bool aarch64, pmu, steal_time;
@@ -2060,6 +2163,10 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem)
memory_region_init_ram(pvtime, NULL, "pvtime", pvtime_size, NULL);
memory_region_add_subregion(sysmem, pvtime_reg_base, pvtime);
}
+ if (!aarch64 && vms->virt) {
+ error_report("KVM does not support EL2 on an AArch32 vCPU");
+ exit(1);
+ }
CPU_FOREACH(cpu) {
if (pmu) {
@@ -2198,14 +2305,20 @@ static void machvirt_init(MachineState *machine)
exit(1);
}
- if (vms->secure && (kvm_enabled() || hvf_enabled())) {
+ if (vms->secure && !tcg_enabled() && !qtest_enabled()) {
error_report("mach-virt: %s does not support providing "
"Security extensions (TrustZone) to the guest CPU",
current_accel_name());
exit(1);
}
- if (vms->virt && (kvm_enabled() || hvf_enabled())) {
+ if (vms->virt && kvm_enabled() && !kvm_arm_el2_supported()) {
+ error_report("mach-virt: host kernel KVM does not support providing "
+ "Virtualization extensions to the guest CPU");
+ exit(1);
+ }
+
+ if (vms->virt && !kvm_enabled() && !tcg_enabled() && !qtest_enabled()) {
error_report("mach-virt: %s does not support providing "
"Virtualization extensions to the guest CPU",
current_accel_name());
@@ -2340,11 +2453,13 @@ static void machvirt_init(MachineState *machine)
memory_region_add_subregion(sysmem, vms->memmap[VIRT_MEM].base,
machine->ram);
+ cxl_fmws_update_mmio();
+
virt_flash_fdt(vms, sysmem, secure_sysmem ?: sysmem);
create_gic(vms, sysmem);
- virt_cpu_post_init(vms, sysmem);
+ virt_post_cpus_gic_realized(vms, sysmem);
fdt_add_pmu_nodes(vms);
@@ -2395,9 +2510,13 @@ static void machvirt_init(MachineState *machine)
create_rtc(vms);
create_pcie(vms);
+ create_cxl_host_reg_region(vms);
if (has_ged && aarch64 && firmware_loaded && virt_is_acpi_enabled(vms)) {
vms->acpi_dev = create_acpi_ged(vms);
+ vms->generic_error_notifier.notify = virt_generic_error_req;
+ notifier_list_add(&acpi_generic_error_notifiers,
+ &vms->generic_error_notifier);
} else {
create_gpio_devices(vms, VIRT_GPIO, sysmem);
}
@@ -2846,7 +2965,7 @@ static void virt_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
const MachineState *ms = MACHINE(hotplug_dev);
const bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM);
- if (!vms->acpi_dev) {
+ if (!vms->acpi_dev && !(is_nvdimm && !dev->hotplugged)) {
error_setg(errp,
"memory hotplug is not enabled: missing acpi-ged device");
return;
@@ -2878,8 +2997,10 @@ static void virt_memory_plug(HotplugHandler *hotplug_dev,
nvdimm_plug(ms->nvdimms_state);
}
- hotplug_handler_plug(HOTPLUG_HANDLER(vms->acpi_dev),
- dev, &error_abort);
+ if (vms->acpi_dev) {
+ hotplug_handler_plug(HOTPLUG_HANDLER(vms->acpi_dev),
+ dev, &error_abort);
+ }
}
static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
@@ -2924,6 +3045,16 @@ static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
qlist_append_str(reserved_regions, resv_prop_str);
qdev_prop_set_array(dev, "reserved-regions", reserved_regions);
g_free(resv_prop_str);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_ARM_SMMUV3)) {
+ if (vms->legacy_smmuv3_present || vms->iommu == VIRT_IOMMU_VIRTIO) {
+ error_setg(errp, "virt machine already has %s set. "
+ "Doesn't support incompatible iommus",
+ (vms->legacy_smmuv3_present) ?
+ "iommu=smmuv3" : "virtio-iommu");
+ } else if (vms->iommu == VIRT_IOMMU_NONE) {
+ /* The new SMMUv3 device is specific to the PCI bus */
+ object_property_set_bool(OBJECT(dev), "smmu_per_bus", true, NULL);
+ }
}
}
@@ -2947,6 +3078,22 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
virtio_md_pci_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp);
}
+ if (object_dynamic_cast(OBJECT(dev), TYPE_ARM_SMMUV3)) {
+ if (!vms->legacy_smmuv3_present && vms->platform_bus_dev) {
+ PCIBus *bus;
+
+ bus = PCI_BUS(object_property_get_link(OBJECT(dev), "primary-bus",
+ &error_abort));
+ if (pci_bus_bypass_iommu(bus)) {
+ error_setg(errp, "Bypass option cannot be set for SMMUv3 "
+ "associated PCIe RC");
+ return;
+ }
+
+ create_smmuv3_dev_dtb(vms, dev, bus);
+ }
+ }
+
if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) {
PCIDevice *pdev = PCI_DEVICE(dev);
@@ -3144,11 +3291,9 @@ static void virt_machine_class_init(ObjectClass *oc, const void *data)
* configuration of the particular instance.
*/
mc->max_cpus = 512;
- machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_CALXEDA_XGMAC);
- machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_AMD_XGBE);
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE);
- machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_PLATFORM);
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_UEFI_VARS_SYSBUS);
+ machine_class_allow_dynamic_sysbus_dev(mc, TYPE_ARM_SMMUV3);
#ifdef CONFIG_TPM
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS);
#endif
@@ -3337,12 +3482,8 @@ static void virt_instance_init(Object *obj)
/* Default allows ITS instantiation */
vms->its = true;
-
- if (vmc->no_tcg_its) {
- vms->tcg_its = false;
- } else {
- vms->tcg_its = true;
- }
+ /* Allow ITS emulation if the machine version supports it */
+ vms->tcg_its = !vmc->no_tcg_its;
/* Default disallows iommu instantiation */
vms->iommu = VIRT_IOMMU_NONE;
@@ -3365,6 +3506,7 @@ static void virt_instance_init(Object *obj)
vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6);
vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
+ cxl_machine_init(obj, &vms->cxl_devices_state);
}
static const TypeInfo virt_machine_info = {
@@ -3387,10 +3529,18 @@ static void machvirt_machine_init(void)
}
type_init(machvirt_machine_init);
+static void virt_machine_10_2_options(MachineClass *mc)
+{
+}
+DEFINE_VIRT_MACHINE_AS_LATEST(10, 2)
+
static void virt_machine_10_1_options(MachineClass *mc)
{
+ virt_machine_10_2_options(mc);
+ mc->smbios_memory_device_size = 2047 * TiB;
+ compat_props_add(mc->compat_props, hw_compat_10_1, hw_compat_10_1_len);
}
-DEFINE_VIRT_MACHINE_AS_LATEST(10, 1)
+DEFINE_VIRT_MACHINE(10, 1)
static void virt_machine_10_0_options(MachineClass *mc)
{
diff --git a/hw/arm/xen-pvh.c b/hw/arm/xen-pvh.c
index 4b26bcf..1a9eeb0 100644
--- a/hw/arm/xen-pvh.c
+++ b/hw/arm/xen-pvh.c
@@ -10,7 +10,6 @@
#include "hw/boards.h"
#include "system/system.h"
#include "hw/xen/xen-pvh-common.h"
-#include "hw/xen/arch_hvm.h"
#define TYPE_XEN_ARM MACHINE_TYPE_NAME("xenpvh")
diff --git a/hw/arm/xlnx-versal-virt.c b/hw/arm/xlnx-versal-virt.c
index adadbb7..149b448 100644
--- a/hw/arm/xlnx-versal-virt.c
+++ b/hw/arm/xlnx-versal-virt.c
@@ -1,7 +1,8 @@
/*
- * Xilinx Versal Virtual board.
+ * AMD/Xilinx Versal family Virtual board.
*
* Copyright (c) 2018 Xilinx Inc.
+ * Copyright (c) 2025 Advanced Micro Devices, Inc.
* Written by Edgar E. Iglesias
*
* This program is free software; you can redistribute it and/or modify
@@ -13,18 +14,22 @@
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "system/device_tree.h"
+#include "system/address-spaces.h"
#include "hw/block/flash.h"
#include "hw/boards.h"
#include "hw/sysbus.h"
#include "hw/arm/fdt.h"
-#include "hw/qdev-properties.h"
#include "hw/arm/xlnx-versal.h"
#include "hw/arm/boot.h"
-#include "target/arm/multiprocessing.h"
#include "qom/object.h"
+#include "target/arm/cpu.h"
-#define TYPE_XLNX_VERSAL_VIRT_MACHINE MACHINE_TYPE_NAME("xlnx-versal-virt")
-OBJECT_DECLARE_SIMPLE_TYPE(VersalVirt, XLNX_VERSAL_VIRT_MACHINE)
+#define TYPE_XLNX_VERSAL_VIRT_BASE_MACHINE \
+ MACHINE_TYPE_NAME("amd-versal-virt-base")
+OBJECT_DECLARE_TYPE(VersalVirt, VersalVirtClass, XLNX_VERSAL_VIRT_BASE_MACHINE)
+
+#define TYPE_XLNX_VERSAL_VIRT_MACHINE MACHINE_TYPE_NAME("amd-versal-virt")
+#define TYPE_XLNX_VERSAL2_VIRT_MACHINE MACHINE_TYPE_NAME("amd-versal2-virt")
#define XLNX_VERSAL_NUM_OSPI_FLASH 4
@@ -35,28 +40,27 @@ struct VersalVirt {
void *fdt;
int fdt_size;
- struct {
- uint32_t gic;
- uint32_t ethernet_phy[2];
- uint32_t clk_125Mhz;
- uint32_t clk_25Mhz;
- uint32_t usb;
- uint32_t dwc;
- uint32_t canfd[2];
- } phandle;
struct arm_boot_info binfo;
- CanBusState *canbus[XLNX_VERSAL_NR_CANFD];
+ CanBusState **canbus;
+
struct {
- bool secure;
+ char *ospi_model;
} cfg;
- char *ospi_model;
+};
+
+struct VersalVirtClass {
+ MachineClass parent_class;
+
+ VersalVersion version;
};
static void fdt_create(VersalVirt *s)
{
MachineClass *mc = MACHINE_GET_CLASS(s);
- int i;
+ VersalVirtClass *vvc = XLNX_VERSAL_VIRT_BASE_MACHINE_GET_CLASS(s);
+ const char versal_compat[] = "amd-versal-virt\0xlnx-versal-virt";
+ const char versal2_compat[] = "amd-versal2-virt";
s->fdt = create_device_tree(&s->fdt_size);
if (!s->fdt) {
@@ -64,392 +68,26 @@ static void fdt_create(VersalVirt *s)
exit(1);
}
- /* Allocate all phandles. */
- s->phandle.gic = qemu_fdt_alloc_phandle(s->fdt);
- for (i = 0; i < ARRAY_SIZE(s->phandle.ethernet_phy); i++) {
- s->phandle.ethernet_phy[i] = qemu_fdt_alloc_phandle(s->fdt);
- }
- s->phandle.clk_25Mhz = qemu_fdt_alloc_phandle(s->fdt);
- s->phandle.clk_125Mhz = qemu_fdt_alloc_phandle(s->fdt);
-
- s->phandle.usb = qemu_fdt_alloc_phandle(s->fdt);
- s->phandle.dwc = qemu_fdt_alloc_phandle(s->fdt);
/* Create /chosen node for load_dtb. */
qemu_fdt_add_subnode(s->fdt, "/chosen");
+ qemu_fdt_add_subnode(s->fdt, "/aliases");
/* Header */
- qemu_fdt_setprop_cell(s->fdt, "/", "interrupt-parent", s->phandle.gic);
- qemu_fdt_setprop_cell(s->fdt, "/", "#size-cells", 0x2);
- qemu_fdt_setprop_cell(s->fdt, "/", "#address-cells", 0x2);
qemu_fdt_setprop_string(s->fdt, "/", "model", mc->desc);
- qemu_fdt_setprop_string(s->fdt, "/", "compatible", "xlnx-versal-virt");
-}
-
-static void fdt_add_clk_node(VersalVirt *s, const char *name,
- unsigned int freq_hz, uint32_t phandle)
-{
- qemu_fdt_add_subnode(s->fdt, name);
- qemu_fdt_setprop_cell(s->fdt, name, "phandle", phandle);
- qemu_fdt_setprop_cell(s->fdt, name, "clock-frequency", freq_hz);
- qemu_fdt_setprop_cell(s->fdt, name, "#clock-cells", 0x0);
- qemu_fdt_setprop_string(s->fdt, name, "compatible", "fixed-clock");
- qemu_fdt_setprop(s->fdt, name, "u-boot,dm-pre-reloc", NULL, 0);
-}
-
-static void fdt_add_cpu_nodes(VersalVirt *s, uint32_t psci_conduit)
-{
- int i;
-
- qemu_fdt_add_subnode(s->fdt, "/cpus");
- qemu_fdt_setprop_cell(s->fdt, "/cpus", "#size-cells", 0x0);
- qemu_fdt_setprop_cell(s->fdt, "/cpus", "#address-cells", 1);
-
- for (i = XLNX_VERSAL_NR_ACPUS - 1; i >= 0; i--) {
- char *name = g_strdup_printf("/cpus/cpu@%d", i);
- ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(i));
-
- qemu_fdt_add_subnode(s->fdt, name);
- qemu_fdt_setprop_cell(s->fdt, name, "reg",
- arm_cpu_mp_affinity(armcpu));
- if (psci_conduit != QEMU_PSCI_CONDUIT_DISABLED) {
- qemu_fdt_setprop_string(s->fdt, name, "enable-method", "psci");
- }
- qemu_fdt_setprop_string(s->fdt, name, "device_type", "cpu");
- qemu_fdt_setprop_string(s->fdt, name, "compatible",
- armcpu->dtb_compatible);
- g_free(name);
- }
-}
-
-static void fdt_add_gic_nodes(VersalVirt *s)
-{
- char *nodename;
-
- nodename = g_strdup_printf("/gic@%x", MM_GIC_APU_DIST_MAIN);
- qemu_fdt_add_subnode(s->fdt, nodename);
- qemu_fdt_setprop_cell(s->fdt, nodename, "phandle", s->phandle.gic);
- qemu_fdt_setprop_cells(s->fdt, nodename, "interrupts",
- GIC_FDT_IRQ_TYPE_PPI, VERSAL_GIC_MAINT_IRQ,
- GIC_FDT_IRQ_FLAGS_LEVEL_HI);
- qemu_fdt_setprop(s->fdt, nodename, "interrupt-controller", NULL, 0);
- qemu_fdt_setprop_sized_cells(s->fdt, nodename, "reg",
- 2, MM_GIC_APU_DIST_MAIN,
- 2, MM_GIC_APU_DIST_MAIN_SIZE,
- 2, MM_GIC_APU_REDIST_0,
- 2, MM_GIC_APU_REDIST_0_SIZE);
- qemu_fdt_setprop_cell(s->fdt, nodename, "#interrupt-cells", 3);
- qemu_fdt_setprop_string(s->fdt, nodename, "compatible", "arm,gic-v3");
- g_free(nodename);
-}
-
-static void fdt_add_timer_nodes(VersalVirt *s)
-{
- const char compat[] = "arm,armv8-timer";
- uint32_t irqflags = GIC_FDT_IRQ_FLAGS_LEVEL_HI;
-
- qemu_fdt_add_subnode(s->fdt, "/timer");
- qemu_fdt_setprop_cells(s->fdt, "/timer", "interrupts",
- GIC_FDT_IRQ_TYPE_PPI, VERSAL_TIMER_S_EL1_IRQ, irqflags,
- GIC_FDT_IRQ_TYPE_PPI, VERSAL_TIMER_NS_EL1_IRQ, irqflags,
- GIC_FDT_IRQ_TYPE_PPI, VERSAL_TIMER_VIRT_IRQ, irqflags,
- GIC_FDT_IRQ_TYPE_PPI, VERSAL_TIMER_NS_EL2_IRQ, irqflags);
- qemu_fdt_setprop(s->fdt, "/timer", "compatible",
- compat, sizeof(compat));
-}
-
-static void fdt_add_usb_xhci_nodes(VersalVirt *s)
-{
- const char clocknames[] = "bus_clk\0ref_clk";
- const char irq_name[] = "dwc_usb3";
- const char compatVersalDWC3[] = "xlnx,versal-dwc3";
- const char compatDWC3[] = "snps,dwc3";
- char *name = g_strdup_printf("/usb@%" PRIx32, MM_USB2_CTRL_REGS);
-
- qemu_fdt_add_subnode(s->fdt, name);
- qemu_fdt_setprop(s->fdt, name, "compatible",
- compatVersalDWC3, sizeof(compatVersalDWC3));
- qemu_fdt_setprop_sized_cells(s->fdt, name, "reg",
- 2, MM_USB2_CTRL_REGS,
- 2, MM_USB2_CTRL_REGS_SIZE);
- qemu_fdt_setprop(s->fdt, name, "clock-names",
- clocknames, sizeof(clocknames));
- qemu_fdt_setprop_cells(s->fdt, name, "clocks",
- s->phandle.clk_25Mhz, s->phandle.clk_125Mhz);
- qemu_fdt_setprop(s->fdt, name, "ranges", NULL, 0);
- qemu_fdt_setprop_cell(s->fdt, name, "#address-cells", 2);
- qemu_fdt_setprop_cell(s->fdt, name, "#size-cells", 2);
- qemu_fdt_setprop_cell(s->fdt, name, "phandle", s->phandle.usb);
- g_free(name);
-
- name = g_strdup_printf("/usb@%" PRIx32 "/dwc3@%" PRIx32,
- MM_USB2_CTRL_REGS, MM_USB_0);
- qemu_fdt_add_subnode(s->fdt, name);
- qemu_fdt_setprop(s->fdt, name, "compatible",
- compatDWC3, sizeof(compatDWC3));
- qemu_fdt_setprop_sized_cells(s->fdt, name, "reg",
- 2, MM_USB_0, 2, MM_USB_0_SIZE);
- qemu_fdt_setprop(s->fdt, name, "interrupt-names",
- irq_name, sizeof(irq_name));
- qemu_fdt_setprop_cells(s->fdt, name, "interrupts",
- GIC_FDT_IRQ_TYPE_SPI, VERSAL_USB0_IRQ_0,
- GIC_FDT_IRQ_FLAGS_LEVEL_HI);
- qemu_fdt_setprop_cell(s->fdt, name,
- "snps,quirk-frame-length-adjustment", 0x20);
- qemu_fdt_setprop_cells(s->fdt, name, "#stream-id-cells", 1);
- qemu_fdt_setprop_string(s->fdt, name, "dr_mode", "host");
- qemu_fdt_setprop_string(s->fdt, name, "phy-names", "usb3-phy");
- qemu_fdt_setprop(s->fdt, name, "snps,dis_u2_susphy_quirk", NULL, 0);
- qemu_fdt_setprop(s->fdt, name, "snps,dis_u3_susphy_quirk", NULL, 0);
- qemu_fdt_setprop(s->fdt, name, "snps,refclk_fladj", NULL, 0);
- qemu_fdt_setprop(s->fdt, name, "snps,mask_phy_reset", NULL, 0);
- qemu_fdt_setprop_cell(s->fdt, name, "phandle", s->phandle.dwc);
- qemu_fdt_setprop_string(s->fdt, name, "maximum-speed", "high-speed");
- g_free(name);
-}
-
-static void fdt_add_uart_nodes(VersalVirt *s)
-{
- uint64_t addrs[] = { MM_UART1, MM_UART0 };
- unsigned int irqs[] = { VERSAL_UART1_IRQ_0, VERSAL_UART0_IRQ_0 };
- const char compat[] = "arm,pl011\0arm,sbsa-uart";
- const char clocknames[] = "uartclk\0apb_pclk";
- int i;
-
- for (i = 0; i < ARRAY_SIZE(addrs); i++) {
- char *name = g_strdup_printf("/uart@%" PRIx64, addrs[i]);
- qemu_fdt_add_subnode(s->fdt, name);
- qemu_fdt_setprop_cell(s->fdt, name, "current-speed", 115200);
- qemu_fdt_setprop_cells(s->fdt, name, "clocks",
- s->phandle.clk_125Mhz, s->phandle.clk_125Mhz);
- qemu_fdt_setprop(s->fdt, name, "clock-names",
- clocknames, sizeof(clocknames));
-
- qemu_fdt_setprop_cells(s->fdt, name, "interrupts",
- GIC_FDT_IRQ_TYPE_SPI, irqs[i],
- GIC_FDT_IRQ_FLAGS_LEVEL_HI);
- qemu_fdt_setprop_sized_cells(s->fdt, name, "reg",
- 2, addrs[i], 2, 0x1000);
- qemu_fdt_setprop(s->fdt, name, "compatible",
- compat, sizeof(compat));
- qemu_fdt_setprop(s->fdt, name, "u-boot,dm-pre-reloc", NULL, 0);
-
- if (addrs[i] == MM_UART0) {
- /* Select UART0. */
- qemu_fdt_setprop_string(s->fdt, "/chosen", "stdout-path", name);
- }
- g_free(name);
- }
-}
-
-static void fdt_add_canfd_nodes(VersalVirt *s)
-{
- uint64_t addrs[] = { MM_CANFD1, MM_CANFD0 };
- uint32_t size[] = { MM_CANFD1_SIZE, MM_CANFD0_SIZE };
- unsigned int irqs[] = { VERSAL_CANFD1_IRQ_0, VERSAL_CANFD0_IRQ_0 };
- const char clocknames[] = "can_clk\0s_axi_aclk";
- int i;
-
- /* Create and connect CANFD0 and CANFD1 nodes to canbus0. */
- for (i = 0; i < ARRAY_SIZE(addrs); i++) {
- char *name = g_strdup_printf("/canfd@%" PRIx64, addrs[i]);
- qemu_fdt_add_subnode(s->fdt, name);
-
- qemu_fdt_setprop_cell(s->fdt, name, "rx-fifo-depth", 0x40);
- qemu_fdt_setprop_cell(s->fdt, name, "tx-mailbox-count", 0x20);
-
- qemu_fdt_setprop_cells(s->fdt, name, "clocks",
- s->phandle.clk_25Mhz, s->phandle.clk_25Mhz);
- qemu_fdt_setprop(s->fdt, name, "clock-names",
- clocknames, sizeof(clocknames));
- qemu_fdt_setprop_cells(s->fdt, name, "interrupts",
- GIC_FDT_IRQ_TYPE_SPI, irqs[i],
- GIC_FDT_IRQ_FLAGS_LEVEL_HI);
- qemu_fdt_setprop_sized_cells(s->fdt, name, "reg",
- 2, addrs[i], 2, size[i]);
- qemu_fdt_setprop_string(s->fdt, name, "compatible",
- "xlnx,canfd-2.0");
-
- g_free(name);
- }
-}
-
-static void fdt_add_fixed_link_nodes(VersalVirt *s, char *gemname,
- uint32_t phandle)
-{
- char *name = g_strdup_printf("%s/fixed-link", gemname);
-
- qemu_fdt_add_subnode(s->fdt, name);
- qemu_fdt_setprop_cell(s->fdt, name, "phandle", phandle);
- qemu_fdt_setprop(s->fdt, name, "full-duplex", NULL, 0);
- qemu_fdt_setprop_cell(s->fdt, name, "speed", 1000);
- g_free(name);
-}
-
-static void fdt_add_gem_nodes(VersalVirt *s)
-{
- uint64_t addrs[] = { MM_GEM1, MM_GEM0 };
- unsigned int irqs[] = { VERSAL_GEM1_IRQ_0, VERSAL_GEM0_IRQ_0 };
- const char clocknames[] = "pclk\0hclk\0tx_clk\0rx_clk";
- const char compat_gem[] = "cdns,zynqmp-gem\0cdns,gem";
- int i;
-
- for (i = 0; i < ARRAY_SIZE(addrs); i++) {
- char *name = g_strdup_printf("/ethernet@%" PRIx64, addrs[i]);
- qemu_fdt_add_subnode(s->fdt, name);
-
- fdt_add_fixed_link_nodes(s, name, s->phandle.ethernet_phy[i]);
- qemu_fdt_setprop_string(s->fdt, name, "phy-mode", "rgmii-id");
- qemu_fdt_setprop_cell(s->fdt, name, "phy-handle",
- s->phandle.ethernet_phy[i]);
- qemu_fdt_setprop_cells(s->fdt, name, "clocks",
- s->phandle.clk_25Mhz, s->phandle.clk_25Mhz,
- s->phandle.clk_125Mhz, s->phandle.clk_125Mhz);
- qemu_fdt_setprop(s->fdt, name, "clock-names",
- clocknames, sizeof(clocknames));
- qemu_fdt_setprop_cells(s->fdt, name, "interrupts",
- GIC_FDT_IRQ_TYPE_SPI, irqs[i],
- GIC_FDT_IRQ_FLAGS_LEVEL_HI,
- GIC_FDT_IRQ_TYPE_SPI, irqs[i],
- GIC_FDT_IRQ_FLAGS_LEVEL_HI);
- qemu_fdt_setprop_sized_cells(s->fdt, name, "reg",
- 2, addrs[i], 2, 0x1000);
- qemu_fdt_setprop(s->fdt, name, "compatible",
- compat_gem, sizeof(compat_gem));
- qemu_fdt_setprop_cell(s->fdt, name, "#address-cells", 1);
- qemu_fdt_setprop_cell(s->fdt, name, "#size-cells", 0);
- g_free(name);
- }
-}
-static void fdt_add_zdma_nodes(VersalVirt *s)
-{
- const char clocknames[] = "clk_main\0clk_apb";
- const char compat[] = "xlnx,zynqmp-dma-1.0";
- int i;
-
- for (i = XLNX_VERSAL_NR_ADMAS - 1; i >= 0; i--) {
- uint64_t addr = MM_ADMA_CH0 + MM_ADMA_CH0_SIZE * i;
- char *name = g_strdup_printf("/dma@%" PRIx64, addr);
-
- qemu_fdt_add_subnode(s->fdt, name);
-
- qemu_fdt_setprop_cell(s->fdt, name, "xlnx,bus-width", 64);
- qemu_fdt_setprop_cells(s->fdt, name, "clocks",
- s->phandle.clk_25Mhz, s->phandle.clk_25Mhz);
- qemu_fdt_setprop(s->fdt, name, "clock-names",
- clocknames, sizeof(clocknames));
- qemu_fdt_setprop_cells(s->fdt, name, "interrupts",
- GIC_FDT_IRQ_TYPE_SPI, VERSAL_ADMA_IRQ_0 + i,
- GIC_FDT_IRQ_FLAGS_LEVEL_HI);
- qemu_fdt_setprop_sized_cells(s->fdt, name, "reg",
- 2, addr, 2, 0x1000);
- qemu_fdt_setprop(s->fdt, name, "compatible", compat, sizeof(compat));
- g_free(name);
- }
-}
-
-static void fdt_add_sd_nodes(VersalVirt *s)
-{
- const char clocknames[] = "clk_xin\0clk_ahb";
- const char compat[] = "arasan,sdhci-8.9a";
- int i;
+ switch (vvc->version) {
+ case VERSAL_VER_VERSAL:
+ qemu_fdt_setprop(s->fdt, "/", "compatible", versal_compat,
+ sizeof(versal_compat));
+ break;
- for (i = ARRAY_SIZE(s->soc.pmc.iou.sd) - 1; i >= 0; i--) {
- uint64_t addr = MM_PMC_SD0 + MM_PMC_SD0_SIZE * i;
- char *name = g_strdup_printf("/sdhci@%" PRIx64, addr);
-
- qemu_fdt_add_subnode(s->fdt, name);
-
- qemu_fdt_setprop_cells(s->fdt, name, "clocks",
- s->phandle.clk_25Mhz, s->phandle.clk_25Mhz);
- qemu_fdt_setprop(s->fdt, name, "clock-names",
- clocknames, sizeof(clocknames));
- qemu_fdt_setprop_cells(s->fdt, name, "interrupts",
- GIC_FDT_IRQ_TYPE_SPI, VERSAL_SD0_IRQ_0 + i * 2,
- GIC_FDT_IRQ_FLAGS_LEVEL_HI);
- qemu_fdt_setprop_sized_cells(s->fdt, name, "reg",
- 2, addr, 2, MM_PMC_SD0_SIZE);
- qemu_fdt_setprop(s->fdt, name, "compatible", compat, sizeof(compat));
- g_free(name);
+ case VERSAL_VER_VERSAL2:
+ qemu_fdt_setprop(s->fdt, "/", "compatible", versal2_compat,
+ sizeof(versal2_compat));
+ break;
}
}
-static void fdt_add_rtc_node(VersalVirt *s)
-{
- const char compat[] = "xlnx,zynqmp-rtc";
- const char interrupt_names[] = "alarm\0sec";
- char *name = g_strdup_printf("/rtc@%x", MM_PMC_RTC);
-
- qemu_fdt_add_subnode(s->fdt, name);
-
- qemu_fdt_setprop_cells(s->fdt, name, "interrupts",
- GIC_FDT_IRQ_TYPE_SPI, VERSAL_RTC_ALARM_IRQ,
- GIC_FDT_IRQ_FLAGS_LEVEL_HI,
- GIC_FDT_IRQ_TYPE_SPI, VERSAL_RTC_SECONDS_IRQ,
- GIC_FDT_IRQ_FLAGS_LEVEL_HI);
- qemu_fdt_setprop(s->fdt, name, "interrupt-names",
- interrupt_names, sizeof(interrupt_names));
- qemu_fdt_setprop_sized_cells(s->fdt, name, "reg",
- 2, MM_PMC_RTC, 2, MM_PMC_RTC_SIZE);
- qemu_fdt_setprop(s->fdt, name, "compatible", compat, sizeof(compat));
- g_free(name);
-}
-
-static void fdt_add_bbram_node(VersalVirt *s)
-{
- const char compat[] = TYPE_XLNX_BBRAM;
- const char interrupt_names[] = "bbram-error";
- char *name = g_strdup_printf("/bbram@%x", MM_PMC_BBRAM_CTRL);
-
- qemu_fdt_add_subnode(s->fdt, name);
-
- qemu_fdt_setprop_cells(s->fdt, name, "interrupts",
- GIC_FDT_IRQ_TYPE_SPI, VERSAL_PMC_APB_IRQ,
- GIC_FDT_IRQ_FLAGS_LEVEL_HI);
- qemu_fdt_setprop(s->fdt, name, "interrupt-names",
- interrupt_names, sizeof(interrupt_names));
- qemu_fdt_setprop_sized_cells(s->fdt, name, "reg",
- 2, MM_PMC_BBRAM_CTRL,
- 2, MM_PMC_BBRAM_CTRL_SIZE);
- qemu_fdt_setprop(s->fdt, name, "compatible", compat, sizeof(compat));
- g_free(name);
-}
-
-static void fdt_add_efuse_ctrl_node(VersalVirt *s)
-{
- const char compat[] = TYPE_XLNX_VERSAL_EFUSE_CTRL;
- const char interrupt_names[] = "pmc_efuse";
- char *name = g_strdup_printf("/pmc_efuse@%x", MM_PMC_EFUSE_CTRL);
-
- qemu_fdt_add_subnode(s->fdt, name);
-
- qemu_fdt_setprop_cells(s->fdt, name, "interrupts",
- GIC_FDT_IRQ_TYPE_SPI, VERSAL_EFUSE_IRQ,
- GIC_FDT_IRQ_FLAGS_LEVEL_HI);
- qemu_fdt_setprop(s->fdt, name, "interrupt-names",
- interrupt_names, sizeof(interrupt_names));
- qemu_fdt_setprop_sized_cells(s->fdt, name, "reg",
- 2, MM_PMC_EFUSE_CTRL,
- 2, MM_PMC_EFUSE_CTRL_SIZE);
- qemu_fdt_setprop(s->fdt, name, "compatible", compat, sizeof(compat));
- g_free(name);
-}
-
-static void fdt_add_efuse_cache_node(VersalVirt *s)
-{
- const char compat[] = TYPE_XLNX_VERSAL_EFUSE_CACHE;
- char *name = g_strdup_printf("/xlnx_pmc_efuse_cache@%x",
- MM_PMC_EFUSE_CACHE);
-
- qemu_fdt_add_subnode(s->fdt, name);
-
- qemu_fdt_setprop_sized_cells(s->fdt, name, "reg",
- 2, MM_PMC_EFUSE_CACHE,
- 2, MM_PMC_EFUSE_CACHE_SIZE);
- qemu_fdt_setprop(s->fdt, name, "compatible", compat, sizeof(compat));
- g_free(name);
-}
-
static void fdt_nop_memory_nodes(void *fdt, Error **errp)
{
Error *err = NULL;
@@ -470,88 +108,13 @@ static void fdt_nop_memory_nodes(void *fdt, Error **errp)
g_strfreev(node_path);
}
-static void fdt_add_memory_nodes(VersalVirt *s, void *fdt, uint64_t ram_size)
-{
- /* Describes the various split DDR access regions. */
- static const struct {
- uint64_t base;
- uint64_t size;
- } addr_ranges[] = {
- { MM_TOP_DDR, MM_TOP_DDR_SIZE },
- { MM_TOP_DDR_2, MM_TOP_DDR_2_SIZE },
- { MM_TOP_DDR_3, MM_TOP_DDR_3_SIZE },
- { MM_TOP_DDR_4, MM_TOP_DDR_4_SIZE }
- };
- uint64_t mem_reg_prop[8] = {0};
- uint64_t size = ram_size;
- Error *err = NULL;
- char *name;
- int i;
-
- fdt_nop_memory_nodes(fdt, &err);
- if (err) {
- error_report_err(err);
- return;
- }
-
- name = g_strdup_printf("/memory@%x", MM_TOP_DDR);
- for (i = 0; i < ARRAY_SIZE(addr_ranges) && size; i++) {
- uint64_t mapsize;
-
- mapsize = size < addr_ranges[i].size ? size : addr_ranges[i].size;
-
- mem_reg_prop[i * 2] = addr_ranges[i].base;
- mem_reg_prop[i * 2 + 1] = mapsize;
- size -= mapsize;
- }
- qemu_fdt_add_subnode(fdt, name);
- qemu_fdt_setprop_string(fdt, name, "device_type", "memory");
-
- switch (i) {
- case 1:
- qemu_fdt_setprop_sized_cells(fdt, name, "reg",
- 2, mem_reg_prop[0],
- 2, mem_reg_prop[1]);
- break;
- case 2:
- qemu_fdt_setprop_sized_cells(fdt, name, "reg",
- 2, mem_reg_prop[0],
- 2, mem_reg_prop[1],
- 2, mem_reg_prop[2],
- 2, mem_reg_prop[3]);
- break;
- case 3:
- qemu_fdt_setprop_sized_cells(fdt, name, "reg",
- 2, mem_reg_prop[0],
- 2, mem_reg_prop[1],
- 2, mem_reg_prop[2],
- 2, mem_reg_prop[3],
- 2, mem_reg_prop[4],
- 2, mem_reg_prop[5]);
- break;
- case 4:
- qemu_fdt_setprop_sized_cells(fdt, name, "reg",
- 2, mem_reg_prop[0],
- 2, mem_reg_prop[1],
- 2, mem_reg_prop[2],
- 2, mem_reg_prop[3],
- 2, mem_reg_prop[4],
- 2, mem_reg_prop[5],
- 2, mem_reg_prop[6],
- 2, mem_reg_prop[7]);
- break;
- default:
- g_assert_not_reached();
- }
- g_free(name);
-}
-
static void versal_virt_modify_dtb(const struct arm_boot_info *binfo,
void *fdt)
{
VersalVirt *s = container_of(binfo, VersalVirt, binfo);
- fdt_add_memory_nodes(s, fdt, binfo->ram_size);
+ fdt_nop_memory_nodes(s->fdt, &error_abort);
+ versal_fdt_add_memory_nodes(&s->soc, binfo->ram_size);
}
static void *versal_virt_get_dtb(const struct arm_boot_info *binfo,
@@ -570,41 +133,34 @@ static void create_virtio_regions(VersalVirt *s)
int i;
for (i = 0; i < NUM_VIRTIO_TRANSPORT; i++) {
- char *name = g_strdup_printf("virtio%d", i);
- hwaddr base = MM_TOP_RSVD + i * virtio_mmio_size;
- int irq = VERSAL_RSVD_IRQ_FIRST + i;
+ hwaddr base = versal_get_reserved_mmio_addr(&s->soc)
+ + i * virtio_mmio_size;
+ g_autofree char *node = g_strdup_printf("/virtio_mmio@%" PRIx64, base);
+ int dtb_irq;
MemoryRegion *mr;
DeviceState *dev;
qemu_irq pic_irq;
- pic_irq = qdev_get_gpio_in(DEVICE(&s->soc.fpd.apu.gic), irq);
+ pic_irq = versal_get_reserved_irq(&s->soc, i, &dtb_irq);
dev = qdev_new("virtio-mmio");
- object_property_add_child(OBJECT(&s->soc), name, OBJECT(dev));
+ object_property_add_child(OBJECT(s), "virtio-mmio[*]", OBJECT(dev));
sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic_irq);
mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
memory_region_add_subregion(&s->soc.mr_ps, base, mr);
- g_free(name);
- }
- for (i = 0; i < NUM_VIRTIO_TRANSPORT; i++) {
- hwaddr base = MM_TOP_RSVD + i * virtio_mmio_size;
- int irq = VERSAL_RSVD_IRQ_FIRST + i;
- char *name = g_strdup_printf("/virtio_mmio@%" PRIx64, base);
-
- qemu_fdt_add_subnode(s->fdt, name);
- qemu_fdt_setprop(s->fdt, name, "dma-coherent", NULL, 0);
- qemu_fdt_setprop_cells(s->fdt, name, "interrupts",
- GIC_FDT_IRQ_TYPE_SPI, irq,
+ qemu_fdt_add_subnode(s->fdt, node);
+ qemu_fdt_setprop(s->fdt, node, "dma-coherent", NULL, 0);
+ qemu_fdt_setprop_cells(s->fdt, node, "interrupts",
+ GIC_FDT_IRQ_TYPE_SPI, dtb_irq,
GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);
- qemu_fdt_setprop_sized_cells(s->fdt, name, "reg",
+ qemu_fdt_setprop_sized_cells(s->fdt, node, "reg",
2, base, 2, virtio_mmio_size);
- qemu_fdt_setprop_string(s->fdt, name, "compatible", "virtio,mmio");
- g_free(name);
+ qemu_fdt_setprop_string(s->fdt, node, "compatible", "virtio,mmio");
}
}
-static void bbram_attach_drive(XlnxBBRam *dev)
+static void bbram_attach_drive(VersalVirt *s)
{
DriveInfo *dinfo;
BlockBackend *blk;
@@ -612,11 +168,11 @@ static void bbram_attach_drive(XlnxBBRam *dev)
dinfo = drive_get_by_index(IF_PFLASH, 0);
blk = dinfo ? blk_by_legacy_dinfo(dinfo) : NULL;
if (blk) {
- qdev_prop_set_drive(DEVICE(dev), "drive", blk);
+ versal_bbram_attach_drive(&s->soc, blk);
}
}
-static void efuse_attach_drive(XlnxEFuse *dev)
+static void efuse_attach_drive(VersalVirt *s)
{
DriveInfo *dinfo;
BlockBackend *blk;
@@ -624,41 +180,37 @@ static void efuse_attach_drive(XlnxEFuse *dev)
dinfo = drive_get_by_index(IF_PFLASH, 1);
blk = dinfo ? blk_by_legacy_dinfo(dinfo) : NULL;
if (blk) {
- qdev_prop_set_drive(DEVICE(dev), "drive", blk);
+ versal_efuse_attach_drive(&s->soc, blk);
}
}
-static void sd_plugin_card(SDHCIState *sd, DriveInfo *di)
+static void sd_plug_card(VersalVirt *s, int idx, DriveInfo *di)
{
BlockBackend *blk = di ? blk_by_legacy_dinfo(di) : NULL;
- DeviceState *card;
- card = qdev_new(TYPE_SD_CARD);
- object_property_add_child(OBJECT(sd), "card[*]", OBJECT(card));
- qdev_prop_set_drive_err(card, "drive", blk, &error_fatal);
- qdev_realize_and_unref(card, qdev_get_child_bus(DEVICE(sd), "sd-bus"),
- &error_fatal);
+ versal_sdhci_plug_card(&s->soc, idx, blk);
}
static char *versal_get_ospi_model(Object *obj, Error **errp)
{
- VersalVirt *s = XLNX_VERSAL_VIRT_MACHINE(obj);
+ VersalVirt *s = XLNX_VERSAL_VIRT_BASE_MACHINE(obj);
- return g_strdup(s->ospi_model);
+ return g_strdup(s->cfg.ospi_model);
}
static void versal_set_ospi_model(Object *obj, const char *value, Error **errp)
{
- VersalVirt *s = XLNX_VERSAL_VIRT_MACHINE(obj);
+ VersalVirt *s = XLNX_VERSAL_VIRT_BASE_MACHINE(obj);
- g_free(s->ospi_model);
- s->ospi_model = g_strdup(value);
+ g_free(s->cfg.ospi_model);
+ s->cfg.ospi_model = g_strdup(value);
}
static void versal_virt_init(MachineState *machine)
{
- VersalVirt *s = XLNX_VERSAL_VIRT_MACHINE(machine);
+ VersalVirt *s = XLNX_VERSAL_VIRT_BASE_MACHINE(machine);
+ VersalVirtClass *vvc = XLNX_VERSAL_VIRT_BASE_MACHINE_GET_CLASS(machine);
int psci_conduit = QEMU_PSCI_CONDUIT_DISABLED;
int i;
@@ -690,48 +242,38 @@ static void versal_virt_init(MachineState *machine)
}
object_initialize_child(OBJECT(machine), "xlnx-versal", &s->soc,
- TYPE_XLNX_VERSAL);
+ versal_get_class(vvc->version));
object_property_set_link(OBJECT(&s->soc), "ddr", OBJECT(machine->ram),
&error_abort);
- object_property_set_link(OBJECT(&s->soc), "canbus0", OBJECT(s->canbus[0]),
- &error_abort);
- object_property_set_link(OBJECT(&s->soc), "canbus1", OBJECT(s->canbus[1]),
- &error_abort);
- sysbus_realize(SYS_BUS_DEVICE(&s->soc), &error_fatal);
+
+ for (i = 0; i < versal_get_num_can(vvc->version); i++) {
+ g_autofree char *prop_name = g_strdup_printf("canbus%d", i);
+
+ object_property_set_link(OBJECT(&s->soc), prop_name,
+ OBJECT(s->canbus[i]),
+ &error_abort);
+ }
fdt_create(s);
+ versal_set_fdt(&s->soc, s->fdt);
+ sysbus_realize(SYS_BUS_DEVICE(&s->soc), &error_fatal);
create_virtio_regions(s);
- fdt_add_gem_nodes(s);
- fdt_add_uart_nodes(s);
- fdt_add_canfd_nodes(s);
- fdt_add_gic_nodes(s);
- fdt_add_timer_nodes(s);
- fdt_add_zdma_nodes(s);
- fdt_add_usb_xhci_nodes(s);
- fdt_add_sd_nodes(s);
- fdt_add_rtc_node(s);
- fdt_add_bbram_node(s);
- fdt_add_efuse_ctrl_node(s);
- fdt_add_efuse_cache_node(s);
- fdt_add_cpu_nodes(s, psci_conduit);
- fdt_add_clk_node(s, "/clk125", 125000000, s->phandle.clk_125Mhz);
- fdt_add_clk_node(s, "/clk25", 25000000, s->phandle.clk_25Mhz);
-
- /* Make the APU cpu address space visible to virtio and other
- * modules unaware of multiple address-spaces. */
- memory_region_add_subregion_overlap(get_system_memory(),
- 0, &s->soc.fpd.apu.mr, 0);
+
+ /*
+ * Map the SoC address space onto system memory. This will allow virtio and
+ * other modules unaware of multiple address-spaces to work.
+ */
+ memory_region_add_subregion(get_system_memory(), 0, &s->soc.mr_ps);
/* Attach bbram backend, if given */
- bbram_attach_drive(&s->soc.pmc.bbram);
+ bbram_attach_drive(s);
/* Attach efuse backend, if given */
- efuse_attach_drive(&s->soc.pmc.efuse);
+ efuse_attach_drive(s);
- /* Plugin SD cards. */
- for (i = 0; i < ARRAY_SIZE(s->soc.pmc.iou.sd); i++) {
- sd_plugin_card(&s->soc.pmc.iou.sd[i],
- drive_get(IF_SD, 0, i));
+ /* Plug SD cards */
+ for (i = 0; i < versal_get_num_sdhci(vvc->version); i++) {
+ sd_plug_card(s, i, drive_get(IF_SD, 0, i));
}
s->binfo.ram_size = machine->ram_size;
@@ -745,100 +287,133 @@ static void versal_virt_init(MachineState *machine)
s->binfo.loader_start = 0x1000;
s->binfo.dtb_limit = 0x1000000;
}
- arm_load_kernel(&s->soc.fpd.apu.cpu[0], machine, &s->binfo);
+ arm_load_kernel(ARM_CPU(versal_get_boot_cpu(&s->soc)), machine, &s->binfo);
for (i = 0; i < XLNX_VERSAL_NUM_OSPI_FLASH; i++) {
- BusState *spi_bus;
- DeviceState *flash_dev;
ObjectClass *flash_klass;
- qemu_irq cs_line;
DriveInfo *dinfo = drive_get(IF_MTD, 0, i);
+ BlockBackend *blk;
+ const char *mdl;
- spi_bus = qdev_get_child_bus(DEVICE(&s->soc.pmc.iou.ospi), "spi0");
-
- if (s->ospi_model) {
- flash_klass = object_class_by_name(s->ospi_model);
+ if (s->cfg.ospi_model) {
+ flash_klass = object_class_by_name(s->cfg.ospi_model);
if (!flash_klass ||
object_class_is_abstract(flash_klass) ||
!object_class_dynamic_cast(flash_klass, TYPE_M25P80)) {
error_report("'%s' is either abstract or"
- " not a subtype of m25p80", s->ospi_model);
+ " not a subtype of m25p80", s->cfg.ospi_model);
exit(1);
}
+ mdl = s->cfg.ospi_model;
+ } else {
+ mdl = "mt35xu01g";
}
- flash_dev = qdev_new(s->ospi_model ? s->ospi_model : "mt35xu01g");
-
- if (dinfo) {
- qdev_prop_set_drive_err(flash_dev, "drive",
- blk_by_legacy_dinfo(dinfo), &error_fatal);
- }
- qdev_prop_set_uint8(flash_dev, "cs", i);
- qdev_realize_and_unref(flash_dev, spi_bus, &error_fatal);
-
- cs_line = qdev_get_gpio_in_named(flash_dev, SSI_GPIO_CS, 0);
-
- sysbus_connect_irq(SYS_BUS_DEVICE(&s->soc.pmc.iou.ospi),
- i + 1, cs_line);
+ blk = dinfo ? blk_by_legacy_dinfo(dinfo) : NULL;
+ versal_ospi_create_flash(&s->soc, i, mdl, blk);
}
}
static void versal_virt_machine_instance_init(Object *obj)
{
- VersalVirt *s = XLNX_VERSAL_VIRT_MACHINE(obj);
+ VersalVirt *s = XLNX_VERSAL_VIRT_BASE_MACHINE(obj);
+ VersalVirtClass *vvc = XLNX_VERSAL_VIRT_BASE_MACHINE_GET_CLASS(s);
+ size_t i, num_can;
+
+ num_can = versal_get_num_can(vvc->version);
+ s->canbus = g_new0(CanBusState *, num_can);
/*
- * User can set canbus0 and canbus1 properties to can-bus object and connect
- * to socketcan(optional) interface via command line.
+ * User can set canbusx properties to can-bus object and optionally connect
+ * to socketcan interface via command line.
*/
- object_property_add_link(obj, "canbus0", TYPE_CAN_BUS,
- (Object **)&s->canbus[0],
- object_property_allow_set_link,
- 0);
- object_property_add_link(obj, "canbus1", TYPE_CAN_BUS,
- (Object **)&s->canbus[1],
- object_property_allow_set_link,
- 0);
+ for (i = 0; i < num_can; i++) {
+ g_autofree char *prop_name = g_strdup_printf("canbus%zu", i);
+
+ object_property_add_link(obj, prop_name, TYPE_CAN_BUS,
+ (Object **) &s->canbus[i],
+ object_property_allow_set_link, 0);
+ }
}
static void versal_virt_machine_finalize(Object *obj)
{
- VersalVirt *s = XLNX_VERSAL_VIRT_MACHINE(obj);
+ VersalVirt *s = XLNX_VERSAL_VIRT_BASE_MACHINE(obj);
- g_free(s->ospi_model);
+ g_free(s->cfg.ospi_model);
+ g_free(s->canbus);
}
-static void versal_virt_machine_class_init(ObjectClass *oc, const void *data)
+static void versal_virt_machine_class_init_common(ObjectClass *oc)
{
MachineClass *mc = MACHINE_CLASS(oc);
+ VersalVirtClass *vvc = XLNX_VERSAL_VIRT_BASE_MACHINE_CLASS(mc);
+ int num_cpu = versal_get_num_cpu(vvc->version);
- mc->desc = "Xilinx Versal Virtual development board";
- mc->init = versal_virt_init;
- mc->min_cpus = XLNX_VERSAL_NR_ACPUS + XLNX_VERSAL_NR_RCPUS;
- mc->max_cpus = XLNX_VERSAL_NR_ACPUS + XLNX_VERSAL_NR_RCPUS;
- mc->default_cpus = XLNX_VERSAL_NR_ACPUS + XLNX_VERSAL_NR_RCPUS;
mc->no_cdrom = true;
mc->auto_create_sdcard = true;
mc->default_ram_id = "ddr";
+ mc->min_cpus = num_cpu;
+ mc->max_cpus = num_cpu;
+ mc->default_cpus = num_cpu;
+ mc->init = versal_virt_init;
+
object_class_property_add_str(oc, "ospi-flash", versal_get_ospi_model,
versal_set_ospi_model);
object_class_property_set_description(oc, "ospi-flash",
"Change the OSPI Flash model");
}
-static const TypeInfo versal_virt_machine_init_typeinfo = {
- .name = TYPE_XLNX_VERSAL_VIRT_MACHINE,
+static void versal_virt_machine_class_init(ObjectClass *oc, const void *data)
+{
+ MachineClass *mc = MACHINE_CLASS(oc);
+ VersalVirtClass *vvc = XLNX_VERSAL_VIRT_BASE_MACHINE_CLASS(oc);
+
+ mc->desc = "AMD Versal Virtual development board";
+ mc->alias = "xlnx-versal-virt";
+ vvc->version = VERSAL_VER_VERSAL;
+
+ versal_virt_machine_class_init_common(oc);
+}
+
+static void versal2_virt_machine_class_init(ObjectClass *oc, const void *data)
+{
+ MachineClass *mc = MACHINE_CLASS(oc);
+ VersalVirtClass *vvc = XLNX_VERSAL_VIRT_BASE_MACHINE_CLASS(oc);
+
+ mc->desc = "AMD Versal Gen 2 Virtual development board";
+ vvc->version = VERSAL_VER_VERSAL2;
+
+ versal_virt_machine_class_init_common(oc);
+}
+
+static const TypeInfo versal_virt_base_machine_init_typeinfo = {
+ .name = TYPE_XLNX_VERSAL_VIRT_BASE_MACHINE,
.parent = TYPE_MACHINE,
- .class_init = versal_virt_machine_class_init,
+ .class_size = sizeof(VersalVirtClass),
.instance_init = versal_virt_machine_instance_init,
.instance_size = sizeof(VersalVirt),
.instance_finalize = versal_virt_machine_finalize,
+ .abstract = true,
+};
+
+static const TypeInfo versal_virt_machine_init_typeinfo = {
+ .name = TYPE_XLNX_VERSAL_VIRT_MACHINE,
+ .parent = TYPE_XLNX_VERSAL_VIRT_BASE_MACHINE,
+ .class_init = versal_virt_machine_class_init,
+};
+
+static const TypeInfo versal2_virt_machine_init_typeinfo = {
+ .name = TYPE_XLNX_VERSAL2_VIRT_MACHINE,
+ .parent = TYPE_XLNX_VERSAL_VIRT_BASE_MACHINE,
+ .class_init = versal2_virt_machine_class_init,
};
static void versal_virt_machine_init_register_types(void)
{
+ type_register_static(&versal_virt_base_machine_init_typeinfo);
type_register_static(&versal_virt_machine_init_typeinfo);
+ type_register_static(&versal2_virt_machine_init_typeinfo);
}
type_init(versal_virt_machine_init_register_types)
-
diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c
index a42b9e7..81cb629 100644
--- a/hw/arm/xlnx-versal.c
+++ b/hw/arm/xlnx-versal.c
@@ -1,7 +1,8 @@
/*
- * Xilinx Versal SoC model.
+ * AMD/Xilinx Versal family SoC model.
*
* Copyright (c) 2018 Xilinx Inc.
+ * Copyright (c) 2025 Advanced Micro Devices, Inc.
* Written by Edgar E. Iglesias
*
* This program is free software; you can redistribute it and/or modify
@@ -17,827 +18,1745 @@
#include "hw/sysbus.h"
#include "net/net.h"
#include "system/system.h"
-#include "hw/arm/boot.h"
#include "hw/misc/unimp.h"
#include "hw/arm/xlnx-versal.h"
#include "qemu/log.h"
#include "target/arm/cpu-qom.h"
#include "target/arm/gtimer.h"
+#include "system/device_tree.h"
+#include "hw/arm/fdt.h"
+#include "hw/char/pl011.h"
+#include "hw/net/xlnx-versal-canfd.h"
+#include "hw/sd/sdhci.h"
+#include "hw/net/cadence_gem.h"
+#include "hw/dma/xlnx-zdma.h"
+#include "hw/misc/xlnx-versal-xramc.h"
+#include "hw/usb/xlnx-usb-subsystem.h"
+#include "hw/nvram/xlnx-versal-efuse.h"
+#include "hw/ssi/xlnx-versal-ospi.h"
+#include "hw/misc/xlnx-versal-pmc-iou-slcr.h"
+#include "hw/nvram/xlnx-bbram.h"
+#include "hw/misc/xlnx-versal-trng.h"
+#include "hw/rtc/xlnx-zynqmp-rtc.h"
+#include "hw/misc/xlnx-versal-cfu.h"
+#include "hw/misc/xlnx-versal-cframe-reg.h"
+#include "hw/or-irq.h"
+#include "hw/misc/xlnx-versal-crl.h"
+#include "hw/intc/arm_gicv3_common.h"
+#include "hw/intc/arm_gicv3_its_common.h"
+#include "hw/intc/arm_gic.h"
+#include "hw/core/split-irq.h"
+#include "target/arm/cpu.h"
+#include "hw/cpu/cluster.h"
+#include "hw/arm/bsa.h"
-#define XLNX_VERSAL_ACPU_TYPE ARM_CPU_TYPE_NAME("cortex-a72")
-#define XLNX_VERSAL_RCPU_TYPE ARM_CPU_TYPE_NAME("cortex-r5f")
-#define GEM_REVISION 0x40070106
+/*
+ * IRQ descriptor to catch the following cases:
+ * - An IRQ can either connect to the GICs, to the PPU1 intc, or the the EAM
+ * - Multiple devices can connect to the same IRQ. They are OR'ed together.
+ */
+FIELD(VERSAL_IRQ, IRQ, 0, 16)
+FIELD(VERSAL_IRQ, TARGET, 16, 2)
+FIELD(VERSAL_IRQ, ORED, 18, 1)
+FIELD(VERSAL_IRQ, OR_IDX, 19, 4) /* input index on the IRQ OR gate */
+
+typedef enum VersalIrqTarget {
+ IRQ_TARGET_GIC,
+ IRQ_TARGET_PPU1,
+ IRQ_TARGET_EAM,
+} VersalIrqTarget;
+
+#define PPU1_IRQ(irq) ((IRQ_TARGET_PPU1 << R_VERSAL_IRQ_TARGET_SHIFT) | (irq))
+#define EAM_IRQ(irq) ((IRQ_TARGET_EAM << R_VERSAL_IRQ_TARGET_SHIFT) | (irq))
+#define OR_IRQ(irq, or_idx) \
+ (R_VERSAL_IRQ_ORED_MASK | ((or_idx) << R_VERSAL_IRQ_OR_IDX_SHIFT) | (irq))
+#define PPU1_OR_IRQ(irq, or_idx) \
+ ((IRQ_TARGET_PPU1 << R_VERSAL_IRQ_TARGET_SHIFT) | OR_IRQ(irq, or_idx))
+
+typedef struct VersalSimplePeriphMap {
+ uint64_t addr;
+ int irq;
+} VersalSimplePeriphMap;
+
+typedef struct VersalMemMap {
+ uint64_t addr;
+ uint64_t size;
+} VersalMemMap;
+
+typedef struct VersalGicMap {
+ int version;
+ uint64_t dist;
+ uint64_t redist;
+ uint64_t cpu_iface;
+ uint64_t its;
+ size_t num_irq;
+ bool has_its;
+} VersalGicMap;
+
+enum StartPoweredOffMode {
+ SPO_SECONDARIES,
+ SPO_ALL,
+};
+
+typedef struct VersalCpuClusterMap {
+ VersalGicMap gic;
+ /*
+ * true: one GIC per cluster.
+ * false: one GIC for all CPUs
+ */
+ bool per_cluster_gic;
+
+ const char *name;
+ const char *cpu_model;
+ size_t num_core;
+ size_t num_cluster;
+ uint32_t qemu_cluster_id;
+ bool dtb_expose;
-#define VERSAL_NUM_PMC_APB_IRQS 18
-#define NUM_OSPI_IRQ_LINES 3
+ struct {
+ uint64_t base;
+ uint64_t core_shift;
+ uint64_t cluster_shift;
+ } mp_affinity;
+
+ enum StartPoweredOffMode start_powered_off;
+} VersalCpuClusterMap;
+
+typedef struct VersalMap {
+ VersalMemMap ocm;
+
+ struct VersalDDRMap {
+ VersalMemMap chan[4];
+ size_t num_chan;
+ } ddr;
+
+ VersalCpuClusterMap apu;
+ VersalCpuClusterMap rpu;
+
+ VersalSimplePeriphMap uart[2];
+ size_t num_uart;
+
+ VersalSimplePeriphMap canfd[4];
+ size_t num_canfd;
+
+ VersalSimplePeriphMap sdhci[2];
+ size_t num_sdhci;
+
+ struct VersalGemMap {
+ VersalSimplePeriphMap map;
+ size_t num_prio_queue;
+ const char *phy_mode;
+ const uint32_t speed;
+ } gem[3];
+ size_t num_gem;
+
+ struct VersalZDMAMap {
+ const char *name;
+ VersalSimplePeriphMap map;
+ size_t num_chan;
+ uint64_t chan_stride;
+ int irq_stride;
+ } zdma[2];
+ size_t num_zdma;
+
+ struct VersalXramMap {
+ uint64_t mem;
+ uint64_t mem_stride;
+ uint64_t ctrl;
+ uint64_t ctrl_stride;
+ int irq;
+ size_t num;
+ } xram;
+
+ struct VersalUsbMap {
+ uint64_t xhci;
+ uint64_t ctrl;
+ int irq;
+ } usb[2];
+ size_t num_usb;
+
+ struct VersalEfuseMap {
+ uint64_t ctrl;
+ uint64_t cache;
+ int irq;
+ } efuse;
+
+ struct VersalOspiMap {
+ uint64_t ctrl;
+ uint64_t dac;
+ uint64_t dac_sz;
+ uint64_t dma_src;
+ uint64_t dma_dst;
+ int irq;
+ } ospi;
+
+ VersalSimplePeriphMap pmc_iou_slcr;
+ VersalSimplePeriphMap bbram;
+ VersalSimplePeriphMap trng;
+
+ struct VersalRtcMap {
+ VersalSimplePeriphMap map;
+ int alarm_irq;
+ int second_irq;
+ } rtc;
+
+ struct VersalCfuMap {
+ uint64_t cframe_base;
+ uint64_t cframe_stride;
+ uint64_t cfu_fdro;
+ uint64_t cframe_bcast_reg;
+ uint64_t cframe_bcast_fdri;
+ uint64_t cfu_apb;
+ uint64_t cfu_stream;
+ uint64_t cfu_stream_2;
+ uint64_t cfu_sfr;
+ int cfu_apb_irq;
+ int cframe_irq;
+ size_t num_cframe;
+ struct VersalCfuCframeCfg {
+ uint32_t blktype_frames[7];
+ } cframe_cfg[15];
+ } cfu;
+
+ VersalSimplePeriphMap crl;
+
+ /* reserved MMIO/IRQ space that can safely be used for virtio devices */
+ struct VersalReserved {
+ uint64_t mmio_start;
+ int irq_start;
+ int irq_num;
+ } reserved;
+} VersalMap;
+
+static const VersalMap VERSAL_MAP = {
+ .ocm = {
+ .addr = 0xfffc0000,
+ .size = 0x40000,
+ },
+
+ .ddr = {
+ .chan[0] = { .addr = 0x0, .size = 2 * GiB },
+ .chan[1] = { .addr = 0x800000000ull, .size = 32 * GiB },
+ .chan[2] = { .addr = 0xc00000000ull, .size = 256 * GiB },
+ .chan[3] = { .addr = 0x10000000000ull, .size = 734 * GiB },
+ .num_chan = 4,
+ },
+
+ .apu = {
+ .name = "apu",
+ .cpu_model = ARM_CPU_TYPE_NAME("cortex-a72"),
+ .num_cluster = 1,
+ .num_core = 2,
+ .qemu_cluster_id = 0,
+ .mp_affinity = {
+ .core_shift = ARM_AFF0_SHIFT,
+ .cluster_shift = ARM_AFF1_SHIFT,
+ },
+ .start_powered_off = SPO_SECONDARIES,
+ .dtb_expose = true,
+ .gic = {
+ .version = 3,
+ .dist = 0xf9000000,
+ .redist = 0xf9080000,
+ .num_irq = 192,
+ .has_its = true,
+ .its = 0xf9020000,
+ },
+ },
+
+ .rpu = {
+ .name = "rpu",
+ .cpu_model = ARM_CPU_TYPE_NAME("cortex-r5f"),
+ .num_cluster = 1,
+ .num_core = 2,
+ .qemu_cluster_id = 1,
+ .mp_affinity = {
+ .base = 0x100,
+ .core_shift = ARM_AFF0_SHIFT,
+ .cluster_shift = ARM_AFF1_SHIFT,
+ },
+ .start_powered_off = SPO_ALL,
+ .dtb_expose = false,
+ .gic = {
+ .version = 2,
+ .dist = 0xf9000000,
+ .cpu_iface = 0xf9001000,
+ .num_irq = 192,
+ },
+ },
+
+ .uart[0] = { 0xff000000, 18 },
+ .uart[1] = { 0xff010000, 19 },
+ .num_uart = 2,
+
+ .canfd[0] = { 0xff060000, 20 },
+ .canfd[1] = { 0xff070000, 21 },
+ .num_canfd = 2,
+
+ .sdhci[0] = { 0xf1040000, 126 },
+ .sdhci[1] = { 0xf1050000, 128 },
+ .num_sdhci = 2,
+
+ .gem[0] = { { 0xff0c0000, 56 }, 2, "rgmii-id", 1000 },
+ .gem[1] = { { 0xff0d0000, 58 }, 2, "rgmii-id", 1000 },
+ .num_gem = 2,
+
+ .zdma[0] = { "adma", { 0xffa80000, 60 }, 8, 0x10000, 1 },
+ .num_zdma = 1,
+
+ .xram = {
+ .num = 4,
+ .mem = 0xfe800000, .mem_stride = 1 * MiB,
+ .ctrl = 0xff8e0000, .ctrl_stride = 0x10000,
+ .irq = 79,
+ },
+
+ .usb[0] = { .xhci = 0xfe200000, .ctrl = 0xff9d0000, .irq = 22 },
+ .num_usb = 1,
+
+ .efuse = { .ctrl = 0xf1240000, .cache = 0xf1250000, .irq = 139 },
+
+ .ospi = {
+ .ctrl = 0xf1010000,
+ .dac = 0xc0000000, .dac_sz = 0x20000000,
+ .dma_src = 0xf1011000, .dma_dst = 0xf1011800,
+ .irq = 124,
+ },
+
+ .pmc_iou_slcr = { 0xf1060000, OR_IRQ(121, 0) },
+ .bbram = { 0xf11f0000, OR_IRQ(121, 1) },
+ .trng = { 0xf1230000, 141 },
+ .rtc = {
+ { 0xf12a0000, OR_IRQ(121, 2) },
+ .alarm_irq = 142, .second_irq = 143
+ },
+
+ .cfu = {
+ .cframe_base = 0xf12d0000, .cframe_stride = 0x1000,
+ .cframe_bcast_reg = 0xf12ee000, .cframe_bcast_fdri = 0xf12ef000,
+ .cfu_apb = 0xf12b0000, .cfu_sfr = 0xf12c1000,
+ .cfu_stream = 0xf12c0000, .cfu_stream_2 = 0xf1f80000,
+ .cfu_fdro = 0xf12c2000,
+ .cfu_apb_irq = 120, .cframe_irq = OR_IRQ(121, 3),
+ .num_cframe = 15,
+ .cframe_cfg = {
+ { { 34111, 3528, 12800, 11, 5, 1, 1 } },
+ { { 38498, 3841, 15361, 13, 7, 3, 1 } },
+ { { 38498, 3841, 15361, 13, 7, 3, 1 } },
+ { { 38498, 3841, 15361, 13, 7, 3, 1 } },
+ },
+ },
+
+ .crl = { 0xff5e0000, 10 },
+
+ .reserved = { 0xa0000000, 111, 8 },
+};
+
+static const VersalMap VERSAL2_MAP = {
+ .ocm = {
+ .addr = 0xbbe00000,
+ .size = 2 * MiB,
+ },
+
+ .ddr = {
+ .chan[0] = { .addr = 0x0, .size = 2046 * MiB },
+ .chan[1] = { .addr = 0x800000000ull, .size = 32 * GiB },
+ .chan[2] = { .addr = 0xc00000000ull, .size = 256 * GiB },
+ .chan[3] = { .addr = 0x10000000000ull, .size = 734 * GiB },
+ .num_chan = 4,
+ },
+
+ .apu = {
+ .name = "apu",
+ .cpu_model = ARM_CPU_TYPE_NAME("cortex-a78ae"),
+ .num_cluster = 4,
+ .num_core = 2,
+ .qemu_cluster_id = 0,
+ .mp_affinity = {
+ .base = 0x0, /* TODO: the MT bit should be set */
+ .core_shift = ARM_AFF1_SHIFT,
+ .cluster_shift = ARM_AFF2_SHIFT,
+ },
+ .start_powered_off = SPO_SECONDARIES,
+ .dtb_expose = true,
+ .gic = {
+ .version = 3,
+ .dist = 0xe2000000,
+ .redist = 0xe2060000,
+ .num_irq = 544,
+ .has_its = true,
+ .its = 0xe2040000,
+ },
+ },
+
+ .rpu = {
+ .name = "rpu",
+ .cpu_model = ARM_CPU_TYPE_NAME("cortex-r52"),
+ .num_cluster = 5,
+ .num_core = 2,
+ .qemu_cluster_id = 1,
+ .mp_affinity = {
+ .core_shift = ARM_AFF0_SHIFT,
+ .cluster_shift = ARM_AFF1_SHIFT,
+ },
+ .start_powered_off = SPO_ALL,
+ .dtb_expose = false,
+ .per_cluster_gic = true,
+ .gic = {
+ .version = 3,
+ .dist = 0x0,
+ .redist = 0x100000,
+ .num_irq = 288,
+ },
+ },
+
+ .uart[0] = { 0xf1920000, 25 },
+ .uart[1] = { 0xf1930000, 26 },
+ .num_uart = 2,
+
+ .canfd[0] = { 0xf19e0000, 27 },
+ .canfd[1] = { 0xf19f0000, 28 },
+ .canfd[2] = { 0xf1a00000, 95 },
+ .canfd[3] = { 0xf1a10000, 96 },
+ .num_canfd = 4,
+
+ .gem[0] = { { 0xf1a60000, 39 }, 2, "rgmii-id", 1000 },
+ .gem[1] = { { 0xf1a70000, 41 }, 2, "rgmii-id", 1000 },
+ .gem[2] = { { 0xed920000, 164 }, 4, "usxgmii", 10000 }, /* MMI 10Gb GEM */
+ .num_gem = 3,
+
+ .zdma[0] = { "adma", { 0xebd00000, 72 }, 8, 0x10000, 1 },
+ .zdma[1] = { "sdma", { 0xebd80000, 112 }, 8, 0x10000, 1 },
+ .num_zdma = 2,
+
+ .usb[0] = { .xhci = 0xf1b00000, .ctrl = 0xf1ee0000, .irq = 29 },
+ .usb[1] = { .xhci = 0xf1c00000, .ctrl = 0xf1ef0000, .irq = 34 },
+ .num_usb = 2,
+
+ .efuse = { .ctrl = 0xf1240000, .cache = 0xf1250000, .irq = 230 },
+
+ .ospi = {
+ .ctrl = 0xf1010000,
+ .dac = 0xc0000000, .dac_sz = 0x20000000,
+ .dma_src = 0xf1011000, .dma_dst = 0xf1011800,
+ .irq = 216,
+ },
+
+ .sdhci[0] = { 0xf1040000, 218 },
+ .sdhci[1] = { 0xf1050000, 220 }, /* eMMC */
+ .num_sdhci = 2,
+
+ .pmc_iou_slcr = { 0xf1060000, 222 },
+ .bbram = { 0xf11f0000, PPU1_OR_IRQ(18, 0) },
+ .crl = { 0xeb5e0000 },
+ .trng = { 0xf1230000, 233 },
+ .rtc = {
+ { 0xf12a0000, PPU1_OR_IRQ(18, 1) },
+ .alarm_irq = 200, .second_irq = 201
+ },
+
+ .cfu = {
+ .cframe_base = 0xf12d0000, .cframe_stride = 0x1000,
+ .cframe_bcast_reg = 0xf12ee000, .cframe_bcast_fdri = 0xf12ef000,
+ .cfu_apb = 0xf12b0000, .cfu_sfr = 0xf12c1000,
+ .cfu_stream = 0xf12c0000, .cfu_stream_2 = 0xf1f80000,
+ .cfu_fdro = 0xf12c2000,
+ .cfu_apb_irq = 235, .cframe_irq = EAM_IRQ(7),
+ },
+
+ .reserved = { 0xf5e00000, 270, 8 },
+};
-static void versal_create_apu_cpus(Versal *s)
+static const VersalMap *VERSION_TO_MAP[] = {
+ [VERSAL_VER_VERSAL] = &VERSAL_MAP,
+ [VERSAL_VER_VERSAL2] = &VERSAL2_MAP,
+};
+
+static inline VersalVersion versal_get_version(Versal *s)
{
- int i;
+ return XLNX_VERSAL_BASE_GET_CLASS(s)->version;
+}
- object_initialize_child(OBJECT(s), "apu-cluster", &s->fpd.apu.cluster,
- TYPE_CPU_CLUSTER);
- qdev_prop_set_uint32(DEVICE(&s->fpd.apu.cluster), "cluster-id", 0);
-
- for (i = 0; i < ARRAY_SIZE(s->fpd.apu.cpu); i++) {
- Object *obj;
-
- object_initialize_child(OBJECT(&s->fpd.apu.cluster),
- "apu-cpu[*]", &s->fpd.apu.cpu[i],
- XLNX_VERSAL_ACPU_TYPE);
- obj = OBJECT(&s->fpd.apu.cpu[i]);
- if (i) {
- /* Secondary CPUs start in powered-down state */
- object_property_set_bool(obj, "start-powered-off", true,
- &error_abort);
- }
+static inline const VersalMap *versal_get_map(Versal *s)
+{
+ return VERSION_TO_MAP[versal_get_version(s)];
+}
- object_property_set_int(obj, "core-count", ARRAY_SIZE(s->fpd.apu.cpu),
- &error_abort);
- object_property_set_link(obj, "memory", OBJECT(&s->fpd.apu.mr),
- &error_abort);
- qdev_realize(DEVICE(obj), NULL, &error_fatal);
+static inline Object *versal_get_child(Versal *s, const char *child)
+{
+ return object_resolve_path_at(OBJECT(s), child);
+}
+
+static inline Object *versal_get_child_idx(Versal *s, const char *child,
+ size_t idx)
+{
+ g_autofree char *n = g_strdup_printf("%s[%zu]", child, idx);
+
+ return versal_get_child(s, n);
+}
+
+/*
+ * The SoC embeds multiple GICs. They all receives the same IRQ lines at the
+ * same index. This function creates a TYPE_SPLIT_IRQ device to fan out the
+ * given IRQ input to all the GICs.
+ *
+ * The TYPE_SPLIT_IRQ devices lie in the /soc/irq-splits QOM container
+ */
+static qemu_irq versal_get_gic_irq(Versal *s, int irq_idx)
+{
+ DeviceState *split;
+ Object *container = versal_get_child(s, "irq-splits");
+ int idx = FIELD_EX32(irq_idx, VERSAL_IRQ, IRQ);
+ g_autofree char *name = g_strdup_printf("irq[%d]", idx);
+
+ split = DEVICE(object_resolve_path_at(container, name));
+
+ if (split == NULL) {
+ size_t i;
+
+ split = qdev_new(TYPE_SPLIT_IRQ);
+ qdev_prop_set_uint16(split, "num-lines", s->intc->len);
+ object_property_add_child(container, name, OBJECT(split));
+ qdev_realize_and_unref(split, NULL, &error_abort);
+
+ for (i = 0; i < s->intc->len; i++) {
+ DeviceState *gic;
+
+ gic = g_array_index(s->intc, DeviceState *, i);
+ qdev_connect_gpio_out(split, i, qdev_get_gpio_in(gic, idx));
+ }
+ } else {
+ g_assert(FIELD_EX32(irq_idx, VERSAL_IRQ, ORED));
}
- qdev_realize(DEVICE(&s->fpd.apu.cluster), NULL, &error_fatal);
+ return qdev_get_gpio_in(split, 0);
}
-static void versal_create_apu_gic(Versal *s, qemu_irq *pic)
+/*
+ * When the R_VERSAL_IRQ_ORED flag is set on an IRQ descriptor, this function is
+ * used to return the corresponding or gate input IRQ. The or gate is created if
+ * not already existant.
+ *
+ * Or gates are placed under the /soc/irq-or-gates QOM container.
+ */
+static qemu_irq versal_get_irq_or_gate_in(Versal *s, int irq_idx,
+ qemu_irq target_irq)
{
- static const uint64_t addrs[] = {
- MM_GIC_APU_DIST_MAIN,
- MM_GIC_APU_REDIST_0
+ static const char *TARGET_STR[] = {
+ [IRQ_TARGET_GIC] = "gic",
+ [IRQ_TARGET_PPU1] = "ppu1",
+ [IRQ_TARGET_EAM] = "eam",
};
- SysBusDevice *gicbusdev;
- DeviceState *gicdev;
- QList *redist_region_count;
- int nr_apu_cpus = ARRAY_SIZE(s->fpd.apu.cpu);
- int i;
- object_initialize_child(OBJECT(s), "apu-gic", &s->fpd.apu.gic,
- gicv3_class_name());
- gicbusdev = SYS_BUS_DEVICE(&s->fpd.apu.gic);
- gicdev = DEVICE(&s->fpd.apu.gic);
- qdev_prop_set_uint32(gicdev, "revision", 3);
- qdev_prop_set_uint32(gicdev, "num-cpu", nr_apu_cpus);
- qdev_prop_set_uint32(gicdev, "num-irq", XLNX_VERSAL_NR_IRQS + 32);
+ VersalIrqTarget target;
+ Object *container = versal_get_child(s, "irq-or-gates");
+ DeviceState *dev;
+ g_autofree char *name;
+ int idx, or_idx;
+
+ idx = FIELD_EX32(irq_idx, VERSAL_IRQ, IRQ);
+ or_idx = FIELD_EX32(irq_idx, VERSAL_IRQ, OR_IDX);
+ target = FIELD_EX32(irq_idx, VERSAL_IRQ, TARGET);
+
+ name = g_strdup_printf("%s-irq[%d]", TARGET_STR[target], idx);
+ dev = DEVICE(object_resolve_path_at(container, name));
+
+ if (dev == NULL) {
+ dev = qdev_new(TYPE_OR_IRQ);
+ object_property_add_child(container, name, OBJECT(dev));
+ qdev_prop_set_uint16(dev, "num-lines", 1 << R_VERSAL_IRQ_OR_IDX_LENGTH);
+ qdev_realize_and_unref(dev, NULL, &error_abort);
+ qdev_connect_gpio_out(dev, 0, target_irq);
+ }
- redist_region_count = qlist_new();
- qlist_append_int(redist_region_count, nr_apu_cpus);
- qdev_prop_set_array(gicdev, "redist-region-count", redist_region_count);
+ return qdev_get_gpio_in(dev, or_idx);
+}
+
+static qemu_irq versal_get_irq(Versal *s, int irq_idx)
+{
+ VersalIrqTarget target;
+ qemu_irq irq;
+ bool ored;
+
+ target = FIELD_EX32(irq_idx, VERSAL_IRQ, TARGET);
+ ored = FIELD_EX32(irq_idx, VERSAL_IRQ, ORED);
- qdev_prop_set_bit(gicdev, "has-security-extensions", true);
+ switch (target) {
+ case IRQ_TARGET_EAM:
+ /* EAM not implemented */
+ return NULL;
- sysbus_realize(SYS_BUS_DEVICE(&s->fpd.apu.gic), &error_fatal);
+ case IRQ_TARGET_PPU1:
+ /* PPU1 CPU not implemented */
+ return NULL;
- for (i = 0; i < ARRAY_SIZE(addrs); i++) {
- MemoryRegion *mr;
+ case IRQ_TARGET_GIC:
+ irq = versal_get_gic_irq(s, irq_idx);
+ break;
- mr = sysbus_mmio_get_region(gicbusdev, i);
- memory_region_add_subregion(&s->fpd.apu.mr, addrs[i], mr);
+ default:
+ g_assert_not_reached();
}
- for (i = 0; i < nr_apu_cpus; i++) {
- DeviceState *cpudev = DEVICE(&s->fpd.apu.cpu[i]);
- int ppibase = XLNX_VERSAL_NR_IRQS + i * GIC_INTERNAL + GIC_NR_SGIS;
- qemu_irq maint_irq;
- int ti;
- /* Mapping from the output timer irq lines from the CPU to the
- * GIC PPI inputs.
- */
- const int timer_irq[] = {
- [GTIMER_PHYS] = VERSAL_TIMER_NS_EL1_IRQ,
- [GTIMER_VIRT] = VERSAL_TIMER_VIRT_IRQ,
- [GTIMER_HYP] = VERSAL_TIMER_NS_EL2_IRQ,
- [GTIMER_SEC] = VERSAL_TIMER_S_EL1_IRQ,
- };
+ if (ored) {
+ irq = versal_get_irq_or_gate_in(s, irq_idx, irq);
+ }
+
+ return irq;
+}
+
+static void versal_sysbus_connect_irq(Versal *s, SysBusDevice *sbd,
+ int sbd_idx, int irq_idx)
+{
+ qemu_irq irq = versal_get_irq(s, irq_idx);
+
+ if (irq == NULL) {
+ return;
+ }
+
+ sysbus_connect_irq(sbd, sbd_idx, irq);
+}
+
+static void versal_qdev_connect_gpio_out(Versal *s, DeviceState *dev,
+ int dev_idx, int irq_idx)
+{
+ qemu_irq irq = versal_get_irq(s, irq_idx);
+
+ if (irq == NULL) {
+ return;
+ }
+
+ qdev_connect_gpio_out(dev, dev_idx, irq);
+}
+
+static inline char *versal_fdt_add_subnode(Versal *s, const char *path,
+ uint64_t at, const char *compat,
+ size_t compat_sz)
+{
+ char *p;
+
+ p = g_strdup_printf("%s@%" PRIx64, path, at);
+ qemu_fdt_add_subnode(s->cfg.fdt, p);
+
+ if (!strncmp(compat, "memory", compat_sz)) {
+ qemu_fdt_setprop(s->cfg.fdt, p, "device_type", compat, compat_sz);
+ } else {
+ qemu_fdt_setprop(s->cfg.fdt, p, "compatible", compat, compat_sz);
+ }
+
+ return p;
+}
+
+static inline char *versal_fdt_add_simple_subnode(Versal *s, const char *path,
+ uint64_t addr, uint64_t len,
+ const char *compat,
+ size_t compat_sz)
+{
+ char *p = versal_fdt_add_subnode(s, path, addr, compat, compat_sz);
+
+ qemu_fdt_setprop_sized_cells(s->cfg.fdt, p, "reg", 2, addr, 2, len);
+ return p;
+}
+
+static inline DeviceState *create_or_gate(Versal *s, Object *parent,
+ const char *name, uint16_t num_lines,
+ int irq_idx)
+{
+ DeviceState *or;
+
+ or = qdev_new(TYPE_OR_IRQ);
+ qdev_prop_set_uint16(or, "num-lines", num_lines);
+ object_property_add_child(parent, name, OBJECT(or));
+ qdev_realize_and_unref(or, NULL, &error_abort);
+ versal_qdev_connect_gpio_out(s, or, 0, irq_idx);
+
+ return or;
+}
+
+static MemoryRegion *create_cpu_mr(Versal *s, DeviceState *cluster,
+ const VersalCpuClusterMap *map)
+{
+ MemoryRegion *mr, *root_alias;
+ char *name;
+
+ mr = g_new(MemoryRegion, 1);
+ name = g_strdup_printf("%s-mr", map->name);
+ memory_region_init(mr, OBJECT(cluster), name, UINT64_MAX);
+ g_free(name);
+
+ root_alias = g_new(MemoryRegion, 1);
+ name = g_strdup_printf("ps-alias-for-%s", map->name);
+ memory_region_init_alias(root_alias, OBJECT(cluster), name,
+ &s->mr_ps, 0, UINT64_MAX);
+ g_free(name);
+ memory_region_add_subregion(mr, 0, root_alias);
+
+ return mr;
+}
+
+static void versal_create_gic_its(Versal *s,
+ const VersalCpuClusterMap *map,
+ DeviceState *gic,
+ MemoryRegion *mr,
+ char *gic_node)
+{
+ DeviceState *dev;
+ SysBusDevice *sbd;
+ g_autofree char *node_pat = NULL, *node = NULL;
+ const char compatible[] = "arm,gic-v3-its";
+
+ if (map->gic.version != 3) {
+ return;
+ }
+
+ if (!map->gic.has_its) {
+ return;
+ }
+
+ dev = qdev_new(TYPE_ARM_GICV3_ITS);
+ sbd = SYS_BUS_DEVICE(dev);
+
+ object_property_add_child(OBJECT(gic), "its", OBJECT(dev));
+ object_property_set_link(OBJECT(dev), "parent-gicv3", OBJECT(gic),
+ &error_abort);
+
+ sysbus_realize_and_unref(sbd, &error_abort);
+
+ memory_region_add_subregion(mr, map->gic.its,
+ sysbus_mmio_get_region(sbd, 0));
+
+ if (!map->dtb_expose) {
+ return;
+ }
+
+ qemu_fdt_setprop(s->cfg.fdt, gic_node, "ranges", NULL, 0);
+ qemu_fdt_setprop_cell(s->cfg.fdt, gic_node, "#address-cells", 2);
+ qemu_fdt_setprop_cell(s->cfg.fdt, gic_node, "#size-cells", 2);
+
+ node_pat = g_strdup_printf("%s/its", gic_node);
+ node = versal_fdt_add_simple_subnode(s, node_pat, map->gic.its, 0x20000,
+ compatible, sizeof(compatible));
+ qemu_fdt_setprop(s->cfg.fdt, node, "msi-controller", NULL, 0);
+ qemu_fdt_setprop_cell(s->cfg.fdt, node, "#msi-cells", 1);
+}
+
+static DeviceState *versal_create_gic(Versal *s,
+ const VersalCpuClusterMap *map,
+ MemoryRegion *mr,
+ int first_cpu_idx,
+ size_t num_cpu)
+{
+ DeviceState *dev;
+ SysBusDevice *sbd;
+ g_autofree char *node = NULL;
+ g_autofree char *name = NULL;
+ const char gicv3_compat[] = "arm,gic-v3";
+ const char gicv2_compat[] = "arm,cortex-a15-gic";
+
+ switch (map->gic.version) {
+ case 2:
+ dev = qdev_new(gic_class_name());
+ break;
+
+ case 3:
+ dev = qdev_new(gicv3_class_name());
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+
+ name = g_strdup_printf("%s-gic[*]", map->name);
+ object_property_add_child(OBJECT(s), name, OBJECT(dev));
+ sbd = SYS_BUS_DEVICE(dev);
+ qdev_prop_set_uint32(dev, "revision", map->gic.version);
+ qdev_prop_set_uint32(dev, "num-cpu", num_cpu);
+ qdev_prop_set_uint32(dev, "num-irq", map->gic.num_irq + 32);
+ qdev_prop_set_bit(dev, "has-security-extensions", true);
+ qdev_prop_set_uint32(dev, "first-cpu-index", first_cpu_idx);
+
+ if (map->gic.version == 3) {
+ QList *redist_region_count;
+
+ redist_region_count = qlist_new();
+ qlist_append_int(redist_region_count, num_cpu);
+ qdev_prop_set_array(dev, "redist-region-count", redist_region_count);
+ qdev_prop_set_bit(dev, "has-lpi", map->gic.has_its);
+ object_property_set_link(OBJECT(dev), "sysmem", OBJECT(mr),
+ &error_abort);
+ }
+
+ sysbus_realize_and_unref(sbd, &error_fatal);
+
+ memory_region_add_subregion(mr, map->gic.dist,
+ sysbus_mmio_get_region(sbd, 0));
+
+ if (map->gic.version == 3) {
+ memory_region_add_subregion(mr, map->gic.redist,
+ sysbus_mmio_get_region(sbd, 1));
+ } else {
+ memory_region_add_subregion(mr, map->gic.cpu_iface,
+ sysbus_mmio_get_region(sbd, 1));
+ }
+
+ if (map->dtb_expose) {
+ if (map->gic.version == 3) {
+ node = versal_fdt_add_subnode(s, "/gic", map->gic.dist,
+ gicv3_compat,
+ sizeof(gicv3_compat));
+ qemu_fdt_setprop_sized_cells(s->cfg.fdt, node, "reg",
+ 2, map->gic.dist,
+ 2, 0x10000,
+ 2, map->gic.redist,
+ 2, GICV3_REDIST_SIZE * num_cpu);
+ } else {
+ node = versal_fdt_add_subnode(s, "/gic", map->gic.dist,
+ gicv2_compat,
+ sizeof(gicv2_compat));
+ qemu_fdt_setprop_sized_cells(s->cfg.fdt, node, "reg",
+ 2, map->gic.dist,
+ 2, 0x1000,
+ 2, map->gic.cpu_iface,
+ 2, 0x1000);
+ }
+
+ qemu_fdt_setprop_cell(s->cfg.fdt, node, "phandle", s->phandle.gic);
+ qemu_fdt_setprop_cell(s->cfg.fdt, node, "#interrupt-cells", 3);
+ qemu_fdt_setprop_cells(s->cfg.fdt, node, "interrupts",
+ GIC_FDT_IRQ_TYPE_PPI,
+ INTID_TO_PPI(ARCH_GIC_MAINT_IRQ),
+ GIC_FDT_IRQ_FLAGS_LEVEL_HI);
+ qemu_fdt_setprop(s->cfg.fdt, node, "interrupt-controller", NULL, 0);
+ }
+
+ versal_create_gic_its(s, map, dev, mr, node);
+
+ g_array_append_val(s->intc, dev);
+
+ return dev;
+}
+
+static void connect_gic_to_cpu(const VersalCpuClusterMap *map,
+ DeviceState *gic, DeviceState *cpu, size_t idx,
+ size_t num_cpu)
+{
+ SysBusDevice *sbd = SYS_BUS_DEVICE(gic);
+ int ppibase = map->gic.num_irq + idx * GIC_INTERNAL + GIC_NR_SGIS;
+ int ti;
+ bool has_gtimer;
+ /*
+ * Mapping from the output timer irq lines from the CPU to the
+ * GIC PPI inputs.
+ */
+ const int timer_irq[] = {
+ [GTIMER_PHYS] = INTID_TO_PPI(ARCH_TIMER_NS_EL1_IRQ),
+ [GTIMER_VIRT] = INTID_TO_PPI(ARCH_TIMER_VIRT_IRQ),
+ [GTIMER_HYP] = INTID_TO_PPI(ARCH_TIMER_NS_EL2_IRQ),
+ [GTIMER_SEC] = INTID_TO_PPI(ARCH_TIMER_S_EL1_IRQ),
+ };
+
+ has_gtimer = arm_feature(&ARM_CPU(cpu)->env, ARM_FEATURE_GENERIC_TIMER);
+
+ if (has_gtimer) {
for (ti = 0; ti < ARRAY_SIZE(timer_irq); ti++) {
- qdev_connect_gpio_out(cpudev, ti,
- qdev_get_gpio_in(gicdev,
+ qdev_connect_gpio_out(cpu, ti,
+ qdev_get_gpio_in(gic,
ppibase + timer_irq[ti]));
}
- maint_irq = qdev_get_gpio_in(gicdev,
- ppibase + VERSAL_GIC_MAINT_IRQ);
- qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt",
- 0, maint_irq);
- sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ));
- sysbus_connect_irq(gicbusdev, i + nr_apu_cpus,
- qdev_get_gpio_in(cpudev, ARM_CPU_FIQ));
- sysbus_connect_irq(gicbusdev, i + 2 * nr_apu_cpus,
- qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ));
- sysbus_connect_irq(gicbusdev, i + 3 * nr_apu_cpus,
- qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ));
}
- for (i = 0; i < XLNX_VERSAL_NR_IRQS; i++) {
- pic[i] = qdev_get_gpio_in(gicdev, i);
+ if (map->gic.version == 3) {
+ qemu_irq maint_irq;
+ int maint_idx = ppibase + INTID_TO_PPI(ARCH_GIC_MAINT_IRQ);
+
+ maint_irq = qdev_get_gpio_in(gic, maint_idx);
+ qdev_connect_gpio_out_named(cpu, "gicv3-maintenance-interrupt",
+ 0, maint_irq);
}
+
+ sysbus_connect_irq(sbd, idx, qdev_get_gpio_in(cpu, ARM_CPU_IRQ));
+ sysbus_connect_irq(sbd, idx + num_cpu,
+ qdev_get_gpio_in(cpu, ARM_CPU_FIQ));
+ sysbus_connect_irq(sbd, idx + 2 * num_cpu,
+ qdev_get_gpio_in(cpu, ARM_CPU_VIRQ));
+ sysbus_connect_irq(sbd, idx + 3 * num_cpu,
+ qdev_get_gpio_in(cpu, ARM_CPU_VFIQ));
}
-static void versal_create_rpu_cpus(Versal *s)
+static inline void versal_create_and_connect_gic(Versal *s,
+ const VersalCpuClusterMap *map,
+ MemoryRegion *mr,
+ DeviceState **cpus,
+ size_t num_cpu)
{
- int i;
+ DeviceState *gic;
+ int first_cpu_idx;
+ size_t i;
- object_initialize_child(OBJECT(s), "rpu-cluster", &s->lpd.rpu.cluster,
- TYPE_CPU_CLUSTER);
- qdev_prop_set_uint32(DEVICE(&s->lpd.rpu.cluster), "cluster-id", 1);
+ first_cpu_idx = CPU(cpus[0])->cpu_index;
+ gic = versal_create_gic(s, map, mr, first_cpu_idx, num_cpu);
- for (i = 0; i < ARRAY_SIZE(s->lpd.rpu.cpu); i++) {
- Object *obj;
+ for (i = 0; i < num_cpu; i++) {
+ connect_gic_to_cpu(map, gic, cpus[i], i, num_cpu);
+ }
+}
- object_initialize_child(OBJECT(&s->lpd.rpu.cluster),
- "rpu-cpu[*]", &s->lpd.rpu.cpu[i],
- XLNX_VERSAL_RCPU_TYPE);
- obj = OBJECT(&s->lpd.rpu.cpu[i]);
- object_property_set_bool(obj, "start-powered-off", true,
- &error_abort);
+static DeviceState *versal_create_cpu(Versal *s,
+ const VersalCpuClusterMap *map,
+ DeviceState *qemu_cluster,
+ MemoryRegion *cpu_mr,
+ size_t cluster_idx,
+ size_t core_idx)
+{
+ DeviceState *cpu = qdev_new(map->cpu_model);
+ ARMCPU *arm_cpu = ARM_CPU(cpu);
+ Object *obj = OBJECT(cpu);
+ uint64_t affinity;
+ bool start_off;
+ size_t idx = cluster_idx * map->num_core + core_idx;
+ g_autofree char *name;
+ g_autofree char *node = NULL;
+
+ affinity = map->mp_affinity.base;
+ affinity |= (cluster_idx & 0xff) << map->mp_affinity.cluster_shift;
+ affinity |= (core_idx & 0xff) << map->mp_affinity.core_shift;
+
+ start_off = map->start_powered_off == SPO_ALL
+ || ((map->start_powered_off == SPO_SECONDARIES)
+ && (cluster_idx || core_idx));
+
+ name = g_strdup_printf("%s[*]", map->name);
+ object_property_add_child(OBJECT(qemu_cluster), name, obj);
+ object_property_set_bool(obj, "start-powered-off", start_off,
+ &error_abort);
+ qdev_prop_set_uint64(cpu, "mp-affinity", affinity);
+ qdev_prop_set_int32(cpu, "core-count", map->num_core);
+ object_property_set_link(obj, "memory", OBJECT(cpu_mr), &error_abort);
+ qdev_realize_and_unref(cpu, NULL, &error_fatal);
- object_property_set_int(obj, "mp-affinity", 0x100 | i, &error_abort);
- object_property_set_int(obj, "core-count", ARRAY_SIZE(s->lpd.rpu.cpu),
- &error_abort);
- object_property_set_link(obj, "memory", OBJECT(&s->lpd.rpu.mr),
- &error_abort);
- qdev_realize(DEVICE(obj), NULL, &error_fatal);
+ if (!map->dtb_expose) {
+ return cpu;
}
- qdev_realize(DEVICE(&s->lpd.rpu.cluster), NULL, &error_fatal);
+ node = versal_fdt_add_subnode(s, "/cpus/cpu", idx,
+ arm_cpu->dtb_compatible,
+ strlen(arm_cpu->dtb_compatible) + 1);
+ qemu_fdt_setprop_cell(s->cfg.fdt, node, "reg",
+ arm_cpu_mp_affinity(arm_cpu) & ARM64_AFFINITY_MASK);
+ qemu_fdt_setprop_string(s->cfg.fdt, node, "device_type", "cpu");
+ qemu_fdt_setprop_string(s->cfg.fdt, node, "enable-method", "psci");
+
+ return cpu;
}
-static void versal_create_uarts(Versal *s, qemu_irq *pic)
+static void versal_create_cpu_cluster(Versal *s, const VersalCpuClusterMap *map)
{
- int i;
+ size_t i, j;
+ DeviceState *cluster;
+ MemoryRegion *mr;
+ char *name;
+ g_autofree DeviceState **cpus;
+ const char compatible[] = "arm,armv8-timer";
+ bool has_gtimer;
- for (i = 0; i < ARRAY_SIZE(s->lpd.iou.uart); i++) {
- static const int irqs[] = { VERSAL_UART0_IRQ_0, VERSAL_UART1_IRQ_0};
- static const uint64_t addrs[] = { MM_UART0, MM_UART1 };
- char *name = g_strdup_printf("uart%d", i);
- DeviceState *dev;
- MemoryRegion *mr;
+ cluster = qdev_new(TYPE_CPU_CLUSTER);
+ name = g_strdup_printf("%s-cluster", map->name);
+ object_property_add_child(OBJECT(s), name, OBJECT(cluster));
+ g_free(name);
+ qdev_prop_set_uint32(cluster, "cluster-id", map->qemu_cluster_id);
- object_initialize_child(OBJECT(s), name, &s->lpd.iou.uart[i],
- TYPE_PL011);
- dev = DEVICE(&s->lpd.iou.uart[i]);
- qdev_prop_set_chr(dev, "chardev", serial_hd(i));
- sysbus_realize(SYS_BUS_DEVICE(dev), &error_fatal);
+ mr = create_cpu_mr(s, cluster, map);
- mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
- memory_region_add_subregion(&s->mr_ps, addrs[i], mr);
+ cpus = g_new(DeviceState *, map->num_cluster * map->num_core);
- sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[irqs[i]]);
- g_free(name);
+ if (map->dtb_expose) {
+ qemu_fdt_add_subnode(s->cfg.fdt, "/cpus");
+ qemu_fdt_setprop_cell(s->cfg.fdt, "/cpus", "#size-cells", 0);
+ qemu_fdt_setprop_cell(s->cfg.fdt, "/cpus", "#address-cells", 1);
+ }
+
+ for (i = 0; i < map->num_cluster; i++) {
+ for (j = 0; j < map->num_core; j++) {
+ DeviceState *cpu = versal_create_cpu(s, map, cluster, mr, i, j);
+
+ cpus[i * map->num_core + j] = cpu;
+ }
+
+ if (map->per_cluster_gic) {
+ versal_create_and_connect_gic(s, map, mr, &cpus[i * map->num_core],
+ map->num_core);
+ }
+ }
+
+ qdev_realize_and_unref(cluster, NULL, &error_fatal);
+
+ if (!map->per_cluster_gic) {
+ versal_create_and_connect_gic(s, map, mr, cpus,
+ map->num_cluster * map->num_core);
+ }
+
+ has_gtimer = arm_feature(&ARM_CPU(cpus[0])->env, ARM_FEATURE_GENERIC_TIMER);
+ if (map->dtb_expose && has_gtimer) {
+ qemu_fdt_add_subnode(s->cfg.fdt, "/timer");
+ qemu_fdt_setprop_cells(s->cfg.fdt, "/timer", "interrupts",
+ GIC_FDT_IRQ_TYPE_PPI,
+ INTID_TO_PPI(ARCH_TIMER_S_EL1_IRQ),
+ GIC_FDT_IRQ_FLAGS_LEVEL_HI,
+ GIC_FDT_IRQ_TYPE_PPI,
+ INTID_TO_PPI(ARCH_TIMER_NS_EL1_IRQ),
+ GIC_FDT_IRQ_FLAGS_LEVEL_HI,
+ GIC_FDT_IRQ_TYPE_PPI,
+ INTID_TO_PPI(ARCH_TIMER_VIRT_IRQ),
+ GIC_FDT_IRQ_FLAGS_LEVEL_HI,
+ GIC_FDT_IRQ_TYPE_PPI,
+ INTID_TO_PPI(ARCH_TIMER_NS_EL2_IRQ),
+ GIC_FDT_IRQ_FLAGS_LEVEL_HI);
+ qemu_fdt_setprop(s->cfg.fdt, "/timer", "compatible",
+ compatible, sizeof(compatible));
}
}
-static void versal_create_canfds(Versal *s, qemu_irq *pic)
+static void versal_create_uart(Versal *s,
+ const VersalSimplePeriphMap *map,
+ int chardev_idx)
{
- int i;
- uint32_t irqs[] = { VERSAL_CANFD0_IRQ_0, VERSAL_CANFD1_IRQ_0};
- uint64_t addrs[] = { MM_CANFD0, MM_CANFD1 };
+ DeviceState *dev;
+ MemoryRegion *mr;
+ g_autofree char *node;
+ g_autofree char *alias;
+ const char compatible[] = "arm,pl011\0arm,sbsa-uart";
+ const char clocknames[] = "uartclk\0apb_pclk";
+
+ dev = qdev_new(TYPE_PL011);
+ object_property_add_child(OBJECT(s), "uart[*]", OBJECT(dev));
+ qdev_prop_set_chr(dev, "chardev", serial_hd(chardev_idx));
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
- for (i = 0; i < ARRAY_SIZE(s->lpd.iou.canfd); i++) {
- char *name = g_strdup_printf("canfd%d", i);
- SysBusDevice *sbd;
- MemoryRegion *mr;
+ mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
+ memory_region_add_subregion(&s->mr_ps, map->addr, mr);
+
+ versal_sysbus_connect_irq(s, SYS_BUS_DEVICE(dev), 0, map->irq);
+
+ node = versal_fdt_add_simple_subnode(s, "/uart", map->addr, 0x1000,
+ compatible, sizeof(compatible));
+ qemu_fdt_setprop_cell(s->cfg.fdt, node, "current-speed", 115200);
+ qemu_fdt_setprop_cells(s->cfg.fdt, node, "clocks",
+ s->phandle.clk_125mhz, s->phandle.clk_125mhz);
+ qemu_fdt_setprop(s->cfg.fdt, node, "clock-names", clocknames,
+ sizeof(clocknames));
+ qemu_fdt_setprop_cells(s->cfg.fdt, node, "interrupts",
+ GIC_FDT_IRQ_TYPE_SPI, map->irq,
+ GIC_FDT_IRQ_FLAGS_LEVEL_HI);
+ qemu_fdt_setprop(s->cfg.fdt, node, "u-boot,dm-pre-reloc", NULL, 0);
+
+ alias = g_strdup_printf("serial%d", chardev_idx);
+ qemu_fdt_setprop_string(s->cfg.fdt, "/aliases", alias, node);
+
+ if (chardev_idx == 0) {
+ qemu_fdt_setprop_string(s->cfg.fdt, "/chosen", "stdout-path", node);
+ }
+}
- object_initialize_child(OBJECT(s), name, &s->lpd.iou.canfd[i],
- TYPE_XILINX_CANFD);
- sbd = SYS_BUS_DEVICE(&s->lpd.iou.canfd[i]);
+static void versal_create_canfd(Versal *s, const VersalSimplePeriphMap *map,
+ CanBusState *bus)
+{
+ SysBusDevice *sbd;
+ MemoryRegion *mr;
+ g_autofree char *node;
+ const char compatible[] = "xlnx,canfd-2.0";
+ const char clocknames[] = "can_clk\0s_axi_aclk";
- object_property_set_int(OBJECT(&s->lpd.iou.canfd[i]), "ext_clk_freq",
- XLNX_VERSAL_CANFD_REF_CLK , &error_abort);
+ sbd = SYS_BUS_DEVICE(qdev_new(TYPE_XILINX_CANFD));
+ object_property_add_child(OBJECT(s), "canfd[*]", OBJECT(sbd));
- object_property_set_link(OBJECT(&s->lpd.iou.canfd[i]), "canfdbus",
- OBJECT(s->lpd.iou.canbus[i]),
- &error_abort);
+ object_property_set_int(OBJECT(sbd), "ext_clk_freq",
+ 25 * 1000 * 1000 , &error_abort);
- sysbus_realize(sbd, &error_fatal);
+ object_property_set_link(OBJECT(sbd), "canfdbus", OBJECT(bus),
+ &error_abort);
- mr = sysbus_mmio_get_region(sbd, 0);
- memory_region_add_subregion(&s->mr_ps, addrs[i], mr);
+ sysbus_realize_and_unref(sbd, &error_fatal);
- sysbus_connect_irq(sbd, 0, pic[irqs[i]]);
- g_free(name);
- }
+ mr = sysbus_mmio_get_region(sbd, 0);
+ memory_region_add_subregion(&s->mr_ps, map->addr, mr);
+
+ versal_sysbus_connect_irq(s, sbd, 0, map->irq);
+
+ node = versal_fdt_add_simple_subnode(s, "/canfd", map->addr, 0x10000,
+ compatible, sizeof(compatible));
+ qemu_fdt_setprop_cell(s->cfg.fdt, node, "rx-fifo-depth", 0x40);
+ qemu_fdt_setprop_cell(s->cfg.fdt, node, "tx-mailbox-count", 0x20);
+ qemu_fdt_setprop_cells(s->cfg.fdt, node, "clocks",
+ s->phandle.clk_25mhz, s->phandle.clk_25mhz);
+ qemu_fdt_setprop(s->cfg.fdt, node, "clock-names",
+ clocknames, sizeof(clocknames));
+ qemu_fdt_setprop_cells(s->cfg.fdt, node, "interrupts",
+ GIC_FDT_IRQ_TYPE_SPI, map->irq,
+ GIC_FDT_IRQ_FLAGS_LEVEL_HI);
}
-static void versal_create_usbs(Versal *s, qemu_irq *pic)
+static void versal_create_usb(Versal *s,
+ const struct VersalUsbMap *map)
{
DeviceState *dev;
MemoryRegion *mr;
+ g_autofree char *node, *subnode;
+ const char clocknames[] = "bus_clk\0ref_clk";
+ const char irq_name[] = "dwc_usb3";
+ const char compat_versal_dwc3[] = "xlnx,versal-dwc3";
+ const char compat_dwc3[] = "snps,dwc3";
- object_initialize_child(OBJECT(s), "usb2", &s->lpd.iou.usb,
- TYPE_XILINX_VERSAL_USB2);
- dev = DEVICE(&s->lpd.iou.usb);
+ dev = qdev_new(TYPE_XILINX_VERSAL_USB2);
+ object_property_add_child(OBJECT(s), "usb[*]", OBJECT(dev));
object_property_set_link(OBJECT(dev), "dma", OBJECT(&s->mr_ps),
&error_abort);
qdev_prop_set_uint32(dev, "intrs", 1);
qdev_prop_set_uint32(dev, "slots", 2);
- sysbus_realize(SYS_BUS_DEVICE(dev), &error_fatal);
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
- memory_region_add_subregion(&s->mr_ps, MM_USB_0, mr);
+ memory_region_add_subregion(&s->mr_ps, map->xhci, mr);
- sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[VERSAL_USB0_IRQ_0]);
+ versal_sysbus_connect_irq(s, SYS_BUS_DEVICE(dev), 0, map->irq);
mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1);
- memory_region_add_subregion(&s->mr_ps, MM_USB2_CTRL_REGS, mr);
+ memory_region_add_subregion(&s->mr_ps, map->ctrl, mr);
+
+ node = versal_fdt_add_simple_subnode(s, "/usb", map->ctrl, 0x10000,
+ compat_versal_dwc3,
+ sizeof(compat_versal_dwc3));
+ qemu_fdt_setprop(s->cfg.fdt, node, "clock-names",
+ clocknames, sizeof(clocknames));
+ qemu_fdt_setprop_cells(s->cfg.fdt, node, "clocks",
+ s->phandle.clk_25mhz, s->phandle.clk_125mhz);
+ qemu_fdt_setprop(s->cfg.fdt, node, "ranges", NULL, 0);
+ qemu_fdt_setprop_cell(s->cfg.fdt, node, "#address-cells", 2);
+ qemu_fdt_setprop_cell(s->cfg.fdt, node, "#size-cells", 2);
+
+ subnode = g_strdup_printf("/%s/dwc3", node);
+ g_free(node);
+
+ node = versal_fdt_add_simple_subnode(s, subnode, map->xhci, 0x10000,
+ compat_dwc3,
+ sizeof(compat_dwc3));
+ qemu_fdt_setprop(s->cfg.fdt, node, "interrupt-names",
+ irq_name, sizeof(irq_name));
+ qemu_fdt_setprop_cells(s->cfg.fdt, node, "interrupts",
+ GIC_FDT_IRQ_TYPE_SPI, map->irq,
+ GIC_FDT_IRQ_FLAGS_LEVEL_HI);
+ qemu_fdt_setprop_cell(s->cfg.fdt, node,
+ "snps,quirk-frame-length-adjustment", 0x20);
+ qemu_fdt_setprop_cells(s->cfg.fdt, node, "#stream-id-cells", 1);
+ qemu_fdt_setprop_string(s->cfg.fdt, node, "dr_mode", "host");
+ qemu_fdt_setprop_string(s->cfg.fdt, node, "phy-names", "usb3-phy");
+ qemu_fdt_setprop(s->cfg.fdt, node, "snps,dis_u2_susphy_quirk", NULL, 0);
+ qemu_fdt_setprop(s->cfg.fdt, node, "snps,dis_u3_susphy_quirk", NULL, 0);
+ qemu_fdt_setprop(s->cfg.fdt, node, "snps,refclk_fladj", NULL, 0);
+ qemu_fdt_setprop(s->cfg.fdt, node, "snps,mask_phy_reset", NULL, 0);
+ qemu_fdt_setprop_string(s->cfg.fdt, node, "maximum-speed", "high-speed");
}
-static void versal_create_gems(Versal *s, qemu_irq *pic)
+static void versal_create_gem(Versal *s,
+ const struct VersalGemMap *map)
{
+ DeviceState *dev;
+ MemoryRegion *mr;
+ DeviceState *or;
int i;
- for (i = 0; i < ARRAY_SIZE(s->lpd.iou.gem); i++) {
- static const int irqs[] = { VERSAL_GEM0_IRQ_0, VERSAL_GEM1_IRQ_0};
- static const uint64_t addrs[] = { MM_GEM0, MM_GEM1 };
- char *name = g_strdup_printf("gem%d", i);
- DeviceState *dev;
- MemoryRegion *mr;
- OrIRQState *or_irq;
-
- object_initialize_child(OBJECT(s), name, &s->lpd.iou.gem[i],
- TYPE_CADENCE_GEM);
- or_irq = &s->lpd.iou.gem_irq_orgate[i];
- object_initialize_child(OBJECT(s), "gem-irq-orgate[*]",
- or_irq, TYPE_OR_IRQ);
- dev = DEVICE(&s->lpd.iou.gem[i]);
- qemu_configure_nic_device(dev, true, NULL);
- object_property_set_int(OBJECT(dev), "phy-addr", 23, &error_abort);
- object_property_set_int(OBJECT(dev), "num-priority-queues", 2,
- &error_abort);
- object_property_set_int(OBJECT(or_irq),
- "num-lines", 2, &error_fatal);
- qdev_realize(DEVICE(or_irq), NULL, &error_fatal);
- qdev_connect_gpio_out(DEVICE(or_irq), 0, pic[irqs[i]]);
-
- object_property_set_link(OBJECT(dev), "dma", OBJECT(&s->mr_ps),
- &error_abort);
- sysbus_realize(SYS_BUS_DEVICE(dev), &error_fatal);
+ dev = qdev_new(TYPE_CADENCE_GEM);
+ object_property_add_child(OBJECT(s), "gem[*]", OBJECT(dev));
- mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
- memory_region_add_subregion(&s->mr_ps, addrs[i], mr);
+ qemu_configure_nic_device(dev, true, NULL);
+ object_property_set_int(OBJECT(dev), "phy-addr", 23, &error_abort);
+ object_property_set_int(OBJECT(dev), "num-priority-queues",
+ map->num_prio_queue, &error_abort);
+
+ object_property_set_link(OBJECT(dev), "dma", OBJECT(&s->mr_ps),
+ &error_abort);
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+
+ mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
+ memory_region_add_subregion(&s->mr_ps, map->map.addr, mr);
+
+ /*
+ * The GEM controller exposes one IRQ line per priority queue. In Versal
+ * family devices, those are OR'ed together.
+ */
+ or = create_or_gate(s, OBJECT(dev), "irq-orgate",
+ map->num_prio_queue, map->map.irq);
- sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, qdev_get_gpio_in(DEVICE(or_irq), 0));
- sysbus_connect_irq(SYS_BUS_DEVICE(dev), 1, qdev_get_gpio_in(DEVICE(or_irq), 1));
- g_free(name);
+ for (i = 0; i < map->num_prio_queue; i++) {
+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, qdev_get_gpio_in(or, i));
}
}
-static void versal_create_admas(Versal *s, qemu_irq *pic)
+static void versal_create_gem_fdt(Versal *s,
+ const struct VersalGemMap *map)
{
int i;
-
- for (i = 0; i < ARRAY_SIZE(s->lpd.iou.adma); i++) {
- char *name = g_strdup_printf("adma%d", i);
- DeviceState *dev;
- MemoryRegion *mr;
-
- object_initialize_child(OBJECT(s), name, &s->lpd.iou.adma[i],
- TYPE_XLNX_ZDMA);
- dev = DEVICE(&s->lpd.iou.adma[i]);
- object_property_set_int(OBJECT(dev), "bus-width", 128, &error_abort);
- object_property_set_link(OBJECT(dev), "dma",
- OBJECT(get_system_memory()), &error_fatal);
- sysbus_realize(SYS_BUS_DEVICE(dev), &error_fatal);
-
- mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
- memory_region_add_subregion(&s->mr_ps,
- MM_ADMA_CH0 + i * MM_ADMA_CH0_SIZE, mr);
-
- sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, pic[VERSAL_ADMA_IRQ_0 + i]);
- g_free(name);
+ g_autofree char *node;
+ g_autofree char *phy_node;
+ int phy_phandle;
+ const char compatible[] = "cdns,zynqmp-gem\0cdns,gem";
+ const char clocknames[] = "pclk\0hclk\0tx_clk\0rx_clk";
+ g_autofree uint32_t *irq_prop;
+
+ node = versal_fdt_add_simple_subnode(s, "/ethernet", map->map.addr, 0x1000,
+ compatible, sizeof(compatible));
+ phy_node = g_strdup_printf("%s/fixed-link", node);
+ phy_phandle = qemu_fdt_alloc_phandle(s->cfg.fdt);
+
+ /* Fixed link PHY node */
+ qemu_fdt_add_subnode(s->cfg.fdt, phy_node);
+ qemu_fdt_setprop_cell(s->cfg.fdt, phy_node, "phandle", phy_phandle);
+ qemu_fdt_setprop(s->cfg.fdt, phy_node, "full-duplex", NULL, 0);
+ qemu_fdt_setprop_cell(s->cfg.fdt, phy_node, "speed", map->speed);
+
+ qemu_fdt_setprop_string(s->cfg.fdt, node, "phy-mode", map->phy_mode);
+ qemu_fdt_setprop_cell(s->cfg.fdt, node, "phy-handle", phy_phandle);
+ qemu_fdt_setprop_cells(s->cfg.fdt, node, "clocks",
+ s->phandle.clk_25mhz, s->phandle.clk_25mhz,
+ s->phandle.clk_125mhz, s->phandle.clk_125mhz);
+ qemu_fdt_setprop(s->cfg.fdt, node, "clock-names",
+ clocknames, sizeof(clocknames));
+
+ irq_prop = g_new(uint32_t, map->num_prio_queue * 3);
+ for (i = 0; i < map->num_prio_queue; i++) {
+ irq_prop[3 * i] = cpu_to_be32(GIC_FDT_IRQ_TYPE_SPI);
+ irq_prop[3 * i + 1] = cpu_to_be32(map->map.irq);
+ irq_prop[3 * i + 2] = cpu_to_be32(GIC_FDT_IRQ_FLAGS_LEVEL_HI);
}
+ qemu_fdt_setprop(s->cfg.fdt, node, "interrupts", irq_prop,
+ sizeof(uint32_t) * map->num_prio_queue * 3);
}
-#define SDHCI_CAPABILITIES 0x280737ec6481 /* Same as on ZynqMP. */
-static void versal_create_sds(Versal *s, qemu_irq *pic)
+static void versal_create_zdma(Versal *s,
+ const struct VersalZDMAMap *map)
{
- int i;
+ DeviceState *dev;
+ MemoryRegion *mr;
+ g_autofree char *name;
+ const char compatible[] = "xlnx,zynqmp-dma-1.0";
+ const char clocknames[] = "clk_main\0clk_apb";
+ size_t i;
- for (i = 0; i < ARRAY_SIZE(s->pmc.iou.sd); i++) {
- DeviceState *dev;
- MemoryRegion *mr;
+ name = g_strdup_printf("%s[*]", map->name);
- object_initialize_child(OBJECT(s), "sd[*]", &s->pmc.iou.sd[i],
- TYPE_SYSBUS_SDHCI);
- dev = DEVICE(&s->pmc.iou.sd[i]);
+ for (i = 0; i < map->num_chan; i++) {
+ uint64_t addr = map->map.addr + map->chan_stride * i;
+ int irq = map->map.irq + map->irq_stride * i;
+ g_autofree char *node;
- object_property_set_uint(OBJECT(dev), "sd-spec-version", 3,
- &error_fatal);
- object_property_set_uint(OBJECT(dev), "capareg", SDHCI_CAPABILITIES,
- &error_fatal);
- object_property_set_uint(OBJECT(dev), "uhs", UHS_I, &error_fatal);
- sysbus_realize(SYS_BUS_DEVICE(dev), &error_fatal);
+ dev = qdev_new(TYPE_XLNX_ZDMA);
+ object_property_add_child(OBJECT(s), name, OBJECT(dev));
+ object_property_set_int(OBJECT(dev), "bus-width", 128, &error_abort);
+ object_property_set_link(OBJECT(dev), "dma",
+ OBJECT(get_system_memory()), &error_fatal);
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
- memory_region_add_subregion(&s->mr_ps,
- MM_PMC_SD0 + i * MM_PMC_SD0_SIZE, mr);
-
- sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0,
- pic[VERSAL_SD0_IRQ_0 + i * 2]);
+ memory_region_add_subregion(&s->mr_ps, addr, mr);
+
+ versal_sysbus_connect_irq(s, SYS_BUS_DEVICE(dev), 0, irq);
+
+ node = versal_fdt_add_simple_subnode(s, "/dma", addr, 0x1000,
+ compatible, sizeof(compatible));
+ qemu_fdt_setprop_cell(s->cfg.fdt, node, "xlnx,bus-width", 64);
+ qemu_fdt_setprop_cells(s->cfg.fdt, node, "clocks",
+ s->phandle.clk_25mhz, s->phandle.clk_25mhz);
+ qemu_fdt_setprop(s->cfg.fdt, node, "clock-names",
+ clocknames, sizeof(clocknames));
+ qemu_fdt_setprop_cells(s->cfg.fdt, node, "interrupts",
+ GIC_FDT_IRQ_TYPE_SPI, irq,
+ GIC_FDT_IRQ_FLAGS_LEVEL_HI);
}
}
-static void versal_create_pmc_apb_irq_orgate(Versal *s, qemu_irq *pic)
+#define SDHCI_CAPABILITIES 0x280737ec6481 /* Same as on ZynqMP. */
+static void versal_create_sdhci(Versal *s,
+ const VersalSimplePeriphMap *map)
{
- DeviceState *orgate;
+ DeviceState *dev;
+ MemoryRegion *mr;
+ g_autofree char *node;
+ const char compatible[] = "arasan,sdhci-8.9a";
+ const char clocknames[] = "clk_xin\0clk_ahb";
+
+ dev = qdev_new(TYPE_SYSBUS_SDHCI);
+ object_property_add_child(OBJECT(s), "sdhci[*]", OBJECT(dev));
+
+ object_property_set_uint(OBJECT(dev), "sd-spec-version", 3,
+ &error_fatal);
+ object_property_set_uint(OBJECT(dev), "capareg", SDHCI_CAPABILITIES,
+ &error_fatal);
+ object_property_set_uint(OBJECT(dev), "uhs", UHS_I, &error_fatal);
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
- /*
- * The VERSAL_PMC_APB_IRQ is an 'or' of the interrupts from the following
- * models:
- * - RTC
- * - BBRAM
- * - PMC SLCR
- * - CFRAME regs (input 3 - 17 to the orgate)
- */
- object_initialize_child(OBJECT(s), "pmc-apb-irq-orgate",
- &s->pmc.apb_irq_orgate, TYPE_OR_IRQ);
- orgate = DEVICE(&s->pmc.apb_irq_orgate);
- object_property_set_int(OBJECT(orgate),
- "num-lines", VERSAL_NUM_PMC_APB_IRQS, &error_fatal);
- qdev_realize(orgate, NULL, &error_fatal);
- qdev_connect_gpio_out(orgate, 0, pic[VERSAL_PMC_APB_IRQ]);
+ mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
+ memory_region_add_subregion(&s->mr_ps, map->addr, mr);
+
+ versal_sysbus_connect_irq(s, SYS_BUS_DEVICE(dev), 0, map->irq);
+
+ node = versal_fdt_add_simple_subnode(s, "/sdhci", map->addr, 0x10000,
+ compatible, sizeof(compatible));
+ qemu_fdt_setprop_cells(s->cfg.fdt, node, "clocks",
+ s->phandle.clk_25mhz, s->phandle.clk_25mhz);
+ qemu_fdt_setprop(s->cfg.fdt, node, "clock-names",
+ clocknames, sizeof(clocknames));
+ qemu_fdt_setprop_cells(s->cfg.fdt, node, "interrupts",
+ GIC_FDT_IRQ_TYPE_SPI, map->irq,
+ GIC_FDT_IRQ_FLAGS_LEVEL_HI);
}
-static void versal_create_rtc(Versal *s, qemu_irq *pic)
+static void versal_create_rtc(Versal *s, const struct VersalRtcMap *map)
{
SysBusDevice *sbd;
MemoryRegion *mr;
+ g_autofree char *node;
+ const char compatible[] = "xlnx,zynqmp-rtc";
+ const char interrupt_names[] = "alarm\0sec";
- object_initialize_child(OBJECT(s), "rtc", &s->pmc.rtc,
- TYPE_XLNX_ZYNQMP_RTC);
- sbd = SYS_BUS_DEVICE(&s->pmc.rtc);
- sysbus_realize(sbd, &error_fatal);
+ sbd = SYS_BUS_DEVICE(qdev_new(TYPE_XLNX_ZYNQMP_RTC));
+ object_property_add_child(OBJECT(s), "rtc", OBJECT(sbd));
+ sysbus_realize_and_unref(sbd, &error_abort);
mr = sysbus_mmio_get_region(sbd, 0);
- memory_region_add_subregion(&s->mr_ps, MM_PMC_RTC, mr);
+ memory_region_add_subregion(&s->mr_ps, map->map.addr, mr);
/*
* TODO: Connect the ALARM and SECONDS interrupts once our RTC model
* supports them.
*/
- sysbus_connect_irq(sbd, 1,
- qdev_get_gpio_in(DEVICE(&s->pmc.apb_irq_orgate), 0));
+ versal_sysbus_connect_irq(s, sbd, 0, map->map.irq);
+
+ node = versal_fdt_add_simple_subnode(s, "/rtc", map->map.addr, 0x10000,
+ compatible, sizeof(compatible));
+ qemu_fdt_setprop_cells(s->cfg.fdt, node, "interrupts",
+ GIC_FDT_IRQ_TYPE_SPI, map->alarm_irq,
+ GIC_FDT_IRQ_FLAGS_LEVEL_HI,
+ GIC_FDT_IRQ_TYPE_SPI, map->second_irq,
+ GIC_FDT_IRQ_FLAGS_LEVEL_HI);
+ qemu_fdt_setprop(s->cfg.fdt, node, "interrupt-names",
+ interrupt_names, sizeof(interrupt_names));
}
-static void versal_create_trng(Versal *s, qemu_irq *pic)
+static void versal_create_trng(Versal *s, const VersalSimplePeriphMap *map)
{
SysBusDevice *sbd;
MemoryRegion *mr;
- object_initialize_child(OBJECT(s), "trng", &s->pmc.trng,
- TYPE_XLNX_VERSAL_TRNG);
- sbd = SYS_BUS_DEVICE(&s->pmc.trng);
- sysbus_realize(sbd, &error_fatal);
+ sbd = SYS_BUS_DEVICE(qdev_new(TYPE_XLNX_VERSAL_TRNG));
+ object_property_add_child(OBJECT(s), "trng", OBJECT(sbd));
+ sysbus_realize_and_unref(sbd, &error_abort);
mr = sysbus_mmio_get_region(sbd, 0);
- memory_region_add_subregion(&s->mr_ps, MM_PMC_TRNG, mr);
- sysbus_connect_irq(sbd, 0, pic[VERSAL_TRNG_IRQ]);
+ memory_region_add_subregion(&s->mr_ps, map->addr, mr);
+ versal_sysbus_connect_irq(s, sbd, 0, map->irq);
}
-static void versal_create_xrams(Versal *s, qemu_irq *pic)
+static void versal_create_xrams(Versal *s, const struct VersalXramMap *map)
{
- int nr_xrams = ARRAY_SIZE(s->lpd.xram.ctrl);
- DeviceState *orgate;
- int i;
+ SysBusDevice *sbd;
+ MemoryRegion *mr;
+ DeviceState *or;
+ size_t i;
+
+ or = create_or_gate(s, OBJECT(s), "xram-orgate", map->num, map->irq);
- /* XRAM IRQs get ORed into a single line. */
- object_initialize_child(OBJECT(s), "xram-irq-orgate",
- &s->lpd.xram.irq_orgate, TYPE_OR_IRQ);
- orgate = DEVICE(&s->lpd.xram.irq_orgate);
- object_property_set_int(OBJECT(orgate),
- "num-lines", nr_xrams, &error_fatal);
- qdev_realize(orgate, NULL, &error_fatal);
- qdev_connect_gpio_out(orgate, 0, pic[VERSAL_XRAM_IRQ_0]);
+ for (i = 0; i < map->num; i++) {
+ hwaddr ctrl, mem;
- for (i = 0; i < ARRAY_SIZE(s->lpd.xram.ctrl); i++) {
- SysBusDevice *sbd;
- MemoryRegion *mr;
+ sbd = SYS_BUS_DEVICE(qdev_new(TYPE_XLNX_XRAM_CTRL));
+ object_property_add_child(OBJECT(s), "xram[*]", OBJECT(sbd));
+ sysbus_realize_and_unref(sbd, &error_fatal);
- object_initialize_child(OBJECT(s), "xram[*]", &s->lpd.xram.ctrl[i],
- TYPE_XLNX_XRAM_CTRL);
- sbd = SYS_BUS_DEVICE(&s->lpd.xram.ctrl[i]);
- sysbus_realize(sbd, &error_fatal);
+ ctrl = map->ctrl + map->ctrl_stride * i;
+ mem = map->mem + map->mem_stride * i;
mr = sysbus_mmio_get_region(sbd, 0);
- memory_region_add_subregion(&s->mr_ps,
- MM_XRAMC + i * MM_XRAMC_SIZE, mr);
+ memory_region_add_subregion(&s->mr_ps, ctrl, mr);
mr = sysbus_mmio_get_region(sbd, 1);
- memory_region_add_subregion(&s->mr_ps, MM_XRAM + i * MiB, mr);
+ memory_region_add_subregion(&s->mr_ps, mem, mr);
- sysbus_connect_irq(sbd, 0, qdev_get_gpio_in(orgate, i));
+ sysbus_connect_irq(sbd, 0, qdev_get_gpio_in(or, i));
}
}
-static void versal_create_bbram(Versal *s, qemu_irq *pic)
+static void versal_create_bbram(Versal *s,
+ const VersalSimplePeriphMap *map)
{
+ DeviceState *dev;
SysBusDevice *sbd;
- object_initialize_child_with_props(OBJECT(s), "bbram", &s->pmc.bbram,
- sizeof(s->pmc.bbram), TYPE_XLNX_BBRAM,
- &error_fatal,
- "crc-zpads", "0",
- NULL);
- sbd = SYS_BUS_DEVICE(&s->pmc.bbram);
+ dev = qdev_new(TYPE_XLNX_BBRAM);
+ sbd = SYS_BUS_DEVICE(dev);
- sysbus_realize(sbd, &error_fatal);
- memory_region_add_subregion(&s->mr_ps, MM_PMC_BBRAM_CTRL,
+ object_property_add_child(OBJECT(s), "bbram", OBJECT(dev));
+ qdev_prop_set_uint32(dev, "crc-zpads", 0);
+ sysbus_realize_and_unref(sbd, &error_abort);
+ memory_region_add_subregion(&s->mr_ps, map->addr,
sysbus_mmio_get_region(sbd, 0));
- sysbus_connect_irq(sbd, 0,
- qdev_get_gpio_in(DEVICE(&s->pmc.apb_irq_orgate), 1));
+ versal_sysbus_connect_irq(s, sbd, 0, map->irq);
}
-static void versal_realize_efuse_part(Versal *s, Object *dev, hwaddr base)
+static void versal_create_efuse(Versal *s,
+ const struct VersalEfuseMap *map)
{
- SysBusDevice *part = SYS_BUS_DEVICE(dev);
+ DeviceState *bits;
+ DeviceState *ctrl;
+ DeviceState *cache;
- object_property_set_link(OBJECT(part), "efuse",
- OBJECT(&s->pmc.efuse), &error_abort);
+ if (versal_get_version(s) != VERSAL_VER_VERSAL) {
+ /* TODO for versal2 */
+ return;
+ }
- sysbus_realize(part, &error_abort);
- memory_region_add_subregion(&s->mr_ps, base,
- sysbus_mmio_get_region(part, 0));
-}
+ ctrl = qdev_new(TYPE_XLNX_VERSAL_EFUSE_CTRL);
+ cache = qdev_new(TYPE_XLNX_VERSAL_EFUSE_CACHE);
+ bits = qdev_new(TYPE_XLNX_EFUSE);
-static void versal_create_efuse(Versal *s, qemu_irq *pic)
-{
- Object *bits = OBJECT(&s->pmc.efuse);
- Object *ctrl = OBJECT(&s->pmc.efuse_ctrl);
- Object *cache = OBJECT(&s->pmc.efuse_cache);
+ qdev_prop_set_uint32(bits, "efuse-nr", 3);
+ qdev_prop_set_uint32(bits, "efuse-size", 8192);
+
+ object_property_add_child(OBJECT(s), "efuse", OBJECT(bits));
+ qdev_realize_and_unref(bits, NULL, &error_abort);
- object_initialize_child(OBJECT(s), "efuse-ctrl", &s->pmc.efuse_ctrl,
- TYPE_XLNX_VERSAL_EFUSE_CTRL);
+ object_property_set_link(OBJECT(ctrl), "efuse", OBJECT(bits), &error_abort);
- object_initialize_child(OBJECT(s), "efuse-cache", &s->pmc.efuse_cache,
- TYPE_XLNX_VERSAL_EFUSE_CACHE);
+ object_property_set_link(OBJECT(cache), "efuse", OBJECT(bits),
+ &error_abort);
- object_initialize_child_with_props(ctrl, "xlnx-efuse@0", bits,
- sizeof(s->pmc.efuse),
- TYPE_XLNX_EFUSE, &error_abort,
- "efuse-nr", "3",
- "efuse-size", "8192",
- NULL);
+ object_property_add_child(OBJECT(s), "efuse-cache", OBJECT(cache));
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(cache), &error_abort);
- qdev_realize(DEVICE(bits), NULL, &error_abort);
- versal_realize_efuse_part(s, ctrl, MM_PMC_EFUSE_CTRL);
- versal_realize_efuse_part(s, cache, MM_PMC_EFUSE_CACHE);
+ object_property_add_child(OBJECT(s), "efuse-ctrl", OBJECT(ctrl));
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(ctrl), &error_abort);
- sysbus_connect_irq(SYS_BUS_DEVICE(ctrl), 0, pic[VERSAL_EFUSE_IRQ]);
+ memory_region_add_subregion(&s->mr_ps, map->ctrl,
+ sysbus_mmio_get_region(SYS_BUS_DEVICE(ctrl),
+ 0));
+ memory_region_add_subregion(&s->mr_ps, map->cache,
+ sysbus_mmio_get_region(SYS_BUS_DEVICE(cache),
+ 0));
+ versal_sysbus_connect_irq(s, SYS_BUS_DEVICE(ctrl), 0, map->irq);
}
-static void versal_create_pmc_iou_slcr(Versal *s, qemu_irq *pic)
+static DeviceState *versal_create_pmc_iou_slcr(Versal *s,
+ const VersalSimplePeriphMap *map)
{
SysBusDevice *sbd;
+ DeviceState *dev;
- object_initialize_child(OBJECT(s), "versal-pmc-iou-slcr", &s->pmc.iou.slcr,
- TYPE_XILINX_VERSAL_PMC_IOU_SLCR);
+ dev = qdev_new(TYPE_XILINX_VERSAL_PMC_IOU_SLCR);
+ object_property_add_child(OBJECT(s), "pmc-iou-slcr", OBJECT(dev));
- sbd = SYS_BUS_DEVICE(&s->pmc.iou.slcr);
- sysbus_realize(sbd, &error_fatal);
+ sbd = SYS_BUS_DEVICE(dev);
+ sysbus_realize_and_unref(sbd, &error_fatal);
- memory_region_add_subregion(&s->mr_ps, MM_PMC_PMC_IOU_SLCR,
+ memory_region_add_subregion(&s->mr_ps, map->addr,
sysbus_mmio_get_region(sbd, 0));
- sysbus_connect_irq(sbd, 0,
- qdev_get_gpio_in(DEVICE(&s->pmc.apb_irq_orgate), 2));
+ versal_sysbus_connect_irq(s, sbd, 0, map->irq);
+
+ return dev;
}
-static void versal_create_ospi(Versal *s, qemu_irq *pic)
+static DeviceState *versal_create_ospi(Versal *s,
+ const struct VersalOspiMap *map)
{
SysBusDevice *sbd;
MemoryRegion *mr_dac;
- qemu_irq ospi_mux_sel;
- DeviceState *orgate;
+ DeviceState *dev, *dma_dst, *dma_src, *orgate;
+ MemoryRegion *linear_mr = g_new(MemoryRegion, 1);
- memory_region_init(&s->pmc.iou.ospi.linear_mr, OBJECT(s),
- "versal-ospi-linear-mr" , MM_PMC_OSPI_DAC_SIZE);
+ dev = qdev_new(TYPE_XILINX_VERSAL_OSPI);
+ object_property_add_child(OBJECT(s), "ospi", OBJECT(dev));
- object_initialize_child(OBJECT(s), "versal-ospi", &s->pmc.iou.ospi.ospi,
- TYPE_XILINX_VERSAL_OSPI);
+ memory_region_init(linear_mr, OBJECT(dev), "linear-mr", map->dac_sz);
- mr_dac = sysbus_mmio_get_region(SYS_BUS_DEVICE(&s->pmc.iou.ospi.ospi), 1);
- memory_region_add_subregion(&s->pmc.iou.ospi.linear_mr, 0x0, mr_dac);
+ mr_dac = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1);
+ memory_region_add_subregion(linear_mr, 0x0, mr_dac);
/* Create the OSPI destination DMA */
- object_initialize_child(OBJECT(s), "versal-ospi-dma-dst",
- &s->pmc.iou.ospi.dma_dst,
- TYPE_XLNX_CSU_DMA);
+ dma_dst = qdev_new(TYPE_XLNX_CSU_DMA);
+ object_property_add_child(OBJECT(dev), "dma-dst-dev", OBJECT(dma_dst));
+ object_property_set_link(OBJECT(dma_dst), "dma",
+ OBJECT(get_system_memory()), &error_abort);
- object_property_set_link(OBJECT(&s->pmc.iou.ospi.dma_dst),
- "dma", OBJECT(get_system_memory()),
- &error_abort);
+ sbd = SYS_BUS_DEVICE(dma_dst);
+ sysbus_realize_and_unref(sbd, &error_fatal);
- sbd = SYS_BUS_DEVICE(&s->pmc.iou.ospi.dma_dst);
- sysbus_realize(sbd, &error_fatal);
-
- memory_region_add_subregion(&s->mr_ps, MM_PMC_OSPI_DMA_DST,
+ memory_region_add_subregion(&s->mr_ps, map->dma_dst,
sysbus_mmio_get_region(sbd, 0));
/* Create the OSPI source DMA */
- object_initialize_child(OBJECT(s), "versal-ospi-dma-src",
- &s->pmc.iou.ospi.dma_src,
- TYPE_XLNX_CSU_DMA);
-
- object_property_set_bool(OBJECT(&s->pmc.iou.ospi.dma_src), "is-dst",
- false, &error_abort);
+ dma_src = qdev_new(TYPE_XLNX_CSU_DMA);
+ object_property_add_child(OBJECT(dev), "dma-src-dev", OBJECT(dma_src));
- object_property_set_link(OBJECT(&s->pmc.iou.ospi.dma_src),
- "dma", OBJECT(mr_dac), &error_abort);
+ object_property_set_bool(OBJECT(dma_src), "is-dst", false, &error_abort);
- object_property_set_link(OBJECT(&s->pmc.iou.ospi.dma_src),
- "stream-connected-dma",
- OBJECT(&s->pmc.iou.ospi.dma_dst),
+ object_property_set_link(OBJECT(dma_src), "dma", OBJECT(mr_dac),
&error_abort);
- sbd = SYS_BUS_DEVICE(&s->pmc.iou.ospi.dma_src);
- sysbus_realize(sbd, &error_fatal);
+ object_property_set_link(OBJECT(dma_src), "stream-connected-dma",
+ OBJECT(dma_dst), &error_abort);
+
+ sbd = SYS_BUS_DEVICE(dma_src);
+ sysbus_realize_and_unref(sbd, &error_fatal);
- memory_region_add_subregion(&s->mr_ps, MM_PMC_OSPI_DMA_SRC,
+ memory_region_add_subregion(&s->mr_ps, map->dma_src,
sysbus_mmio_get_region(sbd, 0));
/* Realize the OSPI */
- object_property_set_link(OBJECT(&s->pmc.iou.ospi.ospi), "dma-src",
- OBJECT(&s->pmc.iou.ospi.dma_src), &error_abort);
+ object_property_set_link(OBJECT(dev), "dma-src",
+ OBJECT(dma_src), &error_abort);
- sbd = SYS_BUS_DEVICE(&s->pmc.iou.ospi.ospi);
- sysbus_realize(sbd, &error_fatal);
+ sbd = SYS_BUS_DEVICE(dev);
+ sysbus_realize_and_unref(sbd, &error_fatal);
- memory_region_add_subregion(&s->mr_ps, MM_PMC_OSPI,
+ memory_region_add_subregion(&s->mr_ps, map->ctrl,
sysbus_mmio_get_region(sbd, 0));
- memory_region_add_subregion(&s->mr_ps, MM_PMC_OSPI_DAC,
- &s->pmc.iou.ospi.linear_mr);
-
- /* ospi_mux_sel */
- ospi_mux_sel = qdev_get_gpio_in_named(DEVICE(&s->pmc.iou.ospi.ospi),
- "ospi-mux-sel", 0);
- qdev_connect_gpio_out_named(DEVICE(&s->pmc.iou.slcr), "ospi-mux-sel", 0,
- ospi_mux_sel);
+ memory_region_add_subregion(&s->mr_ps, map->dac,
+ linear_mr);
/* OSPI irq */
- object_initialize_child(OBJECT(s), "ospi-irq-orgate",
- &s->pmc.iou.ospi.irq_orgate, TYPE_OR_IRQ);
- object_property_set_int(OBJECT(&s->pmc.iou.ospi.irq_orgate),
- "num-lines", NUM_OSPI_IRQ_LINES, &error_fatal);
-
- orgate = DEVICE(&s->pmc.iou.ospi.irq_orgate);
- qdev_realize(orgate, NULL, &error_fatal);
+ orgate = create_or_gate(s, OBJECT(dev), "irq-orgate", 3,
+ map->irq);
- sysbus_connect_irq(SYS_BUS_DEVICE(&s->pmc.iou.ospi.ospi), 0,
- qdev_get_gpio_in(orgate, 0));
- sysbus_connect_irq(SYS_BUS_DEVICE(&s->pmc.iou.ospi.dma_src), 0,
- qdev_get_gpio_in(orgate, 1));
- sysbus_connect_irq(SYS_BUS_DEVICE(&s->pmc.iou.ospi.dma_dst), 0,
- qdev_get_gpio_in(orgate, 2));
+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, qdev_get_gpio_in(orgate, 0));
+ sysbus_connect_irq(SYS_BUS_DEVICE(dma_src), 0, qdev_get_gpio_in(orgate, 1));
+ sysbus_connect_irq(SYS_BUS_DEVICE(dma_dst), 0, qdev_get_gpio_in(orgate, 2));
- qdev_connect_gpio_out(orgate, 0, pic[VERSAL_OSPI_IRQ]);
+ return dev;
}
-static void versal_create_cfu(Versal *s, qemu_irq *pic)
+static void versal_create_cfu(Versal *s, const struct VersalCfuMap *map)
{
SysBusDevice *sbd;
- DeviceState *dev;
+ Object *container;
+ DeviceState *cfu_fdro, *cfu_apb, *cfu_sfr, *cframe_bcast;
+ DeviceState *cframe_irq_or;
int i;
- const struct {
- uint64_t reg_base;
- uint64_t fdri_base;
- } cframe_addr[] = {
- { MM_PMC_CFRAME0_REG, MM_PMC_CFRAME0_FDRI },
- { MM_PMC_CFRAME1_REG, MM_PMC_CFRAME1_FDRI },
- { MM_PMC_CFRAME2_REG, MM_PMC_CFRAME2_FDRI },
- { MM_PMC_CFRAME3_REG, MM_PMC_CFRAME3_FDRI },
- { MM_PMC_CFRAME4_REG, MM_PMC_CFRAME4_FDRI },
- { MM_PMC_CFRAME5_REG, MM_PMC_CFRAME5_FDRI },
- { MM_PMC_CFRAME6_REG, MM_PMC_CFRAME6_FDRI },
- { MM_PMC_CFRAME7_REG, MM_PMC_CFRAME7_FDRI },
- { MM_PMC_CFRAME8_REG, MM_PMC_CFRAME8_FDRI },
- { MM_PMC_CFRAME9_REG, MM_PMC_CFRAME9_FDRI },
- { MM_PMC_CFRAME10_REG, MM_PMC_CFRAME10_FDRI },
- { MM_PMC_CFRAME11_REG, MM_PMC_CFRAME11_FDRI },
- { MM_PMC_CFRAME12_REG, MM_PMC_CFRAME12_FDRI },
- { MM_PMC_CFRAME13_REG, MM_PMC_CFRAME13_FDRI },
- { MM_PMC_CFRAME14_REG, MM_PMC_CFRAME14_FDRI },
- };
- const struct {
- uint32_t blktype0_frames;
- uint32_t blktype1_frames;
- uint32_t blktype2_frames;
- uint32_t blktype3_frames;
- uint32_t blktype4_frames;
- uint32_t blktype5_frames;
- uint32_t blktype6_frames;
- } cframe_cfg[] = {
- [0] = { 34111, 3528, 12800, 11, 5, 1, 1 },
- [1] = { 38498, 3841, 15361, 13, 7, 3, 1 },
- [2] = { 38498, 3841, 15361, 13, 7, 3, 1 },
- [3] = { 38498, 3841, 15361, 13, 7, 3, 1 },
- };
+
+ container = object_new(TYPE_CONTAINER);
+ object_property_add_child(OBJECT(s), "cfu", container);
+ object_unref(container);
/* CFU FDRO */
- object_initialize_child(OBJECT(s), "cfu-fdro", &s->pmc.cfu_fdro,
- TYPE_XLNX_VERSAL_CFU_FDRO);
- sbd = SYS_BUS_DEVICE(&s->pmc.cfu_fdro);
+ cfu_fdro = qdev_new(TYPE_XLNX_VERSAL_CFU_FDRO);
+ object_property_add_child(container, "cfu-fdro", OBJECT(cfu_fdro));
+ sbd = SYS_BUS_DEVICE(cfu_fdro);
- sysbus_realize(sbd, &error_fatal);
- memory_region_add_subregion(&s->mr_ps, MM_PMC_CFU_FDRO,
+ sysbus_realize_and_unref(sbd, &error_fatal);
+ memory_region_add_subregion(&s->mr_ps, map->cfu_fdro,
sysbus_mmio_get_region(sbd, 0));
- /* CFRAME REG */
- for (i = 0; i < ARRAY_SIZE(s->pmc.cframe); i++) {
- g_autofree char *name = g_strdup_printf("cframe%d", i);
+ /* cframe bcast */
+ cframe_bcast = qdev_new(TYPE_XLNX_VERSAL_CFRAME_BCAST_REG);
+ object_property_add_child(container, "cframe-bcast", OBJECT(cframe_bcast));
- object_initialize_child(OBJECT(s), name, &s->pmc.cframe[i],
- TYPE_XLNX_VERSAL_CFRAME_REG);
+ /* CFU APB */
+ cfu_apb = qdev_new(TYPE_XLNX_VERSAL_CFU_APB);
+ object_property_add_child(container, "cfu-apb", OBJECT(cfu_apb));
+
+ /* IRQ or gate for cframes */
+ cframe_irq_or = qdev_new(TYPE_OR_IRQ);
+ object_property_add_child(container, "cframe-irq-or-gate",
+ OBJECT(cframe_irq_or));
+ qdev_prop_set_uint16(cframe_irq_or, "num-lines", map->num_cframe);
+ qdev_realize_and_unref(cframe_irq_or, NULL, &error_abort);
+ versal_qdev_connect_gpio_out(s, cframe_irq_or, 0, map->cframe_irq);
+
+ /* cframe reg */
+ for (i = 0; i < map->num_cframe; i++) {
+ uint64_t reg_base;
+ uint64_t fdri_base;
+ DeviceState *dev;
+ g_autofree char *prop_name;
+ size_t j;
- sbd = SYS_BUS_DEVICE(&s->pmc.cframe[i]);
- dev = DEVICE(&s->pmc.cframe[i]);
+ dev = qdev_new(TYPE_XLNX_VERSAL_CFRAME_REG);
+ object_property_add_child(container, "cframe[*]", OBJECT(dev));
- if (i < ARRAY_SIZE(cframe_cfg)) {
- object_property_set_int(OBJECT(dev), "blktype0-frames",
- cframe_cfg[i].blktype0_frames,
- &error_abort);
- object_property_set_int(OBJECT(dev), "blktype1-frames",
- cframe_cfg[i].blktype1_frames,
- &error_abort);
- object_property_set_int(OBJECT(dev), "blktype2-frames",
- cframe_cfg[i].blktype2_frames,
- &error_abort);
- object_property_set_int(OBJECT(dev), "blktype3-frames",
- cframe_cfg[i].blktype3_frames,
- &error_abort);
- object_property_set_int(OBJECT(dev), "blktype4-frames",
- cframe_cfg[i].blktype4_frames,
- &error_abort);
- object_property_set_int(OBJECT(dev), "blktype5-frames",
- cframe_cfg[i].blktype5_frames,
- &error_abort);
- object_property_set_int(OBJECT(dev), "blktype6-frames",
- cframe_cfg[i].blktype6_frames,
+ sbd = SYS_BUS_DEVICE(dev);
+
+ for (j = 0; j < ARRAY_SIZE(map->cframe_cfg[i].blktype_frames); j++) {
+ g_autofree char *blktype_prop_name;
+
+ blktype_prop_name = g_strdup_printf("blktype%zu-frames", j);
+ object_property_set_int(OBJECT(dev), blktype_prop_name,
+ map->cframe_cfg[i].blktype_frames[j],
&error_abort);
}
+
object_property_set_link(OBJECT(dev), "cfu-fdro",
- OBJECT(&s->pmc.cfu_fdro), &error_fatal);
+ OBJECT(cfu_fdro), &error_abort);
- sysbus_realize(SYS_BUS_DEVICE(dev), &error_fatal);
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_abort);
- memory_region_add_subregion(&s->mr_ps, cframe_addr[i].reg_base,
+ reg_base = map->cframe_base + i * map->cframe_stride * 2;
+ fdri_base = reg_base + map->cframe_stride;
+ memory_region_add_subregion(&s->mr_ps, reg_base,
sysbus_mmio_get_region(sbd, 0));
- memory_region_add_subregion(&s->mr_ps, cframe_addr[i].fdri_base,
+ memory_region_add_subregion(&s->mr_ps, fdri_base,
sysbus_mmio_get_region(sbd, 1));
- sysbus_connect_irq(sbd, 0,
- qdev_get_gpio_in(DEVICE(&s->pmc.apb_irq_orgate),
- 3 + i));
- }
-
- /* CFRAME BCAST */
- object_initialize_child(OBJECT(s), "cframe_bcast", &s->pmc.cframe_bcast,
- TYPE_XLNX_VERSAL_CFRAME_BCAST_REG);
-
- sbd = SYS_BUS_DEVICE(&s->pmc.cframe_bcast);
- dev = DEVICE(&s->pmc.cframe_bcast);
+ sysbus_connect_irq(sbd, 0, qdev_get_gpio_in(cframe_irq_or, i));
- for (i = 0; i < ARRAY_SIZE(s->pmc.cframe); i++) {
- g_autofree char *propname = g_strdup_printf("cframe%d", i);
- object_property_set_link(OBJECT(dev), propname,
- OBJECT(&s->pmc.cframe[i]), &error_fatal);
+ prop_name = g_strdup_printf("cframe%d", i);
+ object_property_set_link(OBJECT(cframe_bcast), prop_name,
+ OBJECT(dev), &error_abort);
+ object_property_set_link(OBJECT(cfu_apb), prop_name,
+ OBJECT(dev), &error_abort);
}
- sysbus_realize(sbd, &error_fatal);
-
- memory_region_add_subregion(&s->mr_ps, MM_PMC_CFRAME_BCAST_REG,
+ sbd = SYS_BUS_DEVICE(cframe_bcast);
+ sysbus_realize_and_unref(sbd, &error_abort);
+ memory_region_add_subregion(&s->mr_ps, map->cframe_bcast_reg,
sysbus_mmio_get_region(sbd, 0));
- memory_region_add_subregion(&s->mr_ps, MM_PMC_CFRAME_BCAST_FDRI,
+ memory_region_add_subregion(&s->mr_ps, map->cframe_bcast_fdri,
sysbus_mmio_get_region(sbd, 1));
- /* CFU APB */
- object_initialize_child(OBJECT(s), "cfu-apb", &s->pmc.cfu_apb,
- TYPE_XLNX_VERSAL_CFU_APB);
- sbd = SYS_BUS_DEVICE(&s->pmc.cfu_apb);
- dev = DEVICE(&s->pmc.cfu_apb);
-
- for (i = 0; i < ARRAY_SIZE(s->pmc.cframe); i++) {
- g_autofree char *propname = g_strdup_printf("cframe%d", i);
- object_property_set_link(OBJECT(dev), propname,
- OBJECT(&s->pmc.cframe[i]), &error_fatal);
- }
-
- sysbus_realize(sbd, &error_fatal);
- memory_region_add_subregion(&s->mr_ps, MM_PMC_CFU_APB,
+ sbd = SYS_BUS_DEVICE(cfu_apb);
+ sysbus_realize_and_unref(sbd, &error_fatal);
+ memory_region_add_subregion(&s->mr_ps, map->cfu_apb,
sysbus_mmio_get_region(sbd, 0));
- memory_region_add_subregion(&s->mr_ps, MM_PMC_CFU_STREAM,
+ memory_region_add_subregion(&s->mr_ps, map->cfu_stream,
sysbus_mmio_get_region(sbd, 1));
- memory_region_add_subregion(&s->mr_ps, MM_PMC_CFU_STREAM_2,
+ memory_region_add_subregion(&s->mr_ps, map->cfu_stream_2,
sysbus_mmio_get_region(sbd, 2));
- sysbus_connect_irq(sbd, 0, pic[VERSAL_CFU_IRQ_0]);
+ versal_sysbus_connect_irq(s, sbd, 0, map->cfu_apb_irq);
/* CFU SFR */
- object_initialize_child(OBJECT(s), "cfu-sfr", &s->pmc.cfu_sfr,
- TYPE_XLNX_VERSAL_CFU_SFR);
-
- sbd = SYS_BUS_DEVICE(&s->pmc.cfu_sfr);
-
- object_property_set_link(OBJECT(&s->pmc.cfu_sfr),
- "cfu", OBJECT(&s->pmc.cfu_apb), &error_abort);
-
- sysbus_realize(sbd, &error_fatal);
- memory_region_add_subregion(&s->mr_ps, MM_PMC_CFU_SFR,
+ cfu_sfr = qdev_new(TYPE_XLNX_VERSAL_CFU_SFR);
+ object_property_add_child(container, "cfu-sfr", OBJECT(cfu_sfr));
+ sbd = SYS_BUS_DEVICE(cfu_sfr);
+
+ object_property_set_link(OBJECT(cfu_sfr),
+ "cfu", OBJECT(cfu_apb), &error_abort);
+ sysbus_realize_and_unref(sbd, &error_fatal);
+ memory_region_add_subregion(&s->mr_ps, map->cfu_sfr,
sysbus_mmio_get_region(sbd, 0));
}
-static void versal_create_crl(Versal *s, qemu_irq *pic)
+static inline void crl_connect_dev(Object *crl, Object *dev)
{
- SysBusDevice *sbd;
- int i;
+ const char *prop = object_get_canonical_path_component(dev);
- object_initialize_child(OBJECT(s), "crl", &s->lpd.crl,
- TYPE_XLNX_VERSAL_CRL);
- sbd = SYS_BUS_DEVICE(&s->lpd.crl);
+ /* The component part of the device path matches the CRL property name */
+ object_property_set_link(crl, prop, dev, &error_abort);
+}
- for (i = 0; i < ARRAY_SIZE(s->lpd.rpu.cpu); i++) {
- g_autofree gchar *name = g_strdup_printf("cpu_r5[%d]", i);
+static inline void crl_connect_dev_by_name(Versal *s, Object *crl,
+ const char *name, size_t num)
+{
+ size_t i;
- object_property_set_link(OBJECT(&s->lpd.crl),
- name, OBJECT(&s->lpd.rpu.cpu[i]),
- &error_abort);
+ for (i = 0; i < num; i++) {
+ Object *dev = versal_get_child_idx(s, name, i);
+
+ crl_connect_dev(crl, dev);
}
+}
- for (i = 0; i < ARRAY_SIZE(s->lpd.iou.gem); i++) {
- g_autofree gchar *name = g_strdup_printf("gem[%d]", i);
+static inline void versal_create_crl(Versal *s)
+{
+ const VersalMap *map;
+ VersalVersion ver;
+ const char *crl_class;
+ DeviceState *dev;
+ size_t num_gem;
+ Object *obj;
- object_property_set_link(OBJECT(&s->lpd.crl),
- name, OBJECT(&s->lpd.iou.gem[i]),
- &error_abort);
- }
+ map = versal_get_map(s);
+ ver = versal_get_version(s);
- for (i = 0; i < ARRAY_SIZE(s->lpd.iou.adma); i++) {
- g_autofree gchar *name = g_strdup_printf("adma[%d]", i);
+ crl_class = xlnx_versal_crl_class_name(ver);
+ dev = qdev_new(crl_class);
+ obj = OBJECT(dev);
+ object_property_add_child(OBJECT(s), "crl", obj);
- object_property_set_link(OBJECT(&s->lpd.crl),
- name, OBJECT(&s->lpd.iou.adma[i]),
- &error_abort);
- }
+ /*
+ * The 3rd GEM controller on versal2 is in the MMI subsystem.
+ * Its reset line is not connected to the CRL. Consider only the first two
+ * ones.
+ */
+ num_gem = ver == VERSAL_VER_VERSAL2 ? 2 : map->num_gem;
- for (i = 0; i < ARRAY_SIZE(s->lpd.iou.uart); i++) {
- g_autofree gchar *name = g_strdup_printf("uart[%d]", i);
+ crl_connect_dev_by_name(s, obj, "rpu-cluster/rpu",
+ map->rpu.num_cluster * map->rpu.num_core);
+ crl_connect_dev_by_name(s, obj, map->zdma[0].name, map->zdma[0].num_chan);
+ crl_connect_dev_by_name(s, obj, "uart", map->num_uart);
+ crl_connect_dev_by_name(s, obj, "gem", num_gem);
+ crl_connect_dev_by_name(s, obj, "usb", map->num_usb);
- object_property_set_link(OBJECT(&s->lpd.crl),
- name, OBJECT(&s->lpd.iou.uart[i]),
- &error_abort);
- }
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_abort);
- object_property_set_link(OBJECT(&s->lpd.crl),
- "usb", OBJECT(&s->lpd.iou.usb),
- &error_abort);
+ memory_region_add_subregion(&s->mr_ps, map->crl.addr,
+ sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0));
- sysbus_realize(sbd, &error_fatal);
- memory_region_add_subregion(&s->mr_ps, MM_CRL,
- sysbus_mmio_get_region(sbd, 0));
- sysbus_connect_irq(sbd, 0, pic[VERSAL_CRL_IRQ]);
+ if (ver == VERSAL_VER_VERSAL) {
+ /* CRL IRQ line has been removed in versal2 */
+ versal_sysbus_connect_irq(s, SYS_BUS_DEVICE(dev), 0, map->crl.irq);
+ }
}
-/* This takes the board allocated linear DDR memory and creates aliases
+/*
+ * This takes the board allocated linear DDR memory and creates aliases
* for each split DDR range/aperture on the Versal address map.
*/
-static void versal_map_ddr(Versal *s)
+static void versal_map_ddr(Versal *s, const struct VersalDDRMap *map)
{
uint64_t size = memory_region_size(s->cfg.mr_ddr);
- /* Describes the various split DDR access regions. */
- static const struct {
- uint64_t base;
- uint64_t size;
- } addr_ranges[] = {
- { MM_TOP_DDR, MM_TOP_DDR_SIZE },
- { MM_TOP_DDR_2, MM_TOP_DDR_2_SIZE },
- { MM_TOP_DDR_3, MM_TOP_DDR_3_SIZE },
- { MM_TOP_DDR_4, MM_TOP_DDR_4_SIZE }
- };
uint64_t offset = 0;
int i;
- assert(ARRAY_SIZE(addr_ranges) == ARRAY_SIZE(s->noc.mr_ddr_ranges));
- for (i = 0; i < ARRAY_SIZE(addr_ranges) && size; i++) {
- char *name;
+ for (i = 0; i < map->num_chan && size; i++) {
uint64_t mapsize;
+ MemoryRegion *alias;
+
+ mapsize = MIN(size, map->chan[i].size);
- mapsize = size < addr_ranges[i].size ? size : addr_ranges[i].size;
- name = g_strdup_printf("noc-ddr-range%d", i);
/* Create the MR alias. */
- memory_region_init_alias(&s->noc.mr_ddr_ranges[i], OBJECT(s),
- name, s->cfg.mr_ddr,
- offset, mapsize);
+ alias = g_new(MemoryRegion, 1);
+ memory_region_init_alias(alias, OBJECT(s), "noc-ddr-range",
+ s->cfg.mr_ddr, offset, mapsize);
/* Map it onto the NoC MR. */
- memory_region_add_subregion(&s->mr_ps, addr_ranges[i].base,
- &s->noc.mr_ddr_ranges[i]);
+ memory_region_add_subregion(&s->mr_ps, map->chan[i].addr, alias);
offset += mapsize;
size -= mapsize;
- g_free(name);
}
}
+void versal_fdt_add_memory_nodes(Versal *s, uint64_t size)
+{
+ const struct VersalDDRMap *map = &versal_get_map(s)->ddr;
+ g_autofree char *node;
+ g_autofree uint64_t *reg;
+ int i;
+
+ reg = g_new(uint64_t, map->num_chan * 2);
+
+ for (i = 0; i < map->num_chan && size; i++) {
+ uint64_t mapsize;
+
+ mapsize = MIN(size, map->chan[i].size);
+
+ reg[i * 2] = cpu_to_be64(map->chan[i].addr);
+ reg[i * 2 + 1] = cpu_to_be64(mapsize);
+
+ size -= mapsize;
+ }
+
+ node = versal_fdt_add_subnode(s, "/memory", 0, "memory", sizeof("memory"));
+ qemu_fdt_setprop(s->cfg.fdt, node, "reg", reg, sizeof(uint64_t) * i * 2);
+}
+
static void versal_unimp_area(Versal *s, const char *name,
MemoryRegion *mr,
hwaddr base, hwaddr size)
@@ -875,22 +1794,12 @@ static void versal_unimp_irq_parity_imr(void *opaque, int n, int level)
"is not yet implemented\n");
}
-static void versal_unimp(Versal *s)
+static void versal_unimp_common(Versal *s)
{
+ DeviceState *slcr;
qemu_irq gpio_in;
- versal_unimp_area(s, "psm", &s->mr_ps,
- MM_PSM_START, MM_PSM_END - MM_PSM_START);
- versal_unimp_area(s, "crf", &s->mr_ps,
- MM_FPD_CRF, MM_FPD_CRF_SIZE);
- versal_unimp_area(s, "apu", &s->mr_ps,
- MM_FPD_FPD_APU, MM_FPD_FPD_APU_SIZE);
- versal_unimp_area(s, "crp", &s->mr_ps,
- MM_PMC_CRP, MM_PMC_CRP_SIZE);
- versal_unimp_area(s, "iou-scntr", &s->mr_ps,
- MM_IOU_SCNTR, MM_IOU_SCNTR_SIZE);
- versal_unimp_area(s, "iou-scntr-seucre", &s->mr_ps,
- MM_IOU_SCNTRS, MM_IOU_SCNTRS_SIZE);
+ versal_unimp_area(s, "crp", &s->mr_ps, 0xf1260000, 0x10000);
qdev_init_gpio_in_named(DEVICE(s), versal_unimp_sd_emmc_sel,
"sd-emmc-sel-dummy", 2);
@@ -899,102 +1808,353 @@ static void versal_unimp(Versal *s)
qdev_init_gpio_in_named(DEVICE(s), versal_unimp_irq_parity_imr,
"irq-parity-imr-dummy", 1);
+ slcr = DEVICE(versal_get_child(s, "pmc-iou-slcr"));
gpio_in = qdev_get_gpio_in_named(DEVICE(s), "sd-emmc-sel-dummy", 0);
- qdev_connect_gpio_out_named(DEVICE(&s->pmc.iou.slcr), "sd-emmc-sel", 0,
- gpio_in);
+ qdev_connect_gpio_out_named(slcr, "sd-emmc-sel", 0, gpio_in);
gpio_in = qdev_get_gpio_in_named(DEVICE(s), "sd-emmc-sel-dummy", 1);
- qdev_connect_gpio_out_named(DEVICE(&s->pmc.iou.slcr), "sd-emmc-sel", 1,
- gpio_in);
+ qdev_connect_gpio_out_named(slcr, "sd-emmc-sel", 1, gpio_in);
gpio_in = qdev_get_gpio_in_named(DEVICE(s), "qspi-ospi-mux-sel-dummy", 0);
- qdev_connect_gpio_out_named(DEVICE(&s->pmc.iou.slcr),
- "qspi-ospi-mux-sel", 0,
- gpio_in);
+ qdev_connect_gpio_out_named(slcr, "qspi-ospi-mux-sel", 0, gpio_in);
gpio_in = qdev_get_gpio_in_named(DEVICE(s), "irq-parity-imr-dummy", 0);
- qdev_connect_gpio_out_named(DEVICE(&s->pmc.iou.slcr),
- SYSBUS_DEVICE_GPIO_IRQ, 0,
- gpio_in);
+ qdev_connect_gpio_out_named(slcr, SYSBUS_DEVICE_GPIO_IRQ, 0, gpio_in);
+}
+
+static void versal_unimp(Versal *s)
+{
+ versal_unimp_area(s, "psm", &s->mr_ps, 0xffc80000, 0x70000);
+ versal_unimp_area(s, "crf", &s->mr_ps, 0xfd1a0000, 0x140000);
+ versal_unimp_area(s, "apu", &s->mr_ps, 0xfd5c0000, 0x100);
+ versal_unimp_area(s, "iou-scntr", &s->mr_ps, 0xff130000, 0x10000);
+ versal_unimp_area(s, "iou-scntr-secure", &s->mr_ps, 0xff140000, 0x10000);
+
+ versal_unimp_common(s);
+}
+
+static void versal2_unimp(Versal *s)
+{
+ versal_unimp_area(s, "fpd-systmr-ctrl", &s->mr_ps, 0xec920000, 0x1000);
+ versal_unimp_area(s, "crf", &s->mr_ps, 0xec200000, 0x100000);
+
+ versal_unimp_common(s);
+}
+
+static uint32_t fdt_add_clk_node(Versal *s, const char *name,
+ unsigned int freq_hz)
+{
+ uint32_t phandle;
+
+ phandle = qemu_fdt_alloc_phandle(s->cfg.fdt);
+
+ qemu_fdt_add_subnode(s->cfg.fdt, name);
+ qemu_fdt_setprop_cell(s->cfg.fdt, name, "phandle", phandle);
+ qemu_fdt_setprop_cell(s->cfg.fdt, name, "clock-frequency", freq_hz);
+ qemu_fdt_setprop_cell(s->cfg.fdt, name, "#clock-cells", 0x0);
+ qemu_fdt_setprop_string(s->cfg.fdt, name, "compatible", "fixed-clock");
+ qemu_fdt_setprop(s->cfg.fdt, name, "u-boot,dm-pre-reloc", NULL, 0);
+
+ return phandle;
+}
+
+static void versal_realize_common(Versal *s)
+{
+ DeviceState *slcr, *ospi;
+ MemoryRegion *ocm;
+ Object *container;
+ const VersalMap *map = versal_get_map(s);
+ size_t i;
+
+ g_assert(s->cfg.fdt != NULL);
+
+ s->phandle.clk_25mhz = fdt_add_clk_node(s, "/clk25", 25 * 1000 * 1000);
+ s->phandle.clk_125mhz = fdt_add_clk_node(s, "/clk125", 125 * 1000 * 1000);
+ s->phandle.gic = qemu_fdt_alloc_phandle(s->cfg.fdt);
+
+ container = object_new(TYPE_CONTAINER);
+ object_property_add_child(OBJECT(s), "irq-splits", container);
+ object_unref(container);
+
+ container = object_new(TYPE_CONTAINER);
+ object_property_add_child(OBJECT(s), "irq-or-gates", container);
+ object_unref(container);
+
+ qemu_fdt_setprop_cell(s->cfg.fdt, "/", "interrupt-parent", s->phandle.gic);
+ qemu_fdt_setprop_cell(s->cfg.fdt, "/", "#size-cells", 0x2);
+ qemu_fdt_setprop_cell(s->cfg.fdt, "/", "#address-cells", 0x2);
+
+ versal_create_cpu_cluster(s, &map->apu);
+ versal_create_cpu_cluster(s, &map->rpu);
+
+ for (i = 0; i < map->num_uart; i++) {
+ versal_create_uart(s, &map->uart[i], i);
+ }
+
+ for (i = 0; i < map->num_canfd; i++) {
+ versal_create_canfd(s, &map->canfd[i], s->cfg.canbus[i]);
+ }
+
+ for (i = 0; i < map->num_sdhci; i++) {
+ versal_create_sdhci(s, &map->sdhci[i]);
+ }
+
+ for (i = 0; i < map->num_gem; i++) {
+ versal_create_gem(s, &map->gem[i]);
+ /*
+ * Create fdt node in reverse order to keep backward compatibility with
+ * previous versions of the generated FDT. This affects Linux kernel
+ * interface naming order when persistent naming scheme is not in use.
+ */
+ versal_create_gem_fdt(s, &map->gem[map->num_gem - 1 - i]);
+ }
+
+ for (i = 0; i < map->num_zdma; i++) {
+ versal_create_zdma(s, &map->zdma[i]);
+ }
+
+ versal_create_xrams(s, &map->xram);
+
+ for (i = 0; i < map->num_usb; i++) {
+ versal_create_usb(s, &map->usb[i]);
+ }
+
+ versal_create_efuse(s, &map->efuse);
+ ospi = versal_create_ospi(s, &map->ospi);
+ slcr = versal_create_pmc_iou_slcr(s, &map->pmc_iou_slcr);
+
+ qdev_connect_gpio_out_named(slcr, "ospi-mux-sel", 0,
+ qdev_get_gpio_in_named(ospi,
+ "ospi-mux-sel", 0));
+
+ versal_create_bbram(s, &map->bbram);
+ versal_create_trng(s, &map->trng);
+ versal_create_rtc(s, &map->rtc);
+ versal_create_cfu(s, &map->cfu);
+ versal_create_crl(s);
+
+ versal_map_ddr(s, &map->ddr);
+
+ /* Create the On Chip Memory (OCM). */
+ ocm = g_new(MemoryRegion, 1);
+ memory_region_init_ram(ocm, OBJECT(s), "ocm", map->ocm.size, &error_fatal);
+ memory_region_add_subregion_overlap(&s->mr_ps, map->ocm.addr, ocm, 0);
}
static void versal_realize(DeviceState *dev, Error **errp)
{
- Versal *s = XLNX_VERSAL(dev);
- qemu_irq pic[XLNX_VERSAL_NR_IRQS];
-
- versal_create_apu_cpus(s);
- versal_create_apu_gic(s, pic);
- versal_create_rpu_cpus(s);
- versal_create_uarts(s, pic);
- versal_create_canfds(s, pic);
- versal_create_usbs(s, pic);
- versal_create_gems(s, pic);
- versal_create_admas(s, pic);
- versal_create_sds(s, pic);
- versal_create_pmc_apb_irq_orgate(s, pic);
- versal_create_rtc(s, pic);
- versal_create_trng(s, pic);
- versal_create_xrams(s, pic);
- versal_create_bbram(s, pic);
- versal_create_efuse(s, pic);
- versal_create_pmc_iou_slcr(s, pic);
- versal_create_ospi(s, pic);
- versal_create_crl(s, pic);
- versal_create_cfu(s, pic);
- versal_map_ddr(s);
+ Versal *s = XLNX_VERSAL_BASE(dev);
+
+ versal_realize_common(s);
versal_unimp(s);
+}
- /* Create the On Chip Memory (OCM). */
- memory_region_init_ram(&s->lpd.mr_ocm, OBJECT(s), "ocm",
- MM_OCM_SIZE, &error_fatal);
+static void versal2_realize(DeviceState *dev, Error **errp)
+{
+ Versal *s = XLNX_VERSAL_BASE(dev);
+
+ versal_realize_common(s);
+ versal2_unimp(s);
+}
+
+DeviceState *versal_get_boot_cpu(Versal *s)
+{
+ return DEVICE(versal_get_child_idx(s, "apu-cluster/apu", 0));
+}
+
+void versal_sdhci_plug_card(Versal *s, int sd_idx, BlockBackend *blk)
+{
+ DeviceState *sdhci, *card;
+
+ sdhci = DEVICE(versal_get_child_idx(s, "sdhci", sd_idx));
+
+ if (sdhci == NULL) {
+ return;
+ }
+
+ card = qdev_new(TYPE_SD_CARD);
+ object_property_add_child(OBJECT(sdhci), "card[*]", OBJECT(card));
+ qdev_prop_set_drive_err(card, "drive", blk, &error_fatal);
+ qdev_realize_and_unref(card, qdev_get_child_bus(DEVICE(sdhci), "sd-bus"),
+ &error_fatal);
+}
+
+void versal_efuse_attach_drive(Versal *s, BlockBackend *blk)
+{
+ DeviceState *efuse;
+
+ efuse = DEVICE(versal_get_child(s, "efuse"));
+
+ if (efuse == NULL) {
+ return;
+ }
+
+ qdev_prop_set_drive(efuse, "drive", blk);
+}
+
+void versal_bbram_attach_drive(Versal *s, BlockBackend *blk)
+{
+ DeviceState *bbram;
+
+ bbram = DEVICE(versal_get_child(s, "bbram"));
+
+ if (bbram == NULL) {
+ return;
+ }
+
+ qdev_prop_set_drive(bbram, "drive", blk);
+}
+
+void versal_ospi_create_flash(Versal *s, int flash_idx, const char *flash_mdl,
+ BlockBackend *blk)
+{
+ BusState *spi_bus;
+ DeviceState *flash, *ospi;
+ qemu_irq cs_line;
+
+ ospi = DEVICE(versal_get_child(s, "ospi"));
+ spi_bus = qdev_get_child_bus(ospi, "spi0");
+
+ flash = qdev_new(flash_mdl);
- memory_region_add_subregion_overlap(&s->mr_ps, MM_OCM, &s->lpd.mr_ocm, 0);
- memory_region_add_subregion_overlap(&s->fpd.apu.mr, 0, &s->mr_ps, 0);
- memory_region_add_subregion_overlap(&s->lpd.rpu.mr, 0,
- &s->lpd.rpu.mr_ps_alias, 0);
+ if (blk) {
+ qdev_prop_set_drive_err(flash, "drive", blk, &error_fatal);
+ }
+ qdev_prop_set_uint8(flash, "cs", flash_idx);
+ qdev_realize_and_unref(flash, spi_bus, &error_fatal);
+
+ cs_line = qdev_get_gpio_in_named(flash, SSI_GPIO_CS, 0);
+
+ sysbus_connect_irq(SYS_BUS_DEVICE(ospi),
+ flash_idx + 1, cs_line);
+}
+
+qemu_irq versal_get_reserved_irq(Versal *s, int idx, int *dtb_idx)
+{
+ const VersalMap *map = versal_get_map(s);
+
+ g_assert(idx < map->reserved.irq_num);
+
+ *dtb_idx = map->reserved.irq_start + idx;
+ return versal_get_irq(s, *dtb_idx);
}
-static void versal_init(Object *obj)
+hwaddr versal_get_reserved_mmio_addr(Versal *s)
{
- Versal *s = XLNX_VERSAL(obj);
+ const VersalMap *map = versal_get_map(s);
+
+ return map->reserved.mmio_start;
+}
+
+int versal_get_num_cpu(VersalVersion version)
+{
+ const VersalMap *map = VERSION_TO_MAP[version];
+
+ return map->apu.num_cluster * map->apu.num_core
+ + map->rpu.num_cluster * map->rpu.num_core;
+}
+
+int versal_get_num_can(VersalVersion version)
+{
+ const VersalMap *map = VERSION_TO_MAP[version];
+
+ return map->num_canfd;
+}
+
+int versal_get_num_sdhci(VersalVersion version)
+{
+ const VersalMap *map = VERSION_TO_MAP[version];
+
+ return map->num_sdhci;
+}
+
+static void versal_base_init(Object *obj)
+{
+ Versal *s = XLNX_VERSAL_BASE(obj);
+ size_t i, num_can;
- memory_region_init(&s->fpd.apu.mr, obj, "mr-apu", UINT64_MAX);
- memory_region_init(&s->lpd.rpu.mr, obj, "mr-rpu", UINT64_MAX);
memory_region_init(&s->mr_ps, obj, "mr-ps-switch", UINT64_MAX);
- memory_region_init_alias(&s->lpd.rpu.mr_ps_alias, OBJECT(s),
- "mr-rpu-ps-alias", &s->mr_ps, 0, UINT64_MAX);
+ s->intc = g_array_new(false, false, sizeof(DeviceState *));
+
+ num_can = versal_get_map(s)->num_canfd;
+ s->cfg.canbus = g_new0(CanBusState *, num_can);
+
+ for (i = 0; i < num_can; i++) {
+ g_autofree char *prop_name = g_strdup_printf("canbus%zu", i);
+
+ object_property_add_link(obj, prop_name, TYPE_CAN_BUS,
+ (Object **) &s->cfg.canbus[i],
+ object_property_allow_set_link, 0);
+ }
+}
+
+static void versal_base_finalize(Object *obj)
+{
+ Versal *s = XLNX_VERSAL_BASE(obj);
+
+ g_array_free(s->intc, true);
+ g_free(s->cfg.canbus);
}
static const Property versal_properties[] = {
DEFINE_PROP_LINK("ddr", Versal, cfg.mr_ddr, TYPE_MEMORY_REGION,
MemoryRegion *),
- DEFINE_PROP_LINK("canbus0", Versal, lpd.iou.canbus[0],
- TYPE_CAN_BUS, CanBusState *),
- DEFINE_PROP_LINK("canbus1", Versal, lpd.iou.canbus[1],
- TYPE_CAN_BUS, CanBusState *),
};
-static void versal_class_init(ObjectClass *klass, const void *data)
+static void versal_base_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
- dc->realize = versal_realize;
device_class_set_props(dc, versal_properties);
/* No VMSD since we haven't got any top-level SoC state to save. */
}
-static const TypeInfo versal_info = {
- .name = TYPE_XLNX_VERSAL,
+static void versal_class_init(ObjectClass *klass, const void *data)
+{
+ VersalClass *vc = XLNX_VERSAL_BASE_CLASS(klass);
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ vc->version = VERSAL_VER_VERSAL;
+ dc->realize = versal_realize;
+}
+
+static void versal2_class_init(ObjectClass *klass, const void *data)
+{
+ VersalClass *vc = XLNX_VERSAL_BASE_CLASS(klass);
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ vc->version = VERSAL_VER_VERSAL2;
+ dc->realize = versal2_realize;
+}
+
+static const TypeInfo versal_base_info = {
+ .name = TYPE_XLNX_VERSAL_BASE,
.parent = TYPE_SYS_BUS_DEVICE,
.instance_size = sizeof(Versal),
- .instance_init = versal_init,
+ .instance_init = versal_base_init,
+ .instance_finalize = versal_base_finalize,
+ .class_init = versal_base_class_init,
+ .class_size = sizeof(VersalClass),
+ .abstract = true,
+};
+
+static const TypeInfo versal_info = {
+ .name = TYPE_XLNX_VERSAL,
+ .parent = TYPE_XLNX_VERSAL_BASE,
.class_init = versal_class_init,
};
+static const TypeInfo versal2_info = {
+ .name = TYPE_XLNX_VERSAL2,
+ .parent = TYPE_XLNX_VERSAL_BASE,
+ .class_init = versal2_class_init,
+};
+
static void versal_register_types(void)
{
+ type_register_static(&versal_base_info);
type_register_static(&versal_info);
+ type_register_static(&versal2_info);
}
type_init(versal_register_types);
diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c
index ec96a46..ffed6e5 100644
--- a/hw/arm/xlnx-zynqmp.c
+++ b/hw/arm/xlnx-zynqmp.c
@@ -26,8 +26,6 @@
#include "target/arm/cpu-qom.h"
#include "target/arm/gtimer.h"
-#define GIC_NUM_SPI_INTR 160
-
#define ARM_PHYS_TIMER_PPI 30
#define ARM_VIRT_TIMER_PPI 27
#define ARM_HYP_TIMER_PPI 26
@@ -206,17 +204,26 @@ static const XlnxZynqMPGICRegion xlnx_zynqmp_gic_regions[] = {
static inline int arm_gic_ppi_index(int cpu_nr, int ppi_index)
{
- return GIC_NUM_SPI_INTR + cpu_nr * GIC_INTERNAL + ppi_index;
+ return XLNX_ZYNQMP_GIC_NUM_SPI_INTR + cpu_nr * GIC_INTERNAL + ppi_index;
+}
+
+static unsigned int xlnx_zynqmp_get_rpu_number(MachineState *ms)
+{
+ /*
+ * RPUs will be created only if "-smp" is higher than the maximum
+ * of APUs. Round it up to 0 to avoid dealing with negative values.
+ */
+ return MAX(0, MIN((int)(ms->smp.cpus - XLNX_ZYNQMP_NUM_APU_CPUS),
+ XLNX_ZYNQMP_NUM_RPU_CPUS));
}
static void xlnx_zynqmp_create_rpu(MachineState *ms, XlnxZynqMPState *s,
const char *boot_cpu, Error **errp)
{
int i;
- int num_rpus = MIN((int)(ms->smp.cpus - XLNX_ZYNQMP_NUM_APU_CPUS),
- XLNX_ZYNQMP_NUM_RPU_CPUS);
+ int num_rpus = xlnx_zynqmp_get_rpu_number(ms);
- if (num_rpus <= 0) {
+ if (!num_rpus) {
/* Don't create rpu-cluster object if there's nothing to put in it */
return;
}
@@ -377,6 +384,7 @@ static void xlnx_zynqmp_init(Object *obj)
XlnxZynqMPState *s = XLNX_ZYNQMP(obj);
int i;
int num_apus = MIN(ms->smp.cpus, XLNX_ZYNQMP_NUM_APU_CPUS);
+ int num_rpus = xlnx_zynqmp_get_rpu_number(ms);
object_initialize_child(obj, "apu-cluster", &s->apu_cluster,
TYPE_CPU_CLUSTER);
@@ -390,6 +398,12 @@ static void xlnx_zynqmp_init(Object *obj)
object_initialize_child(obj, "gic", &s->gic, gic_class_name());
+ if (num_rpus) {
+ /* Do not create the rpu_gic if we don't have rpus */
+ object_initialize_child(obj, "rpu_gic", &s->rpu_gic,
+ gic_class_name());
+ }
+
for (i = 0; i < XLNX_ZYNQMP_NUM_GEMS; i++) {
object_initialize_child(obj, "gem[*]", &s->gem[i], TYPE_CADENCE_GEM);
object_initialize_child(obj, "gem-irq-orgate[*]",
@@ -439,6 +453,15 @@ static void xlnx_zynqmp_init(Object *obj)
object_initialize_child(obj, "qspi-irq-orgate",
&s->qspi_irq_orgate, TYPE_OR_IRQ);
+ if (num_rpus) {
+ for (i = 0; i < ARRAY_SIZE(s->splitter); i++) {
+ g_autofree char *name = g_strdup_printf("irq-splitter%d", i);
+ object_initialize_child(obj, name, &s->splitter[i], TYPE_SPLIT_IRQ);
+ }
+ }
+
+
+
for (i = 0; i < XLNX_ZYNQMP_NUM_USB; i++) {
object_initialize_child(obj, "usb[*]", &s->usb[i], TYPE_USB_DWC3);
}
@@ -452,9 +475,10 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp)
uint8_t i;
uint64_t ram_size;
int num_apus = MIN(ms->smp.cpus, XLNX_ZYNQMP_NUM_APU_CPUS);
+ int num_rpus = xlnx_zynqmp_get_rpu_number(ms);
const char *boot_cpu = s->boot_cpu ? s->boot_cpu : "apu-cpu[0]";
ram_addr_t ddr_low_size, ddr_high_size;
- qemu_irq gic_spi[GIC_NUM_SPI_INTR];
+ qemu_irq gic_spi[XLNX_ZYNQMP_GIC_NUM_SPI_INTR];
Error *err = NULL;
ram_size = memory_region_size(s->ddr_ram);
@@ -502,13 +526,22 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp)
g_free(ocm_name);
}
- qdev_prop_set_uint32(DEVICE(&s->gic), "num-irq", GIC_NUM_SPI_INTR + 32);
+ qdev_prop_set_uint32(DEVICE(&s->gic), "num-irq",
+ XLNX_ZYNQMP_GIC_NUM_SPI_INTR + 32);
qdev_prop_set_uint32(DEVICE(&s->gic), "revision", 2);
qdev_prop_set_uint32(DEVICE(&s->gic), "num-cpu", num_apus);
qdev_prop_set_bit(DEVICE(&s->gic), "has-security-extensions", s->secure);
qdev_prop_set_bit(DEVICE(&s->gic),
"has-virtualization-extensions", s->virt);
+ if (num_rpus) {
+ qdev_prop_set_uint32(DEVICE(&s->rpu_gic), "num-irq",
+ XLNX_ZYNQMP_GIC_NUM_SPI_INTR + 32);
+ qdev_prop_set_uint32(DEVICE(&s->rpu_gic), "revision", 1);
+ qdev_prop_set_uint32(DEVICE(&s->rpu_gic), "num-cpu", num_rpus);
+ qdev_prop_set_uint32(DEVICE(&s->rpu_gic), "first-cpu-index", 4);
+ }
+
qdev_realize(DEVICE(&s->apu_cluster), NULL, &error_fatal);
/* Realize APUs before realizing the GIC. KVM requires this. */
@@ -608,13 +641,63 @@ static void xlnx_zynqmp_realize(DeviceState *dev, Error **errp)
return;
}
+ if (num_rpus) {
+ if (!sysbus_realize(SYS_BUS_DEVICE(&s->rpu_gic), errp)) {
+ return;
+ }
+
+ for (i = 0; i < num_rpus; i++) {
+ qemu_irq irq;
+
+ sysbus_mmio_map(SYS_BUS_DEVICE(&s->rpu_gic), i + 1,
+ GIC_BASE_ADDR + i * 0x1000);
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->rpu_gic), i,
+ qdev_get_gpio_in(DEVICE(&s->rpu_cpu[i]),
+ ARM_CPU_IRQ));
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->rpu_gic), i + num_rpus,
+ qdev_get_gpio_in(DEVICE(&s->rpu_cpu[i]),
+ ARM_CPU_FIQ));
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->rpu_gic), i + num_rpus * 2,
+ qdev_get_gpio_in(DEVICE(&s->rpu_cpu[i]),
+ ARM_CPU_VIRQ));
+ sysbus_connect_irq(SYS_BUS_DEVICE(&s->rpu_gic), i + num_rpus * 3,
+ qdev_get_gpio_in(DEVICE(&s->rpu_cpu[i]),
+ ARM_CPU_VFIQ));
+ irq = qdev_get_gpio_in(DEVICE(&s->rpu_gic),
+ arm_gic_ppi_index(i, ARM_PHYS_TIMER_PPI));
+ qdev_connect_gpio_out(DEVICE(&s->rpu_cpu[i]), GTIMER_PHYS, irq);
+ irq = qdev_get_gpio_in(DEVICE(&s->rpu_gic),
+ arm_gic_ppi_index(i, ARM_VIRT_TIMER_PPI));
+ qdev_connect_gpio_out(DEVICE(&s->rpu_cpu[i]), GTIMER_VIRT, irq);
+ irq = qdev_get_gpio_in(DEVICE(&s->rpu_gic),
+ arm_gic_ppi_index(i, ARM_HYP_TIMER_PPI));
+ qdev_connect_gpio_out(DEVICE(&s->rpu_cpu[i]), GTIMER_HYP, irq);
+ irq = qdev_get_gpio_in(DEVICE(&s->rpu_gic),
+ arm_gic_ppi_index(i, ARM_SEC_TIMER_PPI));
+ qdev_connect_gpio_out(DEVICE(&s->rpu_cpu[i]), GTIMER_SEC, irq);
+ }
+
+ sysbus_mmio_map(SYS_BUS_DEVICE(&s->rpu_gic), 0, GIC_BASE_ADDR);
+ }
+
if (!s->boot_cpu_ptr) {
error_setg(errp, "ZynqMP Boot cpu %s not found", boot_cpu);
return;
}
- for (i = 0; i < GIC_NUM_SPI_INTR; i++) {
- gic_spi[i] = qdev_get_gpio_in(DEVICE(&s->gic), i);
+ for (i = 0; i < XLNX_ZYNQMP_GIC_NUM_SPI_INTR; i++) {
+ if (num_rpus) {
+ DeviceState *splitter = DEVICE(&s->splitter[i]);
+ qdev_prop_set_uint16(splitter, "num-lines", 2);
+ qdev_realize(splitter, NULL, &error_abort);
+ gic_spi[i] = qdev_get_gpio_in(splitter, 0);
+ qdev_connect_gpio_out(splitter, 0,
+ qdev_get_gpio_in(DEVICE(&s->gic), i));
+ qdev_connect_gpio_out(splitter, 1,
+ qdev_get_gpio_in(DEVICE(&s->rpu_gic), i));
+ } else {
+ gic_spi[i] = qdev_get_gpio_in(DEVICE(&s->gic), i);
+ }
}
for (i = 0; i < XLNX_ZYNQMP_NUM_GEMS; i++) {
diff --git a/hw/audio/ac97.c b/hw/audio/ac97.c
index 669a046..eb7a847 100644
--- a/hw/audio/ac97.c
+++ b/hw/audio/ac97.c
@@ -886,7 +886,7 @@ static void nabm_writel(void *opaque, uint32_t addr, uint32_t val)
static int write_audio(AC97LinkState *s, AC97BusMasterRegs *r,
int max, int *stop)
{
- uint8_t tmpbuf[4096];
+ QEMU_UNINITIALIZED uint8_t tmpbuf[4096];
uint32_t addr = r->bd.addr;
uint32_t temp = r->picb << 1;
uint32_t written = 0;
@@ -959,7 +959,7 @@ static void write_bup(AC97LinkState *s, int elapsed)
static int read_audio(AC97LinkState *s, AC97BusMasterRegs *r,
int max, int *stop)
{
- uint8_t tmpbuf[4096];
+ QEMU_UNINITIALIZED uint8_t tmpbuf[4096];
uint32_t addr = r->bd.addr;
uint32_t temp = r->picb << 1;
uint32_t nread = 0;
diff --git a/hw/audio/asc.c b/hw/audio/asc.c
index 18382cc..edd42d6 100644
--- a/hw/audio/asc.c
+++ b/hw/audio/asc.c
@@ -12,6 +12,7 @@
#include "qemu/osdep.h"
#include "qemu/timer.h"
+#include "qapi/error.h"
#include "hw/sysbus.h"
#include "hw/irq.h"
#include "audio/audio.h"
@@ -653,11 +654,17 @@ static void asc_realize(DeviceState *dev, Error **errp)
s->voice = AUD_open_out(&s->card, s->voice, "asc.out", s, asc_out_cb,
&as);
+ if (!s->voice) {
+ AUD_remove_card(&s->card);
+ error_setg(errp, "Initializing audio stream failed");
+ return;
+ }
+
s->shift = 1;
s->samples = AUD_get_buffer_size_out(s->voice) >> s->shift;
s->mixbuf = g_malloc0(s->samples << s->shift);
- s->silentbuf = g_malloc0(s->samples << s->shift);
+ s->silentbuf = g_malloc(s->samples << s->shift);
memset(s->silentbuf, 0x80, s->samples << s->shift);
/* Add easc registers if required */
diff --git a/hw/audio/cs4231a.c b/hw/audio/cs4231a.c
index eb9a458..6dfff20 100644
--- a/hw/audio/cs4231a.c
+++ b/hw/audio/cs4231a.c
@@ -528,7 +528,7 @@ static int cs_write_audio (CSState *s, int nchan, int dma_pos,
int dma_len, int len)
{
int temp, net;
- uint8_t tmpbuf[4096];
+ QEMU_UNINITIALIZED uint8_t tmpbuf[4096];
IsaDmaClass *k = ISADMA_GET_CLASS(s->isa_dma);
temp = len;
@@ -547,7 +547,7 @@ static int cs_write_audio (CSState *s, int nchan, int dma_pos,
copied = k->read_memory(s->isa_dma, nchan, tmpbuf, dma_pos, to_copy);
if (s->tab) {
int i;
- int16_t linbuf[4096];
+ QEMU_UNINITIALIZED int16_t linbuf[4096];
for (i = 0; i < copied; ++i)
linbuf[i] = s->tab[tmpbuf[i]];
diff --git a/hw/audio/es1370.c b/hw/audio/es1370.c
index 8efb969..a6a32a6 100644
--- a/hw/audio/es1370.c
+++ b/hw/audio/es1370.c
@@ -604,7 +604,7 @@ static uint64_t es1370_read(void *opaque, hwaddr addr, unsigned size)
static void es1370_transfer_audio (ES1370State *s, struct chan *d, int loop_sel,
int max, bool *irq)
{
- uint8_t tmpbuf[4096];
+ QEMU_UNINITIALIZED uint8_t tmpbuf[4096];
size_t to_transfer;
uint32_t addr = d->frame_addr;
int sc = d->scount & 0xffff;
diff --git a/hw/audio/gus.c b/hw/audio/gus.c
index 87e8634..c36df02 100644
--- a/hw/audio/gus.c
+++ b/hw/audio/gus.c
@@ -183,7 +183,7 @@ static int GUS_read_DMA (void *opaque, int nchan, int dma_pos, int dma_len)
{
GUSState *s = opaque;
IsaDmaClass *k = ISADMA_GET_CLASS(s->isa_dma);
- char tmpbuf[4096];
+ QEMU_UNINITIALIZED char tmpbuf[4096];
int pos = dma_pos, mode, left = dma_len - dma_pos;
ldebug ("read DMA %#x %d\n", dma_pos, dma_len);
diff --git a/hw/audio/marvell_88w8618.c b/hw/audio/marvell_88w8618.c
index 6d3ebbb..c5c79d0 100644
--- a/hw/audio/marvell_88w8618.c
+++ b/hw/audio/marvell_88w8618.c
@@ -66,7 +66,7 @@ static void mv88w8618_audio_callback(void *opaque, int free_out, int free_in)
{
mv88w8618_audio_state *s = opaque;
int16_t *codec_buffer;
- int8_t buf[4096];
+ QEMU_UNINITIALIZED int8_t buf[4096];
int8_t *mem_buffer;
int pos, block_size;
diff --git a/hw/audio/sb16.c b/hw/audio/sb16.c
index 19fd3b9..bac6411 100644
--- a/hw/audio/sb16.c
+++ b/hw/audio/sb16.c
@@ -1181,7 +1181,7 @@ static int write_audio (SB16State *s, int nchan, int dma_pos,
IsaDma *isa_dma = nchan == s->dma ? s->isa_dma : s->isa_hdma;
IsaDmaClass *k = ISADMA_GET_CLASS(isa_dma);
int temp, net;
- uint8_t tmpbuf[4096];
+ QEMU_UNINITIALIZED uint8_t tmpbuf[4096];
temp = len;
net = 0;
diff --git a/hw/audio/via-ac97.c b/hw/audio/via-ac97.c
index 1e0a5c7..d5231e1 100644
--- a/hw/audio/via-ac97.c
+++ b/hw/audio/via-ac97.c
@@ -175,7 +175,7 @@ static void out_cb(void *opaque, int avail)
ViaAC97SGDChannel *c = &s->aur;
int temp, to_copy, copied;
bool stop = false;
- uint8_t tmpbuf[4096];
+ QEMU_UNINITIALIZED uint8_t tmpbuf[4096];
if (c->stat & STAT_PAUSED) {
return;
diff --git a/hw/block/Kconfig b/hw/block/Kconfig
index a898e04..737dbcd 100644
--- a/hw/block/Kconfig
+++ b/hw/block/Kconfig
@@ -13,9 +13,6 @@ config FDC_SYSBUS
config SSI_M25P80
bool
-config NAND
- bool
-
config PFLASH_CFI01
bool
diff --git a/hw/block/hd-geometry.c b/hw/block/hd-geometry.c
index f3939e7..db22190 100644
--- a/hw/block/hd-geometry.c
+++ b/hw/block/hd-geometry.c
@@ -33,7 +33,6 @@
#include "qemu/osdep.h"
#include "system/block-backend.h"
#include "qapi/qapi-types-block.h"
-#include "qemu/bswap.h"
#include "hw/block/block.h"
#include "trace.h"
diff --git a/hw/block/meson.build b/hw/block/meson.build
index 16a51bf..43ed296 100644
--- a/hw/block/meson.build
+++ b/hw/block/meson.build
@@ -6,7 +6,6 @@ system_ss.add(files(
system_ss.add(when: 'CONFIG_FDC', if_true: files('fdc.c'))
system_ss.add(when: 'CONFIG_FDC_ISA', if_true: files('fdc-isa.c'))
system_ss.add(when: 'CONFIG_FDC_SYSBUS', if_true: files('fdc-sysbus.c'))
-system_ss.add(when: 'CONFIG_NAND', if_true: files('nand.c'))
system_ss.add(when: 'CONFIG_PFLASH_CFI01', if_true: files('pflash_cfi01.c'))
system_ss.add(when: 'CONFIG_PFLASH_CFI02', if_true: files('pflash_cfi02.c'))
system_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80.c'))
@@ -14,7 +13,9 @@ system_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80_sfdp.c'))
system_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c'))
system_ss.add(when: 'CONFIG_XEN_BUS', if_true: files('xen-block.c'))
-specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c', 'virtio-blk-common.c'))
-specific_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk.c', 'virtio-blk-common.c'))
+specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c'))
+system_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk-common.c'))
+specific_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk.c'))
+system_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('virtio-blk-common.c'))
subdir('dataplane')
diff --git a/hw/block/nand.c b/hw/block/nand.c
deleted file mode 100644
index c80bf78..0000000
--- a/hw/block/nand.c
+++ /dev/null
@@ -1,835 +0,0 @@
-/*
- * Flash NAND memory emulation. Based on "16M x 8 Bit NAND Flash
- * Memory" datasheet for the KM29U128AT / K9F2808U0A chips from
- * Samsung Electronic.
- *
- * Copyright (c) 2006 Openedhand Ltd.
- * Written by Andrzej Zaborowski <balrog@zabor.org>
- *
- * Support for additional features based on "MT29F2G16ABCWP 2Gx16"
- * datasheet from Micron Technology and "NAND02G-B2C" datasheet
- * from ST Microelectronics.
- *
- * This code is licensed under the GNU GPL v2.
- *
- * Contributions after 2012-01-13 are licensed under the terms of the
- * GNU GPL, version 2 or (at your option) any later version.
- */
-
-#ifndef NAND_IO
-
-#include "qemu/osdep.h"
-#include "hw/hw.h"
-#include "hw/qdev-properties.h"
-#include "hw/qdev-properties-system.h"
-#include "hw/block/flash.h"
-#include "system/block-backend.h"
-#include "migration/vmstate.h"
-#include "qapi/error.h"
-#include "qemu/error-report.h"
-#include "qemu/module.h"
-#include "qom/object.h"
-
-# define NAND_CMD_READ0 0x00
-# define NAND_CMD_READ1 0x01
-# define NAND_CMD_READ2 0x50
-# define NAND_CMD_LPREAD2 0x30
-# define NAND_CMD_NOSERIALREAD2 0x35
-# define NAND_CMD_RANDOMREAD1 0x05
-# define NAND_CMD_RANDOMREAD2 0xe0
-# define NAND_CMD_READID 0x90
-# define NAND_CMD_RESET 0xff
-# define NAND_CMD_PAGEPROGRAM1 0x80
-# define NAND_CMD_PAGEPROGRAM2 0x10
-# define NAND_CMD_CACHEPROGRAM2 0x15
-# define NAND_CMD_BLOCKERASE1 0x60
-# define NAND_CMD_BLOCKERASE2 0xd0
-# define NAND_CMD_READSTATUS 0x70
-# define NAND_CMD_COPYBACKPRG1 0x85
-
-# define NAND_IOSTATUS_ERROR (1 << 0)
-# define NAND_IOSTATUS_PLANE0 (1 << 1)
-# define NAND_IOSTATUS_PLANE1 (1 << 2)
-# define NAND_IOSTATUS_PLANE2 (1 << 3)
-# define NAND_IOSTATUS_PLANE3 (1 << 4)
-# define NAND_IOSTATUS_READY (1 << 6)
-# define NAND_IOSTATUS_UNPROTCT (1 << 7)
-
-# define MAX_PAGE 0x800
-# define MAX_OOB 0x40
-
-typedef struct NANDFlashState NANDFlashState;
-struct NANDFlashState {
- DeviceState parent_obj;
-
- uint8_t manf_id, chip_id;
- uint8_t buswidth; /* in BYTES */
- int size, pages;
- int page_shift, oob_shift, erase_shift, addr_shift;
- uint8_t *storage;
- BlockBackend *blk;
- int mem_oob;
-
- uint8_t cle, ale, ce, wp, gnd;
-
- uint8_t io[MAX_PAGE + MAX_OOB + 0x400];
- uint8_t *ioaddr;
- int iolen;
-
- uint32_t cmd;
- uint64_t addr;
- int addrlen;
- int status;
- int offset;
-
- void (*blk_write)(NANDFlashState *s);
- void (*blk_erase)(NANDFlashState *s);
- /*
- * Returns %true when block containing (@addr + @offset) is
- * successfully loaded, otherwise %false.
- */
- bool (*blk_load)(NANDFlashState *s, uint64_t addr, unsigned offset);
-
- uint32_t ioaddr_vmstate;
-};
-
-#define TYPE_NAND "nand"
-
-OBJECT_DECLARE_SIMPLE_TYPE(NANDFlashState, NAND)
-
-static void mem_and(uint8_t *dest, const uint8_t *src, size_t n)
-{
- /* Like memcpy() but we logical-AND the data into the destination */
- int i;
- for (i = 0; i < n; i++) {
- dest[i] &= src[i];
- }
-}
-
-# define NAND_NO_AUTOINCR 0x00000001
-# define NAND_BUSWIDTH_16 0x00000002
-# define NAND_NO_PADDING 0x00000004
-# define NAND_CACHEPRG 0x00000008
-# define NAND_COPYBACK 0x00000010
-# define NAND_IS_AND 0x00000020
-# define NAND_4PAGE_ARRAY 0x00000040
-# define NAND_NO_READRDY 0x00000100
-# define NAND_SAMSUNG_LP (NAND_NO_PADDING | NAND_COPYBACK)
-
-# define NAND_IO
-
-# define PAGE(addr) ((addr) >> ADDR_SHIFT)
-# define PAGE_START(page) (PAGE(page) * (NAND_PAGE_SIZE + OOB_SIZE))
-# define PAGE_MASK ((1 << ADDR_SHIFT) - 1)
-# define OOB_SHIFT (PAGE_SHIFT - 5)
-# define OOB_SIZE (1 << OOB_SHIFT)
-# define SECTOR(addr) ((addr) >> (9 + ADDR_SHIFT - PAGE_SHIFT))
-# define SECTOR_OFFSET(addr) ((addr) & ((511 >> PAGE_SHIFT) << 8))
-
-# define NAND_PAGE_SIZE 256
-# define PAGE_SHIFT 8
-# define PAGE_SECTORS 1
-# define ADDR_SHIFT 8
-# include "nand.c"
-# define NAND_PAGE_SIZE 512
-# define PAGE_SHIFT 9
-# define PAGE_SECTORS 1
-# define ADDR_SHIFT 8
-# include "nand.c"
-# define NAND_PAGE_SIZE 2048
-# define PAGE_SHIFT 11
-# define PAGE_SECTORS 4
-# define ADDR_SHIFT 16
-# include "nand.c"
-
-/* Information based on Linux drivers/mtd/nand/raw/nand_ids.c */
-static const struct {
- int size;
- int width;
- int page_shift;
- int erase_shift;
- uint32_t options;
-} nand_flash_ids[0x100] = {
- [0 ... 0xff] = { 0 },
-
- [0x6b] = { 4, 8, 9, 4, 0 },
- [0xe3] = { 4, 8, 9, 4, 0 },
- [0xe5] = { 4, 8, 9, 4, 0 },
- [0xd6] = { 8, 8, 9, 4, 0 },
- [0xe6] = { 8, 8, 9, 4, 0 },
-
- [0x33] = { 16, 8, 9, 5, 0 },
- [0x73] = { 16, 8, 9, 5, 0 },
- [0x43] = { 16, 16, 9, 5, NAND_BUSWIDTH_16 },
- [0x53] = { 16, 16, 9, 5, NAND_BUSWIDTH_16 },
-
- [0x35] = { 32, 8, 9, 5, 0 },
- [0x75] = { 32, 8, 9, 5, 0 },
- [0x45] = { 32, 16, 9, 5, NAND_BUSWIDTH_16 },
- [0x55] = { 32, 16, 9, 5, NAND_BUSWIDTH_16 },
-
- [0x36] = { 64, 8, 9, 5, 0 },
- [0x76] = { 64, 8, 9, 5, 0 },
- [0x46] = { 64, 16, 9, 5, NAND_BUSWIDTH_16 },
- [0x56] = { 64, 16, 9, 5, NAND_BUSWIDTH_16 },
-
- [0x78] = { 128, 8, 9, 5, 0 },
- [0x39] = { 128, 8, 9, 5, 0 },
- [0x79] = { 128, 8, 9, 5, 0 },
- [0x72] = { 128, 16, 9, 5, NAND_BUSWIDTH_16 },
- [0x49] = { 128, 16, 9, 5, NAND_BUSWIDTH_16 },
- [0x74] = { 128, 16, 9, 5, NAND_BUSWIDTH_16 },
- [0x59] = { 128, 16, 9, 5, NAND_BUSWIDTH_16 },
-
- [0x71] = { 256, 8, 9, 5, 0 },
-
- /*
- * These are the new chips with large page size. The pagesize and the
- * erasesize is determined from the extended id bytes
- */
-# define LP_OPTIONS (NAND_SAMSUNG_LP | NAND_NO_READRDY | NAND_NO_AUTOINCR)
-# define LP_OPTIONS16 (LP_OPTIONS | NAND_BUSWIDTH_16)
-
- /* 512 Megabit */
- [0xa2] = { 64, 8, 0, 0, LP_OPTIONS },
- [0xf2] = { 64, 8, 0, 0, LP_OPTIONS },
- [0xb2] = { 64, 16, 0, 0, LP_OPTIONS16 },
- [0xc2] = { 64, 16, 0, 0, LP_OPTIONS16 },
-
- /* 1 Gigabit */
- [0xa1] = { 128, 8, 0, 0, LP_OPTIONS },
- [0xf1] = { 128, 8, 0, 0, LP_OPTIONS },
- [0xb1] = { 128, 16, 0, 0, LP_OPTIONS16 },
- [0xc1] = { 128, 16, 0, 0, LP_OPTIONS16 },
-
- /* 2 Gigabit */
- [0xaa] = { 256, 8, 0, 0, LP_OPTIONS },
- [0xda] = { 256, 8, 0, 0, LP_OPTIONS },
- [0xba] = { 256, 16, 0, 0, LP_OPTIONS16 },
- [0xca] = { 256, 16, 0, 0, LP_OPTIONS16 },
-
- /* 4 Gigabit */
- [0xac] = { 512, 8, 0, 0, LP_OPTIONS },
- [0xdc] = { 512, 8, 0, 0, LP_OPTIONS },
- [0xbc] = { 512, 16, 0, 0, LP_OPTIONS16 },
- [0xcc] = { 512, 16, 0, 0, LP_OPTIONS16 },
-
- /* 8 Gigabit */
- [0xa3] = { 1024, 8, 0, 0, LP_OPTIONS },
- [0xd3] = { 1024, 8, 0, 0, LP_OPTIONS },
- [0xb3] = { 1024, 16, 0, 0, LP_OPTIONS16 },
- [0xc3] = { 1024, 16, 0, 0, LP_OPTIONS16 },
-
- /* 16 Gigabit */
- [0xa5] = { 2048, 8, 0, 0, LP_OPTIONS },
- [0xd5] = { 2048, 8, 0, 0, LP_OPTIONS },
- [0xb5] = { 2048, 16, 0, 0, LP_OPTIONS16 },
- [0xc5] = { 2048, 16, 0, 0, LP_OPTIONS16 },
-};
-
-static void nand_reset(DeviceState *dev)
-{
- NANDFlashState *s = NAND(dev);
- s->cmd = NAND_CMD_READ0;
- s->addr = 0;
- s->addrlen = 0;
- s->iolen = 0;
- s->offset = 0;
- s->status &= NAND_IOSTATUS_UNPROTCT;
- s->status |= NAND_IOSTATUS_READY;
-}
-
-static inline void nand_pushio_byte(NANDFlashState *s, uint8_t value)
-{
- s->ioaddr[s->iolen++] = value;
- for (value = s->buswidth; --value;) {
- s->ioaddr[s->iolen++] = 0;
- }
-}
-
-/*
- * nand_load_block: Load block containing (s->addr + @offset).
- * Returns length of data available at @offset in this block.
- */
-static unsigned nand_load_block(NANDFlashState *s, unsigned offset)
-{
- unsigned iolen;
-
- if (!s->blk_load(s, s->addr, offset)) {
- return 0;
- }
-
- iolen = (1 << s->page_shift);
- if (s->gnd) {
- iolen += 1 << s->oob_shift;
- }
- assert(offset <= iolen);
- iolen -= offset;
-
- return iolen;
-}
-
-static void nand_command(NANDFlashState *s)
-{
- switch (s->cmd) {
- case NAND_CMD_READ0:
- s->iolen = 0;
- break;
-
- case NAND_CMD_READID:
- s->ioaddr = s->io;
- s->iolen = 0;
- nand_pushio_byte(s, s->manf_id);
- nand_pushio_byte(s, s->chip_id);
- nand_pushio_byte(s, 'Q'); /* Don't-care byte (often 0xa5) */
- if (nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) {
- /* Page Size, Block Size, Spare Size; bit 6 indicates
- * 8 vs 16 bit width NAND.
- */
- nand_pushio_byte(s, (s->buswidth == 2) ? 0x55 : 0x15);
- } else {
- nand_pushio_byte(s, 0xc0); /* Multi-plane */
- }
- break;
-
- case NAND_CMD_RANDOMREAD2:
- case NAND_CMD_NOSERIALREAD2:
- if (!(nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP))
- break;
- s->iolen = nand_load_block(s, s->addr & ((1 << s->addr_shift) - 1));
- break;
-
- case NAND_CMD_RESET:
- nand_reset(DEVICE(s));
- break;
-
- case NAND_CMD_PAGEPROGRAM1:
- s->ioaddr = s->io;
- s->iolen = 0;
- break;
-
- case NAND_CMD_PAGEPROGRAM2:
- if (s->wp) {
- s->blk_write(s);
- }
- break;
-
- case NAND_CMD_BLOCKERASE1:
- break;
-
- case NAND_CMD_BLOCKERASE2:
- s->addr &= (1ull << s->addrlen * 8) - 1;
- s->addr <<= nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP ?
- 16 : 8;
-
- if (s->wp) {
- s->blk_erase(s);
- }
- break;
-
- case NAND_CMD_READSTATUS:
- s->ioaddr = s->io;
- s->iolen = 0;
- nand_pushio_byte(s, s->status);
- break;
-
- default:
- printf("%s: Unknown NAND command 0x%02x\n", __func__, s->cmd);
- }
-}
-
-static int nand_pre_save(void *opaque)
-{
- NANDFlashState *s = NAND(opaque);
-
- s->ioaddr_vmstate = s->ioaddr - s->io;
-
- return 0;
-}
-
-static int nand_post_load(void *opaque, int version_id)
-{
- NANDFlashState *s = NAND(opaque);
-
- if (s->ioaddr_vmstate > sizeof(s->io)) {
- return -EINVAL;
- }
- s->ioaddr = s->io + s->ioaddr_vmstate;
-
- return 0;
-}
-
-static const VMStateDescription vmstate_nand = {
- .name = "nand",
- .version_id = 1,
- .minimum_version_id = 1,
- .pre_save = nand_pre_save,
- .post_load = nand_post_load,
- .fields = (const VMStateField[]) {
- VMSTATE_UINT8(cle, NANDFlashState),
- VMSTATE_UINT8(ale, NANDFlashState),
- VMSTATE_UINT8(ce, NANDFlashState),
- VMSTATE_UINT8(wp, NANDFlashState),
- VMSTATE_UINT8(gnd, NANDFlashState),
- VMSTATE_BUFFER(io, NANDFlashState),
- VMSTATE_UINT32(ioaddr_vmstate, NANDFlashState),
- VMSTATE_INT32(iolen, NANDFlashState),
- VMSTATE_UINT32(cmd, NANDFlashState),
- VMSTATE_UINT64(addr, NANDFlashState),
- VMSTATE_INT32(addrlen, NANDFlashState),
- VMSTATE_INT32(status, NANDFlashState),
- VMSTATE_INT32(offset, NANDFlashState),
- /* XXX: do we want to save s->storage too? */
- VMSTATE_END_OF_LIST()
- }
-};
-
-static void nand_realize(DeviceState *dev, Error **errp)
-{
- int pagesize;
- NANDFlashState *s = NAND(dev);
- int ret;
-
-
- s->buswidth = nand_flash_ids[s->chip_id].width >> 3;
- s->size = nand_flash_ids[s->chip_id].size << 20;
- if (nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) {
- s->page_shift = 11;
- s->erase_shift = 6;
- } else {
- s->page_shift = nand_flash_ids[s->chip_id].page_shift;
- s->erase_shift = nand_flash_ids[s->chip_id].erase_shift;
- }
-
- switch (1 << s->page_shift) {
- case 256:
- nand_init_256(s);
- break;
- case 512:
- nand_init_512(s);
- break;
- case 2048:
- nand_init_2048(s);
- break;
- default:
- error_setg(errp, "Unsupported NAND block size %#x",
- 1 << s->page_shift);
- return;
- }
-
- pagesize = 1 << s->oob_shift;
- s->mem_oob = 1;
- if (s->blk) {
- if (!blk_supports_write_perm(s->blk)) {
- error_setg(errp, "Can't use a read-only drive");
- return;
- }
- ret = blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
- BLK_PERM_ALL, errp);
- if (ret < 0) {
- return;
- }
- if (blk_getlength(s->blk) >=
- (s->pages << s->page_shift) + (s->pages << s->oob_shift)) {
- pagesize = 0;
- s->mem_oob = 0;
- }
- } else {
- pagesize += 1 << s->page_shift;
- }
- if (pagesize) {
- s->storage = (uint8_t *) memset(g_malloc(s->pages * pagesize),
- 0xff, s->pages * pagesize);
- }
- /* Give s->ioaddr a sane value in case we save state before it is used. */
- s->ioaddr = s->io;
-}
-
-static const Property nand_properties[] = {
- DEFINE_PROP_UINT8("manufacturer_id", NANDFlashState, manf_id, 0),
- DEFINE_PROP_UINT8("chip_id", NANDFlashState, chip_id, 0),
- DEFINE_PROP_DRIVE("drive", NANDFlashState, blk),
-};
-
-static void nand_class_init(ObjectClass *klass, const void *data)
-{
- DeviceClass *dc = DEVICE_CLASS(klass);
-
- dc->realize = nand_realize;
- device_class_set_legacy_reset(dc, nand_reset);
- dc->vmsd = &vmstate_nand;
- device_class_set_props(dc, nand_properties);
- set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
-}
-
-static const TypeInfo nand_info = {
- .name = TYPE_NAND,
- .parent = TYPE_DEVICE,
- .instance_size = sizeof(NANDFlashState),
- .class_init = nand_class_init,
-};
-
-static void nand_register_types(void)
-{
- type_register_static(&nand_info);
-}
-
-/*
- * Chip inputs are CLE, ALE, CE, WP, GND and eight I/O pins. Chip
- * outputs are R/B and eight I/O pins.
- *
- * CE, WP and R/B are active low.
- */
-void nand_setpins(DeviceState *dev, uint8_t cle, uint8_t ale,
- uint8_t ce, uint8_t wp, uint8_t gnd)
-{
- NANDFlashState *s = NAND(dev);
-
- s->cle = cle;
- s->ale = ale;
- s->ce = ce;
- s->wp = wp;
- s->gnd = gnd;
- if (wp) {
- s->status |= NAND_IOSTATUS_UNPROTCT;
- } else {
- s->status &= ~NAND_IOSTATUS_UNPROTCT;
- }
-}
-
-void nand_getpins(DeviceState *dev, int *rb)
-{
- *rb = 1;
-}
-
-void nand_setio(DeviceState *dev, uint32_t value)
-{
- int i;
- NANDFlashState *s = NAND(dev);
-
- if (!s->ce && s->cle) {
- if (nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) {
- if (s->cmd == NAND_CMD_READ0 && value == NAND_CMD_LPREAD2)
- return;
- if (value == NAND_CMD_RANDOMREAD1) {
- s->addr &= ~((1 << s->addr_shift) - 1);
- s->addrlen = 0;
- return;
- }
- }
- if (value == NAND_CMD_READ0) {
- s->offset = 0;
- } else if (value == NAND_CMD_READ1) {
- s->offset = 0x100;
- value = NAND_CMD_READ0;
- } else if (value == NAND_CMD_READ2) {
- s->offset = 1 << s->page_shift;
- value = NAND_CMD_READ0;
- }
-
- s->cmd = value;
-
- if (s->cmd == NAND_CMD_READSTATUS ||
- s->cmd == NAND_CMD_PAGEPROGRAM2 ||
- s->cmd == NAND_CMD_BLOCKERASE1 ||
- s->cmd == NAND_CMD_BLOCKERASE2 ||
- s->cmd == NAND_CMD_NOSERIALREAD2 ||
- s->cmd == NAND_CMD_RANDOMREAD2 ||
- s->cmd == NAND_CMD_RESET) {
- nand_command(s);
- }
-
- if (s->cmd != NAND_CMD_RANDOMREAD2) {
- s->addrlen = 0;
- }
- }
-
- if (s->ale) {
- unsigned int shift = s->addrlen * 8;
- uint64_t mask = ~(0xffull << shift);
- uint64_t v = (uint64_t)value << shift;
-
- s->addr = (s->addr & mask) | v;
- s->addrlen ++;
-
- switch (s->addrlen) {
- case 1:
- if (s->cmd == NAND_CMD_READID) {
- nand_command(s);
- }
- break;
- case 2: /* fix cache address as a byte address */
- s->addr <<= (s->buswidth - 1);
- break;
- case 3:
- if (!(nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) &&
- (s->cmd == NAND_CMD_READ0 ||
- s->cmd == NAND_CMD_PAGEPROGRAM1)) {
- nand_command(s);
- }
- break;
- case 4:
- if ((nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) &&
- nand_flash_ids[s->chip_id].size < 256 && /* 1Gb or less */
- (s->cmd == NAND_CMD_READ0 ||
- s->cmd == NAND_CMD_PAGEPROGRAM1)) {
- nand_command(s);
- }
- break;
- case 5:
- if ((nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) &&
- nand_flash_ids[s->chip_id].size >= 256 && /* 2Gb or more */
- (s->cmd == NAND_CMD_READ0 ||
- s->cmd == NAND_CMD_PAGEPROGRAM1)) {
- nand_command(s);
- }
- break;
- default:
- break;
- }
- }
-
- if (!s->cle && !s->ale && s->cmd == NAND_CMD_PAGEPROGRAM1) {
- if (s->iolen < (1 << s->page_shift) + (1 << s->oob_shift)) {
- for (i = s->buswidth; i--; value >>= 8) {
- s->io[s->iolen ++] = (uint8_t) (value & 0xff);
- }
- }
- } else if (!s->cle && !s->ale && s->cmd == NAND_CMD_COPYBACKPRG1) {
- if ((s->addr & ((1 << s->addr_shift) - 1)) <
- (1 << s->page_shift) + (1 << s->oob_shift)) {
- for (i = s->buswidth; i--; s->addr++, value >>= 8) {
- s->io[s->iolen + (s->addr & ((1 << s->addr_shift) - 1))] =
- (uint8_t) (value & 0xff);
- }
- }
- }
-}
-
-uint32_t nand_getio(DeviceState *dev)
-{
- int offset;
- uint32_t x = 0;
- NANDFlashState *s = NAND(dev);
-
- /* Allow sequential reading */
- if (!s->iolen && s->cmd == NAND_CMD_READ0) {
- offset = (int) (s->addr & ((1 << s->addr_shift) - 1)) + s->offset;
- s->offset = 0;
- s->iolen = nand_load_block(s, offset);
- }
-
- if (s->ce || s->iolen <= 0) {
- return 0;
- }
-
- for (offset = s->buswidth; offset--;) {
- x |= s->ioaddr[offset] << (offset << 3);
- }
- /* after receiving READ STATUS command all subsequent reads will
- * return the status register value until another command is issued
- */
- if (s->cmd != NAND_CMD_READSTATUS) {
- s->addr += s->buswidth;
- s->ioaddr += s->buswidth;
- s->iolen -= s->buswidth;
- }
- return x;
-}
-
-uint32_t nand_getbuswidth(DeviceState *dev)
-{
- NANDFlashState *s = (NANDFlashState *) dev;
- return s->buswidth << 3;
-}
-
-DeviceState *nand_init(BlockBackend *blk, int manf_id, int chip_id)
-{
- DeviceState *dev;
-
- if (nand_flash_ids[chip_id].size == 0) {
- hw_error("%s: Unsupported NAND chip ID.\n", __func__);
- }
- dev = qdev_new(TYPE_NAND);
- qdev_prop_set_uint8(dev, "manufacturer_id", manf_id);
- qdev_prop_set_uint8(dev, "chip_id", chip_id);
- if (blk) {
- qdev_prop_set_drive_err(dev, "drive", blk, &error_fatal);
- }
-
- qdev_realize(dev, NULL, &error_fatal);
- return dev;
-}
-
-type_init(nand_register_types)
-
-#else
-
-/* Program a single page */
-static void glue(nand_blk_write_, NAND_PAGE_SIZE)(NANDFlashState *s)
-{
- uint64_t off, page, sector, soff;
- uint8_t iobuf[(PAGE_SECTORS + 2) * 0x200];
- if (PAGE(s->addr) >= s->pages)
- return;
-
- if (!s->blk) {
- mem_and(s->storage + PAGE_START(s->addr) + (s->addr & PAGE_MASK) +
- s->offset, s->io, s->iolen);
- } else if (s->mem_oob) {
- sector = SECTOR(s->addr);
- off = (s->addr & PAGE_MASK) + s->offset;
- soff = SECTOR_OFFSET(s->addr);
- if (blk_pread(s->blk, sector << BDRV_SECTOR_BITS,
- PAGE_SECTORS << BDRV_SECTOR_BITS, iobuf, 0) < 0) {
- printf("%s: read error in sector %" PRIu64 "\n", __func__, sector);
- return;
- }
-
- mem_and(iobuf + (soff | off), s->io, MIN(s->iolen, NAND_PAGE_SIZE - off));
- if (off + s->iolen > NAND_PAGE_SIZE) {
- page = PAGE(s->addr);
- mem_and(s->storage + (page << OOB_SHIFT), s->io + NAND_PAGE_SIZE - off,
- MIN(OOB_SIZE, off + s->iolen - NAND_PAGE_SIZE));
- }
-
- if (blk_pwrite(s->blk, sector << BDRV_SECTOR_BITS,
- PAGE_SECTORS << BDRV_SECTOR_BITS, iobuf, 0) < 0) {
- printf("%s: write error in sector %" PRIu64 "\n", __func__, sector);
- }
- } else {
- off = PAGE_START(s->addr) + (s->addr & PAGE_MASK) + s->offset;
- sector = off >> 9;
- soff = off & 0x1ff;
- if (blk_pread(s->blk, sector << BDRV_SECTOR_BITS,
- (PAGE_SECTORS + 2) << BDRV_SECTOR_BITS, iobuf, 0) < 0) {
- printf("%s: read error in sector %" PRIu64 "\n", __func__, sector);
- return;
- }
-
- mem_and(iobuf + soff, s->io, s->iolen);
-
- if (blk_pwrite(s->blk, sector << BDRV_SECTOR_BITS,
- (PAGE_SECTORS + 2) << BDRV_SECTOR_BITS, iobuf, 0) < 0) {
- printf("%s: write error in sector %" PRIu64 "\n", __func__, sector);
- }
- }
- s->offset = 0;
-}
-
-/* Erase a single block */
-static void glue(nand_blk_erase_, NAND_PAGE_SIZE)(NANDFlashState *s)
-{
- uint64_t i, page, addr;
- uint8_t iobuf[0x200] = { [0 ... 0x1ff] = 0xff, };
- addr = s->addr & ~((1 << (ADDR_SHIFT + s->erase_shift)) - 1);
-
- if (PAGE(addr) >= s->pages) {
- return;
- }
-
- if (!s->blk) {
- memset(s->storage + PAGE_START(addr),
- 0xff, (NAND_PAGE_SIZE + OOB_SIZE) << s->erase_shift);
- } else if (s->mem_oob) {
- memset(s->storage + (PAGE(addr) << OOB_SHIFT),
- 0xff, OOB_SIZE << s->erase_shift);
- i = SECTOR(addr);
- page = SECTOR(addr + (1 << (ADDR_SHIFT + s->erase_shift)));
- for (; i < page; i ++)
- if (blk_pwrite(s->blk, i << BDRV_SECTOR_BITS,
- BDRV_SECTOR_SIZE, iobuf, 0) < 0) {
- printf("%s: write error in sector %" PRIu64 "\n", __func__, i);
- }
- } else {
- addr = PAGE_START(addr);
- page = addr >> 9;
- if (blk_pread(s->blk, page << BDRV_SECTOR_BITS,
- BDRV_SECTOR_SIZE, iobuf, 0) < 0) {
- printf("%s: read error in sector %" PRIu64 "\n", __func__, page);
- }
- memset(iobuf + (addr & 0x1ff), 0xff, (~addr & 0x1ff) + 1);
- if (blk_pwrite(s->blk, page << BDRV_SECTOR_BITS,
- BDRV_SECTOR_SIZE, iobuf, 0) < 0) {
- printf("%s: write error in sector %" PRIu64 "\n", __func__, page);
- }
-
- memset(iobuf, 0xff, 0x200);
- i = (addr & ~0x1ff) + 0x200;
- for (addr += ((NAND_PAGE_SIZE + OOB_SIZE) << s->erase_shift) - 0x200;
- i < addr; i += 0x200) {
- if (blk_pwrite(s->blk, i, BDRV_SECTOR_SIZE, iobuf, 0) < 0) {
- printf("%s: write error in sector %" PRIu64 "\n",
- __func__, i >> 9);
- }
- }
-
- page = i >> 9;
- if (blk_pread(s->blk, page << BDRV_SECTOR_BITS,
- BDRV_SECTOR_SIZE, iobuf, 0) < 0) {
- printf("%s: read error in sector %" PRIu64 "\n", __func__, page);
- }
- memset(iobuf, 0xff, ((addr - 1) & 0x1ff) + 1);
- if (blk_pwrite(s->blk, page << BDRV_SECTOR_BITS,
- BDRV_SECTOR_SIZE, iobuf, 0) < 0) {
- printf("%s: write error in sector %" PRIu64 "\n", __func__, page);
- }
- }
-}
-
-static bool glue(nand_blk_load_, NAND_PAGE_SIZE)(NANDFlashState *s,
- uint64_t addr, unsigned offset)
-{
- if (PAGE(addr) >= s->pages) {
- return false;
- }
-
- if (offset > NAND_PAGE_SIZE + OOB_SIZE) {
- return false;
- }
-
- if (s->blk) {
- if (s->mem_oob) {
- if (blk_pread(s->blk, SECTOR(addr) << BDRV_SECTOR_BITS,
- PAGE_SECTORS << BDRV_SECTOR_BITS, s->io, 0) < 0) {
- printf("%s: read error in sector %" PRIu64 "\n",
- __func__, SECTOR(addr));
- }
- memcpy(s->io + SECTOR_OFFSET(s->addr) + NAND_PAGE_SIZE,
- s->storage + (PAGE(s->addr) << OOB_SHIFT),
- OOB_SIZE);
- s->ioaddr = s->io + SECTOR_OFFSET(s->addr) + offset;
- } else {
- if (blk_pread(s->blk, PAGE_START(addr),
- (PAGE_SECTORS + 2) << BDRV_SECTOR_BITS, s->io, 0)
- < 0) {
- printf("%s: read error in sector %" PRIu64 "\n",
- __func__, PAGE_START(addr) >> 9);
- }
- s->ioaddr = s->io + (PAGE_START(addr) & 0x1ff) + offset;
- }
- } else {
- memcpy(s->io, s->storage + PAGE_START(s->addr) +
- offset, NAND_PAGE_SIZE + OOB_SIZE - offset);
- s->ioaddr = s->io;
- }
-
- return true;
-}
-
-static void glue(nand_init_, NAND_PAGE_SIZE)(NANDFlashState *s)
-{
- s->oob_shift = PAGE_SHIFT - 5;
- s->pages = s->size >> PAGE_SHIFT;
- s->addr_shift = ADDR_SHIFT;
-
- s->blk_erase = glue(nand_blk_erase_, NAND_PAGE_SIZE);
- s->blk_write = glue(nand_blk_write_, NAND_PAGE_SIZE);
- s->blk_load = glue(nand_blk_load_, NAND_PAGE_SIZE);
-}
-
-# undef NAND_PAGE_SIZE
-# undef PAGE_SHIFT
-# undef PAGE_SECTORS
-# undef ADDR_SHIFT
-#endif /* NAND_IO */
diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 0eebbcd..c0cc5f6 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -210,6 +210,7 @@ static int vhost_user_blk_stop(VirtIODevice *vdev)
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
int ret;
+ bool force_stop = false;
if (!s->started_vu) {
return 0;
@@ -220,7 +221,11 @@ static int vhost_user_blk_stop(VirtIODevice *vdev)
return 0;
}
- ret = vhost_dev_stop(&s->dev, vdev, true);
+ force_stop = s->skip_get_vring_base_on_force_shutdown &&
+ qemu_force_shutdown_requested();
+
+ ret = force_stop ? vhost_dev_force_stop(&s->dev, vdev, true) :
+ vhost_dev_stop(&s->dev, vdev, true);
if (k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false) < 0) {
error_report("vhost guest notifier cleanup failed: %d", ret);
@@ -584,6 +589,8 @@ static const Property vhost_user_blk_properties[] = {
VIRTIO_BLK_F_DISCARD, true),
DEFINE_PROP_BIT64("write-zeroes", VHostUserBlk, parent_obj.host_features,
VIRTIO_BLK_F_WRITE_ZEROES, true),
+ DEFINE_PROP_BOOL("skip-get-vring-base-on-force-shutdown", VHostUserBlk,
+ skip_get_vring_base_on_force_shutdown, false),
};
static void vhost_user_blk_class_init(ObjectClass *klass, const void *data)
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 9bab271..64efce4 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -62,11 +62,7 @@ void virtio_blk_req_complete(VirtIOBlockReq *req, unsigned char status)
iov_discard_undo(&req->inhdr_undo);
iov_discard_undo(&req->outhdr_undo);
virtqueue_push(req->vq, &req->elem, req->in_len);
- if (qemu_in_iothread()) {
- virtio_notify_irqfd(vdev, req->vq);
- } else {
- virtio_notify(vdev, req->vq);
- }
+ virtio_notify(vdev, req->vq);
}
static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
diff --git a/hw/char/Kconfig b/hw/char/Kconfig
index 9d517f3..020c0a8 100644
--- a/hw/char/Kconfig
+++ b/hw/char/Kconfig
@@ -48,6 +48,9 @@ config VIRTIO_SERIAL
default y
depends on VIRTIO
+config MAX78000_UART
+ bool
+
config STM32F2XX_USART
bool
diff --git a/hw/char/max78000_uart.c b/hw/char/max78000_uart.c
new file mode 100644
index 0000000..c76c0e7
--- /dev/null
+++ b/hw/char/max78000_uart.c
@@ -0,0 +1,292 @@
+/*
+ * MAX78000 UART
+ *
+ * Copyright (c) 2025 Jackson Donaldson <jcksn@duck.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/char/max78000_uart.h"
+#include "hw/irq.h"
+#include "hw/qdev-properties.h"
+#include "hw/qdev-properties-system.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "migration/vmstate.h"
+#include "trace.h"
+
+
+static int max78000_uart_can_receive(void *opaque)
+{
+ Max78000UartState *s = opaque;
+ if (!(s->ctrl & UART_BCLKEN)) {
+ return 0;
+ }
+ return fifo8_num_free(&s->rx_fifo);
+}
+
+static void max78000_update_irq(Max78000UartState *s)
+{
+ int interrupt_level;
+
+ interrupt_level = s->int_fl & s->int_en;
+ qemu_set_irq(s->irq, interrupt_level);
+}
+
+static void max78000_uart_receive(void *opaque, const uint8_t *buf, int size)
+{
+ Max78000UartState *s = opaque;
+
+ assert(size <= fifo8_num_free(&s->rx_fifo));
+
+ fifo8_push_all(&s->rx_fifo, buf, size);
+
+ uint32_t rx_threshold = s->ctrl & 0xf;
+
+ if (fifo8_num_used(&s->rx_fifo) >= rx_threshold) {
+ s->int_fl |= UART_RX_THD;
+ }
+
+ max78000_update_irq(s);
+}
+
+static void max78000_uart_reset_hold(Object *obj, ResetType type)
+{
+ Max78000UartState *s = MAX78000_UART(obj);
+
+ s->ctrl = 0;
+ s->status = UART_TX_EM | UART_RX_EM;
+ s->int_en = 0;
+ s->int_fl = 0;
+ s->osr = 0;
+ s->txpeek = 0;
+ s->pnr = UART_RTS;
+ s->fifo = 0;
+ s->dma = 0;
+ s->wken = 0;
+ s->wkfl = 0;
+ fifo8_reset(&s->rx_fifo);
+}
+
+static uint64_t max78000_uart_read(void *opaque, hwaddr addr,
+ unsigned int size)
+{
+ Max78000UartState *s = opaque;
+ uint64_t retvalue = 0;
+ switch (addr) {
+ case UART_CTRL:
+ retvalue = s->ctrl;
+ break;
+ case UART_STATUS:
+ retvalue = (fifo8_num_used(&s->rx_fifo) << UART_RX_LVL) |
+ UART_TX_EM |
+ (fifo8_is_empty(&s->rx_fifo) ? UART_RX_EM : 0);
+ break;
+ case UART_INT_EN:
+ retvalue = s->int_en;
+ break;
+ case UART_INT_FL:
+ retvalue = s->int_fl;
+ break;
+ case UART_CLKDIV:
+ retvalue = s->clkdiv;
+ break;
+ case UART_OSR:
+ retvalue = s->osr;
+ break;
+ case UART_TXPEEK:
+ if (!fifo8_is_empty(&s->rx_fifo)) {
+ retvalue = fifo8_peek(&s->rx_fifo);
+ }
+ break;
+ case UART_PNR:
+ retvalue = s->pnr;
+ break;
+ case UART_FIFO:
+ if (!fifo8_is_empty(&s->rx_fifo)) {
+ retvalue = fifo8_pop(&s->rx_fifo);
+ max78000_update_irq(s);
+ }
+ break;
+ case UART_DMA:
+ /* DMA not implemented */
+ retvalue = s->dma;
+ break;
+ case UART_WKEN:
+ retvalue = s->wken;
+ break;
+ case UART_WKFL:
+ retvalue = s->wkfl;
+ break;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: Bad offset 0x%"HWADDR_PRIx"\n", __func__, addr);
+ break;
+ }
+
+ return retvalue;
+}
+
+static void max78000_uart_write(void *opaque, hwaddr addr,
+ uint64_t val64, unsigned int size)
+{
+ Max78000UartState *s = opaque;
+
+ uint32_t value = val64;
+ uint8_t data;
+
+ switch (addr) {
+ case UART_CTRL:
+ if (value & UART_FLUSH_RX) {
+ fifo8_reset(&s->rx_fifo);
+ }
+ if (value & UART_BCLKEN) {
+ value = value | UART_BCLKRDY;
+ }
+ s->ctrl = value & ~(UART_FLUSH_RX | UART_FLUSH_TX);
+
+ /*
+ * Software can manage UART flow control manually by setting hfc_en
+ * in UART_CTRL. This would require emulating uart at a lower level,
+ * and is currently unimplemented.
+ */
+
+ return;
+ case UART_STATUS:
+ /* UART_STATUS is read only */
+ return;
+ case UART_INT_EN:
+ s->int_en = value;
+ return;
+ case UART_INT_FL:
+ s->int_fl = s->int_fl & ~(value);
+ max78000_update_irq(s);
+ return;
+ case UART_CLKDIV:
+ s->clkdiv = value;
+ return;
+ case UART_OSR:
+ s->osr = value;
+ return;
+ case UART_PNR:
+ s->pnr = value;
+ return;
+ case UART_FIFO:
+ data = value & 0xff;
+ /*
+ * XXX this blocks entire thread. Rewrite to use
+ * qemu_chr_fe_write and background I/O callbacks
+ */
+ qemu_chr_fe_write_all(&s->chr, &data, 1);
+
+ /* TX is always empty */
+ s->int_fl |= UART_TX_HE;
+ max78000_update_irq(s);
+
+ return;
+ case UART_DMA:
+ /* DMA not implemented */
+ s->dma = value;
+ return;
+ case UART_WKEN:
+ s->wken = value;
+ return;
+ case UART_WKFL:
+ s->wkfl = value;
+ return;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%"
+ HWADDR_PRIx "\n", __func__, addr);
+ }
+}
+
+static const MemoryRegionOps max78000_uart_ops = {
+ .read = max78000_uart_read,
+ .write = max78000_uart_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid.min_access_size = 4,
+ .valid.max_access_size = 4,
+};
+
+static const Property max78000_uart_properties[] = {
+ DEFINE_PROP_CHR("chardev", Max78000UartState, chr),
+};
+
+static const VMStateDescription max78000_uart_vmstate = {
+ .name = TYPE_MAX78000_UART,
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32(ctrl, Max78000UartState),
+ VMSTATE_UINT32(status, Max78000UartState),
+ VMSTATE_UINT32(int_en, Max78000UartState),
+ VMSTATE_UINT32(int_fl, Max78000UartState),
+ VMSTATE_UINT32(clkdiv, Max78000UartState),
+ VMSTATE_UINT32(osr, Max78000UartState),
+ VMSTATE_UINT32(txpeek, Max78000UartState),
+ VMSTATE_UINT32(pnr, Max78000UartState),
+ VMSTATE_UINT32(fifo, Max78000UartState),
+ VMSTATE_UINT32(dma, Max78000UartState),
+ VMSTATE_UINT32(wken, Max78000UartState),
+ VMSTATE_UINT32(wkfl, Max78000UartState),
+ VMSTATE_FIFO8(rx_fifo, Max78000UartState),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static void max78000_uart_init(Object *obj)
+{
+ Max78000UartState *s = MAX78000_UART(obj);
+ fifo8_create(&s->rx_fifo, 8);
+
+ sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irq);
+
+ memory_region_init_io(&s->mmio, obj, &max78000_uart_ops, s,
+ TYPE_MAX78000_UART, 0x400);
+ sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio);
+}
+
+static void max78000_uart_finalize(Object *obj)
+{
+ Max78000UartState *s = MAX78000_UART(obj);
+ fifo8_destroy(&s->rx_fifo);
+}
+
+static void max78000_uart_realize(DeviceState *dev, Error **errp)
+{
+ Max78000UartState *s = MAX78000_UART(dev);
+
+ qemu_chr_fe_set_handlers(&s->chr, max78000_uart_can_receive,
+ max78000_uart_receive, NULL, NULL,
+ s, NULL, true);
+}
+
+static void max78000_uart_class_init(ObjectClass *klass, const void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ ResettableClass *rc = RESETTABLE_CLASS(klass);
+
+ rc->phases.hold = max78000_uart_reset_hold;
+
+ device_class_set_props(dc, max78000_uart_properties);
+ dc->realize = max78000_uart_realize;
+
+ dc->vmsd = &max78000_uart_vmstate;
+}
+
+static const TypeInfo max78000_uart_info = {
+ .name = TYPE_MAX78000_UART,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(Max78000UartState),
+ .instance_init = max78000_uart_init,
+ .instance_finalize = max78000_uart_finalize,
+ .class_init = max78000_uart_class_init,
+};
+
+static void max78000_uart_register_types(void)
+{
+ type_register_static(&max78000_uart_info);
+}
+
+type_init(max78000_uart_register_types)
diff --git a/hw/char/meson.build b/hw/char/meson.build
index 4e439da..a9e1dc2 100644
--- a/hw/char/meson.build
+++ b/hw/char/meson.build
@@ -26,6 +26,7 @@ system_ss.add(when: 'CONFIG_AVR_USART', if_true: files('avr_usart.c'))
system_ss.add(when: 'CONFIG_COLDFIRE', if_true: files('mcf_uart.c'))
system_ss.add(when: 'CONFIG_DIGIC', if_true: files('digic-uart.c'))
system_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4210_uart.c'))
+system_ss.add(when: 'CONFIG_MAX78000_UART', if_true: files('max78000_uart.c'))
system_ss.add(when: 'CONFIG_OMAP', if_true: files('omap_uart.c'))
system_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2835_aux.c'))
system_ss.add(when: 'CONFIG_RENESAS_SCI', if_true: files('renesas_sci.c'))
diff --git a/hw/char/riscv_htif.c b/hw/char/riscv_htif.c
index c884be5..a78ea9b 100644
--- a/hw/char/riscv_htif.c
+++ b/hw/char/riscv_htif.c
@@ -29,7 +29,6 @@
#include "qemu/timer.h"
#include "qemu/error-report.h"
#include "system/address-spaces.h"
-#include "exec/tswap.h"
#include "system/dma.h"
#include "system/runstate.h"
#include "trace.h"
diff --git a/hw/char/sclpconsole-lm.c b/hw/char/sclpconsole-lm.c
index e9580aa..3e40d5e 100644
--- a/hw/char/sclpconsole-lm.c
+++ b/hw/char/sclpconsole-lm.c
@@ -214,7 +214,7 @@ static int process_mdb(SCLPEvent *event, MDBO *mdbo)
{
int rc;
int len;
- uint8_t buffer[SIZE_BUFFER];
+ QEMU_UNINITIALIZED uint8_t buffer[SIZE_BUFFER];
len = be16_to_cpu(mdbo->length);
len -= sizeof(mdbo->length) + sizeof(mdbo->type)
diff --git a/hw/char/serial-pci-multi.c b/hw/char/serial-pci-multi.c
index 13df272..34f30fb 100644
--- a/hw/char/serial-pci-multi.c
+++ b/hw/char/serial-pci-multi.c
@@ -180,7 +180,8 @@ static void multi_serial_init(Object *o)
size_t i, nports = multi_serial_get_port_count(PCI_DEVICE_GET_CLASS(dev));
for (i = 0; i < nports; i++) {
- qemu_init_irq(&pms->irqs[i], multi_serial_irq_mux, pms, i);
+ qemu_init_irq_child(o, "irq[*]", &pms->irqs[i],
+ multi_serial_irq_mux, pms, i);
object_initialize_child(o, "serial[*]", &pms->state[i], TYPE_SERIAL);
}
}
diff --git a/hw/char/sh_serial.c b/hw/char/sh_serial.c
index 6abd803..30447fa 100644
--- a/hw/char/sh_serial.c
+++ b/hw/char/sh_serial.c
@@ -78,10 +78,6 @@ struct SHSerialState {
qemu_irq bri;
};
-typedef struct {} SHSerialStateClass;
-
-OBJECT_DEFINE_TYPE(SHSerialState, sh_serial, SH_SERIAL, SYS_BUS_DEVICE)
-
static void sh_serial_clear_fifo(SHSerialState *s)
{
memset(s->rx_fifo, 0, SH_RX_FIFO_LENGTH);
@@ -434,17 +430,13 @@ static void sh_serial_realize(DeviceState *d, Error **errp)
s->etu = NANOSECONDS_PER_SECOND / 9600;
}
-static void sh_serial_finalize(Object *obj)
+static void sh_serial_unrealize(DeviceState *dev)
{
- SHSerialState *s = SH_SERIAL(obj);
+ SHSerialState *s = SH_SERIAL(dev);
timer_del(&s->fifo_timeout_timer);
}
-static void sh_serial_init(Object *obj)
-{
-}
-
static const Property sh_serial_properties[] = {
DEFINE_PROP_CHR("chardev", SHSerialState, chr),
DEFINE_PROP_UINT8("features", SHSerialState, feat, 0),
@@ -456,7 +448,19 @@ static void sh_serial_class_init(ObjectClass *oc, const void *data)
device_class_set_props(dc, sh_serial_properties);
dc->realize = sh_serial_realize;
+ dc->unrealize = sh_serial_unrealize;
device_class_set_legacy_reset(dc, sh_serial_reset);
/* Reason: part of SuperH CPU/SoC, needs to be wired up */
dc->user_creatable = false;
}
+
+static const TypeInfo sh_serial_types[] = {
+ {
+ .name = TYPE_SH_SERIAL,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(SHSerialState),
+ .class_init = sh_serial_class_init,
+ },
+};
+
+DEFINE_TYPES(sh_serial_types)
diff --git a/hw/char/sifive_uart.c b/hw/char/sifive_uart.c
index 0fc89e7..e7357d5 100644
--- a/hw/char/sifive_uart.c
+++ b/hw/char/sifive_uart.c
@@ -28,23 +28,18 @@
#define TX_INTERRUPT_TRIGGER_DELAY_NS 100
-/*
- * Not yet implemented:
- *
- * Transmit FIFO using "qemu/fifo8.h"
- */
-
/* Returns the state of the IP (interrupt pending) register */
-static uint64_t sifive_uart_ip(SiFiveUARTState *s)
+static uint32_t sifive_uart_ip(SiFiveUARTState *s)
{
- uint64_t ret = 0;
+ uint32_t ret = 0;
- uint64_t txcnt = SIFIVE_UART_GET_TXCNT(s->txctrl);
- uint64_t rxcnt = SIFIVE_UART_GET_RXCNT(s->rxctrl);
+ uint32_t txcnt = SIFIVE_UART_GET_TXCNT(s->txctrl);
+ uint32_t rxcnt = SIFIVE_UART_GET_RXCNT(s->rxctrl);
- if (txcnt != 0) {
+ if (fifo8_num_used(&s->tx_fifo) < txcnt) {
ret |= SIFIVE_UART_IP_TXWM;
}
+
if (s->rx_fifo_len > rxcnt) {
ret |= SIFIVE_UART_IP_RXWM;
}
@@ -55,15 +50,14 @@ static uint64_t sifive_uart_ip(SiFiveUARTState *s)
static void sifive_uart_update_irq(SiFiveUARTState *s)
{
int cond = 0;
- if ((s->ie & SIFIVE_UART_IE_TXWM) ||
- ((s->ie & SIFIVE_UART_IE_RXWM) && s->rx_fifo_len)) {
+ uint32_t ip = sifive_uart_ip(s);
+
+ if (((ip & SIFIVE_UART_IP_TXWM) && (s->ie & SIFIVE_UART_IE_TXWM)) ||
+ ((ip & SIFIVE_UART_IP_RXWM) && (s->ie & SIFIVE_UART_IE_RXWM))) {
cond = 1;
}
- if (cond) {
- qemu_irq_raise(s->irq);
- } else {
- qemu_irq_lower(s->irq);
- }
+
+ qemu_set_irq(s->irq, cond);
}
static gboolean sifive_uart_xmit(void *do_not_use, GIOCondition cond,
@@ -119,17 +113,21 @@ static void sifive_uart_write_tx_fifo(SiFiveUARTState *s, const uint8_t *buf,
if (size > fifo8_num_free(&s->tx_fifo)) {
size = fifo8_num_free(&s->tx_fifo);
- qemu_log_mask(LOG_GUEST_ERROR, "sifive_uart: TX FIFO overflow");
+ qemu_log_mask(LOG_GUEST_ERROR, "sifive_uart: TX FIFO overflow.\n");
}
- fifo8_push_all(&s->tx_fifo, buf, size);
+ if (size > 0) {
+ fifo8_push_all(&s->tx_fifo, buf, size);
+ }
if (fifo8_is_full(&s->tx_fifo)) {
s->txfifo |= SIFIVE_UART_TXFIFO_FULL;
}
- timer_mod(s->fifo_trigger_handle, current_time +
- TX_INTERRUPT_TRIGGER_DELAY_NS);
+ if (!timer_pending(s->fifo_trigger_handle)) {
+ timer_mod(s->fifo_trigger_handle, current_time +
+ TX_INTERRUPT_TRIGGER_DELAY_NS);
+ }
}
static uint64_t
diff --git a/hw/char/trace-events b/hw/char/trace-events
index 05a3303..9e74be2 100644
--- a/hw/char/trace-events
+++ b/hw/char/trace-events
@@ -58,15 +58,15 @@ imx_serial_write(const char *chrname, uint64_t addr, uint64_t value) "%s:[0x%03"
imx_serial_put_data(const char *chrname, uint32_t value) "%s: 0x%" PRIx32
# pl011.c
-pl011_irq_state(int level) "irq state %d"
-pl011_read(uint32_t addr, uint32_t value, const char *regname) "addr 0x%03x value 0x%08x reg %s"
-pl011_read_fifo(unsigned rx_fifo_used, size_t rx_fifo_depth) "RX FIFO read, used %u/%zu"
-pl011_write(uint32_t addr, uint32_t value, const char *regname) "addr 0x%03x value 0x%08x reg %s"
-pl011_can_receive(uint32_t lcr, unsigned rx_fifo_used, size_t rx_fifo_depth, unsigned rx_fifo_available) "LCR 0x%02x, RX FIFO used %u/%zu, can_receive %u chars"
-pl011_fifo_rx_put(uint32_t c, unsigned read_count, size_t rx_fifo_depth) "RX FIFO push char [0x%02x] %d/%zu depth used"
+pl011_irq_state(bool level) "irq state %d"
+pl011_read(uint64_t addr, uint32_t value, const char *regname) "addr 0x%03" PRIx64 " value 0x%08x reg %s"
+pl011_read_fifo(unsigned rx_fifo_used, unsigned rx_fifo_depth) "RX FIFO read, used %u/%u"
+pl011_write(uint64_t addr, uint32_t value, const char *regname) "addr 0x%03" PRIx64 " value 0x%08x reg %s"
+pl011_can_receive(uint32_t lcr, unsigned rx_fifo_used, unsigned rx_fifo_depth, unsigned rx_fifo_available) "LCR 0x%02x, RX FIFO used %u/%u, can_receive %u chars"
+pl011_fifo_rx_put(uint32_t c, unsigned read_count, unsigned rx_fifo_depth) "RX FIFO push char [0x%02x] %d/%u depth used"
pl011_fifo_rx_full(void) "RX FIFO now full, RXFF set"
pl011_baudrate_change(unsigned int baudrate, uint64_t clock, uint32_t ibrd, uint32_t fbrd) "new baudrate %u (clk: %" PRIu64 "hz, ibrd: %" PRIu32 ", fbrd: %" PRIu32 ")"
-pl011_receive(int size) "recv %d chars"
+pl011_receive(size_t size) "recv %zd chars"
# cmsdk-apb-uart.c
cmsdk_apb_uart_read(uint64_t offset, uint64_t data, unsigned size) "CMSDK APB UART read: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u"
diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
index 39e674a..8c306c8 100644
--- a/hw/core/cpu-common.c
+++ b/hw/core/cpu-common.c
@@ -67,27 +67,16 @@ CPUState *cpu_create(const char *typename)
return cpu;
}
-/* Resetting the IRQ comes from across the code base so we take the
- * BQL here if we need to. cpu_interrupt assumes it is held.*/
void cpu_reset_interrupt(CPUState *cpu, int mask)
{
- bool need_lock = !bql_locked();
-
- if (need_lock) {
- bql_lock();
- }
- cpu->interrupt_request &= ~mask;
- if (need_lock) {
- bql_unlock();
- }
+ qatomic_and(&cpu->interrupt_request, ~mask);
}
void cpu_exit(CPUState *cpu)
{
- qatomic_set(&cpu->exit_request, 1);
- /* Ensure cpu_exec will see the exit request after TCG has exited. */
- smp_wmb();
- qatomic_set(&cpu->neg.icount_decr.u16.high, -1);
+ /* Ensure cpu_exec will see the reason why the exit request was set. */
+ qatomic_store_release(&cpu->exit_request, true);
+ qemu_cpu_kick(cpu);
}
static int cpu_common_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg)
@@ -119,11 +108,6 @@ static void cpu_common_reset_hold(Object *obj, ResetType type)
{
CPUState *cpu = CPU(obj);
- if (qemu_loglevel_mask(CPU_LOG_RESET)) {
- qemu_log("CPU Reset (CPU %d)\n", cpu->cpu_index);
- log_cpu_state(cpu, cpu->cc->reset_dump_flags);
- }
-
cpu->interrupt_request = 0;
cpu->halted = cpu->start_powered_off;
cpu->mem_io_pc = 0;
@@ -137,6 +121,21 @@ static void cpu_common_reset_hold(Object *obj, ResetType type)
cpu_exec_reset_hold(cpu);
}
+static void cpu_common_reset_exit(Object *obj, ResetType type)
+{
+ if (qemu_loglevel_mask(CPU_LOG_RESET)) {
+ FILE *f = qemu_log_trylock();
+
+ if (f) {
+ CPUState *cpu = CPU(obj);
+
+ fprintf(f, "CPU Reset (CPU %d)\n", cpu->cpu_index);
+ cpu_dump_state(cpu, f, cpu->cc->reset_dump_flags);
+ qemu_log_unlock(f);
+ }
+ }
+}
+
ObjectClass *cpu_class_by_name(const char *typename, const char *cpu_model)
{
ObjectClass *oc;
@@ -295,6 +294,7 @@ void cpu_exec_unrealizefn(CPUState *cpu)
* accel_cpu_common_unrealize, which may free fields using call_rcu.
*/
accel_cpu_common_unrealize(cpu);
+ cpu_destroy_address_spaces(cpu);
}
static void cpu_common_initfn(Object *obj)
@@ -380,6 +380,7 @@ static void cpu_common_class_init(ObjectClass *klass, const void *data)
dc->realize = cpu_common_realizefn;
dc->unrealize = cpu_common_unrealizefn;
rc->phases.hold = cpu_common_reset_hold;
+ rc->phases.exit = cpu_common_reset_exit;
cpu_class_init_props(dc);
/*
* Reason: CPUs still need special care by board code: wiring up
diff --git a/hw/core/cpu-system.c b/hw/core/cpu-system.c
index 3c84176..f601a08 100644
--- a/hw/core/cpu-system.c
+++ b/hw/core/cpu-system.c
@@ -23,8 +23,7 @@
#include "system/address-spaces.h"
#include "exec/cputlb.h"
#include "system/memory.h"
-#include "exec/tb-flush.h"
-#include "exec/tswap.h"
+#include "qemu/target-info.h"
#include "hw/qdev-core.h"
#include "hw/qdev-properties.h"
#include "hw/core/sysemu-cpu-ops.h"
@@ -204,17 +203,9 @@ static int cpu_common_post_load(void *opaque, int version_id)
* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
* version_id is increased.
*/
- cpu->interrupt_request &= ~0x01;
+ cpu_reset_interrupt(cpu, 0x01);
tlb_flush(cpu);
-
- /*
- * loadvm has just updated the content of RAM, bypassing the
- * usual mechanisms that ensure we flush TBs for writes to
- * memory we've translated code from. So we must flush all TBs,
- * which will now be stale.
- */
- tb_flush(cpu);
}
return 0;
diff --git a/hw/core/irq.c b/hw/core/irq.c
index 6dd8d47..0c768f7 100644
--- a/hw/core/irq.c
+++ b/hw/core/irq.c
@@ -49,6 +49,14 @@ void qemu_init_irq(IRQState *irq, qemu_irq_handler handler, void *opaque,
init_irq_fields(irq, handler, opaque, n);
}
+void qemu_init_irq_child(Object *parent, const char *propname,
+ IRQState *irq, qemu_irq_handler handler,
+ void *opaque, int n)
+{
+ object_initialize_child(parent, propname, irq, TYPE_IRQ);
+ init_irq_fields(irq, handler, opaque, n);
+}
+
void qemu_init_irqs(IRQState irq[], size_t count,
qemu_irq_handler handler, void *opaque)
{
diff --git a/hw/core/loader.c b/hw/core/loader.c
index b792a54..477661a 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -295,10 +295,6 @@ static void *load_at(int fd, off_t offset, size_t size)
return ptr;
}
-#ifdef ELF_CLASS
-#undef ELF_CLASS
-#endif
-
#define ELF_CLASS ELFCLASS32
#include "elf.h"
@@ -1246,7 +1242,7 @@ static void rom_reset(void *unused)
* that the instruction cache for that new region is clear, so that the
* CPU definitely fetches its instructions from the just written data.
*/
- cpu_flush_icache_range(rom->addr, rom->datasize);
+ address_space_flush_icache_range(rom->as, rom->addr, rom->datasize);
trace_loader_write_rom(rom->name, rom->addr, rom->datasize, rom->isrom);
}
@@ -1333,20 +1329,6 @@ void rom_set_fw(FWCfgState *f)
fw_cfg = f;
}
-void rom_set_order_override(int order)
-{
- if (!fw_cfg)
- return;
- fw_cfg_set_order_override(fw_cfg, order);
-}
-
-void rom_reset_order_override(void)
-{
- if (!fw_cfg)
- return;
- fw_cfg_reset_order_override(fw_cfg);
-}
-
void rom_transaction_begin(void)
{
Rom *rom;
diff --git a/hw/core/machine-hmp-cmds.c b/hw/core/machine-hmp-cmds.c
index c6325cd..74a5660 100644
--- a/hw/core/machine-hmp-cmds.c
+++ b/hw/core/machine-hmp-cmds.c
@@ -18,6 +18,7 @@
#include "monitor/monitor.h"
#include "qapi/error.h"
#include "qapi/qapi-builtin-visit.h"
+#include "qapi/qapi-commands-accelerator.h"
#include "qapi/qapi-commands-machine.h"
#include "qobject/qdict.h"
#include "qapi/string-output-visitor.h"
@@ -32,6 +33,7 @@ void hmp_info_cpus(Monitor *mon, const QDict *qdict)
cpu_list = qmp_query_cpus_fast(NULL);
for (cpu = cpu_list; cpu; cpu = cpu->next) {
+ g_autofree char *cpu_model = cpu_model_from_type(cpu->value->qom_type);
int active = ' ';
if (cpu->value->cpu_index == monitor_get_cpu_index(mon)) {
@@ -40,7 +42,8 @@ void hmp_info_cpus(Monitor *mon, const QDict *qdict)
monitor_printf(mon, "%c CPU #%" PRId64 ":", active,
cpu->value->cpu_index);
- monitor_printf(mon, " thread_id=%" PRId64 "\n", cpu->value->thread_id);
+ monitor_printf(mon, " thread_id=%" PRId64 " model=%s\n",
+ cpu->value->thread_id, cpu_model);
}
qapi_free_CpuInfoFastList(cpu_list);
@@ -160,6 +163,24 @@ void hmp_info_kvm(Monitor *mon, const QDict *qdict)
qapi_free_KvmInfo(info);
}
+void hmp_info_accelerators(Monitor *mon, const QDict *qdict)
+{
+ AcceleratorInfo *info;
+ AcceleratorList *accel;
+
+ info = qmp_query_accelerators(NULL);
+ for (accel = info->present; accel; accel = accel->next) {
+ char trail = accel->next ? ' ' : '\n';
+ if (info->enabled == accel->value) {
+ monitor_printf(mon, "[%s]%c", Accelerator_str(accel->value), trail);
+ } else {
+ monitor_printf(mon, "%s%c", Accelerator_str(accel->value), trail);
+ }
+ }
+
+ qapi_free_AcceleratorInfo(info);
+}
+
void hmp_info_uuid(Monitor *mon, const QDict *qdict)
{
UuidInfo *info;
diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c
index d82043e..51d5c23 100644
--- a/hw/core/machine-qmp-cmds.c
+++ b/hw/core/machine-qmp-cmds.c
@@ -14,12 +14,13 @@
#include "hw/mem/memory-device.h"
#include "qapi/error.h"
#include "qapi/qapi-builtin-visit.h"
+#include "qapi/qapi-commands-accelerator.h"
#include "qapi/qapi-commands-machine.h"
#include "qobject/qobject.h"
#include "qapi/qobject-input-visitor.h"
#include "qapi/type-helpers.h"
#include "qemu/uuid.h"
-#include "qemu/target-info.h"
+#include "qemu/target-info-qapi.h"
#include "qom/qom-qobject.h"
#include "system/hostmem.h"
#include "system/hw_accel.h"
@@ -27,6 +28,30 @@
#include "system/runstate.h"
#include "system/system.h"
#include "hw/s390x/storage-keys.h"
+#include <sys/stat.h>
+
+/*
+ * QMP query for enabled and present accelerators
+ */
+AcceleratorInfo *qmp_query_accelerators(Error **errp)
+{
+ AcceleratorInfo *info = g_malloc0(sizeof(*info));
+ AccelClass *current_class = ACCEL_GET_CLASS(current_accel());
+ int i;
+
+ for (i = ACCELERATOR__MAX; i-- > 0; ) {
+ const char *s = Accelerator_str(i);
+ AccelClass *this_class = accel_find(s);
+
+ if (this_class) {
+ QAPI_LIST_PREPEND(info->present, i);
+ if (this_class == current_class) {
+ info->enabled = i;
+ }
+ }
+ }
+ return info;
+}
/*
* fast means: we NEVER interrupt vCPU threads to retrieve
@@ -37,8 +62,7 @@ CpuInfoFastList *qmp_query_cpus_fast(Error **errp)
MachineState *ms = MACHINE(qdev_get_machine());
MachineClass *mc = MACHINE_GET_CLASS(ms);
CpuInfoFastList *head = NULL, **tail = &head;
- SysEmuTarget target = qapi_enum_parse(&SysEmuTarget_lookup, target_name(),
- -1, &error_abort);
+ SysEmuTarget target = target_arch();
CPUState *cpu;
CPU_FOREACH(cpu) {
@@ -47,6 +71,7 @@ CpuInfoFastList *qmp_query_cpus_fast(Error **errp)
value->cpu_index = cpu->cpu_index;
value->qom_path = object_get_canonical_path(OBJECT(cpu));
value->thread_id = cpu->thread_id;
+ value->qom_type = g_strdup(object_get_typename(OBJECT(cpu)));
if (mc->cpu_index_to_instance_props) {
CpuInstanceProperties *props;
@@ -139,8 +164,7 @@ QemuTargetInfo *qmp_query_target(Error **errp)
{
QemuTargetInfo *info = g_malloc0(sizeof(*info));
- info->arch = qapi_enum_parse(&SysEmuTarget_lookup, target_name(), -1,
- &error_abort);
+ info->arch = target_arch();
return info;
}
diff --git a/hw/core/machine.c b/hw/core/machine.c
index b8ae155..681adbb 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -35,9 +35,20 @@
#include "hw/virtio/virtio-pci.h"
#include "hw/virtio/virtio-net.h"
#include "hw/virtio/virtio-iommu.h"
+#include "hw/acpi/generic_event_device.h"
#include "audio/audio.h"
-GlobalProperty hw_compat_10_0[] = {};
+GlobalProperty hw_compat_10_1[] = {
+ { TYPE_ACPI_GED, "x-has-hest-addr", "false" },
+};
+const size_t hw_compat_10_1_len = G_N_ELEMENTS(hw_compat_10_1);
+
+GlobalProperty hw_compat_10_0[] = {
+ { "scsi-hd", "dpofua", "off" },
+ { "vfio-pci", "x-migration-load-config-after-iter", "off" },
+ { "ramfb", "use-legacy-x86-rom", "true"},
+ { "vfio-pci-nohotplug", "use-legacy-x86-rom", "true" },
+};
const size_t hw_compat_10_0_len = G_N_ELEMENTS(hw_compat_10_0);
GlobalProperty hw_compat_9_2[] = {
@@ -283,24 +294,6 @@ GlobalProperty hw_compat_2_6[] = {
};
const size_t hw_compat_2_6_len = G_N_ELEMENTS(hw_compat_2_6);
-GlobalProperty hw_compat_2_5[] = {
- { "isa-fdc", "fallback", "144" },
- { "pvscsi", "x-old-pci-configuration", "on" },
- { "pvscsi", "x-disable-pcie", "on" },
- { "vmxnet3", "x-old-msi-offsets", "on" },
- { "vmxnet3", "x-disable-pcie", "on" },
-};
-const size_t hw_compat_2_5_len = G_N_ELEMENTS(hw_compat_2_5);
-
-GlobalProperty hw_compat_2_4[] = {
- { "e1000", "extra_mac_registers", "off" },
- { "virtio-pci", "x-disable-pcie", "on" },
- { "virtio-pci", "migrate-extra", "off" },
- { "fw_cfg_mem", "dma_enabled", "off" },
- { "fw_cfg_io", "dma_enabled", "off" }
-};
-const size_t hw_compat_2_4_len = G_N_ELEMENTS(hw_compat_2_4);
-
MachineState *current_machine;
static char *machine_get_kernel(Object *obj, Error **errp)
@@ -593,6 +586,20 @@ static void machine_set_nvdimm(Object *obj, bool value, Error **errp)
ms->nvdimms_state->is_enabled = value;
}
+static bool machine_get_spcr(Object *obj, Error **errp)
+{
+ MachineState *ms = MACHINE(obj);
+
+ return ms->acpi_spcr_enabled;
+}
+
+static void machine_set_spcr(Object *obj, bool value, Error **errp)
+{
+ MachineState *ms = MACHINE(obj);
+
+ ms->acpi_spcr_enabled = value;
+}
+
static bool machine_get_hmat(Object *obj, Error **errp)
{
MachineState *ms = MACHINE(obj);
@@ -1111,8 +1118,11 @@ static void machine_class_init(ObjectClass *oc, const void *data)
* SMBIOS 3.1.0 7.18.5 Memory Device — Extended Size
* use max possible value that could be encoded into
* 'Extended Size' field (2047Tb).
+ *
+ * Unfortunately (current) Windows Server 2025 and earlier do not handle
+ * 4Tb+ DIMM size.
*/
- mc->smbios_memory_device_size = 2047 * TiB;
+ mc->smbios_memory_device_size = 2 * TiB;
/* numa node memory size aligned on 8MB by default.
* On Linux, each node's border has to be 8MB aligned
@@ -1297,6 +1307,14 @@ static void machine_initfn(Object *obj)
"Table (HMAT)");
}
+ /* SPCR */
+ ms->acpi_spcr_enabled = true;
+ object_property_add_bool(obj, "spcr", machine_get_spcr, machine_set_spcr);
+ object_property_set_description(obj, "spcr",
+ "Set on/off to enable/disable "
+ "ACPI Serial Port Console Redirection "
+ "Table (spcr)");
+
/* default to mc->default_cpus */
ms->smp.cpus = mc->default_cpus;
ms->smp.max_cpus = mc->default_cpus;
diff --git a/hw/core/meson.build b/hw/core/meson.build
index 547de65..b5a545a 100644
--- a/hw/core/meson.build
+++ b/hw/core/meson.build
@@ -26,7 +26,7 @@ system_ss.add(when: 'CONFIG_XILINX_AXI', if_true: files('stream.c'))
system_ss.add(when: 'CONFIG_PLATFORM_BUS', if_true: files('sysbus-fdt.c'))
system_ss.add(when: 'CONFIG_EIF', if_true: [files('eif.c'), zlib, libcbor, gnutls])
-libsystem_ss.add(files(
+system_ss.add(files(
'cpu-system.c',
'fw-path-provider.c',
'gpio.c',
@@ -46,7 +46,7 @@ libsystem_ss.add(files(
'vm-change-state-handler.c',
'clock-vmstate.c',
))
-libuser_ss.add(files(
+user_ss.add(files(
'cpu-user.c',
'qdev-user.c',
))
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 8e11e63..1f810b7 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -145,6 +145,7 @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name,
if (ctx != bdrv_get_aio_context(bs)) {
error_setg(errp, "Different aio context is not supported for new "
"node");
+ return;
}
blk_replace_bs(blk, bs, errp);
@@ -1298,3 +1299,47 @@ const PropertyInfo qdev_prop_vmapple_virtio_blk_variant = {
.set = qdev_propinfo_set_enum,
.set_default_value = qdev_propinfo_set_default_value_enum,
};
+
+/* --- VirtIOGPUOutputList --- */
+
+static void get_virtio_gpu_output_list(Object *obj, Visitor *v,
+ const char *name, void *opaque, Error **errp)
+{
+ VirtIOGPUOutputList **prop_ptr =
+ object_field_prop_ptr(obj, opaque);
+
+ visit_type_VirtIOGPUOutputList(v, name, prop_ptr, errp);
+}
+
+static void set_virtio_gpu_output_list(Object *obj, Visitor *v,
+ const char *name, void *opaque, Error **errp)
+{
+ VirtIOGPUOutputList **prop_ptr =
+ object_field_prop_ptr(obj, opaque);
+ VirtIOGPUOutputList *list;
+
+ if (!visit_type_VirtIOGPUOutputList(v, name, &list, errp)) {
+ return;
+ }
+
+ qapi_free_VirtIOGPUOutputList(*prop_ptr);
+ *prop_ptr = list;
+}
+
+static void release_virtio_gpu_output_list(Object *obj,
+ const char *name, void *opaque)
+{
+ VirtIOGPUOutputList **prop_ptr =
+ object_field_prop_ptr(obj, opaque);
+
+ qapi_free_VirtIOGPUOutputList(*prop_ptr);
+ *prop_ptr = NULL;
+}
+
+const PropertyInfo qdev_prop_virtio_gpu_output_list = {
+ .type = "VirtIOGPUOutputList",
+ .description = "VirtIO GPU output list [{\"name\":\"<name>\"},...]",
+ .get = get_virtio_gpu_output_list,
+ .set = set_virtio_gpu_output_list,
+ .release = release_virtio_gpu_output_list,
+};
diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c
index 147b3ff..b7e8a89 100644
--- a/hw/core/qdev-properties.c
+++ b/hw/core/qdev-properties.c
@@ -2,6 +2,7 @@
#include "hw/qdev-properties.h"
#include "qapi/error.h"
#include "qapi/qapi-types-misc.h"
+#include "qapi/qapi-visit-common.h"
#include "qobject/qlist.h"
#include "qemu/ctype.h"
#include "qemu/error-report.h"
@@ -180,7 +181,8 @@ const PropertyInfo qdev_prop_bit = {
static uint64_t qdev_get_prop_mask64(const Property *prop)
{
- assert(prop->info == &qdev_prop_bit64);
+ assert(prop->info == &qdev_prop_bit64 ||
+ prop->info == &qdev_prop_on_off_auto_bit64);
return 0x1ull << prop->bitnr;
}
@@ -225,6 +227,69 @@ const PropertyInfo qdev_prop_bit64 = {
.set_default_value = set_default_value_bool,
};
+static void prop_get_on_off_auto_bit64(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+ Property *prop = opaque;
+ OnOffAutoBit64 *p = object_field_prop_ptr(obj, prop);
+ OnOffAuto value;
+ uint64_t mask = qdev_get_prop_mask64(prop);
+
+ if (p->auto_bits & mask) {
+ value = ON_OFF_AUTO_AUTO;
+ } else if (p->on_bits & mask) {
+ value = ON_OFF_AUTO_ON;
+ } else {
+ value = ON_OFF_AUTO_OFF;
+ }
+
+ visit_type_OnOffAuto(v, name, &value, errp);
+}
+
+static void prop_set_on_off_auto_bit64(Object *obj, Visitor *v,
+ const char *name, void *opaque,
+ Error **errp)
+{
+ Property *prop = opaque;
+ OnOffAutoBit64 *p = object_field_prop_ptr(obj, prop);
+ OnOffAuto value;
+ uint64_t mask = qdev_get_prop_mask64(prop);
+
+ if (!visit_type_OnOffAuto(v, name, &value, errp)) {
+ return;
+ }
+
+ switch (value) {
+ case ON_OFF_AUTO_AUTO:
+ p->on_bits &= ~mask;
+ p->auto_bits |= mask;
+ break;
+
+ case ON_OFF_AUTO_ON:
+ p->on_bits |= mask;
+ p->auto_bits &= ~mask;
+ break;
+
+ case ON_OFF_AUTO_OFF:
+ p->on_bits &= ~mask;
+ p->auto_bits &= ~mask;
+ break;
+
+ case ON_OFF_AUTO__MAX:
+ g_assert_not_reached();
+ }
+}
+
+const PropertyInfo qdev_prop_on_off_auto_bit64 = {
+ .type = "OnOffAuto",
+ .description = "on/off/auto",
+ .enum_table = &OnOffAuto_lookup,
+ .get = prop_get_on_off_auto_bit64,
+ .set = prop_set_on_off_auto_bit64,
+ .set_default_value = qdev_propinfo_set_default_value_enum,
+};
+
/* --- bool --- */
static void get_bool(Object *obj, Visitor *v, const char *name, void *opaque,
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index f600226..fab42a7 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -411,6 +411,35 @@ char *qdev_get_dev_path(DeviceState *dev)
return NULL;
}
+const char *qdev_get_printable_name(DeviceState *vdev)
+{
+ /*
+ * Return device ID if explicity set
+ * (e.g. -device virtio-blk-pci,id=foo)
+ * This allows users to correlate errors with their custom device
+ * names.
+ */
+ if (vdev->id) {
+ return vdev->id;
+ }
+ /*
+ * Fall back to the canonical QOM device path (eg. ID for PCI
+ * devices).
+ * This ensures the device is still uniquely and meaningfully
+ * identified.
+ */
+ const char *path = qdev_get_dev_path(vdev);
+ if (path) {
+ return path;
+ }
+
+ /*
+ * Final fallback: if all else fails, return a placeholder string.
+ * This ensures the error message always contains a valid string.
+ */
+ return "<unknown device>";
+}
+
void qdev_add_unplug_blocker(DeviceState *dev, Error *reason)
{
dev->unplug_blockers = g_slist_prepend(dev->unplug_blockers, reason);
diff --git a/hw/core/register.c b/hw/core/register.c
index 8f63d9f..3340df7 100644
--- a/hw/core/register.c
+++ b/hw/core/register.c
@@ -314,7 +314,6 @@ RegisterInfoArray *register_init_block64(DeviceState *owner,
void register_finalize_block(RegisterInfoArray *r_array)
{
- object_unparent(OBJECT(&r_array->mem));
g_free(r_array->r);
g_free(r_array);
}
diff --git a/hw/core/sysbus-fdt.c b/hw/core/sysbus-fdt.c
index c339a27..59f1d17 100644
--- a/hw/core/sysbus-fdt.c
+++ b/hw/core/sysbus-fdt.c
@@ -31,11 +31,8 @@
#include "qemu/error-report.h"
#include "system/device_tree.h"
#include "system/tpm.h"
+#include "hw/arm/smmuv3.h"
#include "hw/platform-bus.h"
-#include "hw/vfio/vfio-platform.h"
-#include "hw/vfio/vfio-calxeda-xgmac.h"
-#include "hw/vfio/vfio-amd-xgbe.h"
-#include "hw/vfio/vfio-region.h"
#include "hw/display/ramfb.h"
#include "hw/uefi/var-service-api.h"
#include "hw/arm/fdt.h"
@@ -66,380 +63,6 @@ typedef struct HostProperty {
bool optional;
} HostProperty;
-#ifdef CONFIG_LINUX
-
-/**
- * copy_properties_from_host
- *
- * copies properties listed in an array from host device tree to
- * guest device tree. If a non optional property is not found, the
- * function asserts. An optional property is ignored if not found
- * in the host device tree.
- * @props: array of HostProperty to copy
- * @nb_props: number of properties in the array
- * @host_dt: host device tree blob
- * @guest_dt: guest device tree blob
- * @node_path: host dt node path where the property is supposed to be
- found
- * @nodename: guest node name the properties should be added to
- */
-static void copy_properties_from_host(HostProperty *props, int nb_props,
- void *host_fdt, void *guest_fdt,
- char *node_path, char *nodename)
-{
- int i, prop_len;
- const void *r;
- Error *err = NULL;
-
- for (i = 0; i < nb_props; i++) {
- r = qemu_fdt_getprop(host_fdt, node_path,
- props[i].name,
- &prop_len,
- &err);
- if (r) {
- qemu_fdt_setprop(guest_fdt, nodename,
- props[i].name, r, prop_len);
- } else {
- if (props[i].optional && prop_len == -FDT_ERR_NOTFOUND) {
- /* optional property does not exist */
- error_free(err);
- } else {
- error_report_err(err);
- }
- if (!props[i].optional) {
- /* mandatory property not found: bail out */
- exit(1);
- }
- err = NULL;
- }
- }
-}
-
-/* clock properties whose values are copied/pasted from host */
-static HostProperty clock_copied_properties[] = {
- {"compatible", false},
- {"#clock-cells", false},
- {"clock-frequency", true},
- {"clock-output-names", true},
-};
-
-/**
- * fdt_build_clock_node
- *
- * Build a guest clock node, used as a dependency from a passthrough'ed
- * device. Most information are retrieved from the host clock node.
- * Also check the host clock is a fixed one.
- *
- * @host_fdt: host device tree blob from which info are retrieved
- * @guest_fdt: guest device tree blob where the clock node is added
- * @host_phandle: phandle of the clock in host device tree
- * @guest_phandle: phandle to assign to the guest node
- */
-static void fdt_build_clock_node(void *host_fdt, void *guest_fdt,
- uint32_t host_phandle,
- uint32_t guest_phandle)
-{
- char *node_path = NULL;
- char *nodename;
- const void *r;
- int ret, node_offset, prop_len, path_len = 16;
-
- node_offset = fdt_node_offset_by_phandle(host_fdt, host_phandle);
- if (node_offset <= 0) {
- error_report("not able to locate clock handle %d in host device tree",
- host_phandle);
- exit(1);
- }
- node_path = g_malloc(path_len);
- while ((ret = fdt_get_path(host_fdt, node_offset, node_path, path_len))
- == -FDT_ERR_NOSPACE) {
- path_len += 16;
- node_path = g_realloc(node_path, path_len);
- }
- if (ret < 0) {
- error_report("not able to retrieve node path for clock handle %d",
- host_phandle);
- exit(1);
- }
-
- r = qemu_fdt_getprop(host_fdt, node_path, "compatible", &prop_len,
- &error_fatal);
- if (strcmp(r, "fixed-clock")) {
- error_report("clock handle %d is not a fixed clock", host_phandle);
- exit(1);
- }
-
- nodename = strrchr(node_path, '/');
- qemu_fdt_add_subnode(guest_fdt, nodename);
-
- copy_properties_from_host(clock_copied_properties,
- ARRAY_SIZE(clock_copied_properties),
- host_fdt, guest_fdt,
- node_path, nodename);
-
- qemu_fdt_setprop_cell(guest_fdt, nodename, "phandle", guest_phandle);
-
- g_free(node_path);
-}
-
-/**
- * sysfs_to_dt_name: convert the name found in sysfs into the node name
- * for instance e0900000.xgmac is converted into xgmac@e0900000
- * @sysfs_name: directory name in sysfs
- *
- * returns the device tree name upon success or NULL in case the sysfs name
- * does not match the expected format
- */
-static char *sysfs_to_dt_name(const char *sysfs_name)
-{
- gchar **substrings = g_strsplit(sysfs_name, ".", 2);
- char *dt_name = NULL;
-
- if (!substrings || !substrings[0] || !substrings[1]) {
- goto out;
- }
- dt_name = g_strdup_printf("%s@%s", substrings[1], substrings[0]);
-out:
- g_strfreev(substrings);
- return dt_name;
-}
-
-/* Device Specific Code */
-
-/**
- * add_calxeda_midway_xgmac_fdt_node
- *
- * Generates a simple node with following properties:
- * compatible string, regs, interrupts, dma-coherent
- */
-static int add_calxeda_midway_xgmac_fdt_node(SysBusDevice *sbdev, void *opaque)
-{
- PlatformBusFDTData *data = opaque;
- PlatformBusDevice *pbus = data->pbus;
- void *fdt = data->fdt;
- const char *parent_node = data->pbus_node_name;
- int compat_str_len, i;
- char *nodename;
- uint32_t *irq_attr, *reg_attr;
- uint64_t mmio_base, irq_number;
- VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
- VFIODevice *vbasedev = &vdev->vbasedev;
-
- mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, 0);
- nodename = g_strdup_printf("%s/%s@%" PRIx64, parent_node,
- vbasedev->name, mmio_base);
- qemu_fdt_add_subnode(fdt, nodename);
-
- compat_str_len = strlen(vdev->compat) + 1;
- qemu_fdt_setprop(fdt, nodename, "compatible",
- vdev->compat, compat_str_len);
-
- qemu_fdt_setprop(fdt, nodename, "dma-coherent", "", 0);
-
- reg_attr = g_new(uint32_t, vbasedev->num_regions * 2);
- for (i = 0; i < vbasedev->num_regions; i++) {
- mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i);
- reg_attr[2 * i] = cpu_to_be32(mmio_base);
- reg_attr[2 * i + 1] = cpu_to_be32(
- memory_region_size(vdev->regions[i]->mem));
- }
- qemu_fdt_setprop(fdt, nodename, "reg", reg_attr,
- vbasedev->num_regions * 2 * sizeof(uint32_t));
-
- irq_attr = g_new(uint32_t, vbasedev->num_irqs * 3);
- for (i = 0; i < vbasedev->num_irqs; i++) {
- irq_number = platform_bus_get_irqn(pbus, sbdev , i)
- + data->irq_start;
- irq_attr[3 * i] = cpu_to_be32(GIC_FDT_IRQ_TYPE_SPI);
- irq_attr[3 * i + 1] = cpu_to_be32(irq_number);
- irq_attr[3 * i + 2] = cpu_to_be32(GIC_FDT_IRQ_FLAGS_LEVEL_HI);
- }
- qemu_fdt_setprop(fdt, nodename, "interrupts",
- irq_attr, vbasedev->num_irqs * 3 * sizeof(uint32_t));
- g_free(irq_attr);
- g_free(reg_attr);
- g_free(nodename);
- return 0;
-}
-
-/* AMD xgbe properties whose values are copied/pasted from host */
-static HostProperty amd_xgbe_copied_properties[] = {
- {"compatible", false},
- {"dma-coherent", true},
- {"amd,per-channel-interrupt", true},
- {"phy-mode", false},
- {"mac-address", true},
- {"amd,speed-set", false},
- {"amd,serdes-blwc", true},
- {"amd,serdes-cdr-rate", true},
- {"amd,serdes-pq-skew", true},
- {"amd,serdes-tx-amp", true},
- {"amd,serdes-dfe-tap-config", true},
- {"amd,serdes-dfe-tap-enable", true},
- {"clock-names", false},
-};
-
-/**
- * add_amd_xgbe_fdt_node
- *
- * Generates the combined xgbe/phy node following kernel >=4.2
- * binding documentation:
- * Documentation/devicetree/bindings/net/amd-xgbe.txt:
- * Also 2 clock nodes are created (dma and ptp)
- *
- * Asserts in case of error
- */
-static int add_amd_xgbe_fdt_node(SysBusDevice *sbdev, void *opaque)
-{
- PlatformBusFDTData *data = opaque;
- PlatformBusDevice *pbus = data->pbus;
- VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
- VFIODevice *vbasedev = &vdev->vbasedev;
- VFIOINTp *intp;
- const char *parent_node = data->pbus_node_name;
- char **node_path, *nodename, *dt_name;
- void *guest_fdt = data->fdt, *host_fdt;
- const void *r;
- int i, prop_len;
- uint32_t *irq_attr, *reg_attr;
- const uint32_t *host_clock_phandles;
- uint64_t mmio_base, irq_number;
- uint32_t guest_clock_phandles[2];
-
- host_fdt = load_device_tree_from_sysfs();
-
- dt_name = sysfs_to_dt_name(vbasedev->name);
- if (!dt_name) {
- error_report("%s incorrect sysfs device name %s",
- __func__, vbasedev->name);
- exit(1);
- }
- node_path = qemu_fdt_node_path(host_fdt, dt_name, vdev->compat,
- &error_fatal);
- if (!node_path || !node_path[0]) {
- error_report("%s unable to retrieve node path for %s/%s",
- __func__, dt_name, vdev->compat);
- exit(1);
- }
-
- if (node_path[1]) {
- error_report("%s more than one node matching %s/%s!",
- __func__, dt_name, vdev->compat);
- exit(1);
- }
-
- g_free(dt_name);
-
- if (vbasedev->num_regions != 5) {
- error_report("%s Does the host dt node combine XGBE/PHY?", __func__);
- exit(1);
- }
-
- /* generate nodes for DMA_CLK and PTP_CLK */
- r = qemu_fdt_getprop(host_fdt, node_path[0], "clocks",
- &prop_len, &error_fatal);
- if (prop_len != 8) {
- error_report("%s clocks property should contain 2 handles", __func__);
- exit(1);
- }
- host_clock_phandles = r;
- guest_clock_phandles[0] = qemu_fdt_alloc_phandle(guest_fdt);
- guest_clock_phandles[1] = qemu_fdt_alloc_phandle(guest_fdt);
-
- /**
- * clock handles fetched from host dt are in be32 layout whereas
- * rest of the code uses cpu layout. Also guest clock handles are
- * in cpu layout.
- */
- fdt_build_clock_node(host_fdt, guest_fdt,
- be32_to_cpu(host_clock_phandles[0]),
- guest_clock_phandles[0]);
-
- fdt_build_clock_node(host_fdt, guest_fdt,
- be32_to_cpu(host_clock_phandles[1]),
- guest_clock_phandles[1]);
-
- /* combined XGBE/PHY node */
- mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, 0);
- nodename = g_strdup_printf("%s/%s@%" PRIx64, parent_node,
- vbasedev->name, mmio_base);
- qemu_fdt_add_subnode(guest_fdt, nodename);
-
- copy_properties_from_host(amd_xgbe_copied_properties,
- ARRAY_SIZE(amd_xgbe_copied_properties),
- host_fdt, guest_fdt,
- node_path[0], nodename);
-
- qemu_fdt_setprop_cells(guest_fdt, nodename, "clocks",
- guest_clock_phandles[0],
- guest_clock_phandles[1]);
-
- reg_attr = g_new(uint32_t, vbasedev->num_regions * 2);
- for (i = 0; i < vbasedev->num_regions; i++) {
- mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, i);
- reg_attr[2 * i] = cpu_to_be32(mmio_base);
- reg_attr[2 * i + 1] = cpu_to_be32(
- memory_region_size(vdev->regions[i]->mem));
- }
- qemu_fdt_setprop(guest_fdt, nodename, "reg", reg_attr,
- vbasedev->num_regions * 2 * sizeof(uint32_t));
-
- irq_attr = g_new(uint32_t, vbasedev->num_irqs * 3);
- for (i = 0; i < vbasedev->num_irqs; i++) {
- irq_number = platform_bus_get_irqn(pbus, sbdev , i)
- + data->irq_start;
- irq_attr[3 * i] = cpu_to_be32(GIC_FDT_IRQ_TYPE_SPI);
- irq_attr[3 * i + 1] = cpu_to_be32(irq_number);
- /*
- * General device interrupt and PCS auto-negotiation interrupts are
- * level-sensitive while the 4 per-channel interrupts are edge
- * sensitive
- */
- QLIST_FOREACH(intp, &vdev->intp_list, next) {
- if (intp->pin == i) {
- break;
- }
- }
- if (intp->flags & VFIO_IRQ_INFO_AUTOMASKED) {
- irq_attr[3 * i + 2] = cpu_to_be32(GIC_FDT_IRQ_FLAGS_LEVEL_HI);
- } else {
- irq_attr[3 * i + 2] = cpu_to_be32(GIC_FDT_IRQ_FLAGS_EDGE_LO_HI);
- }
- }
- qemu_fdt_setprop(guest_fdt, nodename, "interrupts",
- irq_attr, vbasedev->num_irqs * 3 * sizeof(uint32_t));
-
- g_free(host_fdt);
- g_strfreev(node_path);
- g_free(irq_attr);
- g_free(reg_attr);
- g_free(nodename);
- return 0;
-}
-
-/* DT compatible matching */
-static bool vfio_platform_match(SysBusDevice *sbdev,
- const BindingEntry *entry)
-{
- VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
- const char *compat;
- unsigned int n;
-
- for (n = vdev->num_compat, compat = vdev->compat; n > 0;
- n--, compat += strlen(compat) + 1) {
- if (!strcmp(entry->compat, compat)) {
- return true;
- }
- }
-
- return false;
-}
-
-#define VFIO_PLATFORM_BINDING(compat, add_fn) \
- {TYPE_VFIO_PLATFORM, (compat), (add_fn), vfio_platform_match}
-
-#endif /* CONFIG_LINUX */
-
#ifdef CONFIG_TPM
/*
* add_tpm_tis_fdt_node: Create a DT node for TPM TIS
@@ -510,14 +133,11 @@ static bool type_match(SysBusDevice *sbdev, const BindingEntry *entry)
/* list of supported dynamic sysbus bindings */
static const BindingEntry bindings[] = {
-#ifdef CONFIG_LINUX
- TYPE_BINDING(TYPE_VFIO_CALXEDA_XGMAC, add_calxeda_midway_xgmac_fdt_node),
- TYPE_BINDING(TYPE_VFIO_AMD_XGBE, add_amd_xgbe_fdt_node),
- VFIO_PLATFORM_BINDING("amd,xgbe-seattle-v1a", add_amd_xgbe_fdt_node),
-#endif
#ifdef CONFIG_TPM
TYPE_BINDING(TYPE_TPM_TIS_SYSBUS, add_tpm_tis_fdt_node),
#endif
+ /* No generic DT support for smmuv3 dev. Support added for arm virt only */
+ TYPE_BINDING(TYPE_ARM_SMMUV3, no_fdt_node),
TYPE_BINDING(TYPE_RAMFB_DEVICE, no_fdt_node),
TYPE_BINDING(TYPE_UEFI_VARS_SYSBUS, add_uefi_vars_node),
TYPE_BINDING("", NULL), /* last element */
diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c
index e71367a..ec69e87 100644
--- a/hw/core/sysbus.c
+++ b/hw/core/sysbus.c
@@ -151,6 +151,17 @@ void sysbus_mmio_map(SysBusDevice *dev, int n, hwaddr addr)
sysbus_mmio_map_common(dev, n, addr, false, 0);
}
+int sysbus_mmio_map_name(SysBusDevice *dev, const char *name, hwaddr addr)
+{
+ for (int i = 0; i < dev->num_mmio; i++) {
+ if (!strcmp(dev->mmio[i].memory->name, name)) {
+ sysbus_mmio_map(dev, i, addr);
+ return i;
+ }
+ }
+ return -1;
+}
+
void sysbus_mmio_map_overlap(SysBusDevice *dev, int n, hwaddr addr,
int priority)
{
diff --git a/hw/cxl/cxl-events.c b/hw/cxl/cxl-events.c
index 12dee2e..7583dd9 100644
--- a/hw/cxl/cxl-events.c
+++ b/hw/cxl/cxl-events.c
@@ -8,8 +8,6 @@
*/
#include "qemu/osdep.h"
-
-#include "qemu/bswap.h"
#include "qemu/error-report.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
@@ -260,3 +258,41 @@ void cxl_event_irq_assert(CXLType3Dev *ct3d)
}
}
}
+
+void cxl_create_dc_event_records_for_extents(CXLType3Dev *ct3d,
+ CXLDCEventType type,
+ CXLDCExtentRaw extents[],
+ uint32_t ext_count)
+{
+ CXLEventDynamicCapacity event_rec = {};
+ int i;
+
+ cxl_assign_event_header(&event_rec.hdr,
+ &dynamic_capacity_uuid,
+ (1 << CXL_EVENT_TYPE_INFO),
+ sizeof(event_rec),
+ cxl_device_get_timestamp(&ct3d->cxl_dstate));
+ event_rec.type = type;
+ event_rec.validity_flags = 1;
+ event_rec.host_id = 0;
+ event_rec.updated_region_id = 0;
+ event_rec.extents_avail = CXL_NUM_EXTENTS_SUPPORTED -
+ ct3d->dc.total_extent_count;
+
+ for (i = 0; i < ext_count; i++) {
+ memcpy(&event_rec.dynamic_capacity_extent,
+ &extents[i],
+ sizeof(CXLDCExtentRaw));
+ event_rec.flags = 0;
+ if (i < ext_count - 1) {
+ /* Set "More" flag */
+ event_rec.flags |= BIT(0);
+ }
+
+ if (cxl_event_insert(&ct3d->cxl_dstate,
+ CXL_EVENT_TYPE_DYNAMIC_CAP,
+ (CXLEventRecordRaw *)&event_rec)) {
+ cxl_event_irq_assert(ct3d);
+ }
+ }
+}
diff --git a/hw/cxl/cxl-host-stubs.c b/hw/cxl/cxl-host-stubs.c
index cae4afc..c015baa 100644
--- a/hw/cxl/cxl-host-stubs.c
+++ b/hw/cxl/cxl-host-stubs.c
@@ -8,8 +8,13 @@
#include "hw/cxl/cxl.h"
#include "hw/cxl/cxl_host.h"
-void cxl_fmws_link_targets(CXLState *stat, Error **errp) {};
+void cxl_fmws_link_targets(Error **errp) {};
void cxl_machine_init(Object *obj, CXLState *state) {};
void cxl_hook_up_pxb_registers(PCIBus *bus, CXLState *state, Error **errp) {};
+hwaddr cxl_fmws_set_memmap(hwaddr base, hwaddr max_addr)
+{
+ return base;
+};
+void cxl_fmws_update_mmio(void) {};
const MemoryRegionOps cfmws_ops;
diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c
index e010163..0d891c6 100644
--- a/hw/cxl/cxl-host.c
+++ b/hw/cxl/cxl-host.c
@@ -22,15 +22,17 @@
#include "hw/pci/pcie_port.h"
#include "hw/pci-bridge/pci_expander_bridge.h"
-static void cxl_fixed_memory_window_config(CXLState *cxl_state,
- CXLFixedMemoryWindowOptions *object,
- Error **errp)
+static void cxl_fixed_memory_window_config(CXLFixedMemoryWindowOptions *object,
+ int index, Error **errp)
{
ERRP_GUARD();
- g_autofree CXLFixedWindow *fw = g_malloc0(sizeof(*fw));
+ DeviceState *dev = qdev_new(TYPE_CXL_FMW);
+ CXLFixedWindow *fw = CXL_FMW(dev);
strList *target;
int i;
+ fw->index = index;
+
for (target = object->targets; target; target = target->next) {
fw->num_targets++;
}
@@ -65,35 +67,40 @@ static void cxl_fixed_memory_window_config(CXLState *cxl_state,
fw->targets[i] = g_strdup(target->value);
}
- cxl_state->fixed_windows = g_list_append(cxl_state->fixed_windows,
- g_steal_pointer(&fw));
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), errp);
}
-void cxl_fmws_link_targets(CXLState *cxl_state, Error **errp)
+static int cxl_fmws_link(Object *obj, void *opaque)
{
- if (cxl_state && cxl_state->fixed_windows) {
- GList *it;
-
- for (it = cxl_state->fixed_windows; it; it = it->next) {
- CXLFixedWindow *fw = it->data;
- int i;
-
- for (i = 0; i < fw->num_targets; i++) {
- Object *o;
- bool ambig;
-
- o = object_resolve_path_type(fw->targets[i],
- TYPE_PXB_CXL_DEV,
- &ambig);
- if (!o) {
- error_setg(errp, "Could not resolve CXLFM target %s",
- fw->targets[i]);
- return;
- }
- fw->target_hbs[i] = PXB_CXL_DEV(o);
- }
+ Error **errp = opaque;
+ struct CXLFixedWindow *fw;
+ int i;
+
+ if (!object_dynamic_cast(obj, TYPE_CXL_FMW)) {
+ return 0;
+ }
+ fw = CXL_FMW(obj);
+
+ for (i = 0; i < fw->num_targets; i++) {
+ Object *o;
+ bool ambig;
+
+ o = object_resolve_path_type(fw->targets[i], TYPE_PXB_CXL_DEV,
+ &ambig);
+ if (!o) {
+ error_setg(errp, "Could not resolve CXLFM target %s",
+ fw->targets[i]);
+ return -1;
}
+ fw->target_hbs[i] = PXB_CXL_DEV(o);
}
+ return 0;
+}
+
+void cxl_fmws_link_targets(Error **errp)
+{
+ /* Order doesn't matter for this, so no need to build list */
+ object_child_foreach_recursive(object_get_root(), cxl_fmws_link, errp);
}
static bool cxl_hdm_find_target(uint32_t *cache_mem, hwaddr addr,
@@ -325,14 +332,15 @@ static void machine_set_cfmw(Object *obj, Visitor *v, const char *name,
CXLState *state = opaque;
CXLFixedMemoryWindowOptionsList *cfmw_list = NULL;
CXLFixedMemoryWindowOptionsList *it;
+ int index;
visit_type_CXLFixedMemoryWindowOptionsList(v, name, &cfmw_list, errp);
if (!cfmw_list) {
return;
}
- for (it = cfmw_list; it; it = it->next) {
- cxl_fixed_memory_window_config(state, it->value, errp);
+ for (it = cfmw_list, index = 0; it; it = it->next, index++) {
+ cxl_fixed_memory_window_config(it->value, index, errp);
}
state->cfmw_list = cfmw_list;
}
@@ -370,3 +378,110 @@ void cxl_hook_up_pxb_registers(PCIBus *bus, CXLState *state, Error **errp)
}
}
}
+
+static int cxl_fmws_find(Object *obj, void *opaque)
+{
+ GSList **list = opaque;
+
+ if (!object_dynamic_cast(obj, TYPE_CXL_FMW)) {
+ return 0;
+ }
+ *list = g_slist_prepend(*list, obj);
+
+ return 0;
+}
+
+static GSList *cxl_fmws_get_all(void)
+{
+ GSList *list = NULL;
+
+ object_child_foreach_recursive(object_get_root(), cxl_fmws_find, &list);
+
+ return list;
+}
+
+static gint cfmws_cmp(gconstpointer a, gconstpointer b, gpointer d)
+{
+ const struct CXLFixedWindow *ap = a;
+ const struct CXLFixedWindow *bp = b;
+
+ return ap->index > bp->index;
+}
+
+GSList *cxl_fmws_get_all_sorted(void)
+{
+ return g_slist_sort_with_data(cxl_fmws_get_all(), cfmws_cmp, NULL);
+}
+
+static int cxl_fmws_mmio_map(Object *obj, void *opaque)
+{
+ struct CXLFixedWindow *fw;
+
+ if (!object_dynamic_cast(obj, TYPE_CXL_FMW)) {
+ return 0;
+ }
+ fw = CXL_FMW(obj);
+ sysbus_mmio_map(SYS_BUS_DEVICE(fw), 0, fw->base);
+
+ return 0;
+}
+
+void cxl_fmws_update_mmio(void)
+{
+ /* Ordering is not required for this */
+ object_child_foreach_recursive(object_get_root(), cxl_fmws_mmio_map, NULL);
+}
+
+hwaddr cxl_fmws_set_memmap(hwaddr base, hwaddr max_addr)
+{
+ GSList *cfmws_list, *iter;
+ CXLFixedWindow *fw;
+
+ cfmws_list = cxl_fmws_get_all_sorted();
+ for (iter = cfmws_list; iter; iter = iter->next) {
+ fw = CXL_FMW(iter->data);
+ if (base + fw->size <= max_addr) {
+ fw->base = base;
+ base += fw->size;
+ }
+ }
+ g_slist_free(cfmws_list);
+
+ return base;
+}
+
+static void cxl_fmw_realize(DeviceState *dev, Error **errp)
+{
+ CXLFixedWindow *fw = CXL_FMW(dev);
+
+ memory_region_init_io(&fw->mr, OBJECT(dev), &cfmws_ops, fw,
+ "cxl-fixed-memory-region", fw->size);
+ sysbus_init_mmio(SYS_BUS_DEVICE(dev), &fw->mr);
+}
+
+/*
+ * Note: Fixed memory windows represent fixed address decoders on the host and
+ * as such have no dynamic state to reset or migrate
+ */
+static void cxl_fmw_class_init(ObjectClass *klass, const void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->desc = "CXL Fixed Memory Window";
+ dc->realize = cxl_fmw_realize;
+ /* Reason - created by machines as tightly coupled to machine memory map */
+ dc->user_creatable = false;
+}
+
+static const TypeInfo cxl_fmw_info = {
+ .name = TYPE_CXL_FMW,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(CXLFixedWindow),
+ .class_init = cxl_fmw_class_init,
+};
+
+static void cxl_host_register_types(void)
+{
+ type_register_static(&cxl_fmw_info);
+}
+type_init(cxl_host_register_types)
diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 299f232..68c7cc9 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -18,15 +18,16 @@
#include "hw/pci/pci.h"
#include "hw/pci-bridge/cxl_upstream_port.h"
#include "qemu/cutils.h"
+#include "qemu/host-utils.h"
#include "qemu/log.h"
#include "qemu/units.h"
#include "qemu/uuid.h"
#include "system/hostmem.h"
#include "qemu/range.h"
+#include "qapi/qapi-types-cxl.h"
#define CXL_CAPACITY_MULTIPLIER (256 * MiB)
#define CXL_DC_EVENT_LOG_SIZE 8
-#define CXL_NUM_EXTENTS_SUPPORTED 512
#define CXL_NUM_TAGS_SUPPORTED 0
#define CXL_ALERTS_LIFE_USED_WARN_THRESH (1 << 0)
#define CXL_ALERTS_OVER_TEMP_WARN_THRESH (1 << 1)
@@ -117,6 +118,13 @@ enum {
#define GET_PHYSICAL_PORT_STATE 0x1
TUNNEL = 0x53,
#define MANAGEMENT_COMMAND 0x0
+ FMAPI_DCD_MGMT = 0x56,
+ #define GET_DCD_INFO 0x0
+ #define GET_HOST_DC_REGION_CONFIG 0x1
+ #define SET_DC_REGION_CONFIG 0x2
+ #define GET_DC_REGION_EXTENT_LIST 0x3
+ #define INITIATE_DC_ADD 0x4
+ #define INITIATE_DC_RELEASE 0x5
};
/* CCI Message Format CXL r3.1 Figure 7-19 */
@@ -2750,7 +2758,7 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const struct cxl_cmd *cmd,
uint16_t out_pl_len, size;
CXLDCExtent *ent;
- if (start_extent_id > ct3d->dc.total_extent_count) {
+ if (start_extent_id > ct3d->dc.nr_extents_accepted) {
return CXL_MBOX_INVALID_INPUT;
}
@@ -2761,7 +2769,7 @@ static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const struct cxl_cmd *cmd,
out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
stl_le_p(&out->count, record_count);
- stl_le_p(&out->total_extents, ct3d->dc.total_extent_count);
+ stl_le_p(&out->total_extents, ct3d->dc.nr_extents_accepted);
stl_le_p(&out->generation_num, ct3d->dc.ext_list_gen_seq);
if (record_count > 0) {
@@ -2883,16 +2891,20 @@ void cxl_extent_group_list_insert_tail(CXLDCExtentGroupList *list,
QTAILQ_INSERT_TAIL(list, group, node);
}
-void cxl_extent_group_list_delete_front(CXLDCExtentGroupList *list)
+uint32_t cxl_extent_group_list_delete_front(CXLDCExtentGroupList *list)
{
CXLDCExtent *ent, *ent_next;
CXLDCExtentGroup *group = QTAILQ_FIRST(list);
+ uint32_t extents_deleted = 0;
QTAILQ_REMOVE(list, group, node);
QTAILQ_FOREACH_SAFE(ent, &group->list, node, ent_next) {
cxl_remove_extent_from_extent_list(&group->list, ent);
+ extents_deleted++;
}
g_free(group);
+
+ return extents_deleted;
}
/*
@@ -3011,7 +3023,7 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd,
CXLUpdateDCExtentListInPl *in = (void *)payload_in;
CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
CXLDCExtentList *extent_list = &ct3d->dc.extents;
- uint32_t i;
+ uint32_t i, num;
uint64_t dpa, len;
CXLRetCode ret;
@@ -3020,7 +3032,8 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd,
}
if (in->num_entries_updated == 0) {
- cxl_extent_group_list_delete_front(&ct3d->dc.extents_pending);
+ num = cxl_extent_group_list_delete_front(&ct3d->dc.extents_pending);
+ ct3d->dc.total_extent_count -= num;
return CXL_MBOX_SUCCESS;
}
@@ -3051,10 +3064,12 @@ static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd,
cxl_insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0);
ct3d->dc.total_extent_count += 1;
+ ct3d->dc.nr_extents_accepted += 1;
ct3_set_region_block_backed(ct3d, dpa, len);
}
/* Remove the first extent group in the pending list */
- cxl_extent_group_list_delete_front(&ct3d->dc.extents_pending);
+ num = cxl_extent_group_list_delete_front(&ct3d->dc.extents_pending);
+ ct3d->dc.total_extent_count -= num;
return CXL_MBOX_SUCCESS;
}
@@ -3160,7 +3175,7 @@ free_and_exit:
}
*updated_list_size = 0;
} else {
- *updated_list_size = ct3d->dc.total_extent_count + cnt_delta;
+ *updated_list_size = ct3d->dc.nr_extents_accepted + cnt_delta;
}
return ret;
@@ -3222,11 +3237,495 @@ static CXLRetCode cmd_dcd_release_dyn_cap(const struct cxl_cmd *cmd,
ct3_set_region_block_backed(ct3d, ent->start_dpa, ent->len);
cxl_remove_extent_from_extent_list(&updated_list, ent);
}
- ct3d->dc.total_extent_count = updated_list_size;
+ ct3d->dc.total_extent_count += (updated_list_size -
+ ct3d->dc.nr_extents_accepted);
+
+ ct3d->dc.nr_extents_accepted = updated_list_size;
+
+ return CXL_MBOX_SUCCESS;
+}
+
+/* CXL r3.2 section 7.6.7.6.1: Get DCD Info (Opcode 5600h) */
+static CXLRetCode cmd_fm_get_dcd_info(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+ struct {
+ uint8_t num_hosts;
+ uint8_t num_regions_supported;
+ uint8_t rsvd1[2];
+ uint16_t supported_add_sel_policy_bitmask;
+ uint8_t rsvd2[2];
+ uint16_t supported_removal_policy_bitmask;
+ uint8_t sanitize_on_release_bitmask;
+ uint8_t rsvd3;
+ uint64_t total_dynamic_capacity;
+ uint64_t region_blk_size_bitmasks[8];
+ } QEMU_PACKED *out = (void *)payload_out;
+ CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+ CXLDCRegion *region;
+ int i;
+
+ out->num_hosts = 1;
+ out->num_regions_supported = ct3d->dc.num_regions;
+ stw_le_p(&out->supported_add_sel_policy_bitmask,
+ BIT(CXL_EXTENT_SELECTION_POLICY_PRESCRIPTIVE));
+ stw_le_p(&out->supported_removal_policy_bitmask,
+ BIT(CXL_EXTENT_REMOVAL_POLICY_PRESCRIPTIVE));
+ out->sanitize_on_release_bitmask = 0;
+
+ stq_le_p(&out->total_dynamic_capacity,
+ ct3d->dc.total_capacity / CXL_CAPACITY_MULTIPLIER);
+
+ for (i = 0; i < ct3d->dc.num_regions; i++) {
+ region = &ct3d->dc.regions[i];
+ memcpy(&out->region_blk_size_bitmasks[i],
+ &region->supported_blk_size_bitmask,
+ sizeof(out->region_blk_size_bitmasks[i]));
+ }
+
+ *len_out = sizeof(*out);
+ return CXL_MBOX_SUCCESS;
+}
+
+static void build_dsmas_flags(uint8_t *flags, CXLDCRegion *region)
+{
+ *flags = 0;
+
+ if (region->nonvolatile) {
+ *flags |= BIT(CXL_DSMAS_FLAGS_NONVOLATILE);
+ }
+ if (region->sharable) {
+ *flags |= BIT(CXL_DSMAS_FLAGS_SHARABLE);
+ }
+ if (region->hw_managed_coherency) {
+ *flags |= BIT(CXL_DSMAS_FLAGS_HW_MANAGED_COHERENCY);
+ }
+ if (region->ic_specific_dc_management) {
+ *flags |= BIT(CXL_DSMAS_FLAGS_IC_SPECIFIC_DC_MANAGEMENT);
+ }
+ if (region->rdonly) {
+ *flags |= BIT(CXL_DSMAS_FLAGS_RDONLY);
+ }
+}
+
+/*
+ * CXL r3.2 section 7.6.7.6.2:
+ * Get Host DC Region Configuration (Opcode 5601h)
+ */
+static CXLRetCode cmd_fm_get_host_dc_region_config(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+ struct {
+ uint16_t host_id;
+ uint8_t region_cnt;
+ uint8_t start_rid;
+ } QEMU_PACKED *in = (void *)payload_in;
+ struct {
+ uint16_t host_id;
+ uint8_t num_regions;
+ uint8_t regions_returned;
+ struct {
+ uint64_t base;
+ uint64_t decode_len;
+ uint64_t region_len;
+ uint64_t block_size;
+ uint8_t flags;
+ uint8_t rsvd1[3];
+ uint8_t sanitize;
+ uint8_t rsvd2[3];
+ } QEMU_PACKED records[];
+ } QEMU_PACKED *out = (void *)payload_out;
+ struct {
+ uint32_t num_extents_supported;
+ uint32_t num_extents_available;
+ uint32_t num_tags_supported;
+ uint32_t num_tags_available;
+ } QEMU_PACKED *extra_out;
+ CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+ uint16_t record_count, out_pl_len, i;
+
+ if (in->start_rid >= ct3d->dc.num_regions) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+ record_count = MIN(ct3d->dc.num_regions - in->start_rid, in->region_cnt);
+
+ out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
+ extra_out = (void *)out + out_pl_len;
+ out_pl_len += sizeof(*extra_out);
+
+ assert(out_pl_len <= CXL_MAILBOX_MAX_PAYLOAD_SIZE);
+
+ stw_le_p(&out->host_id, 0);
+ out->num_regions = ct3d->dc.num_regions;
+ out->regions_returned = record_count;
+
+ for (i = 0; i < record_count; i++) {
+ stq_le_p(&out->records[i].base,
+ ct3d->dc.regions[in->start_rid + i].base);
+ stq_le_p(&out->records[i].decode_len,
+ ct3d->dc.regions[in->start_rid + i].decode_len /
+ CXL_CAPACITY_MULTIPLIER);
+ stq_le_p(&out->records[i].region_len,
+ ct3d->dc.regions[in->start_rid + i].len);
+ stq_le_p(&out->records[i].block_size,
+ ct3d->dc.regions[in->start_rid + i].block_size);
+ build_dsmas_flags(&out->records[i].flags,
+ &ct3d->dc.regions[in->start_rid + i]);
+ /* Sanitize is bit 0 of flags. */
+ out->records[i].sanitize =
+ ct3d->dc.regions[in->start_rid + i].flags & BIT(0);
+ }
+
+ stl_le_p(&extra_out->num_extents_supported, CXL_NUM_EXTENTS_SUPPORTED);
+ stl_le_p(&extra_out->num_extents_available, CXL_NUM_EXTENTS_SUPPORTED -
+ ct3d->dc.total_extent_count);
+ stl_le_p(&extra_out->num_tags_supported, CXL_NUM_TAGS_SUPPORTED);
+ stl_le_p(&extra_out->num_tags_available, CXL_NUM_TAGS_SUPPORTED);
+
+ *len_out = out_pl_len;
+ return CXL_MBOX_SUCCESS;
+}
+
+/* CXL r3.2 section 7.6.7.6.3: Set Host DC Region Configuration (Opcode 5602) */
+static CXLRetCode cmd_fm_set_dc_region_config(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+ struct {
+ uint8_t reg_id;
+ uint8_t rsvd[3];
+ uint64_t block_sz;
+ uint8_t flags;
+ uint8_t rsvd2[3];
+ } QEMU_PACKED *in = (void *)payload_in;
+ CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+ CXLEventDynamicCapacity dcEvent = {};
+ CXLDCRegion *region = &ct3d->dc.regions[in->reg_id];
+
+ /*
+ * CXL r3.2 7.6.7.6.3: Set DC Region Configuration
+ * This command shall fail with Unsupported when the Sanitize on Release
+ * field does not match the region’s configuration... and the device
+ * does not support reconfiguration of the Sanitize on Release setting.
+ *
+ * Currently not reconfigurable, so always fail if sanitize bit (bit 0)
+ * doesn't match.
+ */
+ if ((in->flags & 0x1) != (region->flags & 0x1)) {
+ return CXL_MBOX_UNSUPPORTED;
+ }
+
+ if (in->reg_id >= DCD_MAX_NUM_REGION) {
+ return CXL_MBOX_UNSUPPORTED;
+ }
+
+ /* Check that no extents are in the region being reconfigured */
+ if (!bitmap_empty(region->blk_bitmap, region->len / region->block_size)) {
+ return CXL_MBOX_UNSUPPORTED;
+ }
+
+ /* Check that new block size is supported */
+ if (!is_power_of_2(in->block_sz) ||
+ !(in->block_sz & region->supported_blk_size_bitmask)) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+
+ /* Return success if new block size == current block size */
+ if (in->block_sz == region->block_size) {
+ return CXL_MBOX_SUCCESS;
+ }
+
+ /* Free bitmap and create new one for new block size. */
+ qemu_mutex_lock(&region->bitmap_lock);
+ g_free(region->blk_bitmap);
+ region->blk_bitmap = bitmap_new(region->len / in->block_sz);
+ qemu_mutex_unlock(&region->bitmap_lock);
+ region->block_size = in->block_sz;
+
+ /* Create event record and insert into event log */
+ cxl_assign_event_header(&dcEvent.hdr,
+ &dynamic_capacity_uuid,
+ (1 << CXL_EVENT_TYPE_INFO),
+ sizeof(dcEvent),
+ cxl_device_get_timestamp(&ct3d->cxl_dstate));
+ dcEvent.type = DC_EVENT_REGION_CONFIG_UPDATED;
+ dcEvent.validity_flags = 1;
+ dcEvent.host_id = 0;
+ dcEvent.updated_region_id = in->reg_id;
+
+ if (cxl_event_insert(&ct3d->cxl_dstate,
+ CXL_EVENT_TYPE_DYNAMIC_CAP,
+ (CXLEventRecordRaw *)&dcEvent)) {
+ cxl_event_irq_assert(ct3d);
+ }
+ return CXL_MBOX_SUCCESS;
+}
+
+/* CXL r3.2 section 7.6.7.6.4: Get DC Region Extent Lists (Opcode 5603h) */
+static CXLRetCode cmd_fm_get_dc_region_extent_list(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+ struct {
+ uint16_t host_id;
+ uint8_t rsvd[2];
+ uint32_t extent_cnt;
+ uint32_t start_extent_id;
+ } QEMU_PACKED *in = (void *)payload_in;
+ struct {
+ uint16_t host_id;
+ uint8_t rsvd[2];
+ uint32_t start_extent_id;
+ uint32_t extents_returned;
+ uint32_t total_extents;
+ uint32_t list_generation_num;
+ uint8_t rsvd2[4];
+ CXLDCExtentRaw records[];
+ } QEMU_PACKED *out = (void *)payload_out;
+ QEMU_BUILD_BUG_ON(sizeof(*in) != 0xc);
+ CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+ CXLDCExtent *ent;
+ CXLDCExtentRaw *out_rec;
+ uint16_t record_count = 0, record_done = 0, i = 0;
+ uint16_t out_pl_len, max_size;
+
+ if (in->host_id != 0) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+
+ if (in->start_extent_id > ct3d->dc.nr_extents_accepted) {
+ return CXL_MBOX_INVALID_INPUT;
+ }
+ record_count = MIN(in->extent_cnt,
+ ct3d->dc.nr_extents_accepted - in->start_extent_id);
+ max_size = CXL_MAILBOX_MAX_PAYLOAD_SIZE - sizeof(*out);
+ record_count = MIN(record_count, max_size / sizeof(out->records[0]));
+ out_pl_len = sizeof(*out) + record_count * sizeof(out->records[0]);
+
+ stw_le_p(&out->host_id, in->host_id);
+ stl_le_p(&out->start_extent_id, in->start_extent_id);
+ stl_le_p(&out->extents_returned, record_count);
+ stl_le_p(&out->total_extents, ct3d->dc.nr_extents_accepted);
+ stl_le_p(&out->list_generation_num, ct3d->dc.ext_list_gen_seq);
+
+ if (record_count > 0) {
+ QTAILQ_FOREACH(ent, &ct3d->dc.extents, node) {
+ if (i++ < in->start_extent_id) {
+ continue;
+ }
+ out_rec = &out->records[record_done];
+ stq_le_p(&out_rec->start_dpa, ent->start_dpa);
+ stq_le_p(&out_rec->len, ent->len);
+ memcpy(&out_rec->tag, ent->tag, 0x10);
+ stw_le_p(&out_rec->shared_seq, ent->shared_seq);
+
+ record_done++;
+ if (record_done == record_count) {
+ break;
+ }
+ }
+ }
+
+ *len_out = out_pl_len;
return CXL_MBOX_SUCCESS;
}
+/*
+ * Helper function to convert CXLDCExtentRaw to CXLUpdateDCExtentListInPl
+ * in order to reuse cxl_detect_malformed_extent_list() function which accepts
+ * CXLUpdateDCExtentListInPl as a parameter.
+ */
+static void convert_raw_extents(CXLDCExtentRaw raw_extents[],
+ CXLUpdateDCExtentListInPl *extent_list,
+ int count)
+{
+ int i;
+
+ extent_list->num_entries_updated = count;
+
+ for (i = 0; i < count; i++) {
+ extent_list->updated_entries[i].start_dpa = raw_extents[i].start_dpa;
+ extent_list->updated_entries[i].len = raw_extents[i].len;
+ }
+}
+
+/* CXL r3.2 Section 7.6.7.6.5: Initiate Dynamic Capacity Add (Opcode 5604h) */
+static CXLRetCode cmd_fm_initiate_dc_add(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+ struct {
+ uint16_t host_id;
+ uint8_t selection_policy;
+ uint8_t reg_num;
+ uint64_t length;
+ uint8_t tag[0x10];
+ uint32_t ext_count;
+ CXLDCExtentRaw extents[];
+ } QEMU_PACKED *in = (void *)payload_in;
+ CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+ int i, rc;
+
+ switch (in->selection_policy) {
+ case CXL_EXTENT_SELECTION_POLICY_PRESCRIPTIVE: {
+ /* Adding extents exceeds device's extent tracking ability. */
+ if (in->ext_count + ct3d->dc.total_extent_count >
+ CXL_NUM_EXTENTS_SUPPORTED) {
+ return CXL_MBOX_RESOURCES_EXHAUSTED;
+ }
+
+ g_autofree CXLUpdateDCExtentListInPl *list =
+ g_malloc0(sizeof(*list) +
+ in->ext_count * sizeof(*list->updated_entries));
+
+ convert_raw_extents(in->extents, list, in->ext_count);
+ rc = cxl_detect_malformed_extent_list(ct3d, list);
+
+ for (i = 0; i < in->ext_count; i++) {
+ CXLDCExtentRaw *ext = &in->extents[i];
+
+ /* Check requested extents do not overlap with pending ones. */
+ if (cxl_extent_groups_overlaps_dpa_range(&ct3d->dc.extents_pending,
+ ext->start_dpa,
+ ext->len)) {
+ return CXL_MBOX_INVALID_EXTENT_LIST;
+ }
+ /* Check requested extents do not overlap with existing ones. */
+ if (cxl_extents_overlaps_dpa_range(&ct3d->dc.extents,
+ ext->start_dpa,
+ ext->len)) {
+ return CXL_MBOX_INVALID_EXTENT_LIST;
+ }
+ }
+
+ if (rc) {
+ return rc;
+ }
+
+ CXLDCExtentGroup *group = NULL;
+ for (i = 0; i < in->ext_count; i++) {
+ CXLDCExtentRaw *ext = &in->extents[i];
+
+ group = cxl_insert_extent_to_extent_group(group, ext->start_dpa,
+ ext->len, ext->tag,
+ ext->shared_seq);
+ }
+
+ cxl_extent_group_list_insert_tail(&ct3d->dc.extents_pending, group);
+ ct3d->dc.total_extent_count += in->ext_count;
+ cxl_create_dc_event_records_for_extents(ct3d,
+ DC_EVENT_ADD_CAPACITY,
+ in->extents,
+ in->ext_count);
+
+ return CXL_MBOX_SUCCESS;
+ }
+ default: {
+ qemu_log_mask(LOG_UNIMP,
+ "CXL extent selection policy not supported.\n");
+ return CXL_MBOX_INVALID_INPUT;
+ }
+ }
+}
+
+#define CXL_EXTENT_REMOVAL_POLICY_MASK 0x0F
+#define CXL_FORCED_REMOVAL_MASK (1 << 4)
+/*
+ * CXL r3.2 Section 7.6.7.6.6:
+ * Initiate Dynamic Capacity Release (Opcode 5605h)
+ */
+static CXLRetCode cmd_fm_initiate_dc_release(const struct cxl_cmd *cmd,
+ uint8_t *payload_in,
+ size_t len_in,
+ uint8_t *payload_out,
+ size_t *len_out,
+ CXLCCI *cci)
+{
+ struct {
+ uint16_t host_id;
+ uint8_t flags;
+ uint8_t reg_num;
+ uint64_t length;
+ uint8_t tag[0x10];
+ uint32_t ext_count;
+ CXLDCExtentRaw extents[];
+ } QEMU_PACKED *in = (void *)payload_in;
+ CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+ int i, rc;
+
+ switch (in->flags & CXL_EXTENT_REMOVAL_POLICY_MASK) {
+ case CXL_EXTENT_REMOVAL_POLICY_PRESCRIPTIVE: {
+ CXLDCExtentList updated_list;
+ uint32_t updated_list_size;
+ g_autofree CXLUpdateDCExtentListInPl *list =
+ g_malloc0(sizeof(*list) +
+ in->ext_count * sizeof(*list->updated_entries));
+
+ convert_raw_extents(in->extents, list, in->ext_count);
+ rc = cxl_detect_malformed_extent_list(ct3d, list);
+ if (rc) {
+ return rc;
+ }
+
+ /*
+ * Fail with Invalid PA if an extent is pending and Forced Removal
+ * flag not set.
+ */
+ if (!(in->flags & CXL_FORCED_REMOVAL_MASK)) {
+ for (i = 0; i < in->ext_count; i++) {
+ CXLDCExtentRaw ext = in->extents[i];
+ /*
+ * Check requested extents don't overlap with pending
+ * extents.
+ */
+ if (cxl_extent_groups_overlaps_dpa_range(
+ &ct3d->dc.extents_pending,
+ ext.start_dpa,
+ ext.len)) {
+ return CXL_MBOX_INVALID_PA;
+ }
+ }
+ }
+
+ rc = cxl_dc_extent_release_dry_run(ct3d,
+ list,
+ &updated_list,
+ &updated_list_size);
+ if (rc) {
+ return rc;
+ }
+ cxl_create_dc_event_records_for_extents(ct3d,
+ DC_EVENT_RELEASE_CAPACITY,
+ in->extents,
+ in->ext_count);
+ return CXL_MBOX_SUCCESS;
+ }
+ default: {
+ qemu_log_mask(LOG_UNIMP,
+ "CXL extent removal policy not supported.\n");
+ return CXL_MBOX_INVALID_INPUT;
+ }
+ }
+}
+
static const struct cxl_cmd cxl_cmd_set[256][256] = {
[INFOSTAT][BACKGROUND_OPERATION_ABORT] = { "BACKGROUND_OPERATION_ABORT",
cmd_infostat_bg_op_abort, 0, 0 },
@@ -3340,6 +3839,36 @@ static const struct cxl_cmd cxl_cmd_set_sw[256][256] = {
cmd_tunnel_management_cmd, ~0, 0 },
};
+static const struct cxl_cmd cxl_cmd_set_fm_dcd[256][256] = {
+ [FMAPI_DCD_MGMT][GET_DCD_INFO] = { "GET_DCD_INFO",
+ cmd_fm_get_dcd_info, 0, 0 },
+ [FMAPI_DCD_MGMT][GET_HOST_DC_REGION_CONFIG] = { "GET_HOST_DC_REGION_CONFIG",
+ cmd_fm_get_host_dc_region_config, 4, 0 },
+ [FMAPI_DCD_MGMT][SET_DC_REGION_CONFIG] = { "SET_DC_REGION_CONFIG",
+ cmd_fm_set_dc_region_config, 16,
+ (CXL_MBOX_CONFIG_CHANGE_COLD_RESET |
+ CXL_MBOX_CONFIG_CHANGE_CONV_RESET |
+ CXL_MBOX_CONFIG_CHANGE_CXL_RESET |
+ CXL_MBOX_IMMEDIATE_CONFIG_CHANGE |
+ CXL_MBOX_IMMEDIATE_DATA_CHANGE) },
+ [FMAPI_DCD_MGMT][GET_DC_REGION_EXTENT_LIST] = { "GET_DC_REGION_EXTENT_LIST",
+ cmd_fm_get_dc_region_extent_list, 12, 0 },
+ [FMAPI_DCD_MGMT][INITIATE_DC_ADD] = { "INIT_DC_ADD",
+ cmd_fm_initiate_dc_add, ~0,
+ (CXL_MBOX_CONFIG_CHANGE_COLD_RESET |
+ CXL_MBOX_CONFIG_CHANGE_CONV_RESET |
+ CXL_MBOX_CONFIG_CHANGE_CXL_RESET |
+ CXL_MBOX_IMMEDIATE_CONFIG_CHANGE |
+ CXL_MBOX_IMMEDIATE_DATA_CHANGE) },
+ [FMAPI_DCD_MGMT][INITIATE_DC_RELEASE] = { "INIT_DC_RELEASE",
+ cmd_fm_initiate_dc_release, ~0,
+ (CXL_MBOX_CONFIG_CHANGE_COLD_RESET |
+ CXL_MBOX_CONFIG_CHANGE_CONV_RESET |
+ CXL_MBOX_CONFIG_CHANGE_CXL_RESET |
+ CXL_MBOX_IMMEDIATE_CONFIG_CHANGE |
+ CXL_MBOX_IMMEDIATE_DATA_CHANGE) },
+};
+
/*
* While the command is executing in the background, the device should
* update the percentage complete in the Background Command Status Register
@@ -3614,7 +4143,12 @@ void cxl_initialize_t3_fm_owned_ld_mctpcci(CXLCCI *cci, DeviceState *d,
DeviceState *intf,
size_t payload_max)
{
+ CXLType3Dev *ct3d = CXL_TYPE3(d);
+
cxl_copy_cci_commands(cci, cxl_cmd_set_t3_fm_owned_ld_mctp);
+ if (ct3d->dc.num_regions) {
+ cxl_copy_cci_commands(cci, cxl_cmd_set_fm_dcd);
+ }
cci->d = d;
cci->intf = intf;
cxl_init_cci(cci, payload_max);
diff --git a/hw/display/apple-gfx.m b/hw/display/apple-gfx.m
index 8dde1f1..174d56a 100644
--- a/hw/display/apple-gfx.m
+++ b/hw/display/apple-gfx.m
@@ -454,7 +454,7 @@ static void set_cursor_glyph(void *opaque)
/* ------ DMA (device reading system memory) ------ */
typedef struct AppleGFXReadMemoryJob {
- QemuSemaphore sem;
+ QemuEvent event;
hwaddr physical_address;
uint64_t length;
void *dst;
@@ -470,7 +470,7 @@ static void apple_gfx_do_read_memory(void *opaque)
job->dst, job->length, MEMTXATTRS_UNSPECIFIED);
job->success = (r == MEMTX_OK);
- qemu_sem_post(&job->sem);
+ qemu_event_set(&job->event);
}
static bool apple_gfx_read_memory(AppleGFXState *s, hwaddr physical_address,
@@ -483,11 +483,11 @@ static bool apple_gfx_read_memory(AppleGFXState *s, hwaddr physical_address,
trace_apple_gfx_read_memory(physical_address, length, dst);
/* Performing DMA requires BQL, so do it in a BH. */
- qemu_sem_init(&job.sem, 0);
+ qemu_event_init(&job.event, 0);
aio_bh_schedule_oneshot(qemu_get_aio_context(),
apple_gfx_do_read_memory, &job);
- qemu_sem_wait(&job.sem);
- qemu_sem_destroy(&job.sem);
+ qemu_event_wait(&job.event);
+ qemu_event_destroy(&job.event);
return job.success;
}
diff --git a/hw/display/artist.c b/hw/display/artist.c
index 3fafc8a..3c884c9 100644
--- a/hw/display/artist.c
+++ b/hw/display/artist.c
@@ -12,6 +12,7 @@
#include "qemu/log.h"
#include "qemu/module.h"
#include "qemu/units.h"
+#include "qemu/bswap.h"
#include "qapi/error.h"
#include "hw/sysbus.h"
#include "hw/loader.h"
diff --git a/hw/display/ati.c b/hw/display/ati.c
index 7de2773..f7c0006 100644
--- a/hw/display/ati.c
+++ b/hw/display/ati.c
@@ -22,6 +22,7 @@
#include "vga-access.h"
#include "hw/qdev-properties.h"
#include "vga_regs.h"
+#include "qemu/bswap.h"
#include "qemu/log.h"
#include "qemu/module.h"
#include "qemu/error-report.h"
diff --git a/hw/display/bcm2835_fb.c b/hw/display/bcm2835_fb.c
index 820e67a..1bb2ee4 100644
--- a/hw/display/bcm2835_fb.c
+++ b/hw/display/bcm2835_fb.c
@@ -27,6 +27,7 @@
#include "hw/display/bcm2835_fb.h"
#include "hw/hw.h"
#include "hw/irq.h"
+#include "ui/console.h"
#include "framebuffer.h"
#include "ui/pixel_ops.h"
#include "hw/misc/bcm2835_mbox_defs.h"
diff --git a/hw/display/framebuffer.c b/hw/display/framebuffer.c
index 4485aa3..b4296e8 100644
--- a/hw/display/framebuffer.c
+++ b/hw/display/framebuffer.c
@@ -95,9 +95,9 @@ void framebuffer_update_display(
}
first = -1;
- addr += i * src_width;
- src += i * src_width;
- dest += i * dest_row_pitch;
+ addr += (uint64_t)i * src_width;
+ src += (uint64_t)i * src_width;
+ dest += (uint64_t)i * dest_row_pitch;
snap = memory_region_snapshot_and_clear_dirty(mem, addr, src_width * rows,
DIRTY_MEMORY_VGA);
diff --git a/hw/display/qxl-render.c b/hw/display/qxl-render.c
index eda6d3d..c6a9ac1 100644
--- a/hw/display/qxl-render.c
+++ b/hw/display/qxl-render.c
@@ -222,6 +222,7 @@ static void qxl_unpack_chunks(void *dest, size_t size, PCIQXLDevice *qxl,
uint32_t max_chunks = 32;
size_t offset = 0;
size_t bytes;
+ QXLPHYSICAL next_chunk_phys = 0;
for (;;) {
bytes = MIN(size - offset, chunk->data_size);
@@ -230,7 +231,15 @@ static void qxl_unpack_chunks(void *dest, size_t size, PCIQXLDevice *qxl,
if (offset == size) {
return;
}
- chunk = qxl_phys2virt(qxl, chunk->next_chunk, group_id,
+ next_chunk_phys = chunk->next_chunk;
+ /* fist time, only get the next chunk's data size */
+ chunk = qxl_phys2virt(qxl, next_chunk_phys, group_id,
+ sizeof(QXLDataChunk));
+ if (!chunk) {
+ return;
+ }
+ /* second time, check data size and get data */
+ chunk = qxl_phys2virt(qxl, next_chunk_phys, group_id,
sizeof(QXLDataChunk) + chunk->data_size);
if (!chunk) {
return;
diff --git a/hw/display/ramfb-standalone.c b/hw/display/ramfb-standalone.c
index 08f2d5d..72b2071 100644
--- a/hw/display/ramfb-standalone.c
+++ b/hw/display/ramfb-standalone.c
@@ -17,6 +17,7 @@ struct RAMFBStandaloneState {
QemuConsole *con;
RAMFBState *state;
bool migrate;
+ bool use_legacy_x86_rom;
};
static void display_update_wrapper(void *dev)
@@ -39,7 +40,7 @@ static void ramfb_realizefn(DeviceState *dev, Error **errp)
RAMFBStandaloneState *ramfb = RAMFB(dev);
ramfb->con = graphic_console_init(dev, 0, &wrapper_ops, dev);
- ramfb->state = ramfb_setup(errp);
+ ramfb->state = ramfb_setup(ramfb->use_legacy_x86_rom, errp);
}
static bool migrate_needed(void *opaque)
@@ -62,6 +63,8 @@ static const VMStateDescription ramfb_dev_vmstate = {
static const Property ramfb_properties[] = {
DEFINE_PROP_BOOL("x-migrate", RAMFBStandaloneState, migrate, true),
+ DEFINE_PROP_BOOL("use-legacy-x86-rom", RAMFBStandaloneState,
+ use_legacy_x86_rom, false),
};
static void ramfb_class_initfn(ObjectClass *klass, const void *data)
diff --git a/hw/display/ramfb-stubs.c b/hw/display/ramfb-stubs.c
index cf64733..b835513 100644
--- a/hw/display/ramfb-stubs.c
+++ b/hw/display/ramfb-stubs.c
@@ -8,7 +8,7 @@ void ramfb_display_update(QemuConsole *con, RAMFBState *s)
{
}
-RAMFBState *ramfb_setup(Error **errp)
+RAMFBState *ramfb_setup(bool romfile, Error **errp)
{
error_setg(errp, "ramfb support not available");
return NULL;
diff --git a/hw/display/ramfb.c b/hw/display/ramfb.c
index 8c0f907..9a17d97 100644
--- a/hw/display/ramfb.c
+++ b/hw/display/ramfb.c
@@ -135,7 +135,7 @@ const VMStateDescription ramfb_vmstate = {
}
};
-RAMFBState *ramfb_setup(Error **errp)
+RAMFBState *ramfb_setup(bool romfile, Error **errp)
{
FWCfgState *fw_cfg = fw_cfg_find();
RAMFBState *s;
@@ -147,7 +147,9 @@ RAMFBState *ramfb_setup(Error **errp)
s = g_new0(RAMFBState, 1);
- rom_add_vga("vgabios-ramfb.bin");
+ if (romfile) {
+ rom_add_vga("vgabios-ramfb.bin");
+ }
fw_cfg_add_file_callback(fw_cfg, "etc/ramfb",
NULL, ramfb_fw_cfg_write, s,
&s->cfg, sizeof(s->cfg), false);
diff --git a/hw/display/sm501.c b/hw/display/sm501.c
index 6d2f186..bc091b3 100644
--- a/hw/display/sm501.c
+++ b/hw/display/sm501.c
@@ -26,6 +26,7 @@
#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qapi/error.h"
+#include "qemu/error-report.h"
#include "qemu/log.h"
#include "qemu/module.h"
#include "hw/usb/hcd-ohci.h"
diff --git a/hw/display/vga.c b/hw/display/vga.c
index 20475eb..90b89cf 100644
--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -26,7 +26,7 @@
#include "qemu/units.h"
#include "system/reset.h"
#include "qapi/error.h"
-#include "exec/tswap.h"
+#include "qemu/target-info.h"
#include "hw/display/vga.h"
#include "hw/i386/x86.h"
#include "hw/pci/pci.h"
diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c
index 9eb806b..7269477 100644
--- a/hw/display/virtio-gpu-base.c
+++ b/hw/display/virtio-gpu-base.c
@@ -19,6 +19,7 @@
#include "qemu/error-report.h"
#include "hw/display/edid.h"
#include "trace.h"
+#include "qapi/qapi-types-virtio.h"
void
virtio_gpu_base_reset(VirtIOGPUBase *g)
@@ -56,6 +57,8 @@ void
virtio_gpu_base_generate_edid(VirtIOGPUBase *g, int scanout,
struct virtio_gpu_resp_edid *edid)
{
+ size_t output_idx;
+ VirtIOGPUOutputList *node;
qemu_edid_info info = {
.width_mm = g->req_state[scanout].width_mm,
.height_mm = g->req_state[scanout].height_mm,
@@ -64,6 +67,14 @@ virtio_gpu_base_generate_edid(VirtIOGPUBase *g, int scanout,
.refresh_rate = g->req_state[scanout].refresh_rate,
};
+ for (output_idx = 0, node = g->conf.outputs;
+ output_idx <= scanout && node; output_idx++, node = node->next) {
+ if (output_idx == scanout && node->value && node->value->name) {
+ info.name = node->value->name;
+ break;
+ }
+ }
+
edid->size = cpu_to_le32(sizeof(edid->edid));
qemu_edid_generate(edid->edid, sizeof(edid->edid), &info);
}
@@ -172,6 +183,8 @@ virtio_gpu_base_device_realize(DeviceState *qdev,
VirtIOHandleOutput cursor_cb,
Error **errp)
{
+ size_t output_idx;
+ VirtIOGPUOutputList *node;
VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
VirtIOGPUBase *g = VIRTIO_GPU_BASE(qdev);
int i;
@@ -181,6 +194,20 @@ virtio_gpu_base_device_realize(DeviceState *qdev,
return false;
}
+ for (output_idx = 0, node = g->conf.outputs;
+ node; output_idx++, node = node->next) {
+ if (output_idx == g->conf.max_outputs) {
+ error_setg(errp, "invalid outputs > %d", g->conf.max_outputs);
+ return false;
+ }
+ if (node->value && node->value->name &&
+ strlen(node->value->name) > EDID_NAME_MAX_LENGTH) {
+ error_setg(errp, "invalid output name '%s' > %d",
+ node->value->name, EDID_NAME_MAX_LENGTH);
+ return false;
+ }
+ }
+
if (virtio_gpu_virgl_enabled(g->conf)) {
error_setg(&g->migration_blocker, "virgl is not yet migratable");
if (migrate_add_blocker(&g->migration_blocker, errp) < 0) {
diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c
index 145a0b3..94ddc01 100644
--- a/hw/display/virtio-gpu-virgl.c
+++ b/hw/display/virtio-gpu-virgl.c
@@ -970,6 +970,15 @@ void virtio_gpu_virgl_process_cmd(VirtIOGPU *g,
}
trace_virtio_gpu_fence_ctrl(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type);
+#if VIRGL_VERSION_MAJOR >= 1
+ if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_INFO_RING_IDX) {
+ virgl_renderer_context_create_fence(cmd->cmd_hdr.ctx_id,
+ VIRGL_RENDERER_FENCE_FLAG_MERGEABLE,
+ cmd->cmd_hdr.ring_idx,
+ cmd->cmd_hdr.fence_id);
+ return;
+ }
+#endif
virgl_renderer_create_fence(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type);
}
@@ -983,6 +992,11 @@ static void virgl_write_fence(void *opaque, uint32_t fence)
* the guest can end up emitting fences out of order
* so we should check all fenced cmds not just the first one.
*/
+#if VIRGL_VERSION_MAJOR >= 1
+ if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_INFO_RING_IDX) {
+ continue;
+ }
+#endif
if (cmd->cmd_hdr.fence_id > fence) {
continue;
}
@@ -997,6 +1011,29 @@ static void virgl_write_fence(void *opaque, uint32_t fence)
}
}
+#if VIRGL_VERSION_MAJOR >= 1
+static void virgl_write_context_fence(void *opaque, uint32_t ctx_id,
+ uint32_t ring_idx, uint64_t fence_id) {
+ VirtIOGPU *g = opaque;
+ struct virtio_gpu_ctrl_command *cmd, *tmp;
+
+ QTAILQ_FOREACH_SAFE(cmd, &g->fenceq, next, tmp) {
+ if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_INFO_RING_IDX &&
+ cmd->cmd_hdr.ctx_id == ctx_id && cmd->cmd_hdr.ring_idx == ring_idx &&
+ cmd->cmd_hdr.fence_id <= fence_id) {
+ trace_virtio_gpu_fence_resp(cmd->cmd_hdr.fence_id);
+ virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA);
+ QTAILQ_REMOVE(&g->fenceq, cmd, next);
+ g_free(cmd);
+ g->inflight--;
+ if (virtio_gpu_stats_enabled(g->parent_obj.conf)) {
+ trace_virtio_gpu_dec_inflight_fences(g->inflight);
+ }
+ }
+ }
+}
+#endif
+
static virgl_renderer_gl_context
virgl_create_context(void *opaque, int scanout_idx,
struct virgl_renderer_gl_ctx_param *params)
@@ -1031,11 +1068,18 @@ static int virgl_make_context_current(void *opaque, int scanout_idx,
}
static struct virgl_renderer_callbacks virtio_gpu_3d_cbs = {
+#if VIRGL_VERSION_MAJOR >= 1
+ .version = 3,
+#else
.version = 1,
+#endif
.write_fence = virgl_write_fence,
.create_gl_context = virgl_create_context,
.destroy_gl_context = virgl_destroy_context,
.make_current = virgl_make_context_current,
+#if VIRGL_VERSION_MAJOR >= 1
+ .write_context_fence = virgl_write_context_fence,
+#endif
};
static void virtio_gpu_print_stats(void *opaque)
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 0a1a625..3a55512 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -242,6 +242,7 @@ static uint32_t calc_image_hostmem(pixman_format_code_t pformat,
static void virtio_gpu_resource_create_2d(VirtIOGPU *g,
struct virtio_gpu_ctrl_command *cmd)
{
+ Error *err = NULL;
pixman_format_code_t pformat;
struct virtio_gpu_simple_resource *res;
struct virtio_gpu_resource_create_2d c2d;
@@ -293,7 +294,8 @@ static void virtio_gpu_resource_create_2d(VirtIOGPU *g,
c2d.width,
c2d.height,
c2d.height ? res->hostmem / c2d.height : 0,
- &error_warn)) {
+ &err)) {
+ warn_report_err(err);
goto end;
}
}
@@ -1246,7 +1248,8 @@ static int virtio_gpu_save(QEMUFile *f, void *opaque, size_t size,
}
qemu_put_be32(f, 0); /* end of list */
- return vmstate_save_state(f, &vmstate_virtio_gpu_scanouts, g, NULL);
+ return vmstate_save_state(f, &vmstate_virtio_gpu_scanouts, g, NULL,
+ &error_fatal);
}
static bool virtio_gpu_load_restore_mapping(VirtIOGPU *g,
@@ -1282,6 +1285,7 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size,
const VMStateField *field)
{
VirtIOGPU *g = opaque;
+ Error *err = NULL;
struct virtio_gpu_simple_resource *res;
uint32_t resource_id, pformat;
int i;
@@ -1317,7 +1321,8 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size,
res->width,
res->height,
res->height ? res->hostmem / res->height : 0,
- &error_warn)) {
+ &err)) {
+ warn_report_err(err);
g_free(res);
return -EINVAL;
}
@@ -1343,7 +1348,7 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size,
}
/* load & apply scanout state */
- vmstate_load_state(f, &vmstate_virtio_gpu_scanouts, g, 1);
+ vmstate_load_state(f, &vmstate_virtio_gpu_scanouts, g, 1, &error_fatal);
return 0;
}
diff --git a/hw/display/vmware_vga.c b/hw/display/vmware_vga.c
index 544bb65..bc1a8ed 100644
--- a/hw/display/vmware_vga.c
+++ b/hw/display/vmware_vga.c
@@ -618,7 +618,7 @@ static void vmsvga_fifo_run(struct vmsvga_state_s *s)
uint32_t cmd, colour;
int args, len, maxloop = 1024;
int x, y, dx, dy, width, height;
- struct vmsvga_cursor_definition_s cursor;
+ QEMU_UNINITIALIZED struct vmsvga_cursor_definition_s cursor;
uint32_t cmd_start;
len = vmsvga_fifo_length(s);
diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c
index 22822fe..164fd0b 100644
--- a/hw/display/xenfb.c
+++ b/hw/display/xenfb.c
@@ -283,8 +283,7 @@ static void xenfb_mouse_event(DeviceState *dev, QemuConsole *src,
scale = surface_height(surface) - 1;
break;
default:
- scale = 0x8000;
- break;
+ g_assert_not_reached();
}
xenfb->axis[move->axis] = move->value * scale / 0x7fff;
}
diff --git a/hw/display/xlnx_dp.c b/hw/display/xlnx_dp.c
index 7c980ee..ef73e18 100644
--- a/hw/display/xlnx_dp.c
+++ b/hw/display/xlnx_dp.c
@@ -1267,14 +1267,18 @@ static void xlnx_dp_init(Object *obj)
s->aux_bus = aux_bus_init(DEVICE(obj), "aux");
/*
- * Initialize DPCD and EDID..
+ * Initialize DPCD and EDID. Once we have added the objects as
+ * child properties of this device, we can drop the reference we
+ * hold to them, leaving the child-property as the only reference.
*/
s->dpcd = DPCD(qdev_new("dpcd"));
object_property_add_child(OBJECT(s), "dpcd", OBJECT(s->dpcd));
+ object_unref(s->dpcd);
s->edid = I2CDDC(qdev_new("i2c-ddc"));
i2c_slave_set_address(I2C_SLAVE(s->edid), 0x50);
object_property_add_child(OBJECT(s), "edid", OBJECT(s->edid));
+ object_unref(s->edid);
fifo8_create(&s->rx_fifo, 16);
fifo8_create(&s->tx_fifo, 16);
@@ -1311,8 +1315,8 @@ static void xlnx_dp_realize(DeviceState *dev, Error **errp)
qdev_realize(DEVICE(s->dpcd), BUS(s->aux_bus), &error_fatal);
aux_map_slave(AUX_SLAVE(s->dpcd), 0x0000);
- qdev_realize_and_unref(DEVICE(s->edid), BUS(aux_get_i2c_bus(s->aux_bus)),
- &error_fatal);
+ qdev_realize(DEVICE(s->edid), BUS(aux_get_i2c_bus(s->aux_bus)),
+ &error_fatal);
s->console = graphic_console_init(dev, 0, &xlnx_dp_gfx_ops, s);
surface = qemu_console_surface(s->console);
diff --git a/hw/dma/xlnx_csu_dma.c b/hw/dma/xlnx_csu_dma.c
index 3db3904..d8c7da1 100644
--- a/hw/dma/xlnx_csu_dma.c
+++ b/hw/dma/xlnx_csu_dma.c
@@ -287,7 +287,7 @@ static uint32_t xlnx_csu_dma_advance(XlnxCSUDMA *s, uint32_t len)
static void xlnx_csu_dma_src_notify(void *opaque)
{
XlnxCSUDMA *s = XLNX_CSU_DMA(opaque);
- unsigned char buf[4 * 1024];
+ QEMU_UNINITIALIZED unsigned char buf[4 * 1024];
size_t rlen = 0;
ptimer_transaction_begin(s->src_timer);
diff --git a/hw/gpio/pca9552.c b/hw/gpio/pca9552.c
index d65c0a2..1e10238 100644
--- a/hw/gpio/pca9552.c
+++ b/hw/gpio/pca9552.c
@@ -76,7 +76,7 @@ static void pca955x_display_pins_status(PCA955xState *s,
return;
}
if (trace_event_get_state_backends(TRACE_PCA955X_GPIO_STATUS)) {
- char *buf = g_newa(char, k->pin_count + 1);
+ char buf[PCA955X_PIN_COUNT_MAX + 1];
for (i = 0; i < k->pin_count; i++) {
if (extract32(pins_status, i, 1)) {
diff --git a/hw/gpio/pca9554.c b/hw/gpio/pca9554.c
index de3f883..eac0d23 100644
--- a/hw/gpio/pca9554.c
+++ b/hw/gpio/pca9554.c
@@ -174,7 +174,7 @@ static void pca9554_set_pin(Object *obj, Visitor *v, const char *name,
PCA9554State *s = PCA9554(obj);
int pin, rc, val;
uint8_t state, mask;
- char *state_str;
+ g_autofree char *state_str = NULL;
if (!visit_type_str(v, name, &state_str, errp)) {
return;
diff --git a/hw/gpio/zaurus.c b/hw/gpio/zaurus.c
index b8d27f5..590ffde 100644
--- a/hw/gpio/zaurus.c
+++ b/hw/gpio/zaurus.c
@@ -18,7 +18,6 @@
#include "qemu/osdep.h"
#include "hw/irq.h"
-#include "hw/arm/sharpsl.h"
#include "hw/sysbus.h"
#include "migration/vmstate.h"
#include "qemu/module.h"
@@ -265,44 +264,3 @@ static void scoop_register_types(void)
}
type_init(scoop_register_types)
-
-/* Write the bootloader parameters memory area. */
-
-#define MAGIC_CHG(a, b, c, d) ((d << 24) | (c << 16) | (b << 8) | a)
-
-static struct QEMU_PACKED sl_param_info {
- uint32_t comadj_keyword;
- int32_t comadj;
-
- uint32_t uuid_keyword;
- char uuid[16];
-
- uint32_t touch_keyword;
- int32_t touch_xp;
- int32_t touch_yp;
- int32_t touch_xd;
- int32_t touch_yd;
-
- uint32_t adadj_keyword;
- int32_t adadj;
-
- uint32_t phad_keyword;
- int32_t phadadj;
-} zaurus_bootparam = {
- .comadj_keyword = MAGIC_CHG('C', 'M', 'A', 'D'),
- .comadj = 125,
- .uuid_keyword = MAGIC_CHG('U', 'U', 'I', 'D'),
- .uuid = { -1 },
- .touch_keyword = MAGIC_CHG('T', 'U', 'C', 'H'),
- .touch_xp = -1,
- .adadj_keyword = MAGIC_CHG('B', 'V', 'A', 'D'),
- .adadj = -1,
- .phad_keyword = MAGIC_CHG('P', 'H', 'A', 'D'),
- .phadadj = 0x01,
-};
-
-void sl_bootparam_write(hwaddr ptr)
-{
- cpu_physical_memory_write(ptr, &zaurus_bootparam,
- sizeof(struct sl_param_info));
-}
diff --git a/hw/hppa/machine.c b/hw/hppa/machine.c
index dacedc5..cddca69 100644
--- a/hw/hppa/machine.c
+++ b/hw/hppa/machine.c
@@ -36,6 +36,13 @@
#include "net/net.h"
#include "qemu/log.h"
+#define TYPE_HPPA_COMMON_MACHINE MACHINE_TYPE_NAME("hppa-common")
+OBJECT_DECLARE_SIMPLE_TYPE(HppaMachineState, HPPA_COMMON_MACHINE)
+
+struct HppaMachineState {
+ MachineState parent_obj;
+};
+
#define MIN_SEABIOS_HPPA_VERSION 12 /* require at least this fw version */
#define HPA_POWER_BUTTON (FIRMWARE_END - 0x10)
@@ -345,16 +352,11 @@ static void machine_HP_common_init_tail(MachineState *machine, PCIBus *pci_bus,
TranslateFn *translate)
{
const char *kernel_filename = machine->kernel_filename;
- const char *kernel_cmdline = machine->kernel_cmdline;
- const char *initrd_filename = machine->initrd_filename;
- const char *firmware = machine->firmware;
MachineClass *mc = MACHINE_GET_CLASS(machine);
DeviceState *dev;
PCIDevice *pci_dev;
- char *firmware_filename;
- uint64_t firmware_low, firmware_high;
long size;
- uint64_t kernel_entry = 0, kernel_low, kernel_high;
+ uint64_t kernel_entry = 0;
MemoryRegion *addr_space = get_system_memory();
MemoryRegion *rom_region;
SysBusDevice *s;
@@ -424,6 +426,10 @@ static void machine_HP_common_init_tail(MachineState *machine, PCIBus *pci_bus,
firmware on 64-bit machines by default if not specified
on command line. */
if (!qtest_enabled()) {
+ const char *firmware = machine->firmware;
+ uint64_t firmware_low, firmware_high;
+ char *firmware_filename;
+
if (!firmware) {
firmware = lasi_dev ? "hppa-firmware.img" : "hppa-firmware64.img";
}
@@ -460,6 +466,10 @@ static void machine_HP_common_init_tail(MachineState *machine, PCIBus *pci_bus,
/* Load kernel */
if (kernel_filename) {
+ const char *kernel_cmdline = machine->kernel_cmdline;
+ const char *initrd_filename = machine->initrd_filename;
+ uint64_t kernel_low, kernel_high;
+
size = load_elf(kernel_filename, NULL, linux_kernel_virt_to_phys,
NULL, &kernel_entry, &kernel_low, &kernel_high, NULL,
ELFDATA2MSB, EM_PARISC, 0, 0);
@@ -683,6 +693,22 @@ static void hppa_nmi(NMIState *n, int cpu_index, Error **errp)
}
}
+static void hppa_machine_common_class_init(ObjectClass *oc, const void *data)
+{
+ MachineClass *mc = MACHINE_CLASS(oc);
+ NMIClass *nc = NMI_CLASS(oc);
+
+ mc->reset = hppa_machine_reset;
+ mc->block_default_type = IF_SCSI;
+ mc->default_cpus = 1;
+ mc->max_cpus = HPPA_MAX_CPUS;
+ mc->default_boot_order = "cd";
+ mc->default_ram_id = "ram";
+ mc->default_nic = "tulip";
+
+ nc->nmi_monitor_handler = hppa_nmi;
+}
+
static void HP_B160L_machine_init_class_init(ObjectClass *oc, const void *data)
{
static const char * const valid_cpu_types[] = {
@@ -690,35 +716,15 @@ static void HP_B160L_machine_init_class_init(ObjectClass *oc, const void *data)
NULL
};
MachineClass *mc = MACHINE_CLASS(oc);
- NMIClass *nc = NMI_CLASS(oc);
mc->desc = "HP B160L workstation";
mc->default_cpu_type = TYPE_HPPA_CPU;
mc->valid_cpu_types = valid_cpu_types;
mc->init = machine_HP_B160L_init;
- mc->reset = hppa_machine_reset;
- mc->block_default_type = IF_SCSI;
- mc->max_cpus = HPPA_MAX_CPUS;
- mc->default_cpus = 1;
mc->is_default = true;
mc->default_ram_size = 512 * MiB;
- mc->default_boot_order = "cd";
- mc->default_ram_id = "ram";
- mc->default_nic = "tulip";
-
- nc->nmi_monitor_handler = hppa_nmi;
}
-static const TypeInfo HP_B160L_machine_init_typeinfo = {
- .name = MACHINE_TYPE_NAME("B160L"),
- .parent = TYPE_MACHINE,
- .class_init = HP_B160L_machine_init_class_init,
- .interfaces = (const InterfaceInfo[]) {
- { TYPE_NMI },
- { }
- },
-};
-
static void HP_C3700_machine_init_class_init(ObjectClass *oc, const void *data)
{
static const char * const valid_cpu_types[] = {
@@ -726,39 +732,35 @@ static void HP_C3700_machine_init_class_init(ObjectClass *oc, const void *data)
NULL
};
MachineClass *mc = MACHINE_CLASS(oc);
- NMIClass *nc = NMI_CLASS(oc);
mc->desc = "HP C3700 workstation";
mc->default_cpu_type = TYPE_HPPA64_CPU;
mc->valid_cpu_types = valid_cpu_types;
mc->init = machine_HP_C3700_init;
- mc->reset = hppa_machine_reset;
- mc->block_default_type = IF_SCSI;
mc->max_cpus = HPPA_MAX_CPUS;
- mc->default_cpus = 1;
- mc->is_default = false;
mc->default_ram_size = 1024 * MiB;
- mc->default_boot_order = "cd";
- mc->default_ram_id = "ram";
- mc->default_nic = "tulip";
-
- nc->nmi_monitor_handler = hppa_nmi;
}
-static const TypeInfo HP_C3700_machine_init_typeinfo = {
- .name = MACHINE_TYPE_NAME("C3700"),
- .parent = TYPE_MACHINE,
- .class_init = HP_C3700_machine_init_class_init,
- .interfaces = (const InterfaceInfo[]) {
- { TYPE_NMI },
- { }
+static const TypeInfo hppa_machine_types[] = {
+ {
+ .name = TYPE_HPPA_COMMON_MACHINE,
+ .parent = TYPE_MACHINE,
+ .instance_size = sizeof(HppaMachineState),
+ .class_init = hppa_machine_common_class_init,
+ .abstract = true,
+ .interfaces = (const InterfaceInfo[]) {
+ { TYPE_NMI },
+ { }
+ },
+ }, {
+ .name = MACHINE_TYPE_NAME("B160L"),
+ .parent = TYPE_HPPA_COMMON_MACHINE,
+ .class_init = HP_B160L_machine_init_class_init,
+ }, {
+ .name = MACHINE_TYPE_NAME("C3700"),
+ .parent = TYPE_HPPA_COMMON_MACHINE,
+ .class_init = HP_C3700_machine_init_class_init,
},
};
-static void hppa_machine_init_register_types(void)
-{
- type_register_static(&HP_B160L_machine_init_typeinfo);
- type_register_static(&HP_C3700_machine_init_typeinfo);
-}
-
-type_init(hppa_machine_init_register_types)
+DEFINE_TYPES(hppa_machine_types)
diff --git a/hw/hyperv/hv-balloon-our_range_memslots.c b/hw/hyperv/hv-balloon-our_range_memslots.c
index 1505a39..1fc95e1 100644
--- a/hw/hyperv/hv-balloon-our_range_memslots.c
+++ b/hw/hyperv/hv-balloon-our_range_memslots.c
@@ -8,6 +8,7 @@
*/
#include "qemu/osdep.h"
+#include "system/ramblock.h"
#include "hv-balloon-internal.h"
#include "hv-balloon-our_range_memslots.h"
#include "trace.h"
diff --git a/hw/hyperv/hv-balloon.c b/hw/hyperv/hv-balloon.c
index 94b0abb..2d6d7db 100644
--- a/hw/hyperv/hv-balloon.c
+++ b/hw/hyperv/hv-balloon.c
@@ -67,10 +67,6 @@
* these requests
*/
-struct HvBalloonClass {
- VMBusDeviceClass parent_class;
-} HvBalloonClass;
-
typedef enum State {
/* not a real state */
S_NO_CHANGE = 0,
@@ -162,8 +158,9 @@ typedef struct HvBalloon {
MemoryRegion *mr;
} HvBalloon;
-OBJECT_DEFINE_TYPE_WITH_INTERFACES(HvBalloon, hv_balloon, HV_BALLOON, VMBUS_DEVICE, \
- { TYPE_MEMORY_DEVICE }, { })
+OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(HvBalloon, hv_balloon, \
+ HV_BALLOON, VMBUS_DEVICE, \
+ { TYPE_MEMORY_DEVICE }, { })
#define HV_BALLOON_SET_STATE(hvb, news) \
do { \
@@ -1478,16 +1475,6 @@ static void hv_balloon_ensure_mr(HvBalloon *balloon)
balloon->mr->align = memory_region_get_alignment(hostmem_mr);
}
-static void hv_balloon_free_mr(HvBalloon *balloon)
-{
- if (!balloon->mr) {
- return;
- }
-
- object_unparent(OBJECT(balloon->mr));
- g_clear_pointer(&balloon->mr, g_free);
-}
-
static void hv_balloon_vmdev_realize(VMBusDevice *vdev, Error **errp)
{
ERRP_GUARD();
@@ -1583,7 +1570,7 @@ static void hv_balloon_vmdev_reset(VMBusDevice *vdev)
*/
static void hv_balloon_unrealize_finalize_common(HvBalloon *balloon)
{
- hv_balloon_free_mr(balloon);
+ g_clear_pointer(&balloon->mr, g_free);
balloon->addr = 0;
balloon->memslot_count = 0;
diff --git a/hw/hyperv/syndbg.c b/hw/hyperv/syndbg.c
index 8b8a147..bcdfdf6 100644
--- a/hw/hyperv/syndbg.c
+++ b/hw/hyperv/syndbg.c
@@ -192,7 +192,7 @@ static uint16_t handle_recv_msg(HvSynDbg *syndbg, uint64_t outgpa,
{
uint16_t ret;
g_assert(MSG_BUFSZ >= qemu_target_page_size());
- uint8_t data_buf[MSG_BUFSZ];
+ QEMU_UNINITIALIZED uint8_t data_buf[MSG_BUFSZ];
hwaddr out_len;
void *out_data;
ssize_t recv_byte_count;
@@ -338,7 +338,9 @@ static void hv_syndbg_realize(DeviceState *dev, Error **errp)
return;
}
- qemu_socket_set_nonblock(syndbg->socket);
+ if (!qemu_set_blocking(syndbg->socket, false, errp)) {
+ return;
+ }
syndbg->servaddr.sin_port = htons(syndbg->host_port);
syndbg->servaddr.sin_family = AF_INET;
diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
index d34ce07..6a0ab54 100644
--- a/hw/i386/Kconfig
+++ b/hw/i386/Kconfig
@@ -4,12 +4,17 @@ config X86_FW_OVMF
config SEV
bool
select X86_FW_OVMF
- depends on KVM
+ depends on KVM && X86_64
config SGX
bool
depends on KVM
+config TDX
+ bool
+ select X86_FW_OVMF
+ depends on KVM && X86_64
+
config PC
bool
imply APPLESMC
@@ -26,6 +31,7 @@ config PC
imply QXL
imply SEV
imply SGX
+ imply TDX
imply TEST_DEVICES
imply TPM_CRB
imply TPM_TIS_ISA
@@ -90,9 +96,6 @@ config ISAPC
select ISA_BUS
select PC
select IDE_ISA
- # FIXME: it is in the same file as i440fx, and does not compile
- # if separated
- depends on I440FX
config Q35
bool
@@ -125,6 +128,7 @@ config MICROVM
select I8259
select MC146818RTC
select VIRTIO_MMIO
+ select ACPI_PCI
select ACPI_HW_REDUCED
select PCI_EXPRESS_GENERIC_BRIDGE
select USB_XHCI_SYSBUS
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 61851cc..9446a9f 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -338,405 +338,6 @@ build_facs(GArray *table_data)
g_array_append_vals(table_data, reserved, 40); /* Reserved */
}
-Aml *aml_pci_device_dsm(void)
-{
- Aml *method;
-
- method = aml_method("_DSM", 4, AML_SERIALIZED);
- {
- Aml *params = aml_local(0);
- Aml *pkg = aml_package(2);
- aml_append(pkg, aml_int(0));
- aml_append(pkg, aml_int(0));
- aml_append(method, aml_store(pkg, params));
- aml_append(method,
- aml_store(aml_name("BSEL"), aml_index(params, aml_int(0))));
- aml_append(method,
- aml_store(aml_name("ASUN"), aml_index(params, aml_int(1))));
- aml_append(method,
- aml_return(aml_call5("PDSM", aml_arg(0), aml_arg(1),
- aml_arg(2), aml_arg(3), params))
- );
- }
- return method;
-}
-
-static void build_append_pci_dsm_func0_common(Aml *ctx, Aml *retvar)
-{
- Aml *UUID, *ifctx1;
- uint8_t byte_list[1] = { 0 }; /* nothing supported yet */
-
- aml_append(ctx, aml_store(aml_buffer(1, byte_list), retvar));
- /*
- * PCI Firmware Specification 3.1
- * 4.6. _DSM Definitions for PCI
- */
- UUID = aml_touuid("E5C937D0-3553-4D7A-9117-EA4D19C3434D");
- ifctx1 = aml_if(aml_lnot(aml_equal(aml_arg(0), UUID)));
- {
- /* call is for unsupported UUID, bail out */
- aml_append(ifctx1, aml_return(retvar));
- }
- aml_append(ctx, ifctx1);
-
- ifctx1 = aml_if(aml_lless(aml_arg(1), aml_int(2)));
- {
- /* call is for unsupported REV, bail out */
- aml_append(ifctx1, aml_return(retvar));
- }
- aml_append(ctx, ifctx1);
-}
-
-static Aml *aml_pci_edsm(void)
-{
- Aml *method, *ifctx;
- Aml *zero = aml_int(0);
- Aml *func = aml_arg(2);
- Aml *ret = aml_local(0);
- Aml *aidx = aml_local(1);
- Aml *params = aml_arg(4);
-
- method = aml_method("EDSM", 5, AML_SERIALIZED);
-
- /* get supported functions */
- ifctx = aml_if(aml_equal(func, zero));
- {
- /* 1: have supported functions */
- /* 7: support for function 7 */
- const uint8_t caps = 1 | BIT(7);
- build_append_pci_dsm_func0_common(ifctx, ret);
- aml_append(ifctx, aml_store(aml_int(caps), aml_index(ret, zero)));
- aml_append(ifctx, aml_return(ret));
- }
- aml_append(method, ifctx);
-
- /* handle specific functions requests */
- /*
- * PCI Firmware Specification 3.1
- * 4.6.7. _DSM for Naming a PCI or PCI Express Device Under
- * Operating Systems
- */
- ifctx = aml_if(aml_equal(func, aml_int(7)));
- {
- Aml *pkg = aml_package(2);
- aml_append(pkg, zero);
- /* optional, if not impl. should return null string */
- aml_append(pkg, aml_string("%s", ""));
- aml_append(ifctx, aml_store(pkg, ret));
-
- /*
- * IASL is fine when initializing Package with computational data,
- * however it makes guest unhappy /it fails to process such AML/.
- * So use runtime assignment to set acpi-index after initializer
- * to make OSPM happy.
- */
- aml_append(ifctx,
- aml_store(aml_derefof(aml_index(params, aml_int(0))), aidx));
- aml_append(ifctx, aml_store(aidx, aml_index(ret, zero)));
- aml_append(ifctx, aml_return(ret));
- }
- aml_append(method, ifctx);
-
- return method;
-}
-
-static Aml *aml_pci_static_endpoint_dsm(PCIDevice *pdev)
-{
- Aml *method;
-
- g_assert(pdev->acpi_index != 0);
- method = aml_method("_DSM", 4, AML_SERIALIZED);
- {
- Aml *params = aml_local(0);
- Aml *pkg = aml_package(1);
- aml_append(pkg, aml_int(pdev->acpi_index));
- aml_append(method, aml_store(pkg, params));
- aml_append(method,
- aml_return(aml_call5("EDSM", aml_arg(0), aml_arg(1),
- aml_arg(2), aml_arg(3), params))
- );
- }
- return method;
-}
-
-static void build_append_pcihp_notify_entry(Aml *method, int slot)
-{
- Aml *if_ctx;
- int32_t devfn = PCI_DEVFN(slot, 0);
-
- if_ctx = aml_if(aml_and(aml_arg(0), aml_int(0x1U << slot), NULL));
- aml_append(if_ctx, aml_notify(aml_name("S%.02X", devfn), aml_arg(1)));
- aml_append(method, if_ctx);
-}
-
-static bool is_devfn_ignored_generic(const int devfn, const PCIBus *bus)
-{
- const PCIDevice *pdev = bus->devices[devfn];
-
- if (PCI_FUNC(devfn)) {
- if (IS_PCI_BRIDGE(pdev)) {
- /*
- * Ignore only hotplugged PCI bridges on !0 functions, but
- * allow describing cold plugged bridges on all functions
- */
- if (DEVICE(pdev)->hotplugged) {
- return true;
- }
- }
- }
- return false;
-}
-
-static bool is_devfn_ignored_hotplug(const int devfn, const PCIBus *bus)
-{
- PCIDevice *pdev = bus->devices[devfn];
- if (pdev) {
- return is_devfn_ignored_generic(devfn, bus) ||
- !DEVICE_GET_CLASS(pdev)->hotpluggable ||
- /* Cold plugged bridges aren't themselves hot-pluggable */
- (IS_PCI_BRIDGE(pdev) && !DEVICE(pdev)->hotplugged);
- } else { /* non populated slots */
- /*
- * hotplug is supported only for non-multifunction device
- * so generate device description only for function 0
- */
- if (PCI_FUNC(devfn) ||
- (pci_bus_is_express(bus) && PCI_SLOT(devfn) > 0)) {
- return true;
- }
- }
- return false;
-}
-
-void build_append_pcihp_slots(Aml *parent_scope, PCIBus *bus)
-{
- int devfn;
- Aml *dev, *notify_method = NULL, *method;
- QObject *bsel = object_property_get_qobject(OBJECT(bus),
- ACPI_PCIHP_PROP_BSEL, NULL);
- uint64_t bsel_val = qnum_get_uint(qobject_to(QNum, bsel));
- qobject_unref(bsel);
-
- aml_append(parent_scope, aml_name_decl("BSEL", aml_int(bsel_val)));
- notify_method = aml_method("DVNT", 2, AML_NOTSERIALIZED);
-
- for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
- int slot = PCI_SLOT(devfn);
- int adr = slot << 16 | PCI_FUNC(devfn);
-
- if (is_devfn_ignored_hotplug(devfn, bus)) {
- continue;
- }
-
- if (bus->devices[devfn]) {
- dev = aml_scope("S%.02X", devfn);
- } else {
- dev = aml_device("S%.02X", devfn);
- aml_append(dev, aml_name_decl("_ADR", aml_int(adr)));
- }
-
- /*
- * Can't declare _SUN here for every device as it changes 'slot'
- * enumeration order in linux kernel, so use another variable for it
- */
- aml_append(dev, aml_name_decl("ASUN", aml_int(slot)));
- aml_append(dev, aml_pci_device_dsm());
-
- aml_append(dev, aml_name_decl("_SUN", aml_int(slot)));
- /* add _EJ0 to make slot hotpluggable */
- method = aml_method("_EJ0", 1, AML_NOTSERIALIZED);
- aml_append(method,
- aml_call2("PCEJ", aml_name("BSEL"), aml_name("_SUN"))
- );
- aml_append(dev, method);
-
- build_append_pcihp_notify_entry(notify_method, slot);
-
- /* device descriptor has been composed, add it into parent context */
- aml_append(parent_scope, dev);
- }
- aml_append(parent_scope, notify_method);
-}
-
-void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus)
-{
- int devfn;
- Aml *dev;
-
- for (devfn = 0; devfn < ARRAY_SIZE(bus->devices); devfn++) {
- /* ACPI spec: 1.0b: Table 6-2 _ADR Object Bus Types, PCI type */
- int adr = PCI_SLOT(devfn) << 16 | PCI_FUNC(devfn);
- PCIDevice *pdev = bus->devices[devfn];
-
- if (!pdev || is_devfn_ignored_generic(devfn, bus)) {
- continue;
- }
-
- /* start to compose PCI device descriptor */
- dev = aml_device("S%.02X", devfn);
- aml_append(dev, aml_name_decl("_ADR", aml_int(adr)));
-
- call_dev_aml_func(DEVICE(bus->devices[devfn]), dev);
- /* add _DSM if device has acpi-index set */
- if (pdev->acpi_index &&
- !object_property_get_bool(OBJECT(pdev), "hotpluggable",
- &error_abort)) {
- aml_append(dev, aml_pci_static_endpoint_dsm(pdev));
- }
-
- /* device descriptor has been composed, add it into parent context */
- aml_append(parent_scope, dev);
- }
-}
-
-static bool build_append_notification_callback(Aml *parent_scope,
- const PCIBus *bus)
-{
- Aml *method;
- PCIBus *sec;
- QObject *bsel;
- int nr_notifiers = 0;
- GQueue *pcnt_bus_list = g_queue_new();
-
- QLIST_FOREACH(sec, &bus->child, sibling) {
- Aml *br_scope = aml_scope("S%.02X", sec->parent_dev->devfn);
- if (pci_bus_is_root(sec)) {
- continue;
- }
- nr_notifiers = nr_notifiers +
- build_append_notification_callback(br_scope, sec);
- /*
- * add new child scope to parent
- * and keep track of bus that have PCNT,
- * bus list is used later to call children PCNTs from this level PCNT
- */
- if (nr_notifiers) {
- g_queue_push_tail(pcnt_bus_list, sec);
- aml_append(parent_scope, br_scope);
- }
- }
-
- /*
- * Append PCNT method to notify about events on local and child buses.
- * ps: hostbridge might not have hotplug (bsel) enabled but might have
- * child bridges that do have bsel.
- */
- method = aml_method("PCNT", 0, AML_NOTSERIALIZED);
-
- /* If bus supports hotplug select it and notify about local events */
- bsel = object_property_get_qobject(OBJECT(bus), ACPI_PCIHP_PROP_BSEL, NULL);
- if (bsel) {
- uint64_t bsel_val = qnum_get_uint(qobject_to(QNum, bsel));
-
- aml_append(method, aml_store(aml_int(bsel_val), aml_name("BNUM")));
- aml_append(method, aml_call2("DVNT", aml_name("PCIU"),
- aml_int(1))); /* Device Check */
- aml_append(method, aml_call2("DVNT", aml_name("PCID"),
- aml_int(3))); /* Eject Request */
- nr_notifiers++;
- }
-
- /* Notify about child bus events in any case */
- while ((sec = g_queue_pop_head(pcnt_bus_list))) {
- aml_append(method, aml_name("^S%.02X.PCNT", sec->parent_dev->devfn));
- }
-
- aml_append(parent_scope, method);
- qobject_unref(bsel);
- g_queue_free(pcnt_bus_list);
- return !!nr_notifiers;
-}
-
-static Aml *aml_pci_pdsm(void)
-{
- Aml *method, *ifctx, *ifctx1;
- Aml *ret = aml_local(0);
- Aml *caps = aml_local(1);
- Aml *acpi_index = aml_local(2);
- Aml *zero = aml_int(0);
- Aml *one = aml_int(1);
- Aml *not_supp = aml_int(0xFFFFFFFF);
- Aml *func = aml_arg(2);
- Aml *params = aml_arg(4);
- Aml *bnum = aml_derefof(aml_index(params, aml_int(0)));
- Aml *sunum = aml_derefof(aml_index(params, aml_int(1)));
-
- method = aml_method("PDSM", 5, AML_SERIALIZED);
-
- /* get supported functions */
- ifctx = aml_if(aml_equal(func, zero));
- {
- build_append_pci_dsm_func0_common(ifctx, ret);
-
- aml_append(ifctx, aml_store(zero, caps));
- aml_append(ifctx,
- aml_store(aml_call2("AIDX", bnum, sunum), acpi_index));
- /*
- * advertise function 7 if device has acpi-index
- * acpi_index values:
- * 0: not present (default value)
- * FFFFFFFF: not supported (old QEMU without PIDX reg)
- * other: device's acpi-index
- */
- ifctx1 = aml_if(aml_lnot(
- aml_or(aml_equal(acpi_index, zero),
- aml_equal(acpi_index, not_supp), NULL)
- ));
- {
- /* have supported functions */
- aml_append(ifctx1, aml_or(caps, one, caps));
- /* support for function 7 */
- aml_append(ifctx1,
- aml_or(caps, aml_shiftleft(one, aml_int(7)), caps));
- }
- aml_append(ifctx, ifctx1);
-
- aml_append(ifctx, aml_store(caps, aml_index(ret, zero)));
- aml_append(ifctx, aml_return(ret));
- }
- aml_append(method, ifctx);
-
- /* handle specific functions requests */
- /*
- * PCI Firmware Specification 3.1
- * 4.6.7. _DSM for Naming a PCI or PCI Express Device Under
- * Operating Systems
- */
- ifctx = aml_if(aml_equal(func, aml_int(7)));
- {
- Aml *pkg = aml_package(2);
-
- aml_append(ifctx, aml_store(aml_call2("AIDX", bnum, sunum), acpi_index));
- aml_append(ifctx, aml_store(pkg, ret));
- /*
- * Windows calls func=7 without checking if it's available,
- * as workaround Microsoft has suggested to return invalid for func7
- * Package, so return 2 elements package but only initialize elements
- * when acpi_index is supported and leave them uninitialized, which
- * leads elements to being Uninitialized ObjectType and should trip
- * Windows into discarding result as an unexpected and prevent setting
- * bogus 'PCI Label' on the device.
- */
- ifctx1 = aml_if(aml_lnot(aml_lor(
- aml_equal(acpi_index, zero), aml_equal(acpi_index, not_supp)
- )));
- {
- aml_append(ifctx1, aml_store(acpi_index, aml_index(ret, zero)));
- /*
- * optional, if not impl. should return null string
- */
- aml_append(ifctx1, aml_store(aml_string("%s", ""),
- aml_index(ret, one)));
- }
- aml_append(ifctx, ifctx1);
-
- aml_append(ifctx, aml_return(ret));
- }
-
- aml_append(method, ifctx);
- return method;
-}
-
/*
* build_prt - Define interrupt routing rules
*
@@ -1227,112 +828,6 @@ static Aml *build_q35_dram_controller(const AcpiMcfgInfo *mcfg)
return dev;
}
-static void build_x86_acpi_pci_hotplug(Aml *table, uint64_t pcihp_addr)
-{
- Aml *scope;
- Aml *field;
- Aml *method;
-
- scope = aml_scope("_SB.PCI0");
-
- aml_append(scope,
- aml_operation_region("PCST", AML_SYSTEM_IO, aml_int(pcihp_addr), 0x08));
- field = aml_field("PCST", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS);
- aml_append(field, aml_named_field("PCIU", 32));
- aml_append(field, aml_named_field("PCID", 32));
- aml_append(scope, field);
-
- aml_append(scope,
- aml_operation_region("SEJ", AML_SYSTEM_IO,
- aml_int(pcihp_addr + ACPI_PCIHP_SEJ_BASE), 0x04));
- field = aml_field("SEJ", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS);
- aml_append(field, aml_named_field("B0EJ", 32));
- aml_append(scope, field);
-
- aml_append(scope,
- aml_operation_region("BNMR", AML_SYSTEM_IO,
- aml_int(pcihp_addr + ACPI_PCIHP_BNMR_BASE), 0x08));
- field = aml_field("BNMR", AML_DWORD_ACC, AML_NOLOCK, AML_WRITE_AS_ZEROS);
- aml_append(field, aml_named_field("BNUM", 32));
- aml_append(field, aml_named_field("PIDX", 32));
- aml_append(scope, field);
-
- aml_append(scope, aml_mutex("BLCK", 0));
-
- method = aml_method("PCEJ", 2, AML_NOTSERIALIZED);
- aml_append(method, aml_acquire(aml_name("BLCK"), 0xFFFF));
- aml_append(method, aml_store(aml_arg(0), aml_name("BNUM")));
- aml_append(method,
- aml_store(aml_shiftleft(aml_int(1), aml_arg(1)), aml_name("B0EJ")));
- aml_append(method, aml_release(aml_name("BLCK")));
- aml_append(method, aml_return(aml_int(0)));
- aml_append(scope, method);
-
- method = aml_method("AIDX", 2, AML_NOTSERIALIZED);
- aml_append(method, aml_acquire(aml_name("BLCK"), 0xFFFF));
- aml_append(method, aml_store(aml_arg(0), aml_name("BNUM")));
- aml_append(method,
- aml_store(aml_shiftleft(aml_int(1), aml_arg(1)), aml_name("PIDX")));
- aml_append(method, aml_store(aml_name("PIDX"), aml_local(0)));
- aml_append(method, aml_release(aml_name("BLCK")));
- aml_append(method, aml_return(aml_local(0)));
- aml_append(scope, method);
-
- aml_append(scope, aml_pci_pdsm());
-
- aml_append(table, scope);
-}
-
-static Aml *build_q35_osc_method(bool enable_native_pcie_hotplug)
-{
- Aml *if_ctx;
- Aml *if_ctx2;
- Aml *else_ctx;
- Aml *method;
- Aml *a_cwd1 = aml_name("CDW1");
- Aml *a_ctrl = aml_local(0);
-
- method = aml_method("_OSC", 4, AML_NOTSERIALIZED);
- aml_append(method, aml_create_dword_field(aml_arg(3), aml_int(0), "CDW1"));
-
- if_ctx = aml_if(aml_equal(
- aml_arg(0), aml_touuid("33DB4D5B-1FF7-401C-9657-7441C03DD766")));
- aml_append(if_ctx, aml_create_dword_field(aml_arg(3), aml_int(4), "CDW2"));
- aml_append(if_ctx, aml_create_dword_field(aml_arg(3), aml_int(8), "CDW3"));
-
- aml_append(if_ctx, aml_store(aml_name("CDW3"), a_ctrl));
-
- /*
- * Always allow native PME, AER (no dependencies)
- * Allow SHPC (PCI bridges can have SHPC controller)
- * Disable PCIe Native Hot-plug if ACPI PCI Hot-plug is enabled.
- */
- aml_append(if_ctx, aml_and(a_ctrl,
- aml_int(0x1E | (enable_native_pcie_hotplug ? 0x1 : 0x0)), a_ctrl));
-
- if_ctx2 = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(1))));
- /* Unknown revision */
- aml_append(if_ctx2, aml_or(a_cwd1, aml_int(0x08), a_cwd1));
- aml_append(if_ctx, if_ctx2);
-
- if_ctx2 = aml_if(aml_lnot(aml_equal(aml_name("CDW3"), a_ctrl)));
- /* Capabilities bits were masked */
- aml_append(if_ctx2, aml_or(a_cwd1, aml_int(0x10), a_cwd1));
- aml_append(if_ctx, if_ctx2);
-
- /* Update DWORD3 in the buffer */
- aml_append(if_ctx, aml_store(a_ctrl, aml_name("CDW3")));
- aml_append(method, if_ctx);
-
- else_ctx = aml_else();
- /* Unrecognized UUID */
- aml_append(else_ctx, aml_or(a_cwd1, aml_int(4), a_cwd1));
- aml_append(method, else_ctx);
-
- aml_append(method, aml_return(aml_arg(3)));
- return method;
-}
-
static void build_acpi0017(Aml *table)
{
Aml *dev, *scope, *method;
@@ -1389,12 +884,12 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
dev = aml_device("PCI0");
aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03")));
aml_append(dev, aml_name_decl("_UID", aml_int(pcmc->pci_root_uid)));
- aml_append(dev, aml_pci_edsm());
+ aml_append(dev, build_pci_bridge_edsm());
aml_append(sb_scope, dev);
aml_append(dsdt, sb_scope);
if (pm->pcihp_bridge_en || pm->pcihp_root_en) {
- build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base);
+ build_acpi_pci_hotplug(dsdt, AML_SYSTEM_IO, pm->pcihp_io_base);
}
build_piix4_pci0_int(dsdt);
} else if (q35) {
@@ -1403,8 +898,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08")));
aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03")));
aml_append(dev, aml_name_decl("_UID", aml_int(pcmc->pci_root_uid)));
- aml_append(dev, build_q35_osc_method(!pm->pcihp_bridge_en));
- aml_append(dev, aml_pci_edsm());
+ aml_append(dev, build_pci_host_bridge_osc_method(!pm->pcihp_bridge_en));
+ aml_append(dev, build_pci_bridge_edsm());
aml_append(sb_scope, dev);
if (mcfg_valid) {
aml_append(sb_scope, build_q35_dram_controller(&mcfg));
@@ -1438,7 +933,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
aml_append(dsdt, sb_scope);
if (pm->pcihp_bridge_en) {
- build_x86_acpi_pci_hotplug(dsdt, pm->pcihp_io_base);
+ build_acpi_pci_hotplug(dsdt, AML_SYSTEM_IO, pm->pcihp_io_base);
}
build_q35_pci0_int(dsdt);
}
@@ -1525,7 +1020,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03")));
/* Expander bridges do not have ACPI PCI Hot-plug enabled */
- aml_append(dev, build_q35_osc_method(true));
+ aml_append(dev, build_pci_host_bridge_osc_method(true));
} else {
aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03")));
}
@@ -1654,19 +1149,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker,
/* reserve PCIHP resources */
if (pm->pcihp_io_len && (pm->pcihp_bridge_en || pm->pcihp_root_en)) {
- dev = aml_device("PHPR");
- aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A06")));
- aml_append(dev,
- aml_name_decl("_UID", aml_string("PCI Hotplug resources")));
- /* device present, functioning, decoding, not shown in UI */
- aml_append(dev, aml_name_decl("_STA", aml_int(0xB)));
- crs = aml_resource_template();
- aml_append(crs,
- aml_io(AML_DECODE16, pm->pcihp_io_base, pm->pcihp_io_base, 1,
- pm->pcihp_io_len)
- );
- aml_append(dev, aml_name_decl("_CRS", crs));
- aml_append(scope, dev);
+ build_append_pcihp_resources(scope,
+ pm->pcihp_io_base, pm->pcihp_io_len);
}
aml_append(dsdt, scope);
@@ -2379,7 +1863,11 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id,
/* IOMMU info */
build_append_int_noprefix(table_data, 0, 2);
/* IOMMU Attributes */
- build_append_int_noprefix(table_data, 0, 4);
+ if (!s->iommu.dma_translation) {
+ build_append_int_noprefix(table_data, (1UL << 0) /* HATDis */, 4);
+ } else {
+ build_append_int_noprefix(table_data, 0, 4);
+ }
/* EFR Register Image */
build_append_int_noprefix(table_data,
amdvi_extended_feature_register(s),
diff --git a/hw/i386/acpi-build.h b/hw/i386/acpi-build.h
index 275ec05..8ba3c33 100644
--- a/hw/i386/acpi-build.h
+++ b/hw/i386/acpi-build.h
@@ -5,10 +5,6 @@
extern const struct AcpiGenericAddress x86_nvdimm_acpi_dsmio;
-/* PCI Hot-plug registers' base. See docs/specs/acpi_pci_hotplug.rst */
-#define ACPI_PCIHP_SEJ_BASE 0x8
-#define ACPI_PCIHP_BNMR_BASE 0x10
-
void acpi_setup(void);
Object *acpi_get_i386_pci_host(void);
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 0775c8f..378e0cb 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -33,6 +33,7 @@
#include "hw/i386/apic-msidef.h"
#include "hw/qdev-properties.h"
#include "kvm/kvm_i386.h"
+#include "qemu/iova-tree.h"
/* used AMD-Vi MMIO registers */
const char *amdvi_mmio_low[] = {
@@ -66,6 +67,15 @@ struct AMDVIAddressSpace {
MemoryRegion iommu_nodma; /* Alias of shared nodma memory region */
MemoryRegion iommu_ir; /* Device's interrupt remapping region */
AddressSpace as; /* device's corresponding address space */
+
+ /* DMA address translation support */
+ IOMMUNotifierFlag notifier_flags;
+ /* entry in list of Address spaces with registered notifiers */
+ QLIST_ENTRY(AMDVIAddressSpace) next;
+ /* Record DMA translation ranges */
+ IOVATree *iova_tree;
+ /* DMA address translation active */
+ bool addr_translation;
};
/* AMDVI cache entry */
@@ -77,12 +87,29 @@ typedef struct AMDVIIOTLBEntry {
uint64_t page_mask; /* physical page size */
} AMDVIIOTLBEntry;
+/*
+ * These 'fault' reasons have an overloaded meaning since they are not only
+ * intended for describing reasons that generate an IO_PAGE_FAULT as per the AMD
+ * IOMMU specification, but are also used to signal internal errors in the
+ * emulation code.
+ */
+typedef enum AMDVIFaultReason {
+ AMDVI_FR_DTE_RTR_ERR = 1, /* Failure to retrieve DTE */
+ AMDVI_FR_DTE_V, /* DTE[V] = 0 */
+ AMDVI_FR_DTE_TV, /* DTE[TV] = 0 */
+ AMDVI_FR_PT_ROOT_INV, /* Page Table Root ptr invalid */
+ AMDVI_FR_PT_ENTRY_INV, /* Failure to read PTE from guest memory */
+} AMDVIFaultReason;
+
uint64_t amdvi_extended_feature_register(AMDVIState *s)
{
uint64_t feature = AMDVI_DEFAULT_EXT_FEATURES;
if (s->xtsup) {
feature |= AMDVI_FEATURE_XT;
}
+ if (!s->iommu.dma_translation) {
+ feature |= AMDVI_HATS_MODE_RESERVED;
+ }
return feature;
}
@@ -123,8 +150,13 @@ static void amdvi_writew(AMDVIState *s, hwaddr addr, uint16_t val)
uint16_t romask = lduw_le_p(&s->romask[addr]);
uint16_t w1cmask = lduw_le_p(&s->w1cmask[addr]);
uint16_t oldval = lduw_le_p(&s->mmior[addr]);
+
+ uint16_t oldval_preserved = oldval & (romask | w1cmask);
+ uint16_t newval_write = val & ~romask;
+ uint16_t newval_w1c_set = val & w1cmask;
+
stw_le_p(&s->mmior[addr],
- ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
+ (oldval_preserved | newval_write) & ~newval_w1c_set);
}
static void amdvi_writel(AMDVIState *s, hwaddr addr, uint32_t val)
@@ -132,23 +164,33 @@ static void amdvi_writel(AMDVIState *s, hwaddr addr, uint32_t val)
uint32_t romask = ldl_le_p(&s->romask[addr]);
uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]);
uint32_t oldval = ldl_le_p(&s->mmior[addr]);
+
+ uint32_t oldval_preserved = oldval & (romask | w1cmask);
+ uint32_t newval_write = val & ~romask;
+ uint32_t newval_w1c_set = val & w1cmask;
+
stl_le_p(&s->mmior[addr],
- ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
+ (oldval_preserved | newval_write) & ~newval_w1c_set);
}
static void amdvi_writeq(AMDVIState *s, hwaddr addr, uint64_t val)
{
uint64_t romask = ldq_le_p(&s->romask[addr]);
uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]);
- uint32_t oldval = ldq_le_p(&s->mmior[addr]);
+ uint64_t oldval = ldq_le_p(&s->mmior[addr]);
+
+ uint64_t oldval_preserved = oldval & (romask | w1cmask);
+ uint64_t newval_write = val & ~romask;
+ uint64_t newval_w1c_set = val & w1cmask;
+
stq_le_p(&s->mmior[addr],
- ((oldval & romask) | (val & ~romask)) & ~(val & w1cmask));
+ (oldval_preserved | newval_write) & ~newval_w1c_set);
}
-/* OR a 64-bit register with a 64-bit value */
+/* AND a 64-bit register with a 64-bit value */
static bool amdvi_test_mask(AMDVIState *s, hwaddr addr, uint64_t val)
{
- return amdvi_readq(s, addr) | val;
+ return amdvi_readq(s, addr) & val;
}
/* OR a 64-bit register with a 64-bit value storing result in the register */
@@ -177,19 +219,31 @@ static void amdvi_generate_msi_interrupt(AMDVIState *s)
}
}
+static uint32_t get_next_eventlog_entry(AMDVIState *s)
+{
+ uint32_t evtlog_size = s->evtlog_len * AMDVI_EVENT_LEN;
+ return (s->evtlog_tail + AMDVI_EVENT_LEN) % evtlog_size;
+}
+
static void amdvi_log_event(AMDVIState *s, uint64_t *evt)
{
+ uint32_t evtlog_tail_next;
+
/* event logging not enabled */
if (!s->evtlog_enabled || amdvi_test_mask(s, AMDVI_MMIO_STATUS,
AMDVI_MMIO_STATUS_EVT_OVF)) {
return;
}
+ evtlog_tail_next = get_next_eventlog_entry(s);
+
/* event log buffer full */
- if (s->evtlog_tail >= s->evtlog_len) {
- amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_OVF);
- /* generate interrupt */
- amdvi_generate_msi_interrupt(s);
+ if (evtlog_tail_next == s->evtlog_head) {
+ /* generate overflow interrupt */
+ if (s->evtlog_intr) {
+ amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVT_OVF);
+ amdvi_generate_msi_interrupt(s);
+ }
return;
}
@@ -198,9 +252,13 @@ static void amdvi_log_event(AMDVIState *s, uint64_t *evt)
trace_amdvi_evntlog_fail(s->evtlog, s->evtlog_tail);
}
- s->evtlog_tail += AMDVI_EVENT_LEN;
- amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_COMP_INT);
- amdvi_generate_msi_interrupt(s);
+ s->evtlog_tail = evtlog_tail_next;
+ amdvi_writeq_raw(s, AMDVI_MMIO_EVENT_TAIL, s->evtlog_tail);
+
+ if (s->evtlog_intr) {
+ amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_EVENT_INT);
+ amdvi_generate_msi_interrupt(s);
+ }
}
static void amdvi_setevent_bits(uint64_t *buffer, uint64_t value, int start,
@@ -407,18 +465,704 @@ static void amdvi_completion_wait(AMDVIState *s, uint64_t *cmd)
trace_amdvi_completion_wait(addr, data);
}
+static inline uint64_t amdvi_get_perms(uint64_t entry)
+{
+ return (entry & (AMDVI_DEV_PERM_READ | AMDVI_DEV_PERM_WRITE)) >>
+ AMDVI_DEV_PERM_SHIFT;
+}
+
+/* validate that reserved bits are honoured */
+static bool amdvi_validate_dte(AMDVIState *s, uint16_t devid,
+ uint64_t *dte)
+{
+
+ uint64_t root;
+
+ if ((dte[0] & AMDVI_DTE_QUAD0_RESERVED) ||
+ (dte[1] & AMDVI_DTE_QUAD1_RESERVED) ||
+ (dte[2] & AMDVI_DTE_QUAD2_RESERVED) ||
+ (dte[3] & AMDVI_DTE_QUAD3_RESERVED)) {
+ amdvi_log_illegaldevtab_error(s, devid,
+ s->devtab +
+ devid * AMDVI_DEVTAB_ENTRY_SIZE, 0);
+ return false;
+ }
+
+ /*
+ * 1 = Host Address Translation is not supported. Value in MMIO Offset
+ * 0030h[HATS] is not meaningful. A non-zero host page table root pointer
+ * in the DTE would result in an ILLEGAL_DEV_TABLE_ENTRY event.
+ */
+ root = (dte[0] & AMDVI_DEV_PT_ROOT_MASK) >> 12;
+ if (root && !s->iommu.dma_translation) {
+ amdvi_log_illegaldevtab_error(s, devid,
+ s->devtab +
+ devid * AMDVI_DEVTAB_ENTRY_SIZE, 0);
+ return false;
+ }
+
+ return true;
+}
+
+/* get a device table entry given the devid */
+static bool amdvi_get_dte(AMDVIState *s, int devid, uint64_t *entry)
+{
+ uint32_t offset = devid * AMDVI_DEVTAB_ENTRY_SIZE;
+
+ if (dma_memory_read(&address_space_memory, s->devtab + offset, entry,
+ AMDVI_DEVTAB_ENTRY_SIZE, MEMTXATTRS_UNSPECIFIED)) {
+ trace_amdvi_dte_get_fail(s->devtab, offset);
+ /* log error accessing dte */
+ amdvi_log_devtab_error(s, devid, s->devtab + offset, 0);
+ return false;
+ }
+
+ *entry = le64_to_cpu(*entry);
+ if (!amdvi_validate_dte(s, devid, entry)) {
+ trace_amdvi_invalid_dte(entry[0]);
+ return false;
+ }
+
+ return true;
+}
+
+/* get pte translation mode */
+static inline uint8_t get_pte_translation_mode(uint64_t pte)
+{
+ return (pte >> AMDVI_DEV_MODE_RSHIFT) & AMDVI_DEV_MODE_MASK;
+}
+
+static inline uint64_t amdvi_get_pte_entry(AMDVIState *s, uint64_t pte_addr,
+ uint16_t devid)
+{
+ uint64_t pte;
+
+ if (dma_memory_read(&address_space_memory, pte_addr,
+ &pte, sizeof(pte), MEMTXATTRS_UNSPECIFIED)) {
+ trace_amdvi_get_pte_hwerror(pte_addr);
+ amdvi_log_pagetab_error(s, devid, pte_addr, 0);
+ pte = (uint64_t)-1;
+ return pte;
+ }
+
+ pte = le64_to_cpu(pte);
+ return pte;
+}
+
+static int amdvi_as_to_dte(AMDVIAddressSpace *as, uint64_t *dte)
+{
+ uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn);
+ AMDVIState *s = as->iommu_state;
+
+ if (!amdvi_get_dte(s, devid, dte)) {
+ /* Unable to retrieve DTE for devid */
+ return -AMDVI_FR_DTE_RTR_ERR;
+ }
+
+ if (!(dte[0] & AMDVI_DEV_VALID)) {
+ /* DTE[V] not set, address is passed untranslated for devid */
+ return -AMDVI_FR_DTE_V;
+ }
+
+ if (!(dte[0] & AMDVI_DEV_TRANSLATION_VALID)) {
+ /* DTE[TV] not set, host page table not valid for devid */
+ return -AMDVI_FR_DTE_TV;
+ }
+ return 0;
+}
+
+/*
+ * For a PTE encoding a large page, return the page size it encodes as described
+ * by the AMD IOMMU Specification Table 14: Example Page Size Encodings.
+ * No need to adjust the value of the PTE to point to the first PTE in the large
+ * page since the encoding guarantees all "base" PTEs in the large page are the
+ * same.
+ */
+static uint64_t large_pte_page_size(uint64_t pte)
+{
+ assert(PTE_NEXT_LEVEL(pte) == 7);
+
+ /* Determine size of the large/contiguous page encoded in the PTE */
+ return PTE_LARGE_PAGE_SIZE(pte);
+}
+
+/*
+ * Helper function to fetch a PTE using AMD v1 pgtable format.
+ * On successful page walk, returns 0 and pte parameter points to a valid PTE.
+ * On failure, returns:
+ * -AMDVI_FR_PT_ROOT_INV: A page walk is not possible due to conditions like DTE
+ * with invalid permissions, Page Table Root can not be read from DTE, or a
+ * larger IOVA than supported by page table level encoded in DTE[Mode].
+ * -AMDVI_FR_PT_ENTRY_INV: A PTE could not be read from guest memory during a
+ * page table walk. This means that the DTE has valid data, but one of the
+ * lower level entries in the Page Table could not be read.
+ */
+static uint64_t fetch_pte(AMDVIAddressSpace *as, hwaddr address, uint64_t dte,
+ uint64_t *pte, hwaddr *page_size)
+{
+ IOMMUAccessFlags perms = amdvi_get_perms(dte);
+
+ uint8_t level, mode;
+ uint64_t pte_addr;
+
+ *pte = dte;
+ *page_size = 0;
+
+ if (perms == IOMMU_NONE) {
+ return -AMDVI_FR_PT_ROOT_INV;
+ }
+
+ /*
+ * The Linux kernel driver initializes the default mode to 3, corresponding
+ * to a 39-bit GPA space, where each entry in the pagetable translates to a
+ * 1GB (2^30) page size.
+ */
+ level = mode = get_pte_translation_mode(dte);
+ assert(mode > 0 && mode < 7);
+
+ /*
+ * If IOVA is larger than the max supported by the current pgtable level,
+ * there is nothing to do.
+ */
+ if (address > PT_LEVEL_MAX_ADDR(mode - 1)) {
+ /* IOVA too large for the current DTE */
+ return -AMDVI_FR_PT_ROOT_INV;
+ }
+
+ do {
+ level -= 1;
+
+ /* Update the page_size */
+ *page_size = PTE_LEVEL_PAGE_SIZE(level);
+
+ /* Permission bits are ANDed at every level, including the DTE */
+ perms &= amdvi_get_perms(*pte);
+ if (perms == IOMMU_NONE) {
+ return 0;
+ }
+
+ /* Not Present */
+ if (!IOMMU_PTE_PRESENT(*pte)) {
+ return 0;
+ }
+
+ /* Large or Leaf PTE found */
+ if (PTE_NEXT_LEVEL(*pte) == 7 || PTE_NEXT_LEVEL(*pte) == 0) {
+ /* Leaf PTE found */
+ break;
+ }
+
+ /*
+ * Index the pgtable using the IOVA bits corresponding to current level
+ * and walk down to the lower level.
+ */
+ pte_addr = NEXT_PTE_ADDR(*pte, level, address);
+ *pte = amdvi_get_pte_entry(as->iommu_state, pte_addr, as->devfn);
+
+ if (*pte == (uint64_t)-1) {
+ /*
+ * A returned PTE of -1 indicates a failure to read the page table
+ * entry from guest memory.
+ */
+ if (level == mode - 1) {
+ /* Failure to retrieve the Page Table from Root Pointer */
+ *page_size = 0;
+ return -AMDVI_FR_PT_ROOT_INV;
+ } else {
+ /* Failure to read PTE. Page walk skips a page_size chunk */
+ return -AMDVI_FR_PT_ENTRY_INV;
+ }
+ }
+ } while (level > 0);
+
+ assert(PTE_NEXT_LEVEL(*pte) == 0 || PTE_NEXT_LEVEL(*pte) == 7 ||
+ level == 0);
+ /*
+ * Page walk ends when Next Level field on PTE shows that either a leaf PTE
+ * or a series of large PTEs have been reached. In the latter case, even if
+ * the range starts in the middle of a contiguous page, the returned PTE
+ * must be the first PTE of the series.
+ */
+ if (PTE_NEXT_LEVEL(*pte) == 7) {
+ /* Update page_size with the large PTE page size */
+ *page_size = large_pte_page_size(*pte);
+ }
+
+ return 0;
+}
+
+/*
+ * Invoke notifiers registered for the address space. Update record of mapped
+ * ranges in IOVA Tree.
+ */
+static void amdvi_notify_iommu(AMDVIAddressSpace *as, IOMMUTLBEvent *event)
+{
+ IOMMUTLBEntry *entry = &event->entry;
+
+ DMAMap target = {
+ .iova = entry->iova,
+ .size = entry->addr_mask,
+ .translated_addr = entry->translated_addr,
+ .perm = entry->perm,
+ };
+
+ /*
+ * Search the IOVA Tree for an existing translation for the target, and skip
+ * the notification if the mapping is already recorded.
+ * When the guest uses large pages, comparing against the record makes it
+ * possible to determine the size of the original MAP and adjust the UNMAP
+ * request to match it. This avoids failed checks against the mappings kept
+ * by the VFIO kernel driver.
+ */
+ const DMAMap *mapped = iova_tree_find(as->iova_tree, &target);
+
+ if (event->type == IOMMU_NOTIFIER_UNMAP) {
+ if (!mapped) {
+ /* No record exists of this mapping, nothing to do */
+ return;
+ }
+ /*
+ * Adjust the size based on the original record. This is essential to
+ * determine when large/contiguous pages are used, since the guest has
+ * already cleared the PTE (erasing the pagesize encoded on it) before
+ * issuing the invalidation command.
+ */
+ if (mapped->size != target.size) {
+ assert(mapped->size > target.size);
+ target.size = mapped->size;
+ /* Adjust event to invoke notifier with correct range */
+ entry->addr_mask = mapped->size;
+ }
+ iova_tree_remove(as->iova_tree, target);
+ } else { /* IOMMU_NOTIFIER_MAP */
+ if (mapped) {
+ /*
+ * If a mapping is present and matches the request, skip the
+ * notification.
+ */
+ if (!memcmp(mapped, &target, sizeof(DMAMap))) {
+ return;
+ } else {
+ /*
+ * This should never happen unless a buggy guest OS omits or
+ * sends incorrect invalidation(s). Report an error in the event
+ * it does happen.
+ */
+ error_report("Found conflicting translation. This could be due "
+ "to an incorrect or missing invalidation command");
+ }
+ }
+ /* Record the new mapping */
+ iova_tree_insert(as->iova_tree, &target);
+ }
+
+ /* Invoke the notifiers registered for this address space */
+ memory_region_notify_iommu(&as->iommu, 0, *event);
+}
+
+/*
+ * Walk the guest page table for an IOVA and range and signal the registered
+ * notifiers to sync the shadow page tables in the host.
+ * Must be called with a valid DTE for DMA remapping i.e. V=1,TV=1
+ */
+static void amdvi_sync_shadow_page_table_range(AMDVIAddressSpace *as,
+ uint64_t *dte, hwaddr addr,
+ uint64_t size, bool send_unmap)
+{
+ IOMMUTLBEvent event;
+
+ hwaddr page_mask, pagesize;
+ hwaddr iova = addr;
+ hwaddr end = iova + size - 1;
+
+ uint64_t pte;
+ int ret;
+
+ while (iova < end) {
+
+ ret = fetch_pte(as, iova, dte[0], &pte, &pagesize);
+
+ if (ret == -AMDVI_FR_PT_ROOT_INV) {
+ /*
+ * Invalid conditions such as the IOVA being larger than supported
+ * by current page table mode as configured in the DTE, or a failure
+ * to fetch the Page Table from the Page Table Root Pointer in DTE.
+ */
+ assert(pagesize == 0);
+ return;
+ }
+ /* PTE has been validated for major errors and pagesize is set */
+ assert(pagesize);
+ page_mask = ~(pagesize - 1);
+
+ if (ret == -AMDVI_FR_PT_ENTRY_INV) {
+ /*
+ * Failure to read PTE from memory, the pagesize matches the current
+ * level. Unable to determine the region type, so a safe strategy is
+ * to skip the range and continue the page walk.
+ */
+ goto next;
+ }
+
+ event.entry.target_as = &address_space_memory;
+ event.entry.iova = iova & page_mask;
+ /* translated_addr is irrelevant for the unmap case */
+ event.entry.translated_addr = (pte & AMDVI_DEV_PT_ROOT_MASK) &
+ page_mask;
+ event.entry.addr_mask = ~page_mask;
+ event.entry.perm = amdvi_get_perms(pte);
+
+ /*
+ * In cases where the leaf PTE is not found, or it has invalid
+ * permissions, an UNMAP type notification is sent, but only if the
+ * caller requested it.
+ */
+ if (!IOMMU_PTE_PRESENT(pte) || (event.entry.perm == IOMMU_NONE)) {
+ if (!send_unmap) {
+ goto next;
+ }
+ event.type = IOMMU_NOTIFIER_UNMAP;
+ } else {
+ event.type = IOMMU_NOTIFIER_MAP;
+ }
+
+ /*
+ * The following call might need to adjust event.entry.size in cases
+ * where the guest unmapped a series of large pages.
+ */
+ amdvi_notify_iommu(as, &event);
+ /*
+ * In the special scenario where the guest is unmapping a large page,
+ * addr_mask has been adjusted before sending the notification. Update
+ * pagesize accordingly in order to correctly compute the next IOVA.
+ */
+ pagesize = event.entry.addr_mask + 1;
+
+next:
+ iova &= ~(pagesize - 1);
+
+ /* Check for 64-bit overflow and terminate walk in such cases */
+ if ((iova + pagesize) < iova) {
+ break;
+ } else {
+ iova += pagesize;
+ }
+ }
+}
+
+/*
+ * Unmap entire range that the notifier registered for i.e. the full AS.
+ *
+ * This is seemingly technically equivalent to directly calling
+ * memory_region_unmap_iommu_notifier_range(), but it allows to check for
+ * notifier boundaries and issue notifications with ranges within those bounds.
+ */
+static void amdvi_address_space_unmap(AMDVIAddressSpace *as, IOMMUNotifier *n)
+{
+
+ hwaddr start = n->start;
+ hwaddr end = n->end;
+ hwaddr remain;
+ DMAMap map;
+
+ assert(start <= end);
+ remain = end - start + 1;
+
+ /*
+ * Divide the notifier range into chunks that are aligned and do not exceed
+ * the notifier boundaries.
+ */
+ while (remain >= AMDVI_PAGE_SIZE) {
+
+ IOMMUTLBEvent event;
+
+ uint64_t mask = dma_aligned_pow2_mask(start, end, 64);
+
+ event.type = IOMMU_NOTIFIER_UNMAP;
+
+ IOMMUTLBEntry entry = {
+ .target_as = &address_space_memory,
+ .iova = start,
+ .translated_addr = 0, /* irrelevant for unmap case */
+ .addr_mask = mask,
+ .perm = IOMMU_NONE,
+ };
+ event.entry = entry;
+
+ /* Call notifier registered for updates on this address space */
+ memory_region_notify_iommu_one(n, &event);
+
+ start += mask + 1;
+ remain -= mask + 1;
+ }
+
+ assert(!remain);
+
+ map.iova = n->start;
+ map.size = n->end - n->start;
+
+ iova_tree_remove(as->iova_tree, map);
+}
+
+/*
+ * For all the address spaces with notifiers registered, unmap the entire range
+ * the notifier registered for i.e. clear all the address spaces managed by the
+ * IOMMU.
+ */
+static void amdvi_address_space_unmap_all(AMDVIState *s)
+{
+ AMDVIAddressSpace *as;
+ IOMMUNotifier *n;
+
+ QLIST_FOREACH(as, &s->amdvi_as_with_notifiers, next) {
+ IOMMU_NOTIFIER_FOREACH(n, &as->iommu) {
+ amdvi_address_space_unmap(as, n);
+ }
+ }
+}
+
+/*
+ * For every translation present in the IOMMU, construct IOMMUTLBEntry data
+ * and pass it as parameter to notifier callback.
+ */
+static void amdvi_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
+{
+ AMDVIAddressSpace *as = container_of(iommu_mr, AMDVIAddressSpace, iommu);
+ uint64_t dte[4] = { 0 };
+
+ if (!(n->notifier_flags & IOMMU_NOTIFIER_MAP)) {
+ return;
+ }
+
+ if (amdvi_as_to_dte(as, dte)) {
+ return;
+ }
+
+ /* Dropping all mappings for the address space. Also clears the IOVA tree */
+ amdvi_address_space_unmap(as, n);
+
+ amdvi_sync_shadow_page_table_range(as, &dte[0], 0, UINT64_MAX, false);
+}
+
+static void amdvi_address_space_sync(AMDVIAddressSpace *as)
+{
+ IOMMUNotifier *n;
+ uint64_t dte[4] = { 0 };
+
+ /* If only UNMAP notifiers are registered, drop all existing mappings */
+ if (!(as->notifier_flags & IOMMU_NOTIFIER_MAP)) {
+ IOMMU_NOTIFIER_FOREACH(n, &as->iommu) {
+ /*
+ * Directly calling memory_region_unmap_iommu_notifier_range() does
+ * not guarantee that the addr_mask eventually passed as parameter
+ * to the notifier is valid. Use amdvi_address_space_unmap() which
+ * ensures the notifier range is divided into properly aligned
+ * regions, and issues notifications for each one.
+ */
+ amdvi_address_space_unmap(as, n);
+ }
+ return;
+ }
+
+ if (amdvi_as_to_dte(as, dte)) {
+ return;
+ }
+
+ amdvi_sync_shadow_page_table_range(as, &dte[0], 0, UINT64_MAX, true);
+}
+
+/*
+ * This differs from the replay() method in that it issues both MAP and UNMAP
+ * notifications since it is called after global invalidation events in order to
+ * re-sync all address spaces.
+ */
+static void amdvi_iommu_address_space_sync_all(AMDVIState *s)
+{
+ AMDVIAddressSpace *as;
+
+ QLIST_FOREACH(as, &s->amdvi_as_with_notifiers, next) {
+ amdvi_address_space_sync(as);
+ }
+}
+
+/*
+ * Toggle between address translation and passthrough modes by enabling the
+ * corresponding memory regions.
+ */
+static void amdvi_switch_address_space(AMDVIAddressSpace *amdvi_as)
+{
+ AMDVIState *s = amdvi_as->iommu_state;
+
+ if (s->dma_remap && amdvi_as->addr_translation) {
+ /* Enabling DMA region */
+ memory_region_set_enabled(&amdvi_as->iommu_nodma, false);
+ memory_region_set_enabled(MEMORY_REGION(&amdvi_as->iommu), true);
+ } else {
+ /* Disabling DMA region, using passthrough */
+ memory_region_set_enabled(MEMORY_REGION(&amdvi_as->iommu), false);
+ memory_region_set_enabled(&amdvi_as->iommu_nodma, true);
+ }
+}
+
+/*
+ * For all existing address spaces managed by the IOMMU, enable/disable the
+ * corresponding memory regions to reset the address translation mode and
+ * use passthrough by default.
+ */
+static void amdvi_reset_address_translation_all(AMDVIState *s)
+{
+ AMDVIAddressSpace **iommu_as;
+
+ for (int bus_num = 0; bus_num < PCI_BUS_MAX; bus_num++) {
+
+ /* Nothing to do if there are no devices on the current bus */
+ if (!s->address_spaces[bus_num]) {
+ continue;
+ }
+ iommu_as = s->address_spaces[bus_num];
+
+ for (int devfn = 0; devfn < PCI_DEVFN_MAX; devfn++) {
+
+ if (!iommu_as[devfn]) {
+ continue;
+ }
+ /* Use passthrough as default mode after reset */
+ iommu_as[devfn]->addr_translation = false;
+ amdvi_switch_address_space(iommu_as[devfn]);
+ }
+ }
+}
+
+static void enable_dma_mode(AMDVIAddressSpace *as, bool inval_current)
+{
+ /*
+ * When enabling DMA mode for the purpose of isolating guest devices on
+ * a failure to retrieve or invalid DTE, all existing mappings must be
+ * dropped.
+ */
+ if (inval_current) {
+ IOMMUNotifier *n;
+ IOMMU_NOTIFIER_FOREACH(n, &as->iommu) {
+ amdvi_address_space_unmap(as, n);
+ }
+ }
+
+ if (as->addr_translation) {
+ return;
+ }
+
+ /* Installing DTE enabling translation, activate region */
+ as->addr_translation = true;
+ amdvi_switch_address_space(as);
+ /* Sync shadow page tables */
+ amdvi_address_space_sync(as);
+}
+
+/*
+ * If paging was previously in use in the address space
+ * - invalidate all existing mappings
+ * - switch to no_dma memory region
+ */
+static void enable_nodma_mode(AMDVIAddressSpace *as)
+{
+ IOMMUNotifier *n;
+
+ if (!as->addr_translation) {
+ /* passthrough is already active, nothing to do */
+ return;
+ }
+
+ as->addr_translation = false;
+ IOMMU_NOTIFIER_FOREACH(n, &as->iommu) {
+ /* Drop all mappings for the address space */
+ amdvi_address_space_unmap(as, n);
+ }
+ amdvi_switch_address_space(as);
+}
+
+/*
+ * A guest driver must issue the INVALIDATE_DEVTAB_ENTRY command to the IOMMU
+ * after changing a Device Table entry. We can use this fact to detect when a
+ * Device Table entry is created for a device attached to a paging domain and
+ * enable the corresponding IOMMU memory region to allow for DMA translation if
+ * appropriate.
+ */
+static void amdvi_update_addr_translation_mode(AMDVIState *s, uint16_t devid)
+{
+ uint8_t bus_num, devfn, dte_mode;
+ AMDVIAddressSpace *as;
+ uint64_t dte[4] = { 0 };
+ int ret;
+
+ /*
+ * Convert the devid encoded in the command to a bus and devfn in
+ * order to retrieve the corresponding address space.
+ */
+ bus_num = PCI_BUS_NUM(devid);
+ devfn = devid & 0xff;
+
+ /*
+ * The main buffer of size (AMDVIAddressSpace *) * (PCI_BUS_MAX) has already
+ * been allocated within AMDVIState, but must be careful to not access
+ * unallocated devfn.
+ */
+ if (!s->address_spaces[bus_num] || !s->address_spaces[bus_num][devfn]) {
+ return;
+ }
+ as = s->address_spaces[bus_num][devfn];
+
+ ret = amdvi_as_to_dte(as, dte);
+
+ if (!ret) {
+ dte_mode = (dte[0] >> AMDVI_DEV_MODE_RSHIFT) & AMDVI_DEV_MODE_MASK;
+ }
+
+ switch (ret) {
+ case 0:
+ /* DTE was successfully retrieved */
+ if (!dte_mode) {
+ enable_nodma_mode(as); /* DTE[V]=1 && DTE[Mode]=0 => passthrough */
+ } else {
+ enable_dma_mode(as, false); /* Enable DMA translation */
+ }
+ break;
+ case -AMDVI_FR_DTE_V:
+ /* DTE[V]=0, address is passed untranslated */
+ enable_nodma_mode(as);
+ break;
+ case -AMDVI_FR_DTE_RTR_ERR:
+ case -AMDVI_FR_DTE_TV:
+ /*
+ * Enforce isolation by using DMA in rare scenarios where the DTE cannot
+ * be retrieved or DTE[TV]=0. Existing mappings are dropped.
+ */
+ enable_dma_mode(as, true);
+ break;
+ }
+}
+
/* log error without aborting since linux seems to be using reserved bits */
static void amdvi_inval_devtab_entry(AMDVIState *s, uint64_t *cmd)
{
uint16_t devid = cpu_to_le16((uint16_t)extract64(cmd[0], 0, 16));
+ trace_amdvi_devtab_inval(PCI_BUS_NUM(devid), PCI_SLOT(devid),
+ PCI_FUNC(devid));
+
/* This command should invalidate internal caches of which there isn't */
if (extract64(cmd[0], 16, 44) || cmd[1]) {
amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
s->cmdbuf + s->cmdbuf_head);
+ return;
+ }
+
+ /*
+ * When DMA remapping capability is enabled, check if updated DTE is setup
+ * for paging or not, and configure the corresponding memory regions.
+ */
+ if (s->dma_remap) {
+ amdvi_update_addr_translation_mode(s, devid);
}
- trace_amdvi_devtab_inval(PCI_BUS_NUM(devid), PCI_SLOT(devid),
- PCI_FUNC(devid));
}
static void amdvi_complete_ppr(AMDVIState *s, uint64_t *cmd)
@@ -449,6 +1193,13 @@ static void amdvi_inval_all(AMDVIState *s, uint64_t *cmd)
amdvi_intremap_inval_notify_all(s, true, 0, 0);
amdvi_iotlb_reset(s);
+
+ /*
+ * Fully replay the address space i.e. send both UNMAP and MAP events in
+ * order to synchronize guest and host IO page tables tables.
+ */
+ amdvi_iommu_address_space_sync_all(s);
+
trace_amdvi_all_inval();
}
@@ -460,10 +1211,109 @@ static gboolean amdvi_iotlb_remove_by_domid(gpointer key, gpointer value,
return entry->domid == domid;
}
+/*
+ * Helper to decode the size of the range to invalidate encoded in the
+ * INVALIDATE_IOMMU_PAGES Command format.
+ * The size of the region to invalidate depends on the S bit and address.
+ * S bit value:
+ * 0 : Invalidation size is 4 Kbytes.
+ * 1 : Invalidation size is determined by first zero bit in the address
+ * starting from Address[12].
+ *
+ * In the AMD IOMMU Linux driver, an invalidation command with address
+ * ((1 << 63) - 1) is sent when intending to clear the entire cache.
+ * However, Table 14: Example Page Size Encodings shows that an address of
+ * ((1ULL << 51) - 1) encodes the entire cache, so effectively any address with
+ * first zero at bit 51 or larger is a request to invalidate the entire address
+ * space.
+ */
+static uint64_t amdvi_decode_invalidation_size(hwaddr addr, uint16_t flags)
+{
+ uint64_t size = AMDVI_PAGE_SIZE;
+ uint8_t fzbit = 0;
+
+ if (flags & AMDVI_CMD_INVAL_IOMMU_PAGES_S) {
+ fzbit = cto64(addr | 0xFFF);
+
+ if (fzbit >= 51) {
+ size = AMDVI_INV_ALL_PAGES;
+ } else {
+ size = 1ULL << (fzbit + 1);
+ }
+ }
+ return size;
+}
+
+/*
+ * Synchronize the guest page tables with the shadow page tables kept in the
+ * host for the specified range.
+ * The invalidation command issued by the guest and intercepted by the VMM
+ * does not specify a device, but a domain, since all devices in the same domain
+ * share the same page tables. However, vIOMMU emulation creates separate
+ * address spaces per device, so it is necessary to traverse the list of all of
+ * address spaces (i.e. devices) that have notifiers registered in order to
+ * propagate the changes to the host page tables.
+ * We cannot return early from this function once a matching domain has been
+ * identified and its page tables synced (based on the fact that all devices in
+ * the same domain share the page tables). The reason is that different devices
+ * (i.e. address spaces) could have different notifiers registered, and by
+ * skipping address spaces that appear later on the amdvi_as_with_notifiers list
+ * their notifiers (which could differ from the ones registered for the first
+ * device/address space) would not be invoked.
+ */
+static void amdvi_sync_domain(AMDVIState *s, uint16_t domid, uint64_t addr,
+ uint16_t flags)
+{
+ AMDVIAddressSpace *as;
+
+ uint64_t size = amdvi_decode_invalidation_size(addr, flags);
+
+ if (size == AMDVI_INV_ALL_PAGES) {
+ addr = 0; /* Set start address to 0 and invalidate entire AS */
+ } else {
+ addr &= ~(size - 1);
+ }
+
+ /*
+ * Call notifiers that have registered for each address space matching the
+ * domain ID, in order to sync the guest pagetable state with the host.
+ */
+ QLIST_FOREACH(as, &s->amdvi_as_with_notifiers, next) {
+
+ uint64_t dte[4] = { 0 };
+
+ /*
+ * Retrieve the Device Table entry for the devid corresponding to the
+ * current address space, and verify the DomainID matches i.e. the page
+ * tables to be synced belong to devices in the domain.
+ */
+ if (amdvi_as_to_dte(as, dte)) {
+ continue;
+ }
+
+ /* Only need to sync the Page Tables for a matching domain */
+ if (domid != (dte[1] & AMDVI_DEV_DOMID_ID_MASK)) {
+ continue;
+ }
+
+ /*
+ * We have determined that there is a valid Device Table Entry for a
+ * device matching the DomainID in the INV_IOMMU_PAGES command issued by
+ * the guest. Walk the guest page table to sync shadow page table.
+ */
+ if (as->notifier_flags & IOMMU_NOTIFIER_MAP) {
+ /* Sync guest IOMMU mappings with host */
+ amdvi_sync_shadow_page_table_range(as, &dte[0], addr, size, true);
+ }
+ }
+}
+
/* we don't have devid - we can't remove pages by address */
static void amdvi_inval_pages(AMDVIState *s, uint64_t *cmd)
{
uint16_t domid = cpu_to_le16((uint16_t)extract64(cmd[0], 32, 16));
+ uint64_t addr = cpu_to_le64(extract64(cmd[1], 12, 52)) << 12;
+ uint16_t flags = cpu_to_le16((uint16_t)extract64(cmd[1], 0, 3));
if (extract64(cmd[0], 20, 12) || extract64(cmd[0], 48, 12) ||
extract64(cmd[1], 3, 9)) {
@@ -473,6 +1323,8 @@ static void amdvi_inval_pages(AMDVIState *s, uint64_t *cmd)
g_hash_table_foreach_remove(s->iotlb, amdvi_iotlb_remove_by_domid,
&domid);
+
+ amdvi_sync_domain(s, domid, addr, flags);
trace_amdvi_pages_inval(domid);
}
@@ -508,7 +1360,7 @@ static void amdvi_inval_inttable(AMDVIState *s, uint64_t *cmd)
static void iommu_inval_iotlb(AMDVIState *s, uint64_t *cmd)
{
- uint16_t devid = extract64(cmd[0], 0, 16);
+ uint16_t devid = cpu_to_le16(extract64(cmd[0], 0, 16));
if (extract64(cmd[1], 1, 1) || extract64(cmd[1], 3, 1) ||
extract64(cmd[1], 6, 6)) {
amdvi_log_illegalcom_error(s, extract64(cmd[0], 60, 4),
@@ -521,7 +1373,7 @@ static void iommu_inval_iotlb(AMDVIState *s, uint64_t *cmd)
&devid);
} else {
amdvi_iotlb_remove_page(s, cpu_to_le64(extract64(cmd[1], 12, 52)) << 12,
- cpu_to_le16(extract64(cmd[1], 0, 16)));
+ devid);
}
trace_amdvi_iotlb_inval();
}
@@ -592,18 +1444,31 @@ static void amdvi_cmdbuf_run(AMDVIState *s)
}
}
-static void amdvi_mmio_trace(hwaddr addr, unsigned size)
+static inline uint8_t amdvi_mmio_get_index(hwaddr addr)
{
uint8_t index = (addr & ~0x2000) / 8;
if ((addr & 0x2000)) {
/* high table */
index = index >= AMDVI_MMIO_REGS_HIGH ? AMDVI_MMIO_REGS_HIGH : index;
- trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07);
} else {
index = index >= AMDVI_MMIO_REGS_LOW ? AMDVI_MMIO_REGS_LOW : index;
- trace_amdvi_mmio_read(amdvi_mmio_low[index], addr, size, addr & ~0x07);
}
+
+ return index;
+}
+
+static void amdvi_mmio_trace_read(hwaddr addr, unsigned size)
+{
+ uint8_t index = amdvi_mmio_get_index(addr);
+ trace_amdvi_mmio_read(amdvi_mmio_low[index], addr, size, addr & ~0x07);
+}
+
+static void amdvi_mmio_trace_write(hwaddr addr, unsigned size, uint64_t val)
+{
+ uint8_t index = amdvi_mmio_get_index(addr);
+ trace_amdvi_mmio_write(amdvi_mmio_low[index], addr, size, val,
+ addr & ~0x07);
}
static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size)
@@ -623,7 +1488,7 @@ static uint64_t amdvi_mmio_read(void *opaque, hwaddr addr, unsigned size)
} else if (size == 8) {
val = amdvi_readq(s, addr);
}
- amdvi_mmio_trace(addr, size);
+ amdvi_mmio_trace_read(addr, size);
return val;
}
@@ -633,7 +1498,6 @@ static void amdvi_handle_control_write(AMDVIState *s)
unsigned long control = amdvi_readq(s, AMDVI_MMIO_CONTROL);
s->enabled = !!(control & AMDVI_MMIO_CONTROL_AMDVIEN);
- s->ats_enabled = !!(control & AMDVI_MMIO_CONTROL_HTTUNEN);
s->evtlog_enabled = s->enabled && !!(control &
AMDVI_MMIO_CONTROL_EVENTLOGEN);
@@ -665,8 +1529,8 @@ static inline void amdvi_handle_devtab_write(AMDVIState *s)
uint64_t val = amdvi_readq(s, AMDVI_MMIO_DEVICE_TABLE);
s->devtab = (val & AMDVI_MMIO_DEVTAB_BASE_MASK);
- /* set device table length */
- s->devtab_len = ((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1 *
+ /* set device table length (i.e. number of entries table can hold) */
+ s->devtab_len = (((val & AMDVI_MMIO_DEVTAB_SIZE_MASK) + 1) *
(AMDVI_MMIO_DEVTAB_SIZE_UNIT /
AMDVI_MMIO_DEVTAB_ENTRY_SIZE));
}
@@ -704,9 +1568,19 @@ static inline void amdvi_handle_excllim_write(AMDVIState *s)
static inline void amdvi_handle_evtbase_write(AMDVIState *s)
{
uint64_t val = amdvi_readq(s, AMDVI_MMIO_EVENT_BASE);
+
+ if (amdvi_readq(s, AMDVI_MMIO_STATUS) & AMDVI_MMIO_STATUS_EVENT_INT)
+ /* Do not reset if eventlog interrupt bit is set*/
+ return;
+
s->evtlog = val & AMDVI_MMIO_EVTLOG_BASE_MASK;
s->evtlog_len = 1UL << (amdvi_readq(s, AMDVI_MMIO_EVTLOG_SIZE_BYTE)
& AMDVI_MMIO_EVTLOG_SIZE_MASK);
+
+ /* clear tail and head pointer to 0 when event base is updated */
+ s->evtlog_tail = s->evtlog_head = 0;
+ amdvi_writeq_raw(s, AMDVI_MMIO_EVENT_HEAD, s->evtlog_head);
+ amdvi_writeq_raw(s, AMDVI_MMIO_EVENT_TAIL, s->evtlog_tail);
}
static inline void amdvi_handle_evttail_write(AMDVIState *s)
@@ -770,7 +1644,7 @@ static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val,
return;
}
- amdvi_mmio_trace(addr, size);
+ amdvi_mmio_trace_write(addr, size, val);
switch (addr & ~0x07) {
case AMDVI_MMIO_CONTROL:
amdvi_mmio_reg_write(s, size, val, addr);
@@ -835,152 +1709,74 @@ static void amdvi_mmio_write(void *opaque, hwaddr addr, uint64_t val,
amdvi_mmio_reg_write(s, size, val, addr);
amdvi_handle_pprtail_write(s);
break;
+ case AMDVI_MMIO_STATUS:
+ amdvi_mmio_reg_write(s, size, val, addr);
+ break;
}
}
-static inline uint64_t amdvi_get_perms(uint64_t entry)
-{
- return (entry & (AMDVI_DEV_PERM_READ | AMDVI_DEV_PERM_WRITE)) >>
- AMDVI_DEV_PERM_SHIFT;
-}
-
-/* validate that reserved bits are honoured */
-static bool amdvi_validate_dte(AMDVIState *s, uint16_t devid,
- uint64_t *dte)
-{
- if ((dte[0] & AMDVI_DTE_LOWER_QUAD_RESERVED)
- || (dte[1] & AMDVI_DTE_MIDDLE_QUAD_RESERVED)
- || (dte[2] & AMDVI_DTE_UPPER_QUAD_RESERVED) || dte[3]) {
- amdvi_log_illegaldevtab_error(s, devid,
- s->devtab +
- devid * AMDVI_DEVTAB_ENTRY_SIZE, 0);
- return false;
- }
-
- return true;
-}
-
-/* get a device table entry given the devid */
-static bool amdvi_get_dte(AMDVIState *s, int devid, uint64_t *entry)
+static void amdvi_page_walk(AMDVIAddressSpace *as, uint64_t *dte,
+ IOMMUTLBEntry *ret, unsigned perms,
+ hwaddr addr)
{
- uint32_t offset = devid * AMDVI_DEVTAB_ENTRY_SIZE;
+ hwaddr page_mask, pagesize = 0;
+ uint8_t mode;
+ uint64_t pte;
+ int fetch_ret;
- if (dma_memory_read(&address_space_memory, s->devtab + offset, entry,
- AMDVI_DEVTAB_ENTRY_SIZE, MEMTXATTRS_UNSPECIFIED)) {
- trace_amdvi_dte_get_fail(s->devtab, offset);
- /* log error accessing dte */
- amdvi_log_devtab_error(s, devid, s->devtab + offset, 0);
- return false;
+ /* make sure the DTE has TV = 1 */
+ if (!(dte[0] & AMDVI_DEV_TRANSLATION_VALID)) {
+ /*
+ * A DTE with V=1, TV=0 does not have a valid Page Table Root Pointer.
+ * An IOMMU processing a request that requires a table walk terminates
+ * the walk when it encounters this condition. Do the same and return
+ * instead of assuming that the address is forwarded without translation
+ * i.e. the passthrough case, as it is done for the case where DTE[V]=0.
+ */
+ return;
}
- *entry = le64_to_cpu(*entry);
- if (!amdvi_validate_dte(s, devid, entry)) {
- trace_amdvi_invalid_dte(entry[0]);
- return false;
+ mode = get_pte_translation_mode(dte[0]);
+ if (mode >= 7) {
+ trace_amdvi_mode_invalid(mode, addr);
+ return;
}
-
- return true;
-}
-
-/* get pte translation mode */
-static inline uint8_t get_pte_translation_mode(uint64_t pte)
-{
- return (pte >> AMDVI_DEV_MODE_RSHIFT) & AMDVI_DEV_MODE_MASK;
-}
-
-static inline uint64_t pte_override_page_mask(uint64_t pte)
-{
- uint8_t page_mask = 13;
- uint64_t addr = (pte & AMDVI_DEV_PT_ROOT_MASK) >> 12;
- /* find the first zero bit */
- while (addr & 1) {
- page_mask++;
- addr = addr >> 1;
+ if (mode == 0) {
+ goto no_remap;
}
- return ~((1ULL << page_mask) - 1);
-}
-
-static inline uint64_t pte_get_page_mask(uint64_t oldlevel)
-{
- return ~((1UL << ((oldlevel * 9) + 3)) - 1);
-}
+ /* Attempt to fetch the PTE to determine if a valid mapping exists */
+ fetch_ret = fetch_pte(as, addr, dte[0], &pte, &pagesize);
-static inline uint64_t amdvi_get_pte_entry(AMDVIState *s, uint64_t pte_addr,
- uint16_t devid)
-{
- uint64_t pte;
+ /*
+ * If walking the page table results in an error of any type, returns an
+ * empty PTE i.e. no mapping, or the permissions do not match, return since
+ * there is no translation available.
+ */
+ if (fetch_ret < 0 || !IOMMU_PTE_PRESENT(pte) ||
+ perms != (perms & amdvi_get_perms(pte))) {
- if (dma_memory_read(&address_space_memory, pte_addr,
- &pte, sizeof(pte), MEMTXATTRS_UNSPECIFIED)) {
- trace_amdvi_get_pte_hwerror(pte_addr);
- amdvi_log_pagetab_error(s, devid, pte_addr, 0);
- pte = 0;
- return pte;
+ amdvi_page_fault(as->iommu_state, as->devfn, addr, perms);
+ trace_amdvi_page_fault(addr);
+ return;
}
- pte = le64_to_cpu(pte);
- return pte;
-}
-
-static void amdvi_page_walk(AMDVIAddressSpace *as, uint64_t *dte,
- IOMMUTLBEntry *ret, unsigned perms,
- hwaddr addr)
-{
- unsigned level, present, pte_perms, oldlevel;
- uint64_t pte = dte[0], pte_addr, page_mask;
-
- /* make sure the DTE has TV = 1 */
- if (pte & AMDVI_DEV_TRANSLATION_VALID) {
- level = get_pte_translation_mode(pte);
- if (level >= 7) {
- trace_amdvi_mode_invalid(level, addr);
- return;
- }
- if (level == 0) {
- goto no_remap;
- }
-
- /* we are at the leaf page table or page table encodes a huge page */
- do {
- pte_perms = amdvi_get_perms(pte);
- present = pte & 1;
- if (!present || perms != (perms & pte_perms)) {
- amdvi_page_fault(as->iommu_state, as->devfn, addr, perms);
- trace_amdvi_page_fault(addr);
- return;
- }
-
- /* go to the next lower level */
- pte_addr = pte & AMDVI_DEV_PT_ROOT_MASK;
- /* add offset and load pte */
- pte_addr += ((addr >> (3 + 9 * level)) & 0x1FF) << 3;
- pte = amdvi_get_pte_entry(as->iommu_state, pte_addr, as->devfn);
- if (!pte) {
- return;
- }
- oldlevel = level;
- level = get_pte_translation_mode(pte);
- } while (level > 0 && level < 7);
+ /* A valid PTE and page size has been retrieved */
+ assert(pagesize);
+ page_mask = ~(pagesize - 1);
- if (level == 0x7) {
- page_mask = pte_override_page_mask(pte);
- } else {
- page_mask = pte_get_page_mask(oldlevel);
- }
+ /* get access permissions from pte */
+ ret->iova = addr & page_mask;
+ ret->translated_addr = (pte & AMDVI_DEV_PT_ROOT_MASK) & page_mask;
+ ret->addr_mask = ~page_mask;
+ ret->perm = amdvi_get_perms(pte);
+ return;
- /* get access permissions from pte */
- ret->iova = addr & page_mask;
- ret->translated_addr = (pte & AMDVI_DEV_PT_ROOT_MASK) & page_mask;
- ret->addr_mask = ~page_mask;
- ret->perm = amdvi_get_perms(pte);
- return;
- }
no_remap:
ret->iova = addr & AMDVI_PAGE_MASK_4K;
ret->translated_addr = addr & AMDVI_PAGE_MASK_4K;
ret->addr_mask = ~AMDVI_PAGE_MASK_4K;
- ret->perm = amdvi_get_perms(pte);
+ ret->perm = amdvi_get_perms(dte[0]);
}
static void amdvi_do_translate(AMDVIAddressSpace *as, hwaddr addr,
@@ -990,6 +1786,7 @@ static void amdvi_do_translate(AMDVIAddressSpace *as, hwaddr addr,
uint16_t devid = PCI_BUILD_BDF(as->bus_num, as->devfn);
AMDVIIOTLBEntry *iotlb_entry = amdvi_iotlb_lookup(s, addr, devid);
uint64_t entry[4];
+ int dte_ret;
if (iotlb_entry) {
trace_amdvi_iotlb_hit(PCI_BUS_NUM(devid), PCI_SLOT(devid),
@@ -1001,13 +1798,14 @@ static void amdvi_do_translate(AMDVIAddressSpace *as, hwaddr addr,
return;
}
- if (!amdvi_get_dte(s, devid, entry)) {
- return;
- }
+ dte_ret = amdvi_as_to_dte(as, entry);
- /* devices with V = 0 are not translated */
- if (!(entry[0] & AMDVI_DEV_VALID)) {
- goto out;
+ if (dte_ret < 0) {
+ if (dte_ret == -AMDVI_FR_DTE_V) {
+ /* DTE[V]=0, address is passed untranslated */
+ goto out;
+ }
+ return;
}
amdvi_page_walk(as, entry, ret,
@@ -1426,7 +2224,6 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
AMDVIState *s = opaque;
AMDVIAddressSpace **iommu_as, *amdvi_dev_as;
int bus_num = pci_bus_num(bus);
- X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
iommu_as = s->address_spaces[bus_num];
@@ -1444,6 +2241,9 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
iommu_as[devfn]->bus_num = (uint8_t)bus_num;
iommu_as[devfn]->devfn = (uint8_t)devfn;
iommu_as[devfn]->iommu_state = s;
+ iommu_as[devfn]->notifier_flags = IOMMU_NOTIFIER_NONE;
+ iommu_as[devfn]->iova_tree = iova_tree_new();
+ iommu_as[devfn]->addr_translation = false;
amdvi_dev_as = iommu_as[devfn];
@@ -1486,15 +2286,7 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
AMDVI_INT_ADDR_FIRST,
&amdvi_dev_as->iommu_ir, 1);
- if (!x86_iommu->pt_supported) {
- memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, false);
- memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu),
- true);
- } else {
- memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu),
- false);
- memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, true);
- }
+ amdvi_switch_address_space(amdvi_dev_as);
}
return &iommu_as[devfn]->as;
}
@@ -1524,14 +2316,35 @@ static int amdvi_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
Error **errp)
{
AMDVIAddressSpace *as = container_of(iommu, AMDVIAddressSpace, iommu);
+ AMDVIState *s = as->iommu_state;
+
+ /*
+ * Accurate synchronization of the vIOMMU page tables required to support
+ * MAP notifiers is provided by the dma-remap feature. In addition, this
+ * also requires that the vIOMMU presents the NpCache capability, so a guest
+ * driver issues invalidations for both map() and unmap() operations. The
+ * capability is already set by default as part of AMDVI_CAPAB_FEATURES and
+ * written to the configuration in amdvi_pci_realize().
+ */
+ if (!s->dma_remap && (new & IOMMU_NOTIFIER_MAP)) {
+ error_setg_errno(errp, ENOTSUP,
+ "device %02x.%02x.%x requires dma-remap=1",
+ as->bus_num, PCI_SLOT(as->devfn), PCI_FUNC(as->devfn));
+ return -ENOTSUP;
+ }
+
+ /*
+ * Update notifier flags for address space and the list of address spaces
+ * with registered notifiers.
+ */
+ as->notifier_flags = new;
- if (new & IOMMU_NOTIFIER_MAP) {
- error_setg(errp,
- "device %02x.%02x.%x requires iommu notifier which is not "
- "currently supported", as->bus_num, PCI_SLOT(as->devfn),
- PCI_FUNC(as->devfn));
- return -EINVAL;
+ if (old == IOMMU_NOTIFIER_NONE) {
+ QLIST_INSERT_HEAD(&s->amdvi_as_with_notifiers, as, next);
+ } else if (new == IOMMU_NOTIFIER_NONE) {
+ QLIST_REMOVE(as, next);
}
+
return 0;
}
@@ -1549,7 +2362,6 @@ static void amdvi_init(AMDVIState *s)
s->excl_allow = false;
s->mmio_enabled = false;
s->enabled = false;
- s->ats_enabled = false;
s->cmdbuf_enabled = false;
/* reset MMIO */
@@ -1609,6 +2421,10 @@ static void amdvi_sysbus_reset(DeviceState *dev)
msi_reset(&s->pci->dev);
amdvi_init(s);
+
+ /* Discard all mappings on device reset */
+ amdvi_address_space_unmap_all(s);
+ amdvi_reset_address_translation_all(s);
}
static const VMStateDescription vmstate_amdvi_sysbus_migratable = {
@@ -1620,7 +2436,8 @@ static const VMStateDescription vmstate_amdvi_sysbus_migratable = {
/* Updated in amdvi_handle_control_write() */
VMSTATE_BOOL(enabled, AMDVIState),
VMSTATE_BOOL(ga_enabled, AMDVIState),
- VMSTATE_BOOL(ats_enabled, AMDVIState),
+ /* bool ats_enabled is obsolete */
+ VMSTATE_UNUSED(1), /* was ats_enabled */
VMSTATE_BOOL(cmdbuf_enabled, AMDVIState),
VMSTATE_BOOL(completion_wait_intr, AMDVIState),
VMSTATE_BOOL(evtlog_enabled, AMDVIState),
@@ -1693,9 +2510,6 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
amdvi_uint64_equal, g_free, g_free);
- /* Pseudo address space under root PCI bus. */
- x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID);
-
/* set up MMIO */
memory_region_init_io(&s->mr_mmio, OBJECT(s), &mmio_mem_ops, s,
"amdvi-mmio", AMDVI_MMIO_SIZE);
@@ -1718,11 +2532,22 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
memory_region_add_subregion_overlap(&s->mr_sys, AMDVI_INT_ADDR_FIRST,
&s->mr_ir, 1);
+ /* Pseudo address space under root PCI bus. */
+ x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID);
+
if (kvm_enabled() && x86ms->apic_id_limit > 255 && !s->xtsup) {
error_report("AMD IOMMU with x2APIC configuration requires xtsup=on");
exit(EXIT_FAILURE);
}
+ if (s->xtsup) {
+ if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) {
+ error_report("AMD IOMMU xtsup=on requires x2APIC support on "
+ "the KVM side");
+ exit(EXIT_FAILURE);
+ }
+ }
+
pci_setup_iommu(bus, &amdvi_iommu_ops, s);
amdvi_init(s);
}
@@ -1730,6 +2555,7 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
static const Property amdvi_properties[] = {
DEFINE_PROP_BOOL("xtsup", AMDVIState, xtsup, false),
DEFINE_PROP_STRING("pci-id", AMDVIState, pci_id),
+ DEFINE_PROP_BOOL("dma-remap", AMDVIState, dma_remap, false),
};
static const VMStateDescription vmstate_amdvi_sysbus = {
@@ -1791,6 +2617,7 @@ static void amdvi_iommu_memory_region_class_init(ObjectClass *klass,
imrc->translate = amdvi_translate;
imrc->notify_flag_changed = amdvi_iommu_notify_flag_changed;
+ imrc->replay = amdvi_iommu_replay;
}
static const TypeInfo amdvi_iommu_memory_region_info = {
diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
index 5672bde..daf82fc 100644
--- a/hw/i386/amd_iommu.h
+++ b/hw/i386/amd_iommu.h
@@ -25,6 +25,8 @@
#include "hw/i386/x86-iommu.h"
#include "qom/object.h"
+#define GENMASK64(h, l) (((~0ULL) >> (63 - (h) + (l))) << (l))
+
/* Capability registers */
#define AMDVI_CAPAB_BAR_LOW 0x04
#define AMDVI_CAPAB_BAR_HIGH 0x08
@@ -66,34 +68,34 @@
#define AMDVI_MMIO_SIZE 0x4000
-#define AMDVI_MMIO_DEVTAB_SIZE_MASK ((1ULL << 12) - 1)
-#define AMDVI_MMIO_DEVTAB_BASE_MASK (((1ULL << 52) - 1) & ~ \
- AMDVI_MMIO_DEVTAB_SIZE_MASK)
+#define AMDVI_MMIO_DEVTAB_SIZE_MASK GENMASK64(8, 0)
+#define AMDVI_MMIO_DEVTAB_BASE_MASK GENMASK64(51, 12)
+
#define AMDVI_MMIO_DEVTAB_ENTRY_SIZE 32
#define AMDVI_MMIO_DEVTAB_SIZE_UNIT 4096
/* some of this are similar but just for readability */
#define AMDVI_MMIO_CMDBUF_SIZE_BYTE (AMDVI_MMIO_COMMAND_BASE + 7)
#define AMDVI_MMIO_CMDBUF_SIZE_MASK 0x0f
-#define AMDVI_MMIO_CMDBUF_BASE_MASK AMDVI_MMIO_DEVTAB_BASE_MASK
-#define AMDVI_MMIO_CMDBUF_HEAD_MASK (((1ULL << 19) - 1) & ~0x0f)
-#define AMDVI_MMIO_CMDBUF_TAIL_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK
+#define AMDVI_MMIO_CMDBUF_BASE_MASK GENMASK64(51, 12)
+#define AMDVI_MMIO_CMDBUF_HEAD_MASK GENMASK64(18, 4)
+#define AMDVI_MMIO_CMDBUF_TAIL_MASK GENMASK64(18, 4)
#define AMDVI_MMIO_EVTLOG_SIZE_BYTE (AMDVI_MMIO_EVENT_BASE + 7)
-#define AMDVI_MMIO_EVTLOG_SIZE_MASK AMDVI_MMIO_CMDBUF_SIZE_MASK
-#define AMDVI_MMIO_EVTLOG_BASE_MASK AMDVI_MMIO_CMDBUF_BASE_MASK
-#define AMDVI_MMIO_EVTLOG_HEAD_MASK (((1ULL << 19) - 1) & ~0x0f)
-#define AMDVI_MMIO_EVTLOG_TAIL_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK
+#define AMDVI_MMIO_EVTLOG_SIZE_MASK 0x0f
+#define AMDVI_MMIO_EVTLOG_BASE_MASK GENMASK64(51, 12)
+#define AMDVI_MMIO_EVTLOG_HEAD_MASK GENMASK64(18, 4)
+#define AMDVI_MMIO_EVTLOG_TAIL_MASK GENMASK64(18, 4)
-#define AMDVI_MMIO_PPRLOG_SIZE_BYTE (AMDVI_MMIO_EVENT_BASE + 7)
-#define AMDVI_MMIO_PPRLOG_HEAD_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK
-#define AMDVI_MMIO_PPRLOG_TAIL_MASK AMDVI_MMIO_EVTLOG_HEAD_MASK
-#define AMDVI_MMIO_PPRLOG_BASE_MASK AMDVI_MMIO_EVTLOG_BASE_MASK
-#define AMDVI_MMIO_PPRLOG_SIZE_MASK AMDVI_MMIO_EVTLOG_SIZE_MASK
+#define AMDVI_MMIO_PPRLOG_SIZE_BYTE (AMDVI_MMIO_PPR_BASE + 7)
+#define AMDVI_MMIO_PPRLOG_SIZE_MASK 0x0f
+#define AMDVI_MMIO_PPRLOG_BASE_MASK GENMASK64(51, 12)
+#define AMDVI_MMIO_PPRLOG_HEAD_MASK GENMASK64(18, 4)
+#define AMDVI_MMIO_PPRLOG_TAIL_MASK GENMASK64(18, 4)
#define AMDVI_MMIO_EXCL_ENABLED_MASK (1ULL << 0)
#define AMDVI_MMIO_EXCL_ALLOW_MASK (1ULL << 1)
-#define AMDVI_MMIO_EXCL_LIMIT_MASK AMDVI_MMIO_DEVTAB_BASE_MASK
+#define AMDVI_MMIO_EXCL_LIMIT_MASK GENMASK64(51, 12)
#define AMDVI_MMIO_EXCL_LIMIT_LOW 0xfff
/* mmio control register flags */
@@ -109,6 +111,7 @@
#define AMDVI_MMIO_STATUS_CMDBUF_RUN (1 << 4)
#define AMDVI_MMIO_STATUS_EVT_RUN (1 << 3)
#define AMDVI_MMIO_STATUS_COMP_INT (1 << 2)
+#define AMDVI_MMIO_STATUS_EVENT_INT (1 << 1)
#define AMDVI_MMIO_STATUS_EVT_OVF (1 << 0)
#define AMDVI_CMDBUF_ID_BYTE 0x07
@@ -123,6 +126,10 @@
#define AMDVI_CMD_COMPLETE_PPR_REQUEST 0x07
#define AMDVI_CMD_INVAL_AMDVI_ALL 0x08
+
+#define AMDVI_CMD_INVAL_IOMMU_PAGES_S (1ULL << 0)
+#define AMDVI_INV_ALL_PAGES (1ULL << 52)
+
#define AMDVI_DEVTAB_ENTRY_SIZE 32
/* Device table entry bits 0:63 */
@@ -130,14 +137,14 @@
#define AMDVI_DEV_TRANSLATION_VALID (1ULL << 1)
#define AMDVI_DEV_MODE_MASK 0x7
#define AMDVI_DEV_MODE_RSHIFT 9
-#define AMDVI_DEV_PT_ROOT_MASK 0xffffffffff000
+#define AMDVI_DEV_PT_ROOT_MASK GENMASK64(51, 12)
#define AMDVI_DEV_PT_ROOT_RSHIFT 12
#define AMDVI_DEV_PERM_SHIFT 61
#define AMDVI_DEV_PERM_READ (1ULL << 61)
#define AMDVI_DEV_PERM_WRITE (1ULL << 62)
/* Device table entry bits 64:127 */
-#define AMDVI_DEV_DOMID_ID_MASK ((1ULL << 16) - 1)
+#define AMDVI_DEV_DOMID_ID_MASK GENMASK64(15, 0)
/* Event codes and flags, as stored in the info field */
#define AMDVI_EVENT_ILLEGAL_DEVTAB_ENTRY (0x1U << 12)
@@ -162,13 +169,55 @@
#define AMDVI_FEATURE_PC (1ULL << 9) /* Perf counters */
/* reserved DTE bits */
-#define AMDVI_DTE_LOWER_QUAD_RESERVED 0x80300000000000fc
-#define AMDVI_DTE_MIDDLE_QUAD_RESERVED 0x0000000000000100
-#define AMDVI_DTE_UPPER_QUAD_RESERVED 0x08f0000000000000
+#define AMDVI_DTE_QUAD0_RESERVED (GENMASK64(6, 2) | GENMASK64(63, 63))
+#define AMDVI_DTE_QUAD1_RESERVED 0
+#define AMDVI_DTE_QUAD2_RESERVED GENMASK64(53, 52)
+#define AMDVI_DTE_QUAD3_RESERVED (GENMASK64(14, 0) | GENMASK64(53, 48))
/* AMDVI paging mode */
#define AMDVI_GATS_MODE (2ULL << 12)
#define AMDVI_HATS_MODE (2ULL << 10)
+#define AMDVI_HATS_MODE_RESERVED (3ULL << 10)
+
+/* Page Table format */
+
+#define AMDVI_PTE_PR (1ULL << 0)
+#define AMDVI_PTE_NEXT_LEVEL_MASK GENMASK64(11, 9)
+
+#define IOMMU_PTE_PRESENT(pte) ((pte) & AMDVI_PTE_PR)
+
+/* Using level=0 for leaf PTE at 4K page size */
+#define PT_LEVEL_SHIFT(level) (12 + ((level) * 9))
+
+/* Return IOVA bit group used to index the Page Table at specific level */
+#define PT_LEVEL_INDEX(level, iova) (((iova) >> PT_LEVEL_SHIFT(level)) & \
+ GENMASK64(8, 0))
+
+/* Return the max address for a specified level i.e. max_oaddr */
+#define PT_LEVEL_MAX_ADDR(x) (((x) < 5) ? \
+ ((1ULL << PT_LEVEL_SHIFT((x + 1))) - 1) : \
+ (~(0ULL)))
+
+/* Extract the NextLevel field from PTE/PDE */
+#define PTE_NEXT_LEVEL(pte) (((pte) & AMDVI_PTE_NEXT_LEVEL_MASK) >> 9)
+
+/* Take page table level and return default pagetable size for level */
+#define PTE_LEVEL_PAGE_SIZE(level) (1ULL << (PT_LEVEL_SHIFT(level)))
+
+/*
+ * Return address of lower level page table encoded in PTE and specified by
+ * current level and corresponding IOVA bit group at such level.
+ */
+#define NEXT_PTE_ADDR(pte, level, iova) (((pte) & AMDVI_DEV_PT_ROOT_MASK) + \
+ (PT_LEVEL_INDEX(level, iova) * 8))
+
+/*
+ * Take a PTE value with mode=0x07 and return the page size it encodes.
+ */
+#define PTE_LARGE_PAGE_SIZE(pte) (1ULL << (1 + cto64(((pte) | 0xfffULL))))
+
+/* Return number of PTEs to use for a given page size (expected power of 2) */
+#define PAGE_SIZE_PTE_COUNT(pgsz) (1ULL << ((ctz64(pgsz) - 12) % 9))
/* IOTLB */
#define AMDVI_IOTLB_MAX_SIZE 1024
@@ -194,20 +243,16 @@
#define AMDVI_PAGE_SIZE (1ULL << AMDVI_PAGE_SHIFT)
#define AMDVI_PAGE_SHIFT_4K 12
-#define AMDVI_PAGE_MASK_4K (~((1ULL << AMDVI_PAGE_SHIFT_4K) - 1))
+#define AMDVI_PAGE_MASK_4K GENMASK64(63, 12)
-#define AMDVI_MAX_VA_ADDR (48UL << 5)
-#define AMDVI_MAX_PH_ADDR (40UL << 8)
-#define AMDVI_MAX_GVA_ADDR (48UL << 15)
+#define AMDVI_MAX_GVA_ADDR (2UL << 5)
+#define AMDVI_MAX_PH_ADDR (40UL << 8)
+#define AMDVI_MAX_VA_ADDR (48UL << 15)
/* Completion Wait data size */
#define AMDVI_COMPLETION_DATA_SIZE 8
#define AMDVI_COMMAND_SIZE 16
-/* Completion Wait data size */
-#define AMDVI_COMPLETION_DATA_SIZE 8
-
-#define AMDVI_COMMAND_SIZE 16
#define AMDVI_INT_ADDR_FIRST 0xfee00000
#define AMDVI_INT_ADDR_LAST 0xfeefffff
@@ -228,7 +273,7 @@
#define AMDVI_IR_INTCTL_PASS 1
#define AMDVI_IR_INTCTL_REMAP 2
-#define AMDVI_IR_PHYS_ADDR_MASK (((1ULL << 45) - 1) << 6)
+#define AMDVI_IR_PHYS_ADDR_MASK GENMASK64(51, 6)
/* MSI data 10:0 bits (section 2.2.5.1 Fig 14) */
#define AMDVI_IRTE_OFFSET 0x7ff
@@ -323,7 +368,6 @@ struct AMDVIState {
uint64_t mmio_addr;
bool enabled; /* IOMMU enabled */
- bool ats_enabled; /* address translation enabled */
bool cmdbuf_enabled; /* command buffer enabled */
bool evtlog_enabled; /* event log enabled */
bool excl_enabled;
@@ -366,12 +410,18 @@ struct AMDVIState {
/* for each served device */
AMDVIAddressSpace **address_spaces[PCI_BUS_MAX];
+ /* list of address spaces with registered notifiers */
+ QLIST_HEAD(, AMDVIAddressSpace) amdvi_as_with_notifiers;
+
/* IOTLB */
GHashTable *iotlb;
/* Interrupt remapping */
bool ga_enabled;
bool xtsup;
+
+ /* DMA address translation */
+ bool dma_remap;
};
uint64_t amdvi_extended_feature_register(AMDVIState *s);
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 69d72ad..6a168d5 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -45,6 +45,8 @@
((ce)->val[1] & VTD_SM_CONTEXT_ENTRY_RID2PASID_MASK)
#define VTD_CE_GET_PASID_DIR_TABLE(ce) \
((ce)->val[0] & VTD_PASID_DIR_BASE_ADDR_MASK)
+#define VTD_CE_GET_PRE(ce) \
+ ((ce)->val[0] & VTD_SM_CONTEXT_ENTRY_PRE)
/* pe operations */
#define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & VTD_SM_PASID_ENTRY_PGTT)
@@ -85,13 +87,6 @@ struct vtd_iotlb_key {
static void vtd_address_space_refresh_all(IntelIOMMUState *s);
static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
-static void vtd_panic_require_caching_mode(void)
-{
- error_report("We need to set caching-mode=on for intel-iommu to enable "
- "device assignment with IOMMU protection.");
- exit(1);
-}
-
static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
uint64_t wmask, uint64_t w1cmask)
{
@@ -1838,6 +1833,7 @@ static const bool vtd_qualified_faults[] = {
[VTD_FR_FS_NON_CANONICAL] = true,
[VTD_FR_FS_PAGING_ENTRY_US] = true,
[VTD_FR_SM_WRITE] = true,
+ [VTD_FR_SM_PRE_ABS] = true,
[VTD_FR_SM_INTERRUPT_ADDR] = true,
[VTD_FR_FS_BIT_UPDATE_FAILED] = true,
[VTD_FR_MAX] = false,
@@ -1987,9 +1983,9 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce,
uint32_t pasid)
{
dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid);
- uint32_t level = vtd_get_iova_level(s, ce, pasid);
uint32_t offset;
uint64_t flpte, flag_ad = VTD_FL_A;
+ *flpte_level = vtd_get_iova_level(s, ce, pasid);
if (!vtd_iova_fl_check_canonical(s, iova, ce, pasid)) {
error_report_once("%s: detected non canonical IOVA (iova=0x%" PRIx64 ","
@@ -1998,11 +1994,11 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce,
}
while (true) {
- offset = vtd_iova_level_offset(iova, level);
+ offset = vtd_iova_level_offset(iova, *flpte_level);
flpte = vtd_get_pte(addr, offset);
if (flpte == (uint64_t)-1) {
- if (level == vtd_get_iova_level(s, ce, pasid)) {
+ if (*flpte_level == vtd_get_iova_level(s, ce, pasid)) {
/* Invalid programming of pasid-entry */
return -VTD_FR_PASID_ENTRY_FSPTPTR_INV;
} else {
@@ -2028,15 +2024,15 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce,
if (is_write && !(flpte & VTD_FL_RW)) {
return -VTD_FR_SM_WRITE;
}
- if (vtd_flpte_nonzero_rsvd(flpte, level)) {
+ if (vtd_flpte_nonzero_rsvd(flpte, *flpte_level)) {
error_report_once("%s: detected flpte reserved non-zero "
"iova=0x%" PRIx64 ", level=0x%" PRIx32
"flpte=0x%" PRIx64 ", pasid=0x%" PRIX32 ")",
- __func__, iova, level, flpte, pasid);
+ __func__, iova, *flpte_level, flpte, pasid);
return -VTD_FR_FS_PAGING_ENTRY_RSVD;
}
- if (vtd_is_last_pte(flpte, level) && is_write) {
+ if (vtd_is_last_pte(flpte, *flpte_level) && is_write) {
flag_ad |= VTD_FL_D;
}
@@ -2044,14 +2040,13 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce,
return -VTD_FR_FS_BIT_UPDATE_FAILED;
}
- if (vtd_is_last_pte(flpte, level)) {
+ if (vtd_is_last_pte(flpte, *flpte_level)) {
*flptep = flpte;
- *flpte_level = level;
return 0;
}
addr = vtd_get_pte_addr(flpte, aw_bits);
- level--;
+ (*flpte_level)--;
}
}
@@ -2092,7 +2087,8 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
uint8_t bus_num = pci_bus_num(bus);
VTDContextCacheEntry *cc_entry;
uint64_t pte, page_mask;
- uint32_t level, pasid = vtd_as->pasid;
+ uint32_t level = UINT32_MAX;
+ uint32_t pasid = vtd_as->pasid;
uint16_t source_id = PCI_BUILD_BDF(bus_num, devfn);
int ret_fr;
bool is_fpd_set = false;
@@ -2251,14 +2247,19 @@ out:
entry->iova = addr & page_mask;
entry->translated_addr = vtd_get_pte_addr(pte, s->aw_bits) & page_mask;
entry->addr_mask = ~page_mask;
- entry->perm = access_flags;
+ entry->perm = (is_write ? access_flags : (access_flags & (~IOMMU_WO)));
return true;
error:
vtd_iommu_unlock(s);
entry->iova = 0;
entry->translated_addr = 0;
- entry->addr_mask = 0;
+ /*
+ * Set the mask for ATS (the range must be present even when the
+ * translation fails : PCIe rev 5 10.2.3.5)
+ */
+ entry->addr_mask = (level != UINT32_MAX) ?
+ (~vtd_pt_level_page_mask(level)) : (~VTD_PAGE_MASK_4K);
entry->perm = IOMMU_NONE;
return false;
}
@@ -2503,6 +2504,7 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
.translated_addr = 0,
.addr_mask = size - 1,
.perm = IOMMU_NONE,
+ .pasid = vtd_as->pasid,
},
};
memory_region_notify_iommu(&vtd_as->iommu, 0, event);
@@ -2695,7 +2697,7 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s)
uint32_t changed = status ^ val;
trace_vtd_reg_write_gcmd(status, val);
- if ((changed & VTD_GCMD_TE) && s->dma_translation) {
+ if ((changed & VTD_GCMD_TE) && x86_iommu->dma_translation) {
/* Translation enable/disable */
vtd_handle_gcmd_te(s, val & VTD_GCMD_TE);
}
@@ -2822,6 +2824,7 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
{
uint64_t mask[4] = {VTD_INV_DESC_WAIT_RSVD_LO, VTD_INV_DESC_WAIT_RSVD_HI,
VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE};
+ bool ret = true;
if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, false,
__func__, "wait")) {
@@ -2833,8 +2836,6 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
uint32_t status_data = (uint32_t)(inv_desc->lo >>
VTD_INV_DESC_WAIT_DATA_SHIFT);
- assert(!(inv_desc->lo & VTD_INV_DESC_WAIT_IF));
-
/* FIXME: need to be masked with HAW? */
dma_addr_t status_addr = inv_desc->hi;
trace_vtd_inv_desc_wait_sw(status_addr, status_data);
@@ -2843,18 +2844,28 @@ static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
&status_data, sizeof(status_data),
MEMTXATTRS_UNSPECIFIED)) {
trace_vtd_inv_desc_wait_write_fail(inv_desc->hi, inv_desc->lo);
- return false;
+ ret = false;
}
- } else if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) {
+ }
+
+ if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) {
/* Interrupt flag */
vtd_generate_completion_event(s);
- } else {
+ }
+
+ /*
+ * SW=0, IF=0, FN=1 is also a valid descriptor (VT-d 7.10)
+ * Nothing to do as we process the descriptors in order
+ */
+
+ if (!(inv_desc->lo & (VTD_INV_DESC_WAIT_IF | VTD_INV_DESC_WAIT_SW |
+ VTD_INV_DESC_WAIT_FN))) {
error_report_once("%s: invalid wait desc: hi=%"PRIx64", lo=%"PRIx64
" (unknown type)", __func__, inv_desc->hi,
inv_desc->lo);
return false;
}
- return true;
+ return ret;
}
static bool vtd_process_context_cache_desc(IntelIOMMUState *s,
@@ -3090,6 +3101,7 @@ static void do_invalidate_device_tlb(VTDAddressSpace *vtd_dev_as,
event.entry.iova = addr;
event.entry.perm = IOMMU_NONE;
event.entry.translated_addr = 0;
+ event.entry.pasid = vtd_dev_as->pasid;
memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event);
}
@@ -3136,6 +3148,59 @@ static bool vtd_process_device_piotlb_desc(IntelIOMMUState *s,
return true;
}
+static bool vtd_process_page_group_response_desc(IntelIOMMUState *s,
+ VTDInvDesc *inv_desc)
+{
+ VTDAddressSpace *vtd_dev_as;
+ bool pasid_present;
+ uint8_t response_code;
+ uint16_t rid;
+ uint32_t pasid;
+ uint16_t prgi;
+ IOMMUPRIResponse response;
+
+ if ((inv_desc->lo & VTD_INV_DESC_PGRESP_RSVD_LO) ||
+ (inv_desc->hi & VTD_INV_DESC_PGRESP_RSVD_HI)) {
+ error_report_once("%s: invalid page group response desc: hi=%"PRIx64
+ ", lo=%"PRIx64" (reserved nonzero)", __func__,
+ inv_desc->hi, inv_desc->lo);
+ return false;
+ }
+
+ pasid_present = VTD_INV_DESC_PGRESP_PP(inv_desc->lo);
+ response_code = VTD_INV_DESC_PGRESP_RC(inv_desc->lo);
+ rid = VTD_INV_DESC_PGRESP_RID(inv_desc->lo);
+ pasid = VTD_INV_DESC_PGRESP_PASID(inv_desc->lo);
+ prgi = VTD_INV_DESC_PGRESP_PRGI(inv_desc->hi);
+
+ if (!pasid_present) {
+ error_report_once("Page group response without PASID is"
+ "not supported yet");
+ return false;
+ }
+
+ vtd_dev_as = vtd_get_as_by_sid_and_pasid(s, rid, pasid);
+ if (!vtd_dev_as) {
+ return true;
+ }
+
+ response.prgi = prgi;
+
+ if (response_code == 0x0u) {
+ response.response_code = IOMMU_PRI_RESP_SUCCESS;
+ } else if (response_code == 0x1u) {
+ response.response_code = IOMMU_PRI_RESP_INVALID_REQUEST;
+ } else {
+ response.response_code = IOMMU_PRI_RESP_FAILURE;
+ }
+
+ if (vtd_dev_as->pri_notifier) {
+ vtd_dev_as->pri_notifier->notify(vtd_dev_as->pri_notifier, &response);
+ }
+
+ return true;
+}
+
static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
VTDInvDesc *inv_desc)
{
@@ -3236,6 +3301,13 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
}
break;
+ case VTD_INV_DESC_PGRESP:
+ trace_vtd_inv_desc("page group response", inv_desc.hi, inv_desc.lo);
+ if (!vtd_process_page_group_response_desc(s, &inv_desc)) {
+ return false;
+ }
+ break;
+
/*
* TODO: the entity of below two cases will be implemented in future series.
* To make guest (which integrates scalable mode support patch set in
@@ -3370,6 +3442,27 @@ static void vtd_handle_iectl_write(IntelIOMMUState *s)
}
}
+static void vtd_handle_prs_write(IntelIOMMUState *s)
+{
+ uint32_t prs = vtd_get_long_raw(s, DMAR_PRS_REG);
+ if (!(prs & VTD_PR_STATUS_PPR) && !(prs & VTD_PR_STATUS_PRO)) {
+ vtd_set_clear_mask_long(s, DMAR_PECTL_REG, VTD_PR_PECTL_IP, 0);
+ }
+}
+
+static void vtd_handle_pectl_write(IntelIOMMUState *s)
+{
+ uint32_t pectl = vtd_get_long_raw(s, DMAR_PECTL_REG);
+ if ((pectl & VTD_PR_PECTL_IP) && !(pectl & VTD_PR_PECTL_IM)) {
+ /*
+ * If IP field was 1 when software clears the IM field,
+ * the interrupt is generated along with clearing the IP field.
+ */
+ vtd_set_clear_mask_long(s, DMAR_PECTL_REG, VTD_PR_PECTL_IP, 0);
+ vtd_generate_interrupt(s, DMAR_PEADDR_REG, DMAR_PEDATA_REG);
+ }
+}
+
static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size)
{
IntelIOMMUState *s = opaque;
@@ -3412,6 +3505,11 @@ static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size)
val = s->iq >> 32;
break;
+ case DMAR_PEUADDR_REG:
+ assert(size == 4);
+ val = vtd_get_long_raw(s, DMAR_PEUADDR_REG);
+ break;
+
default:
if (size == 4) {
val = vtd_get_long(s, addr);
@@ -3475,6 +3573,11 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
vtd_handle_iotlb_write(s);
break;
+ case DMAR_PEUADDR_REG:
+ assert(size == 4);
+ vtd_set_long(s, addr, val);
+ break;
+
/* Invalidate Address Register, 64-bit */
case DMAR_IVA_REG:
if (size == 4) {
@@ -3655,6 +3758,18 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
vtd_set_long(s, addr, val);
break;
+ case DMAR_PRS_REG:
+ assert(size == 4);
+ vtd_set_long(s, addr, val);
+ vtd_handle_prs_write(s);
+ break;
+
+ case DMAR_PECTL_REG:
+ assert(size == 4);
+ vtd_set_long(s, addr, val);
+ vtd_handle_pectl_write(s);
+ break;
+
default:
if (size == 4) {
vtd_set_long(s, addr, val);
@@ -3672,6 +3787,7 @@ static IOMMUTLBEntry vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
IOMMUTLBEntry iotlb = {
/* We'll fill in the rest later. */
.target_as = &address_space_memory,
+ .pasid = vtd_as->pasid,
};
bool success;
@@ -3824,7 +3940,6 @@ static const Property vtd_properties[] = {
DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false),
DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false),
DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true),
- DEFINE_PROP_BOOL("dma-translation", IntelIOMMUState, dma_translation, true),
DEFINE_PROP_BOOL("stale-tm", IntelIOMMUState, stale_tm, false),
DEFINE_PROP_BOOL("fs1gp", IntelIOMMUState, fs1gp, true),
};
@@ -4367,6 +4482,12 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
assert(hiod);
+ if (!s->caching_mode) {
+ error_setg(errp, "Device assignment is not allowed without enabling "
+ "caching-mode=on for Intel IOMMU.");
+ return false;
+ }
+
vtd_iommu_lock(s);
if (g_hash_table_lookup(s->vtd_host_iommu_dev, &key)) {
@@ -4538,11 +4659,11 @@ static void vtd_cap_init(IntelIOMMUState *s)
s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
- VTD_CAP_MGAW(s->aw_bits);
+ VTD_CAP_ESRTPS | VTD_CAP_MGAW(s->aw_bits);
if (s->dma_drain) {
s->cap |= VTD_CAP_DRAIN;
}
- if (s->dma_translation) {
+ if (x86_iommu->dma_translation) {
if (s->aw_bits >= VTD_HOST_AW_39BIT) {
s->cap |= VTD_CAP_SAGAW_39bit;
}
@@ -4587,7 +4708,7 @@ static void vtd_cap_init(IntelIOMMUState *s)
}
if (s->pasid) {
- s->ecap |= VTD_ECAP_PASID;
+ s->ecap |= VTD_ECAP_PASID | VTD_ECAP_PSS;
}
}
@@ -4705,6 +4826,18 @@ static void vtd_init(IntelIOMMUState *s)
* Interrupt remapping registers.
*/
vtd_define_quad(s, DMAR_IRTA_REG, 0, 0xfffffffffffff80fULL, 0);
+
+ /* Page request registers */
+ if (s->ecap & VTD_ECAP_PRS) {
+ vtd_define_quad(s, DMAR_PQH_REG, 0, 0x7ffe0ULL, 0);
+ vtd_define_quad(s, DMAR_PQT_REG, 0, 0x7ffe0ULL, 0);
+ vtd_define_quad(s, DMAR_PQA_REG, 0, 0xfffffffffffff007ULL, 0);
+ vtd_define_long(s, DMAR_PRS_REG, 0, 0, 0x3UL);
+ vtd_define_long(s, DMAR_PECTL_REG, 0, 0x80000000UL, 0);
+ vtd_define_long(s, DMAR_PEDATA_REG, 0, 0xffffUL, 0);
+ vtd_define_long(s, DMAR_PEADDR_REG, 0, 0xfffffffcUL, 0);
+ vtd_define_long(s, DMAR_PEUADDR_REG, 0, 0xffffffffUL, 0);
+ }
}
/* Should not reset address_spaces when reset because devices will still use
@@ -4730,10 +4863,329 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
return &vtd_as->as;
}
+static IOMMUTLBEntry vtd_iommu_ats_do_translate(IOMMUMemoryRegion *iommu,
+ hwaddr addr,
+ IOMMUAccessFlags flags)
+{
+ IOMMUTLBEntry entry;
+ VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
+
+ if (vtd_is_interrupt_addr(addr)) {
+ vtd_report_ir_illegal_access(vtd_as, addr, flags & IOMMU_WO);
+ entry.target_as = &address_space_memory;
+ entry.iova = 0;
+ entry.translated_addr = 0;
+ entry.addr_mask = ~VTD_PAGE_MASK_4K;
+ entry.perm = IOMMU_NONE;
+ entry.pasid = PCI_NO_PASID;
+ } else {
+ entry = vtd_iommu_translate(iommu, addr, flags, 0);
+ }
+
+ return entry;
+}
+
+static ssize_t vtd_ats_request_translation(PCIBus *bus, void *opaque,
+ int devfn, uint32_t pasid,
+ bool priv_req, bool exec_req,
+ hwaddr addr, size_t length,
+ bool no_write, IOMMUTLBEntry *result,
+ size_t result_length,
+ uint32_t *err_count)
+{
+ IntelIOMMUState *s = opaque;
+ VTDAddressSpace *vtd_as;
+ IOMMUAccessFlags flags = IOMMU_ACCESS_FLAG_FULL(true, !no_write, exec_req,
+ priv_req, false, false);
+ ssize_t res_index = 0;
+ hwaddr target_address = addr + length;
+ IOMMUTLBEntry entry;
+
+ vtd_as = vtd_find_add_as(s, bus, devfn, pasid);
+ *err_count = 0;
+
+ while ((addr < target_address) && (res_index < result_length)) {
+ entry = vtd_iommu_ats_do_translate(&vtd_as->iommu, addr, flags);
+ entry.perm &= ~IOMMU_GLOBAL; /* Spec 4.1.2: Global Mapping never set */
+
+ if ((entry.perm & flags) != flags) {
+ *err_count += 1; /* Less than expected */
+ }
+
+ result[res_index] = entry;
+ res_index += 1;
+ addr = (addr & (~entry.addr_mask)) + (entry.addr_mask + 1);
+ }
+
+ /* Buffer too small */
+ if (addr < target_address) {
+ return -ENOMEM;
+ }
+
+ return res_index;
+}
+
+/* 11.4.11.3 : The number of entries in the page request queue is 2^(PQS + 7) */
+static inline uint64_t vtd_prq_size(IntelIOMMUState *s)
+{
+ return 1ULL << ((vtd_get_quad(s, DMAR_PQA_REG) & VTD_PQA_SIZE) + 7);
+}
+
+/**
+ * Return true if the bit is accessible and correctly set, false otherwise
+ */
+static bool vtd_check_pre_bit(VTDAddressSpace *vtd_as, hwaddr addr,
+ uint16_t sid, bool is_write)
+{
+ int ret;
+ IntelIOMMUState *s = vtd_as->iommu_state;
+ uint8_t bus_n = pci_bus_num(vtd_as->bus);
+ VTDContextEntry ce;
+ bool is_fpd_set = false;
+
+ ret = vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce);
+
+ if (ret) {
+ goto error_report;
+ }
+
+ if (!VTD_CE_GET_PRE(&ce)) {
+ ret = -VTD_FR_SM_PRE_ABS;
+ goto error_get_fpd_and_report;
+ }
+
+ return true;
+
+error_get_fpd_and_report:
+ /* Try to get fpd (may not work but we are already on an error path) */
+ is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
+ vtd_ce_get_pasid_fpd(s, &ce, &is_fpd_set, vtd_as->pasid);
+error_report:
+ vtd_report_fault(s, -ret, is_fpd_set, sid, addr, is_write,
+ vtd_as->pasid != PCI_NO_PASID, vtd_as->pasid);
+ return false;
+}
+
+/* Logic described in section 7.5 */
+static void vtd_generate_page_request_event(IntelIOMMUState *s,
+ uint32_t old_pr_status)
+{
+ uint32_t current_pectl = vtd_get_long(s, DMAR_PECTL_REG);
+ /*
+ * Hardware evaluates PPR and PRO fields in the Page Request Status Register
+ * and if any of them is set, Page Request Event is not generated
+ */
+ if (old_pr_status & (VTD_PR_STATUS_PRO | VTD_PR_STATUS_PPR)) {
+ return;
+ }
+
+ vtd_set_clear_mask_long(s, DMAR_PECTL_REG, 0, VTD_PR_PECTL_IP);
+ if (!(current_pectl & VTD_PR_PECTL_IM)) {
+ vtd_set_clear_mask_long(s, DMAR_PECTL_REG, VTD_PR_PECTL_IP, 0);
+ vtd_generate_interrupt(s, DMAR_PEADDR_REG, DMAR_PEDATA_REG);
+ }
+}
+
+/* When calling this function, we known that we are in scalable mode */
+static int vtd_pri_perform_implicit_invalidation(VTDAddressSpace *vtd_as,
+ hwaddr addr)
+{
+ IntelIOMMUState *s = vtd_as->iommu_state;
+ VTDContextEntry ce;
+ VTDPASIDEntry pe;
+ uint16_t pgtt;
+ uint16_t domain_id;
+ int ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
+ vtd_as->devfn, &ce);
+ if (ret) {
+ return -EINVAL;
+ }
+ ret = vtd_ce_get_rid2pasid_entry(s, &ce, &pe, vtd_as->pasid);
+ if (ret) {
+ return -EINVAL;
+ }
+ pgtt = VTD_PE_GET_TYPE(&pe);
+ domain_id = VTD_SM_PASID_ENTRY_DID(pe.val[1]);
+ ret = 0;
+ switch (pgtt) {
+ case VTD_SM_PASID_ENTRY_FLT:
+ vtd_piotlb_page_invalidate(s, domain_id, vtd_as->pasid, addr, 0);
+ break;
+ /* Room for other pgtt values */
+ default:
+ error_report_once("Translation type not supported yet : %d", pgtt);
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+/* Page Request Descriptor : 7.4.1.1 */
+static int vtd_pri_request_page(PCIBus *bus, void *opaque, int devfn,
+ uint32_t pasid, bool priv_req, bool exec_req,
+ hwaddr addr, bool lpig, uint16_t prgi,
+ bool is_read, bool is_write)
+{
+ IntelIOMMUState *s = opaque;
+ VTDAddressSpace *vtd_as;
+
+ vtd_as = vtd_find_add_as(s, bus, devfn, pasid);
+
+ uint64_t queue_addr_reg = vtd_get_quad(s, DMAR_PQA_REG);
+ uint64_t queue_tail_offset_reg = vtd_get_quad(s, DMAR_PQT_REG);
+ uint64_t new_queue_tail_offset = (
+ (queue_tail_offset_reg + VTD_PQA_ENTRY_SIZE) %
+ (vtd_prq_size(s) * VTD_PQA_ENTRY_SIZE));
+ uint64_t queue_head_offset_reg = vtd_get_quad(s, DMAR_PQH_REG);
+ hwaddr queue_tail = (queue_addr_reg & VTD_PQA_ADDR) + queue_tail_offset_reg;
+ uint32_t old_pr_status = vtd_get_long(s, DMAR_PRS_REG);
+ uint16_t sid = PCI_BUILD_BDF(pci_bus_num(vtd_as->bus), vtd_as->devfn);
+ VTDPRDesc desc;
+
+ if (!(s->ecap & VTD_ECAP_PRS)) {
+ return -EPERM;
+ }
+
+ /*
+ * No need to check if scalable mode is enabled as we already known that
+ * VTD_ECAP_PRS is set (see vtd_decide_config)
+ */
+
+ /* We do not support PRI without PASID */
+ if (vtd_as->pasid == PCI_NO_PASID) {
+ return -EPERM;
+ }
+ if (exec_req && !is_read) {
+ return -EINVAL;
+ }
+
+ /* Check PRE bit in the scalable mode context entry */
+ if (!vtd_check_pre_bit(vtd_as, addr, sid, is_write)) {
+ return -EPERM;
+ }
+
+ if (old_pr_status & VTD_PR_STATUS_PRO) {
+ /*
+ * No action is taken by hardware to report a fault
+ * or generate an event
+ */
+ return -ENOSPC;
+ }
+
+ /* Check for overflow */
+ if (new_queue_tail_offset == queue_head_offset_reg) {
+ vtd_set_clear_mask_long(s, DMAR_PRS_REG, 0, VTD_PR_STATUS_PRO);
+ vtd_generate_page_request_event(s, old_pr_status);
+ return -ENOSPC;
+ }
+
+ if (vtd_pri_perform_implicit_invalidation(vtd_as, addr)) {
+ return -EINVAL;
+ }
+
+ desc.lo = VTD_PRD_TYPE | VTD_PRD_PP(true) | VTD_PRD_RID(sid) |
+ VTD_PRD_PASID(vtd_as->pasid) | VTD_PRD_PMR(priv_req);
+ desc.hi = VTD_PRD_RDR(is_read) | VTD_PRD_WRR(is_write) |
+ VTD_PRD_LPIG(lpig) | VTD_PRD_PRGI(prgi) | VTD_PRD_ADDR(addr);
+
+ desc.lo = cpu_to_le64(desc.lo);
+ desc.hi = cpu_to_le64(desc.hi);
+ if (dma_memory_write(&address_space_memory, queue_tail, &desc, sizeof(desc),
+ MEMTXATTRS_UNSPECIFIED)) {
+ error_report_once("IO error, the PQ tail cannot be updated");
+ return -EIO;
+ }
+
+ /* increment the tail register and set the pending request bit */
+ vtd_set_quad(s, DMAR_PQT_REG, new_queue_tail_offset);
+ /*
+ * read status again so that the kernel does not miss a request.
+ * in some cases, we can trigger an unecessary interrupt but this strategy
+ * drastically improves performance as we don't need to take a lock.
+ */
+ old_pr_status = vtd_get_long(s, DMAR_PRS_REG);
+ if (!(old_pr_status & VTD_PR_STATUS_PPR)) {
+ vtd_set_clear_mask_long(s, DMAR_PRS_REG, 0, VTD_PR_STATUS_PPR);
+ vtd_generate_page_request_event(s, old_pr_status);
+ }
+
+ return 0;
+}
+
+static void vtd_init_iotlb_notifier(PCIBus *bus, void *opaque, int devfn,
+ IOMMUNotifier *n, IOMMUNotify fn,
+ void *user_opaque)
+{
+ n->opaque = user_opaque;
+ iommu_notifier_init(n, fn, IOMMU_NOTIFIER_DEVIOTLB_EVENTS, 0,
+ HWADDR_MAX, 0);
+}
+
+static void vtd_get_iotlb_info(void *opaque, uint8_t *addr_width,
+ uint32_t *min_page_size)
+{
+ IntelIOMMUState *s = opaque;
+
+ *addr_width = s->aw_bits;
+ *min_page_size = VTD_PAGE_SIZE;
+}
+
+static void vtd_register_iotlb_notifier(PCIBus *bus, void *opaque,
+ int devfn, uint32_t pasid,
+ IOMMUNotifier *n)
+{
+ IntelIOMMUState *s = opaque;
+ VTDAddressSpace *vtd_as;
+
+ vtd_as = vtd_find_add_as(s, bus, devfn, pasid);
+ memory_region_register_iommu_notifier(MEMORY_REGION(&vtd_as->iommu), n,
+ &error_fatal);
+}
+
+static void vtd_unregister_iotlb_notifier(PCIBus *bus, void *opaque,
+ int devfn, uint32_t pasid,
+ IOMMUNotifier *n)
+{
+ IntelIOMMUState *s = opaque;
+ VTDAddressSpace *vtd_as;
+
+ vtd_as = vtd_find_add_as(s, bus, devfn, pasid);
+ memory_region_unregister_iommu_notifier(MEMORY_REGION(&vtd_as->iommu), n);
+}
+
+static void vtd_pri_register_notifier(PCIBus *bus, void *opaque, int devfn,
+ uint32_t pasid, IOMMUPRINotifier *notifier)
+{
+ IntelIOMMUState *s = opaque;
+ VTDAddressSpace *vtd_as;
+
+ vtd_as = vtd_find_add_as(s, bus, devfn, pasid);
+ vtd_as->pri_notifier = notifier;
+}
+
+static void vtd_pri_unregister_notifier(PCIBus *bus, void *opaque,
+ int devfn, uint32_t pasid)
+{
+ IntelIOMMUState *s = opaque;
+ VTDAddressSpace *vtd_as;
+
+ vtd_as = vtd_find_add_as(s, bus, devfn, pasid);
+ vtd_as->pri_notifier = NULL;
+}
+
static PCIIOMMUOps vtd_iommu_ops = {
.get_address_space = vtd_host_dma_iommu,
.set_iommu_device = vtd_dev_set_iommu_device,
.unset_iommu_device = vtd_dev_unset_iommu_device,
+ .get_iotlb_info = vtd_get_iotlb_info,
+ .init_iotlb_notifier = vtd_init_iotlb_notifier,
+ .register_iotlb_notifier = vtd_register_iotlb_notifier,
+ .unregister_iotlb_notifier = vtd_unregister_iotlb_notifier,
+ .ats_request_translation = vtd_ats_request_translation,
+ .pri_register_notifier = vtd_pri_register_notifier,
+ .pri_unregister_notifier = vtd_pri_unregister_notifier,
+ .pri_request_page = vtd_pri_request_page,
};
static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
@@ -4791,32 +5243,6 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
return true;
}
-static int vtd_machine_done_notify_one(Object *child, void *unused)
-{
- IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default());
-
- /*
- * We hard-coded here because vfio-pci is the only special case
- * here. Let's be more elegant in the future when we can, but so
- * far there seems to be no better way.
- */
- if (object_dynamic_cast(child, "vfio-pci") && !iommu->caching_mode) {
- vtd_panic_require_caching_mode();
- }
-
- return 0;
-}
-
-static void vtd_machine_done_hook(Notifier *notifier, void *unused)
-{
- object_child_foreach_recursive(object_get_root(),
- vtd_machine_done_notify_one, NULL);
-}
-
-static Notifier vtd_machine_done_notify = {
- .notify = vtd_machine_done_hook,
-};
-
static void vtd_realize(DeviceState *dev, Error **errp)
{
MachineState *ms = MACHINE(qdev_get_machine());
@@ -4871,7 +5297,6 @@ static void vtd_realize(DeviceState *dev, Error **errp)
pci_setup_iommu(bus, &vtd_iommu_ops, dev);
/* Pseudo address space under root PCI bus. */
x86ms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC);
- qemu_add_machine_init_done_notifier(&vtd_machine_done_notify);
}
static void vtd_class_init(ObjectClass *klass, const void *data)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index e8b211e..0f6a123 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -190,8 +190,10 @@
#define VTD_ECAP_EIM (1ULL << 4)
#define VTD_ECAP_PT (1ULL << 6)
#define VTD_ECAP_SC (1ULL << 7)
+#define VTD_ECAP_PRS (1ULL << 29)
#define VTD_ECAP_MHMV (15ULL << 20)
#define VTD_ECAP_SRS (1ULL << 31)
+#define VTD_ECAP_PSS (7ULL << 35) /* limit: MemTxAttrs::pid */
#define VTD_ECAP_PASID (1ULL << 40)
#define VTD_ECAP_SMTS (1ULL << 43)
#define VTD_ECAP_SLTS (1ULL << 46)
@@ -213,6 +215,7 @@
#define VTD_CAP_DRAIN_WRITE (1ULL << 54)
#define VTD_CAP_DRAIN_READ (1ULL << 55)
#define VTD_CAP_FS1GP (1ULL << 56)
+#define VTD_CAP_ESRTPS (1ULL << 63)
#define VTD_CAP_DRAIN (VTD_CAP_DRAIN_READ | VTD_CAP_DRAIN_WRITE)
#define VTD_CAP_CM (1ULL << 7)
#define VTD_PASID_ID_SHIFT 20
@@ -313,6 +316,8 @@ typedef enum VTDFaultReason {
* request while disabled */
VTD_FR_IR_SID_ERR = 0x26, /* Invalid Source-ID */
+ VTD_FR_SM_PRE_ABS = 0x47, /* SCT.8 : PRE bit in a present SM CE is 0 */
+
/* PASID directory entry access failure */
VTD_FR_PASID_DIR_ACCESS_ERR = 0x50,
/* The Present(P) field of pasid directory entry is 0 */
@@ -375,6 +380,18 @@ union VTDInvDesc {
};
typedef union VTDInvDesc VTDInvDesc;
+/* Page Request Descriptor */
+union VTDPRDesc {
+ struct {
+ uint64_t lo;
+ uint64_t hi;
+ };
+ struct {
+ uint64_t val[4];
+ };
+};
+typedef union VTDPRDesc VTDPRDesc;
+
/* Masks for struct VTDInvDesc */
#define VTD_INV_DESC_ALL_ONE -1ULL
#define VTD_INV_DESC_TYPE(val) ((((val) >> 5) & 0x70ULL) | \
@@ -388,6 +405,7 @@ typedef union VTDInvDesc VTDInvDesc;
#define VTD_INV_DESC_PIOTLB 0x6 /* PASID-IOTLB Invalidate Desc */
#define VTD_INV_DESC_PC 0x7 /* PASID-cache Invalidate Desc */
#define VTD_INV_DESC_DEV_PIOTLB 0x8 /* PASID-based-DIOTLB inv_desc*/
+#define VTD_INV_DESC_PGRESP 0x9 /* Page Group Response Desc */
#define VTD_INV_DESC_NONE 0 /* Not an Invalidate Descriptor */
/* Masks for Invalidation Wait Descriptor*/
@@ -439,6 +457,15 @@ typedef union VTDInvDesc VTDInvDesc;
#define VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_VAL0 0xfff000000000f000ULL
#define VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_VAL1 0x7feULL
+/* Mask for Page Group Response Descriptor */
+#define VTD_INV_DESC_PGRESP_RSVD_HI 0xfffffffffffff003ULL
+#define VTD_INV_DESC_PGRESP_RSVD_LO 0xfff00000000001e0ULL
+#define VTD_INV_DESC_PGRESP_PP(val) (((val) >> 4) & 0x1ULL)
+#define VTD_INV_DESC_PGRESP_RC(val) (((val) >> 12) & 0xfULL)
+#define VTD_INV_DESC_PGRESP_RID(val) (((val) >> 16) & 0xffffULL)
+#define VTD_INV_DESC_PGRESP_PASID(val) (((val) >> 32) & 0xfffffULL)
+#define VTD_INV_DESC_PGRESP_PRGI(val) (((val) >> 3) & 0x1ffULL)
+
/* Rsvd field masks for spte */
#define VTD_SPTE_SNP 0x800ULL
@@ -490,6 +517,31 @@ typedef union VTDInvDesc VTDInvDesc;
#define VTD_INV_DESC_PIOTLB_RSVD_VAL0 0xfff000000000f1c0ULL
#define VTD_INV_DESC_PIOTLB_RSVD_VAL1 0xf80ULL
+/* Page Request Descriptor */
+/* For the low 64-bit of 128-bit */
+#define VTD_PRD_TYPE (1ULL)
+#define VTD_PRD_PP(val) (((val) & 1ULL) << 8)
+#define VTD_PRD_RID(val) (((val) & 0xffffULL) << 16)
+#define VTD_PRD_PASID(val) (((val) & 0xfffffULL) << 32)
+#define VTD_PRD_EXR(val) (((val) & 1ULL) << 52)
+#define VTD_PRD_PMR(val) (((val) & 1ULL) << 53)
+/* For the high 64-bit of 128-bit */
+#define VTD_PRD_RDR(val) ((val) & 1ULL)
+#define VTD_PRD_WRR(val) (((val) & 1ULL) << 1)
+#define VTD_PRD_LPIG(val) (((val) & 1ULL) << 2)
+#define VTD_PRD_PRGI(val) (((val) & 0x1ffULL) << 3)
+#define VTD_PRD_ADDR(val) ((val) & 0xfffffffffffff000ULL)
+
+/* Page Request Queue constants */
+#define VTD_PQA_ENTRY_SIZE 32 /* Size of an entry in bytes */
+/* Page Request Queue masks */
+#define VTD_PQA_ADDR 0xfffffffffffff000ULL /* PR queue address */
+#define VTD_PQA_SIZE 0x7ULL /* PR queue size */
+#define VTD_PR_STATUS_PPR 1UL /* Pending page request */
+#define VTD_PR_STATUS_PRO 2UL /* Page request overflow */
+#define VTD_PR_PECTL_IP 0x40000000UL /* PR control interrup pending */
+#define VTD_PR_PECTL_IM 0x80000000UL /* PR control interrup mask */
+
/* Information about page-selective IOTLB invalidate */
struct VTDIOTLBPageInvInfo {
uint16_t domain_id;
@@ -549,6 +601,7 @@ typedef struct VTDRootEntry VTDRootEntry;
#define VTD_SM_CONTEXT_ENTRY_RID2PASID_MASK 0xfffff
#define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw) (0x1e0ULL | ~VTD_HAW_MASK(aw))
#define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 0xffffffffffe00000ULL
+#define VTD_SM_CONTEXT_ENTRY_PRE 0x10ULL
/* PASID Table Related Definitions */
#define VTD_PASID_DIR_BASE_ADDR_MASK (~0xfffULL)
diff --git a/hw/i386/isapc.c b/hw/i386/isapc.c
new file mode 100644
index 0000000..44f4a44
--- /dev/null
+++ b/hw/i386/isapc.c
@@ -0,0 +1,189 @@
+/*
+ * QEMU PC System Emulator
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "qemu/osdep.h"
+
+#include "qemu/units.h"
+#include "qemu/error-report.h"
+#include "hw/char/parallel-isa.h"
+#include "hw/dma/i8257.h"
+#include "hw/i386/pc.h"
+#include "hw/ide/isa.h"
+#include "hw/ide/ide-bus.h"
+#include "system/kvm.h"
+#include "hw/i386/kvm/clock.h"
+#include "hw/xen/xen-x86.h"
+#include "system/xen.h"
+#include "hw/rtc/mc146818rtc.h"
+#include "target/i386/cpu.h"
+
+static const int ide_iobase[MAX_IDE_BUS] = { 0x1f0, 0x170 };
+static const int ide_iobase2[MAX_IDE_BUS] = { 0x3f6, 0x376 };
+static const int ide_irq[MAX_IDE_BUS] = { 14, 15 };
+
+
+static void pc_init_isa(MachineState *machine)
+{
+ PCMachineState *pcms = PC_MACHINE(machine);
+ PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
+ X86MachineState *x86ms = X86_MACHINE(machine);
+ MemoryRegion *system_memory = get_system_memory();
+ MemoryRegion *system_io = get_system_io();
+ ISABus *isa_bus;
+ uint32_t irq;
+ GSIState *gsi_state;
+ MemoryRegion *ram_memory;
+ DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
+ int i;
+
+ /*
+ * There is a small chance that someone unintentionally passes "-cpu max"
+ * for the isapc machine, which will provide a much more modern 32-bit
+ * CPU than would be expected for an ISA-era PC. If the "max" cpu type has
+ * been specified, choose the "best" 32-bit cpu possible which we consider
+ * be the pentium3 (deliberately choosing an Intel CPU given that the
+ * default 486 CPU for the isapc machine is also an Intel CPU).
+ */
+ if (!strcmp(machine->cpu_type, X86_CPU_TYPE_NAME("max"))) {
+ machine->cpu_type = X86_CPU_TYPE_NAME("pentium3");
+ warn_report("-cpu max is invalid for isapc machine, using pentium3");
+ }
+
+ /*
+ * Similarly if someone unintentionally passes "-cpu host" for the isapc
+ * machine then display a warning and also switch to the "best" 32-bit
+ * cpu possible which we consider to be the pentium3. This is because any
+ * host CPU will already be modern than this, but it also ensures any
+ * newer CPU flags/features are filtered out for older guests.
+ */
+ if (!strcmp(machine->cpu_type, X86_CPU_TYPE_NAME("host"))) {
+ machine->cpu_type = X86_CPU_TYPE_NAME("pentium3");
+ warn_report("-cpu host is invalid for isapc machine, using pentium3");
+ }
+
+ if (machine->ram_size > 3.5 * GiB) {
+ error_report("Too much memory for this machine: %" PRId64 " MiB, "
+ "maximum 3584 MiB", machine->ram_size / MiB);
+ exit(1);
+ }
+
+ /*
+ * There is no RAM split for the isapc machine
+ */
+ if (xen_enabled()) {
+ xen_hvm_init_pc(pcms, &ram_memory);
+ } else {
+ ram_memory = machine->ram;
+
+ pcms->max_ram_below_4g = 3.5 * GiB;
+ x86ms->above_4g_mem_size = 0;
+ x86ms->below_4g_mem_size = machine->ram_size;
+ }
+
+ x86_cpus_init(x86ms, pcmc->default_cpu_version);
+
+ if (kvm_enabled()) {
+ kvmclock_create(pcmc->kvmclock_create_always);
+ }
+
+ /* allocate ram and load rom/bios */
+ if (!xen_enabled()) {
+ pc_memory_init(pcms, system_memory, system_memory, 0);
+ } else {
+ assert(machine->ram_size == x86ms->below_4g_mem_size +
+ x86ms->above_4g_mem_size);
+
+ if (machine->kernel_filename != NULL) {
+ /* For xen HVM direct kernel boot, load linux here */
+ xen_load_linux(pcms);
+ }
+ }
+
+ gsi_state = pc_gsi_create(&x86ms->gsi, false);
+
+ isa_bus = isa_bus_new(NULL, system_memory, system_io,
+ &error_abort);
+ isa_bus_register_input_irqs(isa_bus, x86ms->gsi);
+
+ x86ms->rtc = isa_new(TYPE_MC146818_RTC);
+ qdev_prop_set_int32(DEVICE(x86ms->rtc), "base_year", 2000);
+ isa_realize_and_unref(x86ms->rtc, isa_bus, &error_fatal);
+ irq = object_property_get_uint(OBJECT(x86ms->rtc), "irq",
+ &error_fatal);
+ isa_connect_gpio_out(ISA_DEVICE(x86ms->rtc), 0, irq);
+
+ i8257_dma_init(OBJECT(machine), isa_bus, 0);
+ pcms->hpet_enabled = false;
+
+ if (x86ms->pic == ON_OFF_AUTO_ON || x86ms->pic == ON_OFF_AUTO_AUTO) {
+ pc_i8259_create(isa_bus, gsi_state->i8259_irq);
+ }
+
+ if (tcg_enabled()) {
+ x86_register_ferr_irq(x86ms->gsi[13]);
+ }
+
+ pc_vga_init(isa_bus, NULL);
+
+ /* init basic PC hardware */
+ pc_basic_device_init(pcms, isa_bus, x86ms->gsi, x86ms->rtc,
+ !MACHINE_CLASS(pcmc)->no_floppy, 0x4);
+
+ pc_nic_init(pcmc, isa_bus, NULL);
+
+ ide_drive_get(hd, ARRAY_SIZE(hd));
+ for (i = 0; i < MAX_IDE_BUS; i++) {
+ ISADevice *dev;
+ char busname[] = "ide.0";
+ dev = isa_ide_init(isa_bus, ide_iobase[i], ide_iobase2[i],
+ ide_irq[i],
+ hd[MAX_IDE_DEVS * i], hd[MAX_IDE_DEVS * i + 1]);
+ /*
+ * The ide bus name is ide.0 for the first bus and ide.1 for the
+ * second one.
+ */
+ busname[4] = '0' + i;
+ pcms->idebus[i] = qdev_get_child_bus(DEVICE(dev), busname);
+ }
+}
+
+static void isapc_machine_options(MachineClass *m)
+{
+ static const char * const valid_cpu_types[] = {
+ X86_CPU_TYPE_NAME("486"),
+ X86_CPU_TYPE_NAME("athlon"),
+ X86_CPU_TYPE_NAME("kvm32"),
+ X86_CPU_TYPE_NAME("pentium"),
+ X86_CPU_TYPE_NAME("pentium2"),
+ X86_CPU_TYPE_NAME("pentium3"),
+ X86_CPU_TYPE_NAME("qemu32"),
+ X86_CPU_TYPE_NAME("max"),
+ X86_CPU_TYPE_NAME("host"),
+ NULL
+ };
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+
+ m->desc = "ISA-only PC";
+ m->max_cpus = 1;
+ m->option_rom_has_mr = true;
+ m->rom_file_has_mr = false;
+ pcmc->pci_enabled = false;
+ pcmc->has_acpi_build = false;
+ pcmc->smbios_defaults = false;
+ pcmc->gigabyte_align = false;
+ pcmc->smbios_legacy_mode = true;
+ pcmc->has_reserved_memory = false;
+ m->default_nic = "ne2k_isa";
+ m->default_cpu_type = X86_CPU_TYPE_NAME("486");
+ m->valid_cpu_types = valid_cpu_types;
+ m->no_floppy = !module_object_class_by_name(TYPE_ISA_FDC);
+ m->no_parallel = !module_object_class_by_name(TYPE_ISA_PARALLEL);
+}
+
+DEFINE_PC_MACHINE(isapc, "isapc", pc_init_isa,
+ isapc_machine_options);
diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c
index 39035db..1be9bfe 100644
--- a/hw/i386/kvm/apic.c
+++ b/hw/i386/kvm/apic.c
@@ -17,6 +17,7 @@
#include "system/hw_accel.h"
#include "system/kvm.h"
#include "kvm/kvm_i386.h"
+#include "kvm/tdx.h"
static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic,
int reg_id, uint32_t val)
@@ -141,6 +142,10 @@ static void kvm_apic_put(CPUState *cs, run_on_cpu_data data)
struct kvm_lapic_state kapic;
int ret;
+ if (is_tdx_vm()) {
+ return;
+ }
+
kvm_put_apicbase(s->cpu, s->apicbase);
kvm_put_apic_state(s, &kapic);
diff --git a/hw/i386/kvm/xen-stubs.c b/hw/i386/kvm/xen-stubs.c
index d03131e..ce73119 100644
--- a/hw/i386/kvm/xen-stubs.c
+++ b/hw/i386/kvm/xen-stubs.c
@@ -12,7 +12,6 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
-#include "qapi/qapi-commands-misc-target.h"
#include "xen_evtchn.h"
#include "xen_primary_console.h"
@@ -38,15 +37,3 @@ void xen_primary_console_create(void)
void xen_primary_console_set_be_port(uint16_t port)
{
}
-#ifdef TARGET_I386
-EvtchnInfoList *qmp_xen_event_list(Error **errp)
-{
- error_setg(errp, "Xen event channel emulation not enabled");
- return NULL;
-}
-
-void qmp_xen_event_inject(uint32_t port, Error **errp)
-{
- error_setg(errp, "Xen event channel emulation not enabled");
-}
-#endif
diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c
index b519054..dd566c4 100644
--- a/hw/i386/kvm/xen_evtchn.c
+++ b/hw/i386/kvm/xen_evtchn.c
@@ -19,7 +19,7 @@
#include "monitor/monitor.h"
#include "monitor/hmp.h"
#include "qapi/error.h"
-#include "qapi/qapi-commands-misc-target.h"
+#include "qapi/qapi-commands-misc-i386.h"
#include "qobject/qdict.h"
#include "qom/object.h"
#include "exec/target_page.h"
diff --git a/hw/i386/meson.build b/hw/i386/meson.build
index 10bdfde..436b3ce 100644
--- a/hw/i386/meson.build
+++ b/hw/i386/meson.build
@@ -14,6 +14,7 @@ i386_ss.add(when: 'CONFIG_X86_IOMMU', if_true: files('x86-iommu.c'),
i386_ss.add(when: 'CONFIG_AMD_IOMMU', if_true: files('amd_iommu.c'),
if_false: files('amd_iommu-stub.c'))
i386_ss.add(when: 'CONFIG_I440FX', if_true: files('pc_piix.c'))
+i386_ss.add(when: 'CONFIG_ISAPC', if_true: files('isapc.c'))
i386_ss.add(when: 'CONFIG_MICROVM', if_true: files('x86-common.c', 'microvm.c', 'acpi-microvm.c', 'microvm-dt.c'))
i386_ss.add(when: 'CONFIG_NITRO_ENCLAVE', if_true: files('nitro_enclave.c'))
i386_ss.add(when: 'CONFIG_Q35', if_true: files('pc_q35.c'))
@@ -32,6 +33,7 @@ i386_ss.add(when: 'CONFIG_PC', if_true: files(
'port92.c'))
i386_ss.add(when: 'CONFIG_X86_FW_OVMF', if_true: files('pc_sysfw_ovmf.c'),
if_false: files('pc_sysfw_ovmf-stubs.c'))
+i386_ss.add(when: 'CONFIG_TDX', if_true: files('tdvf.c', 'tdvf-hob.c'))
subdir('kvm')
subdir('xen')
diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index e0daf0d..94d22a2 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -49,6 +49,7 @@
#include "hw/acpi/generic_event_device.h"
#include "hw/pci-host/gpex.h"
#include "hw/usb/xhci.h"
+#include "hw/vfio/types.h"
#include "elf.h"
#include "kvm/kvm_i386.h"
@@ -633,6 +634,8 @@ GlobalProperty microvm_properties[] = {
* so reserving io space is not going to work. Turn it off.
*/
{ "pcie-root-port", "io-reserve", "0" },
+ { TYPE_RAMFB_DEVICE, "use-legacy-x86-rom", "true" },
+ { TYPE_VFIO_PCI_NOHOTPLUG, "use-legacy-x86-rom", "true" },
};
static void microvm_class_init(ObjectClass *oc, const void *data)
diff --git a/hw/i386/monitor.c b/hw/i386/monitor.c
index 1921e4d..79df965 100644
--- a/hw/i386/monitor.c
+++ b/hw/i386/monitor.c
@@ -26,7 +26,7 @@
#include "monitor/monitor.h"
#include "qobject/qdict.h"
#include "qapi/error.h"
-#include "qapi/qapi-commands-misc-target.h"
+#include "qapi/qapi-commands-misc-i386.h"
#include "hw/i386/x86.h"
#include "hw/rtc/mc146818rtc.h"
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 7065615..4d6bcbb 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -44,6 +44,7 @@
#include "system/xen.h"
#include "system/reset.h"
#include "kvm/kvm_i386.h"
+#include "kvm/tdx.h"
#include "hw/xen/xen.h"
#include "qobject/qlist.h"
#include "qemu/error-report.h"
@@ -80,7 +81,15 @@
{ "qemu64-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },\
{ "athlon-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },
-GlobalProperty pc_compat_10_0[] = {};
+GlobalProperty pc_compat_10_1[] = {};
+const size_t pc_compat_10_1_len = G_N_ELEMENTS(pc_compat_10_1);
+
+GlobalProperty pc_compat_10_0[] = {
+ { TYPE_X86_CPU, "x-consistent-cache", "false" },
+ { TYPE_X86_CPU, "x-vendor-cpuid-only-v2", "false" },
+ { TYPE_X86_CPU, "x-arch-cap-always-on", "true" },
+ { TYPE_X86_CPU, "x-pdcm-on-even-without-pmu", "true" },
+};
const size_t pc_compat_10_0_len = G_N_ELEMENTS(pc_compat_10_0);
GlobalProperty pc_compat_9_2[] = {};
@@ -259,28 +268,6 @@ GlobalProperty pc_compat_2_6[] = {
};
const size_t pc_compat_2_6_len = G_N_ELEMENTS(pc_compat_2_6);
-GlobalProperty pc_compat_2_5[] = {};
-const size_t pc_compat_2_5_len = G_N_ELEMENTS(pc_compat_2_5);
-
-GlobalProperty pc_compat_2_4[] = {
- PC_CPU_MODEL_IDS("2.4.0")
- { "Haswell-" TYPE_X86_CPU, "abm", "off" },
- { "Haswell-noTSX-" TYPE_X86_CPU, "abm", "off" },
- { "Broadwell-" TYPE_X86_CPU, "abm", "off" },
- { "Broadwell-noTSX-" TYPE_X86_CPU, "abm", "off" },
- { "host" "-" TYPE_X86_CPU, "host-cache-info", "on" },
- { TYPE_X86_CPU, "check", "off" },
- { "qemu64" "-" TYPE_X86_CPU, "sse4a", "on" },
- { "qemu64" "-" TYPE_X86_CPU, "abm", "on" },
- { "qemu64" "-" TYPE_X86_CPU, "popcnt", "on" },
- { "qemu32" "-" TYPE_X86_CPU, "popcnt", "on" },
- { "Opteron_G2" "-" TYPE_X86_CPU, "rdtscp", "on" },
- { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "on" },
- { "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "on" },
- { "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "on", }
-};
-const size_t pc_compat_2_4_len = G_N_ELEMENTS(pc_compat_2_4);
-
/*
* @PC_FW_DATA:
* Size of the chunk of memory at the top of RAM for the BIOS ACPI tables
@@ -630,7 +617,7 @@ void pc_machine_done(Notifier *notifier, void *data)
&error_fatal);
if (pcms->cxl_devices_state.is_enabled) {
- cxl_fmws_link_targets(&pcms->cxl_devices_state, &error_fatal);
+ cxl_fmws_link_targets(&error_fatal);
}
/* set the number of CPUs */
@@ -739,20 +726,28 @@ static uint64_t pc_get_cxl_range_start(PCMachineState *pcms)
return cxl_base;
}
-static uint64_t pc_get_cxl_range_end(PCMachineState *pcms)
+static int cxl_get_fmw_end(Object *obj, void *opaque)
{
- uint64_t start = pc_get_cxl_range_start(pcms) + MiB;
+ struct CXLFixedWindow *fw;
+ uint64_t *start = opaque;
- if (pcms->cxl_devices_state.fixed_windows) {
- GList *it;
-
- start = ROUND_UP(start, 256 * MiB);
- for (it = pcms->cxl_devices_state.fixed_windows; it; it = it->next) {
- CXLFixedWindow *fw = it->data;
- start += fw->size;
- }
+ if (!object_dynamic_cast(obj, TYPE_CXL_FMW)) {
+ return 0;
}
+ fw = CXL_FMW(obj);
+ *start += fw->size;
+
+ return 0;
+}
+
+static uint64_t pc_get_cxl_range_end(PCMachineState *pcms)
+{
+ uint64_t start = pc_get_cxl_range_start(pcms) + MiB;
+
+ /* Ordering doesn't matter so no need to build a sorted list */
+ object_child_foreach_recursive(object_get_root(), cxl_get_fmw_end,
+ &start);
return start;
}
@@ -844,6 +839,7 @@ void pc_memory_init(PCMachineState *pcms,
hwaddr maxphysaddr, maxusedaddr;
hwaddr cxl_base, cxl_resv_end = 0;
X86CPU *cpu = X86_CPU(first_cpu);
+ uint64_t res_mem_end;
assert(machine->ram_size == x86ms->below_4g_mem_size +
x86ms->above_4g_mem_size);
@@ -954,60 +950,48 @@ void pc_memory_init(PCMachineState *pcms,
cxl_base = pc_get_cxl_range_start(pcms);
memory_region_init(mr, OBJECT(machine), "cxl_host_reg", cxl_size);
memory_region_add_subregion(system_memory, cxl_base, mr);
- cxl_resv_end = cxl_base + cxl_size;
- if (pcms->cxl_devices_state.fixed_windows) {
- hwaddr cxl_fmw_base;
- GList *it;
-
- cxl_fmw_base = ROUND_UP(cxl_base + cxl_size, 256 * MiB);
- for (it = pcms->cxl_devices_state.fixed_windows; it; it = it->next) {
- CXLFixedWindow *fw = it->data;
-
- fw->base = cxl_fmw_base;
- memory_region_init_io(&fw->mr, OBJECT(machine), &cfmws_ops, fw,
- "cxl-fixed-memory-region", fw->size);
- memory_region_add_subregion(system_memory, fw->base, &fw->mr);
- cxl_fmw_base += fw->size;
- cxl_resv_end = cxl_fmw_base;
- }
- }
+ cxl_base = ROUND_UP(cxl_base + cxl_size, 256 * MiB);
+ cxl_resv_end = cxl_fmws_set_memmap(cxl_base, maxphysaddr);
+ cxl_fmws_update_mmio();
}
/* Initialize PC system firmware */
pc_system_firmware_init(pcms, rom_memory);
- option_rom_mr = g_malloc(sizeof(*option_rom_mr));
- if (machine_require_guest_memfd(machine)) {
- memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom",
- PC_ROM_SIZE, &error_fatal);
- } else {
- memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
- &error_fatal);
- if (pcmc->pci_enabled) {
- memory_region_set_readonly(option_rom_mr, true);
+ if (!is_tdx_vm()) {
+ option_rom_mr = g_malloc(sizeof(*option_rom_mr));
+ if (machine_require_guest_memfd(machine)) {
+ memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom",
+ PC_ROM_SIZE, &error_fatal);
+ } else {
+ memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
+ &error_fatal);
+ if (pcmc->pci_enabled) {
+ memory_region_set_readonly(option_rom_mr, true);
+ }
}
+ memory_region_add_subregion_overlap(rom_memory,
+ PC_ROM_MIN_VGA,
+ option_rom_mr,
+ 1);
}
- memory_region_add_subregion_overlap(rom_memory,
- PC_ROM_MIN_VGA,
- option_rom_mr,
- 1);
fw_cfg = fw_cfg_arch_create(machine,
x86ms->boot_cpus, x86ms->apic_id_limit);
rom_set_fw(fw_cfg);
- if (machine->device_memory) {
- uint64_t *val = g_malloc(sizeof(*val));
- uint64_t res_mem_end = machine->device_memory->base;
-
- if (!pcmc->broken_reserved_end) {
- res_mem_end += memory_region_size(&machine->device_memory->mr);
- }
+ if (pcms->cxl_devices_state.is_enabled) {
+ res_mem_end = cxl_resv_end;
+ } else if (machine->device_memory) {
+ res_mem_end = machine->device_memory->base
+ + memory_region_size(&machine->device_memory->mr);
+ } else {
+ res_mem_end = 0;
+ }
- if (pcms->cxl_devices_state.is_enabled) {
- res_mem_end = cxl_resv_end;
- }
+ if (res_mem_end) {
+ uint64_t *val = g_malloc(sizeof(*val));
*val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB));
fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val));
}
@@ -1044,9 +1028,7 @@ uint64_t pc_pci_hole64_start(void)
hole64_start = pc_get_cxl_range_end(pcms);
} else if (pcmc->has_reserved_memory && (ms->ram_size < ms->maxram_size)) {
pc_get_device_memory_range(pcms, &hole64_start, &size);
- if (!pcmc->broken_reserved_end) {
- hole64_start += size;
- }
+ hole64_start += size;
} else {
hole64_start = pc_above_4g_end(pcms);
}
@@ -1058,7 +1040,6 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
{
DeviceState *dev = NULL;
- rom_set_order_override(FW_CFG_ORDER_OVERRIDE_VGA);
if (pci_bus) {
PCIDevice *pcidev = pci_vga_init(pci_bus);
dev = pcidev ? &pcidev->qdev : NULL;
@@ -1066,7 +1047,7 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
ISADevice *isadev = isa_vga_init(isa_bus);
dev = isadev ? DEVICE(isadev) : NULL;
}
- rom_reset_order_override();
+
return dev;
}
@@ -1256,8 +1237,6 @@ void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus)
bool default_is_ne2k = g_str_equal(mc->default_nic, TYPE_ISA_NE2000);
NICInfo *nd;
- rom_set_order_override(FW_CFG_ORDER_OVERRIDE_NIC);
-
while ((nd = qemu_find_nic_info(TYPE_ISA_NE2000, default_is_ne2k, NULL))) {
pc_init_ne2k_isa(isa_bus, nd, &error_fatal);
}
@@ -1266,8 +1245,6 @@ void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus)
if (pci_bus) {
pci_init_nic_devices(pci_bus, mc->default_nic);
}
-
- rom_reset_order_override();
}
void pc_i8259_create(ISABus *isa_bus, qemu_irq *i8259_irqs)
@@ -1747,25 +1724,6 @@ static void pc_machine_wakeup(MachineState *machine)
cpu_synchronize_all_post_reset();
}
-static bool pc_hotplug_allowed(MachineState *ms, DeviceState *dev, Error **errp)
-{
- X86IOMMUState *iommu = x86_iommu_get_default();
- IntelIOMMUState *intel_iommu;
-
- if (iommu &&
- object_dynamic_cast((Object *)iommu, TYPE_INTEL_IOMMU_DEVICE) &&
- object_dynamic_cast((Object *)dev, "vfio-pci")) {
- intel_iommu = INTEL_IOMMU_DEVICE(iommu);
- if (!intel_iommu->caching_mode) {
- error_setg(errp, "Device assignment is not allowed without "
- "enabling caching-mode=on for Intel IOMMU.");
- return false;
- }
- }
-
- return true;
-}
-
static void pc_machine_class_init(ObjectClass *oc, const void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
@@ -1785,7 +1743,6 @@ static void pc_machine_class_init(ObjectClass *oc, const void *data)
x86mc->apic_xrupt_override = true;
assert(!mc->get_hotplug_handler);
mc->get_hotplug_handler = pc_get_hotplug_handler;
- mc->hotplug_allowed = pc_hotplug_allowed;
mc->auto_enable_numa_with_memhp = true;
mc->auto_enable_numa_with_memdev = true;
mc->has_hotpluggable_cpus = true;
@@ -1860,6 +1817,18 @@ static void pc_machine_class_init(ObjectClass *oc, const void *data)
object_class_property_add_bool(oc, "fd-bootchk",
pc_machine_get_fd_bootchk,
pc_machine_set_fd_bootchk);
+
+#if defined(CONFIG_IGVM)
+ object_class_property_add_link(oc, "igvm-cfg",
+ TYPE_IGVM_CFG,
+ offsetof(X86MachineState, igvm),
+ object_property_allow_set_link,
+ OBJ_PROP_LINK_STRONG);
+ object_class_property_set_description(oc, "igvm-cfg",
+ "Set IGVM configuration");
+#endif
+
+
}
static const TypeInfo pc_machine_info = {
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 0dce512..7b3611e 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -27,20 +27,16 @@
#include "qemu/units.h"
#include "hw/char/parallel-isa.h"
-#include "hw/dma/i8257.h"
-#include "hw/loader.h"
#include "hw/i386/x86.h"
#include "hw/i386/pc.h"
#include "hw/i386/apic.h"
#include "hw/pci-host/i440fx.h"
-#include "hw/rtc/mc146818rtc.h"
#include "hw/southbridge/piix.h"
#include "hw/display/ramfb.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_ids.h"
#include "hw/usb.h"
#include "net/net.h"
-#include "hw/ide/isa.h"
#include "hw/ide/pci.h"
#include "hw/irq.h"
#include "system/kvm.h"
@@ -49,6 +45,7 @@
#include "hw/i2c/smbus_eeprom.h"
#include "system/memory.h"
#include "hw/acpi/acpi.h"
+#include "hw/vfio/types.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "system/xen.h"
@@ -71,11 +68,12 @@
#define XEN_IOAPIC_NUM_PIRQS 128ULL
-#ifdef CONFIG_IDE_ISA
-static const int ide_iobase[MAX_IDE_BUS] = { 0x1f0, 0x170 };
-static const int ide_iobase2[MAX_IDE_BUS] = { 0x3f6, 0x376 };
-static const int ide_irq[MAX_IDE_BUS] = { 14, 15 };
-#endif
+static GlobalProperty pc_piix_compat_defaults[] = {
+ { TYPE_RAMFB_DEVICE, "use-legacy-x86-rom", "true" },
+ { TYPE_VFIO_PCI_NOHOTPLUG, "use-legacy-x86-rom", "true" },
+};
+static const size_t pc_piix_compat_defaults_len =
+ G_N_ELEMENTS(pc_piix_compat_defaults);
/*
* Return the global irq number corresponding to a given device irq
@@ -108,16 +106,20 @@ static void pc_init1(MachineState *machine, const char *pci_type)
X86MachineState *x86ms = X86_MACHINE(machine);
MemoryRegion *system_memory = get_system_memory();
MemoryRegion *system_io = get_system_io();
- Object *phb = NULL;
+ Object *phb;
ISABus *isa_bus;
Object *piix4_pm = NULL;
qemu_irq smi_irq;
GSIState *gsi_state;
MemoryRegion *ram_memory;
MemoryRegion *pci_memory = NULL;
- MemoryRegion *rom_memory = system_memory;
ram_addr_t lowmem;
uint64_t hole64_size = 0;
+ PCIDevice *pci_dev;
+ DeviceState *dev;
+ size_t i;
+
+ assert(pcmc->pci_enabled);
/*
* Calculate ram split, for memory below and above 4G. It's a bit
@@ -188,42 +190,39 @@ static void pc_init1(MachineState *machine, const char *pci_type)
kvmclock_create(pcmc->kvmclock_create_always);
}
- if (pcmc->pci_enabled) {
- pci_memory = g_new(MemoryRegion, 1);
- memory_region_init(pci_memory, NULL, "pci", UINT64_MAX);
- rom_memory = pci_memory;
-
- phb = OBJECT(qdev_new(TYPE_I440FX_PCI_HOST_BRIDGE));
- object_property_add_child(OBJECT(machine), "i440fx", phb);
- object_property_set_link(phb, PCI_HOST_PROP_RAM_MEM,
- OBJECT(ram_memory), &error_fatal);
- object_property_set_link(phb, PCI_HOST_PROP_PCI_MEM,
- OBJECT(pci_memory), &error_fatal);
- object_property_set_link(phb, PCI_HOST_PROP_SYSTEM_MEM,
- OBJECT(system_memory), &error_fatal);
- object_property_set_link(phb, PCI_HOST_PROP_IO_MEM,
- OBJECT(system_io), &error_fatal);
- object_property_set_uint(phb, PCI_HOST_BELOW_4G_MEM_SIZE,
- x86ms->below_4g_mem_size, &error_fatal);
- object_property_set_uint(phb, PCI_HOST_ABOVE_4G_MEM_SIZE,
- x86ms->above_4g_mem_size, &error_fatal);
- object_property_set_str(phb, I440FX_HOST_PROP_PCI_TYPE, pci_type,
- &error_fatal);
- sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal);
-
- pcms->pcibus = PCI_BUS(qdev_get_child_bus(DEVICE(phb), "pci.0"));
- pci_bus_map_irqs(pcms->pcibus,
- xen_enabled() ? xen_pci_slot_get_pirq
- : pc_pci_slot_get_pirq);
-
- hole64_size = object_property_get_uint(phb,
- PCI_HOST_PROP_PCI_HOLE64_SIZE,
- &error_abort);
- }
+ pci_memory = g_new(MemoryRegion, 1);
+ memory_region_init(pci_memory, NULL, "pci", UINT64_MAX);
+
+ phb = OBJECT(qdev_new(TYPE_I440FX_PCI_HOST_BRIDGE));
+ object_property_add_child(OBJECT(machine), "i440fx", phb);
+ object_property_set_link(phb, PCI_HOST_PROP_RAM_MEM,
+ OBJECT(ram_memory), &error_fatal);
+ object_property_set_link(phb, PCI_HOST_PROP_PCI_MEM,
+ OBJECT(pci_memory), &error_fatal);
+ object_property_set_link(phb, PCI_HOST_PROP_SYSTEM_MEM,
+ OBJECT(system_memory), &error_fatal);
+ object_property_set_link(phb, PCI_HOST_PROP_IO_MEM,
+ OBJECT(system_io), &error_fatal);
+ object_property_set_uint(phb, PCI_HOST_BELOW_4G_MEM_SIZE,
+ x86ms->below_4g_mem_size, &error_fatal);
+ object_property_set_uint(phb, PCI_HOST_ABOVE_4G_MEM_SIZE,
+ x86ms->above_4g_mem_size, &error_fatal);
+ object_property_set_str(phb, I440FX_HOST_PROP_PCI_TYPE, pci_type,
+ &error_fatal);
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal);
+
+ pcms->pcibus = PCI_BUS(qdev_get_child_bus(DEVICE(phb), "pci.0"));
+ pci_bus_map_irqs(pcms->pcibus,
+ xen_enabled() ? xen_pci_slot_get_pirq
+ : pc_pci_slot_get_pirq);
+
+ hole64_size = object_property_get_uint(phb,
+ PCI_HOST_PROP_PCI_HOLE64_SIZE,
+ &error_abort);
/* allocate ram and load rom/bios */
if (!xen_enabled()) {
- pc_memory_init(pcms, system_memory, rom_memory, hole64_size);
+ pc_memory_init(pcms, system_memory, pci_memory, hole64_size);
} else {
assert(machine->ram_size == x86ms->below_4g_mem_size +
x86ms->above_4g_mem_size);
@@ -235,81 +234,63 @@ static void pc_init1(MachineState *machine, const char *pci_type)
}
}
- gsi_state = pc_gsi_create(&x86ms->gsi, pcmc->pci_enabled);
-
- if (pcmc->pci_enabled) {
- PCIDevice *pci_dev;
- DeviceState *dev;
- size_t i;
-
- pci_dev = pci_new_multifunction(-1, pcms->south_bridge);
- object_property_set_bool(OBJECT(pci_dev), "has-usb",
- machine_usb(machine), &error_abort);
- object_property_set_bool(OBJECT(pci_dev), "has-acpi",
- x86_machine_is_acpi_enabled(x86ms),
- &error_abort);
- object_property_set_bool(OBJECT(pci_dev), "has-pic", false,
- &error_abort);
- object_property_set_bool(OBJECT(pci_dev), "has-pit", false,
- &error_abort);
- qdev_prop_set_uint32(DEVICE(pci_dev), "smb_io_base", 0xb100);
- object_property_set_bool(OBJECT(pci_dev), "smm-enabled",
- x86_machine_is_smm_enabled(x86ms),
- &error_abort);
- dev = DEVICE(pci_dev);
- for (i = 0; i < ISA_NUM_IRQS; i++) {
- qdev_connect_gpio_out_named(dev, "isa-irqs", i, x86ms->gsi[i]);
- }
- pci_realize_and_unref(pci_dev, pcms->pcibus, &error_fatal);
-
- if (xen_enabled()) {
- pci_device_set_intx_routing_notifier(
- pci_dev, piix_intx_routing_notifier_xen);
-
- /*
- * Xen supports additional interrupt routes from the PCI devices to
- * the IOAPIC: the four pins of each PCI device on the bus are also
- * connected to the IOAPIC directly.
- * These additional routes can be discovered through ACPI.
- */
- pci_bus_irqs(pcms->pcibus, xen_intx_set_irq, pci_dev,
- XEN_IOAPIC_NUM_PIRQS);
- }
+ gsi_state = pc_gsi_create(&x86ms->gsi, true);
+
+ pci_dev = pci_new_multifunction(-1, pcms->south_bridge);
+ object_property_set_bool(OBJECT(pci_dev), "has-usb",
+ machine_usb(machine), &error_abort);
+ object_property_set_bool(OBJECT(pci_dev), "has-acpi",
+ x86_machine_is_acpi_enabled(x86ms),
+ &error_abort);
+ object_property_set_bool(OBJECT(pci_dev), "has-pic", false,
+ &error_abort);
+ object_property_set_bool(OBJECT(pci_dev), "has-pit", false,
+ &error_abort);
+ qdev_prop_set_uint32(DEVICE(pci_dev), "smb_io_base", 0xb100);
+ object_property_set_bool(OBJECT(pci_dev), "smm-enabled",
+ x86_machine_is_smm_enabled(x86ms),
+ &error_abort);
+ dev = DEVICE(pci_dev);
+ for (i = 0; i < ISA_NUM_IRQS; i++) {
+ qdev_connect_gpio_out_named(dev, "isa-irqs", i, x86ms->gsi[i]);
+ }
+ pci_realize_and_unref(pci_dev, pcms->pcibus, &error_fatal);
- isa_bus = ISA_BUS(qdev_get_child_bus(DEVICE(pci_dev), "isa.0"));
- x86ms->rtc = ISA_DEVICE(object_resolve_path_component(OBJECT(pci_dev),
- "rtc"));
- piix4_pm = object_resolve_path_component(OBJECT(pci_dev), "pm");
- dev = DEVICE(object_resolve_path_component(OBJECT(pci_dev), "ide"));
- pci_ide_create_devs(PCI_DEVICE(dev));
- pcms->idebus[0] = qdev_get_child_bus(dev, "ide.0");
- pcms->idebus[1] = qdev_get_child_bus(dev, "ide.1");
- } else {
- isa_bus = isa_bus_new(NULL, system_memory, system_io,
- &error_abort);
- isa_bus_register_input_irqs(isa_bus, x86ms->gsi);
+ if (xen_enabled()) {
+ pci_device_set_intx_routing_notifier(
+ pci_dev, piix_intx_routing_notifier_xen);
+
+ /*
+ * Xen supports additional interrupt routes from the PCI devices to
+ * the IOAPIC: the four pins of each PCI device on the bus are also
+ * connected to the IOAPIC directly.
+ * These additional routes can be discovered through ACPI.
+ */
+ pci_bus_irqs(pcms->pcibus, xen_intx_set_irq, pci_dev,
+ XEN_IOAPIC_NUM_PIRQS);
+ }
- x86ms->rtc = isa_new(TYPE_MC146818_RTC);
- qdev_prop_set_int32(DEVICE(x86ms->rtc), "base_year", 2000);
- isa_realize_and_unref(x86ms->rtc, isa_bus, &error_fatal);
+ isa_bus = ISA_BUS(qdev_get_child_bus(DEVICE(pci_dev), "isa.0"));
+ x86ms->rtc = ISA_DEVICE(object_resolve_path_component(OBJECT(pci_dev),
+ "rtc"));
+ piix4_pm = object_resolve_path_component(OBJECT(pci_dev), "pm");
+ dev = DEVICE(object_resolve_path_component(OBJECT(pci_dev), "ide"));
+ pci_ide_create_devs(PCI_DEVICE(dev));
+ pcms->idebus[0] = qdev_get_child_bus(dev, "ide.0");
+ pcms->idebus[1] = qdev_get_child_bus(dev, "ide.1");
- i8257_dma_init(OBJECT(machine), isa_bus, 0);
- pcms->hpet_enabled = false;
- }
if (x86ms->pic == ON_OFF_AUTO_ON || x86ms->pic == ON_OFF_AUTO_AUTO) {
pc_i8259_create(isa_bus, gsi_state->i8259_irq);
}
- if (phb) {
- ioapic_init_gsi(gsi_state, phb);
- }
+ ioapic_init_gsi(gsi_state, phb);
if (tcg_enabled()) {
x86_register_ferr_irq(x86ms->gsi[13]);
}
- pc_vga_init(isa_bus, pcmc->pci_enabled ? pcms->pcibus : NULL);
+ pc_vga_init(isa_bus, pcms->pcibus);
/* init basic PC hardware */
pc_basic_device_init(pcms, isa_bus, x86ms->gsi, x86ms->rtc,
@@ -317,28 +298,6 @@ static void pc_init1(MachineState *machine, const char *pci_type)
pc_nic_init(pcmc, isa_bus, pcms->pcibus);
-#ifdef CONFIG_IDE_ISA
- if (!pcmc->pci_enabled) {
- DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
- int i;
-
- ide_drive_get(hd, ARRAY_SIZE(hd));
- for (i = 0; i < MAX_IDE_BUS; i++) {
- ISADevice *dev;
- char busname[] = "ide.0";
- dev = isa_ide_init(isa_bus, ide_iobase[i], ide_iobase2[i],
- ide_irq[i],
- hd[MAX_IDE_DEVS * i], hd[MAX_IDE_DEVS * i + 1]);
- /*
- * The ide bus name is ide.0 for the first bus and ide.1 for the
- * second one.
- */
- busname[4] = '0' + i;
- pcms->idebus[i] = qdev_get_child_bus(DEVICE(dev), busname);
- }
- }
-#endif
-
if (piix4_pm) {
smi_irq = qemu_allocate_irq(pc_acpi_smi_interrupt, first_cpu, 0);
@@ -361,6 +320,16 @@ static void pc_init1(MachineState *machine, const char *pci_type)
x86_nvdimm_acpi_dsmio,
x86ms->fw_cfg, OBJECT(pcms));
}
+
+#if defined(CONFIG_IGVM)
+ /* Apply guest state from IGVM if supplied */
+ if (x86ms->igvm) {
+ if (IGVM_CFG_GET_CLASS(x86ms->igvm)
+ ->process(x86ms->igvm, machine->cgs, false, &error_fatal) < 0) {
+ g_assert_not_reached();
+ }
+ }
+#endif
}
typedef enum PCSouthBridgeOption {
@@ -410,22 +379,7 @@ static void pc_set_south_bridge(Object *obj, int value, Error **errp)
pcms->south_bridge = PCSouthBridgeOption_lookup.array[value];
}
-#ifdef CONFIG_ISAPC
-static void pc_init_isa(MachineState *machine)
-{
- pc_init1(machine, NULL);
-}
-#endif
-
#ifdef CONFIG_XEN
-static void pc_xen_hvm_init_pci(MachineState *machine)
-{
- const char *pci_type = xen_igd_gfx_pt_enabled() ?
- TYPE_IGD_PASSTHROUGH_I440FX_PCI_DEVICE : TYPE_I440FX_PCI_DEVICE;
-
- pc_init1(machine, pci_type);
-}
-
static void pc_xen_hvm_init(MachineState *machine)
{
PCMachineState *pcms = PC_MACHINE(machine);
@@ -435,7 +389,10 @@ static void pc_xen_hvm_init(MachineState *machine)
exit(1);
}
- pc_xen_hvm_init_pci(machine);
+ pc_init1(machine, xen_igd_gfx_pt_enabled()
+ ? TYPE_IGD_PASSTHROUGH_I440FX_PCI_DEVICE
+ : TYPE_I440FX_PCI_DEVICE);
+
xen_igd_reserve_slot(pcms->pcibus);
pci_create_simple(pcms->pcibus, -1, "xen-platform");
}
@@ -477,14 +434,26 @@ static void pc_i440fx_machine_options(MachineClass *m)
pc_set_south_bridge);
object_class_property_set_description(oc, "x-south-bridge",
"Use a different south bridge than PIIX3");
+ compat_props_add(m->compat_props,
+ pc_piix_compat_defaults, pc_piix_compat_defaults_len);
}
-static void pc_i440fx_machine_10_1_options(MachineClass *m)
+static void pc_i440fx_machine_10_2_options(MachineClass *m)
{
pc_i440fx_machine_options(m);
}
-DEFINE_I440FX_MACHINE_AS_LATEST(10, 1);
+DEFINE_I440FX_MACHINE_AS_LATEST(10, 2);
+
+static void pc_i440fx_machine_10_1_options(MachineClass *m)
+{
+ pc_i440fx_machine_10_2_options(m);
+ m->smbios_memory_device_size = 2047 * TiB;
+ compat_props_add(m->compat_props, hw_compat_10_1, hw_compat_10_1_len);
+ compat_props_add(m->compat_props, pc_compat_10_1, pc_compat_10_1_len);
+}
+
+DEFINE_I440FX_MACHINE(10, 1);
static void pc_i440fx_machine_10_0_options(MachineClass *m)
{
@@ -778,56 +747,6 @@ static void pc_i440fx_machine_2_6_options(MachineClass *m)
DEFINE_I440FX_MACHINE(2, 6);
-static void pc_i440fx_machine_2_5_options(MachineClass *m)
-{
- X86MachineClass *x86mc = X86_MACHINE_CLASS(m);
-
- pc_i440fx_machine_2_6_options(m);
- x86mc->save_tsc_khz = false;
- m->legacy_fw_cfg_order = 1;
- compat_props_add(m->compat_props, hw_compat_2_5, hw_compat_2_5_len);
- compat_props_add(m->compat_props, pc_compat_2_5, pc_compat_2_5_len);
-}
-
-DEFINE_I440FX_MACHINE(2, 5);
-
-static void pc_i440fx_machine_2_4_options(MachineClass *m)
-{
- PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
-
- pc_i440fx_machine_2_5_options(m);
- m->hw_version = "2.4.0";
- pcmc->broken_reserved_end = true;
- compat_props_add(m->compat_props, hw_compat_2_4, hw_compat_2_4_len);
- compat_props_add(m->compat_props, pc_compat_2_4, pc_compat_2_4_len);
-}
-
-DEFINE_I440FX_MACHINE(2, 4);
-
-#ifdef CONFIG_ISAPC
-static void isapc_machine_options(MachineClass *m)
-{
- PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
- m->desc = "ISA-only PC";
- m->max_cpus = 1;
- m->option_rom_has_mr = true;
- m->rom_file_has_mr = false;
- pcmc->pci_enabled = false;
- pcmc->has_acpi_build = false;
- pcmc->smbios_defaults = false;
- pcmc->gigabyte_align = false;
- pcmc->smbios_legacy_mode = true;
- pcmc->has_reserved_memory = false;
- m->default_nic = "ne2k_isa";
- m->default_cpu_type = X86_CPU_TYPE_NAME("486");
- m->no_floppy = !module_object_class_by_name(TYPE_ISA_FDC);
- m->no_parallel = !module_object_class_by_name(TYPE_ISA_PARALLEL);
-}
-
-DEFINE_PC_MACHINE(isapc, "isapc", pc_init_isa,
- isapc_machine_options);
-#endif
-
#ifdef CONFIG_XEN
static void xenfv_machine_4_2_options(MachineClass *m)
{
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index c538b3d..6015e63 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -45,6 +45,7 @@
#include "hw/i386/pc.h"
#include "hw/i386/amd_iommu.h"
#include "hw/i386/intel_iommu.h"
+#include "hw/vfio/types.h"
#include "hw/virtio/virtio-iommu.h"
#include "hw/display/ramfb.h"
#include "hw/ide/pci.h"
@@ -67,6 +68,8 @@
static GlobalProperty pc_q35_compat_defaults[] = {
{ TYPE_VIRTIO_IOMMU_PCI, "aw-bits", "39" },
+ { TYPE_RAMFB_DEVICE, "use-legacy-x86-rom", "true" },
+ { TYPE_VFIO_PCI_NOHOTPLUG, "use-legacy-x86-rom", "true" },
};
static const size_t pc_q35_compat_defaults_len =
G_N_ELEMENTS(pc_q35_compat_defaults);
@@ -325,6 +328,16 @@ static void pc_q35_init(MachineState *machine)
x86_nvdimm_acpi_dsmio,
x86ms->fw_cfg, OBJECT(pcms));
}
+
+#if defined(CONFIG_IGVM)
+ /* Apply guest state from IGVM if supplied */
+ if (x86ms->igvm) {
+ if (IGVM_CFG_GET_CLASS(x86ms->igvm)
+ ->process(x86ms->igvm, machine->cgs, false, &error_fatal) < 0) {
+ g_assert_not_reached();
+ }
+ }
+#endif
}
#define DEFINE_Q35_MACHINE(major, minor) \
@@ -361,12 +374,22 @@ static void pc_q35_machine_options(MachineClass *m)
pc_q35_compat_defaults, pc_q35_compat_defaults_len);
}
-static void pc_q35_machine_10_1_options(MachineClass *m)
+static void pc_q35_machine_10_2_options(MachineClass *m)
{
pc_q35_machine_options(m);
}
-DEFINE_Q35_MACHINE_AS_LATEST(10, 1);
+DEFINE_Q35_MACHINE_AS_LATEST(10, 2);
+
+static void pc_q35_machine_10_1_options(MachineClass *m)
+{
+ pc_q35_machine_10_2_options(m);
+ m->smbios_memory_device_size = 2047 * TiB;
+ compat_props_add(m->compat_props, hw_compat_10_1, hw_compat_10_1_len);
+ compat_props_add(m->compat_props, pc_compat_10_1, pc_compat_10_1_len);
+}
+
+DEFINE_Q35_MACHINE(10, 1);
static void pc_q35_machine_10_0_options(MachineClass *m)
{
@@ -672,29 +695,3 @@ static void pc_q35_machine_2_6_options(MachineClass *m)
}
DEFINE_Q35_MACHINE(2, 6);
-
-static void pc_q35_machine_2_5_options(MachineClass *m)
-{
- X86MachineClass *x86mc = X86_MACHINE_CLASS(m);
-
- pc_q35_machine_2_6_options(m);
- x86mc->save_tsc_khz = false;
- m->legacy_fw_cfg_order = 1;
- compat_props_add(m->compat_props, hw_compat_2_5, hw_compat_2_5_len);
- compat_props_add(m->compat_props, pc_compat_2_5, pc_compat_2_5_len);
-}
-
-DEFINE_Q35_MACHINE(2, 5);
-
-static void pc_q35_machine_2_4_options(MachineClass *m)
-{
- PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
-
- pc_q35_machine_2_5_options(m);
- m->hw_version = "2.4.0";
- pcmc->broken_reserved_end = true;
- compat_props_add(m->compat_props, hw_compat_2_4, hw_compat_2_4_len);
- compat_props_add(m->compat_props, pc_compat_2_4, pc_compat_2_4_len);
-}
-
-DEFINE_Q35_MACHINE(2, 4);
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index 1eeb58a..1a12b63 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -37,6 +37,7 @@
#include "hw/block/flash.h"
#include "system/kvm.h"
#include "target/i386/sev.h"
+#include "kvm/tdx.h"
#define FLASH_SECTOR_SIZE 4096
@@ -219,7 +220,13 @@ void pc_system_firmware_init(PCMachineState *pcms,
BlockBackend *pflash_blk[ARRAY_SIZE(pcms->flash)];
if (!pcmc->pci_enabled) {
- x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, true);
+ /*
+ * If an IGVM file is specified then the firmware must be provided
+ * in the IGVM file.
+ */
+ if (!X86_MACHINE(pcms)->igvm) {
+ x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, true);
+ }
return;
}
@@ -239,8 +246,13 @@ void pc_system_firmware_init(PCMachineState *pcms,
}
if (!pflash_blk[0]) {
- /* Machine property pflash0 not set, use ROM mode */
- x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, false);
+ /*
+ * Machine property pflash0 not set, use ROM mode unless using IGVM,
+ * in which case the firmware must be provided by the IGVM file.
+ */
+ if (!X86_MACHINE(pcms)->igvm) {
+ x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, false);
+ }
} else {
if (kvm_enabled() && !kvm_readonly_mem_enabled()) {
/*
@@ -256,6 +268,20 @@ void pc_system_firmware_init(PCMachineState *pcms,
}
pc_system_flash_cleanup_unused(pcms);
+
+ /*
+ * The user should not have specified any pflash devices when using IGVM
+ * to configure the guest.
+ */
+ if (X86_MACHINE(pcms)->igvm) {
+ for (i = 0; i < ARRAY_SIZE(pcms->flash); i++) {
+ if (pcms->flash[i]) {
+ error_report("pflash devices cannot be configured when "
+ "using IGVM");
+ exit(1);
+ }
+ }
+ }
}
void x86_firmware_configure(hwaddr gpa, void *ptr, int size)
@@ -280,5 +306,11 @@ void x86_firmware_configure(hwaddr gpa, void *ptr, int size)
}
sev_encrypt_flash(gpa, ptr, size, &error_fatal);
+ } else if (is_tdx_vm()) {
+ ret = tdx_parse_tdvf(ptr, size);
+ if (ret) {
+ error_report("failed to parse TDVF for TDX VM");
+ exit(1);
+ }
}
}
diff --git a/hw/i386/sgx-stub.c b/hw/i386/sgx-stub.c
index 38ff75e..d295e54 100644
--- a/hw/i386/sgx-stub.c
+++ b/hw/i386/sgx-stub.c
@@ -3,20 +3,20 @@
#include "monitor/hmp-target.h"
#include "hw/i386/pc.h"
#include "hw/i386/sgx-epc.h"
+#include "qapi/qapi-commands-misc-i386.h"
#include "qapi/error.h"
-#include "qapi/qapi-commands-misc-target.h"
void sgx_epc_build_srat(GArray *table_data)
{
}
-SGXInfo *qmp_query_sgx(Error **errp)
+SgxInfo *qmp_query_sgx(Error **errp)
{
error_setg(errp, "SGX support is not compiled in");
return NULL;
}
-SGXInfo *qmp_query_sgx_capabilities(Error **errp)
+SgxInfo *qmp_query_sgx_capabilities(Error **errp)
{
error_setg(errp, "SGX support is not compiled in");
return NULL;
diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c
index 5685c4f..e280154 100644
--- a/hw/i386/sgx.c
+++ b/hw/i386/sgx.c
@@ -19,7 +19,7 @@
#include "monitor/hmp-target.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
-#include "qapi/qapi-commands-misc-target.h"
+#include "qapi/qapi-commands-misc-i386.h"
#include "system/address-spaces.h"
#include "system/hw_accel.h"
#include "system/reset.h"
@@ -84,10 +84,10 @@ static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high)
((high & MAKE_64BIT_MASK(0, 20)) << 32);
}
-static SGXEPCSectionList *sgx_calc_host_epc_sections(void)
+static SgxEpcSectionList *sgx_calc_host_epc_sections(void)
{
- SGXEPCSectionList *head = NULL, **tail = &head;
- SGXEPCSection *section;
+ SgxEpcSectionList *head = NULL, **tail = &head;
+ SgxEpcSection *section;
uint32_t i, type;
uint32_t eax, ebx, ecx, edx;
uint32_t j = 0;
@@ -104,7 +104,7 @@ static SGXEPCSectionList *sgx_calc_host_epc_sections(void)
break;
}
- section = g_new0(SGXEPCSection, 1);
+ section = g_new0(SgxEpcSection, 1);
section->node = j++;
section->size = sgx_calc_section_metric(ecx, edx);
QAPI_LIST_APPEND(tail, section);
@@ -153,9 +153,9 @@ static void sgx_epc_reset(void *opaque)
}
}
-SGXInfo *qmp_query_sgx_capabilities(Error **errp)
+SgxInfo *qmp_query_sgx_capabilities(Error **errp)
{
- SGXInfo *info = NULL;
+ SgxInfo *info = NULL;
uint32_t eax, ebx, ecx, edx;
Error *local_err = NULL;
@@ -166,7 +166,7 @@ SGXInfo *qmp_query_sgx_capabilities(Error **errp)
return NULL;
}
- info = g_new0(SGXInfo, 1);
+ info = g_new0(SgxInfo, 1);
host_cpuid(0x7, 0, &eax, &ebx, &ecx, &edx);
info->sgx = ebx & (1U << 2) ? true : false;
@@ -183,17 +183,17 @@ SGXInfo *qmp_query_sgx_capabilities(Error **errp)
return info;
}
-static SGXEPCSectionList *sgx_get_epc_sections_list(void)
+static SgxEpcSectionList *sgx_get_epc_sections_list(void)
{
GSList *device_list = sgx_epc_get_device_list();
- SGXEPCSectionList *head = NULL, **tail = &head;
- SGXEPCSection *section;
+ SgxEpcSectionList *head = NULL, **tail = &head;
+ SgxEpcSection *section;
for (; device_list; device_list = device_list->next) {
DeviceState *dev = device_list->data;
Object *obj = OBJECT(dev);
- section = g_new0(SGXEPCSection, 1);
+ section = g_new0(SgxEpcSection, 1);
section->node = object_property_get_uint(obj, SGX_EPC_NUMA_NODE_PROP,
&error_abort);
section->size = object_property_get_uint(obj, SGX_EPC_SIZE_PROP,
@@ -205,9 +205,9 @@ static SGXEPCSectionList *sgx_get_epc_sections_list(void)
return head;
}
-SGXInfo *qmp_query_sgx(Error **errp)
+SgxInfo *qmp_query_sgx(Error **errp)
{
- SGXInfo *info = NULL;
+ SgxInfo *info = NULL;
X86MachineState *x86ms;
PCMachineState *pcms =
(PCMachineState *)object_dynamic_cast(qdev_get_machine(),
@@ -223,7 +223,7 @@ SGXInfo *qmp_query_sgx(Error **errp)
return NULL;
}
- info = g_new0(SGXInfo, 1);
+ info = g_new0(SgxInfo, 1);
info->sgx = true;
info->sgx1 = true;
@@ -237,8 +237,8 @@ SGXInfo *qmp_query_sgx(Error **errp)
void hmp_info_sgx(Monitor *mon, const QDict *qdict)
{
Error *err = NULL;
- SGXEPCSectionList *section_list, *section;
- g_autoptr(SGXInfo) info = qmp_query_sgx(&err);
+ SgxEpcSectionList *section_list, *section;
+ g_autoptr(SgxInfo) info = qmp_query_sgx(&err);
uint64_t size = 0;
if (err) {
diff --git a/hw/i386/tdvf-hob.c b/hw/i386/tdvf-hob.c
new file mode 100644
index 0000000..782b3d1
--- /dev/null
+++ b/hw/i386/tdvf-hob.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2025 Intel Corporation
+ * Author: Isaku Yamahata <isaku.yamahata at gmail.com>
+ * <isaku.yamahata at intel.com>
+ * Xiaoyao Li <xiaoyao.li@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "standard-headers/uefi/uefi.h"
+#include "hw/pci/pcie_host.h"
+#include "tdvf-hob.h"
+
+typedef struct TdvfHob {
+ hwaddr hob_addr;
+ void *ptr;
+ int size;
+
+ /* working area */
+ void *current;
+ void *end;
+} TdvfHob;
+
+static uint64_t tdvf_current_guest_addr(const TdvfHob *hob)
+{
+ return hob->hob_addr + (hob->current - hob->ptr);
+}
+
+static void tdvf_align(TdvfHob *hob, size_t align)
+{
+ hob->current = QEMU_ALIGN_PTR_UP(hob->current, align);
+}
+
+static void *tdvf_get_area(TdvfHob *hob, uint64_t size)
+{
+ void *ret;
+
+ if (hob->current + size > hob->end) {
+ error_report("TD_HOB overrun, size = 0x%" PRIx64, size);
+ exit(1);
+ }
+
+ ret = hob->current;
+ hob->current += size;
+ tdvf_align(hob, 8);
+ return ret;
+}
+
+static void tdvf_hob_add_memory_resources(TdxGuest *tdx, TdvfHob *hob)
+{
+ EFI_HOB_RESOURCE_DESCRIPTOR *region;
+ EFI_RESOURCE_ATTRIBUTE_TYPE attr;
+ EFI_RESOURCE_TYPE resource_type;
+
+ TdxRamEntry *e;
+ int i;
+
+ for (i = 0; i < tdx->nr_ram_entries; i++) {
+ e = &tdx->ram_entries[i];
+
+ if (e->type == TDX_RAM_UNACCEPTED) {
+ resource_type = EFI_RESOURCE_MEMORY_UNACCEPTED;
+ attr = EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED;
+ } else if (e->type == TDX_RAM_ADDED) {
+ resource_type = EFI_RESOURCE_SYSTEM_MEMORY;
+ attr = EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE;
+ } else {
+ error_report("unknown TDX_RAM_ENTRY type %d", e->type);
+ exit(1);
+ }
+
+ region = tdvf_get_area(hob, sizeof(*region));
+ *region = (EFI_HOB_RESOURCE_DESCRIPTOR) {
+ .Header = {
+ .HobType = EFI_HOB_TYPE_RESOURCE_DESCRIPTOR,
+ .HobLength = cpu_to_le16(sizeof(*region)),
+ .Reserved = cpu_to_le32(0),
+ },
+ .Owner = EFI_HOB_OWNER_ZERO,
+ .ResourceType = cpu_to_le32(resource_type),
+ .ResourceAttribute = cpu_to_le32(attr),
+ .PhysicalStart = cpu_to_le64(e->address),
+ .ResourceLength = cpu_to_le64(e->length),
+ };
+ }
+}
+
+void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob)
+{
+ TdvfHob hob = {
+ .hob_addr = td_hob->address,
+ .size = td_hob->size,
+ .ptr = td_hob->mem_ptr,
+
+ .current = td_hob->mem_ptr,
+ .end = td_hob->mem_ptr + td_hob->size,
+ };
+
+ EFI_HOB_GENERIC_HEADER *last_hob;
+ EFI_HOB_HANDOFF_INFO_TABLE *hit;
+
+ /* Note, Efi{Free}Memory{Bottom,Top} are ignored, leave 'em zeroed. */
+ hit = tdvf_get_area(&hob, sizeof(*hit));
+ *hit = (EFI_HOB_HANDOFF_INFO_TABLE) {
+ .Header = {
+ .HobType = EFI_HOB_TYPE_HANDOFF,
+ .HobLength = cpu_to_le16(sizeof(*hit)),
+ .Reserved = cpu_to_le32(0),
+ },
+ .Version = cpu_to_le32(EFI_HOB_HANDOFF_TABLE_VERSION),
+ .BootMode = cpu_to_le32(0),
+ .EfiMemoryTop = cpu_to_le64(0),
+ .EfiMemoryBottom = cpu_to_le64(0),
+ .EfiFreeMemoryTop = cpu_to_le64(0),
+ .EfiFreeMemoryBottom = cpu_to_le64(0),
+ .EfiEndOfHobList = cpu_to_le64(0), /* initialized later */
+ };
+
+ tdvf_hob_add_memory_resources(tdx, &hob);
+
+ last_hob = tdvf_get_area(&hob, sizeof(*last_hob));
+ *last_hob = (EFI_HOB_GENERIC_HEADER) {
+ .HobType = EFI_HOB_TYPE_END_OF_HOB_LIST,
+ .HobLength = cpu_to_le16(sizeof(*last_hob)),
+ .Reserved = cpu_to_le32(0),
+ };
+ hit->EfiEndOfHobList = tdvf_current_guest_addr(&hob);
+}
diff --git a/hw/i386/tdvf-hob.h b/hw/i386/tdvf-hob.h
new file mode 100644
index 0000000..4fc6a37
--- /dev/null
+++ b/hw/i386/tdvf-hob.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef HW_I386_TD_HOB_H
+#define HW_I386_TD_HOB_H
+
+#include "hw/i386/tdvf.h"
+#include "target/i386/kvm/tdx.h"
+
+void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob);
+
+#define EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE \
+ (EFI_RESOURCE_ATTRIBUTE_PRESENT | \
+ EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
+ EFI_RESOURCE_ATTRIBUTE_TESTED)
+
+#define EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED \
+ (EFI_RESOURCE_ATTRIBUTE_PRESENT | \
+ EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
+ EFI_RESOURCE_ATTRIBUTE_TESTED)
+
+#define EFI_RESOURCE_ATTRIBUTE_TDVF_MMIO \
+ (EFI_RESOURCE_ATTRIBUTE_PRESENT | \
+ EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
+ EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE)
+
+#endif
diff --git a/hw/i386/tdvf.c b/hw/i386/tdvf.c
new file mode 100644
index 0000000..645d9d1
--- /dev/null
+++ b/hw/i386/tdvf.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2025 Intel Corporation
+ * Author: Isaku Yamahata <isaku.yamahata at gmail.com>
+ * <isaku.yamahata at intel.com>
+ * Xiaoyao Li <xiaoyao.li@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+
+#include "hw/i386/pc.h"
+#include "hw/i386/tdvf.h"
+#include "system/kvm.h"
+
+#define TDX_METADATA_OFFSET_GUID "e47a6535-984a-4798-865e-4685a7bf8ec2"
+#define TDX_METADATA_VERSION 1
+#define TDVF_SIGNATURE 0x46564454 /* TDVF as little endian */
+#define TDVF_ALIGNMENT 4096
+
+/*
+ * the raw structs read from TDVF keeps the name convention in
+ * TDVF Design Guide spec.
+ */
+typedef struct {
+ uint32_t DataOffset;
+ uint32_t RawDataSize;
+ uint64_t MemoryAddress;
+ uint64_t MemoryDataSize;
+ uint32_t Type;
+ uint32_t Attributes;
+} TdvfSectionEntry;
+
+typedef struct {
+ uint32_t Signature;
+ uint32_t Length;
+ uint32_t Version;
+ uint32_t NumberOfSectionEntries;
+ TdvfSectionEntry SectionEntries[];
+} TdvfMetadata;
+
+struct tdx_metadata_offset {
+ uint32_t offset;
+};
+
+static TdvfMetadata *tdvf_get_metadata(void *flash_ptr, int size)
+{
+ TdvfMetadata *metadata;
+ uint32_t offset = 0;
+ uint8_t *data;
+
+ if ((uint32_t) size != size) {
+ return NULL;
+ }
+
+ if (pc_system_ovmf_table_find(TDX_METADATA_OFFSET_GUID, &data, NULL)) {
+ offset = size - le32_to_cpu(((struct tdx_metadata_offset *)data)->offset);
+
+ if (offset + sizeof(*metadata) > size) {
+ return NULL;
+ }
+ } else {
+ error_report("Cannot find TDX_METADATA_OFFSET_GUID");
+ return NULL;
+ }
+
+ metadata = flash_ptr + offset;
+
+ /* Finally, verify the signature to determine if this is a TDVF image. */
+ metadata->Signature = le32_to_cpu(metadata->Signature);
+ if (metadata->Signature != TDVF_SIGNATURE) {
+ error_report("Invalid TDVF signature in metadata!");
+ return NULL;
+ }
+
+ /* Sanity check that the TDVF doesn't overlap its own metadata. */
+ metadata->Length = le32_to_cpu(metadata->Length);
+ if (offset + metadata->Length > size) {
+ return NULL;
+ }
+
+ /* Only version 1 is supported/defined. */
+ metadata->Version = le32_to_cpu(metadata->Version);
+ if (metadata->Version != TDX_METADATA_VERSION) {
+ return NULL;
+ }
+
+ return metadata;
+}
+
+static int tdvf_parse_and_check_section_entry(const TdvfSectionEntry *src,
+ TdxFirmwareEntry *entry)
+{
+ entry->data_offset = le32_to_cpu(src->DataOffset);
+ entry->data_len = le32_to_cpu(src->RawDataSize);
+ entry->address = le64_to_cpu(src->MemoryAddress);
+ entry->size = le64_to_cpu(src->MemoryDataSize);
+ entry->type = le32_to_cpu(src->Type);
+ entry->attributes = le32_to_cpu(src->Attributes);
+
+ /* sanity check */
+ if (entry->size < entry->data_len) {
+ error_report("Broken metadata RawDataSize 0x%x MemoryDataSize 0x%"PRIx64,
+ entry->data_len, entry->size);
+ return -1;
+ }
+ if (!QEMU_IS_ALIGNED(entry->address, TDVF_ALIGNMENT)) {
+ error_report("MemoryAddress 0x%"PRIx64" not page aligned", entry->address);
+ return -1;
+ }
+ if (!QEMU_IS_ALIGNED(entry->size, TDVF_ALIGNMENT)) {
+ error_report("MemoryDataSize 0x%"PRIx64" not page aligned", entry->size);
+ return -1;
+ }
+
+ switch (entry->type) {
+ case TDVF_SECTION_TYPE_BFV:
+ case TDVF_SECTION_TYPE_CFV:
+ /* The sections that must be copied from firmware image to TD memory */
+ if (entry->data_len == 0) {
+ error_report("%d section with RawDataSize == 0", entry->type);
+ return -1;
+ }
+ break;
+ case TDVF_SECTION_TYPE_TD_HOB:
+ case TDVF_SECTION_TYPE_TEMP_MEM:
+ /* The sections that no need to be copied from firmware image */
+ if (entry->data_len != 0) {
+ error_report("%d section with RawDataSize 0x%x != 0",
+ entry->type, entry->data_len);
+ return -1;
+ }
+ break;
+ default:
+ error_report("TDVF contains unsupported section type %d", entry->type);
+ return -1;
+ }
+
+ return 0;
+}
+
+int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size)
+{
+ g_autofree TdvfSectionEntry *sections = NULL;
+ TdvfMetadata *metadata;
+ ssize_t entries_size;
+ int i;
+
+ metadata = tdvf_get_metadata(flash_ptr, size);
+ if (!metadata) {
+ return -EINVAL;
+ }
+
+ /* load and parse metadata entries */
+ fw->nr_entries = le32_to_cpu(metadata->NumberOfSectionEntries);
+ if (fw->nr_entries < 2) {
+ error_report("Invalid number of fw entries (%u) in TDVF Metadata",
+ fw->nr_entries);
+ return -EINVAL;
+ }
+
+ entries_size = fw->nr_entries * sizeof(TdvfSectionEntry);
+ if (metadata->Length != sizeof(*metadata) + entries_size) {
+ error_report("TDVF metadata len (0x%x) mismatch, expected (0x%x)",
+ metadata->Length,
+ (uint32_t)(sizeof(*metadata) + entries_size));
+ return -EINVAL;
+ }
+
+ fw->entries = g_new(TdxFirmwareEntry, fw->nr_entries);
+ sections = g_new(TdvfSectionEntry, fw->nr_entries);
+
+ memcpy(sections, (void *)metadata + sizeof(*metadata), entries_size);
+
+ for (i = 0; i < fw->nr_entries; i++) {
+ if (tdvf_parse_and_check_section_entry(&sections[i], &fw->entries[i])) {
+ goto err;
+ }
+ }
+
+ fw->mem_ptr = flash_ptr;
+ return 0;
+
+err:
+ fw->entries = 0;
+ g_free(fw->entries);
+ return -EINVAL;
+}
diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c
index 1b0671c..7512be6 100644
--- a/hw/i386/x86-common.c
+++ b/hw/i386/x86-common.c
@@ -44,6 +44,7 @@
#include "standard-headers/asm-x86/bootparam.h"
#include CONFIG_DEVICES
#include "kvm/kvm_i386.h"
+#include "kvm/tdx.h"
#ifdef CONFIG_XEN_EMU
#include "hw/xen/xen.h"
@@ -951,7 +952,7 @@ void x86_load_linux(X86MachineState *x86ms,
* kernel on the other side of the fw_cfg interface matches the hash of the
* file the user passed in.
*/
- if (!sev_enabled() && protocol > 0) {
+ if (!MACHINE(x86ms)->cgs && protocol > 0) {
memcpy(setup, header, MIN(sizeof(header), setup_size));
}
@@ -1035,11 +1036,14 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
if (machine_require_guest_memfd(MACHINE(x86ms))) {
memory_region_init_ram_guest_memfd(&x86ms->bios, NULL, "pc.bios",
bios_size, &error_fatal);
+ if (is_tdx_vm()) {
+ tdx_set_tdvf_region(&x86ms->bios);
+ }
} else {
memory_region_init_ram(&x86ms->bios, NULL, "pc.bios",
bios_size, &error_fatal);
}
- if (sev_enabled()) {
+ if (sev_enabled() || is_tdx_vm()) {
/*
* The concept of a "reset" simply doesn't exist for
* confidential computing guests, we have to destroy and
diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c
index d34a684..c127a44 100644
--- a/hw/i386/x86-iommu.c
+++ b/hw/i386/x86-iommu.c
@@ -130,6 +130,7 @@ static const Property x86_iommu_properties[] = {
intr_supported, ON_OFF_AUTO_AUTO),
DEFINE_PROP_BOOL("device-iotlb", X86IOMMUState, dt_supported, false),
DEFINE_PROP_BOOL("pt", X86IOMMUState, pt_supported, true),
+ DEFINE_PROP_BOOL("dma-translation", X86IOMMUState, dma_translation, true),
};
static void x86_iommu_class_init(ObjectClass *klass, const void *data)
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index e2d0409..f80533d 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -382,7 +382,6 @@ static void x86_machine_class_init(ObjectClass *oc, const void *data)
mc->get_default_cpu_node_id = x86_get_default_cpu_node_id;
mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids;
mc->kvm_type = x86_kvm_type;
- x86mc->save_tsc_khz = true;
x86mc->fwcfg_dma_enabled = true;
nc->nmi_monitor_handler = x86_nmi;
diff --git a/hw/ide/ich.c b/hw/ide/ich.c
index 4cade0d..b00987f 100644
--- a/hw/ide/ich.c
+++ b/hw/ide/ich.c
@@ -114,7 +114,8 @@ static void pci_ich9_ahci_init(Object *obj)
{
AHCIPCIState *d = ICH9_AHCI(obj);
- qemu_init_irq(&d->irq, pci_ich9_ahci_update_irq, d, 0);
+ qemu_init_irq_child(obj, "update-irq", &d->irq,
+ pci_ich9_ahci_update_irq, d, 0);
ahci_init(&d->ahci, DEVICE(obj));
d->ahci.irq = &d->irq;
}
diff --git a/hw/input/hid.c b/hw/input/hid.c
index 76bedc1..de24cd0 100644
--- a/hw/input/hid.c
+++ b/hw/input/hid.c
@@ -478,6 +478,7 @@ int hid_keyboard_write(HIDState *hs, uint8_t *buf, int len)
ledstate |= QEMU_CAPS_LOCK_LED;
}
kbd_put_ledstate(ledstate);
+ return 1;
}
return 0;
}
diff --git a/hw/input/virtio-input-host.c b/hw/input/virtio-input-host.c
index bbfee9d..9f62532 100644
--- a/hw/input/virtio-input-host.c
+++ b/hw/input/virtio-input-host.c
@@ -114,8 +114,7 @@ static void virtio_input_host_realize(DeviceState *dev, Error **errp)
error_setg_file_open(errp, errno, vih->evdev);
return;
}
- if (!g_unix_set_fd_nonblocking(vih->fd, true, NULL)) {
- error_setg_errno(errp, errno, "Failed to set FD nonblocking");
+ if (!qemu_set_blocking(vih->fd, false, errp)) {
goto err_close;
}
diff --git a/hw/intc/Kconfig b/hw/intc/Kconfig
index 7547528..9f456d7 100644
--- a/hw/intc/Kconfig
+++ b/hw/intc/Kconfig
@@ -109,3 +109,6 @@ config LOONGARCH_PCH_MSI
config LOONGARCH_EXTIOI
bool
+
+config LOONGARCH_DINTC
+ bool
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index bcb1035..c768033 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -27,6 +27,7 @@
#include "hw/pci/msi.h"
#include "qemu/host-utils.h"
#include "system/kvm.h"
+#include "system/mshv.h"
#include "trace.h"
#include "hw/i386/apic-msidef.h"
#include "qapi/error.h"
@@ -645,8 +646,6 @@ void apic_sipi(DeviceState *dev)
{
APICCommonState *s = APIC(dev);
- cpu_reset_interrupt(CPU(s->cpu), CPU_INTERRUPT_SIPI);
-
if (!s->wait_for_sipi)
return;
cpu_x86_load_seg_cache_sipi(s->cpu, s->sipi_vector);
@@ -932,6 +931,13 @@ static void apic_send_msi(MSIMessage *msi)
uint8_t trigger_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
uint8_t delivery = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7;
/* XXX: Ignore redirection hint. */
+#ifdef CONFIG_MSHV
+ if (mshv_enabled()) {
+ mshv_request_interrupt(mshv_state, delivery, vector, dest,
+ dest_mode, trigger_mode);
+ return;
+ }
+#endif
apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
}
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index 37a7a70..394fe02 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -379,6 +379,7 @@ static const VMStateDescription vmstate_apic_common = {
.pre_load = apic_pre_load,
.pre_save = apic_dispatch_pre_save,
.post_load = apic_dispatch_post_load,
+ .priority = MIG_PRI_APIC,
.fields = (const VMStateField[]) {
VMSTATE_UINT32(apicbase, APICCommonState),
VMSTATE_UINT8(id, APICCommonState),
diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c
index d18bef4..899f133 100644
--- a/hw/intc/arm_gic.c
+++ b/hw/intc/arm_gic.c
@@ -59,7 +59,7 @@ static const uint8_t gic_id_gicv2[] = {
static inline int gic_get_current_cpu(GICState *s)
{
if (!qtest_enabled() && s->num_cpu > 1) {
- return current_cpu->cpu_index;
+ return current_cpu->cpu_index - s->first_cpu_index;
}
return 0;
}
diff --git a/hw/intc/arm_gic_common.c b/hw/intc/arm_gic_common.c
index 0f0c48d..ed5be05 100644
--- a/hw/intc/arm_gic_common.c
+++ b/hw/intc/arm_gic_common.c
@@ -350,6 +350,7 @@ static void arm_gic_common_linux_init(ARMLinuxBootIf *obj,
static const Property arm_gic_common_properties[] = {
DEFINE_PROP_UINT32("num-cpu", GICState, num_cpu, 1),
+ DEFINE_PROP_UINT32("first-cpu-index", GICState, first_cpu_index, 0),
DEFINE_PROP_UINT32("num-irq", GICState, num_irq, 32),
/* Revision can be 1 or 2 for GIC architecture specification
* versions 1 or 2, or 0 to indicate the legacy 11MPCore GIC.
diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index 1cee681..2d0df6d 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -436,7 +436,7 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp)
s->cpu = g_new0(GICv3CPUState, s->num_cpu);
for (i = 0; i < s->num_cpu; i++) {
- CPUState *cpu = qemu_get_cpu(i);
+ CPUState *cpu = qemu_get_cpu(s->first_cpu_idx + i);
uint64_t cpu_affid;
s->cpu[i].cpu = cpu;
@@ -612,6 +612,7 @@ static const Property arm_gicv3_common_properties[] = {
DEFINE_PROP_BOOL("has-lpi", GICv3State, lpi_enable, 0),
DEFINE_PROP_BOOL("has-nmi", GICv3State, nmi_support, 0),
DEFINE_PROP_BOOL("has-security-extensions", GICv3State, security_extn, 0),
+ DEFINE_PROP_UINT32("maintenance-interrupt-id", GICv3State, maint_irq, 0),
/*
* Compatibility property: force 8 bits of physical priority, even
* if the CPU being emulated should have fewer.
@@ -621,6 +622,7 @@ static const Property arm_gicv3_common_properties[] = {
redist_region_count, qdev_prop_uint32, uint32_t),
DEFINE_PROP_LINK("sysmem", GICv3State, dma, TYPE_MEMORY_REGION,
MemoryRegion *),
+ DEFINE_PROP_UINT32("first-cpu-index", GICv3State, first_cpu_idx, 0),
};
static void arm_gicv3_common_class_init(ObjectClass *klass, const void *data)
diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
index 4b4cf09..2e6c1f7 100644
--- a/hw/intc/arm_gicv3_cpuif.c
+++ b/hw/intc/arm_gicv3_cpuif.c
@@ -3024,7 +3024,7 @@ void gicv3_init_cpuif(GICv3State *s)
int i;
for (i = 0; i < s->num_cpu; i++) {
- ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i));
+ ARMCPU *cpu = ARM_CPU(qemu_get_cpu(s->first_cpu_idx + i));
GICv3CPUState *cs = &s->cpu[i];
/*
@@ -3037,15 +3037,7 @@ void gicv3_init_cpuif(GICv3State *s)
* cpu->gic_pribits
*/
- /* Note that we can't just use the GICv3CPUState as an opaque pointer
- * in define_arm_cp_regs_with_opaque(), because when we're called back
- * it might be with code translated by CPU 0 but run by CPU 1, in
- * which case we'd get the wrong value.
- * So instead we define the regs with no ri->opaque info, and
- * get back to the GICv3CPUState from the CPUARMState.
- *
- * These CP regs callbacks can be called from either TCG or HVF code.
- */
+ /* These CP regs callbacks can be called from either TCG or HVF. */
define_arm_cp_regs(cpu, gicv3_cpuif_reginfo);
/*
diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 3be3bf6..9829e21 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -22,6 +22,7 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/intc/arm_gicv3_common.h"
+#include "hw/arm/virt.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
#include "system/kvm.h"
@@ -30,6 +31,7 @@
#include "gicv3_internal.h"
#include "vgic_common.h"
#include "migration/blocker.h"
+#include "migration/misc.h"
#include "qom/object.h"
#include "target/arm/cpregs.h"
@@ -294,7 +296,7 @@ static void kvm_dist_putbmp(GICv3State *s, uint32_t offset,
* the 1 bits.
*/
if (clroffset != 0) {
- reg = 0;
+ reg = ~0;
kvm_gicd_access(s, clroffset, &reg, true);
clroffset += 4;
}
@@ -386,8 +388,6 @@ static void kvm_arm_gicv3_put(GICv3State *s)
reg = c->level;
kvm_gic_line_level_access(s, 0, ncpu, &reg, true);
- reg = ~0;
- kvm_gicr_access(s, GICR_ICPENDR0, ncpu, &reg, true);
reg = c->gicr_ipendr0;
kvm_gicr_access(s, GICR_ISPENDR0, ncpu, &reg, true);
@@ -444,7 +444,7 @@ static void kvm_arm_gicv3_put(GICv3State *s)
kvm_gic_put_line_level_bmp(s, s->level);
/* s->pending bitmap -> GICD_ISPENDRn */
- kvm_dist_putbmp(s, GICD_ISPENDR, GICD_ICPENDR, s->pending);
+ kvm_dist_putbmp(s, GICD_ISPENDR, 0, s->pending);
/* s->active bitmap -> GICD_ISACTIVERn */
kvm_dist_putbmp(s, GICD_ISACTIVER, GICD_ICACTIVER, s->active);
@@ -777,6 +777,17 @@ static void vm_change_state_handler(void *opaque, bool running,
}
}
+static int kvm_arm_gicv3_notifier(NotifierWithReturn *notifier,
+ MigrationEvent *e, Error **errp)
+{
+ if (e->type == MIG_EVENT_PRECOPY_DONE) {
+ GICv3State *s = container_of(notifier, GICv3State, cpr_notifier);
+ return kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES,
+ NULL, true, errp);
+ }
+ return 0;
+}
static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
{
@@ -810,6 +821,12 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
return;
}
+ if (s->first_cpu_idx != 0) {
+ error_setg(errp, "Non-zero first-cpu-idx is unsupported with the "
+ "in-kernel GIC");
+ return;
+ }
+
gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL);
for (i = 0; i < s->num_cpu; i++) {
@@ -825,6 +842,34 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
return;
}
+ if (s->maint_irq) {
+ Error *kvm_nv_migration_blocker = NULL;
+ int ret;
+
+ error_setg(&kvm_nv_migration_blocker,
+ "Live migration disabled because KVM nested virt is enabled");
+ if (migrate_add_blocker(&kvm_nv_migration_blocker, errp)) {
+ error_free(kvm_nv_migration_blocker);
+ return;
+ }
+
+ ret = kvm_device_check_attr(s->dev_fd,
+ KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ, 0);
+ if (!ret) {
+ error_setg_errno(errp, errno,
+ "VGICv3 setting maintenance IRQ is not "
+ "supported by this host kernel");
+ return;
+ }
+
+ ret = kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ, 0,
+ &s->maint_irq, true, errp);
+ if (ret) {
+ error_setg_errno(errp, errno, "Failed to set VGIC maintenance IRQ");
+ return;
+ }
+ }
+
multiple_redist_region_allowed =
kvm_device_check_attr(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION);
@@ -890,6 +935,9 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
if (kvm_device_check_attr(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES)) {
qemu_add_vm_change_state_handler(vm_change_state_handler, s);
+ migration_add_notifier_mode(&s->cpr_notifier,
+ kvm_arm_gicv3_notifier,
+ MIG_MODE_CPR_TRANSFER);
}
}
diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
index 83ff74f..7c78961 100644
--- a/hw/intc/armv7m_nvic.c
+++ b/hw/intc/armv7m_nvic.c
@@ -988,6 +988,7 @@ static void nvic_nmi_trigger(void *opaque, int n, int level)
static uint32_t nvic_readl(NVICState *s, uint32_t offset, MemTxAttrs attrs)
{
ARMCPU *cpu = s->cpu;
+ ARMISARegisters *isar = &cpu->isar;
uint32_t val;
switch (offset) {
@@ -1263,74 +1264,74 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset, MemTxAttrs attrs)
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_pfr0;
+ return GET_IDREG(isar, ID_PFR0);
case 0xd44: /* PFR1. */
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_pfr1;
+ return GET_IDREG(isar, ID_PFR1);
case 0xd48: /* DFR0. */
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_dfr0;
+ return GET_IDREG(isar, ID_DFR0);
case 0xd4c: /* AFR0. */
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->id_afr0;
+ return GET_IDREG(isar, ID_AFR0);
case 0xd50: /* MMFR0. */
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_mmfr0;
+ return GET_IDREG(isar, ID_MMFR0);
case 0xd54: /* MMFR1. */
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_mmfr1;
+ return GET_IDREG(isar, ID_MMFR1);
case 0xd58: /* MMFR2. */
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_mmfr2;
+ return GET_IDREG(isar, ID_MMFR2);
case 0xd5c: /* MMFR3. */
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_mmfr3;
+ return GET_IDREG(isar, ID_MMFR3);
case 0xd60: /* ISAR0. */
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_isar0;
+ return GET_IDREG(&cpu->isar, ID_ISAR0);
case 0xd64: /* ISAR1. */
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_isar1;
+ return GET_IDREG(&cpu->isar, ID_ISAR1);
case 0xd68: /* ISAR2. */
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_isar2;
+ return GET_IDREG(&cpu->isar, ID_ISAR2);
case 0xd6c: /* ISAR3. */
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_isar3;
+ return GET_IDREG(&cpu->isar, ID_ISAR3);
case 0xd70: /* ISAR4. */
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_isar4;
+ return GET_IDREG(&cpu->isar, ID_ISAR4);
case 0xd74: /* ISAR5. */
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_isar5;
+ return GET_IDREG(&cpu->isar, ID_ISAR5);
case 0xd78: /* CLIDR */
- return cpu->clidr;
+ return GET_IDREG(&cpu->isar, CLIDR);
case 0xd7c: /* CTR */
return cpu->ctr;
case 0xd80: /* CSSIDR */
diff --git a/hw/intc/aspeed_intc.c b/hw/intc/aspeed_intc.c
index 33fcbe7..5cd786d 100644
--- a/hw/intc/aspeed_intc.c
+++ b/hw/intc/aspeed_intc.c
@@ -737,6 +737,7 @@ static const MemoryRegionOps aspeed_intc_ops = {
.read = aspeed_intc_read,
.write = aspeed_intc_write,
.endianness = DEVICE_LITTLE_ENDIAN,
+ .impl.min_access_size = 4,
.valid = {
.min_access_size = 4,
.max_access_size = 4,
@@ -747,6 +748,7 @@ static const MemoryRegionOps aspeed_intcio_ops = {
.read = aspeed_intcio_read,
.write = aspeed_intcio_write,
.endianness = DEVICE_LITTLE_ENDIAN,
+ .impl.min_access_size = 4,
.valid = {
.min_access_size = 4,
.max_access_size = 4,
@@ -757,6 +759,7 @@ static const MemoryRegionOps aspeed_ssp_intc_ops = {
.read = aspeed_intc_read,
.write = aspeed_ssp_intc_write,
.endianness = DEVICE_LITTLE_ENDIAN,
+ .impl.min_access_size = 4,
.valid = {
.min_access_size = 4,
.max_access_size = 4,
@@ -767,6 +770,7 @@ static const MemoryRegionOps aspeed_ssp_intcio_ops = {
.read = aspeed_intcio_read,
.write = aspeed_ssp_intcio_write,
.endianness = DEVICE_LITTLE_ENDIAN,
+ .impl.min_access_size = 4,
.valid = {
.min_access_size = 4,
.max_access_size = 4,
@@ -777,6 +781,7 @@ static const MemoryRegionOps aspeed_tsp_intc_ops = {
.read = aspeed_intc_read,
.write = aspeed_tsp_intc_write,
.endianness = DEVICE_LITTLE_ENDIAN,
+ .impl.min_access_size = 4,
.valid = {
.min_access_size = 4,
.max_access_size = 4,
@@ -787,6 +792,7 @@ static const MemoryRegionOps aspeed_tsp_intcio_ops = {
.read = aspeed_intcio_read,
.write = aspeed_tsp_intcio_write,
.endianness = DEVICE_LITTLE_ENDIAN,
+ .impl.min_access_size = 4,
.valid = {
.min_access_size = 4,
.max_access_size = 4,
@@ -995,7 +1001,8 @@ static AspeedINTCIRQ aspeed_2700ssp_intcio_irqs[ASPEED_INTC_MAX_INPINS] = {
{5, 5, 1, R_SSPINT165_EN, R_SSPINT165_STATUS},
};
-static void aspeed_2700ssp_intcio_class_init(ObjectClass *klass, const void *data)
+static void aspeed_2700ssp_intcio_class_init(ObjectClass *klass,
+ const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
AspeedINTCClass *aic = ASPEED_INTC_CLASS(klass);
@@ -1063,7 +1070,8 @@ static AspeedINTCIRQ aspeed_2700tsp_intcio_irqs[ASPEED_INTC_MAX_INPINS] = {
{5, 5, 1, R_TSPINT165_EN, R_TSPINT165_STATUS},
};
-static void aspeed_2700tsp_intcio_class_init(ObjectClass *klass, const void *data)
+static void aspeed_2700tsp_intcio_class_init(ObjectClass *klass,
+ const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
AspeedINTCClass *aic = ASPEED_INTC_CLASS(klass);
diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c
index 133bef8..e431d00 100644
--- a/hw/intc/ioapic.c
+++ b/hw/intc/ioapic.c
@@ -30,12 +30,18 @@
#include "hw/intc/ioapic_internal.h"
#include "hw/pci/msi.h"
#include "hw/qdev-properties.h"
+#include "system/accel-irq.h"
#include "system/kvm.h"
#include "system/system.h"
#include "hw/i386/apic-msidef.h"
#include "hw/i386/x86-iommu.h"
#include "trace.h"
+
+#if defined(CONFIG_KVM) || defined(CONFIG_MSHV)
+#define ACCEL_GSI_IRQFD_POSSIBLE
+#endif
+
#define APIC_DELIVERY_MODE_SHIFT 8
#define APIC_POLARITY_SHIFT 14
#define APIC_TRIG_MODE_SHIFT 15
@@ -191,10 +197,10 @@ static void ioapic_set_irq(void *opaque, int vector, int level)
static void ioapic_update_kvm_routes(IOAPICCommonState *s)
{
-#ifdef CONFIG_KVM
+#ifdef ACCEL_GSI_IRQFD_POSSIBLE
int i;
- if (kvm_irqchip_is_split()) {
+ if (accel_irqchip_is_split()) {
for (i = 0; i < IOAPIC_NUM_PINS; i++) {
MSIMessage msg;
struct ioapic_entry_info info;
@@ -202,15 +208,15 @@ static void ioapic_update_kvm_routes(IOAPICCommonState *s)
if (!info.masked) {
msg.address = info.addr;
msg.data = info.data;
- kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL);
+ accel_irqchip_update_msi_route(i, msg, NULL);
}
}
- kvm_irqchip_commit_routes(kvm_state);
+ accel_irqchip_commit_routes();
}
#endif
}
-#ifdef CONFIG_KVM
+#ifdef ACCEL_KERNEL_GSI_IRQFD_POSSIBLE
static void ioapic_iec_notifier(void *private, bool global,
uint32_t index, uint32_t mask)
{
@@ -428,11 +434,11 @@ static const MemoryRegionOps ioapic_io_ops = {
static void ioapic_machine_done_notify(Notifier *notifier, void *data)
{
-#ifdef CONFIG_KVM
+#ifdef ACCEL_KERNEL_GSI_IRQFD_POSSIBLE
IOAPICCommonState *s = container_of(notifier, IOAPICCommonState,
machine_done);
- if (kvm_irqchip_is_split()) {
+ if (accel_irqchip_is_split()) {
X86IOMMUState *iommu = x86_iommu_get_default();
if (iommu) {
/* Register this IOAPIC with IOMMU IEC notifier, so that
diff --git a/hw/intc/loongarch_dintc.c b/hw/intc/loongarch_dintc.c
new file mode 100644
index 0000000..32bdd17
--- /dev/null
+++ b/hw/intc/loongarch_dintc.c
@@ -0,0 +1,213 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * QEMU LoongArch direct interrupt controller.
+ *
+ * Copyright (C) 2025 Loongson Technology Corporation Limited
+ */
+
+#include "qemu/osdep.h"
+#include "hw/sysbus.h"
+#include "hw/irq.h"
+#include "hw/intc/loongarch_pch_msi.h"
+#include "hw/intc/loongarch_pch_pic.h"
+#include "hw/intc/loongarch_dintc.h"
+#include "hw/pci/msi.h"
+#include "hw/misc/unimp.h"
+#include "migration/vmstate.h"
+#include "trace.h"
+#include "hw/qdev-properties.h"
+#include "target/loongarch/cpu.h"
+#include "qemu/error-report.h"
+#include "system/hw_accel.h"
+
+/* msg addr field */
+FIELD(MSG_ADDR, IRQ_NUM, 4, 8)
+FIELD(MSG_ADDR, CPU_NUM, 12, 8)
+FIELD(MSG_ADDR, FIX, 28, 12)
+
+static uint64_t loongarch_dintc_mem_read(void *opaque,
+ hwaddr addr, unsigned size)
+{
+ return 0;
+}
+
+static void do_set_vcpu_dintc_irq(CPUState *cs, run_on_cpu_data data)
+{
+ int irq = data.host_int;
+ CPULoongArchState *env;
+
+ env = &LOONGARCH_CPU(cs)->env;
+ cpu_synchronize_state(cs);
+ set_bit(irq, (unsigned long *)&env->CSR_MSGIS);
+}
+
+static void loongarch_dintc_mem_write(void *opaque, hwaddr addr,
+ uint64_t val, unsigned size)
+{
+ int irq_num, cpu_num = 0;
+ LoongArchDINTCState *s = LOONGARCH_DINTC(opaque);
+ uint64_t msg_addr = addr + VIRT_DINTC_BASE;
+ CPUState *cs;
+
+ cpu_num = FIELD_EX64(msg_addr, MSG_ADDR, CPU_NUM);
+ cs = cpu_by_arch_id(cpu_num);
+ irq_num = FIELD_EX64(msg_addr, MSG_ADDR, IRQ_NUM);
+
+ async_run_on_cpu(cs, do_set_vcpu_dintc_irq,
+ RUN_ON_CPU_HOST_INT(irq_num));
+ qemu_set_irq(s->cpu[cpu_num].parent_irq, 1);
+}
+
+static const MemoryRegionOps loongarch_dintc_ops = {
+ .read = loongarch_dintc_mem_read,
+ .write = loongarch_dintc_mem_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static void loongarch_dintc_realize(DeviceState *dev, Error **errp)
+{
+ LoongArchDINTCState *s = LOONGARCH_DINTC(dev);
+ LoongArchDINTCClass *lac = LOONGARCH_DINTC_GET_CLASS(dev);
+ MachineState *machine = MACHINE(qdev_get_machine());
+ MachineClass *mc = MACHINE_GET_CLASS(machine);
+ const CPUArchIdList *id_list;
+ int i;
+
+ Error *local_err = NULL;
+ lac->parent_realize(dev, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ assert(mc->possible_cpu_arch_ids);
+ id_list = mc->possible_cpu_arch_ids(machine);
+ s->num_cpu = id_list->len;
+ s->cpu = g_new(DINTCCore, s->num_cpu);
+ if (s->cpu == NULL) {
+ error_setg(errp, "Memory allocation for DINTCCore fail");
+ return;
+ }
+
+ for (i = 0; i < s->num_cpu; i++) {
+ s->cpu[i].arch_id = id_list->cpus[i].arch_id;
+ s->cpu[i].cpu = CPU(id_list->cpus[i].cpu);
+ qdev_init_gpio_out(dev, &s->cpu[i].parent_irq, 1);
+ }
+
+ return;
+}
+
+static void loongarch_dintc_unrealize(DeviceState *dev)
+{
+ LoongArchDINTCState *s = LOONGARCH_DINTC(dev);
+ g_free(s->cpu);
+}
+
+static void loongarch_dintc_init(Object *obj)
+{
+ LoongArchDINTCState *s = LOONGARCH_DINTC(obj);
+ SysBusDevice *shd = SYS_BUS_DEVICE(obj);
+ memory_region_init_io(&s->dintc_mmio, OBJECT(s), &loongarch_dintc_ops,
+ s, TYPE_LOONGARCH_DINTC, VIRT_DINTC_SIZE);
+ sysbus_init_mmio(shd, &s->dintc_mmio);
+ msi_nonbroken = true;
+ return;
+}
+
+static DINTCCore *loongarch_dintc_get_cpu(LoongArchDINTCState *s,
+ DeviceState *dev)
+{
+ CPUClass *k = CPU_GET_CLASS(dev);
+ uint64_t arch_id = k->get_arch_id(CPU(dev));
+ int i;
+
+ for (i = 0; i < s->num_cpu; i++) {
+ if (s->cpu[i].arch_id == arch_id) {
+ return &s->cpu[i];
+ }
+ }
+
+ return NULL;
+}
+
+static void loongarch_dintc_cpu_plug(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
+{
+ LoongArchDINTCState *s = LOONGARCH_DINTC(hotplug_dev);
+ Object *obj = OBJECT(dev);
+ DINTCCore *core;
+ int index;
+
+ if (!object_dynamic_cast(obj, TYPE_LOONGARCH_CPU)) {
+ warn_report("LoongArch DINTC: Invalid %s device type",
+ object_get_typename(obj));
+ return;
+ }
+ core = loongarch_dintc_get_cpu(s, dev);
+ if (!core) {
+ return;
+ }
+
+ core->cpu = CPU(dev);
+ index = core - s->cpu;
+
+ /* connect dintc msg irq to cpu irq */
+ qdev_connect_gpio_out(DEVICE(s), index, qdev_get_gpio_in(dev, INT_DMSI));
+ return;
+}
+
+static void loongarch_dintc_cpu_unplug(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
+{
+ LoongArchDINTCState *s = LOONGARCH_DINTC(hotplug_dev);
+ Object *obj = OBJECT(dev);
+ DINTCCore *core;
+
+ if (!object_dynamic_cast(obj, TYPE_LOONGARCH_CPU)) {
+ warn_report("LoongArch DINTC: Invalid %s device type",
+ object_get_typename(obj));
+ return;
+ }
+
+ core = loongarch_dintc_get_cpu(s, dev);
+
+ if (!core) {
+ return;
+ }
+
+ core->cpu = NULL;
+}
+
+static void loongarch_dintc_class_init(ObjectClass *klass, const void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
+ LoongArchDINTCClass *lac = LOONGARCH_DINTC_CLASS(klass);
+
+ dc->unrealize = loongarch_dintc_unrealize;
+ device_class_set_parent_realize(dc, loongarch_dintc_realize,
+ &lac->parent_realize);
+ hc->plug = loongarch_dintc_cpu_plug;
+ hc->unplug = loongarch_dintc_cpu_unplug;
+}
+
+static const TypeInfo loongarch_dintc_info = {
+ .name = TYPE_LOONGARCH_DINTC,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(LoongArchDINTCState),
+ .instance_init = loongarch_dintc_init,
+ .class_size = sizeof(LoongArchDINTCClass),
+ .class_init = loongarch_dintc_class_init,
+ .interfaces = (const InterfaceInfo[]) {
+ { TYPE_HOTPLUG_HANDLER },
+ { }
+ },
+};
+
+static void loongarch_dintc_register_types(void)
+{
+ type_register_static(&loongarch_dintc_info);
+}
+
+type_init(loongarch_dintc_register_types)
diff --git a/hw/intc/loongarch_extioi.c b/hw/intc/loongarch_extioi.c
index 7c38c4c..3e9c88d 100644
--- a/hw/intc/loongarch_extioi.c
+++ b/hw/intc/loongarch_extioi.c
@@ -12,6 +12,7 @@
#include "hw/irq.h"
#include "hw/loongarch/virt.h"
#include "system/address-spaces.h"
+#include "system/kvm.h"
#include "hw/intc/loongarch_extioi.h"
#include "trace.h"
@@ -351,30 +352,29 @@ static void loongarch_extioi_realize(DeviceState *dev, Error **errp)
return;
}
- for (i = 0; i < EXTIOI_IRQS; i++) {
- sysbus_init_irq(sbd, &s->irq[i]);
- }
-
- qdev_init_gpio_in(dev, extioi_setirq, EXTIOI_IRQS);
- memory_region_init_io(&s->extioi_system_mem, OBJECT(s), &extioi_ops,
- s, "extioi_system_mem", 0x900);
- sysbus_init_mmio(sbd, &s->extioi_system_mem);
-
if (s->features & BIT(EXTIOI_HAS_VIRT_EXTENSION)) {
- memory_region_init_io(&s->virt_extend, OBJECT(s), &extioi_virt_ops,
- s, "extioi_virt", EXTIOI_VIRT_SIZE);
- sysbus_init_mmio(sbd, &s->virt_extend);
s->features |= EXTIOI_VIRT_HAS_FEATURES;
} else {
s->status |= BIT(EXTIOI_ENABLE);
}
-}
-static void loongarch_extioi_unrealize(DeviceState *dev)
-{
- LoongArchExtIOICommonState *s = LOONGARCH_EXTIOI_COMMON(dev);
+ if (kvm_irqchip_in_kernel()) {
+ kvm_extioi_realize(dev, errp);
+ } else {
+ for (i = 0; i < EXTIOI_IRQS; i++) {
+ sysbus_init_irq(sbd, &s->irq[i]);
+ }
- g_free(s->cpu);
+ qdev_init_gpio_in(dev, extioi_setirq, EXTIOI_IRQS);
+ memory_region_init_io(&s->extioi_system_mem, OBJECT(s), &extioi_ops,
+ s, "extioi_system_mem", 0x900);
+ sysbus_init_mmio(sbd, &s->extioi_system_mem);
+ if (s->features & BIT(EXTIOI_HAS_VIRT_EXTENSION)) {
+ memory_region_init_io(&s->virt_extend, OBJECT(s), &extioi_virt_ops,
+ s, "extioi_virt", EXTIOI_VIRT_SIZE);
+ sysbus_init_mmio(sbd, &s->virt_extend);
+ }
+ }
}
static void loongarch_extioi_reset_hold(Object *obj, ResetType type)
@@ -384,6 +384,19 @@ static void loongarch_extioi_reset_hold(Object *obj, ResetType type)
if (lec->parent_phases.hold) {
lec->parent_phases.hold(obj, type);
}
+
+ if (kvm_irqchip_in_kernel()) {
+ kvm_extioi_put(obj, 0);
+ }
+}
+
+static int vmstate_extioi_pre_save(void *opaque)
+{
+ if (kvm_irqchip_in_kernel()) {
+ return kvm_extioi_get(opaque);
+ }
+
+ return 0;
}
static int vmstate_extioi_post_load(void *opaque, int version_id)
@@ -391,6 +404,10 @@ static int vmstate_extioi_post_load(void *opaque, int version_id)
LoongArchExtIOICommonState *s = LOONGARCH_EXTIOI_COMMON(opaque);
int i, start_irq;
+ if (kvm_irqchip_in_kernel()) {
+ return kvm_extioi_put(opaque, version_id);
+ }
+
for (i = 0; i < (EXTIOI_IRQS / 4); i++) {
start_irq = i * 4;
extioi_update_sw_coremap(s, start_irq, s->coremap[i], false);
@@ -412,10 +429,9 @@ static void loongarch_extioi_class_init(ObjectClass *klass, const void *data)
device_class_set_parent_realize(dc, loongarch_extioi_realize,
&lec->parent_realize);
- device_class_set_parent_unrealize(dc, loongarch_extioi_unrealize,
- &lec->parent_unrealize);
resettable_class_set_parent_phases(rc, NULL, loongarch_extioi_reset_hold,
NULL, &lec->parent_phases);
+ lecc->pre_save = vmstate_extioi_pre_save;
lecc->post_load = vmstate_extioi_post_load;
}
diff --git a/hw/intc/loongarch_extioi_common.c b/hw/intc/loongarch_extioi_common.c
index 4a904b3..ba03383 100644
--- a/hw/intc/loongarch_extioi_common.c
+++ b/hw/intc/loongarch_extioi_common.c
@@ -108,6 +108,13 @@ static void loongarch_extioi_common_realize(DeviceState *dev, Error **errp)
}
}
+static void loongarch_extioi_common_unrealize(DeviceState *dev)
+{
+ LoongArchExtIOICommonState *s = LOONGARCH_EXTIOI_COMMON(dev);
+
+ g_free(s->cpu);
+}
+
static void loongarch_extioi_common_reset_hold(Object *obj, ResetType type)
{
LoongArchExtIOICommonClass *lecc = LOONGARCH_EXTIOI_COMMON_GET_CLASS(obj);
@@ -221,6 +228,8 @@ static void loongarch_extioi_common_class_init(ObjectClass *klass,
device_class_set_parent_realize(dc, loongarch_extioi_common_realize,
&lecc->parent_realize);
+ device_class_set_parent_unrealize(dc, loongarch_extioi_common_unrealize,
+ &lecc->parent_unrealize);
resettable_class_set_parent_phases(rc, NULL,
loongarch_extioi_common_reset_hold,
NULL, &lecc->parent_phases);
diff --git a/hw/intc/loongarch_extioi_kvm.c b/hw/intc/loongarch_extioi_kvm.c
new file mode 100644
index 0000000..aa2e8c7
--- /dev/null
+++ b/hw/intc/loongarch_extioi_kvm.c
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * LoongArch EXTIOI interrupt kvm support
+ *
+ * Copyright (C) 2025 Loongson Technology Corporation Limited
+ */
+
+#include "qemu/osdep.h"
+#include "hw/intc/loongarch_extioi.h"
+#include "linux/kvm.h"
+#include "qapi/error.h"
+#include "system/kvm.h"
+
+static void kvm_extioi_access_reg(int fd, uint64_t addr, void *val, bool write)
+{
+ kvm_device_access(fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS,
+ addr, val, write, &error_abort);
+}
+
+static void kvm_extioi_access_sw_state(int fd, uint64_t addr,
+ void *val, bool write)
+{
+ kvm_device_access(fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_SW_STATUS,
+ addr, val, write, &error_abort);
+}
+
+static void kvm_extioi_access_sw_status(void *opaque, bool write)
+{
+ LoongArchExtIOICommonState *lecs = LOONGARCH_EXTIOI_COMMON(opaque);
+ LoongArchExtIOIState *les = LOONGARCH_EXTIOI(opaque);
+ int addr;
+
+ addr = KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_STATE;
+ kvm_extioi_access_sw_state(les->dev_fd, addr, &lecs->status, write);
+}
+
+static void kvm_extioi_access_regs(void *opaque, bool write)
+{
+ LoongArchExtIOICommonState *lecs = LOONGARCH_EXTIOI_COMMON(opaque);
+ LoongArchExtIOIState *les = LOONGARCH_EXTIOI(opaque);
+ int fd = les->dev_fd;
+ int addr, offset, cpu;
+
+ for (addr = EXTIOI_NODETYPE_START; addr < EXTIOI_NODETYPE_END; addr += 4) {
+ offset = (addr - EXTIOI_NODETYPE_START) / 4;
+ kvm_extioi_access_reg(fd, addr, &lecs->nodetype[offset], write);
+ }
+
+ for (addr = EXTIOI_IPMAP_START; addr < EXTIOI_IPMAP_END; addr += 4) {
+ offset = (addr - EXTIOI_IPMAP_START) / 4;
+ kvm_extioi_access_reg(fd, addr, &lecs->ipmap[offset], write);
+ }
+
+ for (addr = EXTIOI_ENABLE_START; addr < EXTIOI_ENABLE_END; addr += 4) {
+ offset = (addr - EXTIOI_ENABLE_START) / 4;
+ kvm_extioi_access_reg(fd, addr, &lecs->enable[offset], write);
+ }
+
+ for (addr = EXTIOI_BOUNCE_START; addr < EXTIOI_BOUNCE_END; addr += 4) {
+ offset = (addr - EXTIOI_BOUNCE_START) / 4;
+ kvm_extioi_access_reg(fd, addr, &lecs->bounce[offset], write);
+ }
+
+ for (addr = EXTIOI_ISR_START; addr < EXTIOI_ISR_END; addr += 4) {
+ offset = (addr - EXTIOI_ISR_START) / 4;
+ kvm_extioi_access_reg(fd, addr, &lecs->isr[offset], write);
+ }
+
+ for (addr = EXTIOI_COREMAP_START; addr < EXTIOI_COREMAP_END; addr += 4) {
+ offset = (addr - EXTIOI_COREMAP_START) / 4;
+ kvm_extioi_access_reg(fd, addr, &lecs->coremap[offset], write);
+ }
+
+ for (cpu = 0; cpu < lecs->num_cpu; cpu++) {
+ for (addr = EXTIOI_COREISR_START;
+ addr < EXTIOI_COREISR_END; addr += 4) {
+ offset = (addr - EXTIOI_COREISR_START) / 4;
+ kvm_extioi_access_reg(fd, (cpu << 16) | addr,
+ &lecs->cpu[cpu].coreisr[offset], write);
+ }
+ }
+}
+
+int kvm_extioi_get(void *opaque)
+{
+ kvm_extioi_access_regs(opaque, false);
+ kvm_extioi_access_sw_status(opaque, false);
+ return 0;
+}
+
+int kvm_extioi_put(void *opaque, int version_id)
+{
+ LoongArchExtIOIState *les = LOONGARCH_EXTIOI(opaque);
+ int fd = les->dev_fd;
+
+ if (fd == 0) {
+ return 0;
+ }
+
+ kvm_extioi_access_regs(opaque, true);
+ kvm_extioi_access_sw_status(opaque, true);
+ kvm_device_access(fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL,
+ KVM_DEV_LOONGARCH_EXTIOI_CTRL_LOAD_FINISHED,
+ NULL, true, &error_abort);
+ return 0;
+}
+
+void kvm_extioi_realize(DeviceState *dev, Error **errp)
+{
+ LoongArchExtIOICommonState *lecs = LOONGARCH_EXTIOI_COMMON(dev);
+ LoongArchExtIOIState *les = LOONGARCH_EXTIOI(dev);
+ int ret;
+
+ ret = kvm_create_device(kvm_state, KVM_DEV_TYPE_LOONGARCH_EIOINTC, false);
+ if (ret < 0) {
+ fprintf(stderr, "create KVM_LOONGARCH_EIOINTC failed: %s\n",
+ strerror(-ret));
+ abort();
+ }
+
+ les->dev_fd = ret;
+ ret = kvm_device_access(les->dev_fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL,
+ KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU,
+ &lecs->num_cpu, true, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "KVM_LOONGARCH_EXTIOI_INIT_NUM_CPU failed: %s\n",
+ strerror(-ret));
+ abort();
+ }
+
+ ret = kvm_device_access(les->dev_fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL,
+ KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_FEATURE,
+ &lecs->features, true, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "KVM_LOONGARCH_EXTIOI_INIT_FEATURE failed: %s\n",
+ strerror(-ret));
+ abort();
+ }
+}
diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c
index 74372a2..fc8005c 100644
--- a/hw/intc/loongarch_ipi.c
+++ b/hw/intc/loongarch_ipi.c
@@ -11,6 +11,7 @@
#include "qapi/error.h"
#include "hw/intc/loongarch_ipi.h"
#include "hw/qdev-properties.h"
+#include "system/kvm.h"
#include "target/loongarch/cpu.h"
static AddressSpace *get_iocsr_as(CPUState *cpu)
@@ -91,6 +92,10 @@ static void loongarch_ipi_realize(DeviceState *dev, Error **errp)
lics->cpu[i].ipi = lics;
qdev_init_gpio_out(dev, &lics->cpu[i].irq, 1);
}
+
+ if (kvm_irqchip_in_kernel()) {
+ kvm_ipi_realize(dev, errp);
+ }
}
static void loongarch_ipi_reset_hold(Object *obj, ResetType type)
@@ -117,6 +122,10 @@ static void loongarch_ipi_reset_hold(Object *obj, ResetType type)
core->clear = 0;
memset(core->buf, 0, sizeof(core->buf));
}
+
+ if (kvm_irqchip_in_kernel()) {
+ kvm_ipi_put(obj, 0);
+ }
}
static void loongarch_ipi_cpu_plug(HotplugHandler *hotplug_dev,
@@ -166,6 +175,24 @@ static void loongarch_ipi_cpu_unplug(HotplugHandler *hotplug_dev,
core->cpu = NULL;
}
+static int loongarch_ipi_pre_save(void *opaque)
+{
+ if (kvm_irqchip_in_kernel()) {
+ return kvm_ipi_get(opaque);
+ }
+
+ return 0;
+}
+
+static int loongarch_ipi_post_load(void *opaque, int version_id)
+{
+ if (kvm_irqchip_in_kernel()) {
+ return kvm_ipi_put(opaque, version_id);
+ }
+
+ return 0;
+}
+
static void loongarch_ipi_class_init(ObjectClass *klass, const void *data)
{
LoongsonIPICommonClass *licc = LOONGSON_IPI_COMMON_CLASS(klass);
@@ -182,6 +209,8 @@ static void loongarch_ipi_class_init(ObjectClass *klass, const void *data)
licc->cpu_by_arch_id = loongarch_cpu_by_arch_id;
hc->plug = loongarch_ipi_cpu_plug;
hc->unplug = loongarch_ipi_cpu_unplug;
+ licc->pre_save = loongarch_ipi_pre_save;
+ licc->post_load = loongarch_ipi_post_load;
}
static const TypeInfo loongarch_ipi_types[] = {
diff --git a/hw/intc/loongarch_ipi_kvm.c b/hw/intc/loongarch_ipi_kvm.c
new file mode 100644
index 0000000..dd4c367
--- /dev/null
+++ b/hw/intc/loongarch_ipi_kvm.c
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * LoongArch IPI interrupt KVM support
+ *
+ * Copyright (C) 2025 Loongson Technology Corporation Limited
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/intc/loongarch_ipi.h"
+#include "system/kvm.h"
+#include "target/loongarch/cpu.h"
+
+static void kvm_ipi_access_reg(int fd, uint64_t addr, uint32_t *val, bool write)
+{
+ kvm_device_access(fd, KVM_DEV_LOONGARCH_IPI_GRP_REGS,
+ addr, val, write, &error_abort);
+}
+
+static void kvm_ipi_access_regs(void *opaque, bool write)
+{
+ LoongsonIPICommonState *ipi = (LoongsonIPICommonState *)opaque;
+ LoongarchIPIState *lis = LOONGARCH_IPI(opaque);
+ IPICore *core;
+ uint64_t attr;
+ int i, cpu_index, fd = lis->dev_fd;
+
+ if (fd == 0) {
+ return;
+ }
+
+ for (i = 0; i < ipi->num_cpu; i++) {
+ core = &ipi->cpu[i];
+ if (core->cpu == NULL) {
+ continue;
+ }
+ cpu_index = i;
+
+ attr = (cpu_index << 16) | CORE_STATUS_OFF;
+ kvm_ipi_access_reg(fd, attr, &core->status, write);
+
+ attr = (cpu_index << 16) | CORE_EN_OFF;
+ kvm_ipi_access_reg(fd, attr, &core->en, write);
+
+ attr = (cpu_index << 16) | CORE_SET_OFF;
+ kvm_ipi_access_reg(fd, attr, &core->set, write);
+
+ attr = (cpu_index << 16) | CORE_CLEAR_OFF;
+ kvm_ipi_access_reg(fd, attr, &core->clear, write);
+
+ attr = (cpu_index << 16) | CORE_BUF_20;
+ kvm_ipi_access_reg(fd, attr, &core->buf[0], write);
+
+ attr = (cpu_index << 16) | CORE_BUF_28;
+ kvm_ipi_access_reg(fd, attr, &core->buf[2], write);
+
+ attr = (cpu_index << 16) | CORE_BUF_30;
+ kvm_ipi_access_reg(fd, attr, &core->buf[4], write);
+
+ attr = (cpu_index << 16) | CORE_BUF_38;
+ kvm_ipi_access_reg(fd, attr, &core->buf[6], write);
+ }
+}
+
+int kvm_ipi_get(void *opaque)
+{
+ kvm_ipi_access_regs(opaque, false);
+ return 0;
+}
+
+int kvm_ipi_put(void *opaque, int version_id)
+{
+ kvm_ipi_access_regs(opaque, true);
+ return 0;
+}
+
+void kvm_ipi_realize(DeviceState *dev, Error **errp)
+{
+ LoongarchIPIState *lis = LOONGARCH_IPI(dev);
+ int ret;
+
+ ret = kvm_create_device(kvm_state, KVM_DEV_TYPE_LOONGARCH_IPI, false);
+ if (ret < 0) {
+ fprintf(stderr, "IPI KVM_CREATE_DEVICE failed: %s\n",
+ strerror(-ret));
+ abort();
+ }
+
+ lis->dev_fd = ret;
+}
diff --git a/hw/intc/loongarch_pch_msi.c b/hw/intc/loongarch_pch_msi.c
index 06eb944..f6d1631 100644
--- a/hw/intc/loongarch_pch_msi.c
+++ b/hw/intc/loongarch_pch_msi.c
@@ -13,6 +13,7 @@
#include "hw/pci/msi.h"
#include "hw/misc/unimp.h"
#include "migration/vmstate.h"
+#include "system/kvm.h"
#include "trace.h"
static uint64_t loongarch_msi_mem_read(void *opaque, hwaddr addr, unsigned size)
@@ -26,6 +27,15 @@ static void loongarch_msi_mem_write(void *opaque, hwaddr addr,
LoongArchPCHMSI *s = (LoongArchPCHMSI *)opaque;
int irq_num;
+ if (kvm_irqchip_in_kernel()) {
+ MSIMessage msg;
+
+ msg.address = addr;
+ msg.data = val;
+ kvm_irqchip_send_msi(kvm_state, msg);
+ return;
+ }
+
/*
* vector number is irq number from upper extioi intc
* need subtract irq base to get msi vector offset
diff --git a/hw/intc/loongarch_pch_pic.c b/hw/intc/loongarch_pch_pic.c
index cbba2fc..32f01aa 100644
--- a/hw/intc/loongarch_pch_pic.c
+++ b/hw/intc/loongarch_pch_pic.c
@@ -10,6 +10,7 @@
#include "qemu/log.h"
#include "hw/irq.h"
#include "hw/intc/loongarch_pch_pic.h"
+#include "system/kvm.h"
#include "trace.h"
#include "qapi/error.h"
@@ -48,6 +49,11 @@ static void pch_pic_irq_handler(void *opaque, int irq, int level)
assert(irq < s->irq_num);
trace_loongarch_pch_pic_irq_handler(irq, level);
+ if (kvm_irqchip_in_kernel()) {
+ kvm_set_irq(kvm_state, irq, !!level);
+ return;
+ }
+
if (s->intedge & mask) {
/* Edge triggered */
if (level) {
@@ -82,7 +88,7 @@ static uint64_t pch_pic_read(void *opaque, hwaddr addr, uint64_t field_mask)
addr -= offset;
switch (addr) {
case PCH_PIC_INT_ID:
- val = s->id.data;
+ val = cpu_to_le64(s->id.data);
break;
case PCH_PIC_INT_MASK:
val = s->int_mask;
@@ -104,10 +110,10 @@ static uint64_t pch_pic_read(void *opaque, hwaddr addr, uint64_t field_mask)
val = s->int_polarity;
break;
case PCH_PIC_HTMSI_VEC ... PCH_PIC_HTMSI_VEC_END:
- val = *(uint64_t *)(s->htmsi_vector + addr - PCH_PIC_HTMSI_VEC);
+ val = ldq_le_p(&s->htmsi_vector[addr - PCH_PIC_HTMSI_VEC]);
break;
case PCH_PIC_ROUTE_ENTRY ... PCH_PIC_ROUTE_ENTRY_END:
- val = *(uint64_t *)(s->route_entry + addr - PCH_PIC_ROUTE_ENTRY);
+ val = ldq_le_p(&s->route_entry[addr - PCH_PIC_ROUTE_ENTRY]);
break;
default:
qemu_log_mask(LOG_GUEST_ERROR,
@@ -123,7 +129,8 @@ static void pch_pic_write(void *opaque, hwaddr addr, uint64_t value,
{
LoongArchPICCommonState *s = LOONGARCH_PIC_COMMON(opaque);
uint32_t offset;
- uint64_t old, mask, data, *ptemp;
+ uint64_t old, mask, data;
+ void *ptemp;
offset = addr & 7;
addr -= offset;
@@ -162,12 +169,12 @@ static void pch_pic_write(void *opaque, hwaddr addr, uint64_t value,
s->int_polarity = (s->int_polarity & ~mask) | data;
break;
case PCH_PIC_HTMSI_VEC ... PCH_PIC_HTMSI_VEC_END:
- ptemp = (uint64_t *)(s->htmsi_vector + addr - PCH_PIC_HTMSI_VEC);
- *ptemp = (*ptemp & ~mask) | data;
+ ptemp = &s->htmsi_vector[addr - PCH_PIC_HTMSI_VEC];
+ stq_le_p(ptemp, (ldq_le_p(ptemp) & ~mask) | data);
break;
case PCH_PIC_ROUTE_ENTRY ... PCH_PIC_ROUTE_ENTRY_END:
- ptemp = (uint64_t *)(s->route_entry + addr - PCH_PIC_ROUTE_ENTRY);
- *ptemp = (*ptemp & ~mask) | data;
+ ptemp = (uint64_t *)&s->route_entry[addr - PCH_PIC_ROUTE_ENTRY];
+ stq_le_p(ptemp, (ldq_le_p(ptemp) & ~mask) | data);
break;
default:
qemu_log_mask(LOG_GUEST_ERROR,
@@ -258,6 +265,10 @@ static void loongarch_pic_reset_hold(Object *obj, ResetType type)
if (lpc->parent_phases.hold) {
lpc->parent_phases.hold(obj, type);
}
+
+ if (kvm_irqchip_in_kernel()) {
+ kvm_pic_put(obj, 0);
+ }
}
static void loongarch_pic_realize(DeviceState *dev, Error **errp)
@@ -275,22 +286,49 @@ static void loongarch_pic_realize(DeviceState *dev, Error **errp)
qdev_init_gpio_out(dev, s->parent_irq, s->irq_num);
qdev_init_gpio_in(dev, pch_pic_irq_handler, s->irq_num);
- memory_region_init_io(&s->iomem, OBJECT(dev),
- &loongarch_pch_pic_ops,
- s, TYPE_LOONGARCH_PIC, VIRT_PCH_REG_SIZE);
- sysbus_init_mmio(sbd, &s->iomem);
+
+ if (kvm_irqchip_in_kernel()) {
+ kvm_pic_realize(dev, errp);
+ } else {
+ memory_region_init_io(&s->iomem, OBJECT(dev),
+ &loongarch_pch_pic_ops,
+ s, TYPE_LOONGARCH_PIC, VIRT_PCH_REG_SIZE);
+ sysbus_init_mmio(sbd, &s->iomem);
+ }
+}
+
+static int loongarch_pic_pre_save(LoongArchPICCommonState *opaque)
+{
+ if (kvm_irqchip_in_kernel()) {
+ return kvm_pic_get(opaque);
+ }
+
+ return 0;
+}
+
+static int loongarch_pic_post_load(LoongArchPICCommonState *opaque,
+ int version_id)
+{
+ if (kvm_irqchip_in_kernel()) {
+ return kvm_pic_put(opaque, version_id);
+ }
+
+ return 0;
}
static void loongarch_pic_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
LoongarchPICClass *lpc = LOONGARCH_PIC_CLASS(klass);
+ LoongArchPICCommonClass *lpcc = LOONGARCH_PIC_COMMON_CLASS(klass);
ResettableClass *rc = RESETTABLE_CLASS(klass);
resettable_class_set_parent_phases(rc, NULL, loongarch_pic_reset_hold,
NULL, &lpc->parent_phases);
device_class_set_parent_realize(dc, loongarch_pic_realize,
&lpc->parent_realize);
+ lpcc->pre_save = loongarch_pic_pre_save;
+ lpcc->post_load = loongarch_pic_post_load;
}
static const TypeInfo loongarch_pic_types[] = {
diff --git a/hw/intc/loongarch_pic_kvm.c b/hw/intc/loongarch_pic_kvm.c
new file mode 100644
index 0000000..dd504ec
--- /dev/null
+++ b/hw/intc/loongarch_pic_kvm.c
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * LoongArch kvm pch pic interrupt support
+ *
+ * Copyright (C) 2025 Loongson Technology Corporation Limited
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/boards.h"
+#include "hw/intc/loongarch_pch_pic.h"
+#include "hw/loongarch/virt.h"
+#include "hw/pci-host/ls7a.h"
+#include "system/kvm.h"
+
+static void kvm_pch_pic_access_reg(int fd, uint64_t addr, void *val, bool write)
+{
+ kvm_device_access(fd, KVM_DEV_LOONGARCH_PCH_PIC_GRP_REGS,
+ addr, val, write, &error_abort);
+}
+
+static void kvm_pch_pic_access(void *opaque, bool write)
+{
+ LoongArchPICCommonState *s = LOONGARCH_PIC_COMMON(opaque);
+ LoongarchPICState *lps = LOONGARCH_PIC(opaque);
+ int fd = lps->dev_fd;
+ int addr, offset;
+
+ if (fd == 0) {
+ return;
+ }
+
+ kvm_pch_pic_access_reg(fd, PCH_PIC_INT_MASK, &s->int_mask, write);
+ kvm_pch_pic_access_reg(fd, PCH_PIC_HTMSI_EN, &s->htmsi_en, write);
+ kvm_pch_pic_access_reg(fd, PCH_PIC_INT_EDGE, &s->intedge, write);
+ kvm_pch_pic_access_reg(fd, PCH_PIC_AUTO_CTRL0, &s->auto_crtl0, write);
+ kvm_pch_pic_access_reg(fd, PCH_PIC_AUTO_CTRL1, &s->auto_crtl1, write);
+
+ for (addr = PCH_PIC_ROUTE_ENTRY;
+ addr < PCH_PIC_ROUTE_ENTRY_END; addr++) {
+ offset = addr - PCH_PIC_ROUTE_ENTRY;
+ kvm_pch_pic_access_reg(fd, addr, &s->route_entry[offset], write);
+ }
+
+ for (addr = PCH_PIC_HTMSI_VEC; addr < PCH_PIC_HTMSI_VEC_END; addr++) {
+ offset = addr - PCH_PIC_HTMSI_VEC;
+ kvm_pch_pic_access_reg(fd, addr, &s->htmsi_vector[offset], write);
+ }
+
+ kvm_pch_pic_access_reg(fd, PCH_PIC_INT_REQUEST, &s->intirr, write);
+ kvm_pch_pic_access_reg(fd, PCH_PIC_INT_STATUS, &s->intisr, write);
+ kvm_pch_pic_access_reg(fd, PCH_PIC_INT_POL, &s->int_polarity, write);
+}
+
+int kvm_pic_get(void *opaque)
+{
+ kvm_pch_pic_access(opaque, false);
+ return 0;
+}
+
+int kvm_pic_put(void *opaque, int version_id)
+{
+ kvm_pch_pic_access(opaque, true);
+ return 0;
+}
+
+void kvm_pic_realize(DeviceState *dev, Error **errp)
+{
+ LoongarchPICState *lps = LOONGARCH_PIC(dev);
+ uint64_t pch_pic_base = VIRT_PCH_REG_BASE;
+ int ret;
+
+ ret = kvm_create_device(kvm_state, KVM_DEV_TYPE_LOONGARCH_PCHPIC, false);
+ if (ret < 0) {
+ fprintf(stderr, "Create KVM_LOONGARCH_PCHPIC failed: %s\n",
+ strerror(-ret));
+ abort();
+ }
+
+ lps->dev_fd = ret;
+ ret = kvm_device_access(lps->dev_fd, KVM_DEV_LOONGARCH_PCH_PIC_GRP_CTRL,
+ KVM_DEV_LOONGARCH_PCH_PIC_CTRL_INIT,
+ &pch_pic_base, true, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "KVM_LOONGARCH_PCH_PIC_INIT failed: %s\n",
+ strerror(-ret));
+ abort();
+ }
+}
diff --git a/hw/intc/loongson_ipi_common.c b/hw/intc/loongson_ipi_common.c
index f32661c..8cd78d4 100644
--- a/hw/intc/loongson_ipi_common.c
+++ b/hw/intc/loongson_ipi_common.c
@@ -11,6 +11,7 @@
#include "hw/irq.h"
#include "qemu/log.h"
#include "migration/vmstate.h"
+#include "system/kvm.h"
#include "trace.h"
MemTxResult loongson_ipi_core_readl(void *opaque, hwaddr addr, uint64_t *data,
@@ -255,6 +256,10 @@ static void loongson_ipi_common_realize(DeviceState *dev, Error **errp)
LoongsonIPICommonState *s = LOONGSON_IPI_COMMON(dev);
SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+ if (kvm_irqchip_in_kernel()) {
+ return;
+ }
+
memory_region_init_io(&s->ipi_iocsr_mem, OBJECT(dev),
&loongson_ipi_iocsr_ops,
s, "loongson_ipi_iocsr", 0x48);
@@ -277,10 +282,38 @@ static void loongson_ipi_common_unrealize(DeviceState *dev)
g_free(s->cpu);
}
+static int loongson_ipi_common_pre_save(void *opaque)
+{
+ IPICore *ipicore = (IPICore *)opaque;
+ LoongsonIPICommonState *s = ipicore->ipi;
+ LoongsonIPICommonClass *licc = LOONGSON_IPI_COMMON_GET_CLASS(s);
+
+ if (licc->pre_save) {
+ return licc->pre_save(s);
+ }
+
+ return 0;
+}
+
+static int loongson_ipi_common_post_load(void *opaque, int version_id)
+{
+ IPICore *ipicore = (IPICore *)opaque;
+ LoongsonIPICommonState *s = ipicore->ipi;
+ LoongsonIPICommonClass *licc = LOONGSON_IPI_COMMON_GET_CLASS(s);
+
+ if (licc->post_load) {
+ return licc->post_load(s, version_id);
+ }
+
+ return 0;
+}
+
static const VMStateDescription vmstate_ipi_core = {
.name = "ipi-single",
.version_id = 2,
.minimum_version_id = 2,
+ .pre_save = loongson_ipi_common_pre_save,
+ .post_load = loongson_ipi_common_post_load,
.fields = (const VMStateField[]) {
VMSTATE_UINT32(status, IPICore),
VMSTATE_UINT32(en, IPICore),
diff --git a/hw/intc/meson.build b/hw/intc/meson.build
index 602da30..faae20b 100644
--- a/hw/intc/meson.build
+++ b/hw/intc/meson.build
@@ -38,11 +38,11 @@ if config_all_devices.has_key('CONFIG_APIC') or \
endif
specific_ss.add(when: 'CONFIG_APIC', if_true: files('apic.c', 'apic_common.c'))
-specific_ss.add(when: 'CONFIG_ARM_GIC', if_true: files('arm_gicv3_cpuif_common.c'))
-specific_ss.add(when: 'CONFIG_ARM_GICV3', if_true: files('arm_gicv3_cpuif.c'))
+arm_common_ss.add(when: 'CONFIG_ARM_GIC', if_true: files('arm_gicv3_cpuif_common.c'))
+arm_common_ss.add(when: 'CONFIG_ARM_GICV3', if_true: files('arm_gicv3_cpuif.c'))
specific_ss.add(when: 'CONFIG_ARM_GIC_KVM', if_true: files('arm_gic_kvm.c'))
specific_ss.add(when: ['CONFIG_ARM_GIC_KVM', 'TARGET_AARCH64'], if_true: files('arm_gicv3_kvm.c', 'arm_gicv3_its_kvm.c'))
-specific_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m_nvic.c'))
+arm_common_ss.add(when: 'CONFIG_ARM_V7M', if_true: files('armv7m_nvic.c'))
specific_ss.add(when: 'CONFIG_GRLIB', if_true: files('grlib_irqmp.c'))
specific_ss.add(when: 'CONFIG_IOAPIC', if_true: files('ioapic.c'))
specific_ss.add(when: 'CONFIG_LOONGSON_LIOINTC', if_true: files('loongson_liointc.c'))
@@ -71,6 +71,13 @@ specific_ss.add(when: 'CONFIG_M68K_IRQC', if_true: files('m68k_irqc.c'))
specific_ss.add(when: 'CONFIG_LOONGSON_IPI_COMMON', if_true: files('loongson_ipi_common.c'))
specific_ss.add(when: 'CONFIG_LOONGSON_IPI', if_true: files('loongson_ipi.c'))
specific_ss.add(when: 'CONFIG_LOONGARCH_IPI', if_true: files('loongarch_ipi.c'))
+specific_ss.add(when: ['CONFIG_KVM', 'CONFIG_LOONGARCH_IPI'],
+ if_true: files('loongarch_ipi_kvm.c'))
specific_ss.add(when: 'CONFIG_LOONGARCH_PCH_PIC', if_true: files('loongarch_pch_pic.c', 'loongarch_pic_common.c'))
+specific_ss.add(when: ['CONFIG_KVM', 'CONFIG_LOONGARCH_PCH_PIC'],
+ if_true: files('loongarch_pic_kvm.c'))
specific_ss.add(when: 'CONFIG_LOONGARCH_PCH_MSI', if_true: files('loongarch_pch_msi.c'))
specific_ss.add(when: 'CONFIG_LOONGARCH_EXTIOI', if_true: files('loongarch_extioi.c', 'loongarch_extioi_common.c'))
+specific_ss.add(when: ['CONFIG_KVM', 'CONFIG_LOONGARCH_EXTIOI'],
+ if_true: files('loongarch_extioi_kvm.c'))
+specific_ss.add(when: 'CONFIG_LOONGARCH_DINTC', if_true: files('loongarch_dintc.c'))
diff --git a/hw/intc/pnv_xive.c b/hw/intc/pnv_xive.c
index 935c0e4..c2ca40b 100644
--- a/hw/intc/pnv_xive.c
+++ b/hw/intc/pnv_xive.c
@@ -470,14 +470,13 @@ static bool pnv_xive_is_cpu_enabled(PnvXive *xive, PowerPCCPU *cpu)
return xive->regs[reg >> 3] & PPC_BIT(bit);
}
-static int pnv_xive_match_nvt(XivePresenter *xptr, uint8_t format,
- uint8_t nvt_blk, uint32_t nvt_idx,
- bool crowd, bool cam_ignore, uint8_t priority,
- uint32_t logic_serv, XiveTCTXMatch *match)
+static bool pnv_xive_match_nvt(XivePresenter *xptr, uint8_t format,
+ uint8_t nvt_blk, uint32_t nvt_idx,
+ bool crowd, bool cam_ignore, uint8_t priority,
+ uint32_t logic_serv, XiveTCTXMatch *match)
{
PnvXive *xive = PNV_XIVE(xptr);
PnvChip *chip = xive->chip;
- int count = 0;
int i, j;
for (i = 0; i < chip->nr_cores; i++) {
@@ -510,17 +509,18 @@ static int pnv_xive_match_nvt(XivePresenter *xptr, uint8_t format,
qemu_log_mask(LOG_GUEST_ERROR, "XIVE: already found a "
"thread context NVT %x/%x\n",
nvt_blk, nvt_idx);
- return -1;
+ match->count++;
+ continue;
}
match->ring = ring;
match->tctx = tctx;
- count++;
+ match->count++;
}
}
}
- return count;
+ return !!match->count;
}
static uint32_t pnv_xive_presenter_get_config(XivePresenter *xptr)
diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c
index ec8b0c6..0663baa 100644
--- a/hw/intc/pnv_xive2.c
+++ b/hw/intc/pnv_xive2.c
@@ -101,24 +101,35 @@ static uint32_t pnv_xive2_block_id(PnvXive2 *xive)
}
/*
- * Remote access to controllers. HW uses MMIOs. For now, a simple scan
- * of the chips is good enough.
- *
- * TODO: Block scope support
+ * Remote access to INT controllers. HW uses MMIOs(?). For now, a simple
+ * scan of all the chips INT controller is good enough.
*/
-static PnvXive2 *pnv_xive2_get_remote(uint8_t blk)
+static PnvXive2 *pnv_xive2_get_remote(uint32_t vsd_type, hwaddr fwd_addr)
{
PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
int i;
for (i = 0; i < pnv->num_chips; i++) {
- Pnv10Chip *chip10 = PNV10_CHIP(pnv->chips[i]);
- PnvXive2 *xive = &chip10->xive;
+ PnvChipClass *k = PNV_CHIP_GET_CLASS(pnv->chips[i]);
+ PnvXive2 *xive = PNV_XIVE2(k->intc_get(pnv->chips[i]));
- if (pnv_xive2_block_id(xive) == blk) {
+ /*
+ * Is this the XIVE matching the forwarded VSD address is for this
+ * VSD type
+ */
+ if ((vsd_type == VST_ESB && fwd_addr == xive->esb_base) ||
+ (vsd_type == VST_END && fwd_addr == xive->end_base) ||
+ ((vsd_type == VST_NVP ||
+ vsd_type == VST_NVG) && fwd_addr == xive->nvpg_base) ||
+ (vsd_type == VST_NVC && fwd_addr == xive->nvc_base)) {
return xive;
}
}
+
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "XIVE: >>>>> %s vsd_type %u fwd_addr 0x%"HWADDR_PRIx
+ " NOT FOUND\n",
+ __func__, vsd_type, fwd_addr);
return NULL;
}
@@ -251,8 +262,7 @@ static uint64_t pnv_xive2_vst_addr(PnvXive2 *xive, uint32_t type, uint8_t blk,
/* Remote VST access */
if (GETFIELD(VSD_MODE, vsd) == VSD_MODE_FORWARD) {
- xive = pnv_xive2_get_remote(blk);
-
+ xive = pnv_xive2_get_remote(type, (vsd & VSD_ADDRESS_MASK));
return xive ? pnv_xive2_vst_addr(xive, type, blk, idx) : 0;
}
@@ -595,20 +605,28 @@ static uint32_t pnv_xive2_get_config(Xive2Router *xrtr)
{
PnvXive2 *xive = PNV_XIVE2(xrtr);
uint32_t cfg = 0;
+ uint64_t reg = xive->cq_regs[CQ_XIVE_CFG >> 3];
- if (xive->cq_regs[CQ_XIVE_CFG >> 3] & CQ_XIVE_CFG_GEN1_TIMA_OS) {
+ if (reg & CQ_XIVE_CFG_GEN1_TIMA_OS) {
cfg |= XIVE2_GEN1_TIMA_OS;
}
- if (xive->cq_regs[CQ_XIVE_CFG >> 3] & CQ_XIVE_CFG_EN_VP_SAVE_RESTORE) {
+ if (reg & CQ_XIVE_CFG_EN_VP_SAVE_RESTORE) {
cfg |= XIVE2_VP_SAVE_RESTORE;
}
- if (GETFIELD(CQ_XIVE_CFG_HYP_HARD_RANGE,
- xive->cq_regs[CQ_XIVE_CFG >> 3]) == CQ_XIVE_CFG_THREADID_8BITS) {
+ if (GETFIELD(CQ_XIVE_CFG_HYP_HARD_RANGE, reg) ==
+ CQ_XIVE_CFG_THREADID_8BITS) {
cfg |= XIVE2_THREADID_8BITS;
}
+ if (reg & CQ_XIVE_CFG_EN_VP_GRP_PRIORITY) {
+ cfg |= XIVE2_EN_VP_GRP_PRIORITY;
+ }
+
+ cfg = SETFIELD(XIVE2_VP_INT_PRIO, cfg,
+ GETFIELD(CQ_XIVE_CFG_VP_INT_PRIO, reg));
+
return cfg;
}
@@ -622,24 +640,28 @@ static bool pnv_xive2_is_cpu_enabled(PnvXive2 *xive, PowerPCCPU *cpu)
return xive->tctxt_regs[reg >> 3] & PPC_BIT(bit);
}
-static int pnv_xive2_match_nvt(XivePresenter *xptr, uint8_t format,
- uint8_t nvt_blk, uint32_t nvt_idx,
- bool crowd, bool cam_ignore, uint8_t priority,
- uint32_t logic_serv, XiveTCTXMatch *match)
+static bool pnv_xive2_match_nvt(XivePresenter *xptr, uint8_t format,
+ uint8_t nvt_blk, uint32_t nvt_idx,
+ bool crowd, bool cam_ignore, uint8_t priority,
+ uint32_t logic_serv, XiveTCTXMatch *match)
{
PnvXive2 *xive = PNV_XIVE2(xptr);
PnvChip *chip = xive->chip;
- int count = 0;
int i, j;
bool gen1_tima_os =
xive->cq_regs[CQ_XIVE_CFG >> 3] & CQ_XIVE_CFG_GEN1_TIMA_OS;
+ static int next_start_core;
+ static int next_start_thread;
+ int start_core = next_start_core;
+ int start_thread = next_start_thread;
for (i = 0; i < chip->nr_cores; i++) {
- PnvCore *pc = chip->cores[i];
+ PnvCore *pc = chip->cores[(i + start_core) % chip->nr_cores];
CPUCore *cc = CPU_CORE(pc);
for (j = 0; j < cc->nr_threads; j++) {
- PowerPCCPU *cpu = pc->threads[j];
+ /* Start search for match with different thread each call */
+ PowerPCCPU *cpu = pc->threads[(j + start_thread) % cc->nr_threads];
XiveTCTX *tctx;
int ring;
@@ -669,7 +691,8 @@ static int pnv_xive2_match_nvt(XivePresenter *xptr, uint8_t format,
"thread context NVT %x/%x\n",
nvt_blk, nvt_idx);
/* Should set a FIR if we ever model it */
- return -1;
+ match->count++;
+ continue;
}
/*
* For a group notification, we need to know if the
@@ -684,14 +707,23 @@ static int pnv_xive2_match_nvt(XivePresenter *xptr, uint8_t format,
if (!match->tctx) {
match->ring = ring;
match->tctx = tctx;
+
+ next_start_thread = j + start_thread + 1;
+ if (next_start_thread >= cc->nr_threads) {
+ next_start_thread = 0;
+ next_start_core = i + start_core + 1;
+ if (next_start_core >= chip->nr_cores) {
+ next_start_core = 0;
+ }
+ }
}
- count++;
+ match->count++;
}
}
}
}
- return count;
+ return !!match->count;
}
static uint32_t pnv_xive2_presenter_get_config(XivePresenter *xptr)
@@ -1173,7 +1205,8 @@ static void pnv_xive2_ic_cq_write(void *opaque, hwaddr offset,
case CQ_FIRMASK_OR: /* FIR error reporting */
break;
default:
- xive2_error(xive, "CQ: invalid write 0x%"HWADDR_PRIx, offset);
+ xive2_error(xive, "CQ: invalid write 0x%"HWADDR_PRIx" value 0x%"PRIx64,
+ offset, val);
return;
}
@@ -1304,7 +1337,6 @@ static uint64_t pnv_xive2_ic_vc_read(void *opaque, hwaddr offset,
case VC_ENDC_WATCH2_SPEC:
case VC_ENDC_WATCH3_SPEC:
watch_engine = (offset - VC_ENDC_WATCH0_SPEC) >> 6;
- xive->vc_regs[reg] &= ~(VC_ENDC_WATCH_FULL | VC_ENDC_WATCH_CONFLICT);
pnv_xive2_endc_cache_watch_release(xive, watch_engine);
val = xive->vc_regs[reg];
break;
@@ -1315,10 +1347,11 @@ static uint64_t pnv_xive2_ic_vc_read(void *opaque, hwaddr offset,
case VC_ENDC_WATCH3_DATA0:
/*
* Load DATA registers from cache with data requested by the
- * SPEC register
+ * SPEC register. Clear gen_flipped bit in word 1.
*/
watch_engine = (offset - VC_ENDC_WATCH0_DATA0) >> 6;
pnv_xive2_end_cache_load(xive, watch_engine);
+ xive->vc_regs[reg] &= ~(uint64_t)END2_W1_GEN_FLIPPED;
val = xive->vc_regs[reg];
break;
@@ -1386,7 +1419,14 @@ static void pnv_xive2_ic_vc_write(void *opaque, hwaddr offset,
/*
* ESB cache updates (not modeled)
*/
- /* case VC_ESBC_FLUSH_CTRL: */
+ case VC_ESBC_FLUSH_CTRL:
+ if (val & VC_ESBC_FLUSH_CTRL_WANT_CACHE_DISABLE) {
+ xive2_error(xive, "VC: unsupported write @0x%"HWADDR_PRIx
+ " value 0x%"PRIx64" bit[2] poll_want_cache_disable",
+ offset, val);
+ return;
+ }
+ break;
case VC_ESBC_FLUSH_POLL:
xive->vc_regs[VC_ESBC_FLUSH_CTRL >> 3] |= VC_ESBC_FLUSH_CTRL_POLL_VALID;
/* ESB update */
@@ -1402,7 +1442,14 @@ static void pnv_xive2_ic_vc_write(void *opaque, hwaddr offset,
/*
* EAS cache updates (not modeled)
*/
- /* case VC_EASC_FLUSH_CTRL: */
+ case VC_EASC_FLUSH_CTRL:
+ if (val & VC_EASC_FLUSH_CTRL_WANT_CACHE_DISABLE) {
+ xive2_error(xive, "VC: unsupported write @0x%"HWADDR_PRIx
+ " value 0x%"PRIx64" bit[2] poll_want_cache_disable",
+ offset, val);
+ return;
+ }
+ break;
case VC_EASC_FLUSH_POLL:
xive->vc_regs[VC_EASC_FLUSH_CTRL >> 3] |= VC_EASC_FLUSH_CTRL_POLL_VALID;
/* EAS update */
@@ -1441,7 +1488,14 @@ static void pnv_xive2_ic_vc_write(void *opaque, hwaddr offset,
break;
- /* case VC_ENDC_FLUSH_CTRL: */
+ case VC_ENDC_FLUSH_CTRL:
+ if (val & VC_ENDC_FLUSH_CTRL_WANT_CACHE_DISABLE) {
+ xive2_error(xive, "VC: unsupported write @0x%"HWADDR_PRIx
+ " value 0x%"PRIx64" bit[2] poll_want_cache_disable",
+ offset, val);
+ return;
+ }
+ break;
case VC_ENDC_FLUSH_POLL:
xive->vc_regs[VC_ENDC_FLUSH_CTRL >> 3] |= VC_ENDC_FLUSH_CTRL_POLL_VALID;
break;
@@ -1470,7 +1524,8 @@ static void pnv_xive2_ic_vc_write(void *opaque, hwaddr offset,
break;
default:
- xive2_error(xive, "VC: invalid write @%"HWADDR_PRIx, offset);
+ xive2_error(xive, "VC: invalid write @0x%"HWADDR_PRIx" value 0x%"PRIx64,
+ offset, val);
return;
}
@@ -1661,7 +1716,14 @@ static void pnv_xive2_ic_pc_write(void *opaque, hwaddr offset,
pnv_xive2_nxc_update(xive, watch_engine);
break;
- /* case PC_NXC_FLUSH_CTRL: */
+ case PC_NXC_FLUSH_CTRL:
+ if (val & PC_NXC_FLUSH_CTRL_WANT_CACHE_DISABLE) {
+ xive2_error(xive, "VC: unsupported write @0x%"HWADDR_PRIx
+ " value 0x%"PRIx64" bit[2] poll_want_cache_disable",
+ offset, val);
+ return;
+ }
+ break;
case PC_NXC_FLUSH_POLL:
xive->pc_regs[PC_NXC_FLUSH_CTRL >> 3] |= PC_NXC_FLUSH_CTRL_POLL_VALID;
break;
@@ -1678,7 +1740,8 @@ static void pnv_xive2_ic_pc_write(void *opaque, hwaddr offset,
break;
default:
- xive2_error(xive, "PC: invalid write @%"HWADDR_PRIx, offset);
+ xive2_error(xive, "PC: invalid write @0x%"HWADDR_PRIx" value 0x%"PRIx64,
+ offset, val);
return;
}
@@ -1765,7 +1828,8 @@ static void pnv_xive2_ic_tctxt_write(void *opaque, hwaddr offset,
xive->tctxt_regs[reg] = val;
break;
default:
- xive2_error(xive, "TCTXT: invalid write @%"HWADDR_PRIx, offset);
+ xive2_error(xive, "TCTXT: invalid write @0x%"HWADDR_PRIx
+ " data 0x%"PRIx64, offset, val);
return;
}
}
@@ -1836,7 +1900,8 @@ static void pnv_xive2_xscom_write(void *opaque, hwaddr offset,
pnv_xive2_ic_tctxt_write(opaque, mmio_offset, val, size);
break;
default:
- xive2_error(xive, "XSCOM: invalid write @%"HWADDR_PRIx, offset);
+ xive2_error(xive, "XSCOM: invalid write @%"HWADDR_PRIx
+ " value 0x%"PRIx64, offset, val);
}
}
@@ -1904,7 +1969,8 @@ static void pnv_xive2_ic_notify_write(void *opaque, hwaddr offset,
break;
default:
- xive2_error(xive, "NOTIFY: invalid write @%"HWADDR_PRIx, offset);
+ xive2_error(xive, "NOTIFY: invalid write @%"HWADDR_PRIx
+ " value 0x%"PRIx64, offset, val);
}
}
@@ -1946,7 +2012,8 @@ static void pnv_xive2_ic_lsi_write(void *opaque, hwaddr offset,
{
PnvXive2 *xive = PNV_XIVE2(opaque);
- xive2_error(xive, "LSI: invalid write @%"HWADDR_PRIx, offset);
+ xive2_error(xive, "LSI: invalid write @%"HWADDR_PRIx" value 0x%"PRIx64,
+ offset, val);
}
static const MemoryRegionOps pnv_xive2_ic_lsi_ops = {
@@ -2049,7 +2116,8 @@ static void pnv_xive2_ic_sync_write(void *opaque, hwaddr offset,
inject_type = PNV_XIVE2_QUEUE_NXC_ST_RMT_CI;
break;
default:
- xive2_error(xive, "SYNC: invalid write @%"HWADDR_PRIx, offset);
+ xive2_error(xive, "SYNC: invalid write @%"HWADDR_PRIx" value 0x%"PRIx64,
+ offset, val);
return;
}
diff --git a/hw/intc/pnv_xive2_regs.h b/hw/intc/pnv_xive2_regs.h
index e8b87b3..d53300f 100644
--- a/hw/intc/pnv_xive2_regs.h
+++ b/hw/intc/pnv_xive2_regs.h
@@ -66,6 +66,7 @@
#define CQ_XIVE_CFG_GEN1_TIMA_HYP_BLK0 PPC_BIT(26) /* 0 if bit[25]=0 */
#define CQ_XIVE_CFG_GEN1_TIMA_CROWD_DIS PPC_BIT(27) /* 0 if bit[25]=0 */
#define CQ_XIVE_CFG_GEN1_END_ESX PPC_BIT(28)
+#define CQ_XIVE_CFG_EN_VP_GRP_PRIORITY PPC_BIT(32) /* 0 if bit[25]=1 */
#define CQ_XIVE_CFG_EN_VP_SAVE_RESTORE PPC_BIT(38) /* 0 if bit[25]=1 */
#define CQ_XIVE_CFG_EN_VP_SAVE_REST_STRICT PPC_BIT(39) /* 0 if bit[25]=1 */
diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c
index b0139f0..9f4c36e 100644
--- a/hw/intc/riscv_aclint.c
+++ b/hw/intc/riscv_aclint.c
@@ -28,6 +28,7 @@
#include "qemu/module.h"
#include "hw/sysbus.h"
#include "target/riscv/cpu.h"
+#include "target/riscv/time_helper.h"
#include "hw/qdev-properties.h"
#include "hw/intc/riscv_aclint.h"
#include "qemu/timer.h"
@@ -240,6 +241,10 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr,
riscv_aclint_mtimer_write_timecmp(mtimer, RISCV_CPU(cpu),
mtimer->hartid_base + i,
mtimer->timecmp[i]);
+ riscv_timer_write_timecmp(env, env->stimer, env->stimecmp, 0, MIP_STIP);
+ riscv_timer_write_timecmp(env, env->vstimer, env->vstimecmp,
+ env->htimedelta, MIP_VSTIP);
+
}
return;
}
@@ -318,12 +323,15 @@ static void riscv_aclint_mtimer_reset_enter(Object *obj, ResetType type)
static const VMStateDescription vmstate_riscv_mtimer = {
.name = "riscv_mtimer",
- .version_id = 1,
- .minimum_version_id = 1,
+ .version_id = 3,
+ .minimum_version_id = 3,
.fields = (const VMStateField[]) {
+ VMSTATE_UINT64(time_delta, RISCVAclintMTimerState),
VMSTATE_VARRAY_UINT32(timecmp, RISCVAclintMTimerState,
num_harts, 0,
vmstate_info_uint64, uint64_t),
+ VMSTATE_TIMER_PTR_VARRAY(timers, RISCVAclintMTimerState,
+ num_harts),
VMSTATE_END_OF_LIST()
}
};
diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c
index 8bcd9f4..a1d9fa5 100644
--- a/hw/intc/riscv_aplic.c
+++ b/hw/intc/riscv_aplic.c
@@ -628,7 +628,7 @@ static void riscv_aplic_request(void *opaque, int irq, int level)
static uint64_t riscv_aplic_read(void *opaque, hwaddr addr, unsigned size)
{
- uint32_t irq, word, idc;
+ uint32_t irq, word, idc, sm;
RISCVAPLICState *aplic = opaque;
/* Reads must be 4 byte words */
@@ -696,6 +696,10 @@ static uint64_t riscv_aplic_read(void *opaque, hwaddr addr, unsigned size)
} else if ((APLIC_TARGET_BASE <= addr) &&
(addr < (APLIC_TARGET_BASE + (aplic->num_irqs - 1) * 4))) {
irq = ((addr - APLIC_TARGET_BASE) >> 2) + 1;
+ sm = aplic->sourcecfg[irq] & APLIC_SOURCECFG_SM_MASK;
+ if (sm == APLIC_SOURCECFG_SM_INACTIVE) {
+ return 0;
+ }
return aplic->target[irq];
} else if (!aplic->msimode && (APLIC_IDC_BASE <= addr) &&
(addr < (APLIC_IDC_BASE + aplic->num_harts * APLIC_IDC_SIZE))) {
@@ -962,10 +966,18 @@ static const Property riscv_aplic_properties[] = {
DEFINE_PROP_BOOL("mmode", RISCVAPLICState, mmode, 0),
};
+static bool riscv_aplic_state_needed(void *opaque)
+{
+ RISCVAPLICState *aplic = opaque;
+
+ return riscv_use_emulated_aplic(aplic->msimode);
+}
+
static const VMStateDescription vmstate_riscv_aplic = {
.name = "riscv_aplic",
- .version_id = 2,
- .minimum_version_id = 2,
+ .version_id = 3,
+ .minimum_version_id = 3,
+ .needed = riscv_aplic_state_needed,
.fields = (const VMStateField[]) {
VMSTATE_UINT32(domaincfg, RISCVAPLICState),
VMSTATE_UINT32(mmsicfgaddr, RISCVAPLICState),
diff --git a/hw/intc/riscv_imsic.c b/hw/intc/riscv_imsic.c
index 2169988..6174e1a 100644
--- a/hw/intc/riscv_imsic.c
+++ b/hw/intc/riscv_imsic.c
@@ -398,10 +398,16 @@ static const Property riscv_imsic_properties[] = {
DEFINE_PROP_UINT32("num-irqs", RISCVIMSICState, num_irqs, 0),
};
+static bool riscv_imsic_state_needed(void *opaque)
+{
+ return !kvm_irqchip_in_kernel();
+}
+
static const VMStateDescription vmstate_riscv_imsic = {
.name = "riscv_imsic",
- .version_id = 1,
- .minimum_version_id = 1,
+ .version_id = 2,
+ .minimum_version_id = 2,
+ .needed = riscv_imsic_state_needed,
.fields = (const VMStateField[]) {
VMSTATE_VARRAY_UINT32(eidelivery, RISCVIMSICState,
num_pages, 0,
diff --git a/hw/intc/s390_flic.c b/hw/intc/s390_flic.c
index 8f4c9fd..1eed512 100644
--- a/hw/intc/s390_flic.c
+++ b/hw/intc/s390_flic.c
@@ -190,7 +190,7 @@ static void qemu_s390_flic_notify(uint32_t type)
CPU_FOREACH(cs) {
S390CPU *cpu = S390_CPU(cs);
- cs->interrupt_request |= CPU_INTERRUPT_HARD;
+ cpu_set_interrupt(cs, CPU_INTERRUPT_HARD);
/* ignore CPUs that are not sleeping */
if (s390_cpu_get_state(cpu) != S390_CPU_STATE_OPERATING &&
diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
index 440edb9..e393f5d 100644
--- a/hw/intc/spapr_xive.c
+++ b/hw/intc/spapr_xive.c
@@ -428,14 +428,13 @@ static int spapr_xive_write_nvt(XiveRouter *xrtr, uint8_t nvt_blk,
g_assert_not_reached();
}
-static int spapr_xive_match_nvt(XivePresenter *xptr, uint8_t format,
- uint8_t nvt_blk, uint32_t nvt_idx,
- bool crowd, bool cam_ignore,
- uint8_t priority,
- uint32_t logic_serv, XiveTCTXMatch *match)
+static bool spapr_xive_match_nvt(XivePresenter *xptr, uint8_t format,
+ uint8_t nvt_blk, uint32_t nvt_idx,
+ bool crowd, bool cam_ignore,
+ uint8_t priority,
+ uint32_t logic_serv, XiveTCTXMatch *match)
{
CPUState *cs;
- int count = 0;
CPU_FOREACH(cs) {
PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -463,16 +462,17 @@ static int spapr_xive_match_nvt(XivePresenter *xptr, uint8_t format,
if (match->tctx) {
qemu_log_mask(LOG_GUEST_ERROR, "XIVE: already found a thread "
"context NVT %x/%x\n", nvt_blk, nvt_idx);
- return -1;
+ match->count++;
+ continue;
}
match->ring = ring;
match->tctx = tctx;
- count++;
+ match->count++;
}
}
- return count;
+ return !!match->count;
}
static uint32_t spapr_xive_presenter_get_config(XivePresenter *xptr)
diff --git a/hw/intc/trace-events b/hw/intc/trace-events
index 334aa6a..018c609 100644
--- a/hw/intc/trace-events
+++ b/hw/intc/trace-events
@@ -274,11 +274,13 @@ kvm_xive_cpu_connect(uint32_t id) "connect CPU%d to KVM device"
kvm_xive_source_reset(uint32_t srcno) "IRQ 0x%x"
# xive.c
-xive_tctx_accept(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IBP=0x%02x PIPR=0x%02x CPPR=0x%02x NSR=0x%02x ACK"
-xive_tctx_notify(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IBP=0x%02x PIPR=0x%02x CPPR=0x%02x NSR=0x%02x raise !"
-xive_tctx_set_cppr(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IBP=0x%02x PIPR=0x%02x new CPPR=0x%02x NSR=0x%02x"
+xive_tctx_accept(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IPB=0x%02x PIPR=0x%02x CPPR=0x%02x NSR=0x%02x ACK"
+xive_tctx_notify(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IPB=0x%02x PIPR=0x%02x CPPR=0x%02x NSR=0x%02x raise !"
+xive_tctx_set_cppr(uint32_t index, uint8_t ring, uint8_t ipb, uint8_t pipr, uint8_t cppr, uint8_t nsr) "target=%d ring=0x%x IPB=0x%02x PIPR=0x%02x new CPPR=0x%02x NSR=0x%02x"
xive_source_esb_read(uint64_t addr, uint32_t srcno, uint64_t value) "@0x%"PRIx64" IRQ 0x%x val=0x%"PRIx64
xive_source_esb_write(uint64_t addr, uint32_t srcno, uint64_t value) "@0x%"PRIx64" IRQ 0x%x val=0x%"PRIx64
+xive_source_notify(uint32_t srcno) "Processing notification for queued IRQ 0x%x"
+xive_source_blocked(uint32_t srcno) "No action needed for IRQ 0x%x currently"
xive_router_end_notify(uint8_t end_blk, uint32_t end_idx, uint32_t end_data) "END 0x%02x/0x%04x -> enqueue 0x%08x"
xive_router_end_escalate(uint8_t end_blk, uint32_t end_idx, uint8_t esc_blk, uint32_t esc_idx, uint32_t end_data) "END 0x%02x/0x%04x -> escalate END 0x%02x/0x%04x data 0x%08x"
xive_tctx_tm_write(uint32_t index, uint64_t offset, unsigned int size, uint64_t value) "target=%d @0x%"PRIx64" sz=%d val=0x%" PRIx64
@@ -289,6 +291,10 @@ xive_end_source_read(uint8_t end_blk, uint32_t end_idx, uint64_t addr) "END 0x%x
# xive2.c
xive_nvp_backlog_op(uint8_t blk, uint32_t idx, uint8_t op, uint8_t priority, uint8_t rc) "NVP 0x%x/0x%x operation=%d priority=%d rc=%d"
xive_nvgc_backlog_op(bool c, uint8_t blk, uint32_t idx, uint8_t op, uint8_t priority, uint32_t rc) "NVGC crowd=%d 0x%x/0x%x operation=%d priority=%d rc=%d"
+xive_redistribute(uint32_t index, uint8_t ring, uint8_t end_blk, uint32_t end_idx) "Redistribute from target=%d ring=0x%x NVP 0x%x/0x%x"
+xive_end_enqueue(uint8_t end_blk, uint32_t end_idx, uint32_t end_data) "Queue event for END 0x%x/0x%x data=0x%x"
+xive_escalate_end(uint8_t end_blk, uint32_t end_idx, uint8_t esc_blk, uint32_t esc_idx, uint32_t esc_data) "Escalate from END 0x%x/0x%x to END 0x%x/0x%x data=0x%x"
+xive_escalate_esb(uint8_t end_blk, uint32_t end_idx, uint32_t lisn) "Escalate from END 0x%x/0x%x to LISN=0x%x"
# pnv_xive.c
pnv_xive_ic_hw_trigger(uint64_t addr, uint64_t val) "@0x%"PRIx64" val=0x%"PRIx64
diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index d9a199e..200710e 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -335,6 +335,8 @@ static void icp_realize(DeviceState *dev, Error **errp)
return;
}
}
+
+ vmstate_register(NULL, icp->cs->cpu_index, &vmstate_icp_server, icp);
}
static void icp_unrealize(DeviceState *dev)
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index 27b473e..e0ffcf8 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -25,6 +25,58 @@
/*
* XIVE Thread Interrupt Management context
*/
+bool xive_ring_valid(XiveTCTX *tctx, uint8_t ring)
+{
+ uint8_t cur_ring;
+
+ for (cur_ring = ring; cur_ring <= TM_QW3_HV_PHYS;
+ cur_ring += XIVE_TM_RING_SIZE) {
+ if (!(tctx->regs[cur_ring + TM_WORD2] & 0x80)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool xive_nsr_indicates_exception(uint8_t ring, uint8_t nsr)
+{
+ switch (ring) {
+ case TM_QW1_OS:
+ return !!(nsr & TM_QW1_NSR_EO);
+ case TM_QW2_HV_POOL:
+ case TM_QW3_HV_PHYS:
+ return !!(nsr & TM_QW3_NSR_HE);
+ default:
+ g_assert_not_reached();
+ }
+}
+
+bool xive_nsr_indicates_group_exception(uint8_t ring, uint8_t nsr)
+{
+ if ((nsr & TM_NSR_GRP_LVL) > 0) {
+ g_assert(xive_nsr_indicates_exception(ring, nsr));
+ return true;
+ }
+ return false;
+}
+
+uint8_t xive_nsr_exception_ring(uint8_t ring, uint8_t nsr)
+{
+ /* NSR determines if pool/phys ring is for phys or pool interrupt */
+ if ((ring == TM_QW3_HV_PHYS) || (ring == TM_QW2_HV_POOL)) {
+ uint8_t he = (nsr & TM_QW3_NSR_HE) >> 6;
+
+ if (he == TM_QW3_NSR_HE_PHYS) {
+ return TM_QW3_HV_PHYS;
+ } else if (he == TM_QW3_NSR_HE_POOL) {
+ return TM_QW2_HV_POOL;
+ } else {
+ /* Don't support LSI mode */
+ g_assert_not_reached();
+ }
+ }
+ return ring;
+}
static qemu_irq xive_tctx_output(XiveTCTX *tctx, uint8_t ring)
{
@@ -41,74 +93,83 @@ static qemu_irq xive_tctx_output(XiveTCTX *tctx, uint8_t ring)
}
}
-static uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t ring)
+/*
+ * interrupt is accepted on the presentation ring, for PHYS ring the NSR
+ * directs it to the PHYS or POOL rings.
+ */
+uint64_t xive_tctx_accept(XiveTCTX *tctx, uint8_t sig_ring)
{
- uint8_t *regs = &tctx->regs[ring];
- uint8_t nsr = regs[TM_NSR];
+ uint8_t *sig_regs = &tctx->regs[sig_ring];
+ uint8_t nsr = sig_regs[TM_NSR];
- qemu_irq_lower(xive_tctx_output(tctx, ring));
+ g_assert(sig_ring == TM_QW1_OS || sig_ring == TM_QW3_HV_PHYS);
- if (regs[TM_NSR] != 0) {
- uint8_t cppr = regs[TM_PIPR];
- uint8_t alt_ring;
- uint8_t *alt_regs;
+ g_assert(tctx->regs[TM_QW2_HV_POOL + TM_NSR] == 0);
+ g_assert(tctx->regs[TM_QW2_HV_POOL + TM_PIPR] == 0);
+ g_assert(tctx->regs[TM_QW2_HV_POOL + TM_CPPR] == 0);
- /* POOL interrupt uses IPB in QW2, POOL ring */
- if ((ring == TM_QW3_HV_PHYS) && (nsr & (TM_QW3_NSR_HE_POOL << 6))) {
- alt_ring = TM_QW2_HV_POOL;
- } else {
- alt_ring = ring;
- }
- alt_regs = &tctx->regs[alt_ring];
+ if (xive_nsr_indicates_exception(sig_ring, nsr)) {
+ uint8_t cppr = sig_regs[TM_PIPR];
+ uint8_t ring;
+ uint8_t *regs;
+
+ ring = xive_nsr_exception_ring(sig_ring, nsr);
+ regs = &tctx->regs[ring];
- regs[TM_CPPR] = cppr;
+ sig_regs[TM_CPPR] = cppr;
/*
* If the interrupt was for a specific VP, reset the pending
* buffer bit, otherwise clear the logical server indicator
*/
- if (regs[TM_NSR] & TM_NSR_GRP_LVL) {
- regs[TM_NSR] &= ~TM_NSR_GRP_LVL;
- } else {
- alt_regs[TM_IPB] &= ~xive_priority_to_ipb(cppr);
+ if (!xive_nsr_indicates_group_exception(sig_ring, nsr)) {
+ regs[TM_IPB] &= ~xive_priority_to_ipb(cppr);
}
- /* Drop the exception bit and any group/crowd */
- regs[TM_NSR] = 0;
+ /* Clear the exception from NSR */
+ sig_regs[TM_NSR] = 0;
+ qemu_irq_lower(xive_tctx_output(tctx, sig_ring));
- trace_xive_tctx_accept(tctx->cs->cpu_index, alt_ring,
- alt_regs[TM_IPB], regs[TM_PIPR],
- regs[TM_CPPR], regs[TM_NSR]);
+ trace_xive_tctx_accept(tctx->cs->cpu_index, ring,
+ regs[TM_IPB], sig_regs[TM_PIPR],
+ sig_regs[TM_CPPR], sig_regs[TM_NSR]);
}
- return ((uint64_t)nsr << 8) | regs[TM_CPPR];
+ return ((uint64_t)nsr << 8) | sig_regs[TM_CPPR];
}
-void xive_tctx_notify(XiveTCTX *tctx, uint8_t ring, uint8_t group_level)
+/* Change PIPR and calculate NSR and irq based on PIPR, CPPR, group */
+void xive_tctx_pipr_set(XiveTCTX *tctx, uint8_t ring, uint8_t pipr,
+ uint8_t group_level)
{
- /* HV_POOL ring uses HV_PHYS NSR, CPPR and PIPR registers */
- uint8_t alt_ring = (ring == TM_QW2_HV_POOL) ? TM_QW3_HV_PHYS : ring;
- uint8_t *alt_regs = &tctx->regs[alt_ring];
+ uint8_t *sig_regs = xive_tctx_signal_regs(tctx, ring);
uint8_t *regs = &tctx->regs[ring];
- if (alt_regs[TM_PIPR] < alt_regs[TM_CPPR]) {
+ g_assert(!xive_nsr_indicates_group_exception(ring, sig_regs[TM_NSR]));
+
+ sig_regs[TM_PIPR] = pipr;
+
+ if (pipr < sig_regs[TM_CPPR]) {
switch (ring) {
case TM_QW1_OS:
- regs[TM_NSR] = TM_QW1_NSR_EO | (group_level & 0x3F);
+ sig_regs[TM_NSR] = TM_QW1_NSR_EO | (group_level & 0x3F);
break;
case TM_QW2_HV_POOL:
- alt_regs[TM_NSR] = (TM_QW3_NSR_HE_POOL << 6) | (group_level & 0x3F);
+ sig_regs[TM_NSR] = (TM_QW3_NSR_HE_POOL << 6) | (group_level & 0x3F);
break;
case TM_QW3_HV_PHYS:
- regs[TM_NSR] = (TM_QW3_NSR_HE_PHYS << 6) | (group_level & 0x3F);
+ sig_regs[TM_NSR] = (TM_QW3_NSR_HE_PHYS << 6) | (group_level & 0x3F);
break;
default:
g_assert_not_reached();
}
trace_xive_tctx_notify(tctx->cs->cpu_index, ring,
- regs[TM_IPB], alt_regs[TM_PIPR],
- alt_regs[TM_CPPR], alt_regs[TM_NSR]);
+ regs[TM_IPB], pipr,
+ sig_regs[TM_CPPR], sig_regs[TM_NSR]);
qemu_irq_raise(xive_tctx_output(tctx, ring));
+ } else {
+ sig_regs[TM_NSR] = 0;
+ qemu_irq_lower(xive_tctx_output(tctx, ring));
}
}
@@ -124,25 +185,32 @@ void xive_tctx_reset_signal(XiveTCTX *tctx, uint8_t ring)
static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
{
- uint8_t *regs = &tctx->regs[ring];
+ uint8_t *sig_regs = &tctx->regs[ring];
uint8_t pipr_min;
uint8_t ring_min;
+ g_assert(ring == TM_QW1_OS || ring == TM_QW3_HV_PHYS);
+
+ g_assert(tctx->regs[TM_QW2_HV_POOL + TM_NSR] == 0);
+ g_assert(tctx->regs[TM_QW2_HV_POOL + TM_PIPR] == 0);
+ g_assert(tctx->regs[TM_QW2_HV_POOL + TM_CPPR] == 0);
+
+ /* XXX: should show pool IPB for PHYS ring */
trace_xive_tctx_set_cppr(tctx->cs->cpu_index, ring,
- regs[TM_IPB], regs[TM_PIPR],
- cppr, regs[TM_NSR]);
+ sig_regs[TM_IPB], sig_regs[TM_PIPR],
+ cppr, sig_regs[TM_NSR]);
if (cppr > XIVE_PRIORITY_MAX) {
cppr = 0xff;
}
- tctx->regs[ring + TM_CPPR] = cppr;
+ sig_regs[TM_CPPR] = cppr;
/*
* Recompute the PIPR based on local pending interrupts. The PHYS
* ring must take the minimum of both the PHYS and POOL PIPR values.
*/
- pipr_min = xive_ipb_to_pipr(regs[TM_IPB]);
+ pipr_min = xive_ipb_to_pipr(sig_regs[TM_IPB]);
ring_min = ring;
/* PHYS updates also depend on POOL values */
@@ -151,7 +219,6 @@ static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
/* POOL values only matter if POOL ctx is valid */
if (pool_regs[TM_WORD2] & 0x80) {
-
uint8_t pool_pipr = xive_ipb_to_pipr(pool_regs[TM_IPB]);
/*
@@ -165,30 +232,39 @@ static void xive_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
}
}
- regs[TM_PIPR] = pipr_min;
+ /* CPPR has changed, this may present or preclude a pending exception */
+ xive_tctx_pipr_set(tctx, ring_min, pipr_min, 0);
+}
+
+static void xive_tctx_pipr_recompute_from_ipb(XiveTCTX *tctx, uint8_t ring)
+{
+ uint8_t *sig_regs = xive_tctx_signal_regs(tctx, ring);
+ uint8_t *regs = &tctx->regs[ring];
- /* CPPR has changed, check if we need to raise a pending exception */
- xive_tctx_notify(tctx, ring_min, 0);
+ /* Does not support a presented group interrupt */
+ g_assert(!xive_nsr_indicates_group_exception(ring, sig_regs[TM_NSR]));
+
+ xive_tctx_pipr_set(tctx, ring, xive_ipb_to_pipr(regs[TM_IPB]), 0);
}
-void xive_tctx_pipr_update(XiveTCTX *tctx, uint8_t ring, uint8_t priority,
- uint8_t group_level)
- {
- /* HV_POOL ring uses HV_PHYS NSR, CPPR and PIPR registers */
- uint8_t alt_ring = (ring == TM_QW2_HV_POOL) ? TM_QW3_HV_PHYS : ring;
- uint8_t *alt_regs = &tctx->regs[alt_ring];
+void xive_tctx_pipr_present(XiveTCTX *tctx, uint8_t ring, uint8_t priority,
+ uint8_t group_level)
+{
+ uint8_t *sig_regs = xive_tctx_signal_regs(tctx, ring);
uint8_t *regs = &tctx->regs[ring];
+ uint8_t pipr = xive_priority_to_pipr(priority);
if (group_level == 0) {
- /* VP-specific */
regs[TM_IPB] |= xive_priority_to_ipb(priority);
- alt_regs[TM_PIPR] = xive_ipb_to_pipr(regs[TM_IPB]);
- } else {
- /* VP-group */
- alt_regs[TM_PIPR] = xive_priority_to_pipr(priority);
+ if (pipr >= sig_regs[TM_PIPR]) {
+ /* VP interrupts can come here with lower priority than PIPR */
+ return;
+ }
}
- xive_tctx_notify(tctx, ring, group_level);
- }
+ g_assert(pipr <= xive_ipb_to_pipr(regs[TM_IPB]));
+ g_assert(pipr < sig_regs[TM_PIPR]);
+ xive_tctx_pipr_set(tctx, ring, pipr, group_level);
+}
/*
* XIVE Thread Interrupt Management Area (TIMA)
@@ -206,25 +282,78 @@ static uint64_t xive_tm_ack_hv_reg(XivePresenter *xptr, XiveTCTX *tctx,
return xive_tctx_accept(tctx, TM_QW3_HV_PHYS);
}
+static void xive_pool_cam_decode(uint32_t cam, uint8_t *nvt_blk,
+ uint32_t *nvt_idx, bool *vp)
+{
+ if (nvt_blk) {
+ *nvt_blk = xive_nvt_blk(cam);
+ }
+ if (nvt_idx) {
+ *nvt_idx = xive_nvt_idx(cam);
+ }
+ if (vp) {
+ *vp = !!(cam & TM_QW2W2_VP);
+ }
+}
+
+static uint32_t xive_tctx_get_pool_cam(XiveTCTX *tctx, uint8_t *nvt_blk,
+ uint32_t *nvt_idx, bool *vp)
+{
+ uint32_t qw2w2 = xive_tctx_word2(&tctx->regs[TM_QW2_HV_POOL]);
+ uint32_t cam = be32_to_cpu(qw2w2);
+
+ xive_pool_cam_decode(cam, nvt_blk, nvt_idx, vp);
+ return qw2w2;
+}
+
+static void xive_tctx_set_pool_cam(XiveTCTX *tctx, uint32_t qw2w2)
+{
+ memcpy(&tctx->regs[TM_QW2_HV_POOL + TM_WORD2], &qw2w2, 4);
+}
+
static uint64_t xive_tm_pull_pool_ctx(XivePresenter *xptr, XiveTCTX *tctx,
hwaddr offset, unsigned size)
{
- uint32_t qw2w2_prev = xive_tctx_word2(&tctx->regs[TM_QW2_HV_POOL]);
uint32_t qw2w2;
+ uint32_t qw2w2_new;
+ uint8_t nvt_blk;
+ uint32_t nvt_idx;
+ bool vp;
+
+ qw2w2 = xive_tctx_get_pool_cam(tctx, &nvt_blk, &nvt_idx, &vp);
+
+ if (!vp) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: pull invalid POOL NVT %x/%x !?\n",
+ nvt_blk, nvt_idx);
+ }
+
+ /* Invalidate CAM line */
+ qw2w2_new = xive_set_field32(TM_QW2W2_VP, qw2w2, 0);
+ xive_tctx_set_pool_cam(tctx, qw2w2_new);
+
+ xive_tctx_reset_signal(tctx, TM_QW1_OS);
+ xive_tctx_reset_signal(tctx, TM_QW2_HV_POOL);
+ /* Re-check phys for interrupts if pool was disabled */
+ xive_tctx_pipr_recompute_from_ipb(tctx, TM_QW3_HV_PHYS);
- qw2w2 = xive_set_field32(TM_QW2W2_VP, qw2w2_prev, 0);
- memcpy(&tctx->regs[TM_QW2_HV_POOL + TM_WORD2], &qw2w2, 4);
return qw2w2;
}
static uint64_t xive_tm_pull_phys_ctx(XivePresenter *xptr, XiveTCTX *tctx,
hwaddr offset, unsigned size)
{
- uint8_t qw3b8_prev = tctx->regs[TM_QW3_HV_PHYS + TM_WORD2];
- uint8_t qw3b8;
+ uint8_t qw3b8 = tctx->regs[TM_QW3_HV_PHYS + TM_WORD2];
+ uint8_t qw3b8_new;
+
+ qw3b8 = tctx->regs[TM_QW3_HV_PHYS + TM_WORD2];
+ if (!(qw3b8 & TM_QW3B8_VT)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: pulling invalid PHYS thread!?\n");
+ }
+ qw3b8_new = qw3b8 & ~TM_QW3B8_VT;
+ tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] = qw3b8_new;
- qw3b8 = qw3b8_prev & ~TM_QW3B8_VT;
- tctx->regs[TM_QW3_HV_PHYS + TM_WORD2] = qw3b8;
+ xive_tctx_reset_signal(tctx, TM_QW1_OS);
+ xive_tctx_reset_signal(tctx, TM_QW3_HV_PHYS);
return qw3b8;
}
@@ -255,14 +384,14 @@ static uint64_t xive_tm_vt_poll(XivePresenter *xptr, XiveTCTX *tctx,
static const uint8_t xive_tm_hw_view[] = {
3, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0, /* QW-0 User */
- 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 3, 3, 0, 0, 0, 0, /* QW-1 OS */
+ 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 3, 3, 0, 0, 0, 3, /* QW-1 OS */
0, 0, 3, 3, 0, 3, 3, 0, 3, 3, 3, 3, 0, 0, 0, 0, /* QW-2 POOL */
3, 3, 3, 3, 0, 3, 0, 2, 3, 0, 0, 3, 3, 3, 3, 0, /* QW-3 PHYS */
};
static const uint8_t xive_tm_hv_view[] = {
3, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 0, 0, 0, /* QW-0 User */
- 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 3, 3, 0, 0, 0, 0, /* QW-1 OS */
+ 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 3, 3, 0, 0, 0, 3, /* QW-1 OS */
0, 0, 3, 3, 0, 3, 3, 0, 0, 3, 3, 3, 0, 0, 0, 0, /* QW-2 POOL */
3, 3, 3, 3, 0, 3, 0, 2, 3, 0, 0, 3, 0, 0, 0, 0, /* QW-3 PHYS */
};
@@ -326,7 +455,7 @@ static void xive_tm_raw_write(XiveTCTX *tctx, hwaddr offset, uint64_t value,
*/
if (size < 4 || !mask || ring_offset == TM_QW0_USER) {
qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA @%"
- HWADDR_PRIx"\n", offset);
+ HWADDR_PRIx" size %d\n", offset, size);
return;
}
@@ -357,7 +486,7 @@ static uint64_t xive_tm_raw_read(XiveTCTX *tctx, hwaddr offset, unsigned size)
*/
if (size < 4 || !mask || ring_offset == TM_QW0_USER) {
qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access at TIMA @%"
- HWADDR_PRIx"\n", offset);
+ HWADDR_PRIx" size %d\n", offset, size);
return -1;
}
@@ -403,6 +532,12 @@ static void xive_tm_set_os_lgs(XivePresenter *xptr, XiveTCTX *tctx,
xive_tctx_set_lgs(tctx, TM_QW1_OS, value & 0xff);
}
+static void xive_tm_set_pool_lgs(XivePresenter *xptr, XiveTCTX *tctx,
+ hwaddr offset, uint64_t value, unsigned size)
+{
+ xive_tctx_set_lgs(tctx, TM_QW2_HV_POOL, value & 0xff);
+}
+
/*
* Adjust the PIPR to allow a CPU to process event queues of other
* priorities during one physical interrupt cycle.
@@ -410,7 +545,12 @@ static void xive_tm_set_os_lgs(XivePresenter *xptr, XiveTCTX *tctx,
static void xive_tm_set_os_pending(XivePresenter *xptr, XiveTCTX *tctx,
hwaddr offset, uint64_t value, unsigned size)
{
- xive_tctx_pipr_update(tctx, TM_QW1_OS, value & 0xff, 0);
+ uint8_t ring = TM_QW1_OS;
+ uint8_t *regs = &tctx->regs[ring];
+
+ /* XXX: how should this work exactly? */
+ regs[TM_IPB] |= xive_priority_to_ipb(value & 0xff);
+ xive_tctx_pipr_recompute_from_ipb(tctx, ring);
}
static void xive_os_cam_decode(uint32_t cam, uint8_t *nvt_blk,
@@ -454,7 +594,7 @@ static uint64_t xive_tm_pull_os_ctx(XivePresenter *xptr, XiveTCTX *tctx,
qw1w2 = xive_tctx_get_os_cam(tctx, &nvt_blk, &nvt_idx, &vo);
if (!vo) {
- qemu_log_mask(LOG_GUEST_ERROR, "XIVE: pulling invalid NVT %x/%x !?\n",
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: pull invalid OS NVT %x/%x !?\n",
nvt_blk, nvt_idx);
}
@@ -466,7 +606,7 @@ static uint64_t xive_tm_pull_os_ctx(XivePresenter *xptr, XiveTCTX *tctx,
return qw1w2;
}
-static void xive_tctx_need_resend(XiveRouter *xrtr, XiveTCTX *tctx,
+static void xive_tctx_restore_nvp(XiveRouter *xrtr, XiveTCTX *tctx,
uint8_t nvt_blk, uint32_t nvt_idx)
{
XiveNVT nvt;
@@ -492,16 +632,6 @@ static void xive_tctx_need_resend(XiveRouter *xrtr, XiveTCTX *tctx,
uint8_t *regs = &tctx->regs[TM_QW1_OS];
regs[TM_IPB] |= ipb;
}
-
- /*
- * Always call xive_tctx_pipr_update(). Even if there were no
- * escalation triggered, there could be a pending interrupt which
- * was saved when the context was pulled and that we need to take
- * into account by recalculating the PIPR (which is not
- * saved/restored).
- * It will also raise the External interrupt signal if needed.
- */
- xive_tctx_pipr_update(tctx, TM_QW1_OS, 0xFF, 0); /* fxb */
}
/*
@@ -523,7 +653,17 @@ static void xive_tm_push_os_ctx(XivePresenter *xptr, XiveTCTX *tctx,
/* Check the interrupt pending bits */
if (vo) {
- xive_tctx_need_resend(XIVE_ROUTER(xptr), tctx, nvt_blk, nvt_idx);
+ xive_tctx_restore_nvp(XIVE_ROUTER(xptr), tctx, nvt_blk, nvt_idx);
+
+ /*
+ * Always call xive_tctx_recompute_from_ipb(). Even if there were no
+ * escalation triggered, there could be a pending interrupt which
+ * was saved when the context was pulled and that we need to take
+ * into account by recalculating the PIPR (which is not
+ * saved/restored).
+ * It will also raise the External interrupt signal if needed.
+ */
+ xive_tctx_pipr_recompute_from_ipb(tctx, TM_QW1_OS); /* fxb */
}
}
@@ -542,6 +682,8 @@ typedef struct XiveTmOp {
uint8_t page_offset;
uint32_t op_offset;
unsigned size;
+ bool hw_ok;
+ bool sw_ok;
void (*write_handler)(XivePresenter *xptr, XiveTCTX *tctx,
hwaddr offset,
uint64_t value, unsigned size);
@@ -554,34 +696,34 @@ static const XiveTmOp xive_tm_operations[] = {
* MMIOs below 2K : raw values and special operations without side
* effects
*/
- { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR, 1, xive_tm_set_os_cppr,
- NULL },
- { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2, 4, xive_tm_push_os_ctx,
- NULL },
- { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR, 1, xive_tm_set_hv_cppr,
- NULL },
- { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, xive_tm_vt_push,
- NULL },
- { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, NULL,
- xive_tm_vt_poll },
+ { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR, 1, true, true,
+ xive_tm_set_os_cppr, NULL },
+ { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2, 4, true, true,
+ xive_tm_push_os_ctx, NULL },
+ { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR, 1, true, true,
+ xive_tm_set_hv_cppr, NULL },
+ { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, false, true,
+ xive_tm_vt_push, NULL },
+ { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, true, true,
+ NULL, xive_tm_vt_poll },
/* MMIOs above 2K : special operations with side effects */
- { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG, 2, NULL,
- xive_tm_ack_os_reg },
- { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, xive_tm_set_os_pending,
- NULL },
- { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 4, NULL,
- xive_tm_pull_os_ctx },
- { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 8, NULL,
- xive_tm_pull_os_ctx },
- { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG, 2, NULL,
- xive_tm_ack_hv_reg },
- { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 4, NULL,
- xive_tm_pull_pool_ctx },
- { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 8, NULL,
- xive_tm_pull_pool_ctx },
- { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX, 1, NULL,
- xive_tm_pull_phys_ctx },
+ { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG, 2, true, false,
+ NULL, xive_tm_ack_os_reg },
+ { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, true, false,
+ xive_tm_set_os_pending, NULL },
+ { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 4, true, false,
+ NULL, xive_tm_pull_os_ctx },
+ { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 8, true, false,
+ NULL, xive_tm_pull_os_ctx },
+ { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG, 2, true, false,
+ NULL, xive_tm_ack_hv_reg },
+ { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 4, true, false,
+ NULL, xive_tm_pull_pool_ctx },
+ { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 8, true, false,
+ NULL, xive_tm_pull_pool_ctx },
+ { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX, 1, true, false,
+ NULL, xive_tm_pull_phys_ctx },
};
static const XiveTmOp xive2_tm_operations[] = {
@@ -589,50 +731,58 @@ static const XiveTmOp xive2_tm_operations[] = {
* MMIOs below 2K : raw values and special operations without side
* effects
*/
- { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR, 1, xive2_tm_set_os_cppr,
- NULL },
- { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2, 4, xive2_tm_push_os_ctx,
- NULL },
- { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2, 8, xive2_tm_push_os_ctx,
- NULL },
- { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_LGS, 1, xive_tm_set_os_lgs,
- NULL },
- { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR, 1, xive2_tm_set_hv_cppr,
- NULL },
- { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, xive_tm_vt_push,
- NULL },
- { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, NULL,
- xive_tm_vt_poll },
- { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_T, 1, xive2_tm_set_hv_target,
- NULL },
+ { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_CPPR, 1, true, true,
+ xive2_tm_set_os_cppr, NULL },
+ { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2, 4, true, true,
+ xive2_tm_push_os_ctx, NULL },
+ { XIVE_TM_HV_PAGE, TM_QW1_OS + TM_WORD2, 8, true, true,
+ xive2_tm_push_os_ctx, NULL },
+ { XIVE_TM_OS_PAGE, TM_QW1_OS + TM_LGS, 1, true, true,
+ xive_tm_set_os_lgs, NULL },
+ { XIVE_TM_HV_PAGE, TM_QW2_HV_POOL + TM_WORD2, 4, true, true,
+ xive2_tm_push_pool_ctx, NULL },
+ { XIVE_TM_HV_PAGE, TM_QW2_HV_POOL + TM_WORD2, 8, true, true,
+ xive2_tm_push_pool_ctx, NULL },
+ { XIVE_TM_HV_PAGE, TM_QW2_HV_POOL + TM_LGS, 1, true, true,
+ xive_tm_set_pool_lgs, NULL },
+ { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_CPPR, 1, true, true,
+ xive2_tm_set_hv_cppr, NULL },
+ { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, false, true,
+ xive2_tm_push_phys_ctx, NULL },
+ { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_WORD2, 1, true, true,
+ NULL, xive_tm_vt_poll },
+ { XIVE_TM_HV_PAGE, TM_QW3_HV_PHYS + TM_T, 1, true, true,
+ xive2_tm_set_hv_target, NULL },
/* MMIOs above 2K : special operations with side effects */
- { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG, 2, NULL,
- xive_tm_ack_os_reg },
- { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, xive_tm_set_os_pending,
- NULL },
- { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX_G2, 4, NULL,
- xive2_tm_pull_os_ctx },
- { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 4, NULL,
- xive2_tm_pull_os_ctx },
- { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 8, NULL,
- xive2_tm_pull_os_ctx },
- { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG, 2, NULL,
- xive_tm_ack_hv_reg },
- { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX_G2, 4, NULL,
- xive_tm_pull_pool_ctx },
- { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 4, NULL,
- xive_tm_pull_pool_ctx },
- { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 8, NULL,
- xive_tm_pull_pool_ctx },
- { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX_OL, 1, xive2_tm_pull_os_ctx_ol,
- NULL },
- { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX_G2, 4, NULL,
- xive_tm_pull_phys_ctx },
- { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX, 1, NULL,
- xive_tm_pull_phys_ctx },
- { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX_OL, 1, xive2_tm_pull_phys_ctx_ol,
- NULL },
+ { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_REG, 2, true, false,
+ NULL, xive_tm_ack_os_reg },
+ { XIVE_TM_OS_PAGE, TM_SPC_SET_OS_PENDING, 1, true, false,
+ xive2_tm_set_os_pending, NULL },
+ { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX_G2, 4, true, false,
+ NULL, xive2_tm_pull_os_ctx },
+ { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 4, true, false,
+ NULL, xive2_tm_pull_os_ctx },
+ { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX, 8, true, false,
+ NULL, xive2_tm_pull_os_ctx },
+ { XIVE_TM_HV_PAGE, TM_SPC_ACK_HV_REG, 2, true, false,
+ NULL, xive_tm_ack_hv_reg },
+ { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX_G2, 4, true, false,
+ NULL, xive2_tm_pull_pool_ctx },
+ { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 4, true, false,
+ NULL, xive2_tm_pull_pool_ctx },
+ { XIVE_TM_HV_PAGE, TM_SPC_PULL_POOL_CTX, 8, true, false,
+ NULL, xive2_tm_pull_pool_ctx },
+ { XIVE_TM_HV_PAGE, TM_SPC_PULL_OS_CTX_OL, 1, true, false,
+ xive2_tm_pull_os_ctx_ol, NULL },
+ { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX_G2, 4, true, false,
+ NULL, xive2_tm_pull_phys_ctx },
+ { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX, 1, true, false,
+ NULL, xive2_tm_pull_phys_ctx },
+ { XIVE_TM_HV_PAGE, TM_SPC_PULL_PHYS_CTX_OL, 1, true, false,
+ xive2_tm_pull_phys_ctx_ol, NULL },
+ { XIVE_TM_OS_PAGE, TM_SPC_ACK_OS_EL, 1, true, false,
+ xive2_tm_ack_os_el, NULL },
};
static const XiveTmOp *xive_tm_find_op(XivePresenter *xptr, hwaddr offset,
@@ -674,21 +824,31 @@ void xive_tctx_tm_write(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
uint64_t value, unsigned size)
{
const XiveTmOp *xto;
+ uint8_t ring = offset & TM_RING_OFFSET;
+ bool is_valid = xive_ring_valid(tctx, ring);
+ bool hw_owned = is_valid;
trace_xive_tctx_tm_write(tctx->cs->cpu_index, offset, size, value);
/*
- * TODO: check V bit in Q[0-3]W2
- */
-
- /*
* First, check for special operations in the 2K region
*/
+ xto = xive_tm_find_op(tctx->xptr, offset, size, true);
+ if (xto) {
+ if (hw_owned && !xto->hw_ok) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: undefined write to HW TIMA "
+ "@%"HWADDR_PRIx" size %d\n", offset, size);
+ }
+ if (!hw_owned && !xto->sw_ok) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: undefined write to SW TIMA "
+ "@%"HWADDR_PRIx" size %d\n", offset, size);
+ }
+ }
+
if (offset & TM_SPECIAL_OP) {
- xto = xive_tm_find_op(tctx->xptr, offset, size, true);
if (!xto) {
qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid write access at TIMA "
- "@%"HWADDR_PRIx"\n", offset);
+ "@%"HWADDR_PRIx" size %d\n", offset, size);
} else {
xto->write_handler(xptr, tctx, offset, value, size);
}
@@ -698,7 +858,6 @@ void xive_tctx_tm_write(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
/*
* Then, for special operations in the region below 2K.
*/
- xto = xive_tm_find_op(tctx->xptr, offset, size, true);
if (xto) {
xto->write_handler(xptr, tctx, offset, value, size);
return;
@@ -707,6 +866,11 @@ void xive_tctx_tm_write(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
/*
* Finish with raw access to the register values
*/
+ if (hw_owned) {
+ /* Store context operations are dangerous when context is valid */
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: undefined write to HW TIMA "
+ "@%"HWADDR_PRIx" size %d\n", offset, size);
+ }
xive_tm_raw_write(tctx, offset, value, size);
}
@@ -714,20 +878,30 @@ uint64_t xive_tctx_tm_read(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
unsigned size)
{
const XiveTmOp *xto;
+ uint8_t ring = offset & TM_RING_OFFSET;
+ bool is_valid = xive_ring_valid(tctx, ring);
+ bool hw_owned = is_valid;
uint64_t ret;
- /*
- * TODO: check V bit in Q[0-3]W2
- */
+ xto = xive_tm_find_op(tctx->xptr, offset, size, false);
+ if (xto) {
+ if (hw_owned && !xto->hw_ok) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: undefined read to HW TIMA "
+ "@%"HWADDR_PRIx" size %d\n", offset, size);
+ }
+ if (!hw_owned && !xto->sw_ok) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: undefined read to SW TIMA "
+ "@%"HWADDR_PRIx" size %d\n", offset, size);
+ }
+ }
/*
* First, check for special operations in the 2K region
*/
if (offset & TM_SPECIAL_OP) {
- xto = xive_tm_find_op(tctx->xptr, offset, size, false);
if (!xto) {
qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid read access to TIMA"
- "@%"HWADDR_PRIx"\n", offset);
+ "@%"HWADDR_PRIx" size %d\n", offset, size);
return -1;
}
ret = xto->read_handler(xptr, tctx, offset, size);
@@ -737,7 +911,6 @@ uint64_t xive_tctx_tm_read(XivePresenter *xptr, XiveTCTX *tctx, hwaddr offset,
/*
* Then, for special operations in the region below 2K.
*/
- xto = xive_tm_find_op(tctx->xptr, offset, size, false);
if (xto) {
ret = xto->read_handler(xptr, tctx, offset, size);
goto out;
@@ -1191,6 +1364,7 @@ static uint64_t xive_source_esb_read(void *opaque, hwaddr addr, unsigned size)
/* Forward the source event notification for routing */
if (ret) {
+ trace_xive_source_notify(srcno);
xive_source_notify(xsrc, srcno);
}
break;
@@ -1286,6 +1460,8 @@ out:
/* Forward the source event notification for routing */
if (notify) {
xive_source_notify(xsrc, srcno);
+ } else {
+ trace_xive_source_blocked(srcno);
}
}
@@ -1672,8 +1848,8 @@ uint32_t xive_get_vpgroup_size(uint32_t nvp_index)
return 1U << (first_zero + 1);
}
-static uint8_t xive_get_group_level(bool crowd, bool ignore,
- uint32_t nvp_blk, uint32_t nvp_index)
+uint8_t xive_get_group_level(bool crowd, bool ignore,
+ uint32_t nvp_blk, uint32_t nvp_index)
{
int first_zero;
uint8_t level;
@@ -1791,15 +1967,14 @@ int xive_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx,
* This is our simple Xive Presenter Engine model. It is merged in the
* Router as it does not require an extra object.
*/
-bool xive_presenter_notify(XiveFabric *xfb, uint8_t format,
+bool xive_presenter_match(XiveFabric *xfb, uint8_t format,
uint8_t nvt_blk, uint32_t nvt_idx,
bool crowd, bool cam_ignore, uint8_t priority,
- uint32_t logic_serv, bool *precluded)
+ uint32_t logic_serv, XiveTCTXMatch *match)
{
XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xfb);
- XiveTCTXMatch match = { .tctx = NULL, .ring = 0, .precluded = false };
- uint8_t group_level;
- int count;
+
+ memset(match, 0, sizeof(*match));
/*
* Ask the machine to scan the interrupt controllers for a match.
@@ -1824,22 +1999,8 @@ bool xive_presenter_notify(XiveFabric *xfb, uint8_t format,
* a new command to the presenters (the equivalent of the "assign"
* power bus command in the documented full notify sequence.
*/
- count = xfc->match_nvt(xfb, format, nvt_blk, nvt_idx, crowd, cam_ignore,
- priority, logic_serv, &match);
- if (count < 0) {
- return false;
- }
-
- /* handle CPU exception delivery */
- if (count) {
- group_level = xive_get_group_level(crowd, cam_ignore, nvt_blk, nvt_idx);
- trace_xive_presenter_notify(nvt_blk, nvt_idx, match.ring, group_level);
- xive_tctx_pipr_update(match.tctx, match.ring, priority, group_level);
- } else {
- *precluded = match.precluded;
- }
-
- return !!count;
+ return xfc->match_nvt(xfb, format, nvt_blk, nvt_idx, crowd, cam_ignore,
+ priority, logic_serv, match);
}
/*
@@ -1876,7 +2037,7 @@ void xive_router_end_notify(XiveRouter *xrtr, XiveEAS *eas)
uint8_t nvt_blk;
uint32_t nvt_idx;
XiveNVT nvt;
- bool found, precluded;
+ XiveTCTXMatch match;
uint8_t end_blk = xive_get_field64(EAS_END_BLOCK, eas->w);
uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w);
@@ -1956,16 +2117,16 @@ void xive_router_end_notify(XiveRouter *xrtr, XiveEAS *eas)
return;
}
- found = xive_presenter_notify(xrtr->xfb, format, nvt_blk, nvt_idx,
- false /* crowd */,
- xive_get_field32(END_W7_F0_IGNORE, end.w7),
- priority,
- xive_get_field32(END_W7_F1_LOG_SERVER_ID, end.w7),
- &precluded);
- /* we don't support VP-group notification on P9, so precluded is not used */
/* TODO: Auto EOI. */
-
- if (found) {
+ /* we don't support VP-group notification on P9, so precluded is not used */
+ if (xive_presenter_match(xrtr->xfb, format, nvt_blk, nvt_idx,
+ false /* crowd */,
+ xive_get_field32(END_W7_F0_IGNORE, end.w7),
+ priority,
+ xive_get_field32(END_W7_F1_LOG_SERVER_ID, end.w7),
+ &match)) {
+ trace_xive_presenter_notify(nvt_blk, nvt_idx, match.ring, 0);
+ xive_tctx_pipr_present(match.tctx, match.ring, priority, 0);
return;
}
diff --git a/hw/intc/xive2.c b/hw/intc/xive2.c
index a08cf90..fbb3b79 100644
--- a/hw/intc/xive2.c
+++ b/hw/intc/xive2.c
@@ -19,6 +19,13 @@
#include "hw/ppc/xive2_regs.h"
#include "trace.h"
+static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk,
+ uint32_t end_idx, uint32_t end_data,
+ bool redistribute);
+
+static int xive2_tctx_get_nvp_indexes(XiveTCTX *tctx, uint8_t ring,
+ uint8_t *nvp_blk, uint32_t *nvp_idx);
+
uint32_t xive2_router_get_config(Xive2Router *xrtr)
{
Xive2RouterClass *xrc = XIVE2_ROUTER_GET_CLASS(xrtr);
@@ -88,6 +95,35 @@ static void xive2_nvgc_set_backlog(Xive2Nvgc *nvgc, uint8_t priority,
}
}
+static uint32_t xive2_nvgc_get_idx(uint32_t nvp_idx, uint8_t group)
+{
+ uint32_t nvgc_idx;
+
+ if (group > 0) {
+ nvgc_idx = (nvp_idx & (0xffffffffULL << group)) |
+ ((1 << (group - 1)) - 1);
+ } else {
+ nvgc_idx = nvp_idx;
+ }
+
+ return nvgc_idx;
+}
+
+static uint8_t xive2_nvgc_get_blk(uint8_t nvp_blk, uint8_t crowd)
+{
+ uint8_t nvgc_blk;
+
+ if (crowd > 0) {
+ crowd = (crowd == 3) ? 4 : crowd;
+ nvgc_blk = (nvp_blk & (0xffffffffULL << crowd)) |
+ ((1 << (crowd - 1)) - 1);
+ } else {
+ nvgc_blk = nvp_blk;
+ }
+
+ return nvgc_blk;
+}
+
uint64_t xive2_presenter_nvgc_backlog_op(XivePresenter *xptr,
bool crowd,
uint8_t blk, uint32_t idx,
@@ -188,12 +224,27 @@ void xive2_eas_pic_print_info(Xive2Eas *eas, uint32_t lisn, GString *buf)
(uint32_t) xive_get_field64(EAS2_END_DATA, eas->w));
}
+#define XIVE2_QSIZE_CHUNK_CL 128
+#define XIVE2_QSIZE_CHUNK_4k 4096
+/* Calculate max number of queue entries for an END */
+static uint32_t xive2_end_get_qentries(Xive2End *end)
+{
+ uint32_t w3 = end->w3;
+ uint32_t qsize = xive_get_field32(END2_W3_QSIZE, w3);
+ if (xive_get_field32(END2_W3_CL, w3)) {
+ g_assert(qsize <= 4);
+ return (XIVE2_QSIZE_CHUNK_CL << qsize) / sizeof(uint32_t);
+ } else {
+ g_assert(qsize <= 12);
+ return (XIVE2_QSIZE_CHUNK_4k << qsize) / sizeof(uint32_t);
+ }
+}
+
void xive2_end_queue_pic_print_info(Xive2End *end, uint32_t width, GString *buf)
{
uint64_t qaddr_base = xive2_end_qaddr(end);
- uint32_t qsize = xive_get_field32(END2_W3_QSIZE, end->w3);
uint32_t qindex = xive_get_field32(END2_W1_PAGE_OFF, end->w1);
- uint32_t qentries = 1 << (qsize + 10);
+ uint32_t qentries = xive2_end_get_qentries(end);
int i;
/*
@@ -223,8 +274,7 @@ void xive2_end_pic_print_info(Xive2End *end, uint32_t end_idx, GString *buf)
uint64_t qaddr_base = xive2_end_qaddr(end);
uint32_t qindex = xive_get_field32(END2_W1_PAGE_OFF, end->w1);
uint32_t qgen = xive_get_field32(END2_W1_GENERATION, end->w1);
- uint32_t qsize = xive_get_field32(END2_W3_QSIZE, end->w3);
- uint32_t qentries = 1 << (qsize + 10);
+ uint32_t qentries = xive2_end_get_qentries(end);
uint32_t nvx_blk = xive_get_field32(END2_W6_VP_BLOCK, end->w6);
uint32_t nvx_idx = xive_get_field32(END2_W6_VP_OFFSET, end->w6);
@@ -341,13 +391,12 @@ void xive2_nvgc_pic_print_info(Xive2Nvgc *nvgc, uint32_t nvgc_idx, GString *buf)
static void xive2_end_enqueue(Xive2End *end, uint32_t data)
{
uint64_t qaddr_base = xive2_end_qaddr(end);
- uint32_t qsize = xive_get_field32(END2_W3_QSIZE, end->w3);
uint32_t qindex = xive_get_field32(END2_W1_PAGE_OFF, end->w1);
uint32_t qgen = xive_get_field32(END2_W1_GENERATION, end->w1);
uint64_t qaddr = qaddr_base + (qindex << 2);
uint32_t qdata = cpu_to_be32((qgen << 31) | (data & 0x7fffffff));
- uint32_t qentries = 1 << (qsize + 10);
+ uint32_t qentries = xive2_end_get_qentries(end);
if (dma_memory_write(&address_space_memory, qaddr, &qdata, sizeof(qdata),
MEMTXATTRS_UNSPECIFIED)) {
@@ -361,8 +410,8 @@ static void xive2_end_enqueue(Xive2End *end, uint32_t data)
qgen ^= 1;
end->w1 = xive_set_field32(END2_W1_GENERATION, end->w1, qgen);
- /* TODO(PowerNV): reset GF bit on a cache watch operation */
- end->w1 = xive_set_field32(END2_W1_GEN_FLIPPED, end->w1, qgen);
+ /* Set gen flipped to 1, it gets reset on a cache watch operation */
+ end->w1 = xive_set_field32(END2_W1_GEN_FLIPPED, end->w1, 1);
}
end->w1 = xive_set_field32(END2_W1_PAGE_OFF, end->w1, qindex);
}
@@ -492,12 +541,13 @@ static void xive2_presenter_backlog_decr(XivePresenter *xptr,
*/
static void xive2_tctx_save_ctx(Xive2Router *xrtr, XiveTCTX *tctx,
- uint8_t nvp_blk, uint32_t nvp_idx,
- uint8_t ring)
+ uint8_t ring,
+ uint8_t nvp_blk, uint32_t nvp_idx)
{
CPUPPCState *env = &POWERPC_CPU(tctx->cs)->env;
uint32_t pir = env->spr_cb[SPR_PIR].default_value;
Xive2Nvp nvp;
+ uint8_t *sig_regs = xive_tctx_signal_regs(tctx, ring);
uint8_t *regs = &tctx->regs[ring];
if (xive2_router_get_nvp(xrtr, nvp_blk, nvp_idx, &nvp)) {
@@ -533,7 +583,14 @@ static void xive2_tctx_save_ctx(Xive2Router *xrtr, XiveTCTX *tctx,
}
nvp.w2 = xive_set_field32(NVP2_W2_IPB, nvp.w2, regs[TM_IPB]);
- nvp.w2 = xive_set_field32(NVP2_W2_CPPR, nvp.w2, regs[TM_CPPR]);
+
+ if ((nvp.w0 & NVP2_W0_P) || ring != TM_QW2_HV_POOL) {
+ /*
+ * Non-pool contexts always save CPPR (ignore p bit). XXX: Clarify
+ * whether that is the correct behaviour.
+ */
+ nvp.w2 = xive_set_field32(NVP2_W2_CPPR, nvp.w2, sig_regs[TM_CPPR]);
+ }
if (nvp.w0 & NVP2_W0_L) {
/*
* Typically not used. If LSMFB is restored with 0, it will
@@ -555,6 +612,7 @@ static void xive2_tctx_save_ctx(Xive2Router *xrtr, XiveTCTX *tctx,
xive2_router_write_nvp(xrtr, nvp_blk, nvp_idx, &nvp, 1);
}
+/* POOL cam is the same as OS cam encoding */
static void xive2_cam_decode(uint32_t cam, uint8_t *nvp_blk,
uint32_t *nvp_idx, bool *valid, bool *hw)
{
@@ -584,6 +642,67 @@ static uint32_t xive2_tctx_hw_cam_line(XivePresenter *xptr, XiveTCTX *tctx)
return xive2_nvp_cam_line(blk, 1 << tid_shift | (pir & tid_mask));
}
+static void xive2_redistribute(Xive2Router *xrtr, XiveTCTX *tctx, uint8_t ring)
+{
+ uint8_t *sig_regs = xive_tctx_signal_regs(tctx, ring);
+ uint8_t nsr = sig_regs[TM_NSR];
+ uint8_t pipr = sig_regs[TM_PIPR];
+ uint8_t crowd = NVx_CROWD_LVL(nsr);
+ uint8_t group = NVx_GROUP_LVL(nsr);
+ uint8_t nvgc_blk, end_blk, nvp_blk;
+ uint32_t nvgc_idx, end_idx, nvp_idx;
+ Xive2Nvgc nvgc;
+ uint8_t prio_limit;
+ uint32_t cfg;
+
+ /* redistribution is only for group/crowd interrupts */
+ if (!xive_nsr_indicates_group_exception(ring, nsr)) {
+ return;
+ }
+
+ /* Don't check return code since ring is expected to be invalidated */
+ xive2_tctx_get_nvp_indexes(tctx, ring, &nvp_blk, &nvp_idx);
+
+ trace_xive_redistribute(tctx->cs->cpu_index, ring, nvp_blk, nvp_idx);
+
+ trace_xive_redistribute(tctx->cs->cpu_index, ring, nvp_blk, nvp_idx);
+ /* convert crowd/group to blk/idx */
+ nvgc_idx = xive2_nvgc_get_idx(nvp_idx, group);
+ nvgc_blk = xive2_nvgc_get_blk(nvp_blk, crowd);
+
+ /* Use blk/idx to retrieve the NVGC */
+ if (xive2_router_get_nvgc(xrtr, crowd, nvgc_blk, nvgc_idx, &nvgc)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: no %s %x/%x\n",
+ crowd ? "NVC" : "NVG", nvgc_blk, nvgc_idx);
+ return;
+ }
+
+ /* retrieve the END blk/idx from the NVGC */
+ end_blk = xive_get_field32(NVGC2_W1_END_BLK, nvgc.w1);
+ end_idx = xive_get_field32(NVGC2_W1_END_IDX, nvgc.w1);
+
+ /* determine number of priorities being used */
+ cfg = xive2_router_get_config(xrtr);
+ if (cfg & XIVE2_EN_VP_GRP_PRIORITY) {
+ prio_limit = 1 << GETFIELD(NVGC2_W1_PSIZE, nvgc.w1);
+ } else {
+ prio_limit = 1 << GETFIELD(XIVE2_VP_INT_PRIO, cfg);
+ }
+
+ /* add priority offset to end index */
+ end_idx += pipr % prio_limit;
+
+ /* trigger the group END */
+ xive2_router_end_notify(xrtr, end_blk, end_idx, 0, true);
+
+ /* clear interrupt indication for the context */
+ sig_regs[TM_NSR] = 0;
+ sig_regs[TM_PIPR] = sig_regs[TM_CPPR];
+ xive_tctx_reset_signal(tctx, ring);
+}
+
+static void xive2_tctx_process_pending(XiveTCTX *tctx, uint8_t sig_ring);
+
static uint64_t xive2_tm_pull_ctx(XivePresenter *xptr, XiveTCTX *tctx,
hwaddr offset, unsigned size, uint8_t ring)
{
@@ -595,10 +714,11 @@ static uint64_t xive2_tm_pull_ctx(XivePresenter *xptr, XiveTCTX *tctx,
uint8_t cur_ring;
bool valid;
bool do_save;
+ uint8_t nsr;
xive2_cam_decode(cam, &nvp_blk, &nvp_idx, &valid, &do_save);
- if (!valid) {
+ if (xive2_tctx_get_nvp_indexes(tctx, ring, &nvp_blk, &nvp_idx)) {
qemu_log_mask(LOG_GUEST_ERROR, "XIVE: pulling invalid NVP %x/%x !?\n",
nvp_blk, nvp_idx);
}
@@ -608,21 +728,53 @@ static uint64_t xive2_tm_pull_ctx(XivePresenter *xptr, XiveTCTX *tctx,
cur_ring += XIVE_TM_RING_SIZE) {
uint32_t ringw2 = xive_tctx_word2(&tctx->regs[cur_ring]);
uint32_t ringw2_new = xive_set_field32(TM2_QW1W2_VO, ringw2, 0);
+ bool is_valid = !!(xive_get_field32(TM2_QW1W2_VO, ringw2));
+ uint8_t *sig_regs;
+
memcpy(&tctx->regs[cur_ring + TM_WORD2], &ringw2_new, 4);
+
+ /* Skip the rest for USER or invalid contexts */
+ if ((cur_ring == TM_QW0_USER) || !is_valid) {
+ continue;
+ }
+
+ /* Active group/crowd interrupts need to be redistributed */
+ sig_regs = xive_tctx_signal_regs(tctx, ring);
+ nsr = sig_regs[TM_NSR];
+ if (xive_nsr_indicates_group_exception(cur_ring, nsr)) {
+ /* Ensure ring matches NSR (for HV NSR POOL vs PHYS rings) */
+ if (cur_ring == xive_nsr_exception_ring(cur_ring, nsr)) {
+ xive2_redistribute(xrtr, tctx, cur_ring);
+ }
+ }
+
+ /*
+ * Lower external interrupt line of requested ring and below except for
+ * USER, which doesn't exist.
+ */
+ if (xive_nsr_indicates_exception(cur_ring, nsr)) {
+ if (cur_ring == xive_nsr_exception_ring(cur_ring, nsr)) {
+ xive_tctx_reset_signal(tctx, cur_ring);
+ }
+ }
}
- if (xive2_router_get_config(xrtr) & XIVE2_VP_SAVE_RESTORE && do_save) {
- xive2_tctx_save_ctx(xrtr, tctx, nvp_blk, nvp_idx, ring);
+ if (ring == TM_QW2_HV_POOL) {
+ /* Re-check phys for interrupts if pool was disabled */
+ nsr = tctx->regs[TM_QW3_HV_PHYS + TM_NSR];
+ if (xive_nsr_indicates_exception(TM_QW3_HV_PHYS, nsr)) {
+ /* Ring must be PHYS because POOL would have been redistributed */
+ g_assert(xive_nsr_exception_ring(TM_QW3_HV_PHYS, nsr) ==
+ TM_QW3_HV_PHYS);
+ } else {
+ xive2_tctx_process_pending(tctx, TM_QW3_HV_PHYS);
+ }
}
- /*
- * Lower external interrupt line of requested ring and below except for
- * USER, which doesn't exist.
- */
- for (cur_ring = TM_QW1_OS; cur_ring <= ring;
- cur_ring += XIVE_TM_RING_SIZE) {
- xive_tctx_reset_signal(tctx, cur_ring);
+ if (xive2_router_get_config(xrtr) & XIVE2_VP_SAVE_RESTORE && do_save) {
+ xive2_tctx_save_ctx(xrtr, tctx, ring, nvp_blk, nvp_idx);
}
+
return target_ringw2;
}
@@ -632,6 +784,18 @@ uint64_t xive2_tm_pull_os_ctx(XivePresenter *xptr, XiveTCTX *tctx,
return xive2_tm_pull_ctx(xptr, tctx, offset, size, TM_QW1_OS);
}
+uint64_t xive2_tm_pull_pool_ctx(XivePresenter *xptr, XiveTCTX *tctx,
+ hwaddr offset, unsigned size)
+{
+ return xive2_tm_pull_ctx(xptr, tctx, offset, size, TM_QW2_HV_POOL);
+}
+
+uint64_t xive2_tm_pull_phys_ctx(XivePresenter *xptr, XiveTCTX *tctx,
+ hwaddr offset, unsigned size)
+{
+ return xive2_tm_pull_ctx(xptr, tctx, offset, size, TM_QW3_HV_PHYS);
+}
+
#define REPORT_LINE_GEN1_SIZE 16
static void xive2_tm_report_line_gen1(XiveTCTX *tctx, uint8_t *data,
@@ -741,12 +905,15 @@ void xive2_tm_pull_phys_ctx_ol(XivePresenter *xptr, XiveTCTX *tctx,
xive2_tm_pull_ctx_ol(xptr, tctx, offset, value, size, TM_QW3_HV_PHYS);
}
-static uint8_t xive2_tctx_restore_os_ctx(Xive2Router *xrtr, XiveTCTX *tctx,
- uint8_t nvp_blk, uint32_t nvp_idx,
- Xive2Nvp *nvp)
+static uint8_t xive2_tctx_restore_ctx(Xive2Router *xrtr, XiveTCTX *tctx,
+ uint8_t ring,
+ uint8_t nvp_blk, uint32_t nvp_idx,
+ Xive2Nvp *nvp)
{
CPUPPCState *env = &POWERPC_CPU(tctx->cs)->env;
uint32_t pir = env->spr_cb[SPR_PIR].default_value;
+ uint8_t *sig_regs = xive_tctx_signal_regs(tctx, ring);
+ uint8_t *regs = &tctx->regs[ring];
uint8_t cppr;
if (!xive2_nvp_is_hw(nvp)) {
@@ -759,10 +926,10 @@ static uint8_t xive2_tctx_restore_os_ctx(Xive2Router *xrtr, XiveTCTX *tctx,
nvp->w2 = xive_set_field32(NVP2_W2_CPPR, nvp->w2, 0);
xive2_router_write_nvp(xrtr, nvp_blk, nvp_idx, nvp, 2);
- tctx->regs[TM_QW1_OS + TM_CPPR] = cppr;
- tctx->regs[TM_QW1_OS + TM_LSMFB] = xive_get_field32(NVP2_W2_LSMFB, nvp->w2);
- tctx->regs[TM_QW1_OS + TM_LGS] = xive_get_field32(NVP2_W2_LGS, nvp->w2);
- tctx->regs[TM_QW1_OS + TM_T] = xive_get_field32(NVP2_W2_T, nvp->w2);
+ sig_regs[TM_CPPR] = cppr;
+ regs[TM_LSMFB] = xive_get_field32(NVP2_W2_LSMFB, nvp->w2);
+ regs[TM_LGS] = xive_get_field32(NVP2_W2_LGS, nvp->w2);
+ regs[TM_T] = xive_get_field32(NVP2_W2_T, nvp->w2);
nvp->w1 = xive_set_field32(NVP2_W1_CO, nvp->w1, 1);
nvp->w1 = xive_set_field32(NVP2_W1_CO_THRID_VALID, nvp->w1, 1);
@@ -771,9 +938,18 @@ static uint8_t xive2_tctx_restore_os_ctx(Xive2Router *xrtr, XiveTCTX *tctx,
/*
* Checkout privilege: 0:OS, 1:Pool, 2:Hard
*
- * TODO: we only support OS push/pull
+ * TODO: we don't support hard push/pull
*/
- nvp->w1 = xive_set_field32(NVP2_W1_CO_PRIV, nvp->w1, 0);
+ switch (ring) {
+ case TM_QW1_OS:
+ nvp->w1 = xive_set_field32(NVP2_W1_CO_PRIV, nvp->w1, 0);
+ break;
+ case TM_QW2_HV_POOL:
+ nvp->w1 = xive_set_field32(NVP2_W1_CO_PRIV, nvp->w1, 1);
+ break;
+ default:
+ g_assert_not_reached();
+ }
xive2_router_write_nvp(xrtr, nvp_blk, nvp_idx, nvp, 1);
@@ -781,18 +957,14 @@ static uint8_t xive2_tctx_restore_os_ctx(Xive2Router *xrtr, XiveTCTX *tctx,
return cppr;
}
-static void xive2_tctx_need_resend(Xive2Router *xrtr, XiveTCTX *tctx,
+/* Restore TIMA VP context from NVP backlog */
+static void xive2_tctx_restore_nvp(Xive2Router *xrtr, XiveTCTX *tctx,
+ uint8_t ring,
uint8_t nvp_blk, uint32_t nvp_idx,
bool do_restore)
{
- XivePresenter *xptr = XIVE_PRESENTER(xrtr);
+ uint8_t *regs = &tctx->regs[ring];
uint8_t ipb;
- uint8_t backlog_level;
- uint8_t group_level;
- uint8_t first_group;
- uint8_t backlog_prio;
- uint8_t group_prio;
- uint8_t *regs = &tctx->regs[TM_QW1_OS];
Xive2Nvp nvp;
/*
@@ -812,9 +984,8 @@ static void xive2_tctx_need_resend(Xive2Router *xrtr, XiveTCTX *tctx,
}
/* Automatically restore thread context registers */
- if (xive2_router_get_config(xrtr) & XIVE2_VP_SAVE_RESTORE &&
- do_restore) {
- xive2_tctx_restore_os_ctx(xrtr, tctx, nvp_blk, nvp_idx, &nvp);
+ if (xive2_router_get_config(xrtr) & XIVE2_VP_SAVE_RESTORE && do_restore) {
+ xive2_tctx_restore_ctx(xrtr, tctx, ring, nvp_blk, nvp_idx, &nvp);
}
ipb = xive_get_field32(NVP2_W2_IPB, nvp.w2);
@@ -822,143 +993,230 @@ static void xive2_tctx_need_resend(Xive2Router *xrtr, XiveTCTX *tctx,
nvp.w2 = xive_set_field32(NVP2_W2_IPB, nvp.w2, 0);
xive2_router_write_nvp(xrtr, nvp_blk, nvp_idx, &nvp, 2);
}
+ /* IPB bits in the backlog are merged with the TIMA IPB bits */
regs[TM_IPB] |= ipb;
- backlog_prio = xive_ipb_to_pipr(ipb);
- backlog_level = 0;
-
- first_group = xive_get_field32(NVP2_W0_PGOFIRST, nvp.w0);
- if (first_group && regs[TM_LSMFB] < backlog_prio) {
- group_prio = xive2_presenter_backlog_scan(xptr, nvp_blk, nvp_idx,
- first_group, &group_level);
- regs[TM_LSMFB] = group_prio;
- if (regs[TM_LGS] && group_prio < backlog_prio) {
- /* VP can take a group interrupt */
- xive2_presenter_backlog_decr(xptr, nvp_blk, nvp_idx,
- group_prio, group_level);
- backlog_prio = group_prio;
- backlog_level = group_level;
- }
- }
-
- /*
- * Compute the PIPR based on the restored state.
- * It will raise the External interrupt signal if needed.
- */
- xive_tctx_pipr_update(tctx, TM_QW1_OS, backlog_prio, backlog_level);
}
/*
- * Updating the OS CAM line can trigger a resend of interrupt
+ * Updating the ring CAM line can trigger a resend of interrupt
*/
-void xive2_tm_push_os_ctx(XivePresenter *xptr, XiveTCTX *tctx,
- hwaddr offset, uint64_t value, unsigned size)
+static void xive2_tm_push_ctx(XivePresenter *xptr, XiveTCTX *tctx,
+ hwaddr offset, uint64_t value, unsigned size,
+ uint8_t ring)
{
uint32_t cam;
- uint32_t qw1w2;
- uint64_t qw1dw1;
+ uint32_t w2;
+ uint64_t dw1;
uint8_t nvp_blk;
uint32_t nvp_idx;
- bool vo;
+ bool v;
bool do_restore;
+ if (xive_ring_valid(tctx, ring)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: Attempt to push VP to enabled"
+ " ring 0x%02x\n", ring);
+ return;
+ }
+
/* First update the thead context */
switch (size) {
+ case 1:
+ tctx->regs[ring + TM_WORD2] = value & 0xff;
+ cam = xive2_tctx_hw_cam_line(xptr, tctx);
+ cam |= ((value & 0xc0) << 24); /* V and H bits */
+ break;
case 4:
cam = value;
- qw1w2 = cpu_to_be32(cam);
- memcpy(&tctx->regs[TM_QW1_OS + TM_WORD2], &qw1w2, 4);
+ w2 = cpu_to_be32(cam);
+ memcpy(&tctx->regs[ring + TM_WORD2], &w2, 4);
break;
case 8:
cam = value >> 32;
- qw1dw1 = cpu_to_be64(value);
- memcpy(&tctx->regs[TM_QW1_OS + TM_WORD2], &qw1dw1, 8);
+ dw1 = cpu_to_be64(value);
+ memcpy(&tctx->regs[ring + TM_WORD2], &dw1, 8);
break;
default:
g_assert_not_reached();
}
- xive2_cam_decode(cam, &nvp_blk, &nvp_idx, &vo, &do_restore);
+ xive2_cam_decode(cam, &nvp_blk, &nvp_idx, &v, &do_restore);
/* Check the interrupt pending bits */
- if (vo) {
- xive2_tctx_need_resend(XIVE2_ROUTER(xptr), tctx, nvp_blk, nvp_idx,
- do_restore);
+ if (v) {
+ Xive2Router *xrtr = XIVE2_ROUTER(xptr);
+ uint8_t cur_ring;
+
+ xive2_tctx_restore_nvp(xrtr, tctx, ring,
+ nvp_blk, nvp_idx, do_restore);
+
+ for (cur_ring = TM_QW1_OS; cur_ring <= ring;
+ cur_ring += XIVE_TM_RING_SIZE) {
+ uint8_t *sig_regs = xive_tctx_signal_regs(tctx, cur_ring);
+ uint8_t nsr = sig_regs[TM_NSR];
+
+ if (!xive_ring_valid(tctx, cur_ring)) {
+ continue;
+ }
+
+ if (cur_ring == TM_QW2_HV_POOL) {
+ if (xive_nsr_indicates_exception(cur_ring, nsr)) {
+ g_assert(xive_nsr_exception_ring(cur_ring, nsr) ==
+ TM_QW3_HV_PHYS);
+ xive2_redistribute(xrtr, tctx,
+ xive_nsr_exception_ring(ring, nsr));
+ }
+ xive2_tctx_process_pending(tctx, TM_QW3_HV_PHYS);
+ break;
+ }
+ xive2_tctx_process_pending(tctx, cur_ring);
+ }
}
}
+void xive2_tm_push_os_ctx(XivePresenter *xptr, XiveTCTX *tctx,
+ hwaddr offset, uint64_t value, unsigned size)
+{
+ xive2_tm_push_ctx(xptr, tctx, offset, value, size, TM_QW1_OS);
+}
+
+void xive2_tm_push_pool_ctx(XivePresenter *xptr, XiveTCTX *tctx,
+ hwaddr offset, uint64_t value, unsigned size)
+{
+ xive2_tm_push_ctx(xptr, tctx, offset, value, size, TM_QW2_HV_POOL);
+}
+
+void xive2_tm_push_phys_ctx(XivePresenter *xptr, XiveTCTX *tctx,
+ hwaddr offset, uint64_t value, unsigned size)
+{
+ xive2_tm_push_ctx(xptr, tctx, offset, value, size, TM_QW3_HV_PHYS);
+}
+
+/* returns -1 if ring is invalid, but still populates block and index */
static int xive2_tctx_get_nvp_indexes(XiveTCTX *tctx, uint8_t ring,
- uint32_t *nvp_blk, uint32_t *nvp_idx)
+ uint8_t *nvp_blk, uint32_t *nvp_idx)
{
- uint32_t w2, cam;
+ uint32_t w2;
+ uint32_t cam = 0;
+ int rc = 0;
w2 = xive_tctx_word2(&tctx->regs[ring]);
switch (ring) {
case TM_QW1_OS:
if (!(be32_to_cpu(w2) & TM2_QW1W2_VO)) {
- return -1;
+ rc = -1;
}
cam = xive_get_field32(TM2_QW1W2_OS_CAM, w2);
break;
case TM_QW2_HV_POOL:
if (!(be32_to_cpu(w2) & TM2_QW2W2_VP)) {
- return -1;
+ rc = -1;
}
cam = xive_get_field32(TM2_QW2W2_POOL_CAM, w2);
break;
case TM_QW3_HV_PHYS:
if (!(be32_to_cpu(w2) & TM2_QW3W2_VT)) {
- return -1;
+ rc = -1;
}
cam = xive2_tctx_hw_cam_line(tctx->xptr, tctx);
break;
default:
- return -1;
+ rc = -1;
}
*nvp_blk = xive2_nvp_blk(cam);
*nvp_idx = xive2_nvp_idx(cam);
- return 0;
+ return rc;
}
-static void xive2_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
+static void xive2_tctx_accept_el(XivePresenter *xptr, XiveTCTX *tctx,
+ uint8_t ring, uint8_t cl_ring)
{
- uint8_t *regs = &tctx->regs[ring];
- Xive2Router *xrtr = XIVE2_ROUTER(tctx->xptr);
- uint8_t old_cppr, backlog_prio, first_group, group_level = 0;
- uint8_t pipr_min, lsmfb_min, ring_min;
- bool group_enabled;
- uint32_t nvp_blk, nvp_idx;
+ uint64_t rd;
+ Xive2Router *xrtr = XIVE2_ROUTER(xptr);
+ uint32_t nvp_idx, xive2_cfg;
+ uint8_t nvp_blk;
Xive2Nvp nvp;
- int rc;
+ uint64_t phys_addr;
+ uint8_t OGen = 0;
- trace_xive_tctx_set_cppr(tctx->cs->cpu_index, ring,
- regs[TM_IPB], regs[TM_PIPR],
- cppr, regs[TM_NSR]);
+ xive2_tctx_get_nvp_indexes(tctx, cl_ring, &nvp_blk, &nvp_idx);
- if (cppr > XIVE_PRIORITY_MAX) {
- cppr = 0xff;
+ if (xive2_router_get_nvp(xrtr, (uint8_t)nvp_blk, nvp_idx, &nvp)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No NVP %x/%x\n",
+ nvp_blk, nvp_idx);
+ return;
+ }
+
+ if (!xive2_nvp_is_valid(&nvp)) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid NVP %x/%x\n",
+ nvp_blk, nvp_idx);
+ return;
}
- old_cppr = regs[TM_CPPR];
- regs[TM_CPPR] = cppr;
+
+ rd = xive_tctx_accept(tctx, ring);
+
+ if (ring == TM_QW1_OS) {
+ OGen = tctx->regs[ring + TM_OGEN];
+ }
+ xive2_cfg = xive2_router_get_config(xrtr);
+ phys_addr = xive2_nvp_reporting_addr(&nvp);
+ uint8_t report_data[REPORT_LINE_GEN1_SIZE];
+ memset(report_data, 0xff, sizeof(report_data));
+ if ((OGen == 1) || (xive2_cfg & XIVE2_GEN1_TIMA_OS)) {
+ report_data[8] = (rd >> 8) & 0xff;
+ report_data[9] = rd & 0xff;
+ } else {
+ report_data[0] = (rd >> 8) & 0xff;
+ report_data[1] = rd & 0xff;
+ }
+ cpu_physical_memory_write(phys_addr, report_data, REPORT_LINE_GEN1_SIZE);
+}
+
+void xive2_tm_ack_os_el(XivePresenter *xptr, XiveTCTX *tctx,
+ hwaddr offset, uint64_t value, unsigned size)
+{
+ xive2_tctx_accept_el(xptr, tctx, TM_QW1_OS, TM_QW1_OS);
+}
+
+/* Re-calculate and present pending interrupts */
+static void xive2_tctx_process_pending(XiveTCTX *tctx, uint8_t sig_ring)
+{
+ uint8_t *sig_regs = &tctx->regs[sig_ring];
+ Xive2Router *xrtr = XIVE2_ROUTER(tctx->xptr);
+ uint8_t backlog_prio;
+ uint8_t first_group;
+ uint8_t group_level;
+ uint8_t pipr_min;
+ uint8_t lsmfb_min;
+ uint8_t ring_min;
+ uint8_t cppr = sig_regs[TM_CPPR];
+ bool group_enabled;
+ Xive2Nvp nvp;
+ int rc;
+
+ g_assert(sig_ring == TM_QW3_HV_PHYS || sig_ring == TM_QW1_OS);
+ g_assert(sig_regs[TM_WORD2] & 0x80);
+ g_assert(!xive_nsr_indicates_group_exception(sig_ring, sig_regs[TM_NSR]));
/*
* Recompute the PIPR based on local pending interrupts. It will
* be adjusted below if needed in case of pending group interrupts.
*/
- pipr_min = xive_ipb_to_pipr(regs[TM_IPB]);
- group_enabled = !!regs[TM_LGS];
- lsmfb_min = (group_enabled) ? regs[TM_LSMFB] : 0xff;
- ring_min = ring;
+again:
+ pipr_min = xive_ipb_to_pipr(sig_regs[TM_IPB]);
+ group_enabled = !!sig_regs[TM_LGS];
+ lsmfb_min = group_enabled ? sig_regs[TM_LSMFB] : 0xff;
+ ring_min = sig_ring;
+ group_level = 0;
/* PHYS updates also depend on POOL values */
- if (ring == TM_QW3_HV_PHYS) {
- uint8_t *pregs = &tctx->regs[TM_QW2_HV_POOL];
+ if (sig_ring == TM_QW3_HV_PHYS) {
+ uint8_t *pool_regs = &tctx->regs[TM_QW2_HV_POOL];
/* POOL values only matter if POOL ctx is valid */
- if (pregs[TM_WORD2] & 0x80) {
-
- uint8_t pool_pipr = xive_ipb_to_pipr(pregs[TM_IPB]);
- uint8_t pool_lsmfb = pregs[TM_LSMFB];
+ if (pool_regs[TM_WORD2] & 0x80) {
+ uint8_t pool_pipr = xive_ipb_to_pipr(pool_regs[TM_IPB]);
+ uint8_t pool_lsmfb = pool_regs[TM_LSMFB];
/*
* Determine highest priority interrupt and
@@ -972,7 +1230,7 @@ static void xive2_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
}
/* Values needed for group priority calculation */
- if (pregs[TM_LGS] && (pool_lsmfb < lsmfb_min)) {
+ if (pool_regs[TM_LGS] && (pool_lsmfb < lsmfb_min)) {
group_enabled = true;
lsmfb_min = pool_lsmfb;
if (lsmfb_min < pipr_min) {
@@ -981,32 +1239,26 @@ static void xive2_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
}
}
}
- regs[TM_PIPR] = pipr_min;
-
- rc = xive2_tctx_get_nvp_indexes(tctx, ring_min, &nvp_blk, &nvp_idx);
- if (rc) {
- qemu_log_mask(LOG_GUEST_ERROR, "XIVE: set CPPR on invalid context\n");
- return;
- }
-
- if (cppr < old_cppr) {
- /*
- * FIXME: check if there's a group interrupt being presented
- * and if the new cppr prevents it. If so, then the group
- * interrupt needs to be re-added to the backlog and
- * re-triggered (see re-trigger END info in the NVGC
- * structure)
- */
- }
if (group_enabled &&
lsmfb_min < cppr &&
- lsmfb_min < regs[TM_PIPR]) {
+ lsmfb_min < pipr_min) {
+
+ uint8_t nvp_blk;
+ uint32_t nvp_idx;
+
/*
* Thread has seen a group interrupt with a higher priority
* than the new cppr or pending local interrupt. Check the
* backlog
*/
+ rc = xive2_tctx_get_nvp_indexes(tctx, ring_min, &nvp_blk, &nvp_idx);
+ if (rc) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: set CPPR on invalid "
+ "context\n");
+ return;
+ }
+
if (xive2_router_get_nvp(xrtr, nvp_blk, nvp_idx, &nvp)) {
qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No NVP %x/%x\n",
nvp_blk, nvp_idx);
@@ -1030,14 +1282,85 @@ static void xive2_tctx_set_cppr(XiveTCTX *tctx, uint8_t ring, uint8_t cppr)
nvp_blk, nvp_idx,
first_group, &group_level);
tctx->regs[ring_min + TM_LSMFB] = backlog_prio;
- if (backlog_prio != 0xFF) {
- xive2_presenter_backlog_decr(tctx->xptr, nvp_blk, nvp_idx,
- backlog_prio, group_level);
- regs[TM_PIPR] = backlog_prio;
+ if (backlog_prio != lsmfb_min) {
+ /*
+ * If the group backlog scan finds a less favored or no interrupt,
+ * then re-do the processing which may turn up a more favored
+ * interrupt from IPB or the other pool. Backlog should not
+ * find a priority < LSMFB.
+ */
+ g_assert(backlog_prio >= lsmfb_min);
+ goto again;
+ }
+
+ xive2_presenter_backlog_decr(tctx->xptr, nvp_blk, nvp_idx,
+ backlog_prio, group_level);
+ pipr_min = backlog_prio;
+ }
+
+ if (pipr_min > cppr) {
+ pipr_min = cppr;
+ }
+ xive_tctx_pipr_set(tctx, ring_min, pipr_min, group_level);
+}
+
+/* NOTE: CPPR only exists for TM_QW1_OS and TM_QW3_HV_PHYS */
+static void xive2_tctx_set_cppr(XiveTCTX *tctx, uint8_t sig_ring, uint8_t cppr)
+{
+ uint8_t *sig_regs = &tctx->regs[sig_ring];
+ Xive2Router *xrtr = XIVE2_ROUTER(tctx->xptr);
+ uint8_t old_cppr;
+ uint8_t nsr = sig_regs[TM_NSR];
+
+ g_assert(sig_ring == TM_QW1_OS || sig_ring == TM_QW3_HV_PHYS);
+
+ g_assert(tctx->regs[TM_QW2_HV_POOL + TM_NSR] == 0);
+ g_assert(tctx->regs[TM_QW2_HV_POOL + TM_PIPR] == 0);
+ g_assert(tctx->regs[TM_QW2_HV_POOL + TM_CPPR] == 0);
+
+ /* XXX: should show pool IPB for PHYS ring */
+ trace_xive_tctx_set_cppr(tctx->cs->cpu_index, sig_ring,
+ sig_regs[TM_IPB], sig_regs[TM_PIPR],
+ cppr, nsr);
+
+ if (cppr > XIVE_PRIORITY_MAX) {
+ cppr = 0xff;
+ }
+
+ old_cppr = sig_regs[TM_CPPR];
+ sig_regs[TM_CPPR] = cppr;
+
+ /* Handle increased CPPR priority (lower value) */
+ if (cppr < old_cppr) {
+ if (cppr <= sig_regs[TM_PIPR]) {
+ /* CPPR lowered below PIPR, must un-present interrupt */
+ if (xive_nsr_indicates_exception(sig_ring, nsr)) {
+ if (xive_nsr_indicates_group_exception(sig_ring, nsr)) {
+ /* redistribute precluded active grp interrupt */
+ xive2_redistribute(xrtr, tctx,
+ xive_nsr_exception_ring(sig_ring, nsr));
+ return;
+ }
+ }
+
+ /* interrupt is VP directed, pending in IPB */
+ xive_tctx_pipr_set(tctx, sig_ring, cppr, 0);
+ return;
+ } else {
+ /* CPPR was lowered, but still above PIPR. No action needed. */
+ return;
}
}
- /* CPPR has changed, check if we need to raise a pending exception */
- xive_tctx_notify(tctx, ring_min, group_level);
+
+ /* CPPR didn't change, nothing needs to be done */
+ if (cppr == old_cppr) {
+ return;
+ }
+
+ /* CPPR priority decreased (higher value) */
+ if (!xive_nsr_indicates_exception(sig_ring, nsr)) {
+ xive2_tctx_process_pending(tctx, sig_ring);
+ }
}
void xive2_tm_set_hv_cppr(XivePresenter *xptr, XiveTCTX *tctx,
@@ -1052,6 +1375,34 @@ void xive2_tm_set_os_cppr(XivePresenter *xptr, XiveTCTX *tctx,
xive2_tctx_set_cppr(tctx, TM_QW1_OS, value & 0xff);
}
+/*
+ * Adjust the IPB to allow a CPU to process event queues of other
+ * priorities during one physical interrupt cycle.
+ */
+void xive2_tm_set_os_pending(XivePresenter *xptr, XiveTCTX *tctx,
+ hwaddr offset, uint64_t value, unsigned size)
+{
+ Xive2Router *xrtr = XIVE2_ROUTER(xptr);
+ uint8_t ring = TM_QW1_OS;
+ uint8_t *regs = &tctx->regs[ring];
+ uint8_t priority = value & 0xff;
+
+ /*
+ * XXX: should this simply set a bit in IPB and wait for it to be picked
+ * up next cycle, or is it supposed to present it now? We implement the
+ * latter here.
+ */
+ regs[TM_IPB] |= xive_priority_to_ipb(priority);
+ if (xive_ipb_to_pipr(regs[TM_IPB]) >= regs[TM_PIPR]) {
+ return;
+ }
+ if (xive_nsr_indicates_group_exception(ring, regs[TM_NSR])) {
+ xive2_redistribute(xrtr, tctx, ring);
+ }
+
+ xive_tctx_pipr_present(tctx, ring, priority, 0);
+}
+
static void xive2_tctx_set_target(XiveTCTX *tctx, uint8_t ring, uint8_t target)
{
uint8_t *regs = &tctx->regs[ring];
@@ -1259,9 +1610,7 @@ int xive2_presenter_tctx_match(XivePresenter *xptr, XiveTCTX *tctx,
bool xive2_tm_irq_precluded(XiveTCTX *tctx, int ring, uint8_t priority)
{
- /* HV_POOL ring uses HV_PHYS NSR, CPPR and PIPR registers */
- uint8_t alt_ring = (ring == TM_QW2_HV_POOL) ? TM_QW3_HV_PHYS : ring;
- uint8_t *alt_regs = &tctx->regs[alt_ring];
+ uint8_t *sig_regs = xive_tctx_signal_regs(tctx, ring);
/*
* The xive2_presenter_tctx_match() above tells if there's a match
@@ -1269,7 +1618,7 @@ bool xive2_tm_irq_precluded(XiveTCTX *tctx, int ring, uint8_t priority)
* priority to know if the thread can take the interrupt now or if
* it is precluded.
*/
- if (priority < alt_regs[TM_CPPR]) {
+ if (priority < sig_regs[TM_PIPR]) {
return false;
}
return true;
@@ -1322,12 +1671,14 @@ static bool xive2_router_end_es_notify(Xive2Router *xrtr, uint8_t end_blk,
* message has the same parameters than in the function below.
*/
static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk,
- uint32_t end_idx, uint32_t end_data)
+ uint32_t end_idx, uint32_t end_data,
+ bool redistribute)
{
Xive2End end;
uint8_t priority;
uint8_t format;
- bool found, precluded;
+ XiveTCTXMatch match;
+ bool crowd, cam_ignore;
uint8_t nvx_blk;
uint32_t nvx_idx;
@@ -1350,7 +1701,8 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk,
return;
}
- if (xive2_end_is_enqueue(&end)) {
+ if (!redistribute && xive2_end_is_enqueue(&end)) {
+ trace_xive_end_enqueue(end_blk, end_idx, end_data);
xive2_end_enqueue(&end, end_data);
/* Enqueuing event data modifies the EQ toggle and index */
xive2_router_write_end(xrtr, end_blk, end_idx, &end, 1);
@@ -1396,16 +1748,28 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk,
*/
nvx_blk = xive_get_field32(END2_W6_VP_BLOCK, end.w6);
nvx_idx = xive_get_field32(END2_W6_VP_OFFSET, end.w6);
-
- found = xive_presenter_notify(xrtr->xfb, format, nvx_blk, nvx_idx,
- xive2_end_is_crowd(&end), xive2_end_is_ignore(&end),
- priority,
- xive_get_field32(END2_W7_F1_LOG_SERVER_ID, end.w7),
- &precluded);
+ crowd = xive2_end_is_crowd(&end);
+ cam_ignore = xive2_end_is_ignore(&end);
/* TODO: Auto EOI. */
+ if (xive_presenter_match(xrtr->xfb, format, nvx_blk, nvx_idx,
+ crowd, cam_ignore, priority,
+ xive_get_field32(END2_W7_F1_LOG_SERVER_ID, end.w7),
+ &match)) {
+ XiveTCTX *tctx = match.tctx;
+ uint8_t ring = match.ring;
+ uint8_t *sig_regs = xive_tctx_signal_regs(tctx, ring);
+ uint8_t nsr = sig_regs[TM_NSR];
+ uint8_t group_level;
+
+ if (priority < sig_regs[TM_PIPR] &&
+ xive_nsr_indicates_group_exception(ring, nsr)) {
+ xive2_redistribute(xrtr, tctx, xive_nsr_exception_ring(ring, nsr));
+ }
- if (found) {
+ group_level = xive_get_group_level(crowd, cam_ignore, nvx_blk, nvx_idx);
+ trace_xive_presenter_notify(nvx_blk, nvx_idx, ring, group_level);
+ xive_tctx_pipr_present(tctx, ring, priority, group_level);
return;
}
@@ -1423,7 +1787,7 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk,
return;
}
- if (!xive2_end_is_ignore(&end)) {
+ if (!cam_ignore) {
uint8_t ipb;
Xive2Nvp nvp;
@@ -1452,9 +1816,6 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk,
} else {
Xive2Nvgc nvgc;
uint32_t backlog;
- bool crowd;
-
- crowd = xive2_end_is_crowd(&end);
/*
* For groups and crowds, the per-priority backlog
@@ -1486,9 +1847,7 @@ static void xive2_router_end_notify(Xive2Router *xrtr, uint8_t end_blk,
if (backlog == 1) {
XiveFabricClass *xfc = XIVE_FABRIC_GET_CLASS(xrtr->xfb);
xfc->broadcast(xrtr->xfb, nvx_blk, nvx_idx,
- xive2_end_is_crowd(&end),
- xive2_end_is_ignore(&end),
- priority);
+ crowd, cam_ignore, priority);
if (!xive2_end_is_precluded_escalation(&end)) {
/*
@@ -1522,18 +1881,41 @@ do_escalation:
}
}
- /*
- * The END trigger becomes an Escalation trigger
- */
- xive2_router_end_notify(xrtr,
- xive_get_field32(END2_W4_END_BLOCK, end.w4),
- xive_get_field32(END2_W4_ESC_END_INDEX, end.w4),
- xive_get_field32(END2_W5_ESC_END_DATA, end.w5));
+ if (xive2_end_is_escalate_end(&end)) {
+ /*
+ * Perform END Adaptive escalation processing
+ * The END trigger becomes an Escalation trigger
+ */
+ uint8_t esc_blk = xive_get_field32(END2_W4_END_BLOCK, end.w4);
+ uint32_t esc_idx = xive_get_field32(END2_W4_ESC_END_INDEX, end.w4);
+ uint32_t esc_data = xive_get_field32(END2_W5_ESC_END_DATA, end.w5);
+ trace_xive_escalate_end(end_blk, end_idx, esc_blk, esc_idx, esc_data);
+ xive2_router_end_notify(xrtr, esc_blk, esc_idx, esc_data, false);
+ } /* end END adaptive escalation */
+
+ else {
+ uint32_t lisn; /* Logical Interrupt Source Number */
+
+ /*
+ * Perform ESB escalation processing
+ * E[N] == 1 --> N
+ * Req[Block] <- E[ESB_Block]
+ * Req[Index] <- E[ESB_Index]
+ * Req[Offset] <- 0x000
+ * Execute <ESB Store> Req command
+ */
+ lisn = XIVE_EAS(xive_get_field32(END2_W4_END_BLOCK, end.w4),
+ xive_get_field32(END2_W4_ESC_END_INDEX, end.w4));
+
+ trace_xive_escalate_esb(end_blk, end_idx, lisn);
+ xive2_notify(xrtr, lisn, true /* pq_checked */);
+ }
+
+ return;
}
-void xive2_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked)
+void xive2_notify(Xive2Router *xrtr , uint32_t lisn, bool pq_checked)
{
- Xive2Router *xrtr = XIVE2_ROUTER(xn);
uint8_t eas_blk = XIVE_EAS_BLOCK(lisn);
uint32_t eas_idx = XIVE_EAS_INDEX(lisn);
Xive2Eas eas;
@@ -1576,13 +1958,31 @@ void xive2_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked)
return;
}
+ /* TODO: add support for EAS resume */
+ if (xive2_eas_is_resume(&eas)) {
+ qemu_log_mask(LOG_UNIMP,
+ "XIVE: EAS resume processing unimplemented - LISN %x\n",
+ lisn);
+ return;
+ }
+
/*
* The event trigger becomes an END trigger
*/
xive2_router_end_notify(xrtr,
- xive_get_field64(EAS2_END_BLOCK, eas.w),
- xive_get_field64(EAS2_END_INDEX, eas.w),
- xive_get_field64(EAS2_END_DATA, eas.w));
+ xive_get_field64(EAS2_END_BLOCK, eas.w),
+ xive_get_field64(EAS2_END_INDEX, eas.w),
+ xive_get_field64(EAS2_END_DATA, eas.w),
+ false);
+ return;
+}
+
+void xive2_router_notify(XiveNotifier *xn, uint32_t lisn, bool pq_checked)
+{
+ Xive2Router *xrtr = XIVE2_ROUTER(xn);
+
+ xive2_notify(xrtr, lisn, pq_checked);
+ return;
}
static const Property xive2_router_properties[] = {
diff --git a/hw/isa/isa-superio.c b/hw/isa/isa-superio.c
index 2853485..941b0f9 100644
--- a/hw/isa/isa-superio.c
+++ b/hw/isa/isa-superio.c
@@ -15,6 +15,7 @@
#include "qemu/module.h"
#include "qapi/error.h"
#include "system/blockdev.h"
+#include "system/system.h"
#include "chardev/char.h"
#include "hw/char/parallel.h"
#include "hw/block/fdc.h"
diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c
index 304dffa..c9cb8f7 100644
--- a/hw/isa/lpc_ich9.c
+++ b/hw/isa/lpc_ich9.c
@@ -132,6 +132,11 @@ static void ich9_cc_init(ICH9LPCState *lpc)
static void ich9_cc_reset(ICH9LPCState *lpc)
{
uint8_t *c = lpc->chip_config;
+ uint32_t gcs = ICH9_CC_GCS_DEFAULT;
+
+ if (lpc->pin_strap.spkr_hi) {
+ gcs |= ICH9_CC_GCS_NO_REBOOT;
+ }
memset(lpc->chip_config, 0, sizeof(lpc->chip_config));
@@ -142,7 +147,7 @@ static void ich9_cc_reset(ICH9LPCState *lpc)
pci_set_long(c + ICH9_CC_D27IR, ICH9_CC_DIR_DEFAULT);
pci_set_long(c + ICH9_CC_D26IR, ICH9_CC_DIR_DEFAULT);
pci_set_long(c + ICH9_CC_D25IR, ICH9_CC_DIR_DEFAULT);
- pci_set_long(c + ICH9_CC_GCS, ICH9_CC_GCS_DEFAULT);
+ pci_set_long(c + ICH9_CC_GCS, gcs);
ich9_cc_update(lpc);
}
diff --git a/hw/loongarch/Kconfig b/hw/loongarch/Kconfig
index bb2838b..8024ddf 100644
--- a/hw/loongarch/Kconfig
+++ b/hw/loongarch/Kconfig
@@ -15,6 +15,7 @@ config LOONGARCH_VIRT
select LOONGARCH_PCH_PIC
select LOONGARCH_PCH_MSI
select LOONGARCH_EXTIOI
+ select LOONGARCH_DINTC
select LS7A_RTC
select SMBIOS
select ACPI_CPU_HOTPLUG
diff --git a/hw/loongarch/boot.c b/hw/loongarch/boot.c
index 9b6292e..3dd48cb 100644
--- a/hw/loongarch/boot.c
+++ b/hw/loongarch/boot.c
@@ -35,19 +35,19 @@ struct loongarch_linux_hdr {
uint32_t pe_header_offset;
} QEMU_PACKED;
-struct memmap_entry *memmap_table;
-unsigned memmap_entries;
-
-ram_addr_t initrd_offset;
-uint64_t initrd_size;
-
-static const unsigned int slave_boot_code[] = {
+static const unsigned int aux_boot_code[] = {
/* Configure reset ebase. */
0x0400302c, /* csrwr $t0, LOONGARCH_CSR_EENTRY */
/* Disable interrupt. */
0x0380100c, /* ori $t0, $zero,0x4 */
0x04000180, /* csrxchg $zero, $t0, LOONGARCH_CSR_CRMD */
+ 0x03400000, /* nop */
+
+ 0x0400800c, /* csrrd $t0, LOONGARCH_CSR_CPUNUM */
+ 0x034ffd8c, /* andi $t0, $t0, 0x3ff */
+ 0x0015000d, /* move $t1, $zero */
+ 0x5800718d, /* beq $t0, $t1, 112 */
/* Clear mailbox. */
0x1400002d, /* lu12i.w $t1, 1(0x1) */
@@ -87,6 +87,26 @@ static const unsigned int slave_boot_code[] = {
0x06480dac, /* iocsrrd.d $t0, $t1 */
0x00150181, /* move $ra, $t0 */
0x4c000020, /* jirl $zero, $ra,0 */
+ /* BSP Core */
+ 0x03400000, /* nop */
+ 0x1800000d, /* pcaddi $t1, 0 */
+ 0x28c0a1a4, /* ld.d $a0, $t1, 40 */
+ 0x1800000d, /* pcaddi $t1, 0 */
+ 0x28c0a1a5, /* ld.d $a1, $t1, 40 */
+ 0x1800000d, /* pcaddi $t1, 0 */
+ 0x28c0a1a6, /* ld.d $a2, $t1, 40 */
+ 0x1800000d, /* pcaddi $t1, 0 */
+ 0x28c0a1ac, /* ld.d $t0, $t1, 40 */
+ 0x00150181, /* move $ra, $t0 */
+ 0x4c000020, /* jirl $zero, $ra,0 */
+ 0x00000000, /* .dword 0 A0 */
+ 0x00000000,
+ 0x00000000, /* .dword 0 A1 */
+ 0x00000000,
+ 0x00000000, /* .dword 0 A2 */
+ 0x00000000,
+ 0x00000000, /* .dword 0 PC */
+ 0x00000000,
};
static inline void *guidcpy(void *dst, const void *src)
@@ -94,12 +114,16 @@ static inline void *guidcpy(void *dst, const void *src)
return memcpy(dst, src, sizeof(efi_guid_t));
}
-static void init_efi_boot_memmap(struct efi_system_table *systab,
+static void init_efi_boot_memmap(MachineState *ms,
+ struct efi_system_table *systab,
void *p, void *start)
{
unsigned i;
struct efi_boot_memmap *boot_memmap = p;
efi_guid_t tbl_guid = LINUX_EFI_BOOT_MEMMAP_GUID;
+ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(ms);
+ struct memmap_entry *memmap_table;
+ unsigned int memmap_entries;
/* efi_configuration_table 1 */
guidcpy(&systab->tables[0].guid, &tbl_guid);
@@ -111,6 +135,8 @@ static void init_efi_boot_memmap(struct efi_system_table *systab,
boot_memmap->map_size = 0;
efi_memory_desc_t *map = p + sizeof(struct efi_boot_memmap);
+ memmap_table = lvms->memmap_table;
+ memmap_entries = lvms->memmap_entries;
for (i = 0; i < memmap_entries; i++) {
map = (void *)boot_memmap + sizeof(*map);
map[i].type = memmap_table[i].type;
@@ -121,7 +147,8 @@ static void init_efi_boot_memmap(struct efi_system_table *systab,
}
}
-static void init_efi_initrd_table(struct efi_system_table *systab,
+static void init_efi_initrd_table(struct loongarch_boot_info *info,
+ struct efi_system_table *systab,
void *p, void *start)
{
efi_guid_t tbl_guid = LINUX_EFI_INITRD_MEDIA_GUID;
@@ -132,8 +159,8 @@ static void init_efi_initrd_table(struct efi_system_table *systab,
systab->tables[1].table = (struct efi_configuration_table *)(p - start);
systab->nr_tables = 2;
- initrd_table->base = initrd_offset;
- initrd_table->size = initrd_size;
+ initrd_table->base = info->initrd_addr;
+ initrd_table->size = info->initrd_size;
}
static void init_efi_fdt_table(struct efi_system_table *systab)
@@ -146,10 +173,12 @@ static void init_efi_fdt_table(struct efi_system_table *systab)
systab->nr_tables = 3;
}
-static void init_systab(struct loongarch_boot_info *info, void *p, void *start)
+static void init_systab(MachineState *ms,
+ struct loongarch_boot_info *info, void *p, void *start)
{
void *bp_tables_start;
struct efi_system_table *systab = p;
+ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(ms);
info->a2 = p - start;
@@ -166,10 +195,10 @@ static void init_systab(struct loongarch_boot_info *info, void *p, void *start)
systab->tables = p;
bp_tables_start = p;
- init_efi_boot_memmap(systab, p, start);
+ init_efi_boot_memmap(ms, systab, p, start);
p += ROUND_UP(sizeof(struct efi_boot_memmap) +
- sizeof(efi_memory_desc_t) * memmap_entries, 64 * KiB);
- init_efi_initrd_table(systab, p, start);
+ sizeof(efi_memory_desc_t) * lvms->memmap_entries, 64 * KiB);
+ init_efi_initrd_table(info, systab, p, start);
p += ROUND_UP(sizeof(struct efi_initrd), 64 * KiB);
init_efi_fdt_table(systab);
@@ -276,7 +305,7 @@ static ram_addr_t alloc_initrd_memory(struct loongarch_boot_info *info,
static int64_t load_kernel_info(struct loongarch_boot_info *info)
{
- uint64_t kernel_entry, kernel_low, kernel_high;
+ uint64_t kernel_entry, kernel_low, kernel_high, initrd_offset = 0;
ssize_t kernel_size;
kernel_size = load_elf(info->kernel_filename, NULL,
@@ -299,7 +328,8 @@ static int64_t load_kernel_info(struct loongarch_boot_info *info)
}
if (info->initrd_filename) {
- initrd_size = get_image_size(info->initrd_filename);
+ ssize_t initrd_size = get_image_size(info->initrd_filename);
+
if (initrd_size > 0) {
initrd_offset = ROUND_UP(kernel_high + 4 * kernel_size, 64 * KiB);
initrd_offset = alloc_initrd_memory(info, initrd_offset,
@@ -308,34 +338,19 @@ static int64_t load_kernel_info(struct loongarch_boot_info *info)
initrd_offset, initrd_size);
}
- if (initrd_size == (target_ulong)-1) {
+ if (initrd_size == -1) {
error_report("could not load initial ram disk '%s'",
info->initrd_filename);
exit(1);
}
- } else {
- initrd_size = 0;
+
+ info->initrd_addr = initrd_offset;
+ info->initrd_size = initrd_size;
}
return kernel_entry;
}
-static void reset_load_elf(void *opaque)
-{
- LoongArchCPU *cpu = opaque;
- CPULoongArchState *env = &cpu->env;
-
- cpu_reset(CPU(cpu));
- if (env->load_elf) {
- if (cpu == LOONGARCH_CPU(first_cpu)) {
- env->gpr[4] = env->boot_info->a0;
- env->gpr[5] = env->boot_info->a1;
- env->gpr[6] = env->boot_info->a2;
- }
- cpu_set_pc(CPU(cpu), env->elf_address);
- }
-}
-
static void fw_cfg_add_kernel_info(struct loongarch_boot_info *info,
FWCfgState *fw_cfg)
{
@@ -369,22 +384,23 @@ static void loongarch_firmware_boot(LoongArchVirtMachineState *lvms,
fw_cfg_add_kernel_info(info, lvms->fw_cfg);
}
-static void init_boot_rom(struct loongarch_boot_info *info, void *p)
+static void init_boot_rom(MachineState *ms,
+ struct loongarch_boot_info *info, void *p)
{
void *start = p;
init_cmdline(info, p, start);
p += COMMAND_LINE_SIZE;
- init_systab(info, p, start);
+ init_systab(ms, info, p, start);
}
-static void loongarch_direct_kernel_boot(struct loongarch_boot_info *info)
+static void loongarch_direct_kernel_boot(MachineState *ms,
+ struct loongarch_boot_info *info)
{
void *p, *bp;
int64_t kernel_addr = VIRT_FLASH0_BASE;
- LoongArchCPU *lacpu;
- CPUState *cs;
+ uint64_t *data;
if (info->kernel_filename) {
kernel_addr = load_kernel_info(info);
@@ -397,25 +413,19 @@ static void loongarch_direct_kernel_boot(struct loongarch_boot_info *info)
/* Load cmdline and system tables at [0 - 1 MiB] */
p = g_malloc0(1 * MiB);
bp = p;
- init_boot_rom(info, p);
+ init_boot_rom(ms, info, p);
rom_add_blob_fixed_as("boot_info", bp, 1 * MiB, 0, &address_space_memory);
/* Load slave boot code at pflash0 . */
void *boot_code = g_malloc0(VIRT_FLASH0_SIZE);
- memcpy(boot_code, &slave_boot_code, sizeof(slave_boot_code));
+ memcpy(boot_code, &aux_boot_code, sizeof(aux_boot_code));
+ data = (uint64_t *)(boot_code + sizeof(aux_boot_code));
+ *(data - 4) = cpu_to_le64(info->a0);
+ *(data - 3) = cpu_to_le64(info->a1);
+ *(data - 2) = cpu_to_le64(info->a2);
+ *(data - 1) = cpu_to_le64(kernel_addr);
rom_add_blob_fixed("boot_code", boot_code, VIRT_FLASH0_SIZE, VIRT_FLASH0_BASE);
- CPU_FOREACH(cs) {
- lacpu = LOONGARCH_CPU(cs);
- lacpu->env.load_elf = true;
- if (cs == first_cpu) {
- lacpu->env.elf_address = kernel_addr;
- } else {
- lacpu->env.elf_address = VIRT_FLASH0_BASE;
- }
- lacpu->env.boot_info = info;
- }
-
g_free(boot_code);
g_free(bp);
}
@@ -423,12 +433,6 @@ static void loongarch_direct_kernel_boot(struct loongarch_boot_info *info)
void loongarch_load_kernel(MachineState *ms, struct loongarch_boot_info *info)
{
LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(ms);
- int i;
-
- /* register reset function */
- for (i = 0; i < ms->smp.cpus; i++) {
- qemu_register_reset(reset_load_elf, LOONGARCH_CPU(qemu_get_cpu(i)));
- }
info->kernel_filename = ms->kernel_filename;
info->kernel_cmdline = ms->kernel_cmdline;
@@ -437,6 +441,6 @@ void loongarch_load_kernel(MachineState *ms, struct loongarch_boot_info *info)
if (lvms->bios_loaded) {
loongarch_firmware_boot(lvms, info);
} else {
- loongarch_direct_kernel_boot(info);
+ loongarch_direct_kernel_boot(ms, info);
}
}
diff --git a/hw/loongarch/virt-acpi-build.c b/hw/loongarch/virt-acpi-build.c
index 073b6de..8c2228a 100644
--- a/hw/loongarch/virt-acpi-build.c
+++ b/hw/loongarch/virt-acpi-build.c
@@ -557,7 +557,9 @@ static void acpi_build(AcpiBuildTables *tables, MachineState *machine)
acpi_add_table(table_offsets, tables_blob);
build_srat(tables_blob, tables->linker, machine);
acpi_add_table(table_offsets, tables_blob);
- spcr_setup(tables_blob, tables->linker, machine);
+
+ if (machine->acpi_spcr_enabled)
+ spcr_setup(tables_blob, tables->linker, machine);
if (machine->numa_state->num_nodes) {
if (machine->numa_state->have_numa_distance) {
@@ -575,8 +577,8 @@ static void acpi_build(AcpiBuildTables *tables, MachineState *machine)
acpi_add_table(table_offsets, tables_blob);
{
AcpiMcfgInfo mcfg = {
- .base = cpu_to_le64(VIRT_PCI_CFG_BASE),
- .size = cpu_to_le64(VIRT_PCI_CFG_SIZE),
+ .base = VIRT_PCI_CFG_BASE,
+ .size = VIRT_PCI_CFG_SIZE,
};
build_mcfg(tables_blob, tables->linker, &mcfg, lvms->oem_id,
lvms->oem_table_id);
diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
index 1b50404..c176042 100644
--- a/hw/loongarch/virt.c
+++ b/hw/loongarch/virt.c
@@ -28,6 +28,7 @@
#include "hw/intc/loongarch_extioi.h"
#include "hw/intc/loongarch_pch_pic.h"
#include "hw/intc/loongarch_pch_msi.h"
+#include "hw/intc/loongarch_dintc.h"
#include "hw/pci-host/ls7a.h"
#include "hw/pci-host/gpex.h"
#include "hw/misc/unimp.h"
@@ -46,6 +47,31 @@
#include "hw/block/flash.h"
#include "hw/virtio/virtio-iommu.h"
#include "qemu/error-report.h"
+#include "kvm/kvm_loongarch.h"
+
+static void virt_get_dmsi(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(obj);
+ OnOffAuto dmsi = lvms->dmsi;
+
+ visit_type_OnOffAuto(v, name, &dmsi, errp);
+
+}
+static void virt_set_dmsi(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(obj);
+
+ visit_type_OnOffAuto(v, name, &lvms->dmsi, errp);
+
+ if (lvms->dmsi == ON_OFF_AUTO_OFF) {
+ lvms->misc_feature &= ~BIT(IOCSRF_DMSI);
+ lvms->misc_status &= ~BIT_ULL(IOCSRM_DMSI_EN);
+ } else if (lvms->dmsi == ON_OFF_AUTO_ON) {
+ lvms->misc_feature = BIT(IOCSRF_DMSI);
+ }
+}
static void virt_get_veiointc(Object *obj, Visitor *v, const char *name,
void *opaque, Error **errp)
@@ -136,6 +162,10 @@ static void virt_build_smbios(LoongArchVirtMachineState *lvms)
return;
}
+ if (kvm_enabled()) {
+ product = "KVM Virtual Machine";
+ }
+
smbios_set_defaults("QEMU", product, mc->name);
smbios_get_tables(ms, SMBIOS_ENTRY_POINT_TYPE_64,
@@ -168,8 +198,15 @@ static void virt_powerdown_req(Notifier *notifier, void *opaque)
acpi_send_event(s->acpi_ged, ACPI_POWER_DOWN_STATUS);
}
-static void memmap_add_entry(uint64_t address, uint64_t length, uint32_t type)
+static void memmap_add_entry(MachineState *ms, uint64_t address,
+ uint64_t length, uint32_t type)
{
+ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(ms);
+ struct memmap_entry *memmap_table;
+ unsigned int memmap_entries;
+
+ memmap_table = lvms->memmap_table;
+ memmap_entries = lvms->memmap_entries;
/* Ensure there are no duplicate entries. */
for (unsigned i = 0; i < memmap_entries; i++) {
assert(memmap_table[i].address != address);
@@ -182,6 +219,8 @@ static void memmap_add_entry(uint64_t address, uint64_t length, uint32_t type)
memmap_table[memmap_entries].type = cpu_to_le32(type);
memmap_table[memmap_entries].reserved = 0;
memmap_entries++;
+ lvms->memmap_table = memmap_table;
+ lvms->memmap_entries = memmap_entries;
}
static DeviceState *create_acpi_ged(DeviceState *pch_pic,
@@ -342,13 +381,17 @@ static void virt_cpu_irq_init(LoongArchVirtMachineState *lvms)
&error_abort);
hotplug_handler_plug(HOTPLUG_HANDLER(lvms->extioi), DEVICE(cs),
&error_abort);
+ if (lvms->dintc) {
+ hotplug_handler_plug(HOTPLUG_HANDLER(lvms->dintc), DEVICE(cs),
+ &error_abort);
+ }
}
}
static void virt_irq_init(LoongArchVirtMachineState *lvms)
{
DeviceState *pch_pic, *pch_msi;
- DeviceState *ipi, *extioi;
+ DeviceState *ipi, *extioi, *dintc;
SysBusDevice *d;
int i, start, num;
@@ -394,6 +437,33 @@ static void virt_irq_init(LoongArchVirtMachineState *lvms)
* +--------+ +---------+ +---------+
* | UARTs | | Devices | | Devices |
* +--------+ +---------+ +---------+
+ *
+ *
+ * Advanced Extended IRQ model
+ *
+ * +-----+ +---------------------------------+ +-------+
+ * | IPI | --> | CPUINTC | <-- | Timer |
+ * +-----+ +---------------------------------+ +-------+
+ * ^ ^ ^
+ * | | |
+ * +-------------+ +----------+ +---------+ +-------+
+ * | EIOINTC | | DINTC | | LIOINTC | <-- | UARTs |
+ * +-------------+ +----------+ +---------+ +-------+
+ * ^ ^ ^
+ * | | |
+ * +---------+ +---------+ |
+ * | PCH-PIC | | PCH-MSI | |
+ * +---------+ +---------+ |
+ * ^ ^ ^ |
+ * | | | |
+ * +---------+ +---------+ +---------+
+ * | Devices | | PCH-LPC | | Devices |
+ * +---------+ +---------+ +---------+
+ * ^
+ * |
+ * +---------+
+ * | Devices |
+ * +---------+
*/
/* Create IPI device */
@@ -401,11 +471,13 @@ static void virt_irq_init(LoongArchVirtMachineState *lvms)
lvms->ipi = ipi;
sysbus_realize_and_unref(SYS_BUS_DEVICE(ipi), &error_fatal);
- /* IPI iocsr memory region */
- memory_region_add_subregion(&lvms->system_iocsr, SMP_IPI_MAILBOX,
- sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), 0));
- memory_region_add_subregion(&lvms->system_iocsr, MAIL_SEND_ADDR,
- sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), 1));
+ /* Create DINTC device*/
+ if (virt_has_dmsi(lvms)) {
+ dintc = qdev_new(TYPE_LOONGARCH_DINTC);
+ lvms->dintc = dintc;
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(dintc), &error_fatal);
+ sysbus_mmio_map(SYS_BUS_DEVICE(dintc), 0, VIRT_DINTC_BASE);
+ }
/* Create EXTIOI device */
extioi = qdev_new(TYPE_LOONGARCH_EXTIOI);
@@ -414,12 +486,6 @@ static void virt_irq_init(LoongArchVirtMachineState *lvms)
qdev_prop_set_bit(extioi, "has-virtualization-extension", true);
}
sysbus_realize_and_unref(SYS_BUS_DEVICE(extioi), &error_fatal);
- memory_region_add_subregion(&lvms->system_iocsr, APIC_BASE,
- sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 0));
- if (virt_is_veiointc_enabled(lvms)) {
- memory_region_add_subregion(&lvms->system_iocsr, EXTIOI_VIRT_BASE,
- sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 1));
- }
virt_cpu_irq_init(lvms);
pch_pic = qdev_new(TYPE_LOONGARCH_PIC);
@@ -427,13 +493,6 @@ static void virt_irq_init(LoongArchVirtMachineState *lvms)
qdev_prop_set_uint32(pch_pic, "pch_pic_irq_num", num);
d = SYS_BUS_DEVICE(pch_pic);
sysbus_realize_and_unref(d, &error_fatal);
- memory_region_add_subregion(get_system_memory(), VIRT_IOAPIC_REG_BASE,
- sysbus_mmio_get_region(d, 0));
-
- /* Connect pch_pic irqs to extioi */
- for (i = 0; i < num; i++) {
- qdev_connect_gpio_out(DEVICE(d), i, qdev_get_gpio_in(extioi, i));
- }
pch_msi = qdev_new(TYPE_LOONGARCH_PCH_MSI);
start = num;
@@ -443,12 +502,40 @@ static void virt_irq_init(LoongArchVirtMachineState *lvms)
d = SYS_BUS_DEVICE(pch_msi);
sysbus_realize_and_unref(d, &error_fatal);
sysbus_mmio_map(d, 0, VIRT_PCH_MSI_ADDR_LOW);
- for (i = 0; i < num; i++) {
- /* Connect pch_msi irqs to extioi */
- qdev_connect_gpio_out(DEVICE(d), i,
- qdev_get_gpio_in(extioi, i + start));
- }
+ if (kvm_irqchip_in_kernel()) {
+ kvm_loongarch_init_irq_routing();
+ } else {
+ /* IPI iocsr memory region */
+ memory_region_add_subregion(&lvms->system_iocsr, SMP_IPI_MAILBOX,
+ sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), 0));
+ memory_region_add_subregion(&lvms->system_iocsr, MAIL_SEND_ADDR,
+ sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), 1));
+
+ /* EXTIOI iocsr memory region */
+ memory_region_add_subregion(&lvms->system_iocsr, APIC_BASE,
+ sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 0));
+ if (virt_is_veiointc_enabled(lvms)) {
+ memory_region_add_subregion(&lvms->system_iocsr, EXTIOI_VIRT_BASE,
+ sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 1));
+ }
+
+ /* PCH_PIC memory region */
+ memory_region_add_subregion(get_system_memory(), VIRT_IOAPIC_REG_BASE,
+ sysbus_mmio_get_region(SYS_BUS_DEVICE(pch_pic), 0));
+
+ /* Connect pch_pic irqs to extioi */
+ for (i = 0; i < VIRT_PCH_PIC_IRQ_NUM; i++) {
+ qdev_connect_gpio_out(DEVICE(pch_pic), i,
+ qdev_get_gpio_in(extioi, i));
+ }
+
+ for (i = VIRT_PCH_PIC_IRQ_NUM; i < EXTIOI_IRQS; i++) {
+ /* Connect pch_msi irqs to extioi */
+ qdev_connect_gpio_out(DEVICE(pch_msi), i - VIRT_PCH_PIC_IRQ_NUM,
+ qdev_get_gpio_in(extioi, i));
+ }
+ }
virt_devices_init(pch_pic, lvms);
}
@@ -509,10 +596,18 @@ static MemTxResult virt_iocsr_misc_write(void *opaque, hwaddr addr,
switch (addr) {
case MISC_FUNC_REG:
+ if (kvm_irqchip_in_kernel()) {
+ return MEMTX_OK;
+ }
+
if (!virt_is_veiointc_enabled(lvms)) {
return MEMTX_OK;
}
+ if (virt_has_dmsi(lvms) && val & BIT_ULL(IOCSRM_DMSI_EN)) {
+ lvms->misc_status |= BIT_ULL(IOCSRM_DMSI_EN);
+ }
+
features = address_space_ldl(&lvms->as_iocsr,
EXTIOI_VIRT_BASE + EXTIOI_VIRT_CONFIG,
attrs, NULL);
@@ -548,6 +643,9 @@ static MemTxResult virt_iocsr_misc_read(void *opaque, hwaddr addr,
break;
case FEATURE_REG:
ret = BIT(IOCSRF_MSI) | BIT(IOCSRF_EXTIOI) | BIT(IOCSRF_CSRIPI);
+ if (virt_has_dmsi(lvms)) {
+ ret |= BIT(IOCSRF_DMSI);
+ }
if (kvm_enabled()) {
ret |= BIT(IOCSRF_VM);
}
@@ -559,6 +657,10 @@ static MemTxResult virt_iocsr_misc_read(void *opaque, hwaddr addr,
ret = 0x303030354133ULL; /* "3A5000" */
break;
case MISC_FUNC_REG:
+ if (kvm_irqchip_in_kernel()) {
+ return MEMTX_OK;
+ }
+
if (!virt_is_veiointc_enabled(lvms)) {
ret |= BIT_ULL(IOCSRM_EXTIOI_EN);
break;
@@ -573,6 +675,10 @@ static MemTxResult virt_iocsr_misc_read(void *opaque, hwaddr addr,
if (features & BIT(EXTIOI_ENABLE_INT_ENCODE)) {
ret |= BIT_ULL(IOCSRM_EXTIOI_INT_ENCODE);
}
+ if (virt_has_dmsi(lvms) &&
+ (lvms->misc_status & BIT_ULL(IOCSRM_DMSI_EN))) {
+ ret |= BIT_ULL(IOCSRM_DMSI_EN);
+ }
break;
default:
g_assert_not_reached();
@@ -619,13 +725,13 @@ static void fw_cfg_add_memory(MachineState *ms)
}
if (size >= gap) {
- memmap_add_entry(base, gap, 1);
+ memmap_add_entry(ms, base, gap, 1);
size -= gap;
base = VIRT_HIGHMEM_BASE;
}
if (size) {
- memmap_add_entry(base, size, 1);
+ memmap_add_entry(ms, base, size, 1);
base += size;
}
@@ -640,7 +746,7 @@ static void fw_cfg_add_memory(MachineState *ms)
* lowram: [base, +(gap - numa_info[0].node_mem))
* highram: [VIRT_HIGHMEM_BASE, +(ram_size - gap))
*/
- memmap_add_entry(base, gap - numa_info[0].node_mem, 1);
+ memmap_add_entry(ms, base, gap - numa_info[0].node_mem, 1);
size = ram_size - gap;
base = VIRT_HIGHMEM_BASE;
} else {
@@ -648,7 +754,26 @@ static void fw_cfg_add_memory(MachineState *ms)
}
if (size) {
- memmap_add_entry(base, size, 1);
+ memmap_add_entry(ms, base, size, 1);
+ }
+}
+
+static void virt_check_dmsi(MachineState *machine)
+{
+ LoongArchCPU *cpu;
+ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(machine);
+
+ cpu = LOONGARCH_CPU(first_cpu);
+ if (lvms->dmsi == ON_OFF_AUTO_AUTO) {
+ if (cpu->msgint != ON_OFF_AUTO_OFF) {
+ lvms->misc_feature = BIT(IOCSRF_DMSI);
+ }
+ }
+
+ if (lvms->dmsi == ON_OFF_AUTO_ON && cpu->msgint == ON_OFF_AUTO_OFF) {
+ error_report("Fail to enable dmsi , cpu msgint is off "
+ "pleass add cpu feature mesgint=on.");
+ exit(EXIT_FAILURE);
}
}
@@ -686,6 +811,7 @@ static void virt_init(MachineState *machine)
}
qdev_realize_and_unref(DEVICE(cpuobj), NULL, &error_fatal);
}
+ virt_check_dmsi(machine);
fw_cfg_add_memory(machine);
/* Node0 memory */
@@ -734,8 +860,8 @@ static void virt_init(MachineState *machine)
rom_set_fw(lvms->fw_cfg);
if (lvms->fw_cfg != NULL) {
fw_cfg_add_file(lvms->fw_cfg, "etc/memmap",
- memmap_table,
- sizeof(struct memmap_entry) * (memmap_entries));
+ lvms->memmap_table,
+ sizeof(struct memmap_entry) * lvms->memmap_entries);
}
/* Initialize the IO interrupt subsystem */
@@ -816,6 +942,8 @@ static void virt_initfn(Object *obj)
if (tcg_enabled()) {
lvms->veiointc = ON_OFF_AUTO_OFF;
}
+
+ lvms->dmsi = ON_OFF_AUTO_AUTO;
lvms->acpi = ON_OFF_AUTO_AUTO;
lvms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6);
lvms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
@@ -979,10 +1107,14 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev,
/* Notify ipi and extioi irqchip to remove interrupt routing to CPU */
hotplug_handler_unplug(HOTPLUG_HANDLER(lvms->ipi), dev, &error_abort);
hotplug_handler_unplug(HOTPLUG_HANDLER(lvms->extioi), dev, &error_abort);
+ if (lvms->dintc) {
+ hotplug_handler_unplug(HOTPLUG_HANDLER(lvms->dintc), dev, &error_abort);
+ }
/* Notify acpi ged CPU removed */
hotplug_handler_unplug(HOTPLUG_HANDLER(lvms->acpi_ged), dev, &error_abort);
+ qemu_unregister_resettable(OBJECT(dev));
cpu_slot = virt_find_cpu_slot(MACHINE(lvms), cpu->phy_id);
cpu_slot->cpu = NULL;
}
@@ -1002,11 +1134,16 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev,
hotplug_handler_plug(HOTPLUG_HANDLER(lvms->extioi), dev, &error_abort);
}
+ if (lvms->dintc) {
+ hotplug_handler_plug(HOTPLUG_HANDLER(lvms->dintc), dev, &error_abort);
+ }
+
if (lvms->acpi_ged) {
hotplug_handler_plug(HOTPLUG_HANDLER(lvms->acpi_ged), dev,
&error_abort);
}
+ qemu_register_resettable(OBJECT(dev));
cpu_slot = virt_find_cpu_slot(MACHINE(lvms), cpu->phy_id);
cpu_slot->cpu = CPU(dev);
}
@@ -1208,6 +1345,10 @@ static void virt_class_init(ObjectClass *oc, const void *data)
NULL, NULL);
object_class_property_set_description(oc, "v-eiointc",
"Enable Virt Extend I/O Interrupt Controller.");
+ object_class_property_add(oc, "dmsi", "OnOffAuto",
+ virt_get_dmsi, virt_set_dmsi, NULL, NULL);
+ object_class_property_set_description(oc, "dmsi",
+ "Enable direct Message-interrupts Controller.");
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE);
machine_class_allow_dynamic_sysbus_dev(mc, TYPE_UEFI_VARS_SYSBUS);
#ifdef CONFIG_TPM
diff --git a/hw/m68k/virt.c b/hw/m68k/virt.c
index 875fd00..98cfe43 100644
--- a/hw/m68k/virt.c
+++ b/hw/m68k/virt.c
@@ -367,10 +367,17 @@ type_init(virt_machine_register_types)
#define DEFINE_VIRT_MACHINE(major, minor) \
DEFINE_VIRT_MACHINE_IMPL(false, major, minor)
+static void virt_machine_10_2_options(MachineClass *mc)
+{
+}
+DEFINE_VIRT_MACHINE_AS_LATEST(10, 2)
+
static void virt_machine_10_1_options(MachineClass *mc)
{
+ virt_machine_10_2_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_10_1, hw_compat_10_1_len);
}
-DEFINE_VIRT_MACHINE_AS_LATEST(10, 1)
+DEFINE_VIRT_MACHINE(10, 1)
static void virt_machine_10_0_options(MachineClass *mc)
{
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 94e7274..be609ff 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -8,6 +8,7 @@
*
* SPDX-License-Identifier: GPL-v2-only
*/
+#include <math.h>
#include "qemu/osdep.h"
#include "qemu/units.h"
@@ -225,10 +226,16 @@ static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv)
* future.
*/
for (i = 0; i < ct3d->dc.num_regions; i++) {
+ ct3d->dc.regions[i].nonvolatile = false;
+ ct3d->dc.regions[i].sharable = false;
+ ct3d->dc.regions[i].hw_managed_coherency = false;
+ ct3d->dc.regions[i].ic_specific_dc_management = false;
+ ct3d->dc.regions[i].rdonly = false;
ct3_build_cdat_entries_for_mr(&(table[cur_ent]),
dsmad_handle++,
ct3d->dc.regions[i].len,
- false, true, region_base);
+ ct3d->dc.regions[i].nonvolatile,
+ true, region_base);
ct3d->dc.regions[i].dsmadhandle = dsmad_handle - 1;
cur_ent += CT3_CDAT_NUM_ENTRIES;
@@ -634,6 +641,8 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp)
uint64_t region_len;
uint64_t decode_len;
uint64_t blk_size = 2 * MiB;
+ /* Only 1 block size is supported for now. */
+ uint64_t supported_blk_size_bitmask = blk_size;
CXLDCRegion *region;
MemoryRegion *mr;
uint64_t dc_size;
@@ -679,9 +688,11 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp)
.block_size = blk_size,
/* dsmad_handle set when creating CDAT table entries */
.flags = 0,
+ .supported_blk_size_bitmask = supported_blk_size_bitmask,
};
ct3d->dc.total_capacity += region->len;
region->blk_bitmap = bitmap_new(region->len / region->block_size);
+ qemu_mutex_init(&region->bitmap_lock);
}
QTAILQ_INIT(&ct3d->dc.extents);
QTAILQ_INIT(&ct3d->dc.extents_pending);
@@ -1010,6 +1021,7 @@ void ct3_set_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
return;
}
+ QEMU_LOCK_GUARD(&region->bitmap_lock);
bitmap_set(region->blk_bitmap, (dpa - region->base) / region->block_size,
len / region->block_size);
}
@@ -1036,6 +1048,7 @@ bool ct3_test_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
* if bits between [dpa, dpa + len) are all 1s, meaning the DPA range is
* backed with DC extents, return true; else return false.
*/
+ QEMU_LOCK_GUARD(&region->bitmap_lock);
return find_next_zero_bit(region->blk_bitmap, nr + nbits, nr) == nr + nbits;
}
@@ -1057,6 +1070,7 @@ void ct3_clear_region_block_backed(CXLType3Dev *ct3d, uint64_t dpa,
nr = (dpa - region->base) / region->block_size;
nbits = len / region->block_size;
+ QEMU_LOCK_GUARD(&region->bitmap_lock);
bitmap_clear(region->blk_bitmap, nr, nbits);
}
@@ -1576,9 +1590,9 @@ void qmp_cxl_inject_correctable_error(const char *path, CxlCorErrorType type,
pcie_aer_inject_error(PCI_DEVICE(obj), &err);
}
-static void cxl_assign_event_header(CXLEventRecordHdr *hdr,
- const QemuUUID *uuid, uint32_t flags,
- uint8_t length, uint64_t timestamp)
+void cxl_assign_event_header(CXLEventRecordHdr *hdr,
+ const QemuUUID *uuid, uint32_t flags,
+ uint8_t length, uint64_t timestamp)
{
st24_le_p(&hdr->flags, flags);
hdr->length = length;
@@ -1866,28 +1880,13 @@ void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
}
}
-/* CXL r3.1 Table 8-50: Dynamic Capacity Event Record */
-static const QemuUUID dynamic_capacity_uuid = {
- .data = UUID(0xca95afa7, 0xf183, 0x4018, 0x8c, 0x2f,
- 0x95, 0x26, 0x8e, 0x10, 0x1a, 0x2a),
-};
-
-typedef enum CXLDCEventType {
- DC_EVENT_ADD_CAPACITY = 0x0,
- DC_EVENT_RELEASE_CAPACITY = 0x1,
- DC_EVENT_FORCED_RELEASE_CAPACITY = 0x2,
- DC_EVENT_REGION_CONFIG_UPDATED = 0x3,
- DC_EVENT_ADD_CAPACITY_RSP = 0x4,
- DC_EVENT_CAPACITY_RELEASED = 0x5,
-} CXLDCEventType;
-
/*
* Check whether the range [dpa, dpa + len - 1] has overlaps with extents in
* the list.
* Return value: return true if has overlaps; otherwise, return false
*/
-static bool cxl_extents_overlaps_dpa_range(CXLDCExtentList *list,
- uint64_t dpa, uint64_t len)
+bool cxl_extents_overlaps_dpa_range(CXLDCExtentList *list,
+ uint64_t dpa, uint64_t len)
{
CXLDCExtent *ent;
Range range1, range2;
@@ -1932,8 +1931,8 @@ bool cxl_extents_contains_dpa_range(CXLDCExtentList *list,
return false;
}
-static bool cxl_extent_groups_overlaps_dpa_range(CXLDCExtentGroupList *list,
- uint64_t dpa, uint64_t len)
+bool cxl_extent_groups_overlaps_dpa_range(CXLDCExtentGroupList *list,
+ uint64_t dpa, uint64_t len)
{
CXLDCExtentGroup *group;
@@ -1958,15 +1957,11 @@ static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path,
CxlDynamicCapacityExtentList *records, Error **errp)
{
Object *obj;
- CXLEventDynamicCapacity dCap = {};
- CXLEventRecordHdr *hdr = &dCap.hdr;
CXLType3Dev *dcd;
- uint8_t flags = 1 << CXL_EVENT_TYPE_INFO;
uint32_t num_extents = 0;
CxlDynamicCapacityExtentList *list;
CXLDCExtentGroup *group = NULL;
g_autofree CXLDCExtentRaw *extents = NULL;
- uint8_t enc_log = CXL_EVENT_TYPE_DYNAMIC_CAP;
uint64_t dpa, offset, len, block_size;
g_autofree unsigned long *blk_bitmap = NULL;
int i;
@@ -2076,40 +2071,10 @@ static void qmp_cxl_process_dynamic_capacity_prescriptive(const char *path,
}
if (group) {
cxl_extent_group_list_insert_tail(&dcd->dc.extents_pending, group);
+ dcd->dc.total_extent_count += num_extents;
}
- /*
- * CXL r3.1 section 8.2.9.2.1.6: Dynamic Capacity Event Record
- *
- * All Dynamic Capacity event records shall set the Event Record Severity
- * field in the Common Event Record Format to Informational Event. All
- * Dynamic Capacity related events shall be logged in the Dynamic Capacity
- * Event Log.
- */
- cxl_assign_event_header(hdr, &dynamic_capacity_uuid, flags, sizeof(dCap),
- cxl_device_get_timestamp(&dcd->cxl_dstate));
-
- dCap.type = type;
- /* FIXME: for now, validity flag is cleared */
- dCap.validity_flags = 0;
- stw_le_p(&dCap.host_id, hid);
- /* only valid for DC_REGION_CONFIG_UPDATED event */
- dCap.updated_region_id = 0;
- for (i = 0; i < num_extents; i++) {
- memcpy(&dCap.dynamic_capacity_extent, &extents[i],
- sizeof(CXLDCExtentRaw));
-
- dCap.flags = 0;
- if (i < num_extents - 1) {
- /* Set "More" flag */
- dCap.flags |= BIT(0);
- }
-
- if (cxl_event_insert(&dcd->cxl_dstate, enc_log,
- (CXLEventRecordRaw *)&dCap)) {
- cxl_event_irq_assert(dcd);
- }
- }
+ cxl_create_dc_event_records_for_extents(dcd, type, extents, num_extents);
}
void qmp_cxl_add_dynamic_capacity(const char *path, uint16_t host_id,
diff --git a/hw/meson.build b/hw/meson.build
index b91f761..1022bdb 100644
--- a/hw/meson.build
+++ b/hw/meson.build
@@ -1,3 +1,26 @@
+# Enter target code first to reuse variables associated
+subdir('alpha')
+subdir('arm')
+subdir('avr')
+subdir('hppa')
+subdir('xenpv') # i386 uses it
+subdir('i386')
+subdir('loongarch')
+subdir('m68k')
+subdir('microblaze')
+subdir('mips')
+subdir('openrisc')
+subdir('ppc')
+subdir('remote')
+subdir('riscv')
+subdir('rx')
+subdir('s390x')
+subdir('sh4')
+subdir('sparc')
+subdir('sparc64')
+subdir('tricore')
+subdir('xtensa')
+
subdir('9pfs')
subdir('acpi')
subdir('adc')
@@ -39,30 +62,9 @@ subdir('uefi')
subdir('ufs')
subdir('usb')
subdir('vfio')
+subdir('vfio-user')
subdir('virtio')
subdir('vmapple')
subdir('watchdog')
subdir('xen')
-subdir('xenpv')
subdir('fsi')
-
-subdir('alpha')
-subdir('arm')
-subdir('avr')
-subdir('hppa')
-subdir('i386')
-subdir('loongarch')
-subdir('m68k')
-subdir('microblaze')
-subdir('mips')
-subdir('openrisc')
-subdir('ppc')
-subdir('remote')
-subdir('riscv')
-subdir('rx')
-subdir('s390x')
-subdir('sh4')
-subdir('sparc')
-subdir('sparc64')
-subdir('tricore')
-subdir('xtensa')
diff --git a/hw/microblaze/Kconfig b/hw/microblaze/Kconfig
index b0214b2..72d8072 100644
--- a/hw/microblaze/Kconfig
+++ b/hw/microblaze/Kconfig
@@ -1,7 +1,7 @@
config PETALOGIX_S3ADSP1800
bool
default y
- depends on MICROBLAZE
+ depends on MICROBLAZE && FDT
select PFLASH_CFI01
select XILINX
select XILINX_AXI
@@ -11,7 +11,7 @@ config PETALOGIX_S3ADSP1800
config PETALOGIX_ML605
bool
default y
- depends on MICROBLAZE
+ depends on MICROBLAZE && FDT
select PFLASH_CFI01
select SERIAL_MM
select SSI_M25P80
diff --git a/hw/microblaze/petalogix_ml605_mmu.c b/hw/microblaze/petalogix_ml605_mmu.c
index bea6b68..6e923c4 100644
--- a/hw/microblaze/petalogix_ml605_mmu.c
+++ b/hw/microblaze/petalogix_ml605_mmu.c
@@ -80,8 +80,6 @@ petalogix_ml605_init(MachineState *machine)
MemoryRegion *phys_lmb_bram = g_new(MemoryRegion, 1);
MemoryRegion *phys_ram = g_new(MemoryRegion, 1);
qemu_irq irq[32];
- EndianMode endianness = TARGET_BIG_ENDIAN ? ENDIAN_MODE_BIG
- : ENDIAN_MODE_LITTLE;
/* init CPUs */
cpu = MICROBLAZE_CPU(object_new(TYPE_MICROBLAZE_CPU));
@@ -113,7 +111,7 @@ petalogix_ml605_init(MachineState *machine)
dev = qdev_new("xlnx.xps-intc");
- qdev_prop_set_enum(dev, "endianness", endianness);
+ qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE);
qdev_prop_set_uint32(dev, "kind-of-intr", 1 << TIMER_IRQ);
sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, INTC_BASEADDR);
@@ -129,7 +127,7 @@ petalogix_ml605_init(MachineState *machine)
/* 2 timers at irq 2 @ 100 Mhz. */
dev = qdev_new("xlnx.xps-timer");
- qdev_prop_set_enum(dev, "endianness", endianness);
+ qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE);
qdev_prop_set_uint32(dev, "one-timer-only", 0);
qdev_prop_set_uint32(dev, "clock-frequency", 100 * 1000000);
sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
@@ -177,7 +175,7 @@ petalogix_ml605_init(MachineState *machine)
SSIBus *spi;
dev = qdev_new("xlnx.xps-spi");
- qdev_prop_set_enum(dev, "endianness", endianness);
+ qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE);
qdev_prop_set_uint8(dev, "num-ss-bits", NUM_SPI_FLASHES);
busdev = SYS_BUS_DEVICE(dev);
sysbus_realize_and_unref(busdev, &error_fatal);
@@ -218,12 +216,7 @@ petalogix_ml605_init(MachineState *machine)
static void petalogix_ml605_machine_init(MachineClass *mc)
{
- if (TARGET_BIG_ENDIAN) {
- mc->desc = "PetaLogix linux refdesign for xilinx ml605 (big endian)";
- mc->deprecation_reason = "big endian support is not tested";
- } else {
- mc->desc = "PetaLogix linux refdesign for xilinx ml605 (little endian)";
- }
+ mc->desc = "PetaLogix linux refdesign for xilinx ml605 (little endian)";
mc->init = petalogix_ml605_init;
}
diff --git a/hw/microblaze/petalogix_s3adsp1800_mmu.c b/hw/microblaze/petalogix_s3adsp1800_mmu.c
index 032f6f7..e8d0ddf 100644
--- a/hw/microblaze/petalogix_s3adsp1800_mmu.c
+++ b/hw/microblaze/petalogix_s3adsp1800_mmu.c
@@ -58,9 +58,20 @@
#define TYPE_PETALOGIX_S3ADSP1800_MACHINE \
MACHINE_TYPE_NAME("petalogix-s3adsp1800")
+struct S3Adsp1800MachineState {
+ MachineState parent_class;
+
+ EndianMode endianness;
+};
+
+OBJECT_DECLARE_TYPE(S3Adsp1800MachineState, MachineClass,
+ PETALOGIX_S3ADSP1800_MACHINE)
+
+
static void
petalogix_s3adsp1800_init(MachineState *machine)
{
+ S3Adsp1800MachineState *psms = PETALOGIX_S3ADSP1800_MACHINE(machine);
ram_addr_t ram_size = machine->ram_size;
DeviceState *dev;
MicroBlazeCPU *cpu;
@@ -71,13 +82,12 @@ petalogix_s3adsp1800_init(MachineState *machine)
MemoryRegion *phys_ram = g_new(MemoryRegion, 1);
qemu_irq irq[32];
MemoryRegion *sysmem = get_system_memory();
- EndianMode endianness = TARGET_BIG_ENDIAN ? ENDIAN_MODE_BIG
- : ENDIAN_MODE_LITTLE;
+ EndianMode endianness = psms->endianness;
cpu = MICROBLAZE_CPU(object_new(TYPE_MICROBLAZE_CPU));
object_property_set_str(OBJECT(cpu), "version", "7.10.d", &error_abort);
object_property_set_bool(OBJECT(cpu), "little-endian",
- !TARGET_BIG_ENDIAN, &error_abort);
+ endianness == ENDIAN_MODE_LITTLE, &error_abort);
qdev_realize(DEVICE(cpu), NULL, &error_abort);
/* Attach emulated BRAM through the LMB. */
@@ -135,20 +145,41 @@ petalogix_s3adsp1800_init(MachineState *machine)
create_unimplemented_device("xps_gpio", GPIO_BASEADDR, 0x10000);
- microblaze_load_kernel(cpu, !TARGET_BIG_ENDIAN, ddr_base, ram_size,
- machine->initrd_filename,
+ microblaze_load_kernel(cpu, endianness == ENDIAN_MODE_LITTLE, ddr_base,
+ ram_size, machine->initrd_filename,
BINARY_DEVICE_TREE_FILE,
NULL);
}
+static int machine_get_endianness(Object *obj, Error **errp G_GNUC_UNUSED)
+{
+ S3Adsp1800MachineState *ms = PETALOGIX_S3ADSP1800_MACHINE(obj);
+ return ms->endianness;
+}
+
+static void machine_set_endianness(Object *obj, int endianness, Error **errp)
+{
+ S3Adsp1800MachineState *ms = PETALOGIX_S3ADSP1800_MACHINE(obj);
+ ms->endianness = endianness;
+}
+
static void petalogix_s3adsp1800_machine_class_init(ObjectClass *oc,
const void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
+ ObjectProperty *prop;
mc->desc = "PetaLogix linux refdesign for xilinx Spartan 3ADSP1800";
mc->init = petalogix_s3adsp1800_init;
mc->is_default = true;
+
+ prop = object_class_property_add_enum(oc, "endianness", "EndianMode",
+ &EndianMode_lookup,
+ machine_get_endianness,
+ machine_set_endianness);
+ object_property_set_default_str(prop, TARGET_BIG_ENDIAN ? "big" : "little");
+ object_class_property_set_description(oc, "endianness",
+ "Defines whether the machine runs in big or little endian mode");
}
static const TypeInfo petalogix_s3adsp1800_machine_types[] = {
@@ -156,6 +187,7 @@ static const TypeInfo petalogix_s3adsp1800_machine_types[] = {
.name = TYPE_PETALOGIX_S3ADSP1800_MACHINE,
.parent = TYPE_MACHINE,
.class_init = petalogix_s3adsp1800_machine_class_init,
+ .instance_size = sizeof(S3Adsp1800MachineState),
},
};
diff --git a/hw/microblaze/xlnx-zynqmp-pmu.c b/hw/microblaze/xlnx-zynqmp-pmu.c
index ed40b5f..e909802 100644
--- a/hw/microblaze/xlnx-zynqmp-pmu.c
+++ b/hw/microblaze/xlnx-zynqmp-pmu.c
@@ -181,12 +181,7 @@ static void xlnx_zynqmp_pmu_init(MachineState *machine)
static void xlnx_zynqmp_pmu_machine_init(MachineClass *mc)
{
- if (TARGET_BIG_ENDIAN) {
- mc->desc = "Xilinx ZynqMP PMU machine (big endian)";
- mc->deprecation_reason = "big endian support is not tested";
- } else {
- mc->desc = "Xilinx ZynqMP PMU machine (little endian)";
- }
+ mc->desc = "Xilinx ZynqMP PMU machine (little endian)";
mc->init = xlnx_zynqmp_pmu_init;
}
diff --git a/hw/mips/Kconfig b/hw/mips/Kconfig
index b09c89a..b59cb2f 100644
--- a/hw/mips/Kconfig
+++ b/hw/mips/Kconfig
@@ -13,13 +13,6 @@ config MALTA
select SERIAL_MM
select SMBUS_EEPROM
-config MIPSSIM
- bool
- default y
- depends on MIPS
- select SERIAL_MM
- select MIPSNET
-
config JAZZ
bool
default y
@@ -76,7 +69,7 @@ config LOONGSON3V
config MIPS_CPS
bool
- select MIPS_ITU
+ select MIPS_ITU if TCG
config MIPS_BOSTON
bool
diff --git a/hw/mips/cps.c b/hw/mips/cps.c
index 2a3ba3f..e47695e 100644
--- a/hw/mips/cps.c
+++ b/hw/mips/cps.c
@@ -24,7 +24,7 @@
#include "hw/mips/mips.h"
#include "hw/qdev-clock.h"
#include "hw/qdev-properties.h"
-#include "system/kvm.h"
+#include "system/tcg.h"
#include "system/reset.h"
qemu_irq get_cps_irq(MIPSCPSState *s, int pin_number)
@@ -59,7 +59,7 @@ static bool cpu_mips_itu_supported(CPUMIPSState *env)
{
bool is_mt = (env->CP0_Config5 & (1 << CP0C5_VP)) || ase_mt_available(env);
- return is_mt && !kvm_enabled();
+ return is_mt && tcg_enabled();
}
static void mips_cps_realize(DeviceState *dev, Error **errp)
diff --git a/hw/mips/loongson3_virt.c b/hw/mips/loongson3_virt.c
index de6fbcc..672083d 100644
--- a/hw/mips/loongson3_virt.c
+++ b/hw/mips/loongson3_virt.c
@@ -49,6 +49,7 @@
#include "system/qtest.h"
#include "system/reset.h"
#include "system/runstate.h"
+#include "system/system.h"
#include "qemu/error-report.h"
#define PM_CNTL_MODE 0x10
diff --git a/hw/mips/malta.c b/hw/mips/malta.c
index cbdbb21..02da629 100644
--- a/hw/mips/malta.c
+++ b/hw/mips/malta.c
@@ -52,6 +52,7 @@
#include "system/qtest.h"
#include "system/reset.h"
#include "system/runstate.h"
+#include "system/system.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "system/kvm.h"
@@ -1190,7 +1191,7 @@ void mips_malta_init(MachineState *machine)
* In little endian mode the 32bit words in the bios are swapped,
* a neat trick which allows bi-endian firmware.
*/
- if (!TARGET_BIG_ENDIAN) {
+ if (!TARGET_BIG_ENDIAN && bios_size > 0) {
uint32_t *end, *addr;
const size_t swapsize = MIN(bios_size, 0x3e0000);
addr = rom_ptr(FLASH_ADDRESS, swapsize);
diff --git a/hw/mips/meson.build b/hw/mips/meson.build
index 31dbd2b..390f0fd 100644
--- a/hw/mips/meson.build
+++ b/hw/mips/meson.build
@@ -8,7 +8,6 @@ mips_ss.add(when: 'CONFIG_MIPS_CPS', if_true: files('cps.c'))
if 'CONFIG_TCG' in config_all_accel
mips_ss.add(when: 'CONFIG_JAZZ', if_true: files('jazz.c'))
-mips_ss.add(when: 'CONFIG_MIPSSIM', if_true: files('mipssim.c'))
mips_ss.add(when: 'CONFIG_FULOONG', if_true: files('fuloong2e.c'))
mips_ss.add(when: 'CONFIG_MIPS_BOSTON', if_true: files('boston.c'))
endif
diff --git a/hw/mips/mipssim.c b/hw/mips/mipssim.c
deleted file mode 100644
index e843307..0000000
--- a/hw/mips/mipssim.c
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * QEMU/mipssim emulation
- *
- * Emulates a very simple machine model similar to the one used by the
- * proprietary MIPS emulator.
- *
- * Copyright (c) 2007 Thiemo Seufer
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qemu/datadir.h"
-#include "system/address-spaces.h"
-#include "hw/clock.h"
-#include "hw/mips/mips.h"
-#include "hw/char/serial-mm.h"
-#include "net/net.h"
-#include "system/system.h"
-#include "hw/boards.h"
-#include "hw/loader.h"
-#include "elf.h"
-#include "hw/sysbus.h"
-#include "hw/qdev-properties.h"
-#include "qemu/error-report.h"
-#include "system/qtest.h"
-#include "system/reset.h"
-#include "cpu.h"
-
-#define BIOS_SIZE (4 * MiB)
-
-static struct _loaderparams {
- int ram_size;
- const char *kernel_filename;
- const char *kernel_cmdline;
- const char *initrd_filename;
-} loaderparams;
-
-typedef struct ResetData {
- MIPSCPU *cpu;
- uint64_t vector;
-} ResetData;
-
-static uint64_t load_kernel(void)
-{
- uint64_t entry, kernel_high, initrd_size;
- long kernel_size;
- ram_addr_t initrd_offset;
-
- kernel_size = load_elf(loaderparams.kernel_filename, NULL,
- cpu_mips_kseg0_to_phys, NULL,
- &entry, NULL,
- &kernel_high, NULL,
- TARGET_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
- EM_MIPS, 1, 0);
- if (kernel_size < 0) {
- error_report("could not load kernel '%s': %s",
- loaderparams.kernel_filename,
- load_elf_strerror(kernel_size));
- exit(1);
- }
-
- /* load initrd */
- initrd_size = 0;
- initrd_offset = 0;
- if (loaderparams.initrd_filename) {
- initrd_size = get_image_size(loaderparams.initrd_filename);
- if (initrd_size > 0) {
- initrd_offset = ROUND_UP(kernel_high, INITRD_PAGE_SIZE);
- if (initrd_offset + initrd_size > loaderparams.ram_size) {
- error_report("memory too small for initial ram disk '%s'",
- loaderparams.initrd_filename);
- exit(1);
- }
- initrd_size = load_image_targphys(loaderparams.initrd_filename,
- initrd_offset, loaderparams.ram_size - initrd_offset);
- }
- if (initrd_size == (target_ulong) -1) {
- error_report("could not load initial ram disk '%s'",
- loaderparams.initrd_filename);
- exit(1);
- }
- }
- return entry;
-}
-
-static void main_cpu_reset(void *opaque)
-{
- ResetData *s = (ResetData *)opaque;
- CPUMIPSState *env = &s->cpu->env;
-
- cpu_reset(CPU(s->cpu));
- env->active_tc.PC = s->vector & ~(target_ulong)1;
- if (s->vector & 1) {
- env->hflags |= MIPS_HFLAG_M16;
- }
-}
-
-static void mipsnet_init(int base, qemu_irq irq)
-{
- DeviceState *dev;
- SysBusDevice *s;
-
- dev = qemu_create_nic_device("mipsnet", true, NULL);
- if (!dev) {
- return;
- }
-
- s = SYS_BUS_DEVICE(dev);
- sysbus_realize_and_unref(s, &error_fatal);
- sysbus_connect_irq(s, 0, irq);
- memory_region_add_subregion(get_system_io(),
- base,
- sysbus_mmio_get_region(s, 0));
-}
-
-static void
-mips_mipssim_init(MachineState *machine)
-{
- const char *kernel_filename = machine->kernel_filename;
- const char *kernel_cmdline = machine->kernel_cmdline;
- const char *initrd_filename = machine->initrd_filename;
- const char *bios_name = TARGET_BIG_ENDIAN ? "mips_bios.bin"
- : "mipsel_bios.bin";
- char *filename;
- MemoryRegion *address_space_mem = get_system_memory();
- MemoryRegion *isa = g_new(MemoryRegion, 1);
- MemoryRegion *bios = g_new(MemoryRegion, 1);
- Clock *cpuclk;
- MIPSCPU *cpu;
- CPUMIPSState *env;
- ResetData *reset_info;
- int bios_size;
-
- cpuclk = clock_new(OBJECT(machine), "cpu-refclk");
-#ifdef TARGET_MIPS64
- clock_set_hz(cpuclk, 6000000); /* 6 MHz */
-#else
- clock_set_hz(cpuclk, 12000000); /* 12 MHz */
-#endif
-
- /* Init CPUs. */
- cpu = mips_cpu_create_with_clock(machine->cpu_type, cpuclk,
- TARGET_BIG_ENDIAN);
- env = &cpu->env;
-
- reset_info = g_new0(ResetData, 1);
- reset_info->cpu = cpu;
- reset_info->vector = env->active_tc.PC;
- qemu_register_reset(main_cpu_reset, reset_info);
-
- /* Allocate RAM. */
- memory_region_init_rom(bios, NULL, "mips_mipssim.bios", BIOS_SIZE,
- &error_fatal);
-
- memory_region_add_subregion(address_space_mem, 0, machine->ram);
-
- /* Map the BIOS / boot exception handler. */
- memory_region_add_subregion(address_space_mem, 0x1fc00000LL, bios);
- /* Load a BIOS / boot exception handler image. */
- filename = qemu_find_file(QEMU_FILE_TYPE_BIOS,
- machine->firmware ?: bios_name);
- if (filename) {
- bios_size = load_image_targphys(filename, 0x1fc00000LL, BIOS_SIZE);
- g_free(filename);
- } else {
- bios_size = -1;
- }
- if ((bios_size < 0 || bios_size > BIOS_SIZE) &&
- machine->firmware && !qtest_enabled()) {
- /* Bail out if we have neither a kernel image nor boot vector code. */
- error_report("Could not load MIPS bios '%s'", machine->firmware);
- exit(1);
- } else {
- /* We have a boot vector start address. */
- env->active_tc.PC = (target_long)(int32_t)0xbfc00000;
- }
-
- if (kernel_filename) {
- loaderparams.ram_size = machine->ram_size;
- loaderparams.kernel_filename = kernel_filename;
- loaderparams.kernel_cmdline = kernel_cmdline;
- loaderparams.initrd_filename = initrd_filename;
- reset_info->vector = load_kernel();
- }
-
- /* Init CPU internal devices. */
- cpu_mips_irq_init_cpu(cpu);
- cpu_mips_clock_init(cpu);
-
- /*
- * Register 64 KB of ISA IO space at 0x1fd00000. But without interrupts
- * (except for the hardcoded serial port interrupt) -device cannot work,
- * so do not expose the ISA bus to the user.
- */
- memory_region_init_alias(isa, NULL, "isa_mmio",
- get_system_io(), 0, 0x00010000);
- memory_region_add_subregion(get_system_memory(), 0x1fd00000, isa);
-
- /*
- * A single 16450 sits at offset 0x3f8. It is attached to
- * MIPS CPU INT2, which is interrupt 4.
- */
- if (serial_hd(0)) {
- DeviceState *dev = qdev_new(TYPE_SERIAL_MM);
-
- qdev_prop_set_chr(dev, "chardev", serial_hd(0));
- qdev_prop_set_uint8(dev, "regshift", 0);
- qdev_prop_set_uint8(dev, "endianness", DEVICE_LITTLE_ENDIAN);
- sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
- sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, env->irq[4]);
- memory_region_add_subregion(get_system_io(), 0x3f8,
- sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0));
- }
-
- /* MIPSnet uses the MIPS CPU INT0, which is interrupt 2. */
- mipsnet_init(0x4200, env->irq[2]);
-}
-
-static void mips_mipssim_machine_init(MachineClass *mc)
-{
- mc->desc = "MIPS MIPSsim platform";
- mc->init = mips_mipssim_init;
-#ifdef TARGET_MIPS64
- mc->default_cpu_type = MIPS_CPU_TYPE_NAME("5Kf");
-#else
- mc->default_cpu_type = MIPS_CPU_TYPE_NAME("24Kf");
-#endif
- mc->default_ram_id = "mips_mipssim.ram";
-}
-
-DEFINE_MACHINE("mipssim", mips_mipssim_machine_init)
diff --git a/hw/misc/Kconfig b/hw/misc/Kconfig
index ec0fa5a..4e35657 100644
--- a/hw/misc/Kconfig
+++ b/hw/misc/Kconfig
@@ -47,6 +47,18 @@ config A9SCU
config ARM11SCU
bool
+config MAX78000_AES
+ bool
+
+config MAX78000_GCR
+ bool
+
+config MAX78000_ICC
+ bool
+
+config MAX78000_TRNG
+ bool
+
config MOS6522
bool
@@ -107,6 +119,7 @@ config STM32L4X5_RCC
config MIPS_ITU
bool
+ depends on TCG
config MPS2_FPGAIO
bool
diff --git a/hw/misc/aspeed_hace.c b/hw/misc/aspeed_hace.c
index f4bff32..726368f 100644
--- a/hw/misc/aspeed_hace.c
+++ b/hw/misc/aspeed_hace.c
@@ -10,14 +10,17 @@
*/
#include "qemu/osdep.h"
+#include "qemu/cutils.h"
#include "qemu/log.h"
#include "qemu/error-report.h"
+#include "qemu/iov.h"
#include "hw/misc/aspeed_hace.h"
#include "qapi/error.h"
#include "migration/vmstate.h"
#include "crypto/hash.h"
#include "hw/qdev-properties.h"
#include "hw/irq.h"
+#include "trace.h"
#define R_CRYPT_CMD (0x10 / 4)
@@ -27,9 +30,12 @@
#define TAG_IRQ BIT(15)
#define R_HASH_SRC (0x20 / 4)
-#define R_HASH_DEST (0x24 / 4)
+#define R_HASH_DIGEST (0x24 / 4)
#define R_HASH_KEY_BUFF (0x28 / 4)
#define R_HASH_SRC_LEN (0x2c / 4)
+#define R_HASH_SRC_HI (0x90 / 4)
+#define R_HASH_DIGEST_HI (0x94 / 4)
+#define R_HASH_KEY_BUFF_HI (0x98 / 4)
#define R_HASH_CMD (0x30 / 4)
/* Hash algorithm selection */
@@ -84,6 +90,42 @@ static const struct {
QCRYPTO_HASH_ALGO_SHA256 },
};
+static void hace_hexdump(const char *desc, const char *buf, size_t size)
+{
+ g_autoptr(GString) str = g_string_sized_new(64);
+ size_t len;
+ size_t i;
+
+ for (i = 0; i < size; i += len) {
+ len = MIN(16, size - i);
+ g_string_truncate(str, 0);
+ qemu_hexdump_line(str, buf + i, len, 1, 4);
+ trace_aspeed_hace_hexdump(desc, i, str->str);
+ }
+}
+
+static void hace_iov_hexdump(const char *desc, const struct iovec *iov,
+ const unsigned int iov_cnt)
+{
+ size_t size = 0;
+ char *buf;
+ int i;
+
+ for (i = 0; i < iov_cnt; i++) {
+ size += iov[i].iov_len;
+ }
+
+ buf = g_malloc(size);
+
+ if (!buf) {
+ return;
+ }
+
+ iov_to_buf(iov, iov_cnt, 0, buf, size);
+ hace_hexdump(desc, buf, size);
+ g_free(buf);
+}
+
static int hash_algo_lookup(uint32_t reg)
{
int i;
@@ -142,171 +184,269 @@ static bool has_padding(AspeedHACEState *s, struct iovec *iov,
return false;
}
-static int reconstruct_iov(AspeedHACEState *s, struct iovec *iov, int id,
- uint32_t *pad_offset)
+static uint64_t hash_get_source_addr(AspeedHACEState *s)
{
- int i, iov_count;
- if (*pad_offset != 0) {
- s->iov_cache[s->iov_count].iov_base = iov[id].iov_base;
- s->iov_cache[s->iov_count].iov_len = *pad_offset;
- ++s->iov_count;
- }
- for (i = 0; i < s->iov_count; i++) {
- iov[i].iov_base = s->iov_cache[i].iov_base;
- iov[i].iov_len = s->iov_cache[i].iov_len;
+ AspeedHACEClass *ahc = ASPEED_HACE_GET_CLASS(s);
+ uint64_t src_addr = 0;
+
+ src_addr = deposit64(src_addr, 0, 32, s->regs[R_HASH_SRC]);
+ if (ahc->has_dma64) {
+ src_addr = deposit64(src_addr, 32, 32, s->regs[R_HASH_SRC_HI]);
}
- iov_count = s->iov_count;
- s->iov_count = 0;
- s->total_req_len = 0;
- return iov_count;
+
+ return src_addr;
}
-static void do_hash_operation(AspeedHACEState *s, int algo, bool sg_mode,
- bool acc_mode)
+static int hash_prepare_direct_iov(AspeedHACEState *s, struct iovec *iov,
+ bool acc_mode, bool *acc_final_request)
{
- struct iovec iov[ASPEED_HACE_MAX_SG];
uint32_t total_msg_len;
uint32_t pad_offset;
- g_autofree uint8_t *digest_buf = NULL;
- size_t digest_len = 0;
- bool sg_acc_mode_final_request = false;
- int i;
+ uint64_t src;
void *haddr;
- Error *local_err = NULL;
+ hwaddr plen;
+ int iov_idx;
+
+ plen = s->regs[R_HASH_SRC_LEN];
+ src = hash_get_source_addr(s);
+ trace_aspeed_hace_hash_addr("src", src);
+ haddr = address_space_map(&s->dram_as, src, &plen, false,
+ MEMTXATTRS_UNSPECIFIED);
+ if (haddr == NULL) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: Unable to map address, addr=0x%" HWADDR_PRIx
+ " ,plen=0x%" HWADDR_PRIx "\n",
+ __func__, src, plen);
+ return -1;
+ }
- if (acc_mode && s->hash_ctx == NULL) {
- s->hash_ctx = qcrypto_hash_new(algo, &local_err);
- if (s->hash_ctx == NULL) {
- qemu_log_mask(LOG_GUEST_ERROR, "qcrypto hash failed : %s",
- error_get_pretty(local_err));
- error_free(local_err);
- return;
+ iov[0].iov_base = haddr;
+ iov_idx = 1;
+
+ if (acc_mode) {
+ s->total_req_len += plen;
+
+ if (has_padding(s, &iov[0], plen, &total_msg_len,
+ &pad_offset)) {
+ /* Padding being present indicates the final request */
+ *acc_final_request = true;
+ iov[0].iov_len = pad_offset;
+ } else {
+ iov[0].iov_len = plen;
}
+ } else {
+ iov[0].iov_len = plen;
}
- if (sg_mode) {
- uint32_t len = 0;
-
- for (i = 0; !(len & SG_LIST_LEN_LAST); i++) {
- uint32_t addr, src;
- hwaddr plen;
+ return iov_idx;
+}
- if (i == ASPEED_HACE_MAX_SG) {
- qemu_log_mask(LOG_GUEST_ERROR,
- "aspeed_hace: guest failed to set end of sg list marker\n");
- break;
- }
+static int hash_prepare_sg_iov(AspeedHACEState *s, struct iovec *iov,
+ bool acc_mode, bool *acc_final_request)
+{
+ uint32_t total_msg_len;
+ uint32_t pad_offset;
+ uint32_t len = 0;
+ uint32_t sg_addr;
+ uint64_t src;
+ int iov_idx;
+ hwaddr plen;
+ void *haddr;
- src = s->regs[R_HASH_SRC] + (i * SG_LIST_ENTRY_SIZE);
+ src = hash_get_source_addr(s);
+ for (iov_idx = 0; !(len & SG_LIST_LEN_LAST); iov_idx++) {
+ if (iov_idx == ASPEED_HACE_MAX_SG) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: Failed to set end of sg list marker\n",
+ __func__);
+ return -1;
+ }
- len = address_space_ldl_le(&s->dram_as, src,
+ len = address_space_ldl_le(&s->dram_as, src,
+ MEMTXATTRS_UNSPECIFIED, NULL);
+ sg_addr = address_space_ldl_le(&s->dram_as, src + SG_LIST_LEN_SIZE,
MEMTXATTRS_UNSPECIFIED, NULL);
+ sg_addr &= SG_LIST_ADDR_MASK;
+ trace_aspeed_hace_hash_sg(iov_idx, src, sg_addr, len);
+ /*
+ * To maintain compatibility with older SoCs such as the AST2600,
+ * the AST2700 HW automatically set bit 34 of the 64-bit sg_addr.
+ * As a result, the firmware only needs to provide a 32-bit sg_addr
+ * containing bits [31:0]. This is sufficient for the AST2700, as
+ * it uses a DRAM offset rather than a DRAM address.
+ */
+ plen = len & SG_LIST_LEN_MASK;
+ haddr = address_space_map(&s->dram_as, sg_addr, &plen, false,
+ MEMTXATTRS_UNSPECIFIED);
- addr = address_space_ldl_le(&s->dram_as, src + SG_LIST_LEN_SIZE,
- MEMTXATTRS_UNSPECIFIED, NULL);
- addr &= SG_LIST_ADDR_MASK;
+ if (haddr == NULL) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: Unable to map address, sg_addr=0x%x, "
+ "plen=0x%" HWADDR_PRIx "\n",
+ __func__, sg_addr, plen);
+ return -1;
+ }
- plen = len & SG_LIST_LEN_MASK;
- haddr = address_space_map(&s->dram_as, addr, &plen, false,
- MEMTXATTRS_UNSPECIFIED);
- if (haddr == NULL) {
- qemu_log_mask(LOG_GUEST_ERROR,
- "%s: qcrypto failed\n", __func__);
- return;
- }
- iov[i].iov_base = haddr;
- if (acc_mode) {
- s->total_req_len += plen;
-
- if (has_padding(s, &iov[i], plen, &total_msg_len,
- &pad_offset)) {
- /* Padding being present indicates the final request */
- sg_acc_mode_final_request = true;
- iov[i].iov_len = pad_offset;
- } else {
- iov[i].iov_len = plen;
- }
+ src += SG_LIST_ENTRY_SIZE;
+
+ iov[iov_idx].iov_base = haddr;
+ if (acc_mode) {
+ s->total_req_len += plen;
+
+ if (has_padding(s, &iov[iov_idx], plen, &total_msg_len,
+ &pad_offset)) {
+ /* Padding being present indicates the final request */
+ *acc_final_request = true;
+ iov[iov_idx].iov_len = pad_offset;
} else {
- iov[i].iov_len = plen;
+ iov[iov_idx].iov_len = plen;
}
+ } else {
+ iov[iov_idx].iov_len = plen;
}
- } else {
- hwaddr len = s->regs[R_HASH_SRC_LEN];
+ }
- haddr = address_space_map(&s->dram_as, s->regs[R_HASH_SRC],
- &len, false, MEMTXATTRS_UNSPECIFIED);
- if (haddr == NULL) {
- qemu_log_mask(LOG_GUEST_ERROR, "%s: qcrypto failed\n", __func__);
+ return iov_idx;
+}
+
+static uint64_t hash_get_digest_addr(AspeedHACEState *s)
+{
+ AspeedHACEClass *ahc = ASPEED_HACE_GET_CLASS(s);
+ uint64_t digest_addr = 0;
+
+ digest_addr = deposit64(digest_addr, 0, 32, s->regs[R_HASH_DIGEST]);
+ if (ahc->has_dma64) {
+ digest_addr = deposit64(digest_addr, 32, 32, s->regs[R_HASH_DIGEST_HI]);
+ }
+
+ return digest_addr;
+}
+
+static void hash_write_digest_and_unmap_iov(AspeedHACEState *s,
+ struct iovec *iov,
+ int iov_idx,
+ uint8_t *digest_buf,
+ size_t digest_len)
+{
+ uint64_t digest_addr = 0;
+
+ digest_addr = hash_get_digest_addr(s);
+ trace_aspeed_hace_hash_addr("digest", digest_addr);
+ if (address_space_write(&s->dram_as, digest_addr,
+ MEMTXATTRS_UNSPECIFIED,
+ digest_buf, digest_len)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: Failed to write digest to 0x%" HWADDR_PRIx "\n",
+ __func__, digest_addr);
+ }
+
+ if (trace_event_get_state_backends(TRACE_ASPEED_HACE_HEXDUMP)) {
+ hace_hexdump("digest", (char *)digest_buf, digest_len);
+ }
+
+ for (; iov_idx > 0; iov_idx--) {
+ address_space_unmap(&s->dram_as, iov[iov_idx - 1].iov_base,
+ iov[iov_idx - 1].iov_len, false,
+ iov[iov_idx - 1].iov_len);
+ }
+}
+
+static void hash_execute_non_acc_mode(AspeedHACEState *s, int algo,
+ struct iovec *iov, int iov_idx)
+{
+ g_autofree uint8_t *digest_buf = NULL;
+ Error *local_err = NULL;
+ size_t digest_len = 0;
+
+ if (qcrypto_hash_bytesv(algo, iov, iov_idx, &digest_buf,
+ &digest_len, &local_err) < 0) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: qcrypto hash bytesv failed : %s",
+ __func__, error_get_pretty(local_err));
+ error_free(local_err);
+ return;
+ }
+
+ hash_write_digest_and_unmap_iov(s, iov, iov_idx, digest_buf, digest_len);
+}
+
+static void hash_execute_acc_mode(AspeedHACEState *s, int algo,
+ struct iovec *iov, int iov_idx,
+ bool final_request)
+{
+ g_autofree uint8_t *digest_buf = NULL;
+ Error *local_err = NULL;
+ size_t digest_len = 0;
+
+ trace_aspeed_hace_hash_execute_acc_mode(final_request);
+
+ if (s->hash_ctx == NULL) {
+ s->hash_ctx = qcrypto_hash_new(algo, &local_err);
+ if (s->hash_ctx == NULL) {
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: qcrypto hash new failed : %s",
+ __func__, error_get_pretty(local_err));
+ error_free(local_err);
return;
}
- iov[0].iov_base = haddr;
- iov[0].iov_len = len;
- i = 1;
-
- if (s->iov_count) {
- /*
- * In aspeed sdk kernel driver, sg_mode is disabled in hash_final().
- * Thus if we received a request with sg_mode disabled, it is
- * required to check whether cache is empty. If no, we should
- * combine cached iov and the current iov.
- */
- s->total_req_len += len;
- if (has_padding(s, iov, len, &total_msg_len, &pad_offset)) {
- i = reconstruct_iov(s, iov, 0, &pad_offset);
- }
- }
}
- if (acc_mode) {
- if (qcrypto_hash_updatev(s->hash_ctx, iov, i, &local_err) < 0) {
- qemu_log_mask(LOG_GUEST_ERROR, "qcrypto hash update failed : %s",
- error_get_pretty(local_err));
+ if (qcrypto_hash_updatev(s->hash_ctx, iov, iov_idx, &local_err) < 0) {
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: qcrypto hash updatev failed : %s",
+ __func__, error_get_pretty(local_err));
+ error_free(local_err);
+ return;
+ }
+
+ if (final_request) {
+ if (qcrypto_hash_finalize_bytes(s->hash_ctx, &digest_buf,
+ &digest_len, &local_err)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: qcrypto hash finalize bytes failed : %s",
+ __func__, error_get_pretty(local_err));
error_free(local_err);
- return;
+ local_err = NULL;
}
- if (sg_acc_mode_final_request) {
- if (qcrypto_hash_finalize_bytes(s->hash_ctx, &digest_buf,
- &digest_len, &local_err)) {
- qemu_log_mask(LOG_GUEST_ERROR,
- "qcrypto hash finalize failed : %s",
- error_get_pretty(local_err));
- error_free(local_err);
- local_err = NULL;
- }
+ qcrypto_hash_free(s->hash_ctx);
+
+ s->hash_ctx = NULL;
+ s->total_req_len = 0;
+ }
- qcrypto_hash_free(s->hash_ctx);
+ hash_write_digest_and_unmap_iov(s, iov, iov_idx, digest_buf, digest_len);
+}
- s->hash_ctx = NULL;
- s->iov_count = 0;
- s->total_req_len = 0;
- }
- } else if (qcrypto_hash_bytesv(algo, iov, i, &digest_buf,
- &digest_len, &local_err) < 0) {
- qemu_log_mask(LOG_GUEST_ERROR, "qcrypto hash bytesv failed : %s",
- error_get_pretty(local_err));
- error_free(local_err);
- return;
+static void do_hash_operation(AspeedHACEState *s, int algo, bool sg_mode,
+ bool acc_mode)
+{
+ QEMU_UNINITIALIZED struct iovec iov[ASPEED_HACE_MAX_SG];
+ bool acc_final_request = false;
+ int iov_idx = -1;
+
+ /* Prepares the iov for hashing operations based on the selected mode */
+ if (sg_mode) {
+ iov_idx = hash_prepare_sg_iov(s, iov, acc_mode, &acc_final_request);
+ } else {
+ iov_idx = hash_prepare_direct_iov(s, iov, acc_mode,
+ &acc_final_request);
}
- if (address_space_write(&s->dram_as, s->regs[R_HASH_DEST],
- MEMTXATTRS_UNSPECIFIED,
- digest_buf, digest_len)) {
+ if (iov_idx <= 0) {
qemu_log_mask(LOG_GUEST_ERROR,
- "aspeed_hace: address space write failed\n");
+ "%s: Failed to prepare iov\n", __func__);
+ return;
}
- for (; i > 0; i--) {
- address_space_unmap(&s->dram_as, iov[i - 1].iov_base,
- iov[i - 1].iov_len, false,
- iov[i - 1].iov_len);
+ if (trace_event_get_state_backends(TRACE_ASPEED_HACE_HEXDUMP)) {
+ hace_iov_hexdump("plaintext", iov, iov_idx);
}
- /*
- * Set status bits to indicate completion. Testing shows hardware sets
- * these irrespective of HASH_IRQ_EN.
- */
- s->regs[R_STATUS] |= HASH_IRQ;
+ /* Executes the hash operation */
+ if (acc_mode) {
+ hash_execute_acc_mode(s, algo, iov, iov_idx, acc_final_request);
+ } else {
+ hash_execute_non_acc_mode(s, algo, iov, iov_idx);
+ }
}
static uint64_t aspeed_hace_read(void *opaque, hwaddr addr, unsigned int size)
@@ -315,12 +455,7 @@ static uint64_t aspeed_hace_read(void *opaque, hwaddr addr, unsigned int size)
addr >>= 2;
- if (addr >= ASPEED_HACE_NR_REGS) {
- qemu_log_mask(LOG_GUEST_ERROR,
- "%s: Out-of-bounds read at offset 0x%" HWADDR_PRIx "\n",
- __func__, addr << 2);
- return 0;
- }
+ trace_aspeed_hace_read(addr << 2, s->regs[addr]);
return s->regs[addr];
}
@@ -333,12 +468,7 @@ static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data,
addr >>= 2;
- if (addr >= ASPEED_HACE_NR_REGS) {
- qemu_log_mask(LOG_GUEST_ERROR,
- "%s: Out-of-bounds write at offset 0x%" HWADDR_PRIx "\n",
- __func__, addr << 2);
- return;
- }
+ trace_aspeed_hace_write(addr << 2, data);
switch (addr) {
case R_STATUS:
@@ -362,7 +492,7 @@ static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data,
case R_HASH_SRC:
data &= ahc->src_mask;
break;
- case R_HASH_DEST:
+ case R_HASH_DIGEST:
data &= ahc->dest_mask;
break;
case R_HASH_KEY_BUFF:
@@ -390,10 +520,16 @@ static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data,
qemu_log_mask(LOG_GUEST_ERROR,
"%s: Invalid hash algorithm selection 0x%"PRIx64"\n",
__func__, data & ahc->hash_mask);
- break;
+ } else {
+ do_hash_operation(s, algo, data & HASH_SG_EN,
+ ((data & HASH_HMAC_MASK) == HASH_DIGEST_ACCUM));
}
- do_hash_operation(s, algo, data & HASH_SG_EN,
- ((data & HASH_HMAC_MASK) == HASH_DIGEST_ACCUM));
+
+ /*
+ * Set status bits to indicate completion. Testing shows hardware sets
+ * these irrespective of HASH_IRQ_EN.
+ */
+ s->regs[R_STATUS] |= HASH_IRQ;
if (data & HASH_IRQ_EN) {
qemu_irq_raise(s->irq);
@@ -410,6 +546,15 @@ static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data,
}
}
break;
+ case R_HASH_SRC_HI:
+ data &= ahc->src_hi_mask;
+ break;
+ case R_HASH_DIGEST_HI:
+ data &= ahc->dest_hi_mask;
+ break;
+ case R_HASH_KEY_BUFF_HI:
+ data &= ahc->key_hi_mask;
+ break;
default:
break;
}
@@ -430,14 +575,14 @@ static const MemoryRegionOps aspeed_hace_ops = {
static void aspeed_hace_reset(DeviceState *dev)
{
struct AspeedHACEState *s = ASPEED_HACE(dev);
+ AspeedHACEClass *ahc = ASPEED_HACE_GET_CLASS(s);
if (s->hash_ctx != NULL) {
qcrypto_hash_free(s->hash_ctx);
s->hash_ctx = NULL;
}
- memset(s->regs, 0, sizeof(s->regs));
- s->iov_count = 0;
+ memset(s->regs, 0, ahc->nr_regs << 2);
s->total_req_len = 0;
}
@@ -445,11 +590,13 @@ static void aspeed_hace_realize(DeviceState *dev, Error **errp)
{
AspeedHACEState *s = ASPEED_HACE(dev);
SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+ AspeedHACEClass *ahc = ASPEED_HACE_GET_CLASS(s);
sysbus_init_irq(sbd, &s->irq);
+ s->regs = g_new(uint32_t, ahc->nr_regs);
memory_region_init_io(&s->iomem, OBJECT(s), &aspeed_hace_ops, s,
- TYPE_ASPEED_HACE, 0x1000);
+ TYPE_ASPEED_HACE, ahc->nr_regs << 2);
if (!s->dram_mr) {
error_setg(errp, TYPE_ASPEED_HACE ": 'dram' link not set");
@@ -469,21 +616,28 @@ static const Property aspeed_hace_properties[] = {
static const VMStateDescription vmstate_aspeed_hace = {
.name = TYPE_ASPEED_HACE,
- .version_id = 1,
- .minimum_version_id = 1,
+ .version_id = 2,
+ .minimum_version_id = 2,
.fields = (const VMStateField[]) {
- VMSTATE_UINT32_ARRAY(regs, AspeedHACEState, ASPEED_HACE_NR_REGS),
VMSTATE_UINT32(total_req_len, AspeedHACEState),
- VMSTATE_UINT32(iov_count, AspeedHACEState),
VMSTATE_END_OF_LIST(),
}
};
+static void aspeed_hace_unrealize(DeviceState *dev)
+{
+ AspeedHACEState *s = ASPEED_HACE(dev);
+
+ g_free(s->regs);
+ s->regs = NULL;
+}
+
static void aspeed_hace_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
dc->realize = aspeed_hace_realize;
+ dc->unrealize = aspeed_hace_unrealize;
device_class_set_legacy_reset(dc, aspeed_hace_reset);
device_class_set_props(dc, aspeed_hace_properties);
dc->vmsd = &vmstate_aspeed_hace;
@@ -504,6 +658,7 @@ static void aspeed_ast2400_hace_class_init(ObjectClass *klass, const void *data)
dc->desc = "AST2400 Hash and Crypto Engine";
+ ahc->nr_regs = 0x64 >> 2;
ahc->src_mask = 0x0FFFFFFF;
ahc->dest_mask = 0x0FFFFFF8;
ahc->key_mask = 0x0FFFFFC0;
@@ -523,6 +678,7 @@ static void aspeed_ast2500_hace_class_init(ObjectClass *klass, const void *data)
dc->desc = "AST2500 Hash and Crypto Engine";
+ ahc->nr_regs = 0x64 >> 2;
ahc->src_mask = 0x3fffffff;
ahc->dest_mask = 0x3ffffff8;
ahc->key_mask = 0x3FFFFFC0;
@@ -542,6 +698,7 @@ static void aspeed_ast2600_hace_class_init(ObjectClass *klass, const void *data)
dc->desc = "AST2600 Hash and Crypto Engine";
+ ahc->nr_regs = 0x64 >> 2;
ahc->src_mask = 0x7FFFFFFF;
ahc->dest_mask = 0x7FFFFFF8;
ahc->key_mask = 0x7FFFFFF8;
@@ -561,6 +718,7 @@ static void aspeed_ast1030_hace_class_init(ObjectClass *klass, const void *data)
dc->desc = "AST1030 Hash and Crypto Engine";
+ ahc->nr_regs = 0x64 >> 2;
ahc->src_mask = 0x7FFFFFFF;
ahc->dest_mask = 0x7FFFFFF8;
ahc->key_mask = 0x7FFFFFF8;
@@ -580,17 +738,36 @@ static void aspeed_ast2700_hace_class_init(ObjectClass *klass, const void *data)
dc->desc = "AST2700 Hash and Crypto Engine";
+ ahc->nr_regs = 0x9C >> 2;
ahc->src_mask = 0x7FFFFFFF;
ahc->dest_mask = 0x7FFFFFF8;
ahc->key_mask = 0x7FFFFFF8;
ahc->hash_mask = 0x00147FFF;
/*
+ * The AST2700 supports a maximum DRAM size of 8 GB, with a DRAM
+ * addressable range from 0x0_0000_0000 to 0x1_FFFF_FFFF. Since this range
+ * fits within 34 bits, only bits [33:0] are needed to store the DRAM
+ * offset. To optimize address storage, the high physical address bits
+ * [1:0] of the source, digest and key buffer addresses are stored as
+ * dram_offset bits [33:32].
+ *
+ * This approach eliminates the need to reduce the high part of the DRAM
+ * physical address for DMA operations. Previously, this was calculated as
+ * (high physical address bits [7:0] - 4), since the DRAM start address is
+ * 0x4_00000000, making the high part address [7:0] - 4.
+ */
+ ahc->src_hi_mask = 0x00000003;
+ ahc->dest_hi_mask = 0x00000003;
+ ahc->key_hi_mask = 0x00000003;
+
+ /*
* Currently, it does not support the CRYPT command. Instead, it only
* sends an interrupt to notify the firmware that the crypt command
* has completed. It is a temporary workaround.
*/
ahc->raise_crypt_interrupt_workaround = true;
+ ahc->has_dma64 = true;
}
static const TypeInfo aspeed_ast2700_hace_info = {
diff --git a/hw/misc/aspeed_sbc.c b/hw/misc/aspeed_sbc.c
index a7d101b..2fc5db7 100644
--- a/hw/misc/aspeed_sbc.c
+++ b/hw/misc/aspeed_sbc.c
@@ -15,9 +15,14 @@
#include "hw/misc/aspeed_sbc.h"
#include "qapi/error.h"
#include "migration/vmstate.h"
+#include "trace.h"
#define R_PROT (0x000 / 4)
+#define R_CMD (0x004 / 4)
+#define R_ADDR (0x010 / 4)
#define R_STATUS (0x014 / 4)
+#define R_CAMP1 (0x020 / 4)
+#define R_CAMP2 (0x024 / 4)
#define R_QSR (0x040 / 4)
/* R_STATUS */
@@ -41,6 +46,20 @@
#define QSR_RSA_MASK (0x3 << 12)
#define QSR_HASH_MASK (0x3 << 10)
+#define OTP_MEMORY_SIZE 0x4000
+/* OTP command */
+#define SBC_OTP_CMD_READ 0x23b1e361
+#define SBC_OTP_CMD_WRITE 0x23b1e362
+#define SBC_OTP_CMD_PROG 0x23b1e364
+
+#define OTP_DATA_DWORD_COUNT (0x800)
+#define OTP_TOTAL_DWORD_COUNT (0x1000)
+
+/* Voltage mode */
+#define MODE_REGISTER (0x1000)
+#define MODE_REGISTER_A (0x3000)
+#define MODE_REGISTER_B (0x5000)
+
static uint64_t aspeed_sbc_read(void *opaque, hwaddr addr, unsigned int size)
{
AspeedSBCState *s = ASPEED_SBC(opaque);
@@ -57,6 +76,142 @@ static uint64_t aspeed_sbc_read(void *opaque, hwaddr addr, unsigned int size)
return s->regs[addr];
}
+static bool aspeed_sbc_otp_read(AspeedSBCState *s,
+ uint32_t otp_addr)
+{
+ MemTxResult ret;
+ AspeedOTPState *otp = &s->otp;
+ uint32_t value, otp_offset;
+ bool is_data = false;
+
+ if (otp_addr < OTP_DATA_DWORD_COUNT) {
+ is_data = true;
+ } else if (otp_addr >= OTP_TOTAL_DWORD_COUNT) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "Invalid OTP addr 0x%x\n",
+ otp_addr);
+ return false;
+ }
+
+ otp_offset = otp_addr << 2;
+ ret = address_space_read(&otp->as, otp_offset, MEMTXATTRS_UNSPECIFIED,
+ &value, sizeof(value));
+ if (ret != MEMTX_OK) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "Failed to read OTP memory, addr = %x\n",
+ otp_addr);
+ return false;
+ }
+ s->regs[R_CAMP1] = value;
+ trace_aspeed_sbc_otp_read(otp_addr, value);
+
+ if (is_data) {
+ ret = address_space_read(&otp->as, otp_offset + 4,
+ MEMTXATTRS_UNSPECIFIED,
+ &value, sizeof(value));
+ if (ret != MEMTX_OK) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "Failed to read OTP memory, addr = %x\n",
+ otp_addr);
+ return false;
+ }
+ s->regs[R_CAMP2] = value;
+ trace_aspeed_sbc_otp_read(otp_addr + 1, value);
+ }
+
+ return true;
+}
+
+static bool mode_handler(uint32_t otp_addr)
+{
+ switch (otp_addr) {
+ case MODE_REGISTER:
+ case MODE_REGISTER_A:
+ case MODE_REGISTER_B:
+ /* HW behavior, do nothing here */
+ return true;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "Unsupported address 0x%x\n",
+ otp_addr);
+ return false;
+ }
+}
+
+static bool aspeed_sbc_otp_write(AspeedSBCState *s,
+ uint32_t otp_addr)
+{
+ if (otp_addr == 0) {
+ trace_aspeed_sbc_ignore_cmd(otp_addr);
+ return true;
+ } else {
+ if (mode_handler(otp_addr) == false) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static bool aspeed_sbc_otp_prog(AspeedSBCState *s,
+ uint32_t otp_addr)
+{
+ MemTxResult ret;
+ AspeedOTPState *otp = &s->otp;
+ uint32_t value = s->regs[R_CAMP1];
+
+ ret = address_space_write(&otp->as, otp_addr, MEMTXATTRS_UNSPECIFIED,
+ &value, sizeof(value));
+ if (ret != MEMTX_OK) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "Failed to write OTP memory, addr = %x\n",
+ otp_addr);
+ return false;
+ }
+
+ trace_aspeed_sbc_otp_prog(otp_addr, value);
+
+ return true;
+}
+
+static void aspeed_sbc_handle_command(void *opaque, uint32_t cmd)
+{
+ AspeedSBCState *s = ASPEED_SBC(opaque);
+ AspeedSBCClass *sc = ASPEED_SBC_GET_CLASS(opaque);
+ bool ret = false;
+ uint32_t otp_addr;
+
+ if (!sc->has_otp) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: OTP memory is not supported\n",
+ __func__);
+ return;
+ }
+
+ s->regs[R_STATUS] &= ~(OTP_MEM_IDLE | OTP_IDLE);
+ otp_addr = s->regs[R_ADDR];
+
+ switch (cmd) {
+ case SBC_OTP_CMD_READ:
+ ret = aspeed_sbc_otp_read(s, otp_addr);
+ break;
+ case SBC_OTP_CMD_WRITE:
+ ret = aspeed_sbc_otp_write(s, otp_addr);
+ break;
+ case SBC_OTP_CMD_PROG:
+ ret = aspeed_sbc_otp_prog(s, otp_addr);
+ break;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: Unknown command 0x%x\n",
+ __func__, cmd);
+ break;
+ }
+
+ trace_aspeed_sbc_handle_cmd(cmd, otp_addr, ret);
+ s->regs[R_STATUS] |= (OTP_MEM_IDLE | OTP_IDLE);
+}
+
static void aspeed_sbc_write(void *opaque, hwaddr addr, uint64_t data,
unsigned int size)
{
@@ -78,6 +233,9 @@ static void aspeed_sbc_write(void *opaque, hwaddr addr, uint64_t data,
"%s: write to read only register 0x%" HWADDR_PRIx "\n",
__func__, addr << 2);
return;
+ case R_CMD:
+ aspeed_sbc_handle_command(opaque, data);
+ return;
default:
break;
}
@@ -115,10 +273,30 @@ static void aspeed_sbc_reset(DeviceState *dev)
s->regs[R_QSR] = s->signing_settings;
}
+static void aspeed_sbc_instance_init(Object *obj)
+{
+ AspeedSBCClass *sc = ASPEED_SBC_GET_CLASS(obj);
+ AspeedSBCState *s = ASPEED_SBC(obj);
+
+ if (sc->has_otp) {
+ object_initialize_child(OBJECT(s), "otp", &s->otp,
+ TYPE_ASPEED_OTP);
+ }
+}
+
static void aspeed_sbc_realize(DeviceState *dev, Error **errp)
{
AspeedSBCState *s = ASPEED_SBC(dev);
SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+ AspeedSBCClass *sc = ASPEED_SBC_GET_CLASS(dev);
+
+ if (sc->has_otp) {
+ object_property_set_int(OBJECT(&s->otp), "size",
+ OTP_MEMORY_SIZE, &error_abort);
+ if (!qdev_realize(DEVICE(&s->otp), NULL, errp)) {
+ return;
+ }
+ }
memory_region_init_io(&s->iomem, OBJECT(s), &aspeed_sbc_ops, s,
TYPE_ASPEED_SBC, 0x1000);
@@ -155,6 +333,7 @@ static const TypeInfo aspeed_sbc_info = {
.name = TYPE_ASPEED_SBC,
.parent = TYPE_SYS_BUS_DEVICE,
.instance_size = sizeof(AspeedSBCState),
+ .instance_init = aspeed_sbc_instance_init,
.class_init = aspeed_sbc_class_init,
.class_size = sizeof(AspeedSBCClass)
};
@@ -162,8 +341,10 @@ static const TypeInfo aspeed_sbc_info = {
static void aspeed_ast2600_sbc_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
+ AspeedSBCClass *sc = ASPEED_SBC_CLASS(klass);
dc->desc = "AST2600 Secure Boot Controller";
+ sc->has_otp = true;
}
static const TypeInfo aspeed_ast2600_sbc_info = {
@@ -172,9 +353,25 @@ static const TypeInfo aspeed_ast2600_sbc_info = {
.class_init = aspeed_ast2600_sbc_class_init,
};
+static void aspeed_ast10x0_sbc_class_init(ObjectClass *klass, const void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ AspeedSBCClass *sc = ASPEED_SBC_CLASS(klass);
+
+ dc->desc = "AST10X0 Secure Boot Controller";
+ sc->has_otp = true;
+}
+
+static const TypeInfo aspeed_ast10x0_sbc_info = {
+ .name = TYPE_ASPEED_AST10X0_SBC,
+ .parent = TYPE_ASPEED_SBC,
+ .class_init = aspeed_ast10x0_sbc_class_init,
+};
+
static void aspeed_sbc_register_types(void)
{
type_register_static(&aspeed_ast2600_sbc_info);
+ type_register_static(&aspeed_ast10x0_sbc_info);
type_register_static(&aspeed_sbc_info);
}
diff --git a/hw/misc/aspeed_scu.c b/hw/misc/aspeed_scu.c
index 4930e00..a0ab5ee 100644
--- a/hw/misc/aspeed_scu.c
+++ b/hw/misc/aspeed_scu.c
@@ -91,6 +91,7 @@
#define BMC_DEV_ID TO_REG(0x1A4)
#define AST2600_PROT_KEY TO_REG(0x00)
+#define AST2600_PROT_KEY2 TO_REG(0x10)
#define AST2600_SILICON_REV TO_REG(0x04)
#define AST2600_SILICON_REV2 TO_REG(0x14)
#define AST2600_SYS_RST_CTRL TO_REG(0x40)
@@ -176,6 +177,7 @@
#define AST2700_SCUIO_UARTCLK_GEN TO_REG(0x330)
#define AST2700_SCUIO_HUARTCLK_GEN TO_REG(0x334)
#define AST2700_SCUIO_CLK_DUTY_MEAS_RST TO_REG(0x388)
+#define AST2700_SCUIO_FREQ_CNT_CTL TO_REG(0x3A0)
#define SCU_IO_REGION_SIZE 0x1000
@@ -722,6 +724,8 @@ static void aspeed_ast2600_scu_write(void *opaque, hwaddr offset,
int reg = TO_REG(offset);
/* Truncate here so bitwise operations below behave as expected */
uint32_t data = data64;
+ bool prot_data_state = data == ASPEED_SCU_PROT_KEY;
+ bool unlocked = s->regs[AST2600_PROT_KEY] && s->regs[AST2600_PROT_KEY2];
if (reg >= ASPEED_AST2600_SCU_NR_REGS) {
qemu_log_mask(LOG_GUEST_ERROR,
@@ -730,15 +734,24 @@ static void aspeed_ast2600_scu_write(void *opaque, hwaddr offset,
return;
}
- if (reg > PROT_KEY && !s->regs[PROT_KEY]) {
+ if ((reg != AST2600_PROT_KEY && reg != AST2600_PROT_KEY2) && !unlocked) {
qemu_log_mask(LOG_GUEST_ERROR, "%s: SCU is locked!\n", __func__);
+ return;
}
trace_aspeed_scu_write(offset, size, data);
switch (reg) {
case AST2600_PROT_KEY:
- s->regs[reg] = (data == ASPEED_SCU_PROT_KEY) ? 1 : 0;
+ /*
+ * Writing a value to SCU000 will modify both protection
+ * registers to each protection register individually.
+ */
+ s->regs[AST2600_PROT_KEY] = prot_data_state;
+ s->regs[AST2600_PROT_KEY2] = prot_data_state;
+ return;
+ case AST2600_PROT_KEY2:
+ s->regs[AST2600_PROT_KEY2] = prot_data_state;
return;
case AST2600_HW_STRAP1:
case AST2600_HW_STRAP2:
@@ -1022,6 +1035,10 @@ static void aspeed_ast2700_scuio_write(void *opaque, hwaddr offset,
s->regs[reg - 1] ^= data;
updated = true;
break;
+ case AST2700_SCUIO_FREQ_CNT_CTL:
+ s->regs[reg] = deposit32(s->regs[reg], 6, 1, !!(data & BIT(1)));
+ updated = true;
+ break;
default:
qemu_log_mask(LOG_GUEST_ERROR,
"%s: Unhandled write at offset 0x%" HWADDR_PRIx "\n",
@@ -1066,6 +1083,7 @@ static const uint32_t ast2700_a0_resets_io[ASPEED_AST2700_SCU_NR_REGS] = {
[AST2700_SCUIO_UARTCLK_GEN] = 0x00014506,
[AST2700_SCUIO_HUARTCLK_GEN] = 0x000145c0,
[AST2700_SCUIO_CLK_DUTY_MEAS_RST] = 0x0c9100d2,
+ [AST2700_SCUIO_FREQ_CNT_CTL] = 0x00000080,
};
static void aspeed_2700_scuio_class_init(ObjectClass *klass, const void *data)
diff --git a/hw/misc/aspeed_sdmc.c b/hw/misc/aspeed_sdmc.c
index f04d993..dff7cc3 100644
--- a/hw/misc/aspeed_sdmc.c
+++ b/hw/misc/aspeed_sdmc.c
@@ -570,6 +570,9 @@ static void aspeed_2700_sdmc_reset(DeviceState *dev)
/* Set ram size bit and defaults values */
s->regs[R_MAIN_CONF] = asc->compute_conf(s, 0);
+ /* Skipping dram init */
+ s->regs[R_MAIN_CONTROL] = BIT(16);
+
if (s->unlocked) {
s->regs[R_2700_PROT] = PROT_UNLOCKED;
}
diff --git a/hw/misc/ivshmem-flat.c b/hw/misc/ivshmem-flat.c
index be28c24..27ee8c9 100644
--- a/hw/misc/ivshmem-flat.c
+++ b/hw/misc/ivshmem-flat.c
@@ -138,6 +138,8 @@ static void ivshmem_flat_remove_peer(IvshmemFTState *s, uint16_t peer_id)
static void ivshmem_flat_add_vector(IvshmemFTState *s, IvshmemPeer *peer,
int vector_fd)
{
+ Error *err = NULL;
+
if (peer->vector_counter >= IVSHMEM_MAX_VECTOR_NUM) {
trace_ivshmem_flat_add_vector_failure(peer->vector_counter,
vector_fd, peer->id);
@@ -154,7 +156,10 @@ static void ivshmem_flat_add_vector(IvshmemFTState *s, IvshmemPeer *peer,
* peer.
*/
peer->vector[peer->vector_counter].id = peer->vector_counter;
- g_unix_set_fd_nonblocking(vector_fd, true, NULL);
+ if (!qemu_set_blocking(vector_fd, false, &err)) {
+ /* FIXME handle the error */
+ warn_report_err(err);
+ }
event_notifier_init_fd(&peer->vector[peer->vector_counter].event_notifier,
vector_fd);
@@ -362,7 +367,7 @@ static bool ivshmem_flat_connect_server(DeviceState *dev, Error **errp)
*
* ivshmem_flat_recv_msg() calls return 'msg' and 'fd'.
*
- * See ./docs/specs/ivshmem-spec.txt for details on the protocol.
+ * See docs/specs/ivshmem-spec.rst for details on the protocol.
*/
/* Step 0 */
diff --git a/hw/misc/ivshmem-pci.c b/hw/misc/ivshmem-pci.c
index 5a10bca..2748db9 100644
--- a/hw/misc/ivshmem-pci.c
+++ b/hw/misc/ivshmem-pci.c
@@ -479,6 +479,11 @@ static void process_msg_shmem(IVShmemState *s, int fd, Error **errp)
struct stat buf;
size_t size;
+ if (fd < 0) {
+ error_setg(errp, "server didn't provide fd with shared memory message");
+ return;
+ }
+
if (s->ivshmem_bar2) {
error_setg(errp, "server sent unexpected shared memory message");
close(fd);
@@ -535,7 +540,12 @@ static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd,
IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd);
event_notifier_init_fd(&peer->eventfds[vector], fd);
- g_unix_set_fd_nonblocking(fd, true, NULL); /* msix/irqfd poll non block */
+
+ /* msix/irqfd poll non block */
+ if (!qemu_set_blocking(fd, false, errp)) {
+ close(fd);
+ return;
+ }
if (posn == s->vm_id) {
setup_interrupt(s, vector, errp);
@@ -553,7 +563,9 @@ static void process_msg(IVShmemState *s, int64_t msg, int fd, Error **errp)
if (msg < -1 || msg > IVSHMEM_MAX_PEERS) {
error_setg(errp, "server sent invalid message %" PRId64, msg);
- close(fd);
+ if (fd >= 0) {
+ close(fd);
+ }
return;
}
diff --git a/hw/misc/max78000_aes.c b/hw/misc/max78000_aes.c
new file mode 100644
index 0000000..d883ddd
--- /dev/null
+++ b/hw/misc/max78000_aes.c
@@ -0,0 +1,229 @@
+/*
+ * MAX78000 AES
+ *
+ * Copyright (c) 2025 Jackson Donaldson <jcksn@duck.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "trace.h"
+#include "hw/irq.h"
+#include "migration/vmstate.h"
+#include "hw/misc/max78000_aes.h"
+#include "crypto/aes.h"
+
+static void max78000_aes_set_status(Max78000AesState *s)
+{
+ s->status = 0;
+ if (s->result_index >= 16) {
+ s->status |= OUTPUT_FULL;
+ }
+ if (s->result_index == 0) {
+ s->status |= OUTPUT_EMPTY;
+ }
+ if (s->data_index >= 16) {
+ s->status |= INPUT_FULL;
+ }
+ if (s->data_index == 0) {
+ s->status |= INPUT_EMPTY;
+ }
+}
+
+static uint64_t max78000_aes_read(void *opaque, hwaddr addr,
+ unsigned int size)
+{
+ Max78000AesState *s = opaque;
+ switch (addr) {
+ case CTRL:
+ return s->ctrl;
+
+ case STATUS:
+ return s->status;
+
+ case INTFL:
+ return s->intfl;
+
+ case INTEN:
+ return s->inten;
+
+ case FIFO:
+ if (s->result_index >= 4) {
+ s->intfl &= ~DONE;
+ s->result_index -= 4;
+ max78000_aes_set_status(s);
+ return ldl_be_p(&s->result[s->result_index]);
+ } else{
+ return 0;
+ }
+
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%"
+ HWADDR_PRIx "\n", __func__, addr);
+ break;
+
+ }
+ return 0;
+}
+
+static void max78000_aes_do_crypto(Max78000AesState *s)
+{
+ int keylen = 256;
+ uint8_t *keydata = s->key;
+ if ((s->ctrl & KEY_SIZE) == 0) {
+ keylen = 128;
+ keydata += 16;
+ } else if ((s->ctrl & KEY_SIZE) == 1 << 6) {
+ keylen = 192;
+ keydata += 8;
+ }
+
+ /*
+ * The MAX78000 AES engine stores an internal key, which it uses only
+ * for decryption. This results in the slighly odd looking pairs of
+ * set_encrypt and set_decrypt calls below; s->internal_key is
+ * being stored for later use in both cases.
+ */
+ AES_KEY key;
+ if ((s->ctrl & TYPE) == 0) {
+ AES_set_encrypt_key(keydata, keylen, &key);
+ AES_set_decrypt_key(keydata, keylen, &s->internal_key);
+ AES_encrypt(s->data, s->result, &key);
+ s->result_index = 16;
+ } else if ((s->ctrl & TYPE) == 1 << 8) {
+ AES_set_decrypt_key(keydata, keylen, &key);
+ AES_set_decrypt_key(keydata, keylen, &s->internal_key);
+ AES_decrypt(s->data, s->result, &key);
+ s->result_index = 16;
+ } else{
+ AES_decrypt(s->data, s->result, &s->internal_key);
+ s->result_index = 16;
+ }
+ s->intfl |= DONE;
+}
+
+static void max78000_aes_write(void *opaque, hwaddr addr,
+ uint64_t val64, unsigned int size)
+{
+ Max78000AesState *s = opaque;
+ uint32_t val = val64;
+ switch (addr) {
+ case CTRL:
+ if (val & OUTPUT_FLUSH) {
+ s->result_index = 0;
+ val &= ~OUTPUT_FLUSH;
+ }
+ if (val & INPUT_FLUSH) {
+ s->data_index = 0;
+ val &= ~INPUT_FLUSH;
+ }
+ if (val & START) {
+ max78000_aes_do_crypto(s);
+ }
+
+ /* Hardware appears to stay enabled even if 0 written */
+ s->ctrl = val | (s->ctrl & AES_EN);
+ break;
+
+ case FIFO:
+ assert(s->data_index <= 12);
+ stl_be_p(&s->data[12 - s->data_index], val);
+ s->data_index += 4;
+ if (s->data_index >= 16) {
+ s->data_index = 0;
+ max78000_aes_do_crypto(s);
+ }
+ break;
+
+ case KEY_BASE ... KEY_END - 4:
+ stl_be_p(&s->key[(KEY_END - KEY_BASE - 4) - (addr - KEY_BASE)], val);
+ break;
+
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%"
+ HWADDR_PRIx "\n", __func__, addr);
+ break;
+
+ }
+ max78000_aes_set_status(s);
+}
+
+static void max78000_aes_reset_hold(Object *obj, ResetType type)
+{
+ Max78000AesState *s = MAX78000_AES(obj);
+ s->ctrl = 0;
+ s->status = 0;
+ s->intfl = 0;
+ s->inten = 0;
+
+ s->data_index = 0;
+ s->result_index = 0;
+
+ memset(s->data, 0, sizeof(s->data));
+ memset(s->key, 0, sizeof(s->key));
+ memset(s->result, 0, sizeof(s->result));
+ memset(&s->internal_key, 0, sizeof(s->internal_key));
+}
+
+static const MemoryRegionOps max78000_aes_ops = {
+ .read = max78000_aes_read,
+ .write = max78000_aes_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid.min_access_size = 4,
+ .valid.max_access_size = 4,
+};
+
+static const VMStateDescription vmstate_max78000_aes = {
+ .name = TYPE_MAX78000_AES,
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (const VMStateField[]) {
+ VMSTATE_UINT32(ctrl, Max78000AesState),
+ VMSTATE_UINT32(status, Max78000AesState),
+ VMSTATE_UINT32(intfl, Max78000AesState),
+ VMSTATE_UINT32(inten, Max78000AesState),
+ VMSTATE_UINT8_ARRAY(data, Max78000AesState, 16),
+ VMSTATE_UINT8_ARRAY(key, Max78000AesState, 32),
+ VMSTATE_UINT8_ARRAY(result, Max78000AesState, 16),
+ VMSTATE_UINT32_ARRAY(internal_key.rd_key, Max78000AesState, 60),
+ VMSTATE_INT32(internal_key.rounds, Max78000AesState),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static void max78000_aes_init(Object *obj)
+{
+ Max78000AesState *s = MAX78000_AES(obj);
+ sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irq);
+
+ memory_region_init_io(&s->mmio, obj, &max78000_aes_ops, s,
+ TYPE_MAX78000_AES, 0xc00);
+ sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio);
+
+}
+
+static void max78000_aes_class_init(ObjectClass *klass, const void *data)
+{
+ ResettableClass *rc = RESETTABLE_CLASS(klass);
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ rc->phases.hold = max78000_aes_reset_hold;
+ dc->vmsd = &vmstate_max78000_aes;
+
+}
+
+static const TypeInfo max78000_aes_info = {
+ .name = TYPE_MAX78000_AES,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(Max78000AesState),
+ .instance_init = max78000_aes_init,
+ .class_init = max78000_aes_class_init,
+};
+
+static void max78000_aes_register_types(void)
+{
+ type_register_static(&max78000_aes_info);
+}
+
+type_init(max78000_aes_register_types)
diff --git a/hw/misc/max78000_gcr.c b/hw/misc/max78000_gcr.c
new file mode 100644
index 0000000..fbbc92c
--- /dev/null
+++ b/hw/misc/max78000_gcr.c
@@ -0,0 +1,351 @@
+/*
+ * MAX78000 Global Control Registers
+ *
+ * Copyright (c) 2025 Jackson Donaldson <jcksn@duck.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "trace.h"
+#include "hw/irq.h"
+#include "system/runstate.h"
+#include "migration/vmstate.h"
+#include "hw/qdev-properties.h"
+#include "hw/char/max78000_uart.h"
+#include "hw/misc/max78000_trng.h"
+#include "hw/misc/max78000_aes.h"
+#include "hw/misc/max78000_gcr.h"
+
+
+static void max78000_gcr_reset_hold(Object *obj, ResetType type)
+{
+ DeviceState *dev = DEVICE(obj);
+ Max78000GcrState *s = MAX78000_GCR(dev);
+ s->sysctrl = 0x21002;
+ s->rst0 = 0;
+ /* All clocks are always ready */
+ s->clkctrl = 0x3e140008;
+ s->pm = 0x3f000;
+ s->pclkdiv = 0;
+ s->pclkdis0 = 0xffffffff;
+ s->memctrl = 0x5;
+ s->memz = 0;
+ s->sysst = 0;
+ s->rst1 = 0;
+ s->pckdis1 = 0xffffffff;
+ s->eventen = 0;
+ s->revision = 0xa1;
+ s->sysie = 0;
+ s->eccerr = 0;
+ s->ecced = 0;
+ s->eccie = 0;
+ s->eccaddr = 0;
+}
+
+static uint64_t max78000_gcr_read(void *opaque, hwaddr addr,
+ unsigned int size)
+{
+ Max78000GcrState *s = opaque;
+
+ switch (addr) {
+ case SYSCTRL:
+ return s->sysctrl;
+
+ case RST0:
+ return s->rst0;
+
+ case CLKCTRL:
+ return s->clkctrl;
+
+ case PM:
+ return s->pm;
+
+ case PCLKDIV:
+ return s->pclkdiv;
+
+ case PCLKDIS0:
+ return s->pclkdis0;
+
+ case MEMCTRL:
+ return s->memctrl;
+
+ case MEMZ:
+ return s->memz;
+
+ case SYSST:
+ return s->sysst;
+
+ case RST1:
+ return s->rst1;
+
+ case PCKDIS1:
+ return s->pckdis1;
+
+ case EVENTEN:
+ return s->eventen;
+
+ case REVISION:
+ return s->revision;
+
+ case SYSIE:
+ return s->sysie;
+
+ case ECCERR:
+ return s->eccerr;
+
+ case ECCED:
+ return s->ecced;
+
+ case ECCIE:
+ return s->eccie;
+
+ case ECCADDR:
+ return s->eccaddr;
+
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%"
+ HWADDR_PRIx "\n", __func__, addr);
+ return 0;
+
+ }
+}
+
+static void max78000_gcr_write(void *opaque, hwaddr addr,
+ uint64_t val64, unsigned int size)
+{
+ Max78000GcrState *s = opaque;
+ uint32_t val = val64;
+ uint8_t zero[0xc000] = {0};
+ switch (addr) {
+ case SYSCTRL:
+ /* Checksum calculations always pass immediately */
+ s->sysctrl = (val & 0x30000) | 0x1002;
+ break;
+
+ case RST0:
+ if (val & SYSTEM_RESET) {
+ qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
+ }
+ if (val & PERIPHERAL_RESET) {
+ /*
+ * Peripheral reset resets all peripherals. The CPU
+ * retains its state. The GPIO, watchdog timers, AoD,
+ * RAM retention, and general control registers (GCR),
+ * including the clock configuration, are unaffected.
+ */
+ val = UART2_RESET | UART1_RESET | UART0_RESET |
+ ADC_RESET | CNN_RESET | TRNG_RESET |
+ RTC_RESET | I2C0_RESET | SPI1_RESET |
+ TMR3_RESET | TMR2_RESET | TMR1_RESET |
+ TMR0_RESET | WDT0_RESET | DMA_RESET;
+ }
+ if (val & SOFT_RESET) {
+ /* Soft reset also resets GPIO */
+ val = UART2_RESET | UART1_RESET | UART0_RESET |
+ ADC_RESET | CNN_RESET | TRNG_RESET |
+ RTC_RESET | I2C0_RESET | SPI1_RESET |
+ TMR3_RESET | TMR2_RESET | TMR1_RESET |
+ TMR0_RESET | GPIO1_RESET | GPIO0_RESET |
+ DMA_RESET;
+ }
+ if (val & UART2_RESET) {
+ device_cold_reset(s->uart2);
+ }
+ if (val & UART1_RESET) {
+ device_cold_reset(s->uart1);
+ }
+ if (val & UART0_RESET) {
+ device_cold_reset(s->uart0);
+ }
+ if (val & TRNG_RESET) {
+ device_cold_reset(s->trng);
+ }
+ if (val & AES_RESET) {
+ device_cold_reset(s->aes);
+ }
+ /* TODO: As other devices are implemented, add them here */
+ break;
+
+ case CLKCTRL:
+ s->clkctrl = val | SYSCLK_RDY;
+ break;
+
+ case PM:
+ s->pm = val;
+ break;
+
+ case PCLKDIV:
+ s->pclkdiv = val;
+ break;
+
+ case PCLKDIS0:
+ s->pclkdis0 = val;
+ break;
+
+ case MEMCTRL:
+ s->memctrl = val;
+ break;
+
+ case MEMZ:
+ if (val & ram0) {
+ address_space_write(&s->sram_as, SYSRAM0_START,
+ MEMTXATTRS_UNSPECIFIED, zero, 0x8000);
+ }
+ if (val & ram1) {
+ address_space_write(&s->sram_as, SYSRAM1_START,
+ MEMTXATTRS_UNSPECIFIED, zero, 0x8000);
+ }
+ if (val & ram2) {
+ address_space_write(&s->sram_as, SYSRAM2_START,
+ MEMTXATTRS_UNSPECIFIED, zero, 0xC000);
+ }
+ if (val & ram3) {
+ address_space_write(&s->sram_as, SYSRAM3_START,
+ MEMTXATTRS_UNSPECIFIED, zero, 0x4000);
+ }
+ break;
+
+ case SYSST:
+ s->sysst = val;
+ break;
+
+ case RST1:
+ /* TODO: As other devices are implemented, add them here */
+ s->rst1 = val;
+ break;
+
+ case PCKDIS1:
+ s->pckdis1 = val;
+ break;
+
+ case EVENTEN:
+ s->eventen = val;
+ break;
+
+ case REVISION:
+ s->revision = val;
+ break;
+
+ case SYSIE:
+ s->sysie = val;
+ break;
+
+ case ECCERR:
+ s->eccerr = val;
+ break;
+
+ case ECCED:
+ s->ecced = val;
+ break;
+
+ case ECCIE:
+ s->eccie = val;
+ break;
+
+ case ECCADDR:
+ s->eccaddr = val;
+ break;
+
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%" HWADDR_PRIx "\n",
+ __func__, addr);
+ break;
+
+ }
+}
+
+static const Property max78000_gcr_properties[] = {
+ DEFINE_PROP_LINK("sram", Max78000GcrState, sram,
+ TYPE_MEMORY_REGION, MemoryRegion*),
+ DEFINE_PROP_LINK("uart0", Max78000GcrState, uart0,
+ TYPE_MAX78000_UART, DeviceState*),
+ DEFINE_PROP_LINK("uart1", Max78000GcrState, uart1,
+ TYPE_MAX78000_UART, DeviceState*),
+ DEFINE_PROP_LINK("uart2", Max78000GcrState, uart2,
+ TYPE_MAX78000_UART, DeviceState*),
+ DEFINE_PROP_LINK("trng", Max78000GcrState, trng,
+ TYPE_MAX78000_TRNG, DeviceState*),
+ DEFINE_PROP_LINK("aes", Max78000GcrState, aes,
+ TYPE_MAX78000_AES, DeviceState*),
+};
+
+static const MemoryRegionOps max78000_gcr_ops = {
+ .read = max78000_gcr_read,
+ .write = max78000_gcr_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid.min_access_size = 4,
+ .valid.max_access_size = 4,
+};
+
+static const VMStateDescription vmstate_max78000_gcr = {
+ .name = TYPE_MAX78000_GCR,
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (const VMStateField[]) {
+ VMSTATE_UINT32(sysctrl, Max78000GcrState),
+ VMSTATE_UINT32(rst0, Max78000GcrState),
+ VMSTATE_UINT32(clkctrl, Max78000GcrState),
+ VMSTATE_UINT32(pm, Max78000GcrState),
+ VMSTATE_UINT32(pclkdiv, Max78000GcrState),
+ VMSTATE_UINT32(pclkdis0, Max78000GcrState),
+ VMSTATE_UINT32(memctrl, Max78000GcrState),
+ VMSTATE_UINT32(memz, Max78000GcrState),
+ VMSTATE_UINT32(sysst, Max78000GcrState),
+ VMSTATE_UINT32(rst1, Max78000GcrState),
+ VMSTATE_UINT32(pckdis1, Max78000GcrState),
+ VMSTATE_UINT32(eventen, Max78000GcrState),
+ VMSTATE_UINT32(revision, Max78000GcrState),
+ VMSTATE_UINT32(sysie, Max78000GcrState),
+ VMSTATE_UINT32(eccerr, Max78000GcrState),
+ VMSTATE_UINT32(ecced, Max78000GcrState),
+ VMSTATE_UINT32(eccie, Max78000GcrState),
+ VMSTATE_UINT32(eccaddr, Max78000GcrState),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static void max78000_gcr_init(Object *obj)
+{
+ Max78000GcrState *s = MAX78000_GCR(obj);
+
+ memory_region_init_io(&s->mmio, obj, &max78000_gcr_ops, s,
+ TYPE_MAX78000_GCR, 0x400);
+ sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio);
+
+}
+
+static void max78000_gcr_realize(DeviceState *dev, Error **errp)
+{
+ Max78000GcrState *s = MAX78000_GCR(dev);
+
+ address_space_init(&s->sram_as, s->sram, "sram");
+}
+
+static void max78000_gcr_class_init(ObjectClass *klass, const void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ ResettableClass *rc = RESETTABLE_CLASS(klass);
+
+ device_class_set_props(dc, max78000_gcr_properties);
+
+ dc->realize = max78000_gcr_realize;
+ dc->vmsd = &vmstate_max78000_gcr;
+ rc->phases.hold = max78000_gcr_reset_hold;
+}
+
+static const TypeInfo max78000_gcr_info = {
+ .name = TYPE_MAX78000_GCR,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(Max78000GcrState),
+ .instance_init = max78000_gcr_init,
+ .class_init = max78000_gcr_class_init,
+};
+
+static void max78000_gcr_register_types(void)
+{
+ type_register_static(&max78000_gcr_info);
+}
+
+type_init(max78000_gcr_register_types)
diff --git a/hw/misc/max78000_icc.c b/hw/misc/max78000_icc.c
new file mode 100644
index 0000000..6f7d2b2
--- /dev/null
+++ b/hw/misc/max78000_icc.c
@@ -0,0 +1,120 @@
+/*
+ * MAX78000 Instruction Cache
+ *
+ * Copyright (c) 2025 Jackson Donaldson <jcksn@duck.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "trace.h"
+#include "hw/irq.h"
+#include "migration/vmstate.h"
+#include "hw/misc/max78000_icc.h"
+
+
+static uint64_t max78000_icc_read(void *opaque, hwaddr addr,
+ unsigned int size)
+{
+ Max78000IccState *s = opaque;
+ switch (addr) {
+ case ICC_INFO:
+ return s->info;
+
+ case ICC_SZ:
+ return s->sz;
+
+ case ICC_CTRL:
+ return s->ctrl;
+
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: Bad offset 0x%" HWADDR_PRIx "\n",
+ __func__, addr);
+ return 0;
+
+ }
+}
+
+static void max78000_icc_write(void *opaque, hwaddr addr,
+ uint64_t val64, unsigned int size)
+{
+ Max78000IccState *s = opaque;
+
+ switch (addr) {
+ case ICC_CTRL:
+ s->ctrl = 0x10000 | (val64 & 1);
+ break;
+
+ case ICC_INVALIDATE:
+ break;
+
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: Bad offset 0x%" HWADDR_PRIx "\n",
+ __func__, addr);
+ break;
+ }
+}
+
+static const MemoryRegionOps max78000_icc_ops = {
+ .read = max78000_icc_read,
+ .write = max78000_icc_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid.min_access_size = 4,
+ .valid.max_access_size = 4,
+};
+
+static const VMStateDescription max78000_icc_vmstate = {
+ .name = TYPE_MAX78000_ICC,
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (const VMStateField[]) {
+ VMSTATE_UINT32(info, Max78000IccState),
+ VMSTATE_UINT32(sz, Max78000IccState),
+ VMSTATE_UINT32(ctrl, Max78000IccState),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static void max78000_icc_reset_hold(Object *obj, ResetType type)
+{
+ Max78000IccState *s = MAX78000_ICC(obj);
+ s->info = 0;
+ s->sz = 0x10000010;
+ s->ctrl = 0x10000;
+}
+
+static void max78000_icc_init(Object *obj)
+{
+ Max78000IccState *s = MAX78000_ICC(obj);
+
+ memory_region_init_io(&s->mmio, obj, &max78000_icc_ops, s,
+ TYPE_MAX78000_ICC, 0x800);
+ sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio);
+}
+
+static void max78000_icc_class_init(ObjectClass *klass, const void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ ResettableClass *rc = RESETTABLE_CLASS(klass);
+
+ rc->phases.hold = max78000_icc_reset_hold;
+ dc->vmsd = &max78000_icc_vmstate;
+}
+
+static const TypeInfo max78000_icc_info = {
+ .name = TYPE_MAX78000_ICC,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(Max78000IccState),
+ .instance_init = max78000_icc_init,
+ .class_init = max78000_icc_class_init,
+};
+
+static void max78000_icc_register_types(void)
+{
+ type_register_static(&max78000_icc_info);
+}
+
+type_init(max78000_icc_register_types)
diff --git a/hw/misc/max78000_trng.c b/hw/misc/max78000_trng.c
new file mode 100644
index 0000000..ecdaef5
--- /dev/null
+++ b/hw/misc/max78000_trng.c
@@ -0,0 +1,139 @@
+/*
+ * MAX78000 True Random Number Generator
+ *
+ * Copyright (c) 2025 Jackson Donaldson <jcksn@duck.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "trace.h"
+#include "hw/irq.h"
+#include "migration/vmstate.h"
+#include "hw/misc/max78000_trng.h"
+#include "qemu/guest-random.h"
+
+static uint64_t max78000_trng_read(void *opaque, hwaddr addr,
+ unsigned int size)
+{
+ uint32_t data;
+
+ Max78000TrngState *s = opaque;
+ switch (addr) {
+ case CTRL:
+ return s->ctrl;
+
+ case STATUS:
+ return 1;
+
+ case DATA:
+ /*
+ * When interrupts are enabled, reading random data should cause a
+ * new interrupt to be generated; since there's always a random number
+ * available, we could qemu_set_irq(s->irq, s->ctrl & RND_IE). Because
+ * of how trng_write is set up, this is always a noop, so don't
+ */
+ qemu_guest_getrandom_nofail(&data, sizeof(data));
+ return data;
+
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%"
+ HWADDR_PRIx "\n", __func__, addr);
+ break;
+ }
+ return 0;
+}
+
+static void max78000_trng_write(void *opaque, hwaddr addr,
+ uint64_t val64, unsigned int size)
+{
+ Max78000TrngState *s = opaque;
+ uint32_t val = val64;
+ switch (addr) {
+ case CTRL:
+ /* TODO: implement AES keygen */
+ s->ctrl = val;
+
+ /*
+ * This device models random number generation as taking 0 time.
+ * A new random number is always available, so the condition for the
+ * RND interrupt is always fulfilled; we can just set irq to 1.
+ */
+ if (val & RND_IE) {
+ qemu_set_irq(s->irq, 1);
+ } else{
+ qemu_set_irq(s->irq, 0);
+ }
+ break;
+
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%"
+ HWADDR_PRIx "\n", __func__, addr);
+ break;
+ }
+}
+
+static void max78000_trng_reset_hold(Object *obj, ResetType type)
+{
+ Max78000TrngState *s = MAX78000_TRNG(obj);
+ s->ctrl = 0;
+ s->status = 0;
+ s->data = 0;
+}
+
+static const MemoryRegionOps max78000_trng_ops = {
+ .read = max78000_trng_read,
+ .write = max78000_trng_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid.min_access_size = 4,
+ .valid.max_access_size = 4,
+};
+
+static const VMStateDescription max78000_trng_vmstate = {
+ .name = TYPE_MAX78000_TRNG,
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (const VMStateField[]) {
+ VMSTATE_UINT32(ctrl, Max78000TrngState),
+ VMSTATE_UINT32(status, Max78000TrngState),
+ VMSTATE_UINT32(data, Max78000TrngState),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static void max78000_trng_init(Object *obj)
+{
+ Max78000TrngState *s = MAX78000_TRNG(obj);
+ sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irq);
+
+ memory_region_init_io(&s->mmio, obj, &max78000_trng_ops, s,
+ TYPE_MAX78000_TRNG, 0x1000);
+ sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio);
+
+}
+
+static void max78000_trng_class_init(ObjectClass *klass, const void *data)
+{
+ ResettableClass *rc = RESETTABLE_CLASS(klass);
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ rc->phases.hold = max78000_trng_reset_hold;
+ dc->vmsd = &max78000_trng_vmstate;
+
+}
+
+static const TypeInfo max78000_trng_info = {
+ .name = TYPE_MAX78000_TRNG,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(Max78000TrngState),
+ .instance_init = max78000_trng_init,
+ .class_init = max78000_trng_class_init,
+};
+
+static void max78000_trng_register_types(void)
+{
+ type_register_static(&max78000_trng_info);
+}
+
+type_init(max78000_trng_register_types)
diff --git a/hw/misc/meson.build b/hw/misc/meson.build
index 6d47de4..b1d8d8e 100644
--- a/hw/misc/meson.build
+++ b/hw/misc/meson.build
@@ -70,6 +70,10 @@ system_ss.add(when: 'CONFIG_IMX', if_true: files(
'imx_ccm.c',
'imx_rngc.c',
))
+system_ss.add(when: 'CONFIG_MAX78000_AES', if_true: files('max78000_aes.c'))
+system_ss.add(when: 'CONFIG_MAX78000_GCR', if_true: files('max78000_gcr.c'))
+system_ss.add(when: 'CONFIG_MAX78000_ICC', if_true: files('max78000_icc.c'))
+system_ss.add(when: 'CONFIG_MAX78000_TRNG', if_true: files('max78000_trng.c'))
system_ss.add(when: 'CONFIG_NPCM7XX', if_true: files(
'npcm_clk.c',
'npcm_gcr.c',
diff --git a/hw/misc/stm32_rcc.c b/hw/misc/stm32_rcc.c
index 94e8dae..5815b3e 100644
--- a/hw/misc/stm32_rcc.c
+++ b/hw/misc/stm32_rcc.c
@@ -60,7 +60,7 @@ static void stm32_rcc_write(void *opaque, hwaddr addr,
uint32_t value = val64;
uint32_t prev_value, new_value, irq_offset;
- trace_stm32_rcc_write(value, addr);
+ trace_stm32_rcc_write(addr, value);
if (addr > STM32_RCC_DCKCFGR2) {
qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%"HWADDR_PRIx"\n",
diff --git a/hw/misc/trace-events b/hw/misc/trace-events
index 4383808..eeb9243 100644
--- a/hw/misc/trace-events
+++ b/hw/misc/trace-events
@@ -90,6 +90,12 @@ slavio_sysctrl_mem_readl(uint32_t ret) "Read system control 0x%08x"
slavio_led_mem_writew(uint32_t val) "Write diagnostic LED 0x%04x"
slavio_led_mem_readw(uint32_t ret) "Read diagnostic LED 0x%04x"
+# aspeed_sbc.c
+aspeed_sbc_ignore_cmd(uint32_t cmd) "Ignoring command 0x%" PRIx32
+aspeed_sbc_handle_cmd(uint32_t cmd, uint32_t addr, bool ret) "Handling command 0x%" PRIx32 " for OTP addr 0x%" PRIx32 " Result: %d"
+aspeed_sbc_otp_read(uint32_t addr, uint32_t value) "OTP Memory read: addr 0x%" PRIx32 " value 0x%" PRIx32
+aspeed_sbc_otp_prog(uint32_t addr, uint32_t value) "OTP Memory write: addr 0x%" PRIx32 " value 0x%" PRIx32
+
# aspeed_scu.c
aspeed_scu_write(uint64_t offset, unsigned size, uint32_t data) "To 0x%" PRIx64 " of size %u: 0x%" PRIx32
aspeed_scu_read(uint64_t offset, unsigned size, uint32_t data) "To 0x%" PRIx64 " of size %u: 0x%" PRIx32
@@ -302,6 +308,14 @@ aspeed_peci_read(uint64_t offset, uint64_t data) "offset 0x%" PRIx64 " data 0x%"
aspeed_peci_write(uint64_t offset, uint64_t data) "offset 0x%" PRIx64 " data 0x%" PRIx64
aspeed_peci_raise_interrupt(uint32_t ctrl, uint32_t status) "ctrl 0x%" PRIx32 " status 0x%" PRIx32
+# aspeed_hace.c
+aspeed_hace_read(uint64_t offset, uint64_t data) "offset 0x%" PRIx64 " data 0x%" PRIx64
+aspeed_hace_write(uint64_t offset, uint64_t data) "offset 0x%" PRIx64 " data 0x%" PRIx64
+aspeed_hace_hash_sg(int index, uint64_t list_addr, uint64_t buf_addr, uint32_t len) "%d: list_addr 0x%" PRIx64 " buf_addr 0x%" PRIx64 " len 0x%" PRIx32
+aspeed_hace_hash_addr(const char *s, uint64_t addr) "%s: 0x%" PRIx64
+aspeed_hace_hash_execute_acc_mode(bool final_request) "final request: %d"
+aspeed_hace_hexdump(const char *desc, uint32_t offset, char *s) "%s: 0x%08x: %s"
+
# bcm2835_property.c
bcm2835_mbox_property(uint32_t tag, uint32_t bufsize, size_t resplen) "mbox property tag:0x%08x in_sz:%u out_sz:%zu"
diff --git a/hw/misc/xlnx-versal-cframe-reg.c b/hw/misc/xlnx-versal-cframe-reg.c
index 1ce083e..95e167b 100644
--- a/hw/misc/xlnx-versal-cframe-reg.c
+++ b/hw/misc/xlnx-versal-cframe-reg.c
@@ -693,6 +693,14 @@ static void cframe_reg_init(Object *obj)
fifo32_create(&s->new_f_data, FRAME_NUM_WORDS);
}
+static void cframe_reg_finalize(Object *obj)
+{
+ XlnxVersalCFrameReg *s = XLNX_VERSAL_CFRAME_REG(obj);
+
+ fifo32_destroy(&s->new_f_data);
+ g_tree_destroy(s->cframes);
+}
+
static const VMStateDescription vmstate_cframe = {
.name = "cframe",
.version_id = 1,
@@ -833,6 +841,7 @@ static const TypeInfo cframe_reg_info = {
.instance_size = sizeof(XlnxVersalCFrameReg),
.class_init = cframe_reg_class_init,
.instance_init = cframe_reg_init,
+ .instance_finalize = cframe_reg_finalize,
.interfaces = (const InterfaceInfo[]) {
{ TYPE_XLNX_CFI_IF },
{ }
diff --git a/hw/misc/xlnx-versal-crl.c b/hw/misc/xlnx-versal-crl.c
index 08ff2fc..10e6af0 100644
--- a/hw/misc/xlnx-versal-crl.c
+++ b/hw/misc/xlnx-versal-crl.c
@@ -1,16 +1,13 @@
/*
* QEMU model of the Clock-Reset-LPD (CRL).
*
- * Copyright (c) 2022 Advanced Micro Devices, Inc.
+ * Copyright (c) 2022-2025 Advanced Micro Devices, Inc.
* SPDX-License-Identifier: GPL-2.0-or-later
*
* Written by Edgar E. Iglesias <edgar.iglesias@amd.com>
*/
#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qemu/log.h"
-#include "qemu/bitops.h"
#include "migration/vmstate.h"
#include "hw/qdev-properties.h"
#include "hw/sysbus.h"
@@ -58,90 +55,144 @@ static uint64_t crl_disable_prew(RegisterInfo *reg, uint64_t val64)
return 0;
}
-static void crl_reset_dev(XlnxVersalCRL *s, DeviceState *dev,
- bool rst_old, bool rst_new)
+static DeviceState **versal_decode_periph_rst(XlnxVersalCRLBase *s,
+ hwaddr addr, size_t *count)
{
- device_cold_reset(dev);
-}
+ size_t idx;
+ XlnxVersalCRL *xvc = XLNX_VERSAL_CRL(s);
-static void crl_reset_cpu(XlnxVersalCRL *s, ARMCPU *armcpu,
- bool rst_old, bool rst_new)
-{
- if (rst_new) {
- arm_set_cpu_off(arm_cpu_mp_affinity(armcpu));
- } else {
- arm_set_cpu_on_and_reset(arm_cpu_mp_affinity(armcpu));
- }
-}
+ *count = 1;
-#define REGFIELD_RESET(type, s, reg, f, new_val, dev) { \
- bool old_f = ARRAY_FIELD_EX32((s)->regs, reg, f); \
- bool new_f = FIELD_EX32(new_val, reg, f); \
- \
- /* Detect edges. */ \
- if (dev && old_f != new_f) { \
- crl_reset_ ## type(s, dev, old_f, new_f); \
- } \
-}
+ switch (addr) {
+ case A_RST_CPU_R5:
+ return xvc->cfg.rpu;
-static uint64_t crl_rst_r5_prew(RegisterInfo *reg, uint64_t val64)
-{
- XlnxVersalCRL *s = XLNX_VERSAL_CRL(reg->opaque);
+ case A_RST_ADMA:
+ /* A single register fans out to all DMA reset inputs */
+ *count = ARRAY_SIZE(xvc->cfg.adma);
+ return xvc->cfg.adma;
- REGFIELD_RESET(cpu, s, RST_CPU_R5, RESET_CPU0, val64, s->cfg.cpu_r5[0]);
- REGFIELD_RESET(cpu, s, RST_CPU_R5, RESET_CPU1, val64, s->cfg.cpu_r5[1]);
- return val64;
-}
+ case A_RST_UART0 ... A_RST_UART1:
+ idx = (addr - A_RST_UART0) / sizeof(uint32_t);
+ return xvc->cfg.uart + idx;
-static uint64_t crl_rst_adma_prew(RegisterInfo *reg, uint64_t val64)
-{
- XlnxVersalCRL *s = XLNX_VERSAL_CRL(reg->opaque);
- int i;
+ case A_RST_GEM0 ... A_RST_GEM1:
+ idx = (addr - A_RST_GEM0) / sizeof(uint32_t);
+ return xvc->cfg.gem + idx;
- /* A single register fans out to all ADMA reset inputs. */
- for (i = 0; i < ARRAY_SIZE(s->cfg.adma); i++) {
- REGFIELD_RESET(dev, s, RST_ADMA, RESET, val64, s->cfg.adma[i]);
+ case A_RST_USB0:
+ return xvc->cfg.usb;
+
+ default:
+ /* invalid or unimplemented */
+ g_assert_not_reached();
}
- return val64;
}
-static uint64_t crl_rst_uart0_prew(RegisterInfo *reg, uint64_t val64)
+static DeviceState **versal2_decode_periph_rst(XlnxVersalCRLBase *s,
+ hwaddr addr, size_t *count)
{
- XlnxVersalCRL *s = XLNX_VERSAL_CRL(reg->opaque);
+ size_t idx;
+ XlnxVersal2CRL *xvc = XLNX_VERSAL2_CRL(s);
- REGFIELD_RESET(dev, s, RST_UART0, RESET, val64, s->cfg.uart[0]);
- return val64;
-}
+ *count = 1;
-static uint64_t crl_rst_uart1_prew(RegisterInfo *reg, uint64_t val64)
-{
- XlnxVersalCRL *s = XLNX_VERSAL_CRL(reg->opaque);
+ switch (addr) {
+ case A_VERSAL2_RST_RPU_A ... A_VERSAL2_RST_RPU_E:
+ idx = (addr - A_VERSAL2_RST_RPU_A) / sizeof(uint32_t);
+ idx *= 2; /* two RPUs per RST_RPU_x registers */
+ return xvc->cfg.rpu + idx;
- REGFIELD_RESET(dev, s, RST_UART1, RESET, val64, s->cfg.uart[1]);
- return val64;
-}
+ case A_VERSAL2_RST_ADMA:
+ /* A single register fans out to all DMA reset inputs */
+ *count = ARRAY_SIZE(xvc->cfg.adma);
+ return xvc->cfg.adma;
-static uint64_t crl_rst_gem0_prew(RegisterInfo *reg, uint64_t val64)
-{
- XlnxVersalCRL *s = XLNX_VERSAL_CRL(reg->opaque);
+ case A_VERSAL2_RST_SDMA:
+ *count = ARRAY_SIZE(xvc->cfg.sdma);
+ return xvc->cfg.sdma;
- REGFIELD_RESET(dev, s, RST_GEM0, RESET, val64, s->cfg.gem[0]);
- return val64;
+ case A_VERSAL2_RST_UART0 ... A_VERSAL2_RST_UART1:
+ idx = (addr - A_VERSAL2_RST_UART0) / sizeof(uint32_t);
+ return xvc->cfg.uart + idx;
+
+ case A_VERSAL2_RST_GEM0 ... A_VERSAL2_RST_GEM1:
+ idx = (addr - A_VERSAL2_RST_GEM0) / sizeof(uint32_t);
+ return xvc->cfg.gem + idx;
+
+ case A_VERSAL2_RST_USB0 ... A_VERSAL2_RST_USB1:
+ idx = (addr - A_VERSAL2_RST_USB0) / sizeof(uint32_t);
+ return xvc->cfg.usb + idx;
+
+ case A_VERSAL2_RST_CAN0 ... A_VERSAL2_RST_CAN3:
+ idx = (addr - A_VERSAL2_RST_CAN0) / sizeof(uint32_t);
+ return xvc->cfg.can + idx;
+
+ default:
+ /* invalid or unimplemented */
+ return NULL;
+ }
}
-static uint64_t crl_rst_gem1_prew(RegisterInfo *reg, uint64_t val64)
+static uint64_t crl_rst_cpu_prew(RegisterInfo *reg, uint64_t val64)
{
- XlnxVersalCRL *s = XLNX_VERSAL_CRL(reg->opaque);
+ XlnxVersalCRLBase *s = XLNX_VERSAL_CRL_BASE(reg->opaque);
+ XlnxVersalCRLBaseClass *xvcbc = XLNX_VERSAL_CRL_BASE_GET_CLASS(s);
+ DeviceState **dev;
+ size_t i, count;
+
+ dev = xvcbc->decode_periph_rst(s, reg->access->addr, &count);
+
+ for (i = 0; i < 2; i++) {
+ bool prev, new;
+ uint64_t aff;
+
+ prev = extract32(s->regs[reg->access->addr / 4], i, 1);
+ new = extract32(val64, i, 1);
+
+ if (prev == new) {
+ continue;
+ }
+
+ aff = arm_cpu_mp_affinity(ARM_CPU(dev[i]));
+
+ if (new) {
+ arm_set_cpu_off(aff);
+ } else {
+ arm_set_cpu_on_and_reset(aff);
+ }
+ }
- REGFIELD_RESET(dev, s, RST_GEM1, RESET, val64, s->cfg.gem[1]);
return val64;
}
-static uint64_t crl_rst_usb_prew(RegisterInfo *reg, uint64_t val64)
+static uint64_t crl_rst_dev_prew(RegisterInfo *reg, uint64_t val64)
{
- XlnxVersalCRL *s = XLNX_VERSAL_CRL(reg->opaque);
+ XlnxVersalCRLBase *s = XLNX_VERSAL_CRL_BASE(reg->opaque);
+ XlnxVersalCRLBaseClass *xvcbc = XLNX_VERSAL_CRL_BASE_GET_CLASS(s);
+ DeviceState **dev;
+ bool prev, new;
+ size_t i, count;
+
+ dev = xvcbc->decode_periph_rst(s, reg->access->addr, &count);
+
+ if (dev == NULL) {
+ return val64;
+ }
+
+ prev = s->regs[reg->access->addr / 4] & 0x1;
+ new = val64 & 0x1;
+
+ if (prev == new) {
+ return val64;
+ }
+
+ for (i = 0; i < count; i++) {
+ if (dev[i]) {
+ device_cold_reset(dev[i]);
+ }
+ }
- REGFIELD_RESET(dev, s, RST_USB0, RESET, val64, s->cfg.usb);
return val64;
}
@@ -247,27 +298,27 @@ static const RegisterAccessInfo crl_regs_info[] = {
},{ .name = "RST_CPU_R5", .addr = A_RST_CPU_R5,
.reset = 0x17,
.rsvd = 0x8,
- .pre_write = crl_rst_r5_prew,
+ .pre_write = crl_rst_cpu_prew,
},{ .name = "RST_ADMA", .addr = A_RST_ADMA,
.reset = 0x1,
- .pre_write = crl_rst_adma_prew,
+ .pre_write = crl_rst_dev_prew,
},{ .name = "RST_GEM0", .addr = A_RST_GEM0,
.reset = 0x1,
- .pre_write = crl_rst_gem0_prew,
+ .pre_write = crl_rst_dev_prew,
},{ .name = "RST_GEM1", .addr = A_RST_GEM1,
.reset = 0x1,
- .pre_write = crl_rst_gem1_prew,
+ .pre_write = crl_rst_dev_prew,
},{ .name = "RST_SPARE", .addr = A_RST_SPARE,
.reset = 0x1,
},{ .name = "RST_USB0", .addr = A_RST_USB0,
.reset = 0x1,
- .pre_write = crl_rst_usb_prew,
+ .pre_write = crl_rst_dev_prew,
},{ .name = "RST_UART0", .addr = A_RST_UART0,
.reset = 0x1,
- .pre_write = crl_rst_uart0_prew,
+ .pre_write = crl_rst_dev_prew,
},{ .name = "RST_UART1", .addr = A_RST_UART1,
.reset = 0x1,
- .pre_write = crl_rst_uart1_prew,
+ .pre_write = crl_rst_dev_prew,
},{ .name = "RST_SPI0", .addr = A_RST_SPI0,
.reset = 0x1,
},{ .name = "RST_SPI1", .addr = A_RST_SPI1,
@@ -301,7 +352,247 @@ static const RegisterAccessInfo crl_regs_info[] = {
}
};
-static void crl_reset_enter(Object *obj, ResetType type)
+static const RegisterAccessInfo versal2_crl_regs_info[] = {
+ { .name = "ERR_CTRL", .addr = A_VERSAL2_ERR_CTRL,
+ .reset = 0x1,
+ },{ .name = "WPROT", .addr = A_VERSAL2_WPROT,
+ },{ .name = "RPLL_CTRL", .addr = A_VERSAL2_RPLL_CTRL,
+ .reset = 0x24809,
+ .rsvd = 0xf88c00f6,
+ },{ .name = "RPLL_CFG", .addr = A_VERSAL2_RPLL_CFG,
+ .reset = 0x7e5dcc6c,
+ .rsvd = 0x1801210,
+ },{ .name = "FLXPLL_CTRL", .addr = A_VERSAL2_FLXPLL_CTRL,
+ .reset = 0x24809,
+ .rsvd = 0xf88c00f6,
+ },{ .name = "FLXPLL_CFG", .addr = A_VERSAL2_FLXPLL_CFG,
+ .reset = 0x7e5dcc6c,
+ .rsvd = 0x1801210,
+ },{ .name = "PLL_STATUS", .addr = A_VERSAL2_PLL_STATUS,
+ .reset = 0xf,
+ .rsvd = 0xf0,
+ .ro = 0xf,
+ },{ .name = "RPLL_TO_XPD_CTRL", .addr = A_VERSAL2_RPLL_TO_XPD_CTRL,
+ .reset = 0x2000100,
+ .rsvd = 0xfdfc00ff,
+ },{ .name = "LPX_TOP_SWITCH_CTRL", .addr = A_VERSAL2_LPX_TOP_SWITCH_CTRL,
+ .reset = 0xe000300,
+ .rsvd = 0xf1fc00f8,
+ },{ .name = "LPX_LSBUS_CLK_CTRL", .addr = A_VERSAL2_LPX_LSBUS_CLK_CTRL,
+ .reset = 0x2000800,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "RPU_CLK_CTRL", .addr = A_VERSAL2_RPU_CLK_CTRL,
+ .reset = 0x3f00300,
+ .rsvd = 0xfc0c00f8,
+ },{ .name = "OCM_CLK_CTRL", .addr = A_VERSAL2_OCM_CLK_CTRL,
+ .reset = 0x1e00000,
+ .rsvd = 0xfe1fffff,
+ },{ .name = "IOU_SWITCH_CLK_CTRL", .addr = A_VERSAL2_IOU_SWITCH_CLK_CTRL,
+ .reset = 0x2000500,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "GEM0_REF_CTRL", .addr = A_VERSAL2_GEM0_REF_CTRL,
+ .reset = 0xe000a00,
+ .rsvd = 0xf1fc00f8,
+ },{ .name = "GEM1_REF_CTRL", .addr = A_VERSAL2_GEM1_REF_CTRL,
+ .reset = 0xe000a00,
+ .rsvd = 0xf1fc00f8,
+ },{ .name = "GEM_TSU_REF_CLK_CTRL", .addr = A_VERSAL2_GEM_TSU_REF_CLK_CTRL,
+ .reset = 0x300,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "USB0_BUS_REF_CLK_CTRL",
+ .addr = A_VERSAL2_USB0_BUS_REF_CLK_CTRL,
+ .reset = 0x2001900,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "USB1_BUS_REF_CLK_CTRL",
+ .addr = A_VERSAL2_USB1_BUS_REF_CLK_CTRL,
+ .reset = 0x2001900,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "UART0_REF_CLK_CTRL", .addr = A_VERSAL2_UART0_REF_CLK_CTRL,
+ .reset = 0xc00,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "UART1_REF_CLK_CTRL", .addr = A_VERSAL2_UART1_REF_CLK_CTRL,
+ .reset = 0xc00,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "SPI0_REF_CLK_CTRL", .addr = A_VERSAL2_SPI0_REF_CLK_CTRL,
+ .reset = 0x600,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "SPI1_REF_CLK_CTRL", .addr = A_VERSAL2_SPI1_REF_CLK_CTRL,
+ .reset = 0x600,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "CAN0_REF_2X_CTRL", .addr = A_VERSAL2_CAN0_REF_2X_CTRL,
+ .reset = 0xc00,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "CAN1_REF_2X_CTRL", .addr = A_VERSAL2_CAN1_REF_2X_CTRL,
+ .reset = 0xc00,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "CAN2_REF_2X_CTRL", .addr = A_VERSAL2_CAN2_REF_2X_CTRL,
+ .reset = 0xc00,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "CAN3_REF_2X_CTRL", .addr = A_VERSAL2_CAN3_REF_2X_CTRL,
+ .reset = 0xc00,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "I3C0_REF_CTRL", .addr = A_VERSAL2_I3C0_REF_CTRL,
+ .reset = 0x2000c00,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "I3C1_REF_CTRL", .addr = A_VERSAL2_I3C1_REF_CTRL,
+ .reset = 0x2000c00,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "I3C2_REF_CTRL", .addr = A_VERSAL2_I3C2_REF_CTRL,
+ .reset = 0x2000c00,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "I3C3_REF_CTRL", .addr = A_VERSAL2_I3C3_REF_CTRL,
+ .reset = 0x2000c00,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "I3C4_REF_CTRL", .addr = A_VERSAL2_I3C4_REF_CTRL,
+ .reset = 0x2000c00,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "I3C5_REF_CTRL", .addr = A_VERSAL2_I3C5_REF_CTRL,
+ .reset = 0x2000c00,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "I3C6_REF_CTRL", .addr = A_VERSAL2_I3C6_REF_CTRL,
+ .reset = 0x2000c00,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "I3C7_REF_CTRL", .addr = A_VERSAL2_I3C7_REF_CTRL,
+ .reset = 0x2000c00,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "DBG_LPX_CTRL", .addr = A_VERSAL2_DBG_LPX_CTRL,
+ .reset = 0x300,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "TIMESTAMP_REF_CTRL", .addr = A_VERSAL2_TIMESTAMP_REF_CTRL,
+ .reset = 0x2000c00,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "SAFETY_CHK", .addr = A_VERSAL2_SAFETY_CHK,
+ },{ .name = "ASU_CLK_CTRL", .addr = A_VERSAL2_ASU_CLK_CTRL,
+ .reset = 0x2000f04,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "DBG_TSTMP_CLK_CTRL", .addr = A_VERSAL2_DBG_TSTMP_CLK_CTRL,
+ .reset = 0x300,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "MMI_TOPSW_CLK_CTRL", .addr = A_VERSAL2_MMI_TOPSW_CLK_CTRL,
+ .reset = 0x2000300,
+ .rsvd = 0xfdfc00f8,
+ },{ .name = "WWDT_PLL_CLK_CTRL", .addr = A_VERSAL2_WWDT_PLL_CLK_CTRL,
+ .reset = 0xc00,
+ .rsvd = 0xfffc00f8,
+ },{ .name = "RCLK_CTRL", .addr = A_VERSAL2_RCLK_CTRL,
+ .rsvd = 0xc040,
+ },{ .name = "RST_RPU_A", .addr = A_VERSAL2_RST_RPU_A,
+ .reset = 0x10303,
+ .rsvd = 0xfffefcfc,
+ .pre_write = crl_rst_cpu_prew,
+ },{ .name = "RST_RPU_B", .addr = A_VERSAL2_RST_RPU_B,
+ .reset = 0x10303,
+ .rsvd = 0xfffefcfc,
+ .pre_write = crl_rst_cpu_prew,
+ },{ .name = "RST_RPU_C", .addr = A_VERSAL2_RST_RPU_C,
+ .reset = 0x10303,
+ .rsvd = 0xfffefcfc,
+ .pre_write = crl_rst_cpu_prew,
+ },{ .name = "RST_RPU_D", .addr = A_VERSAL2_RST_RPU_D,
+ .reset = 0x10303,
+ .rsvd = 0xfffefcfc,
+ .pre_write = crl_rst_cpu_prew,
+ },{ .name = "RST_RPU_E", .addr = A_VERSAL2_RST_RPU_E,
+ .reset = 0x10303,
+ .rsvd = 0xfffefcfc,
+ .pre_write = crl_rst_cpu_prew,
+ },{ .name = "RST_RPU_GD_0", .addr = A_VERSAL2_RST_RPU_GD_0,
+ .reset = 0x3,
+ },{ .name = "RST_RPU_GD_1", .addr = A_VERSAL2_RST_RPU_GD_1,
+ .reset = 0x3,
+ },{ .name = "RST_ASU_GD", .addr = A_VERSAL2_RST_ASU_GD,
+ .reset = 0x3,
+ },{ .name = "RST_ADMA", .addr = A_VERSAL2_RST_ADMA,
+ .reset = 0x1,
+ .pre_write = crl_rst_dev_prew,
+ },{ .name = "RST_SDMA", .addr = A_VERSAL2_RST_SDMA,
+ .pre_write = crl_rst_dev_prew,
+ .reset = 0x1,
+ },{ .name = "RST_GEM0", .addr = A_VERSAL2_RST_GEM0,
+ .reset = 0x1,
+ .pre_write = crl_rst_dev_prew,
+ },{ .name = "RST_GEM1", .addr = A_VERSAL2_RST_GEM1,
+ .reset = 0x1,
+ .pre_write = crl_rst_dev_prew,
+ },{ .name = "RST_USB0", .addr = A_VERSAL2_RST_USB0,
+ .reset = 0x1,
+ .pre_write = crl_rst_dev_prew,
+ },{ .name = "RST_USB1", .addr = A_VERSAL2_RST_USB1,
+ .reset = 0x1,
+ .pre_write = crl_rst_dev_prew,
+ },{ .name = "RST_UART0", .addr = A_VERSAL2_RST_UART0,
+ .reset = 0x1,
+ .pre_write = crl_rst_dev_prew,
+ },{ .name = "RST_UART1", .addr = A_VERSAL2_RST_UART1,
+ .reset = 0x1,
+ .pre_write = crl_rst_dev_prew,
+ },{ .name = "RST_SPI0", .addr = A_VERSAL2_RST_SPI0,
+ .reset = 0x1,
+ },{ .name = "RST_SPI1", .addr = A_VERSAL2_RST_SPI1,
+ .reset = 0x1,
+ },{ .name = "RST_CAN0", .addr = A_VERSAL2_RST_CAN0,
+ .reset = 0x1,
+ .pre_write = crl_rst_dev_prew,
+ },{ .name = "RST_CAN1", .addr = A_VERSAL2_RST_CAN1,
+ .reset = 0x1,
+ .pre_write = crl_rst_dev_prew,
+ },{ .name = "RST_CAN2", .addr = A_VERSAL2_RST_CAN2,
+ .reset = 0x1,
+ .pre_write = crl_rst_dev_prew,
+ },{ .name = "RST_CAN3", .addr = A_VERSAL2_RST_CAN3,
+ .reset = 0x1,
+ .pre_write = crl_rst_dev_prew,
+ },{ .name = "RST_I3C0", .addr = A_VERSAL2_RST_I3C0,
+ .reset = 0x1,
+ },{ .name = "RST_I3C1", .addr = A_VERSAL2_RST_I3C1,
+ .reset = 0x1,
+ },{ .name = "RST_I3C2", .addr = A_VERSAL2_RST_I3C2,
+ .reset = 0x1,
+ },{ .name = "RST_I3C3", .addr = A_VERSAL2_RST_I3C3,
+ .reset = 0x1,
+ },{ .name = "RST_I3C4", .addr = A_VERSAL2_RST_I3C4,
+ .reset = 0x1,
+ },{ .name = "RST_I3C5", .addr = A_VERSAL2_RST_I3C5,
+ .reset = 0x1,
+ },{ .name = "RST_I3C6", .addr = A_VERSAL2_RST_I3C6,
+ .reset = 0x1,
+ },{ .name = "RST_I3C7", .addr = A_VERSAL2_RST_I3C7,
+ .reset = 0x1,
+ },{ .name = "RST_DBG_LPX", .addr = A_VERSAL2_RST_DBG_LPX,
+ .reset = 0x3,
+ .rsvd = 0xfc,
+ },{ .name = "RST_GPIO", .addr = A_VERSAL2_RST_GPIO,
+ .reset = 0x1,
+ },{ .name = "RST_TTC", .addr = A_VERSAL2_RST_TTC,
+ .reset = 0xff,
+ },{ .name = "RST_TIMESTAMP", .addr = A_VERSAL2_RST_TIMESTAMP,
+ .reset = 0x1,
+ },{ .name = "RST_SWDT0", .addr = A_VERSAL2_RST_SWDT0,
+ .reset = 0x1,
+ },{ .name = "RST_SWDT1", .addr = A_VERSAL2_RST_SWDT1,
+ .reset = 0x1,
+ },{ .name = "RST_SWDT2", .addr = A_VERSAL2_RST_SWDT2,
+ .reset = 0x1,
+ },{ .name = "RST_SWDT3", .addr = A_VERSAL2_RST_SWDT3,
+ .reset = 0x1,
+ },{ .name = "RST_SWDT4", .addr = A_VERSAL2_RST_SWDT4,
+ .reset = 0x1,
+ },{ .name = "RST_IPI", .addr = A_VERSAL2_RST_IPI,
+ },{ .name = "RST_SYSMON", .addr = A_VERSAL2_RST_SYSMON,
+ },{ .name = "ASU_MB_RST_MODE", .addr = A_VERSAL2_ASU_MB_RST_MODE,
+ .reset = 0x1,
+ .rsvd = 0xf8,
+ },{ .name = "FPX_TOPSW_MUX_CTRL", .addr = A_VERSAL2_FPX_TOPSW_MUX_CTRL,
+ .reset = 0x1,
+ },{ .name = "RST_FPX", .addr = A_VERSAL2_RST_FPX,
+ .reset = 0x3,
+ },{ .name = "RST_MMI", .addr = A_VERSAL2_RST_MMI,
+ .reset = 0x1,
+ },{ .name = "RST_OCM", .addr = A_VERSAL2_RST_OCM,
+ }
+};
+
+static void versal_crl_reset_enter(Object *obj, ResetType type)
{
XlnxVersalCRL *s = XLNX_VERSAL_CRL(obj);
unsigned int i;
@@ -311,7 +602,17 @@ static void crl_reset_enter(Object *obj, ResetType type)
}
}
-static void crl_reset_hold(Object *obj, ResetType type)
+static void versal2_crl_reset_enter(Object *obj, ResetType type)
+{
+ XlnxVersal2CRL *s = XLNX_VERSAL2_CRL(obj);
+ size_t i;
+
+ for (i = 0; i < VERSAL2_CRL_R_MAX; ++i) {
+ register_reset(&s->regs_info[i]);
+ }
+}
+
+static void versal_crl_reset_hold(Object *obj, ResetType type)
{
XlnxVersalCRL *s = XLNX_VERSAL_CRL(obj);
@@ -328,25 +629,27 @@ static const MemoryRegionOps crl_ops = {
},
};
-static void crl_init(Object *obj)
+static void versal_crl_init(Object *obj)
{
XlnxVersalCRL *s = XLNX_VERSAL_CRL(obj);
+ XlnxVersalCRLBase *xvcb = XLNX_VERSAL_CRL_BASE(obj);
SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
int i;
- s->reg_array =
+ xvcb->reg_array =
register_init_block32(DEVICE(obj), crl_regs_info,
ARRAY_SIZE(crl_regs_info),
s->regs_info, s->regs,
&crl_ops,
XLNX_VERSAL_CRL_ERR_DEBUG,
CRL_R_MAX * 4);
- sysbus_init_mmio(sbd, &s->reg_array->mem);
+ xvcb->regs = s->regs;
+ sysbus_init_mmio(sbd, &xvcb->reg_array->mem);
sysbus_init_irq(sbd, &s->irq);
- for (i = 0; i < ARRAY_SIZE(s->cfg.cpu_r5); ++i) {
- object_property_add_link(obj, "cpu_r5[*]", TYPE_ARM_CPU,
- (Object **)&s->cfg.cpu_r5[i],
+ for (i = 0; i < ARRAY_SIZE(s->cfg.rpu); ++i) {
+ object_property_add_link(obj, "rpu[*]", TYPE_ARM_CPU,
+ (Object **)&s->cfg.rpu[i],
qdev_prop_allow_set_link_before_realize,
OBJ_PROP_LINK_STRONG);
}
@@ -372,19 +675,88 @@ static void crl_init(Object *obj)
OBJ_PROP_LINK_STRONG);
}
- object_property_add_link(obj, "usb", TYPE_DEVICE,
- (Object **)&s->cfg.gem[i],
- qdev_prop_allow_set_link_before_realize,
- OBJ_PROP_LINK_STRONG);
+ for (i = 0; i < ARRAY_SIZE(s->cfg.usb); ++i) {
+ object_property_add_link(obj, "usb[*]", TYPE_DEVICE,
+ (Object **)&s->cfg.usb[i],
+ qdev_prop_allow_set_link_before_realize,
+ OBJ_PROP_LINK_STRONG);
+ }
+}
+
+static void versal2_crl_init(Object *obj)
+{
+ XlnxVersal2CRL *s = XLNX_VERSAL2_CRL(obj);
+ XlnxVersalCRLBase *xvcb = XLNX_VERSAL_CRL_BASE(obj);
+ SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+ size_t i;
+
+ xvcb->reg_array = register_init_block32(DEVICE(obj), versal2_crl_regs_info,
+ ARRAY_SIZE(versal2_crl_regs_info),
+ s->regs_info, s->regs,
+ &crl_ops,
+ XLNX_VERSAL_CRL_ERR_DEBUG,
+ VERSAL2_CRL_R_MAX * 4);
+ xvcb->regs = s->regs;
+
+ sysbus_init_mmio(sbd, &xvcb->reg_array->mem);
+
+ for (i = 0; i < ARRAY_SIZE(s->cfg.rpu); ++i) {
+ object_property_add_link(obj, "rpu[*]", TYPE_ARM_CPU,
+ (Object **)&s->cfg.rpu[i],
+ qdev_prop_allow_set_link_before_realize,
+ OBJ_PROP_LINK_STRONG);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(s->cfg.adma); ++i) {
+ object_property_add_link(obj, "adma[*]", TYPE_DEVICE,
+ (Object **)&s->cfg.adma[i],
+ qdev_prop_allow_set_link_before_realize,
+ OBJ_PROP_LINK_STRONG);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(s->cfg.sdma); ++i) {
+ object_property_add_link(obj, "sdma[*]", TYPE_DEVICE,
+ (Object **)&s->cfg.sdma[i],
+ qdev_prop_allow_set_link_before_realize,
+ OBJ_PROP_LINK_STRONG);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(s->cfg.uart); ++i) {
+ object_property_add_link(obj, "uart[*]", TYPE_DEVICE,
+ (Object **)&s->cfg.uart[i],
+ qdev_prop_allow_set_link_before_realize,
+ OBJ_PROP_LINK_STRONG);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(s->cfg.gem); ++i) {
+ object_property_add_link(obj, "gem[*]", TYPE_DEVICE,
+ (Object **)&s->cfg.gem[i],
+ qdev_prop_allow_set_link_before_realize,
+ OBJ_PROP_LINK_STRONG);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(s->cfg.usb); ++i) {
+ object_property_add_link(obj, "usb[*]", TYPE_DEVICE,
+ (Object **)&s->cfg.usb[i],
+ qdev_prop_allow_set_link_before_realize,
+ OBJ_PROP_LINK_STRONG);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(s->cfg.can); ++i) {
+ object_property_add_link(obj, "can[*]", TYPE_DEVICE,
+ (Object **)&s->cfg.can[i],
+ qdev_prop_allow_set_link_before_realize,
+ OBJ_PROP_LINK_STRONG);
+ }
}
static void crl_finalize(Object *obj)
{
- XlnxVersalCRL *s = XLNX_VERSAL_CRL(obj);
+ XlnxVersalCRLBase *s = XLNX_VERSAL_CRL_BASE(obj);
register_finalize_block(s->reg_array);
}
-static const VMStateDescription vmstate_crl = {
+static const VMStateDescription vmstate_versal_crl = {
.name = TYPE_XLNX_VERSAL_CRL,
.version_id = 1,
.minimum_version_id = 1,
@@ -394,29 +766,69 @@ static const VMStateDescription vmstate_crl = {
}
};
-static void crl_class_init(ObjectClass *klass, const void *data)
+static const VMStateDescription vmstate_versal2_crl = {
+ .name = TYPE_XLNX_VERSAL2_CRL,
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (const VMStateField[]) {
+ VMSTATE_UINT32_ARRAY(regs, XlnxVersal2CRL, VERSAL2_CRL_R_MAX),
+ VMSTATE_END_OF_LIST(),
+ }
+};
+
+static void versal_crl_class_init(ObjectClass *klass, const void *data)
{
- ResettableClass *rc = RESETTABLE_CLASS(klass);
DeviceClass *dc = DEVICE_CLASS(klass);
+ XlnxVersalCRLBaseClass *xvcc = XLNX_VERSAL_CRL_BASE_CLASS(klass);
+ ResettableClass *rc = RESETTABLE_CLASS(klass);
- dc->vmsd = &vmstate_crl;
+ dc->vmsd = &vmstate_versal_crl;
+ rc->phases.enter = versal_crl_reset_enter;
+ rc->phases.hold = versal_crl_reset_hold;
+ xvcc->decode_periph_rst = versal_decode_periph_rst;
+}
- rc->phases.enter = crl_reset_enter;
- rc->phases.hold = crl_reset_hold;
+static void versal2_crl_class_init(ObjectClass *klass, const void *data)
+{
+ XlnxVersalCRLBaseClass *xvcc = XLNX_VERSAL_CRL_BASE_CLASS(klass);
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ ResettableClass *rc = RESETTABLE_CLASS(klass);
+
+ dc->vmsd = &vmstate_versal2_crl;
+ rc->phases.enter = versal2_crl_reset_enter;
+ xvcc->decode_periph_rst = versal2_decode_periph_rst;
}
-static const TypeInfo crl_info = {
- .name = TYPE_XLNX_VERSAL_CRL,
+static const TypeInfo crl_base_info = {
+ .name = TYPE_XLNX_VERSAL_CRL_BASE,
.parent = TYPE_SYS_BUS_DEVICE,
- .instance_size = sizeof(XlnxVersalCRL),
- .class_init = crl_class_init,
- .instance_init = crl_init,
+ .instance_size = sizeof(XlnxVersalCRLBase),
+ .class_size = sizeof(XlnxVersalCRLBaseClass),
.instance_finalize = crl_finalize,
+ .abstract = true,
+};
+
+static const TypeInfo versal_crl_info = {
+ .name = TYPE_XLNX_VERSAL_CRL,
+ .parent = TYPE_XLNX_VERSAL_CRL_BASE,
+ .instance_size = sizeof(XlnxVersalCRL),
+ .instance_init = versal_crl_init,
+ .class_init = versal_crl_class_init,
+};
+
+static const TypeInfo versal2_crl_info = {
+ .name = TYPE_XLNX_VERSAL2_CRL,
+ .parent = TYPE_XLNX_VERSAL_CRL_BASE,
+ .instance_size = sizeof(XlnxVersal2CRL),
+ .instance_init = versal2_crl_init,
+ .class_init = versal2_crl_class_init,
};
static void crl_register_types(void)
{
- type_register_static(&crl_info);
+ type_register_static(&crl_base_info);
+ type_register_static(&versal_crl_info);
+ type_register_static(&versal2_crl_info);
}
type_init(crl_register_types)
diff --git a/hw/net/Kconfig b/hw/net/Kconfig
index 7f80218..2b513d6 100644
--- a/hw/net/Kconfig
+++ b/hw/net/Kconfig
@@ -82,9 +82,6 @@ config OPENCORES_ETH
config XGMAC
bool
-config MIPSNET
- bool
-
config ALLWINNER_EMAC
bool
diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c
index 50025d5..4444666 100644
--- a/hw/net/cadence_gem.c
+++ b/hw/net/cadence_gem.c
@@ -1756,6 +1756,7 @@ static void gem_realize(DeviceState *dev, Error **errp)
sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->irq[i]);
}
+ gem_init_register_masks(s);
qemu_macaddr_default_if_unset(&s->conf.macaddr);
s->nic = qemu_new_nic(&net_gem_info, &s->conf,
@@ -1776,7 +1777,6 @@ static void gem_init(Object *obj)
DB_PRINT("\n");
- gem_init_register_masks(s);
memory_region_init_io(&s->iomem, OBJECT(s), &gem_ops, s,
"enet", sizeof(s->regs));
diff --git a/hw/net/can/can_sja1000.c b/hw/net/can/can_sja1000.c
index 5b6ba9d..6b08e97 100644
--- a/hw/net/can/can_sja1000.c
+++ b/hw/net/can/can_sja1000.c
@@ -842,7 +842,6 @@ ssize_t can_sja_receive(CanBusClientState *client, const qemu_can_frame *frames,
s->status_pel |= 0x01; /* Set the Receive Buffer Status. DS-p23 */
s->interrupt_pel |= 0x01;
s->status_pel &= ~(1 << 4);
- s->status_pel |= (1 << 0);
can_sja_update_pel_irq(s);
} else { /* BasicCAN mode */
diff --git a/hw/net/can/ctucan_core.c b/hw/net/can/ctucan_core.c
index 17131a4..6bd99c4 100644
--- a/hw/net/can/ctucan_core.c
+++ b/hw/net/can/ctucan_core.c
@@ -28,7 +28,6 @@
#include "qemu/osdep.h"
#include "qemu/log.h"
-#include "qemu/bswap.h"
#include "qemu/bitops.h"
#include "hw/irq.h"
#include "migration/vmstate.h"
diff --git a/hw/net/can/xlnx-versal-canfd.c b/hw/net/can/xlnx-versal-canfd.c
index 3eb1119..3433486 100644
--- a/hw/net/can/xlnx-versal-canfd.c
+++ b/hw/net/can/xlnx-versal-canfd.c
@@ -35,12 +35,8 @@
#include "hw/irq.h"
#include "hw/register.h"
#include "qapi/error.h"
-#include "qemu/bitops.h"
#include "qemu/log.h"
-#include "qemu/cutils.h"
-#include "qemu/event_notifier.h"
#include "hw/qdev-properties.h"
-#include "qom/object_interfaces.h"
#include "migration/vmstate.h"
#include "hw/net/xlnx-versal-canfd.h"
#include "trace.h"
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index cba4999..a80a7b0 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -127,10 +127,8 @@ struct E1000State_st {
QEMUTimer *flush_queue_timer;
/* Compatibility flags for migration to/from qemu 1.3.0 and older */
-#define E1000_FLAG_MAC_BIT 2
#define E1000_FLAG_TSO_BIT 3
#define E1000_FLAG_VET_BIT 4
-#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
#define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
#define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT)
@@ -1212,52 +1210,51 @@ enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };
-#define markflag(x) ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
* f - flag bits (up to 6 possible flags)
* n - flag needed
- * p - partially implenented */
+ * p - partially implemented */
static const uint8_t mac_reg_access[0x8000] = {
- [IPAV] = markflag(MAC), [WUC] = markflag(MAC),
- [IP6AT] = markflag(MAC), [IP4AT] = markflag(MAC),
- [FFVT] = markflag(MAC), [WUPM] = markflag(MAC),
- [ECOL] = markflag(MAC), [MCC] = markflag(MAC),
- [DC] = markflag(MAC), [TNCRS] = markflag(MAC),
- [RLEC] = markflag(MAC), [XONRXC] = markflag(MAC),
- [XOFFTXC] = markflag(MAC), [RFC] = markflag(MAC),
- [TSCTFC] = markflag(MAC), [MGTPRC] = markflag(MAC),
- [WUS] = markflag(MAC), [AIT] = markflag(MAC),
- [FFLT] = markflag(MAC), [FFMT] = markflag(MAC),
- [SCC] = markflag(MAC), [FCRUC] = markflag(MAC),
- [LATECOL] = markflag(MAC), [COLC] = markflag(MAC),
- [SEQEC] = markflag(MAC), [CEXTERR] = markflag(MAC),
- [XONTXC] = markflag(MAC), [XOFFRXC] = markflag(MAC),
- [RJC] = markflag(MAC), [RNBC] = markflag(MAC),
- [MGTPDC] = markflag(MAC), [MGTPTC] = markflag(MAC),
- [RUC] = markflag(MAC), [ROC] = markflag(MAC),
- [GORCL] = markflag(MAC), [GORCH] = markflag(MAC),
- [GOTCL] = markflag(MAC), [GOTCH] = markflag(MAC),
- [BPRC] = markflag(MAC), [MPRC] = markflag(MAC),
- [TSCTC] = markflag(MAC), [PRC64] = markflag(MAC),
- [PRC127] = markflag(MAC), [PRC255] = markflag(MAC),
- [PRC511] = markflag(MAC), [PRC1023] = markflag(MAC),
- [PRC1522] = markflag(MAC), [PTC64] = markflag(MAC),
- [PTC127] = markflag(MAC), [PTC255] = markflag(MAC),
- [PTC511] = markflag(MAC), [PTC1023] = markflag(MAC),
- [PTC1522] = markflag(MAC), [MPTC] = markflag(MAC),
- [BPTC] = markflag(MAC),
-
- [TDFH] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [TDFT] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [RDFH] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [RDFT] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [PBM] = markflag(MAC) | MAC_ACCESS_PARTIAL,
+ [IPAV] = MAC_ACCESS_FLAG_NEEDED, [WUC] = MAC_ACCESS_FLAG_NEEDED,
+ [IP6AT] = MAC_ACCESS_FLAG_NEEDED, [IP4AT] = MAC_ACCESS_FLAG_NEEDED,
+ [FFVT] = MAC_ACCESS_FLAG_NEEDED, [WUPM] = MAC_ACCESS_FLAG_NEEDED,
+ [ECOL] = MAC_ACCESS_FLAG_NEEDED, [MCC] = MAC_ACCESS_FLAG_NEEDED,
+ [DC] = MAC_ACCESS_FLAG_NEEDED, [TNCRS] = MAC_ACCESS_FLAG_NEEDED,
+ [RLEC] = MAC_ACCESS_FLAG_NEEDED, [XONRXC] = MAC_ACCESS_FLAG_NEEDED,
+ [XOFFTXC] = MAC_ACCESS_FLAG_NEEDED, [RFC] = MAC_ACCESS_FLAG_NEEDED,
+ [TSCTFC] = MAC_ACCESS_FLAG_NEEDED, [MGTPRC] = MAC_ACCESS_FLAG_NEEDED,
+ [WUS] = MAC_ACCESS_FLAG_NEEDED, [AIT] = MAC_ACCESS_FLAG_NEEDED,
+ [FFLT] = MAC_ACCESS_FLAG_NEEDED, [FFMT] = MAC_ACCESS_FLAG_NEEDED,
+ [SCC] = MAC_ACCESS_FLAG_NEEDED, [FCRUC] = MAC_ACCESS_FLAG_NEEDED,
+ [LATECOL] = MAC_ACCESS_FLAG_NEEDED, [COLC] = MAC_ACCESS_FLAG_NEEDED,
+ [SEQEC] = MAC_ACCESS_FLAG_NEEDED, [CEXTERR] = MAC_ACCESS_FLAG_NEEDED,
+ [XONTXC] = MAC_ACCESS_FLAG_NEEDED, [XOFFRXC] = MAC_ACCESS_FLAG_NEEDED,
+ [RJC] = MAC_ACCESS_FLAG_NEEDED, [RNBC] = MAC_ACCESS_FLAG_NEEDED,
+ [MGTPDC] = MAC_ACCESS_FLAG_NEEDED, [MGTPTC] = MAC_ACCESS_FLAG_NEEDED,
+ [RUC] = MAC_ACCESS_FLAG_NEEDED, [ROC] = MAC_ACCESS_FLAG_NEEDED,
+ [GORCL] = MAC_ACCESS_FLAG_NEEDED, [GORCH] = MAC_ACCESS_FLAG_NEEDED,
+ [GOTCL] = MAC_ACCESS_FLAG_NEEDED, [GOTCH] = MAC_ACCESS_FLAG_NEEDED,
+ [BPRC] = MAC_ACCESS_FLAG_NEEDED, [MPRC] = MAC_ACCESS_FLAG_NEEDED,
+ [TSCTC] = MAC_ACCESS_FLAG_NEEDED, [PRC64] = MAC_ACCESS_FLAG_NEEDED,
+ [PRC127] = MAC_ACCESS_FLAG_NEEDED, [PRC255] = MAC_ACCESS_FLAG_NEEDED,
+ [PRC511] = MAC_ACCESS_FLAG_NEEDED, [PRC1023] = MAC_ACCESS_FLAG_NEEDED,
+ [PRC1522] = MAC_ACCESS_FLAG_NEEDED, [PTC64] = MAC_ACCESS_FLAG_NEEDED,
+ [PTC127] = MAC_ACCESS_FLAG_NEEDED, [PTC255] = MAC_ACCESS_FLAG_NEEDED,
+ [PTC511] = MAC_ACCESS_FLAG_NEEDED, [PTC1023] = MAC_ACCESS_FLAG_NEEDED,
+ [PTC1522] = MAC_ACCESS_FLAG_NEEDED, [MPTC] = MAC_ACCESS_FLAG_NEEDED,
+ [BPTC] = MAC_ACCESS_FLAG_NEEDED,
+
+ [TDFH] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [TDFT] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [TDFHS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [TDFTS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [TDFPC] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [RDFH] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [RDFT] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [RDFHS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [RDFTS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [RDFPC] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [PBM] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
};
static void
@@ -1419,13 +1416,6 @@ static int e1000_tx_tso_post_load(void *opaque, int version_id)
return 0;
}
-static bool e1000_full_mac_needed(void *opaque)
-{
- E1000State *s = opaque;
-
- return chkflag(MAC);
-}
-
static bool e1000_tso_state_needed(void *opaque)
{
E1000State *s = opaque;
@@ -1451,7 +1441,6 @@ static const VMStateDescription vmstate_e1000_full_mac_state = {
.name = "e1000/full_mac_state",
.version_id = 1,
.minimum_version_id = 1,
- .needed = e1000_full_mac_needed,
.fields = (const VMStateField[]) {
VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
VMSTATE_END_OF_LIST()
@@ -1679,8 +1668,6 @@ static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
static const Property e1000_properties[] = {
DEFINE_NIC_PROPERTIES(E1000State, conf),
- DEFINE_PROP_BIT("extra_mac_registers", E1000State,
- compat_flags, E1000_FLAG_MAC_BIT, true),
DEFINE_PROP_BIT("migrate_tso_props", E1000State,
compat_flags, E1000_FLAG_TSO_BIT, true),
DEFINE_PROP_BIT("init-vet", E1000State,
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
index 2413858..8fef598 100644
--- a/hw/net/e1000e_core.c
+++ b/hw/net/e1000e_core.c
@@ -341,11 +341,6 @@ e1000e_intmgr_collect_delayed_causes(E1000ECore *core)
{
uint32_t res;
- if (msix_enabled(core->owner)) {
- assert(core->delayed_causes == 0);
- return 0;
- }
-
res = core->delayed_causes;
core->delayed_causes = 0;
@@ -2827,8 +2822,9 @@ e1000e_update_rx_offloads(E1000ECore *core)
trace_e1000e_rx_set_cso(cso_state);
if (core->has_vnet) {
- qemu_set_offload(qemu_get_queue(core->owner_nic)->peer,
- cso_state, 0, 0, 0, 0, 0, 0);
+ NetOffloads ol = { .csum = cso_state };
+
+ qemu_set_offload(qemu_get_queue(core->owner_nic)->peer, &ol);
}
}
diff --git a/hw/net/fsl_etsec/etsec.c b/hw/net/fsl_etsec/etsec.c
index d14cb2a..846f6cb 100644
--- a/hw/net/fsl_etsec/etsec.c
+++ b/hw/net/fsl_etsec/etsec.c
@@ -389,6 +389,7 @@ static void etsec_realize(DeviceState *dev, Error **errp)
{
eTSEC *etsec = ETSEC_COMMON(dev);
+ qemu_macaddr_default_if_unset(&etsec->conf.macaddr);
etsec->nic = qemu_new_nic(&net_etsec_info, &etsec->conf,
object_get_typename(OBJECT(dev)), dev->id,
&dev->mem_reentrancy_guard, etsec);
diff --git a/hw/net/i82596.c b/hw/net/i82596.c
index 64ed3c8..c1ff3e6 100644
--- a/hw/net/i82596.c
+++ b/hw/net/i82596.c
@@ -5,7 +5,7 @@
* This work is licensed under the GNU GPL license version 2 or later.
*
* This software was written to be compatible with the specification:
- * https://www.intel.com/assets/pdf/general/82596ca.pdf
+ * https://parisc.docs.kernel.org/en/latest/_downloads/96672be0650d9fc046bbcea40b92482f/82596CA.pdf
*/
#include "qemu/osdep.h"
@@ -177,6 +177,26 @@ static void set_individual_address(I82596State *s, uint32_t addr)
trace_i82596_new_mac(nc->info_str);
}
+static void i82596_configure(I82596State *s, uint32_t addr)
+{
+ uint8_t byte_cnt;
+ byte_cnt = get_byte(addr + 8) & 0x0f;
+
+ byte_cnt = MAX(byte_cnt, 4);
+ byte_cnt = MIN(byte_cnt, sizeof(s->config));
+ /* copy byte_cnt max. */
+ address_space_read(&address_space_memory, addr + 8,
+ MEMTXATTRS_UNSPECIFIED, s->config, byte_cnt);
+ /* config byte according to page 35ff */
+ s->config[2] &= 0x82; /* mask valid bits */
+ s->config[2] |= 0x40;
+ s->config[7] &= 0xf7; /* clear zero bit */
+ assert(I596_NOCRC_INS == 0); /* do CRC insertion */
+ s->config[10] = MAX(s->config[10], 5); /* min frame length */
+ s->config[12] &= 0x40; /* only full duplex field valid */
+ s->config[13] |= 0x3f; /* set ones in byte 13 */
+}
+
static void set_multicast_list(I82596State *s, uint32_t addr)
{
uint16_t mc_count, i;
@@ -234,7 +254,6 @@ static void command_loop(I82596State *s)
{
uint16_t cmd;
uint16_t status;
- uint8_t byte_cnt;
DBG(printf("STARTING COMMAND LOOP cmd_p=%08x\n", s->cmd_p));
@@ -254,20 +273,7 @@ static void command_loop(I82596State *s)
set_individual_address(s, s->cmd_p);
break;
case CmdConfigure:
- byte_cnt = get_byte(s->cmd_p + 8) & 0x0f;
- byte_cnt = MAX(byte_cnt, 4);
- byte_cnt = MIN(byte_cnt, sizeof(s->config));
- /* copy byte_cnt max. */
- address_space_read(&address_space_memory, s->cmd_p + 8,
- MEMTXATTRS_UNSPECIFIED, s->config, byte_cnt);
- /* config byte according to page 35ff */
- s->config[2] &= 0x82; /* mask valid bits */
- s->config[2] |= 0x40;
- s->config[7] &= 0xf7; /* clear zero bit */
- assert(I596_NOCRC_INS == 0); /* do CRC insertion */
- s->config[10] = MAX(s->config[10], 5); /* min frame length */
- s->config[12] &= 0x40; /* only full duplex field valid */
- s->config[13] |= 0x3f; /* set ones in byte 13 */
+ i82596_configure(s, s->cmd_p);
break;
case CmdTDR:
/* get signal LINK */
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 39e3ce1..45d8fd7 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -3058,8 +3058,9 @@ igb_update_rx_offloads(IGBCore *core)
trace_e1000e_rx_set_cso(cso_state);
if (core->has_vnet) {
- qemu_set_offload(qemu_get_queue(core->owner_nic)->peer,
- cso_state, 0, 0, 0, 0, 0, 0);
+ NetOffloads ol = {.csum = cso_state };
+
+ qemu_set_offload(qemu_get_queue(core->owner_nic)->peer, &ol);
}
}
diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c
index 31d72c4..9b0db8f 100644
--- a/hw/net/igbvf.c
+++ b/hw/net/igbvf.c
@@ -251,10 +251,12 @@ static void igbvf_pci_realize(PCIDevice *dev, Error **errp)
memory_region_init_io(&s->mmio, OBJECT(dev), &mmio_ops, s, "igbvf-mmio",
IGBVF_MMIO_SIZE);
- pcie_sriov_vf_register_bar(dev, IGBVF_MMIO_BAR_IDX, &s->mmio);
+ pci_register_bar(dev, IGBVF_MMIO_BAR_IDX, PCI_BASE_ADDRESS_MEM_TYPE_64 |
+ PCI_BASE_ADDRESS_MEM_PREFETCH, &s->mmio);
memory_region_init(&s->msix, OBJECT(dev), "igbvf-msix", IGBVF_MSIX_SIZE);
- pcie_sriov_vf_register_bar(dev, IGBVF_MSIX_BAR_IDX, &s->msix);
+ pci_register_bar(dev, IGBVF_MSIX_BAR_IDX, PCI_BASE_ADDRESS_MEM_TYPE_64 |
+ PCI_BASE_ADDRESS_MEM_PREFETCH, &s->msix);
ret = msix_init(dev, IGBVF_MSIX_VEC_NUM, &s->msix, IGBVF_MSIX_BAR_IDX, 0,
&s->msix, IGBVF_MSIX_BAR_IDX, 0x2000, 0x70, errp);
diff --git a/hw/net/lan9118.c b/hw/net/lan9118.c
index 6dda1e5..3017e12 100644
--- a/hw/net/lan9118.c
+++ b/hw/net/lan9118.c
@@ -21,6 +21,7 @@
#include "hw/ptimer.h"
#include "hw/qdev-properties.h"
#include "qapi/error.h"
+#include "qemu/bswap.h"
#include "qemu/log.h"
#include "qemu/module.h"
#include <zlib.h> /* for crc32 */
diff --git a/hw/net/meson.build b/hw/net/meson.build
index e6759e2..913eaed 100644
--- a/hw/net/meson.build
+++ b/hw/net/meson.build
@@ -23,7 +23,6 @@ system_ss.add(when: 'CONFIG_LAN9118_PHY', if_true: files('lan9118_phy.c'))
system_ss.add(when: 'CONFIG_NE2000_ISA', if_true: files('ne2000-isa.c'))
system_ss.add(when: 'CONFIG_OPENCORES_ETH', if_true: files('opencores_eth.c'))
system_ss.add(when: 'CONFIG_XGMAC', if_true: files('xgmac.c'))
-system_ss.add(when: 'CONFIG_MIPSNET', if_true: files('mipsnet.c'))
system_ss.add(when: 'CONFIG_XILINX_AXI', if_true: files('xilinx_axienet.c'))
system_ss.add(when: 'CONFIG_ALLWINNER_EMAC', if_true: files('allwinner_emac.c'))
system_ss.add(when: 'CONFIG_ALLWINNER_SUN8I_EMAC', if_true: files('allwinner-sun8i-emac.c'))
diff --git a/hw/net/mipsnet.c b/hw/net/mipsnet.c
deleted file mode 100644
index 583aa1c..0000000
--- a/hw/net/mipsnet.c
+++ /dev/null
@@ -1,297 +0,0 @@
-#include "qemu/osdep.h"
-#include "hw/irq.h"
-#include "hw/qdev-properties.h"
-#include "net/net.h"
-#include "qemu/module.h"
-#include "trace.h"
-#include "hw/sysbus.h"
-#include "migration/vmstate.h"
-#include "qom/object.h"
-
-/* MIPSnet register offsets */
-
-#define MIPSNET_DEV_ID 0x00
-#define MIPSNET_BUSY 0x08
-#define MIPSNET_RX_DATA_COUNT 0x0c
-#define MIPSNET_TX_DATA_COUNT 0x10
-#define MIPSNET_INT_CTL 0x14
-# define MIPSNET_INTCTL_TXDONE 0x00000001
-# define MIPSNET_INTCTL_RXDONE 0x00000002
-# define MIPSNET_INTCTL_TESTBIT 0x80000000
-#define MIPSNET_INTERRUPT_INFO 0x18
-#define MIPSNET_RX_DATA_BUFFER 0x1c
-#define MIPSNET_TX_DATA_BUFFER 0x20
-
-#define MAX_ETH_FRAME_SIZE 1514
-
-#define TYPE_MIPS_NET "mipsnet"
-OBJECT_DECLARE_SIMPLE_TYPE(MIPSnetState, MIPS_NET)
-
-struct MIPSnetState {
- SysBusDevice parent_obj;
-
- uint32_t busy;
- uint32_t rx_count;
- uint32_t rx_read;
- uint32_t tx_count;
- uint32_t tx_written;
- uint32_t intctl;
- uint8_t rx_buffer[MAX_ETH_FRAME_SIZE];
- uint8_t tx_buffer[MAX_ETH_FRAME_SIZE];
- MemoryRegion io;
- qemu_irq irq;
- NICState *nic;
- NICConf conf;
-};
-
-static void mipsnet_reset(MIPSnetState *s)
-{
- s->busy = 1;
- s->rx_count = 0;
- s->rx_read = 0;
- s->tx_count = 0;
- s->tx_written = 0;
- s->intctl = 0;
- memset(s->rx_buffer, 0, MAX_ETH_FRAME_SIZE);
- memset(s->tx_buffer, 0, MAX_ETH_FRAME_SIZE);
-}
-
-static void mipsnet_update_irq(MIPSnetState *s)
-{
- int isr = !!s->intctl;
- trace_mipsnet_irq(isr, s->intctl);
- qemu_set_irq(s->irq, isr);
-}
-
-static int mipsnet_buffer_full(MIPSnetState *s)
-{
- if (s->rx_count >= MAX_ETH_FRAME_SIZE) {
- return 1;
- }
- return 0;
-}
-
-static int mipsnet_can_receive(NetClientState *nc)
-{
- MIPSnetState *s = qemu_get_nic_opaque(nc);
-
- if (s->busy) {
- return 0;
- }
- return !mipsnet_buffer_full(s);
-}
-
-static ssize_t mipsnet_receive(NetClientState *nc,
- const uint8_t *buf, size_t size)
-{
- MIPSnetState *s = qemu_get_nic_opaque(nc);
-
- trace_mipsnet_receive(size);
- if (!mipsnet_can_receive(nc)) {
- return 0;
- }
-
- if (size >= sizeof(s->rx_buffer)) {
- return 0;
- }
- s->busy = 1;
-
- /* Just accept everything. */
-
- /* Write packet data. */
- memcpy(s->rx_buffer, buf, size);
-
- s->rx_count = size;
- s->rx_read = 0;
-
- /* Now we can signal we have received something. */
- s->intctl |= MIPSNET_INTCTL_RXDONE;
- mipsnet_update_irq(s);
-
- return size;
-}
-
-static uint64_t mipsnet_ioport_read(void *opaque, hwaddr addr,
- unsigned int size)
-{
- MIPSnetState *s = opaque;
- int ret = 0;
-
- addr &= 0x3f;
- switch (addr) {
- case MIPSNET_DEV_ID:
- ret = be32_to_cpu(0x4d495053); /* MIPS */
- break;
- case MIPSNET_DEV_ID + 4:
- ret = be32_to_cpu(0x4e455430); /* NET0 */
- break;
- case MIPSNET_BUSY:
- ret = s->busy;
- break;
- case MIPSNET_RX_DATA_COUNT:
- ret = s->rx_count;
- break;
- case MIPSNET_TX_DATA_COUNT:
- ret = s->tx_count;
- break;
- case MIPSNET_INT_CTL:
- ret = s->intctl;
- s->intctl &= ~MIPSNET_INTCTL_TESTBIT;
- break;
- case MIPSNET_INTERRUPT_INFO:
- /* XXX: This seems to be a per-VPE interrupt number. */
- ret = 0;
- break;
- case MIPSNET_RX_DATA_BUFFER:
- if (s->rx_count) {
- s->rx_count--;
- ret = s->rx_buffer[s->rx_read++];
- if (mipsnet_can_receive(s->nic->ncs)) {
- qemu_flush_queued_packets(qemu_get_queue(s->nic));
- }
- }
- break;
- /* Reads as zero. */
- case MIPSNET_TX_DATA_BUFFER:
- default:
- break;
- }
- trace_mipsnet_read(addr, ret);
- return ret;
-}
-
-static void mipsnet_ioport_write(void *opaque, hwaddr addr,
- uint64_t val, unsigned int size)
-{
- MIPSnetState *s = opaque;
-
- addr &= 0x3f;
- trace_mipsnet_write(addr, val);
- switch (addr) {
- case MIPSNET_TX_DATA_COUNT:
- s->tx_count = (val <= MAX_ETH_FRAME_SIZE) ? val : 0;
- s->tx_written = 0;
- break;
- case MIPSNET_INT_CTL:
- if (val & MIPSNET_INTCTL_TXDONE) {
- s->intctl &= ~MIPSNET_INTCTL_TXDONE;
- } else if (val & MIPSNET_INTCTL_RXDONE) {
- s->intctl &= ~MIPSNET_INTCTL_RXDONE;
- } else if (val & MIPSNET_INTCTL_TESTBIT) {
- mipsnet_reset(s);
- s->intctl |= MIPSNET_INTCTL_TESTBIT;
- } else if (!val) {
- /* ACK testbit interrupt, flag was cleared on read. */
- }
- s->busy = !!s->intctl;
- mipsnet_update_irq(s);
- if (mipsnet_can_receive(s->nic->ncs)) {
- qemu_flush_queued_packets(qemu_get_queue(s->nic));
- }
- break;
- case MIPSNET_TX_DATA_BUFFER:
- s->tx_buffer[s->tx_written++] = val;
- if ((s->tx_written >= MAX_ETH_FRAME_SIZE)
- || (s->tx_written == s->tx_count)) {
- /* Send buffer. */
- trace_mipsnet_send(s->tx_written);
- qemu_send_packet(qemu_get_queue(s->nic),
- s->tx_buffer, s->tx_written);
- s->tx_count = s->tx_written = 0;
- s->intctl |= MIPSNET_INTCTL_TXDONE;
- s->busy = 1;
- mipsnet_update_irq(s);
- }
- break;
- /* Read-only registers */
- case MIPSNET_DEV_ID:
- case MIPSNET_BUSY:
- case MIPSNET_RX_DATA_COUNT:
- case MIPSNET_INTERRUPT_INFO:
- case MIPSNET_RX_DATA_BUFFER:
- default:
- break;
- }
-}
-
-static const VMStateDescription vmstate_mipsnet = {
- .name = "mipsnet",
- .version_id = 0,
- .minimum_version_id = 0,
- .fields = (const VMStateField[]) {
- VMSTATE_UINT32(busy, MIPSnetState),
- VMSTATE_UINT32(rx_count, MIPSnetState),
- VMSTATE_UINT32(rx_read, MIPSnetState),
- VMSTATE_UINT32(tx_count, MIPSnetState),
- VMSTATE_UINT32(tx_written, MIPSnetState),
- VMSTATE_UINT32(intctl, MIPSnetState),
- VMSTATE_BUFFER(rx_buffer, MIPSnetState),
- VMSTATE_BUFFER(tx_buffer, MIPSnetState),
- VMSTATE_END_OF_LIST()
- }
-};
-
-static NetClientInfo net_mipsnet_info = {
- .type = NET_CLIENT_DRIVER_NIC,
- .size = sizeof(NICState),
- .receive = mipsnet_receive,
-};
-
-static const MemoryRegionOps mipsnet_ioport_ops = {
- .read = mipsnet_ioport_read,
- .write = mipsnet_ioport_write,
- .impl.min_access_size = 1,
- .impl.max_access_size = 4,
-};
-
-static void mipsnet_realize(DeviceState *dev, Error **errp)
-{
- SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
- MIPSnetState *s = MIPS_NET(dev);
-
- memory_region_init_io(&s->io, OBJECT(dev), &mipsnet_ioport_ops, s,
- "mipsnet-io", 36);
- sysbus_init_mmio(sbd, &s->io);
- sysbus_init_irq(sbd, &s->irq);
-
- s->nic = qemu_new_nic(&net_mipsnet_info, &s->conf,
- object_get_typename(OBJECT(dev)), dev->id,
- &dev->mem_reentrancy_guard, s);
- qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
-}
-
-static void mipsnet_sysbus_reset(DeviceState *dev)
-{
- MIPSnetState *s = MIPS_NET(dev);
- mipsnet_reset(s);
-}
-
-static const Property mipsnet_properties[] = {
- DEFINE_NIC_PROPERTIES(MIPSnetState, conf),
-};
-
-static void mipsnet_class_init(ObjectClass *klass, const void *data)
-{
- DeviceClass *dc = DEVICE_CLASS(klass);
-
- dc->realize = mipsnet_realize;
- set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
- dc->desc = "MIPS Simulator network device";
- device_class_set_legacy_reset(dc, mipsnet_sysbus_reset);
- dc->vmsd = &vmstate_mipsnet;
- device_class_set_props(dc, mipsnet_properties);
-}
-
-static const TypeInfo mipsnet_info = {
- .name = TYPE_MIPS_NET,
- .parent = TYPE_SYS_BUS_DEVICE,
- .instance_size = sizeof(MIPSnetState),
- .class_init = mipsnet_class_init,
-};
-
-static void mipsnet_register_types(void)
-{
- type_register_static(&mipsnet_info);
-}
-
-type_init(mipsnet_register_types)
diff --git a/hw/net/npcm_gmac.c b/hw/net/npcm_gmac.c
index a434112..5e32cd3 100644
--- a/hw/net/npcm_gmac.c
+++ b/hw/net/npcm_gmac.c
@@ -516,8 +516,6 @@ static void gmac_try_send_next_packet(NPCMGMACState *gmac)
uint32_t desc_addr;
struct NPCMGMACTxDesc tx_desc;
uint32_t tx_buf_addr, tx_buf_len;
- uint16_t length = 0;
- uint8_t *buf = tx_send_buffer;
uint32_t prev_buf_size = 0;
int csum = 0;
@@ -568,22 +566,20 @@ static void gmac_try_send_next_packet(NPCMGMACState *gmac)
tx_buf_addr = tx_desc.tdes2;
gmac->regs[R_NPCM_DMA_CUR_TX_BUF_ADDR] = tx_buf_addr;
tx_buf_len = TX_DESC_TDES1_BFFR1_SZ_MASK(tx_desc.tdes1);
- buf = &tx_send_buffer[prev_buf_size];
- if ((prev_buf_size + tx_buf_len) > sizeof(buf)) {
+ if ((prev_buf_size + tx_buf_len) > tx_buffer_size) {
tx_buffer_size = prev_buf_size + tx_buf_len;
tx_send_buffer = g_realloc(tx_send_buffer, tx_buffer_size);
- buf = &tx_send_buffer[prev_buf_size];
}
/* step 5 */
- if (dma_memory_read(&address_space_memory, tx_buf_addr, buf,
+ if (dma_memory_read(&address_space_memory, tx_buf_addr,
+ tx_send_buffer + prev_buf_size,
tx_buf_len, MEMTXATTRS_UNSPECIFIED)) {
qemu_log_mask(LOG_GUEST_ERROR, "%s: Failed to read packet @ 0x%x\n",
__func__, tx_buf_addr);
return;
}
- length += tx_buf_len;
prev_buf_size += tx_buf_len;
/* If not chained we'll have a second buffer. */
@@ -591,30 +587,32 @@ static void gmac_try_send_next_packet(NPCMGMACState *gmac)
tx_buf_addr = tx_desc.tdes3;
gmac->regs[R_NPCM_DMA_CUR_TX_BUF_ADDR] = tx_buf_addr;
tx_buf_len = TX_DESC_TDES1_BFFR2_SZ_MASK(tx_desc.tdes1);
- buf = &tx_send_buffer[prev_buf_size];
- if ((prev_buf_size + tx_buf_len) > sizeof(buf)) {
+ if ((prev_buf_size + tx_buf_len) > tx_buffer_size) {
tx_buffer_size = prev_buf_size + tx_buf_len;
tx_send_buffer = g_realloc(tx_send_buffer, tx_buffer_size);
- buf = &tx_send_buffer[prev_buf_size];
}
- if (dma_memory_read(&address_space_memory, tx_buf_addr, buf,
+ if (dma_memory_read(&address_space_memory, tx_buf_addr,
+ tx_send_buffer + prev_buf_size,
tx_buf_len, MEMTXATTRS_UNSPECIFIED)) {
qemu_log_mask(LOG_GUEST_ERROR,
"%s: Failed to read packet @ 0x%x\n",
__func__, tx_buf_addr);
return;
}
- length += tx_buf_len;
prev_buf_size += tx_buf_len;
}
if (tx_desc.tdes1 & TX_DESC_TDES1_LAST_SEG_MASK) {
+ /*
+ * This will truncate the packet at 64K.
+ * TODO: find out if this is the correct behaviour.
+ */
+ uint16_t length = prev_buf_size;
net_checksum_calculate(tx_send_buffer, length, csum);
qemu_send_packet(qemu_get_queue(gmac->nic), tx_send_buffer, length);
trace_npcm_gmac_packet_sent(DEVICE(gmac)->canonical_path, length);
- buf = tx_send_buffer;
- length = 0;
+ prev_buf_size = 0;
}
/* step 6 */
diff --git a/hw/net/rocker/rocker.h b/hw/net/rocker/rocker.h
index 6e0962f..ae06c1c 100644
--- a/hw/net/rocker/rocker.h
+++ b/hw/net/rocker/rocker.h
@@ -36,15 +36,7 @@ static inline G_GNUC_PRINTF(1, 2) int DPRINTF(const char *fmt, ...)
}
#endif
-#define __le16 uint16_t
-#define __le32 uint32_t
-#define __le64 uint64_t
-
-#define __be16 uint16_t
-#define __be32 uint32_t
-#define __be64 uint64_t
-
-static inline bool ipv4_addr_is_multicast(__be32 addr)
+static inline bool ipv4_addr_is_multicast(uint32_t addr)
{
return (addr & htonl(0xf0000000)) == htonl(0xe0000000);
}
@@ -52,8 +44,8 @@ static inline bool ipv4_addr_is_multicast(__be32 addr)
typedef struct ipv6_addr {
union {
uint8_t addr8[16];
- __be16 addr16[8];
- __be32 addr32[4];
+ uint16_t addr16[8];
+ uint32_t addr32[4];
};
} Ipv6Addr;
diff --git a/hw/net/rocker/rocker_hw.h b/hw/net/rocker/rocker_hw.h
index 1786323..7ec6bfb 100644
--- a/hw/net/rocker/rocker_hw.h
+++ b/hw/net/rocker/rocker_hw.h
@@ -9,10 +9,6 @@
#ifndef ROCKER_HW_H
#define ROCKER_HW_H
-#define __le16 uint16_t
-#define __le32 uint32_t
-#define __le64 uint64_t
-
/*
* Return codes
*/
@@ -124,12 +120,12 @@ enum {
*/
typedef struct rocker_desc {
- __le64 buf_addr;
+ uint64_t buf_addr;
uint64_t cookie;
- __le16 buf_size;
- __le16 tlv_size;
- __le16 rsvd[5]; /* pad to 32 bytes */
- __le16 comp_err;
+ uint16_t buf_size;
+ uint16_t tlv_size;
+ uint16_t rsvd[5]; /* pad to 32 bytes */
+ uint16_t comp_err;
} __attribute__((packed, aligned(8))) RockerDesc;
/*
@@ -137,9 +133,9 @@ typedef struct rocker_desc {
*/
typedef struct rocker_tlv {
- __le32 type;
- __le16 len;
- __le16 rsvd;
+ uint32_t type;
+ uint16_t len;
+ uint16_t rsvd;
} __attribute__((packed, aligned(8))) RockerTlv;
/* cmd msg */
diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c
index 3378f63..4aed178 100644
--- a/hw/net/rocker/rocker_of_dpa.c
+++ b/hw/net/rocker/rocker_of_dpa.c
@@ -52,10 +52,10 @@ typedef struct of_dpa_flow_key {
uint32_t tunnel_id; /* overlay tunnel id */
uint32_t tbl_id; /* table id */
struct {
- __be16 vlan_id; /* 0 if no VLAN */
+ uint16_t vlan_id; /* 0 if no VLAN */
MACAddr src; /* ethernet source address */
MACAddr dst; /* ethernet destination address */
- __be16 type; /* ethernet frame type */
+ uint16_t type; /* ethernet frame type */
} eth;
struct {
uint8_t proto; /* IP protocol or ARP opcode */
@@ -66,14 +66,14 @@ typedef struct of_dpa_flow_key {
union {
struct {
struct {
- __be32 src; /* IP source address */
- __be32 dst; /* IP destination address */
+ uint32_t src; /* IP source address */
+ uint32_t dst; /* IP destination address */
} addr;
union {
struct {
- __be16 src; /* TCP/UDP/SCTP source port */
- __be16 dst; /* TCP/UDP/SCTP destination port */
- __be16 flags; /* TCP flags */
+ uint16_t src; /* TCP/UDP/SCTP source port */
+ uint16_t dst; /* TCP/UDP/SCTP destination port */
+ uint16_t flags; /* TCP flags */
} tp;
struct {
MACAddr sha; /* ARP source hardware address */
@@ -86,11 +86,11 @@ typedef struct of_dpa_flow_key {
Ipv6Addr src; /* IPv6 source address */
Ipv6Addr dst; /* IPv6 destination address */
} addr;
- __be32 label; /* IPv6 flow label */
+ uint32_t label; /* IPv6 flow label */
struct {
- __be16 src; /* TCP/UDP/SCTP source port */
- __be16 dst; /* TCP/UDP/SCTP destination port */
- __be16 flags; /* TCP flags */
+ uint16_t src; /* TCP/UDP/SCTP source port */
+ uint16_t dst; /* TCP/UDP/SCTP destination port */
+ uint16_t flags; /* TCP flags */
} tp;
struct {
Ipv6Addr target; /* ND target address */
@@ -112,13 +112,13 @@ typedef struct of_dpa_flow_action {
struct {
uint32_t group_id;
uint32_t tun_log_lport;
- __be16 vlan_id;
+ uint16_t vlan_id;
} write;
struct {
- __be16 new_vlan_id;
+ uint16_t new_vlan_id;
uint32_t out_pport;
uint8_t copy_to_cpu;
- __be16 vlan_id;
+ uint16_t vlan_id;
} apply;
} OfDpaFlowAction;
@@ -143,7 +143,7 @@ typedef struct of_dpa_flow {
typedef struct of_dpa_flow_pkt_fields {
uint32_t tunnel_id;
struct eth_header *ethhdr;
- __be16 *h_proto;
+ uint16_t *h_proto;
struct vlan_header *vlanhdr;
struct ip_header *ipv4hdr;
struct ip6_header *ipv6hdr;
@@ -180,7 +180,7 @@ typedef struct of_dpa_group {
uint32_t group_id;
MACAddr src_mac;
MACAddr dst_mac;
- __be16 vlan_id;
+ uint16_t vlan_id;
} l2_rewrite;
struct {
uint16_t group_count;
@@ -190,13 +190,13 @@ typedef struct of_dpa_group {
uint32_t group_id;
MACAddr src_mac;
MACAddr dst_mac;
- __be16 vlan_id;
+ uint16_t vlan_id;
uint8_t ttl_check;
} l3_unicast;
};
} OfDpaGroup;
-static int of_dpa_mask2prefix(__be32 mask)
+static int of_dpa_mask2prefix(uint32_t mask)
{
int i;
int count = 32;
@@ -451,7 +451,7 @@ static void of_dpa_flow_pkt_parse(OfDpaFlowContext *fc,
fc->iovcnt = iovcnt + 2;
}
-static void of_dpa_flow_pkt_insert_vlan(OfDpaFlowContext *fc, __be16 vlan_id)
+static void of_dpa_flow_pkt_insert_vlan(OfDpaFlowContext *fc, uint16_t vlan_id)
{
OfDpaFlowPktFields *fields = &fc->fields;
uint16_t h_proto = fields->ethhdr->h_proto;
@@ -486,7 +486,7 @@ static void of_dpa_flow_pkt_strip_vlan(OfDpaFlowContext *fc)
static void of_dpa_flow_pkt_hdr_rewrite(OfDpaFlowContext *fc,
uint8_t *src_mac, uint8_t *dst_mac,
- __be16 vlan_id)
+ uint16_t vlan_id)
{
OfDpaFlowPktFields *fields = &fc->fields;
diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
index 15b8f75..324fb93 100644
--- a/hw/net/rtl8139.c
+++ b/hw/net/rtl8139.c
@@ -57,6 +57,7 @@
#include "system/dma.h"
#include "qemu/module.h"
#include "qemu/timer.h"
+#include "qemu/bswap.h"
#include "net/net.h"
#include "net/eth.h"
#include "system/system.h"
@@ -1816,7 +1817,7 @@ static int rtl8139_transmit_one(RTL8139State *s, int descriptor)
PCIDevice *d = PCI_DEVICE(s);
int txsize = s->TxStatus[descriptor] & 0x1fff;
- uint8_t txbuffer[0x2000];
+ QEMU_UNINITIALIZED uint8_t txbuffer[0x2000];
DPRINTF("+++ transmit reading %d bytes from host memory at 0x%08x\n",
txsize, s->TxAddr[descriptor]);
diff --git a/hw/net/trace-events b/hw/net/trace-events
index 72b69c4..e82d749 100644
--- a/hw/net/trace-events
+++ b/hw/net/trace-events
@@ -20,13 +20,6 @@ lan9118_phy_reset(void) ""
lance_mem_readw(uint64_t addr, uint32_t ret) "addr=0x%"PRIx64"val=0x%04x"
lance_mem_writew(uint64_t addr, uint32_t val) "addr=0x%"PRIx64"val=0x%04x"
-# mipsnet.c
-mipsnet_send(uint32_t size) "sending len=%u"
-mipsnet_receive(uint32_t size) "receiving len=%u"
-mipsnet_read(uint64_t addr, uint32_t val) "read addr=0x%" PRIx64 " val=0x%x"
-mipsnet_write(uint64_t addr, uint64_t val) "write addr=0x%" PRIx64 " val=0x%" PRIx64
-mipsnet_irq(uint32_t isr, uint32_t intctl) "set irq to %d (0x%02x)"
-
# ne2000.c
ne2000_read(uint64_t addr, uint64_t val) "read addr=0x%" PRIx64 " val=0x%" PRIx64
ne2000_write(uint64_t addr, uint64_t val) "write addr=0x%" PRIx64 " val=0x%" PRIx64
diff --git a/hw/net/tulip.c b/hw/net/tulip.c
index 63fe513..319af90 100644
--- a/hw/net/tulip.c
+++ b/hw/net/tulip.c
@@ -629,7 +629,7 @@ static void tulip_setup_filter_addr(TULIPState *s, uint8_t *buf, int n)
static void tulip_setup_frame(TULIPState *s,
struct tulip_descriptor *desc)
{
- uint8_t buf[4096];
+ QEMU_UNINITIALIZED uint8_t buf[4096];
int len = (desc->control >> TDES1_BUF1_SIZE_SHIFT) & TDES1_BUF1_SIZE_MASK;
int i;
diff --git a/hw/net/vhost_net-stub.c b/hw/net/vhost_net-stub.c
index 72df6d7..0740d5a 100644
--- a/hw/net/vhost_net-stub.c
+++ b/hw/net/vhost_net-stub.c
@@ -13,7 +13,6 @@
#include "qemu/osdep.h"
#include "net/net.h"
#include "net/tap.h"
-#include "net/vhost-user.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
@@ -47,9 +46,8 @@ void vhost_net_cleanup(struct vhost_net *net)
{
}
-uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
+void vhost_net_get_features_ex(struct vhost_net *net, uint64_t *features)
{
- return features;
}
int vhost_net_get_config(struct vhost_net *net, uint8_t *config,
@@ -63,13 +61,12 @@ int vhost_net_set_config(struct vhost_net *net, const uint8_t *data,
return 0;
}
-void vhost_net_ack_features(struct vhost_net *net, uint64_t features)
+void vhost_net_ack_features_ex(struct vhost_net *net, const uint64_t *features)
{
}
-uint64_t vhost_net_get_acked_features(VHostNetState *net)
+void vhost_net_get_acked_features_ex(VHostNetState *net, uint64_t *features)
{
- return 0;
}
bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
@@ -101,7 +98,7 @@ VHostNetState *get_vhost_net(NetClientState *nc)
return 0;
}
-int vhost_set_vring_enable(NetClientState *nc, int enable)
+int vhost_net_set_vring_enable(NetClientState *nc, int enable)
{
return 0;
}
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 891f235..a8ee18a 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -16,7 +16,6 @@
#include "qemu/osdep.h"
#include "net/net.h"
#include "net/tap.h"
-#include "net/vhost-user.h"
#include "net/vhost-vdpa.h"
#include "standard-headers/linux/vhost_types.h"
@@ -36,95 +35,9 @@
#include "hw/virtio/virtio-bus.h"
#include "linux-headers/linux/vhost.h"
-
-/* Features supported by host kernel. */
-static const int kernel_feature_bits[] = {
- VIRTIO_F_NOTIFY_ON_EMPTY,
- VIRTIO_RING_F_INDIRECT_DESC,
- VIRTIO_RING_F_EVENT_IDX,
- VIRTIO_NET_F_MRG_RXBUF,
- VIRTIO_F_VERSION_1,
- VIRTIO_NET_F_MTU,
- VIRTIO_F_IOMMU_PLATFORM,
- VIRTIO_F_RING_PACKED,
- VIRTIO_F_RING_RESET,
- VIRTIO_F_IN_ORDER,
- VIRTIO_F_NOTIFICATION_DATA,
- VIRTIO_NET_F_RSC_EXT,
- VIRTIO_NET_F_HASH_REPORT,
- VHOST_INVALID_FEATURE_BIT
-};
-
-/* Features supported by others. */
-static const int user_feature_bits[] = {
- VIRTIO_F_NOTIFY_ON_EMPTY,
- VIRTIO_F_NOTIFICATION_DATA,
- VIRTIO_RING_F_INDIRECT_DESC,
- VIRTIO_RING_F_EVENT_IDX,
-
- VIRTIO_F_ANY_LAYOUT,
- VIRTIO_F_VERSION_1,
- VIRTIO_NET_F_CSUM,
- VIRTIO_NET_F_GUEST_CSUM,
- VIRTIO_NET_F_GSO,
- VIRTIO_NET_F_GUEST_TSO4,
- VIRTIO_NET_F_GUEST_TSO6,
- VIRTIO_NET_F_GUEST_ECN,
- VIRTIO_NET_F_GUEST_UFO,
- VIRTIO_NET_F_HOST_TSO4,
- VIRTIO_NET_F_HOST_TSO6,
- VIRTIO_NET_F_HOST_ECN,
- VIRTIO_NET_F_HOST_UFO,
- VIRTIO_NET_F_MRG_RXBUF,
- VIRTIO_NET_F_MTU,
- VIRTIO_F_IOMMU_PLATFORM,
- VIRTIO_F_RING_PACKED,
- VIRTIO_F_RING_RESET,
- VIRTIO_F_IN_ORDER,
- VIRTIO_NET_F_RSS,
- VIRTIO_NET_F_RSC_EXT,
- VIRTIO_NET_F_HASH_REPORT,
- VIRTIO_NET_F_GUEST_USO4,
- VIRTIO_NET_F_GUEST_USO6,
- VIRTIO_NET_F_HOST_USO,
-
- /* This bit implies RARP isn't sent by QEMU out of band */
- VIRTIO_NET_F_GUEST_ANNOUNCE,
-
- VIRTIO_NET_F_MQ,
-
- VHOST_INVALID_FEATURE_BIT
-};
-
-static const int *vhost_net_get_feature_bits(struct vhost_net *net)
-{
- const int *feature_bits = 0;
-
- switch (net->nc->info->type) {
- case NET_CLIENT_DRIVER_TAP:
- feature_bits = kernel_feature_bits;
- break;
- case NET_CLIENT_DRIVER_VHOST_USER:
- feature_bits = user_feature_bits;
- break;
-#ifdef CONFIG_VHOST_NET_VDPA
- case NET_CLIENT_DRIVER_VHOST_VDPA:
- feature_bits = vdpa_feature_bits;
- break;
-#endif
- default:
- error_report("Feature bits not defined for this type: %d",
- net->nc->info->type);
- break;
- }
-
- return feature_bits;
-}
-
-uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
+void vhost_net_get_features_ex(struct vhost_net *net, uint64_t *features)
{
- return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
- features);
+ vhost_get_features_ex(&net->dev, net->feature_bits, features);
}
int vhost_net_get_config(struct vhost_net *net, uint8_t *config,
uint32_t config_len)
@@ -137,10 +50,11 @@ int vhost_net_set_config(struct vhost_net *net, const uint8_t *data,
return vhost_dev_set_config(&net->dev, data, offset, size, flags);
}
-void vhost_net_ack_features(struct vhost_net *net, uint64_t features)
+void vhost_net_ack_features_ex(struct vhost_net *net, const uint64_t *features)
{
- net->dev.acked_features = net->dev.backend_features;
- vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features);
+ virtio_features_copy(net->dev.acked_features_ex,
+ net->dev.backend_features_ex);
+ vhost_ack_features_ex(&net->dev, net->feature_bits, features);
}
uint64_t vhost_net_get_max_queues(VHostNetState *net)
@@ -148,18 +62,18 @@ uint64_t vhost_net_get_max_queues(VHostNetState *net)
return net->dev.max_queues;
}
-uint64_t vhost_net_get_acked_features(VHostNetState *net)
+void vhost_net_get_acked_features_ex(VHostNetState *net, uint64_t *features)
{
- return net->dev.acked_features;
+ virtio_features_copy(features, net->dev.acked_features_ex);
}
void vhost_net_save_acked_features(NetClientState *nc)
{
-#ifdef CONFIG_VHOST_NET_USER
- if (nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
- vhost_user_save_acked_features(nc);
+ struct vhost_net *net = get_vhost_net(nc);
+
+ if (net && net->save_acked_features) {
+ net->save_acked_features(nc);
}
-#endif
}
static void vhost_net_disable_notifiers_nvhosts(VirtIODevice *dev,
@@ -320,7 +234,8 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options)
int r;
bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL;
struct vhost_net *net = g_new0(struct vhost_net, 1);
- uint64_t features = 0;
+ uint64_t missing_features[VIRTIO_FEATURES_NU64S];
+ uint64_t features[VIRTIO_FEATURES_NU64S];
Error *local_err = NULL;
if (!options->net_backend) {
@@ -329,6 +244,11 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options)
}
net->nc = options->net_backend;
net->dev.nvqs = options->nvqs;
+ net->feature_bits = options->feature_bits;
+ net->save_acked_features = options->save_acked_features;
+ net->max_tx_queue_size = options->max_tx_queue_size;
+ net->is_vhost_user = options->is_vhost_user;
+ virtio_features_clear(features);
net->dev.max_queues = 1;
net->dev.vqs = net->vqs;
@@ -343,7 +263,7 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options)
net->backend = r;
net->dev.protocol_features = 0;
} else {
- net->dev.backend_features = 0;
+ virtio_features_clear(net->dev.backend_features_ex);
net->dev.protocol_features = 0;
net->backend = -1;
@@ -363,28 +283,29 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options)
sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF);
}
- if (~net->dev.features & net->dev.backend_features) {
- fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
- " for backend\n",
- (uint64_t)(~net->dev.features & net->dev.backend_features));
+
+ if (virtio_features_andnot(missing_features,
+ net->dev.backend_features_ex,
+ net->dev.features_ex)) {
+ fprintf(stderr, "vhost lacks feature mask 0x" VIRTIO_FEATURES_FMT
+ " for backend\n", VIRTIO_FEATURES_PR(missing_features));
goto fail;
}
}
/* Set sane init value. Override when guest acks. */
-#ifdef CONFIG_VHOST_NET_USER
- if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
- features = vhost_user_get_acked_features(net->nc);
- if (~net->dev.features & features) {
- fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64
- " for backend\n",
- (uint64_t)(~net->dev.features & features));
+ if (options->get_acked_features) {
+ virtio_features_from_u64(features,
+ options->get_acked_features(net->nc));
+ if (virtio_features_andnot(missing_features, features,
+ net->dev.features_ex)) {
+ fprintf(stderr, "vhost lacks feature mask 0x" VIRTIO_FEATURES_FMT
+ " for backend\n", VIRTIO_FEATURES_PR(missing_features));
goto fail;
}
}
-#endif
- vhost_net_ack_features(net, features);
+ vhost_net_ack_features_ex(net, features);
return net;
@@ -525,7 +446,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
* because vhost user doesn't interrupt masking/unmasking
* properly.
*/
- if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
+ if (net->is_vhost_user) {
dev->use_guest_notifier_mask = false;
}
}
@@ -551,7 +472,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
if (peer->vring_enable) {
/* restore vring enable state */
- r = vhost_set_vring_enable(peer, peer->vring_enable);
+ r = vhost_net_set_vring_enable(peer, peer->vring_enable);
if (r < 0) {
goto err_guest_notifiers;
@@ -649,44 +570,21 @@ void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask)
{
vhost_config_mask(&net->dev, dev, mask);
}
+
VHostNetState *get_vhost_net(NetClientState *nc)
{
- VHostNetState *vhost_net = 0;
-
if (!nc) {
return 0;
}
- switch (nc->info->type) {
- case NET_CLIENT_DRIVER_TAP:
- vhost_net = tap_get_vhost_net(nc);
- /*
- * tap_get_vhost_net() can return NULL if a tap net-device backend is
- * created with 'vhost=off' option, 'vhostforce=off' or no vhost or
- * vhostforce or vhostfd options at all. Please see net_init_tap_one().
- * Hence, we omit the assertion here.
- */
- break;
-#ifdef CONFIG_VHOST_NET_USER
- case NET_CLIENT_DRIVER_VHOST_USER:
- vhost_net = vhost_user_get_vhost_net(nc);
- assert(vhost_net);
- break;
-#endif
-#ifdef CONFIG_VHOST_NET_VDPA
- case NET_CLIENT_DRIVER_VHOST_VDPA:
- vhost_net = vhost_vdpa_get_vhost_net(nc);
- assert(vhost_net);
- break;
-#endif
- default:
- break;
+ if (nc->info->get_vhost_net) {
+ return nc->info->get_vhost_net(nc);
}
- return vhost_net;
+ return NULL;
}
-int vhost_set_vring_enable(NetClientState *nc, int enable)
+int vhost_net_set_vring_enable(NetClientState *nc, int enable)
{
VHostNetState *net = get_vhost_net(nc);
const VhostOps *vhost_ops = net->dev.vhost_ops;
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 221252e..3311671 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -90,6 +90,25 @@
VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \
VIRTIO_NET_RSS_HASH_TYPE_UDP_EX)
+/*
+ * Features starting from VIRTIO_NET_FEATURES_MAP_MIN bit correspond
+ * to guest offloads in the VIRTIO_NET_OFFLOAD_MAP range
+ */
+#define VIRTIO_NET_OFFLOAD_MAP_MIN 46
+#define VIRTIO_NET_OFFLOAD_MAP_LENGTH 4
+#define VIRTIO_NET_OFFLOAD_MAP MAKE_64BIT_MASK( \
+ VIRTIO_NET_OFFLOAD_MAP_MIN, \
+ VIRTIO_NET_OFFLOAD_MAP_LENGTH)
+#define VIRTIO_NET_FEATURES_MAP_MIN 65
+#define VIRTIO_NET_F2O_SHIFT (VIRTIO_NET_OFFLOAD_MAP_MIN - \
+ VIRTIO_NET_FEATURES_MAP_MIN + 64)
+
+static bool virtio_has_tunnel_hdr(const uint64_t *features)
+{
+ return virtio_has_feature_ex(features, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO) ||
+ virtio_has_feature_ex(features, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO);
+}
+
static const VirtIOFeature feature_sizes[] = {
{.flags = 1ULL << VIRTIO_NET_F_MAC,
.end = endof(struct virtio_net_config, mac)},
@@ -158,7 +177,7 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ?
VIRTIO_NET_RSS_MAX_TABLE_LEN : 1);
virtio_stl_p(vdev, &netcfg.supported_hash_types,
- VIRTIO_NET_RSS_SUPPORTED_HASHES);
+ n->rss_data.supported_hash_types);
memcpy(config, &netcfg, n->config_size);
/*
@@ -636,8 +655,18 @@ static int peer_has_uso(VirtIONet *n)
return qemu_has_uso(qemu_get_queue(n->nic)->peer);
}
+static bool peer_has_tunnel(VirtIONet *n)
+{
+ if (!peer_has_vnet_hdr(n)) {
+ return false;
+ }
+
+ return qemu_has_tunnel(qemu_get_queue(n->nic)->peer);
+}
+
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
- int version_1, int hash_report)
+ int version_1, int hash_report,
+ int tunnel)
{
int i;
NetClientState *nc;
@@ -645,9 +674,11 @@ static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
n->mergeable_rx_bufs = mergeable_rx_bufs;
if (version_1) {
- n->guest_hdr_len = hash_report ?
- sizeof(struct virtio_net_hdr_v1_hash) :
- sizeof(struct virtio_net_hdr_mrg_rxbuf);
+ n->guest_hdr_len = tunnel ?
+ sizeof(struct virtio_net_hdr_v1_hash_tunnel) :
+ (hash_report ?
+ sizeof(struct virtio_net_hdr_v1_hash) :
+ sizeof(struct virtio_net_hdr_mrg_rxbuf));
n->rss_data.populate_hash = !!hash_report;
} else {
n->guest_hdr_len = n->mergeable_rx_bufs ?
@@ -670,34 +701,36 @@ static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
static int virtio_net_max_tx_queue_size(VirtIONet *n)
{
NetClientState *peer = n->nic_conf.peers.ncs[0];
+ struct vhost_net *net;
- /*
- * Backends other than vhost-user or vhost-vdpa don't support max queue
- * size.
- */
if (!peer) {
- return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
+ goto default_value;
}
- switch(peer->info->type) {
- case NET_CLIENT_DRIVER_VHOST_USER:
- case NET_CLIENT_DRIVER_VHOST_VDPA:
- return VIRTQUEUE_MAX_SIZE;
- default:
- return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
- };
+ net = get_vhost_net(peer);
+
+ if (!net || !net->max_tx_queue_size) {
+ goto default_value;
+ }
+
+ return net->max_tx_queue_size;
+
+default_value:
+ return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE;
}
static int peer_attach(VirtIONet *n, int index)
{
NetClientState *nc = qemu_get_subqueue(n->nic, index);
+ struct vhost_net *net;
if (!nc->peer) {
return 0;
}
- if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
- vhost_set_vring_enable(nc->peer, 1);
+ net = get_vhost_net(nc->peer);
+ if (net && net->is_vhost_user) {
+ vhost_net_set_vring_enable(nc->peer, 1);
}
if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
@@ -714,13 +747,15 @@ static int peer_attach(VirtIONet *n, int index)
static int peer_detach(VirtIONet *n, int index)
{
NetClientState *nc = qemu_get_subqueue(n->nic, index);
+ struct vhost_net *net;
if (!nc->peer) {
return 0;
}
- if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
- vhost_set_vring_enable(nc->peer, 0);
+ net = get_vhost_net(nc->peer);
+ if (net && net->is_vhost_user) {
+ vhost_net_set_vring_enable(nc->peer, 0);
}
if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
@@ -752,79 +787,6 @@ static void virtio_net_set_queue_pairs(VirtIONet *n)
static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
-static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
- Error **errp)
-{
- VirtIONet *n = VIRTIO_NET(vdev);
- NetClientState *nc = qemu_get_queue(n->nic);
-
- /* Firstly sync all virtio-net possible supported features */
- features |= n->host_features;
-
- virtio_add_feature(&features, VIRTIO_NET_F_MAC);
-
- if (!peer_has_vnet_hdr(n)) {
- virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
- virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
- virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
- virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);
-
- virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
- virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
- virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
- virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
-
- virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
- virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
- virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
-
- virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
- }
-
- if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
- virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
- virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
- }
-
- if (!peer_has_uso(n)) {
- virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
- virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
- virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
- }
-
- if (!get_vhost_net(nc->peer)) {
- return features;
- }
-
- if (!ebpf_rss_is_loaded(&n->ebpf_rss)) {
- virtio_clear_feature(&features, VIRTIO_NET_F_RSS);
- }
- features = vhost_net_get_features(get_vhost_net(nc->peer), features);
- vdev->backend_features = features;
-
- if (n->mtu_bypass_backend &&
- (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
- features |= (1ULL << VIRTIO_NET_F_MTU);
- }
-
- /*
- * Since GUEST_ANNOUNCE is emulated the feature bit could be set without
- * enabled. This happens in the vDPA case.
- *
- * Make sure the feature set is not incoherent, as the driver could refuse
- * to start.
- *
- * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
- * helping guest to notify the new location with vDPA devices that does not
- * support it.
- */
- if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
- virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE);
- }
-
- return features;
-}
-
static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
uint64_t features = 0;
@@ -842,17 +804,31 @@ static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
- qemu_set_offload(qemu_get_queue(n->nic)->peer,
- !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
- !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
- !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
- !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
- !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
- !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
- !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
+ NetOffloads ol = {
+ .csum = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
+ .tso4 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
+ .tso6 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
+ .ecn = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
+ .ufo = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
+ .uso4 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
+ .uso6 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)),
+ .tnl = !!(n->curr_guest_offloads &
+ (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED)),
+ .tnl_csum = !!(n->curr_guest_offloads &
+ (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED)),
+ };
+
+ qemu_set_offload(qemu_get_queue(n->nic)->peer, &ol);
}
-static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
+static uint64_t virtio_net_features_to_offload(const uint64_t *features)
+{
+ return (features[0] & ~VIRTIO_NET_OFFLOAD_MAP) |
+ ((features[1] << VIRTIO_NET_F2O_SHIFT) & VIRTIO_NET_OFFLOAD_MAP);
+}
+
+static uint64_t
+virtio_net_guest_offloads_by_features(const uint64_t *features)
{
static const uint64_t guest_offloads_mask =
(1ULL << VIRTIO_NET_F_GUEST_CSUM) |
@@ -861,15 +837,17 @@ static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
(1ULL << VIRTIO_NET_F_GUEST_ECN) |
(1ULL << VIRTIO_NET_F_GUEST_UFO) |
(1ULL << VIRTIO_NET_F_GUEST_USO4) |
- (1ULL << VIRTIO_NET_F_GUEST_USO6);
+ (1ULL << VIRTIO_NET_F_GUEST_USO6) |
+ (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED) |
+ (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED);
- return guest_offloads_mask & features;
+ return guest_offloads_mask & virtio_net_features_to_offload(features);
}
uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n)
{
VirtIODevice *vdev = VIRTIO_DEVICE(n);
- return virtio_net_guest_offloads_by_features(vdev->guest_features);
+ return virtio_net_guest_offloads_by_features(vdev->guest_features_ex);
}
typedef struct {
@@ -948,34 +926,40 @@ static void failover_add_primary(VirtIONet *n, Error **errp)
error_propagate(errp, err);
}
-static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
+static void virtio_net_set_features(VirtIODevice *vdev,
+ const uint64_t *in_features)
{
+ uint64_t features[VIRTIO_FEATURES_NU64S];
VirtIONet *n = VIRTIO_NET(vdev);
Error *err = NULL;
int i;
+ virtio_features_copy(features, in_features);
if (n->mtu_bypass_backend &&
!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) {
- features &= ~(1ULL << VIRTIO_NET_F_MTU);
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_MTU);
}
virtio_net_set_multiqueue(n,
- virtio_has_feature(features, VIRTIO_NET_F_RSS) ||
- virtio_has_feature(features, VIRTIO_NET_F_MQ));
+ virtio_has_feature_ex(features,
+ VIRTIO_NET_F_RSS) ||
+ virtio_has_feature_ex(features,
+ VIRTIO_NET_F_MQ));
virtio_net_set_mrg_rx_bufs(n,
- virtio_has_feature(features,
+ virtio_has_feature_ex(features,
VIRTIO_NET_F_MRG_RXBUF),
- virtio_has_feature(features,
+ virtio_has_feature_ex(features,
VIRTIO_F_VERSION_1),
- virtio_has_feature(features,
- VIRTIO_NET_F_HASH_REPORT));
+ virtio_has_feature_ex(features,
+ VIRTIO_NET_F_HASH_REPORT),
+ virtio_has_tunnel_hdr(features));
- n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
- virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4);
- n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) &&
- virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6);
- n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS);
+ n->rsc4_enabled = virtio_has_feature_ex(features, VIRTIO_NET_F_RSC_EXT) &&
+ virtio_has_feature_ex(features, VIRTIO_NET_F_GUEST_TSO4);
+ n->rsc6_enabled = virtio_has_feature_ex(features, VIRTIO_NET_F_RSC_EXT) &&
+ virtio_has_feature_ex(features, VIRTIO_NET_F_GUEST_TSO6);
+ n->rss_data.redirect = virtio_has_feature_ex(features, VIRTIO_NET_F_RSS);
if (n->has_vnet_hdr) {
n->curr_guest_offloads =
@@ -989,7 +973,7 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
if (!get_vhost_net(nc->peer)) {
continue;
}
- vhost_net_ack_features(get_vhost_net(nc->peer), features);
+ vhost_net_ack_features_ex(get_vhost_net(nc->peer), features);
/*
* keep acked_features in NetVhostUserState up-to-date so it
@@ -998,11 +982,14 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
vhost_net_save_acked_features(nc->peer);
}
- if (!virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
- memset(n->vlans, 0xff, MAX_VLAN >> 3);
+ if (virtio_has_feature_ex(features, VIRTIO_NET_F_CTRL_VLAN) !=
+ virtio_has_feature_ex(vdev->guest_features_ex,
+ VIRTIO_NET_F_CTRL_VLAN)) {
+ bool vlan = virtio_has_feature_ex(features, VIRTIO_NET_F_CTRL_VLAN);
+ memset(n->vlans, vlan ? 0 : 0xff, MAX_VLAN >> 3);
}
- if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
+ if (virtio_has_feature_ex(features, VIRTIO_NET_F_STANDBY)) {
qapi_event_send_failover_negotiated(n->netclient_name);
qatomic_set(&n->failover_primary_hidden, false);
failover_add_primary(n, &err);
@@ -1251,7 +1238,7 @@ static void rss_data_to_rss_config(struct VirtioNetRssData *data,
{
config->redirect = data->redirect;
config->populate_hash = data->populate_hash;
- config->hash_types = data->hash_types;
+ config->hash_types = data->runtime_hash_types;
config->indirections_len = data->indirections_len;
config->default_queue = data->default_queue;
}
@@ -1286,6 +1273,10 @@ static void virtio_net_detach_ebpf_rss(VirtIONet *n)
static void virtio_net_commit_rss_config(VirtIONet *n)
{
+ if (n->rss_data.peer_hash_available) {
+ return;
+ }
+
if (n->rss_data.enabled) {
n->rss_data.enabled_software_rss = n->rss_data.populate_hash;
if (n->rss_data.populate_hash) {
@@ -1300,7 +1291,7 @@ static void virtio_net_commit_rss_config(VirtIONet *n)
}
trace_virtio_net_rss_enable(n,
- n->rss_data.hash_types,
+ n->rss_data.runtime_hash_types,
n->rss_data.indirections_len,
sizeof(n->rss_data.key));
} else {
@@ -1353,6 +1344,8 @@ exit:
static bool virtio_net_load_ebpf(VirtIONet *n, Error **errp)
{
+ Error *err = NULL;
+
if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
return true;
}
@@ -1370,7 +1363,11 @@ static bool virtio_net_load_ebpf(VirtIONet *n, Error **errp)
return virtio_net_load_ebpf_fds(n, errp);
}
- ebpf_rss_load(&n->ebpf_rss, &error_warn);
+ ebpf_rss_load(&n->ebpf_rss, &err);
+ /* Beware, ebpf_rss_load() can return false with @err unset */
+ if (err) {
+ warn_report_err(err);
+ }
return true;
}
@@ -1411,7 +1408,7 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n,
err_value = (uint32_t)s;
goto error;
}
- n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
+ n->rss_data.runtime_hash_types = virtio_ldl_p(vdev, &cfg.hash_types);
n->rss_data.indirections_len =
virtio_lduw_p(vdev, &cfg.indirection_table_mask);
if (!do_rss) {
@@ -1474,12 +1471,12 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n,
err_value = temp.b;
goto error;
}
- if (!temp.b && n->rss_data.hash_types) {
+ if (!temp.b && n->rss_data.runtime_hash_types) {
err_msg = "No key provided";
err_value = 0;
goto error;
}
- if (!temp.b && !n->rss_data.hash_types) {
+ if (!temp.b && !n->rss_data.runtime_hash_types) {
virtio_net_disable_rss(n);
return queue_pairs;
}
@@ -1881,7 +1878,7 @@ static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
- n->rss_data.hash_types);
+ n->rss_data.runtime_hash_types);
if (net_hash_type > NetPktRssIpV6UdpEx) {
if (n->rss_data.populate_hash) {
hdr->hash_value = VIRTIO_NET_HASH_REPORT_NONE;
@@ -1911,9 +1908,9 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
VirtIONet *n = qemu_get_nic_opaque(nc);
VirtIONetQueue *q;
VirtIODevice *vdev = VIRTIO_DEVICE(n);
- VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
- size_t lens[VIRTQUEUE_MAX_SIZE];
- struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
+ QEMU_UNINITIALIZED VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
+ QEMU_UNINITIALIZED size_t lens[VIRTQUEUE_MAX_SIZE];
+ QEMU_UNINITIALIZED struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
struct virtio_net_hdr_v1_hash extra_hdr;
unsigned mhdr_cnt = 0;
size_t offset, i, guest_offset, j;
@@ -1963,10 +1960,10 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
virtio_error(vdev, "virtio-net unexpected empty queue: "
"i %zd mergeable %d offset %zd, size %zd, "
"guest hdr len %zd, host hdr len %zd "
- "guest features 0x%" PRIx64,
+ "guest features 0x" VIRTIO_FEATURES_FMT,
i, n->mergeable_rx_bufs, offset, size,
n->guest_hdr_len, n->host_hdr_len,
- vdev->guest_features);
+ VIRTIO_FEATURES_PR(vdev->guest_features_ex));
}
err = -1;
goto err;
@@ -3022,11 +3019,10 @@ static void virtio_net_del_queue(VirtIONet *n, int index)
virtio_del_queue(vdev, index * 2 + 1);
}
-static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs)
+static void virtio_net_change_num_queues(VirtIONet *n, int new_num_queues)
{
VirtIODevice *vdev = VIRTIO_DEVICE(n);
int old_num_queues = virtio_get_num_queues(vdev);
- int new_num_queues = new_max_queue_pairs * 2 + 1;
int i;
assert(old_num_queues >= 3);
@@ -3062,32 +3058,142 @@ static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
int max = multiqueue ? n->max_queue_pairs : 1;
n->multiqueue = multiqueue;
- virtio_net_change_num_queue_pairs(n, max);
+ virtio_net_change_num_queues(n, max * 2 + 1);
virtio_net_set_queue_pairs(n);
}
-static int virtio_net_pre_load_queues(VirtIODevice *vdev)
+static int virtio_net_pre_load_queues(VirtIODevice *vdev, uint32_t n)
{
- virtio_net_set_multiqueue(VIRTIO_NET(vdev),
- virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_RSS) ||
- virtio_has_feature(vdev->guest_features, VIRTIO_NET_F_MQ));
+ virtio_net_change_num_queues(VIRTIO_NET(vdev), n);
return 0;
}
+static void virtio_net_get_features(VirtIODevice *vdev, uint64_t *features,
+ Error **errp)
+{
+ VirtIONet *n = VIRTIO_NET(vdev);
+ NetClientState *nc = qemu_get_queue(n->nic);
+ uint32_t supported_hash_types = n->rss_data.supported_hash_types;
+ uint32_t peer_hash_types = n->rss_data.peer_hash_types;
+ bool use_own_hash =
+ (supported_hash_types & VIRTIO_NET_RSS_SUPPORTED_HASHES) ==
+ supported_hash_types;
+ bool use_peer_hash =
+ n->rss_data.peer_hash_available &&
+ (supported_hash_types & peer_hash_types) == supported_hash_types;
+
+ /* Firstly sync all virtio-net possible supported features */
+ virtio_features_or(features, features, n->host_features_ex);
+
+ virtio_add_feature_ex(features, VIRTIO_NET_F_MAC);
+
+ if (!peer_has_vnet_hdr(n)) {
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_CSUM);
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_HOST_TSO4);
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_HOST_TSO6);
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_HOST_ECN);
+
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_CSUM);
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_TSO4);
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_TSO6);
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_ECN);
+
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_HOST_USO);
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_USO4);
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_USO6);
+
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO);
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO);
+ virtio_clear_feature_ex(features,
+ VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM);
+ virtio_clear_feature_ex(features,
+ VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM);
+
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_HASH_REPORT);
+ }
+
+ if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_UFO);
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_HOST_UFO);
+ }
+ if (!peer_has_uso(n)) {
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_HOST_USO);
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_USO4);
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_USO6);
+ }
+
+ if (!peer_has_tunnel(n)) {
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO);
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO);
+ virtio_clear_feature_ex(features,
+ VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM);
+ virtio_clear_feature_ex(features,
+ VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM);
+ }
+
+ if (!get_vhost_net(nc->peer)) {
+ if (!use_own_hash) {
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_HASH_REPORT);
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_RSS);
+ } else if (virtio_has_feature_ex(features, VIRTIO_NET_F_RSS)) {
+ virtio_net_load_ebpf(n, errp);
+ }
+
+ return;
+ }
+
+ if (!use_peer_hash) {
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_HASH_REPORT);
+
+ if (!use_own_hash || !virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
+ if (!virtio_net_load_ebpf(n, errp)) {
+ return;
+ }
+
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_RSS);
+ }
+ }
+
+ vhost_net_get_features_ex(get_vhost_net(nc->peer), features);
+ virtio_features_copy(vdev->backend_features_ex, features);
+
+ if (n->mtu_bypass_backend &&
+ (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) {
+ virtio_add_feature_ex(features, VIRTIO_NET_F_MTU);
+ }
+
+ /*
+ * Since GUEST_ANNOUNCE is emulated the feature bit could be set without
+ * enabled. This happens in the vDPA case.
+ *
+ * Make sure the feature set is not incoherent, as the driver could refuse
+ * to start.
+ *
+ * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes,
+ * helping guest to notify the new location with vDPA devices that does not
+ * support it.
+ */
+ if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) {
+ virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_ANNOUNCE);
+ }
+}
+
static int virtio_net_post_load_device(void *opaque, int version_id)
{
VirtIONet *n = opaque;
VirtIODevice *vdev = VIRTIO_DEVICE(n);
int i, link_down;
+ bool has_tunnel_hdr = virtio_has_tunnel_hdr(vdev->guest_features_ex);
trace_virtio_net_post_load_device();
virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs,
virtio_vdev_has_feature(vdev,
VIRTIO_F_VERSION_1),
virtio_vdev_has_feature(vdev,
- VIRTIO_NET_F_HASH_REPORT));
+ VIRTIO_NET_F_HASH_REPORT),
+ has_tunnel_hdr);
/* MAC_TABLE_ENTRIES may be different from the saved image */
if (n->mac_table.in_use > MAC_TABLE_ENTRIES) {
@@ -3314,6 +3420,17 @@ static const VMStateDescription vmstate_virtio_net_has_vnet = {
},
};
+static int virtio_net_rss_post_load(void *opaque, int version_id)
+{
+ VirtIONet *n = VIRTIO_NET(opaque);
+
+ if (version_id == 1) {
+ n->rss_data.supported_hash_types = VIRTIO_NET_RSS_SUPPORTED_HASHES;
+ }
+
+ return 0;
+}
+
static bool virtio_net_rss_needed(void *opaque)
{
return VIRTIO_NET(opaque)->rss_data.enabled;
@@ -3321,14 +3438,16 @@ static bool virtio_net_rss_needed(void *opaque)
static const VMStateDescription vmstate_virtio_net_rss = {
.name = "virtio-net-device/rss",
- .version_id = 1,
+ .version_id = 2,
.minimum_version_id = 1,
+ .post_load = virtio_net_rss_post_load,
.needed = virtio_net_rss_needed,
.fields = (const VMStateField[]) {
VMSTATE_BOOL(rss_data.enabled, VirtIONet),
VMSTATE_BOOL(rss_data.redirect, VirtIONet),
VMSTATE_BOOL(rss_data.populate_hash, VirtIONet),
- VMSTATE_UINT32(rss_data.hash_types, VirtIONet),
+ VMSTATE_UINT32(rss_data.runtime_hash_types, VirtIONet),
+ VMSTATE_UINT32_V(rss_data.supported_hash_types, VirtIONet, 2),
VMSTATE_UINT16(rss_data.indirections_len, VirtIONet),
VMSTATE_UINT16(rss_data.default_queue, VirtIONet),
VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet,
@@ -3894,12 +4013,13 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
n->vqs[0].tx_waiting = 0;
n->tx_burst = n->net_conf.txburst;
- virtio_net_set_mrg_rx_bufs(n, 0, 0, 0);
+ virtio_net_set_mrg_rx_bufs(n, 0, 0, 0, 0);
n->promisc = 1; /* for compatibility */
n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
n->vlans = g_malloc0(MAX_VLAN >> 3);
+ memset(n->vlans, 0xff, MAX_VLAN >> 3);
nc = qemu_get_queue(n->nic);
nc->rxfilter_notify_enabled = 1;
@@ -3915,8 +4035,17 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
net_rx_pkt_init(&n->rx_pkt);
- if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
- virtio_net_load_ebpf(n, errp);
+ if (qemu_get_vnet_hash_supported_types(qemu_get_queue(n->nic)->peer,
+ &n->rss_data.peer_hash_types)) {
+ n->rss_data.peer_hash_available = true;
+ n->rss_data.supported_hash_types =
+ n->rss_data.specified_hash_types.on_bits |
+ (n->rss_data.specified_hash_types.auto_bits &
+ n->rss_data.peer_hash_types);
+ } else {
+ n->rss_data.supported_hash_types =
+ n->rss_data.specified_hash_types.on_bits |
+ n->rss_data.specified_hash_types.auto_bits;
}
}
@@ -3990,7 +4119,6 @@ static void virtio_net_reset(VirtIODevice *vdev)
memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
- memset(n->vlans, 0, MAX_VLAN >> 3);
/* Flush any async TX */
for (i = 0; i < n->max_queue_pairs; i++) {
@@ -4133,6 +4261,58 @@ static const Property virtio_net_properties[] = {
VIRTIO_NET_F_GUEST_USO6, true),
DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
VIRTIO_NET_F_HOST_USO, true),
+ DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv4", VirtIONet,
+ rss_data.specified_hash_types,
+ VIRTIO_NET_HASH_REPORT_IPv4 - 1,
+ ON_OFF_AUTO_AUTO),
+ DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-tcp4", VirtIONet,
+ rss_data.specified_hash_types,
+ VIRTIO_NET_HASH_REPORT_TCPv4 - 1,
+ ON_OFF_AUTO_AUTO),
+ DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-udp4", VirtIONet,
+ rss_data.specified_hash_types,
+ VIRTIO_NET_HASH_REPORT_UDPv4 - 1,
+ ON_OFF_AUTO_AUTO),
+ DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv6", VirtIONet,
+ rss_data.specified_hash_types,
+ VIRTIO_NET_HASH_REPORT_IPv6 - 1,
+ ON_OFF_AUTO_AUTO),
+ DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-tcp6", VirtIONet,
+ rss_data.specified_hash_types,
+ VIRTIO_NET_HASH_REPORT_TCPv6 - 1,
+ ON_OFF_AUTO_AUTO),
+ DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-udp6", VirtIONet,
+ rss_data.specified_hash_types,
+ VIRTIO_NET_HASH_REPORT_UDPv6 - 1,
+ ON_OFF_AUTO_AUTO),
+ DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv6ex", VirtIONet,
+ rss_data.specified_hash_types,
+ VIRTIO_NET_HASH_REPORT_IPv6_EX - 1,
+ ON_OFF_AUTO_AUTO),
+ DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-tcp6ex", VirtIONet,
+ rss_data.specified_hash_types,
+ VIRTIO_NET_HASH_REPORT_TCPv6_EX - 1,
+ ON_OFF_AUTO_AUTO),
+ DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-udp6ex", VirtIONet,
+ rss_data.specified_hash_types,
+ VIRTIO_NET_HASH_REPORT_UDPv6_EX - 1,
+ ON_OFF_AUTO_AUTO),
+ VIRTIO_DEFINE_PROP_FEATURE("host_tunnel", VirtIONet,
+ host_features_ex,
+ VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO,
+ false),
+ VIRTIO_DEFINE_PROP_FEATURE("host_tunnel_csum", VirtIONet,
+ host_features_ex,
+ VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM,
+ false),
+ VIRTIO_DEFINE_PROP_FEATURE("guest_tunnel", VirtIONet,
+ host_features_ex,
+ VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO,
+ false),
+ VIRTIO_DEFINE_PROP_FEATURE("guest_tunnel_csum", VirtIONet,
+ host_features_ex,
+ VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM,
+ false),
};
static void virtio_net_class_init(ObjectClass *klass, const void *data)
@@ -4147,8 +4327,8 @@ static void virtio_net_class_init(ObjectClass *klass, const void *data)
vdc->unrealize = virtio_net_device_unrealize;
vdc->get_config = virtio_net_get_config;
vdc->set_config = virtio_net_set_config;
- vdc->get_features = virtio_net_get_features;
- vdc->set_features = virtio_net_set_features;
+ vdc->get_features_ex = virtio_net_get_features;
+ vdc->set_features_ex = virtio_net_set_features;
vdc->bad_features = virtio_net_bad_features;
vdc->reset = virtio_net_reset;
vdc->queue_reset = virtio_net_queue_reset;
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 83d942a..0373237 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -22,7 +22,6 @@
#include "net/tap.h"
#include "net/checksum.h"
#include "system/system.h"
-#include "qemu/bswap.h"
#include "qemu/log.h"
#include "qemu/module.h"
#include "hw/pci/msix.h"
@@ -41,19 +40,9 @@
#define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1
#define VMXNET3_MSIX_BAR_SIZE 0x2000
-/* Compatibility flags for migration */
-#define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT 0
-#define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS \
- (1 << VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT)
-#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT 1
-#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE \
- (1 << VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT)
-
#define VMXNET3_EXP_EP_OFFSET (0x48)
-#define VMXNET3_MSI_OFFSET(s) \
- ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x50 : 0x84)
-#define VMXNET3_MSIX_OFFSET(s) \
- ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0 : 0x9c)
+#define VMXNET3_MSI_OFFSET (0x84)
+#define VMXNET3_MSIX_OFFSET (0x9c)
#define VMXNET3_DSN_OFFSET (0x100)
#define VMXNET3_BAR0_IDX (0)
@@ -61,8 +50,7 @@
#define VMXNET3_MSIX_BAR_IDX (2)
#define VMXNET3_OFF_MSIX_TABLE (0x000)
-#define VMXNET3_OFF_MSIX_PBA(s) \
- ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x800 : 0x1000)
+#define VMXNET3_OFF_MSIX_PBA (0x1000)
/* Link speed in Mbps should be shifted by 16 */
#define VMXNET3_LINK_SPEED (1000 << 16)
@@ -1334,14 +1322,11 @@ static void vmxnet3_update_features(VMXNET3State *s)
s->lro_supported, rxcso_supported,
s->rx_vlan_stripping);
if (s->peer_has_vhdr) {
- qemu_set_offload(qemu_get_queue(s->nic)->peer,
- rxcso_supported,
- s->lro_supported,
- s->lro_supported,
- 0,
- 0,
- 0,
- 0);
+ NetOffloads ol = { .csum = rxcso_supported,
+ .tso4 = s->lro_supported,
+ .tso6 = s->lro_supported };
+
+ qemu_set_offload(qemu_get_queue(s->nic)->peer, &ol);
}
}
@@ -2122,8 +2107,8 @@ vmxnet3_init_msix(VMXNET3State *s)
&s->msix_bar,
VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_TABLE,
&s->msix_bar,
- VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA(s),
- VMXNET3_MSIX_OFFSET(s), NULL);
+ VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA,
+ VMXNET3_MSIX_OFFSET, NULL);
if (0 > res) {
VMW_WRPRN("Failed to initialize MSI-X, error %d", res);
@@ -2221,7 +2206,7 @@ static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp)
/* Interrupt pin A */
pci_dev->config[PCI_INTERRUPT_PIN] = 0x01;
- ret = msi_init(pci_dev, VMXNET3_MSI_OFFSET(s), VMXNET3_MAX_NMSIX_INTRS,
+ ret = msi_init(pci_dev, VMXNET3_MSI_OFFSET, VMXNET3_MAX_NMSIX_INTRS,
VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK, NULL);
/* Any error other than -ENOTSUP(board's MSI support is broken)
* is a programming error. Fall back to INTx silently on -ENOTSUP */
@@ -2249,6 +2234,7 @@ static void vmxnet3_instance_init(Object *obj)
device_add_bootindex_property(obj, &s->conf.bootindex,
"bootindex", "/ethernet-phy@0",
DEVICE(obj));
+ PCI_DEVICE(obj)->cap_present |= QEMU_PCI_CAP_EXPRESS;
}
static void vmxnet3_pci_uninit(PCIDevice *pci_dev)
@@ -2472,30 +2458,12 @@ static const VMStateDescription vmstate_vmxnet3 = {
static const Property vmxnet3_properties[] = {
DEFINE_NIC_PROPERTIES(VMXNET3State, conf),
- DEFINE_PROP_BIT("x-old-msi-offsets", VMXNET3State, compat_flags,
- VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT, false),
- DEFINE_PROP_BIT("x-disable-pcie", VMXNET3State, compat_flags,
- VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT, false),
};
-static void vmxnet3_realize(DeviceState *qdev, Error **errp)
-{
- VMXNET3Class *vc = VMXNET3_DEVICE_GET_CLASS(qdev);
- PCIDevice *pci_dev = PCI_DEVICE(qdev);
- VMXNET3State *s = VMXNET3(qdev);
-
- if (!(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE)) {
- pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
- }
-
- vc->parent_dc_realize(qdev, errp);
-}
-
static void vmxnet3_class_init(ObjectClass *class, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(class);
PCIDeviceClass *c = PCI_DEVICE_CLASS(class);
- VMXNET3Class *vc = VMXNET3_DEVICE_CLASS(class);
c->realize = vmxnet3_pci_realize;
c->exit = vmxnet3_pci_uninit;
@@ -2506,8 +2474,6 @@ static void vmxnet3_class_init(ObjectClass *class, const void *data)
c->class_id = PCI_CLASS_NETWORK_ETHERNET;
c->subsystem_vendor_id = PCI_VENDOR_ID_VMWARE;
c->subsystem_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
- device_class_set_parent_realize(dc, vmxnet3_realize,
- &vc->parent_dc_realize);
dc->desc = "VMWare Paravirtualized Ethernet v3";
device_class_set_legacy_reset(dc, vmxnet3_qdev_reset);
dc->vmsd = &vmstate_vmxnet3;
diff --git a/hw/net/vmxnet3.h b/hw/net/vmxnet3.h
index f9283f9..dbc69d5 100644
--- a/hw/net/vmxnet3.h
+++ b/hw/net/vmxnet3.h
@@ -63,8 +63,8 @@
* details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ * along with this program; if not, see
+ * <https://www.gnu.org/licenses/>.
*
* The full GNU General Public License is included in this distribution in
* the file called "COPYING".
diff --git a/hw/net/xgmac.c b/hw/net/xgmac.c
index 9c87c4e..d45f872 100644
--- a/hw/net/xgmac.c
+++ b/hw/net/xgmac.c
@@ -207,7 +207,7 @@ static void xgmac_enet_send(XgmacState *s)
struct desc bd;
int frame_size;
int len;
- uint8_t frame[8192];
+ QEMU_UNINITIALIZED uint8_t frame[8192];
uint8_t *ptr;
ptr = frame;
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index fd93550..cd81f73 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -22,7 +22,7 @@
*
* Usage
* -----
- * See docs/system/nvme.rst for extensive documentation.
+ * See docs/system/devices/nvme.rst for extensive documentation.
*
* Add options:
* -drive file=<file>,if=none,id=<drive_id>
@@ -1057,7 +1057,8 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, NvmeSg *sg, NvmeSglDescriptor sgl,
*/
#define SEG_CHUNK_SIZE 256
- NvmeSglDescriptor segment[SEG_CHUNK_SIZE], *sgld, *last_sgld;
+ QEMU_UNINITIALIZED NvmeSglDescriptor segment[SEG_CHUNK_SIZE];
+ NvmeSglDescriptor *sgld, *last_sgld;
uint64_t nsgld;
uint32_t seg_len;
uint16_t status;
@@ -5128,7 +5129,7 @@ static uint16_t nvme_error_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
uint64_t off, NvmeRequest *req)
{
- uint32_t nslist[1024];
+ uint32_t nslist[1024] = {};
uint32_t trans_len;
int i = 0;
uint32_t nsid;
@@ -5138,7 +5139,6 @@ static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
return NVME_INVALID_FIELD | NVME_DNR;
}
- memset(nslist, 0x0, sizeof(nslist));
trans_len = MIN(sizeof(nslist) - off, buf_len);
while ((nsid = find_first_bit(n->changed_nsids, NVME_CHANGED_NSID_SIZE)) !=
@@ -6816,7 +6816,7 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req)
switch (sel) {
case NVME_NS_ATTACHMENT_ATTACH:
- if (nvme_ns(n, nsid)) {
+ if (nvme_ns(ctrl, nsid)) {
return NVME_NS_ALREADY_ATTACHED | NVME_DNR;
}
@@ -6824,7 +6824,7 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req)
return NVME_NS_PRIVATE | NVME_DNR;
}
- if (!nvme_csi_supported(n, ns->csi)) {
+ if (!nvme_csi_supported(ctrl, ns->csi)) {
return NVME_IOCS_NOT_SUPPORTED | NVME_DNR;
}
@@ -6834,6 +6834,10 @@ static uint16_t nvme_ns_attachment(NvmeCtrl *n, NvmeRequest *req)
break;
case NVME_NS_ATTACHMENT_DETACH:
+ if (!nvme_ns(ctrl, nsid)) {
+ return NVME_NS_NOT_ATTACHED | NVME_DNR;
+ }
+
nvme_detach_ns(ctrl, ns);
nvme_update_dsm_limits(ctrl, NULL);
@@ -8335,6 +8339,11 @@ static bool nvme_check_params(NvmeCtrl *n, Error **errp)
host_memory_backend_set_mapped(n->pmr.dev, true);
}
+ if (!n->params.mdts || ((1 << n->params.mdts) + 1) > IOV_MAX) {
+ error_setg(errp, "mdts exceeds IOV_MAX");
+ return false;
+ }
+
if (n->params.zasl > n->params.mdts) {
error_setg(errp, "zoned.zasl (Zone Append Size Limit) must be less "
"than or equal to mdts (Maximum Data Transfer Size)");
@@ -8699,12 +8708,8 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
msix_table_offset);
memory_region_add_subregion(&n->bar0, 0, &n->iomem);
- if (pci_is_vf(pci_dev)) {
- pcie_sriov_vf_register_bar(pci_dev, 0, &n->bar0);
- } else {
- pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
- PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0);
- }
+ pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
+ PCI_BASE_ADDRESS_MEM_TYPE_64, &n->bar0);
ret = msix_init(pci_dev, nr_vectors,
&n->bar0, 0, msix_table_offset,
@@ -8776,7 +8781,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
uint8_t *pci_conf = pci_dev->config;
uint64_t cap = ldq_le_p(&n->bar.cap);
NvmeSecCtrlEntry *sctrl = nvme_sctrl(n);
- uint32_t ctratt;
+ uint32_t ctratt = le32_to_cpu(id->ctratt);
uint16_t oacs;
memcpy(n->cse.acs, nvme_cse_acs_default, sizeof(n->cse.acs));
@@ -8794,10 +8799,11 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
id->oaes = cpu_to_le32(NVME_OAES_NS_ATTR);
- ctratt = NVME_CTRATT_ELBAS;
+ ctratt |= NVME_CTRATT_ELBAS;
if (n->params.ctratt.mem) {
ctratt |= NVME_CTRATT_MEM;
}
+ id->ctratt = cpu_to_le32(ctratt);
id->rab = 6;
@@ -8880,17 +8886,6 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
id->psd[0].enlat = cpu_to_le32(0x10);
id->psd[0].exlat = cpu_to_le32(0x4);
- id->cmic |= NVME_CMIC_MULTI_CTRL;
- ctratt |= NVME_CTRATT_ENDGRPS;
-
- id->endgidmax = cpu_to_le16(0x1);
-
- if (n->subsys->endgrp.fdp.enabled) {
- ctratt |= NVME_CTRATT_FDPS;
- }
-
- id->ctratt = cpu_to_le32(ctratt);
-
NVME_CAP_SET_MQES(cap, n->params.mqes);
NVME_CAP_SET_CQR(cap, 1);
NVME_CAP_SET_TO(cap, 0xf);
@@ -8923,6 +8918,20 @@ static int nvme_init_subsys(NvmeCtrl *n, Error **errp)
}
n->subsys = NVME_SUBSYS(dev);
+ } else {
+ NvmeIdCtrl *id = &n->id_ctrl;
+ uint32_t ctratt = le32_to_cpu(id->ctratt);
+
+ id->cmic |= NVME_CMIC_MULTI_CTRL;
+ ctratt |= NVME_CTRATT_ENDGRPS;
+
+ id->endgidmax = cpu_to_le16(0x1);
+
+ if (n->subsys->endgrp.fdp.enabled) {
+ ctratt |= NVME_CTRATT_FDPS;
+ }
+
+ id->ctratt = cpu_to_le32(ctratt);
}
cntlid = nvme_subsys_register_ctrl(n, errp);
diff --git a/hw/nvram/aspeed_otp.c b/hw/nvram/aspeed_otp.c
new file mode 100644
index 0000000..dcf8ed3
--- /dev/null
+++ b/hw/nvram/aspeed_otp.c
@@ -0,0 +1,190 @@
+/*
+ * ASPEED OTP (One-Time Programmable) memory
+ *
+ * Copyright (C) 2025 Aspeed
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "system/block-backend.h"
+#include "hw/qdev-properties.h"
+#include "hw/nvram/aspeed_otp.h"
+#include "hw/nvram/trace.h"
+
+static uint64_t aspeed_otp_read(void *opaque, hwaddr offset, unsigned size)
+{
+ AspeedOTPState *s = opaque;
+ uint64_t val = 0;
+
+ memcpy(&val, s->storage + offset, size);
+
+ return val;
+}
+
+static bool valid_program_data(uint32_t otp_addr,
+ uint32_t value, uint32_t prog_bit)
+{
+ uint32_t programmed_bits, has_programmable_bits;
+ bool is_odd = otp_addr & 1;
+
+ /*
+ * prog_bit uses 0s to indicate target bits to program:
+ * - if OTP word is even-indexed, programmed bits flip 0->1
+ * - if odd, bits flip 1->0
+ * Bit programming is one-way only and irreversible.
+ */
+ if (is_odd) {
+ programmed_bits = ~value & prog_bit;
+ } else {
+ programmed_bits = value & (~prog_bit);
+ }
+
+ /* If any bit can be programmed, accept the request */
+ has_programmable_bits = value ^ (~prog_bit);
+
+ if (programmed_bits) {
+ trace_aspeed_otp_prog_conflict(otp_addr, programmed_bits);
+ for (int i = 0; i < 32; ++i) {
+ if (programmed_bits & (1U << i)) {
+ trace_aspeed_otp_prog_bit(i);
+ }
+ }
+ }
+
+ return has_programmable_bits != 0;
+}
+
+static bool program_otpmem_data(void *opaque, uint32_t otp_addr,
+ uint32_t prog_bit, uint32_t *value)
+{
+ AspeedOTPState *s = opaque;
+ bool is_odd = otp_addr & 1;
+ uint32_t otp_offset = otp_addr << 2;
+
+ memcpy(value, s->storage + otp_offset, sizeof(uint32_t));
+
+ if (!valid_program_data(otp_addr, *value, prog_bit)) {
+ return false;
+ }
+
+ if (is_odd) {
+ *value &= ~prog_bit;
+ } else {
+ *value |= ~prog_bit;
+ }
+
+ return true;
+}
+
+static void aspeed_otp_write(void *opaque, hwaddr otp_addr,
+ uint64_t val, unsigned size)
+{
+ AspeedOTPState *s = opaque;
+ uint32_t otp_offset, value;
+
+ if (!program_otpmem_data(s, otp_addr, val, &value)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: Failed to program data, value = %x, bit = %"PRIx64"\n",
+ __func__, value, val);
+ return;
+ }
+
+ otp_offset = otp_addr << 2;
+ memcpy(s->storage + otp_offset, &value, size);
+
+ if (s->blk) {
+ if (blk_pwrite(s->blk, otp_offset, size, &value, 0) < 0) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: Failed to write %x to %x\n",
+ __func__, value, otp_offset);
+
+ return;
+ }
+ }
+ trace_aspeed_otp_prog(otp_offset, val, value);
+}
+
+static bool aspeed_otp_init_storage(AspeedOTPState *s, Error **errp)
+{
+ uint32_t *p;
+ int i, num;
+ uint64_t perm;
+
+ if (s->blk) {
+ perm = BLK_PERM_CONSISTENT_READ |
+ (blk_supports_write_perm(s->blk) ? BLK_PERM_WRITE : 0);
+ if (blk_set_perm(s->blk, perm, BLK_PERM_ALL, errp) < 0) {
+ return false;
+ }
+ if (blk_pread(s->blk, 0, s->size, s->storage, 0) < 0) {
+ error_setg(errp, "Failed to read the initial flash content");
+ return false;
+ }
+ } else {
+ num = s->size / sizeof(uint32_t);
+ p = (uint32_t *)s->storage;
+ for (i = 0; i < num; i++) {
+ p[i] = (i % 2 == 0) ? 0x00000000 : 0xFFFFFFFF;
+ }
+ }
+ return true;
+}
+
+static const MemoryRegionOps aspeed_otp_ops = {
+ .read = aspeed_otp_read,
+ .write = aspeed_otp_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid.min_access_size = 1,
+ .valid.max_access_size = 4,
+ .valid.unaligned = true,
+ .impl.unaligned = true
+};
+
+static void aspeed_otp_realize(DeviceState *dev, Error **errp)
+{
+ AspeedOTPState *s = ASPEED_OTP(dev);
+
+ if (s->size == 0) {
+ error_setg(errp, "aspeed.otp: 'size' property must be set");
+ return;
+ }
+
+ s->storage = blk_blockalign(s->blk, s->size);
+
+ if (!aspeed_otp_init_storage(s, errp)) {
+ return;
+ }
+
+ memory_region_init_io(&s->mmio, OBJECT(dev), &aspeed_otp_ops,
+ s, "aspeed.otp", s->size);
+ address_space_init(&s->as, &s->mmio, NULL);
+}
+
+static const Property aspeed_otp_properties[] = {
+ DEFINE_PROP_UINT64("size", AspeedOTPState, size, 0),
+ DEFINE_PROP_DRIVE("drive", AspeedOTPState, blk),
+};
+
+static void aspeed_otp_class_init(ObjectClass *klass, const void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ dc->realize = aspeed_otp_realize;
+ device_class_set_props(dc, aspeed_otp_properties);
+}
+
+static const TypeInfo aspeed_otp_info = {
+ .name = TYPE_ASPEED_OTP,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(AspeedOTPState),
+ .class_init = aspeed_otp_class_init,
+};
+
+static void aspeed_otp_register_types(void)
+{
+ type_register_static(&aspeed_otp_info);
+}
+
+type_init(aspeed_otp_register_types)
diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
index 237b9f7..aa24050 100644
--- a/hw/nvram/fw_cfg.c
+++ b/hw/nvram/fw_cfg.c
@@ -817,62 +817,6 @@ void fw_cfg_modify_i64(FWCfgState *s, uint16_t key, uint64_t value)
g_free(old);
}
-void fw_cfg_set_order_override(FWCfgState *s, int order)
-{
- assert(s->fw_cfg_order_override == 0);
- s->fw_cfg_order_override = order;
-}
-
-void fw_cfg_reset_order_override(FWCfgState *s)
-{
- assert(s->fw_cfg_order_override != 0);
- s->fw_cfg_order_override = 0;
-}
-
-/*
- * This is the legacy order list. For legacy systems, files are in
- * the fw_cfg in the order defined below, by the "order" value. Note
- * that some entries (VGA ROMs, NIC option ROMS, etc.) go into a
- * specific area, but there may be more than one and they occur in the
- * order that the user specifies them on the command line. Those are
- * handled in a special manner, using the order override above.
- *
- * For non-legacy, the files are sorted by filename to avoid this kind
- * of complexity in the future.
- *
- * This is only for x86, other arches don't implement versioning so
- * they won't set legacy mode.
- */
-static struct {
- const char *name;
- int order;
-} fw_cfg_order[] = {
- { "etc/boot-menu-wait", 10 },
- { "bootsplash.jpg", 11 },
- { "bootsplash.bmp", 12 },
- { "etc/boot-fail-wait", 15 },
- { "etc/smbios/smbios-tables", 20 },
- { "etc/smbios/smbios-anchor", 30 },
- { "etc/e820", 40 },
- { "etc/reserved-memory-end", 50 },
- { "genroms/kvmvapic.bin", 55 },
- { "genroms/linuxboot.bin", 60 },
- { }, /* VGA ROMs from pc_vga_init come here, 70. */
- { }, /* NIC option ROMs from pc_nic_init come here, 80. */
- { "etc/system-states", 90 },
- { }, /* User ROMs come here, 100. */
- { }, /* Device FW comes here, 110. */
- { "etc/extra-pci-roots", 120 },
- { "etc/acpi/tables", 130 },
- { "etc/table-loader", 140 },
- { "etc/tpm/log", 150 },
- { "etc/acpi/rsdp", 160 },
- { "bootorder", 170 },
- { "etc/msr_feature_control", 180 },
-
-#define FW_CFG_ORDER_OVERRIDE_LAST 200
-};
-
/*
* Any sub-page size update to these table MRs will be lost during migration,
* as we use aligned size in ram_load_precopy() -> qemu_ram_resize() path.
@@ -890,29 +834,6 @@ static void fw_cfg_acpi_mr_save(FWCfgState *s, const char *filename, size_t len)
}
}
-static int get_fw_cfg_order(FWCfgState *s, const char *name)
-{
- int i;
-
- if (s->fw_cfg_order_override > 0) {
- return s->fw_cfg_order_override;
- }
-
- for (i = 0; i < ARRAY_SIZE(fw_cfg_order); i++) {
- if (fw_cfg_order[i].name == NULL) {
- continue;
- }
-
- if (strcmp(name, fw_cfg_order[i].name) == 0) {
- return fw_cfg_order[i].order;
- }
- }
-
- /* Stick unknown stuff at the end. */
- warn_report("Unknown firmware file in legacy mode: %s", name);
- return FW_CFG_ORDER_OVERRIDE_LAST;
-}
-
void fw_cfg_add_file_callback(FWCfgState *s, const char *filename,
FWCfgCallback select_cb,
FWCfgWriteCallback write_cb,
@@ -921,7 +842,6 @@ void fw_cfg_add_file_callback(FWCfgState *s, const char *filename,
{
int i, index, count;
size_t dsize;
- MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
int order = 0;
if (!s->files) {
@@ -933,22 +853,11 @@ void fw_cfg_add_file_callback(FWCfgState *s, const char *filename,
count = be32_to_cpu(s->files->count);
assert(count < fw_cfg_file_slots(s));
- /* Find the insertion point. */
- if (mc->legacy_fw_cfg_order) {
- /*
- * Sort by order. For files with the same order, we keep them
- * in the sequence in which they were added.
- */
- order = get_fw_cfg_order(s, filename);
- for (index = count;
- index > 0 && order < s->entry_order[index - 1];
- index--);
- } else {
- /* Sort by file name. */
- for (index = count;
- index > 0 && strcmp(filename, s->files->f[index - 1].name) < 0;
- index--);
- }
+ /* Find the insertion point, sorting by file name. */
+ for (index = count;
+ index > 0 && strcmp(filename, s->files->f[index - 1].name) < 0;
+ index--)
+ ;
/*
* Move all the entries from the index point and after down one
@@ -1058,7 +967,6 @@ bool fw_cfg_add_file_from_generator(FWCfgState *s,
static void fw_cfg_machine_reset(void *opaque)
{
- MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
FWCfgState *s = opaque;
void *ptr;
size_t len;
@@ -1068,11 +976,9 @@ static void fw_cfg_machine_reset(void *opaque)
ptr = fw_cfg_modify_file(s, "bootorder", (uint8_t *)buf, len);
g_free(ptr);
- if (!mc->legacy_fw_cfg_order) {
- buf = get_boot_devices_lchs_list(&len);
- ptr = fw_cfg_modify_file(s, "bios-geometry", (uint8_t *)buf, len);
- g_free(ptr);
- }
+ buf = get_boot_devices_lchs_list(&len);
+ ptr = fw_cfg_modify_file(s, "bios-geometry", (uint8_t *)buf, len);
+ g_free(ptr);
}
static void fw_cfg_machine_ready(struct Notifier *n, void *data)
diff --git a/hw/nvram/meson.build b/hw/nvram/meson.build
index 10f3639..b66f236 100644
--- a/hw/nvram/meson.build
+++ b/hw/nvram/meson.build
@@ -19,3 +19,7 @@ system_ss.add(when: 'CONFIG_XLNX_BBRAM', if_true: files('xlnx-bbram.c'))
specific_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr_nvram.c'))
specific_ss.add(when: 'CONFIG_ACPI', if_true: files('fw_cfg-acpi.c'))
+
+system_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files(
+ 'aspeed_otp.c',
+ )) \ No newline at end of file
diff --git a/hw/nvram/trace-events b/hw/nvram/trace-events
index 5e33b24..7084bf7 100644
--- a/hw/nvram/trace-events
+++ b/hw/nvram/trace-events
@@ -1,5 +1,10 @@
# See docs/devel/tracing.rst for syntax documentation.
+# aspeed_otp.c
+aspeed_otp_prog(uint32_t addr, uint32_t prog_value, uint32_t value) "OTP Memory program: addr 0x%" PRIx32 " prog_value 0x%" PRIx32 " value 0x%" PRIx32
+aspeed_otp_prog_conflict(uint32_t addr, uint32_t bits) "Conflict at addr=0x%x, bits=0x%08x"
+aspeed_otp_prog_bit(int bit) "Programmed bit %d"
+
# ds1225y.c
nvram_read(uint32_t addr, uint32_t ret) "read addr %d: 0x%02x"
nvram_write(uint32_t addr, uint32_t old, uint32_t val) "write addr %d: 0x%02x -> 0x%02x"
diff --git a/hw/openrisc/cputimer.c b/hw/openrisc/cputimer.c
index 6331997..51da226 100644
--- a/hw/openrisc/cputimer.c
+++ b/hw/openrisc/cputimer.c
@@ -105,7 +105,7 @@ static void openrisc_timer_cb(void *opaque)
CPUState *cs = CPU(cpu);
cpu->env.ttmr |= TTMR_IP;
- cs->interrupt_request |= CPU_INTERRUPT_TIMER;
+ cpu_set_interrupt(cs, CPU_INTERRUPT_TIMER);
}
switch (cpu->env.ttmr & TTMR_M) {
diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c
index 3a29dfe..1bccedd 100644
--- a/hw/pci-bridge/pci_expander_bridge.c
+++ b/hw/pci-bridge/pci_expander_bridge.c
@@ -34,7 +34,6 @@ typedef struct PXBBus PXBBus;
DECLARE_INSTANCE_CHECKER(PXBBus, PXB_BUS,
TYPE_PXB_BUS)
-#define TYPE_PXB_PCIE_BUS "pxb-pcie-bus"
DECLARE_INSTANCE_CHECKER(PXBBus, PXB_PCIE_BUS,
TYPE_PXB_PCIE_BUS)
diff --git a/hw/pci-host/Kconfig b/hw/pci-host/Kconfig
index 35c0415..8cbb830 100644
--- a/hw/pci-host/Kconfig
+++ b/hw/pci-host/Kconfig
@@ -46,6 +46,10 @@ config PCI_I440FX
select PCI
select PAM
+config PCI_EXPRESS_ASPEED
+ bool
+ select PCI_EXPRESS
+
config PCI_EXPRESS_Q35
bool
select PCI_EXPRESS
@@ -54,6 +58,7 @@ config PCI_EXPRESS_Q35
config PCI_EXPRESS_GENERIC_BRIDGE
bool
select PCI_EXPRESS
+ imply ACPI_PCI
config PCI_EXPRESS_XILINX
bool
diff --git a/hw/pci-host/aspeed_pcie.c b/hw/pci-host/aspeed_pcie.c
new file mode 100644
index 0000000..f759344
--- /dev/null
+++ b/hw/pci-host/aspeed_pcie.c
@@ -0,0 +1,1015 @@
+/*
+ * ASPEED PCIe Host Controller
+ *
+ * Copyright (C) 2025 ASPEED Technology Inc.
+ * Copyright (c) 2022 Cédric Le Goater <clg@kaod.org>
+ *
+ * Authors:
+ * Cédric Le Goater <clg@kaod.org>
+ * Jamin Lin <jamin_lin@aspeedtech.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Based on previous work from Cédric Le Goater.
+ * Modifications extend support for the ASPEED AST2600 and AST2700 platforms.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "hw/qdev-properties.h"
+#include "hw/registerfields.h"
+#include "hw/irq.h"
+#include "hw/pci/pci_host.h"
+#include "hw/pci/pcie_port.h"
+#include "hw/pci-host/aspeed_pcie.h"
+#include "hw/pci/msi.h"
+#include "trace.h"
+
+/*
+ * PCIe Root Device
+ * This device exists only on AST2600.
+ */
+
+static void aspeed_pcie_root_device_class_init(ObjectClass *klass,
+ const void *data)
+{
+ PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
+ dc->desc = "ASPEED PCIe Root Device";
+ k->vendor_id = PCI_VENDOR_ID_ASPEED;
+ k->device_id = 0x2600;
+ k->class_id = PCI_CLASS_BRIDGE_HOST;
+ k->subsystem_vendor_id = k->vendor_id;
+ k->subsystem_id = k->device_id;
+ k->revision = 0;
+
+ /*
+ * PCI-facing part of the host bridge,
+ * not usable without the host-facing part
+ */
+ dc->user_creatable = false;
+}
+
+static const TypeInfo aspeed_pcie_root_device_info = {
+ .name = TYPE_ASPEED_PCIE_ROOT_DEVICE,
+ .parent = TYPE_PCI_DEVICE,
+ .instance_size = sizeof(AspeedPCIERootDeviceState),
+ .class_init = aspeed_pcie_root_device_class_init,
+ .interfaces = (const InterfaceInfo[]) {
+ { INTERFACE_CONVENTIONAL_PCI_DEVICE },
+ { },
+ },
+};
+
+/*
+ * PCIe Root Port
+ */
+
+static void aspeed_pcie_root_port_class_init(ObjectClass *klass,
+ const void *data)
+{
+ PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(klass);
+
+ dc->desc = "ASPEED PCIe Root Port";
+ k->vendor_id = PCI_VENDOR_ID_ASPEED;
+ k->device_id = 0x1150;
+ dc->user_creatable = true;
+
+ rpc->aer_offset = 0x100;
+}
+
+static const TypeInfo aspeed_pcie_root_port_info = {
+ .name = TYPE_ASPEED_PCIE_ROOT_PORT,
+ .parent = TYPE_PCIE_ROOT_PORT,
+ .instance_size = sizeof(AspeedPCIERootPortState),
+ .class_init = aspeed_pcie_root_port_class_init,
+};
+
+/*
+ * PCIe Root Complex (RC)
+ */
+
+#define ASPEED_PCIE_CFG_RC_MAX_MSI 64
+
+static void aspeed_pcie_rc_set_irq(void *opaque, int irq, int level)
+{
+ AspeedPCIERcState *rc = (AspeedPCIERcState *) opaque;
+ AspeedPCIECfgState *cfg =
+ container_of(rc, AspeedPCIECfgState, rc);
+ bool intx;
+
+ assert(irq < PCI_NUM_PINS);
+
+ if (level) {
+ cfg->regs[cfg->rc_regs->int_sts_reg] |= BIT(irq);
+ } else {
+ cfg->regs[cfg->rc_regs->int_sts_reg] &= ~BIT(irq);
+ }
+
+ intx = !!(cfg->regs[cfg->rc_regs->int_sts_reg] &
+ cfg->regs[cfg->rc_regs->int_en_reg]);
+ trace_aspeed_pcie_rc_intx_set_irq(cfg->id, irq, intx);
+ qemu_set_irq(rc->irq, intx);
+}
+
+static int aspeed_pcie_rc_map_irq(PCIDevice *pci_dev, int irq_num)
+{
+ return irq_num % PCI_NUM_PINS;
+}
+
+static void aspeed_pcie_rc_msi_notify(AspeedPCIERcState *rc, uint64_t data)
+{
+ AspeedPCIECfgState *cfg =
+ container_of(rc, AspeedPCIECfgState, rc);
+ uint32_t reg;
+
+ /* Written data is the HW IRQ number */
+ assert(data < ASPEED_PCIE_CFG_RC_MAX_MSI);
+
+ reg = (data < 32) ?
+ cfg->rc_regs->msi_sts0_reg : cfg->rc_regs->msi_sts1_reg;
+ cfg->regs[reg] |= BIT(data % 32);
+
+ trace_aspeed_pcie_rc_msi_set_irq(cfg->id, data, 1);
+ qemu_set_irq(rc->irq, 1);
+}
+
+static void aspeed_pcie_rc_msi_write(void *opaque, hwaddr addr, uint64_t data,
+ unsigned int size)
+{
+ AspeedPCIERcState *rc = ASPEED_PCIE_RC(opaque);
+ AspeedPCIECfgState *cfg =
+ container_of(rc, AspeedPCIECfgState, rc);
+
+ trace_aspeed_pcie_rc_msi_notify(cfg->id, addr + rc->msi_addr, data);
+ aspeed_pcie_rc_msi_notify(rc, data);
+}
+
+static const MemoryRegionOps aspeed_pcie_rc_msi_ops = {
+ .write = aspeed_pcie_rc_msi_write,
+ .read = NULL,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid = {
+ .min_access_size = 4,
+ .max_access_size = 4,
+ },
+ .impl = {
+ .min_access_size = 4,
+ .max_access_size = 4,
+ },
+};
+
+static AddressSpace *aspeed_pcie_rc_get_as(PCIBus *bus, void *opaque, int devfn)
+{
+ AspeedPCIERcState *rc = ASPEED_PCIE_RC(opaque);
+ return &rc->iommu_as;
+}
+
+static const PCIIOMMUOps aspeed_pcie_rc_iommu_ops = {
+ .get_address_space = aspeed_pcie_rc_get_as,
+};
+
+static void aspeed_pcie_rc_realize(DeviceState *dev, Error **errp)
+{
+ PCIExpressHost *pex = PCIE_HOST_BRIDGE(dev);
+ AspeedPCIERcState *rc = ASPEED_PCIE_RC(dev);
+ AspeedPCIECfgState *cfg =
+ container_of(rc, AspeedPCIECfgState, rc);
+ PCIHostState *pci = PCI_HOST_BRIDGE(dev);
+ SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+ g_autofree char *ioport_window_name = NULL;
+ g_autofree char *mmio_window_name = NULL;
+ g_autofree char *iommu_root_name = NULL;
+ g_autofree char *dram_alias_name = NULL;
+ g_autofree char *root_bus_name = NULL;
+
+ /* PCI configuration space */
+ pcie_host_mmcfg_init(pex, PCIE_MMCFG_SIZE_MAX);
+ sysbus_init_mmio(sbd, &pex->mmio);
+
+ /* MMIO and IO region */
+ memory_region_init(&rc->mmio, OBJECT(rc), "mmio", UINT64_MAX);
+ memory_region_init(&rc->io, OBJECT(rc), "io", 0x10000);
+
+ mmio_window_name = g_strdup_printf("pcie.%d.mmio_window", cfg->id);
+ memory_region_init_io(&rc->mmio_window, OBJECT(rc), &unassigned_io_ops,
+ OBJECT(rc), mmio_window_name, UINT64_MAX);
+ ioport_window_name = g_strdup_printf("pcie.%d.ioport_window", cfg->id);
+ memory_region_init_io(&rc->io_window, OBJECT(rc), &unassigned_io_ops,
+ OBJECT(rc), ioport_window_name, 0x10000);
+
+ memory_region_add_subregion(&rc->mmio_window, 0, &rc->mmio);
+ memory_region_add_subregion(&rc->io_window, 0, &rc->io);
+ sysbus_init_mmio(sbd, &rc->mmio_window);
+ sysbus_init_mmio(sbd, &rc->io_window);
+
+ sysbus_init_irq(sbd, &rc->irq);
+ root_bus_name = g_strdup_printf("pcie.rc%d", cfg->id);
+ pci->bus = pci_register_root_bus(dev, root_bus_name,
+ aspeed_pcie_rc_set_irq,
+ aspeed_pcie_rc_map_irq, rc, &rc->mmio,
+ &rc->io, 0, 4, TYPE_PCIE_BUS);
+ pci->bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE;
+
+ /*
+ * PCIe memory view setup
+ *
+ * Background:
+ * - On AST2700, all Root Complexes use the same MSI address. This MSI
+ * address is not normal system RAM - it is a PCI system memory address.
+ * If we map the MSI/MSI-X window into real system memory, a write from
+ * one EP can be seen by all RCs and wrongly trigger interrupts on them.
+ *
+ * Design:
+ * - MSI/MSI-X here is just a placeholder address so RC and EP can talk.
+ * We make a separate MMIO space (iommu_root) for the MSI window so the
+ * writes stay local to each RC.
+ *
+ * DMA:
+ * - EPs still need access to real system memory for DMA. We add a DRAM
+ * alias in the PCI space so DMA works as expected.
+ */
+ iommu_root_name = g_strdup_printf("pcie.%d.iommu_root", cfg->id);
+ memory_region_init(&rc->iommu_root, OBJECT(rc), iommu_root_name,
+ UINT64_MAX);
+ address_space_init(&rc->iommu_as, &rc->iommu_root, iommu_root_name);
+ /* setup MSI */
+ memory_region_init_io(&rc->msi_window, OBJECT(rc),
+ &aspeed_pcie_rc_msi_ops, rc,
+ "msi_window", 4);
+ memory_region_add_subregion(&rc->iommu_root, rc->msi_addr,
+ &rc->msi_window);
+ /* setup DRAM for DMA */
+ assert(rc->dram_mr != NULL);
+ dram_alias_name = g_strdup_printf("pcie.%d.dram_alias", cfg->id);
+ memory_region_init_alias(&rc->dram_alias, OBJECT(rc), dram_alias_name,
+ rc->dram_mr, 0, memory_region_size(rc->dram_mr));
+ memory_region_add_subregion(&rc->iommu_root, rc->dram_base,
+ &rc->dram_alias);
+ pci_setup_iommu(pci->bus, &aspeed_pcie_rc_iommu_ops, rc);
+
+ /* setup root device */
+ if (rc->has_rd) {
+ object_initialize_child(OBJECT(rc), "root_device", &rc->root_device,
+ TYPE_ASPEED_PCIE_ROOT_DEVICE);
+ qdev_prop_set_int32(DEVICE(&rc->root_device), "addr",
+ PCI_DEVFN(0, 0));
+ qdev_prop_set_bit(DEVICE(&rc->root_device), "multifunction", false);
+ if (!qdev_realize(DEVICE(&rc->root_device), BUS(pci->bus), errp)) {
+ return;
+ }
+ }
+
+ /* setup root port */
+ qdev_prop_set_int32(DEVICE(&rc->root_port), "addr", rc->rp_addr);
+ qdev_prop_set_uint16(DEVICE(&rc->root_port), "chassis", cfg->id);
+ if (!qdev_realize(DEVICE(&rc->root_port), BUS(pci->bus), errp)) {
+ return;
+ }
+}
+
+static const char *aspeed_pcie_rc_root_bus_path(PCIHostState *host_bridge,
+ PCIBus *rootbus)
+{
+ AspeedPCIERcState *rc = ASPEED_PCIE_RC(host_bridge);
+ AspeedPCIECfgState *cfg =
+ container_of(rc, AspeedPCIECfgState, rc);
+
+ snprintf(rc->name, sizeof(rc->name), "%04x:%02x", cfg->id, rc->bus_nr);
+
+ return rc->name;
+}
+
+static void aspeed_pcie_rc_instance_init(Object *obj)
+{
+ AspeedPCIERcState *rc = ASPEED_PCIE_RC(obj);
+ AspeedPCIERootPortState *root_port = &rc->root_port;
+
+ object_initialize_child(obj, "root_port", root_port,
+ TYPE_ASPEED_PCIE_ROOT_PORT);
+}
+
+static const Property aspeed_pcie_rc_props[] = {
+ DEFINE_PROP_UINT32("bus-nr", AspeedPCIERcState, bus_nr, 0),
+ DEFINE_PROP_BOOL("has-rd", AspeedPCIERcState, has_rd, 0),
+ DEFINE_PROP_UINT32("rp-addr", AspeedPCIERcState, rp_addr, 0),
+ DEFINE_PROP_UINT32("msi-addr", AspeedPCIERcState, msi_addr, 0),
+ DEFINE_PROP_UINT64("dram-base", AspeedPCIERcState, dram_base, 0),
+ DEFINE_PROP_LINK("dram", AspeedPCIERcState, dram_mr, TYPE_MEMORY_REGION,
+ MemoryRegion *),
+};
+
+static void aspeed_pcie_rc_class_init(ObjectClass *klass, const void *data)
+{
+ PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->desc = "ASPEED PCIe RC";
+ dc->realize = aspeed_pcie_rc_realize;
+ dc->fw_name = "pci";
+ set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
+
+ hc->root_bus_path = aspeed_pcie_rc_root_bus_path;
+ device_class_set_props(dc, aspeed_pcie_rc_props);
+
+ msi_nonbroken = true;
+}
+
+static const TypeInfo aspeed_pcie_rc_info = {
+ .name = TYPE_ASPEED_PCIE_RC,
+ .parent = TYPE_PCIE_HOST_BRIDGE,
+ .instance_size = sizeof(AspeedPCIERcState),
+ .instance_init = aspeed_pcie_rc_instance_init,
+ .class_init = aspeed_pcie_rc_class_init,
+};
+
+/*
+ * PCIe Config
+ *
+ * AHB to PCIe Bus Bridge (H2X)
+ *
+ * On the AST2600:
+ * NOTE: rc_l is not supported by this model.
+ * - Registers 0x00 - 0x7F are shared by both PCIe0 (rc_l) and PCIe1 (rc_h).
+ * - Registers 0x80 - 0xBF are specific to PCIe0.
+ * - Registers 0xC0 - 0xFF are specific to PCIe1.
+ *
+ * On the AST2700:
+ * - The register range 0x00 - 0xFF is assigned to a single PCIe configuration.
+ * - There are three PCIe Root Complexes (RCs), each with its own dedicated H2X
+ * register set of size 0x100 (covering offsets 0x00 to 0xFF).
+ */
+
+/* AST2600 */
+REG32(H2X_CTRL, 0x00)
+ FIELD(H2X_CTRL, CLEAR_RX, 4, 1)
+REG32(H2X_TX_CLEAR, 0x08)
+ FIELD(H2X_TX_CLEAR, IDLE, 0, 1)
+REG32(H2X_RDATA, 0x0C)
+REG32(H2X_TX_DESC0, 0x10)
+REG32(H2X_TX_DESC1, 0x14)
+REG32(H2X_TX_DESC2, 0x18)
+REG32(H2X_TX_DESC3, 0x1C)
+REG32(H2X_TX_DATA, 0x20)
+REG32(H2X_TX_STS, 0x24)
+ FIELD(H2X_TX_STS, IDLE, 31, 1)
+ FIELD(H2X_TX_STS, RC_L_TX_COMP, 24, 1)
+ FIELD(H2X_TX_STS, RC_H_TX_COMP, 25, 1)
+ FIELD(H2X_TX_STS, TRIG, 0, 1)
+REG32(H2X_RC_H_CTRL, 0xC0)
+REG32(H2X_RC_H_INT_EN, 0xC4)
+REG32(H2X_RC_H_INT_STS, 0xC8)
+ SHARED_FIELD(H2X_RC_INT_INTDONE, 4, 1)
+ SHARED_FIELD(H2X_RC_INT_INTX, 0, 4)
+REG32(H2X_RC_H_RDATA, 0xCC)
+REG32(H2X_RC_H_MSI_EN0, 0xE0)
+REG32(H2X_RC_H_MSI_EN1, 0xE4)
+REG32(H2X_RC_H_MSI_STS0, 0xE8)
+REG32(H2X_RC_H_MSI_STS1, 0xEC)
+
+/* AST2700 */
+REG32(H2X_CFGE_INT_STS, 0x08)
+ FIELD(H2X_CFGE_INT_STS, TX_IDEL, 0, 1)
+ FIELD(H2X_CFGE_INT_STS, RX_BUSY, 1, 1)
+REG32(H2X_CFGI_TLP, 0x20)
+ FIELD(H2X_CFGI_TLP, ADDR, 0, 16)
+ FIELD(H2X_CFGI_TLP, BEN, 16, 4)
+ FIELD(H2X_CFGI_TLP, WR, 20, 1)
+REG32(H2X_CFGI_WDATA, 0x24)
+REG32(H2X_CFGI_CTRL, 0x28)
+ FIELD(H2X_CFGI_CTRL, FIRE, 0, 1)
+REG32(H2X_CFGI_RDATA, 0x2C)
+REG32(H2X_CFGE_TLP1, 0x30)
+REG32(H2X_CFGE_TLPN, 0x34)
+REG32(H2X_CFGE_CTRL, 0x38)
+ FIELD(H2X_CFGE_CTRL, FIRE, 0, 1)
+REG32(H2X_CFGE_RDATA, 0x3C)
+REG32(H2X_INT_EN, 0x40)
+REG32(H2X_INT_STS, 0x48)
+ FIELD(H2X_INT_STS, INTX, 0, 4)
+REG32(H2X_MSI_EN0, 0x50)
+REG32(H2X_MSI_EN1, 0x54)
+REG32(H2X_MSI_STS0, 0x58)
+REG32(H2X_MSI_STS1, 0x5C)
+
+#define TLP_FMTTYPE_CFGRD0 0x04 /* Configuration Read Type 0 */
+#define TLP_FMTTYPE_CFGWR0 0x44 /* Configuration Write Type 0 */
+#define TLP_FMTTYPE_CFGRD1 0x05 /* Configuration Read Type 1 */
+#define TLP_FMTTYPE_CFGWR1 0x45 /* Configuration Write Type 1 */
+
+#define PCIE_CFG_FMTTYPE_MASK(x) (((x) >> 24) & 0xff)
+#define PCIE_CFG_BYTE_EN(x) ((x) & 0xf)
+
+static const AspeedPCIERegMap aspeed_regmap = {
+ .rc = {
+ .int_en_reg = R_H2X_RC_H_INT_EN,
+ .int_sts_reg = R_H2X_RC_H_INT_STS,
+ .msi_sts0_reg = R_H2X_RC_H_MSI_STS0,
+ .msi_sts1_reg = R_H2X_RC_H_MSI_STS1,
+ },
+};
+
+static const AspeedPCIERegMap aspeed_2700_regmap = {
+ .rc = {
+ .int_en_reg = R_H2X_INT_EN,
+ .int_sts_reg = R_H2X_INT_STS,
+ .msi_sts0_reg = R_H2X_MSI_STS0,
+ .msi_sts1_reg = R_H2X_MSI_STS1,
+ },
+};
+
+static uint64_t aspeed_pcie_cfg_read(void *opaque, hwaddr addr,
+ unsigned int size)
+{
+ AspeedPCIECfgState *s = ASPEED_PCIE_CFG(opaque);
+ uint32_t reg = addr >> 2;
+ uint32_t value = 0;
+
+ value = s->regs[reg];
+
+ trace_aspeed_pcie_cfg_read(s->id, addr, value);
+
+ return value;
+}
+
+static void aspeed_pcie_cfg_translate_write(uint8_t byte_en, uint32_t *addr,
+ uint64_t *val, int *len)
+{
+ uint64_t packed_val = 0;
+ int first_bit = -1;
+ int index = 0;
+ int i;
+
+ *len = ctpop8(byte_en);
+
+ if (*len == 0 || *len > 4) {
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid byte enable: 0x%x\n",
+ __func__, byte_en);
+ return;
+ }
+
+ /* Special case: full 4-byte write must be 4-byte aligned */
+ if (byte_en == 0x0f) {
+ if ((*addr & 0x3) != 0) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: 4-byte write not 4-byte aligned: addr=0x%x\n",
+ __func__, *addr);
+ return;
+ }
+ *val &= 0xffffffffULL;
+ return;
+ }
+
+ for (i = 0; i < 4; i++) {
+ if (byte_en & (1 << i)) {
+ if (first_bit < 0) {
+ first_bit = i;
+ }
+ packed_val |= ((*val >> (i * 8)) & 0xff) << (index * 8);
+ index++;
+ }
+ }
+
+ *addr += first_bit;
+ *val = packed_val;
+}
+
+static void aspeed_pcie_cfg_readwrite(AspeedPCIECfgState *s,
+ const AspeedPCIECfgTxDesc *desc)
+{
+ AspeedPCIERcState *rc = &s->rc;
+ PCIHostState *pci = NULL;
+ PCIDevice *pdev = NULL;
+ uint32_t cfg_addr;
+ uint32_t offset;
+ uint8_t byte_en;
+ bool is_write;
+ uint8_t devfn;
+ uint64_t val;
+ uint8_t bus;
+ int len;
+
+ val = ~0;
+ is_write = !!(desc->desc0 & BIT(30));
+ cfg_addr = desc->desc2;
+
+ bus = (cfg_addr >> 24) & 0xff;
+ devfn = (cfg_addr >> 16) & 0xff;
+ offset = cfg_addr & 0xffc;
+
+ pci = PCI_HOST_BRIDGE(rc);
+
+ /*
+ * On the AST2600, the RC_H bus number range from 0x80 to 0xFF, with the
+ * root device and root port assigned to bus 0x80 instead of the standard
+ * 0x00. To allow the PCI subsystem to correctly discover devices on the
+ * root bus, bus 0x80 is remapped to 0x00.
+ */
+ if (bus == rc->bus_nr) {
+ bus = 0;
+ }
+
+ pdev = pci_find_device(pci->bus, bus, devfn);
+ if (!pdev) {
+ s->regs[desc->rdata_reg] = ~0;
+ goto out;
+ }
+
+ switch (PCIE_CFG_FMTTYPE_MASK(desc->desc0)) {
+ case TLP_FMTTYPE_CFGWR0:
+ case TLP_FMTTYPE_CFGWR1:
+ byte_en = PCIE_CFG_BYTE_EN(desc->desc1);
+ val = desc->wdata;
+ aspeed_pcie_cfg_translate_write(byte_en, &offset, &val, &len);
+ pci_host_config_write_common(pdev, offset, pci_config_size(pdev),
+ val, len);
+ break;
+ case TLP_FMTTYPE_CFGRD0:
+ case TLP_FMTTYPE_CFGRD1:
+ val = pci_host_config_read_common(pdev, offset,
+ pci_config_size(pdev), 4);
+ s->regs[desc->rdata_reg] = val;
+ break;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid CFG type. DESC0=0x%x\n",
+ __func__, desc->desc0);
+ }
+
+out:
+ trace_aspeed_pcie_cfg_rw(s->id, is_write ? "write" : "read", bus, devfn,
+ cfg_addr, val);
+}
+
+static void aspeed_pcie_cfg_write(void *opaque, hwaddr addr, uint64_t data,
+ unsigned int size)
+{
+ AspeedPCIECfgState *s = ASPEED_PCIE_CFG(opaque);
+ AspeedPCIECfgTxDesc desc;
+ uint32_t reg = addr >> 2;
+ uint32_t rc_reg;
+
+ trace_aspeed_pcie_cfg_write(s->id, addr, data);
+
+ switch (reg) {
+ case R_H2X_CTRL:
+ if (data & R_H2X_CTRL_CLEAR_RX_MASK) {
+ s->regs[R_H2X_RDATA] = ~0;
+ }
+ break;
+ case R_H2X_TX_CLEAR:
+ if (data & R_H2X_TX_CLEAR_IDLE_MASK) {
+ s->regs[R_H2X_TX_STS] &= ~R_H2X_TX_STS_IDLE_MASK;
+ }
+ break;
+ case R_H2X_TX_STS:
+ if (data & R_H2X_TX_STS_TRIG_MASK) {
+ desc.desc0 = s->regs[R_H2X_TX_DESC0];
+ desc.desc1 = s->regs[R_H2X_TX_DESC1];
+ desc.desc2 = s->regs[R_H2X_TX_DESC2];
+ desc.desc3 = s->regs[R_H2X_TX_DESC3];
+ desc.wdata = s->regs[R_H2X_TX_DATA];
+ desc.rdata_reg = R_H2X_RC_H_RDATA;
+ aspeed_pcie_cfg_readwrite(s, &desc);
+ rc_reg = s->rc_regs->int_sts_reg;
+ s->regs[rc_reg] |= H2X_RC_INT_INTDONE_MASK;
+ s->regs[R_H2X_TX_STS] |=
+ BIT(R_H2X_TX_STS_RC_H_TX_COMP_SHIFT);
+ s->regs[R_H2X_TX_STS] |= R_H2X_TX_STS_IDLE_MASK;
+ }
+ break;
+ /* preserve INTx status */
+ case R_H2X_RC_H_INT_STS:
+ if (data & H2X_RC_INT_INTDONE_MASK) {
+ s->regs[R_H2X_TX_STS] &= ~R_H2X_TX_STS_RC_H_TX_COMP_MASK;
+ }
+ s->regs[reg] &= ~data | H2X_RC_INT_INTX_MASK;
+ break;
+ /*
+ * These status registers are used for notify sources ISR are executed.
+ * If one source ISR is executed, it will clear one bit.
+ * If it clear all bits, it means to initialize this register status
+ * rather than sources ISR are executed.
+ */
+ case R_H2X_RC_H_MSI_STS0:
+ case R_H2X_RC_H_MSI_STS1:
+ if (data == 0) {
+ return ;
+ }
+
+ s->regs[reg] &= ~data;
+ if (data == 0xffffffff) {
+ return;
+ }
+
+ if (!s->regs[R_H2X_RC_H_MSI_STS0] &&
+ !s->regs[R_H2X_RC_H_MSI_STS1]) {
+ trace_aspeed_pcie_rc_msi_clear_irq(s->id, 0);
+ qemu_set_irq(s->rc.irq, 0);
+ }
+ break;
+ default:
+ s->regs[reg] = data;
+ break;
+ }
+}
+
+static const MemoryRegionOps aspeed_pcie_cfg_ops = {
+ .read = aspeed_pcie_cfg_read,
+ .write = aspeed_pcie_cfg_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid = {
+ .min_access_size = 1,
+ .max_access_size = 4,
+ },
+};
+
+static void aspeed_pcie_cfg_instance_init(Object *obj)
+{
+ AspeedPCIECfgState *s = ASPEED_PCIE_CFG(obj);
+
+ object_initialize_child(obj, "rc", &s->rc, TYPE_ASPEED_PCIE_RC);
+ object_property_add_alias(obj, "dram", OBJECT(&s->rc), "dram");
+ object_property_add_alias(obj, "dram-base", OBJECT(&s->rc), "dram-base");
+
+ return;
+}
+
+static void aspeed_pcie_cfg_reset(DeviceState *dev)
+{
+ AspeedPCIECfgState *s = ASPEED_PCIE_CFG(dev);
+ AspeedPCIECfgClass *apc = ASPEED_PCIE_CFG_GET_CLASS(s);
+
+ memset(s->regs, 0, apc->nr_regs << 2);
+ memset(s->tlpn_fifo, 0, sizeof(s->tlpn_fifo));
+ s->tlpn_idx = 0;
+}
+
+static void aspeed_pcie_cfg_realize(DeviceState *dev, Error **errp)
+{
+ SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+ AspeedPCIECfgState *s = ASPEED_PCIE_CFG(dev);
+ AspeedPCIECfgClass *apc = ASPEED_PCIE_CFG_GET_CLASS(s);
+ g_autofree char *name = NULL;
+
+ s->rc_regs = &apc->reg_map->rc;
+ s->regs = g_new(uint32_t, apc->nr_regs);
+ name = g_strdup_printf(TYPE_ASPEED_PCIE_CFG ".regs.%d", s->id);
+ memory_region_init_io(&s->mmio, OBJECT(s), apc->reg_ops, s, name,
+ apc->nr_regs << 2);
+ sysbus_init_mmio(sbd, &s->mmio);
+
+ object_property_set_int(OBJECT(&s->rc), "bus-nr",
+ apc->rc_bus_nr,
+ &error_abort);
+ object_property_set_bool(OBJECT(&s->rc), "has-rd",
+ apc->rc_has_rd,
+ &error_abort);
+ object_property_set_int(OBJECT(&s->rc), "rp-addr",
+ apc->rc_rp_addr,
+ &error_abort);
+ object_property_set_int(OBJECT(&s->rc), "msi-addr",
+ apc->rc_msi_addr,
+ &error_abort);
+ if (!sysbus_realize(SYS_BUS_DEVICE(&s->rc), errp)) {
+ return;
+ }
+}
+
+static void aspeed_pcie_cfg_unrealize(DeviceState *dev)
+{
+ AspeedPCIECfgState *s = ASPEED_PCIE_CFG(dev);
+
+ g_free(s->regs);
+ s->regs = NULL;
+}
+
+static const Property aspeed_pcie_cfg_props[] = {
+ DEFINE_PROP_UINT32("id", AspeedPCIECfgState, id, 0),
+};
+
+static void aspeed_pcie_cfg_class_init(ObjectClass *klass, const void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ AspeedPCIECfgClass *apc = ASPEED_PCIE_CFG_CLASS(klass);
+
+ dc->desc = "ASPEED PCIe Config";
+ dc->realize = aspeed_pcie_cfg_realize;
+ dc->unrealize = aspeed_pcie_cfg_unrealize;
+ device_class_set_legacy_reset(dc, aspeed_pcie_cfg_reset);
+ device_class_set_props(dc, aspeed_pcie_cfg_props);
+
+ apc->reg_ops = &aspeed_pcie_cfg_ops;
+ apc->reg_map = &aspeed_regmap;
+ apc->nr_regs = 0x100 >> 2;
+ apc->rc_msi_addr = 0x1e77005C;
+ apc->rc_bus_nr = 0x80;
+ apc->rc_has_rd = true;
+ apc->rc_rp_addr = PCI_DEVFN(8, 0);
+}
+
+static const TypeInfo aspeed_pcie_cfg_info = {
+ .name = TYPE_ASPEED_PCIE_CFG,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_init = aspeed_pcie_cfg_instance_init,
+ .instance_size = sizeof(AspeedPCIECfgState),
+ .class_init = aspeed_pcie_cfg_class_init,
+ .class_size = sizeof(AspeedPCIECfgClass),
+};
+
+static void aspeed_2700_pcie_cfg_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned int size)
+{
+ AspeedPCIECfgState *s = ASPEED_PCIE_CFG(opaque);
+ AspeedPCIECfgTxDesc desc;
+ uint32_t reg = addr >> 2;
+
+ trace_aspeed_pcie_cfg_write(s->id, addr, data);
+
+ switch (reg) {
+ case R_H2X_CFGE_INT_STS:
+ if (data & R_H2X_CFGE_INT_STS_TX_IDEL_MASK) {
+ s->regs[R_H2X_CFGE_INT_STS] &= ~R_H2X_CFGE_INT_STS_TX_IDEL_MASK;
+ }
+
+ if (data & R_H2X_CFGE_INT_STS_RX_BUSY_MASK) {
+ s->regs[R_H2X_CFGE_INT_STS] &= ~R_H2X_CFGE_INT_STS_RX_BUSY_MASK;
+ }
+ break;
+ case R_H2X_CFGI_CTRL:
+ if (data & R_H2X_CFGI_CTRL_FIRE_MASK) {
+ /*
+ * Internal access to bridge
+ * Type and BDF are 0
+ */
+ desc.desc0 = 0x04000001 |
+ (ARRAY_FIELD_EX32(s->regs, H2X_CFGI_TLP, WR) << 30);
+ desc.desc1 = 0x00401000 |
+ ARRAY_FIELD_EX32(s->regs, H2X_CFGI_TLP, BEN);
+ desc.desc2 = 0x00000000 |
+ ARRAY_FIELD_EX32(s->regs, H2X_CFGI_TLP, ADDR);
+ desc.wdata = s->regs[R_H2X_CFGI_WDATA];
+ desc.rdata_reg = R_H2X_CFGI_RDATA;
+ aspeed_pcie_cfg_readwrite(s, &desc);
+ }
+ break;
+ case R_H2X_CFGE_TLPN:
+ s->tlpn_fifo[s->tlpn_idx] = data;
+ s->tlpn_idx = (s->tlpn_idx + 1) % ARRAY_SIZE(s->tlpn_fifo);
+ break;
+ case R_H2X_CFGE_CTRL:
+ if (data & R_H2X_CFGE_CTRL_FIRE_MASK) {
+ desc.desc0 = s->regs[R_H2X_CFGE_TLP1];
+ desc.desc1 = s->tlpn_fifo[0];
+ desc.desc2 = s->tlpn_fifo[1];
+ desc.wdata = s->tlpn_fifo[2];
+ desc.rdata_reg = R_H2X_CFGE_RDATA;
+ aspeed_pcie_cfg_readwrite(s, &desc);
+ s->regs[R_H2X_CFGE_INT_STS] |= R_H2X_CFGE_INT_STS_TX_IDEL_MASK;
+ s->regs[R_H2X_CFGE_INT_STS] |= R_H2X_CFGE_INT_STS_RX_BUSY_MASK;
+ s->tlpn_idx = 0;
+ }
+ break;
+
+ case R_H2X_INT_STS:
+ s->regs[reg] &= ~data | R_H2X_INT_STS_INTX_MASK;
+ break;
+ /*
+ * These status registers are used for notify sources ISR are executed.
+ * If one source ISR is executed, it will clear one bit.
+ * If it clear all bits, it means to initialize this register status
+ * rather than sources ISR are executed.
+ */
+ case R_H2X_MSI_STS0:
+ case R_H2X_MSI_STS1:
+ if (data == 0) {
+ return ;
+ }
+
+ s->regs[reg] &= ~data;
+ if (data == 0xffffffff) {
+ return;
+ }
+
+ if (!s->regs[R_H2X_MSI_STS0] &&
+ !s->regs[R_H2X_MSI_STS1]) {
+ trace_aspeed_pcie_rc_msi_clear_irq(s->id, 0);
+ qemu_set_irq(s->rc.irq, 0);
+ }
+ break;
+ default:
+ s->regs[reg] = data;
+ break;
+ }
+}
+
+static const MemoryRegionOps aspeed_2700_pcie_cfg_ops = {
+ .read = aspeed_pcie_cfg_read,
+ .write = aspeed_2700_pcie_cfg_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid = {
+ .min_access_size = 1,
+ .max_access_size = 4,
+ },
+};
+
+static void aspeed_2700_pcie_cfg_class_init(ObjectClass *klass,
+ const void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ AspeedPCIECfgClass *apc = ASPEED_PCIE_CFG_CLASS(klass);
+
+ dc->desc = "ASPEED 2700 PCIe Config";
+ apc->reg_ops = &aspeed_2700_pcie_cfg_ops;
+ apc->reg_map = &aspeed_2700_regmap;
+ apc->nr_regs = 0x100 >> 2;
+ apc->rc_msi_addr = 0x000000F0;
+ apc->rc_bus_nr = 0;
+ apc->rc_has_rd = false;
+ apc->rc_rp_addr = PCI_DEVFN(0, 0);
+}
+
+static const TypeInfo aspeed_2700_pcie_cfg_info = {
+ .name = TYPE_ASPEED_2700_PCIE_CFG,
+ .parent = TYPE_ASPEED_PCIE_CFG,
+ .class_init = aspeed_2700_pcie_cfg_class_init,
+};
+
+/*
+ * PCIe PHY
+ *
+ * PCIe Host Controller (PCIEH)
+ */
+
+/* AST2600 */
+REG32(PEHR_ID, 0x00)
+ FIELD(PEHR_ID, DEV, 16, 16)
+REG32(PEHR_CLASS_CODE, 0x04)
+REG32(PEHR_DATALINK, 0x10)
+REG32(PEHR_PROTECT, 0x7C)
+ FIELD(PEHR_PROTECT, LOCK, 0, 8)
+REG32(PEHR_LINK, 0xC0)
+ FIELD(PEHR_LINK, STS, 5, 1)
+
+/* AST2700 */
+REG32(PEHR_2700_LINK_GEN2, 0x344)
+ FIELD(PEHR_2700_LINK_GEN2, STS, 18, 1)
+REG32(PEHR_2700_LINK_GEN4, 0x358)
+ FIELD(PEHR_2700_LINK_GEN4, STS, 8, 1)
+
+#define ASPEED_PCIE_PHY_UNLOCK 0xA8
+
+static uint64_t aspeed_pcie_phy_read(void *opaque, hwaddr addr,
+ unsigned int size)
+{
+ AspeedPCIEPhyState *s = ASPEED_PCIE_PHY(opaque);
+ uint32_t reg = addr >> 2;
+ uint32_t value = 0;
+
+ value = s->regs[reg];
+
+ trace_aspeed_pcie_phy_read(s->id, addr, value);
+
+ return value;
+}
+
+static void aspeed_pcie_phy_write(void *opaque, hwaddr addr, uint64_t data,
+ unsigned int size)
+{
+ AspeedPCIEPhyState *s = ASPEED_PCIE_PHY(opaque);
+ uint32_t reg = addr >> 2;
+
+ trace_aspeed_pcie_phy_write(s->id, addr, data);
+
+ switch (reg) {
+ case R_PEHR_PROTECT:
+ data &= R_PEHR_PROTECT_LOCK_MASK;
+ s->regs[reg] = !!(data == ASPEED_PCIE_PHY_UNLOCK);
+ break;
+ default:
+ s->regs[reg] = data;
+ break;
+ }
+}
+
+static const MemoryRegionOps aspeed_pcie_phy_ops = {
+ .read = aspeed_pcie_phy_read,
+ .write = aspeed_pcie_phy_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid = {
+ .min_access_size = 1,
+ .max_access_size = 4,
+ },
+};
+
+static void aspeed_pcie_phy_reset(DeviceState *dev)
+{
+ AspeedPCIEPhyState *s = ASPEED_PCIE_PHY(dev);
+ AspeedPCIEPhyClass *apc = ASPEED_PCIE_PHY_GET_CLASS(s);
+
+ memset(s->regs, 0, apc->nr_regs << 2);
+
+ s->regs[R_PEHR_ID] =
+ (0x1150 << R_PEHR_ID_DEV_SHIFT) | PCI_VENDOR_ID_ASPEED;
+ s->regs[R_PEHR_CLASS_CODE] = 0x06040006;
+ s->regs[R_PEHR_DATALINK] = 0xD7040022;
+ s->regs[R_PEHR_LINK] = R_PEHR_LINK_STS_MASK;
+}
+
+static void aspeed_pcie_phy_realize(DeviceState *dev, Error **errp)
+{
+ AspeedPCIEPhyState *s = ASPEED_PCIE_PHY(dev);
+ AspeedPCIEPhyClass *apc = ASPEED_PCIE_PHY_GET_CLASS(s);
+ SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+ g_autofree char *name = NULL;
+
+ s->regs = g_new(uint32_t, apc->nr_regs);
+ name = g_strdup_printf(TYPE_ASPEED_PCIE_PHY ".regs.%d", s->id);
+ memory_region_init_io(&s->mmio, OBJECT(s), &aspeed_pcie_phy_ops, s, name,
+ apc->nr_regs << 2);
+ sysbus_init_mmio(sbd, &s->mmio);
+}
+
+static void aspeed_pcie_phy_unrealize(DeviceState *dev)
+{
+ AspeedPCIEPhyState *s = ASPEED_PCIE_PHY(dev);
+
+ g_free(s->regs);
+ s->regs = NULL;
+}
+
+static const Property aspeed_pcie_phy_props[] = {
+ DEFINE_PROP_UINT32("id", AspeedPCIEPhyState, id, 0),
+};
+
+static void aspeed_pcie_phy_class_init(ObjectClass *klass, const void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ AspeedPCIEPhyClass *apc = ASPEED_PCIE_PHY_CLASS(klass);
+
+ dc->desc = "ASPEED PCIe Phy";
+ dc->realize = aspeed_pcie_phy_realize;
+ dc->unrealize = aspeed_pcie_phy_unrealize;
+ device_class_set_legacy_reset(dc, aspeed_pcie_phy_reset);
+ device_class_set_props(dc, aspeed_pcie_phy_props);
+
+ apc->nr_regs = 0x100 >> 2;
+}
+
+static const TypeInfo aspeed_pcie_phy_info = {
+ .name = TYPE_ASPEED_PCIE_PHY,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(AspeedPCIEPhyState),
+ .class_init = aspeed_pcie_phy_class_init,
+ .class_size = sizeof(AspeedPCIEPhyClass),
+};
+
+static void aspeed_2700_pcie_phy_reset(DeviceState *dev)
+{
+ AspeedPCIEPhyState *s = ASPEED_PCIE_PHY(dev);
+ AspeedPCIEPhyClass *apc = ASPEED_PCIE_PHY_GET_CLASS(s);
+
+ memset(s->regs, 0, apc->nr_regs << 2);
+
+ s->regs[R_PEHR_ID] =
+ (0x1150 << R_PEHR_ID_DEV_SHIFT) | PCI_VENDOR_ID_ASPEED;
+ s->regs[R_PEHR_CLASS_CODE] = 0x06040011;
+ s->regs[R_PEHR_2700_LINK_GEN2] = R_PEHR_2700_LINK_GEN2_STS_MASK;
+ s->regs[R_PEHR_2700_LINK_GEN4] = R_PEHR_2700_LINK_GEN4_STS_MASK;
+}
+
+static void aspeed_2700_pcie_phy_class_init(ObjectClass *klass,
+ const void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ AspeedPCIEPhyClass *apc = ASPEED_PCIE_PHY_CLASS(klass);
+
+ dc->desc = "ASPEED AST2700 PCIe Phy";
+ device_class_set_legacy_reset(dc, aspeed_2700_pcie_phy_reset);
+
+ apc->nr_regs = 0x800 >> 2;
+}
+
+static const TypeInfo aspeed_2700_pcie_phy_info = {
+ .name = TYPE_ASPEED_2700_PCIE_PHY,
+ .parent = TYPE_ASPEED_PCIE_PHY,
+ .class_init = aspeed_2700_pcie_phy_class_init,
+};
+
+static void aspeed_pcie_register_types(void)
+{
+ type_register_static(&aspeed_pcie_rc_info);
+ type_register_static(&aspeed_pcie_root_device_info);
+ type_register_static(&aspeed_pcie_root_port_info);
+ type_register_static(&aspeed_pcie_cfg_info);
+ type_register_static(&aspeed_2700_pcie_cfg_info);
+ type_register_static(&aspeed_pcie_phy_info);
+ type_register_static(&aspeed_2700_pcie_phy_info);
+}
+
+type_init(aspeed_pcie_register_types);
+
diff --git a/hw/pci-host/astro.c b/hw/pci-host/astro.c
index 859e308..1024ede 100644
--- a/hw/pci-host/astro.c
+++ b/hw/pci-host/astro.c
@@ -424,22 +424,23 @@ static void elroy_reset(DeviceState *dev)
}
}
-static void elroy_pcihost_init(Object *obj)
+static void elroy_pcihost_realize(DeviceState *dev, Error **errp)
{
- ElroyState *s = ELROY_PCI_HOST_BRIDGE(obj);
- PCIHostState *phb = PCI_HOST_BRIDGE(obj);
- SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+ ElroyState *s = ELROY_PCI_HOST_BRIDGE(dev);
+ PCIHostState *phb = PCI_HOST_BRIDGE(dev);
+ SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+ Object *obj = OBJECT(s);
/* Elroy config access from CPU. */
- memory_region_init_io(&s->this_mem, OBJECT(s), &elroy_chip_ops,
+ memory_region_init_io(&s->this_mem, obj, &elroy_chip_ops,
s, "elroy", 0x2000);
/* Elroy PCI config. */
- memory_region_init_io(&phb->conf_mem, OBJECT(phb),
- &elroy_config_addr_ops, DEVICE(s),
+ memory_region_init_io(&phb->conf_mem, obj,
+ &elroy_config_addr_ops, dev,
"pci-conf-idx", 8);
- memory_region_init_io(&phb->data_mem, OBJECT(phb),
- &elroy_config_data_ops, DEVICE(s),
+ memory_region_init_io(&phb->data_mem, obj,
+ &elroy_config_data_ops, dev,
"pci-conf-data", 8);
memory_region_add_subregion(&s->this_mem, 0x40,
&phb->conf_mem);
@@ -447,8 +448,8 @@ static void elroy_pcihost_init(Object *obj)
&phb->data_mem);
/* Elroy PCI bus memory. */
- memory_region_init(&s->pci_mmio, OBJECT(s), "pci-mmio", UINT64_MAX);
- memory_region_init_io(&s->pci_io, OBJECT(s), &unassigned_io_ops, obj,
+ memory_region_init(&s->pci_mmio, obj, "pci-mmio", UINT64_MAX);
+ memory_region_init_io(&s->pci_io, obj, &unassigned_io_ops, obj,
"pci-isa-mmio",
((uint32_t) IOS_DIST_BASE_SIZE) / ROPES_PER_IOC);
@@ -459,7 +460,7 @@ static void elroy_pcihost_init(Object *obj)
sysbus_init_mmio(sbd, &s->this_mem);
- qdev_init_gpio_in(DEVICE(obj), elroy_set_irq, ELROY_IRQS);
+ qdev_init_gpio_in(dev, elroy_set_irq, ELROY_IRQS);
}
static const VMStateDescription vmstate_elroy = {
@@ -487,6 +488,7 @@ static void elroy_pcihost_class_init(ObjectClass *klass, const void *data)
DeviceClass *dc = DEVICE_CLASS(klass);
device_class_set_legacy_reset(dc, elroy_reset);
+ dc->realize = elroy_pcihost_realize;
dc->vmsd = &vmstate_elroy;
dc->user_creatable = false;
}
@@ -494,7 +496,6 @@ static void elroy_pcihost_class_init(ObjectClass *klass, const void *data)
static const TypeInfo elroy_pcihost_info = {
.name = TYPE_ELROY_PCI_HOST_BRIDGE,
.parent = TYPE_PCI_HOST_BRIDGE,
- .instance_init = elroy_pcihost_init,
.instance_size = sizeof(ElroyState),
.class_init = elroy_pcihost_class_init,
};
diff --git a/hw/pci-host/dino.c b/hw/pci-host/dino.c
index 11b353b..9240534 100644
--- a/hw/pci-host/dino.c
+++ b/hw/pci-host/dino.c
@@ -413,43 +413,7 @@ static void dino_pcihost_reset(DeviceState *dev)
static void dino_pcihost_realize(DeviceState *dev, Error **errp)
{
DinoState *s = DINO_PCI_HOST_BRIDGE(dev);
-
- /* Set up PCI view of memory: Bus master address space. */
- memory_region_init(&s->bm, OBJECT(s), "bm-dino", 4 * GiB);
- memory_region_init_alias(&s->bm_ram_alias, OBJECT(s),
- "bm-system", s->memory_as, 0,
- 0xf0000000 + DINO_MEM_CHUNK_SIZE);
- memory_region_init_alias(&s->bm_pci_alias, OBJECT(s),
- "bm-pci", &s->pci_mem,
- 0xf0000000 + DINO_MEM_CHUNK_SIZE,
- 30 * DINO_MEM_CHUNK_SIZE);
- memory_region_init_alias(&s->bm_cpu_alias, OBJECT(s),
- "bm-cpu", s->memory_as, 0xfff00000,
- 0xfffff);
- memory_region_add_subregion(&s->bm, 0,
- &s->bm_ram_alias);
- memory_region_add_subregion(&s->bm,
- 0xf0000000 + DINO_MEM_CHUNK_SIZE,
- &s->bm_pci_alias);
- memory_region_add_subregion(&s->bm, 0xfff00000,
- &s->bm_cpu_alias);
-
- address_space_init(&s->bm_as, &s->bm, "pci-bm");
-}
-
-static void dino_pcihost_unrealize(DeviceState *dev)
-{
- DinoState *s = DINO_PCI_HOST_BRIDGE(dev);
-
- address_space_destroy(&s->bm_as);
-}
-
-static void dino_pcihost_init(Object *obj)
-{
- DinoState *s = DINO_PCI_HOST_BRIDGE(obj);
- PCIHostState *phb = PCI_HOST_BRIDGE(obj);
- SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
- int i;
+ PCIHostState *phb = PCI_HOST_BRIDGE(dev);
/* Dino PCI access from main memory. */
memory_region_init_io(&s->this_mem, OBJECT(s), &dino_chip_ops,
@@ -476,7 +440,7 @@ static void dino_pcihost_init(Object *obj)
PCI_DEVFN(0, 0), 32, TYPE_PCI_BUS);
/* Set up windows into PCI bus memory. */
- for (i = 1; i < 31; i++) {
+ for (int i = 1; i < 31; i++) {
uint32_t addr = 0xf0000000 + i * DINO_MEM_CHUNK_SIZE;
char *name = g_strdup_printf("PCI Outbound Window %d", i);
memory_region_init_alias(&s->pci_mem_alias[i], OBJECT(s),
@@ -487,9 +451,38 @@ static void dino_pcihost_init(Object *obj)
pci_setup_iommu(phb->bus, &dino_iommu_ops, s);
- sysbus_init_mmio(sbd, &s->this_mem);
+ sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->this_mem);
- qdev_init_gpio_in(DEVICE(obj), dino_set_irq, DINO_IRQS);
+ qdev_init_gpio_in(dev, dino_set_irq, DINO_IRQS);
+
+ /* Set up PCI view of memory: Bus master address space. */
+ memory_region_init(&s->bm, OBJECT(s), "bm-dino", 4 * GiB);
+ memory_region_init_alias(&s->bm_ram_alias, OBJECT(s),
+ "bm-system", s->memory_as, 0,
+ 0xf0000000 + DINO_MEM_CHUNK_SIZE);
+ memory_region_init_alias(&s->bm_pci_alias, OBJECT(s),
+ "bm-pci", &s->pci_mem,
+ 0xf0000000 + DINO_MEM_CHUNK_SIZE,
+ 30 * DINO_MEM_CHUNK_SIZE);
+ memory_region_init_alias(&s->bm_cpu_alias, OBJECT(s),
+ "bm-cpu", s->memory_as, 0xfff00000,
+ 0xfffff);
+ memory_region_add_subregion(&s->bm, 0,
+ &s->bm_ram_alias);
+ memory_region_add_subregion(&s->bm,
+ 0xf0000000 + DINO_MEM_CHUNK_SIZE,
+ &s->bm_pci_alias);
+ memory_region_add_subregion(&s->bm, 0xfff00000,
+ &s->bm_cpu_alias);
+
+ address_space_init(&s->bm_as, &s->bm, "pci-bm");
+}
+
+static void dino_pcihost_unrealize(DeviceState *dev)
+{
+ DinoState *s = DINO_PCI_HOST_BRIDGE(dev);
+
+ address_space_destroy(&s->bm_as);
}
static const Property dino_pcihost_properties[] = {
@@ -511,7 +504,6 @@ static void dino_pcihost_class_init(ObjectClass *klass, const void *data)
static const TypeInfo dino_pcihost_info = {
.name = TYPE_DINO_PCI_HOST_BRIDGE,
.parent = TYPE_PCI_HOST_BRIDGE,
- .instance_init = dino_pcihost_init,
.instance_size = sizeof(DinoState),
.class_init = dino_pcihost_class_init,
};
diff --git a/hw/pci-host/gpex-acpi.c b/hw/pci-host/gpex-acpi.c
index 0aba47c..952a0ac 100644
--- a/hw/pci-host/gpex-acpi.c
+++ b/hw/pci-host/gpex-acpi.c
@@ -1,5 +1,6 @@
#include "qemu/osdep.h"
#include "hw/acpi/aml-build.h"
+#include "hw/acpi/pci.h"
#include "hw/pci-host/gpex.h"
#include "hw/arm/virt.h"
#include "hw/pci/pci_bus.h"
@@ -50,61 +51,10 @@ static void acpi_dsdt_add_pci_route_table(Aml *dev, uint32_t irq,
}
}
-static void acpi_dsdt_add_pci_osc(Aml *dev)
+static Aml *build_pci_host_bridge_dsm_method(void)
{
- Aml *method, *UUID, *ifctx, *ifctx1, *elsectx, *buf;
-
- /* Declare an _OSC (OS Control Handoff) method */
- aml_append(dev, aml_name_decl("SUPP", aml_int(0)));
- aml_append(dev, aml_name_decl("CTRL", aml_int(0)));
- method = aml_method("_OSC", 4, AML_NOTSERIALIZED);
- aml_append(method,
- aml_create_dword_field(aml_arg(3), aml_int(0), "CDW1"));
-
- /* PCI Firmware Specification 3.0
- * 4.5.1. _OSC Interface for PCI Host Bridge Devices
- * The _OSC interface for a PCI/PCI-X/PCI Express hierarchy is
- * identified by the Universal Unique IDentifier (UUID)
- * 33DB4D5B-1FF7-401C-9657-7441C03DD766
- */
- UUID = aml_touuid("33DB4D5B-1FF7-401C-9657-7441C03DD766");
- ifctx = aml_if(aml_equal(aml_arg(0), UUID));
- aml_append(ifctx,
- aml_create_dword_field(aml_arg(3), aml_int(4), "CDW2"));
- aml_append(ifctx,
- aml_create_dword_field(aml_arg(3), aml_int(8), "CDW3"));
- aml_append(ifctx, aml_store(aml_name("CDW2"), aml_name("SUPP")));
- aml_append(ifctx, aml_store(aml_name("CDW3"), aml_name("CTRL")));
-
- /*
- * Allow OS control for all 5 features:
- * PCIeHotplug SHPCHotplug PME AER PCIeCapability.
- */
- aml_append(ifctx, aml_and(aml_name("CTRL"), aml_int(0x1F),
- aml_name("CTRL")));
-
- ifctx1 = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(0x1))));
- aml_append(ifctx1, aml_or(aml_name("CDW1"), aml_int(0x08),
- aml_name("CDW1")));
- aml_append(ifctx, ifctx1);
-
- ifctx1 = aml_if(aml_lnot(aml_equal(aml_name("CDW3"), aml_name("CTRL"))));
- aml_append(ifctx1, aml_or(aml_name("CDW1"), aml_int(0x10),
- aml_name("CDW1")));
- aml_append(ifctx, ifctx1);
-
- aml_append(ifctx, aml_store(aml_name("CTRL"), aml_name("CDW3")));
- aml_append(ifctx, aml_return(aml_arg(3)));
- aml_append(method, ifctx);
-
- elsectx = aml_else();
- aml_append(elsectx, aml_or(aml_name("CDW1"), aml_int(4),
- aml_name("CDW1")));
- aml_append(elsectx, aml_return(aml_arg(3)));
- aml_append(method, elsectx);
- aml_append(dev, method);
-
- method = aml_method("_DSM", 4, AML_NOTSERIALIZED);
+ Aml *method = aml_method("_DSM", 4, AML_NOTSERIALIZED);
+ Aml *UUID, *ifctx, *ifctx1, *buf;
/* PCI Firmware Specification 3.0
* 4.6.1. _DSM for PCI Express Slot Information
@@ -123,7 +73,16 @@ static void acpi_dsdt_add_pci_osc(Aml *dev)
byte_list[0] = 0;
buf = aml_buffer(1, byte_list);
aml_append(method, aml_return(buf));
- aml_append(dev, method);
+ return method;
+}
+
+static void acpi_dsdt_add_host_bridge_methods(Aml *dev,
+ bool enable_native_pcie_hotplug)
+{
+ /* Declare an _OSC (OS Control Handoff) method */
+ aml_append(dev,
+ build_pci_host_bridge_osc_method(enable_native_pcie_hotplug));
+ aml_append(dev, build_pci_host_bridge_dsm_method());
}
void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg)
@@ -192,7 +151,8 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg)
if (is_cxl) {
build_cxl_osc_method(dev);
} else {
- acpi_dsdt_add_pci_osc(dev);
+ /* pxb bridges do not have ACPI PCI Hot-plug enabled */
+ acpi_dsdt_add_host_bridge_methods(dev, true);
}
aml_append(scope, dev);
@@ -267,7 +227,7 @@ void acpi_dsdt_add_gpex(Aml *scope, struct GPEXConfig *cfg)
}
aml_append(dev, aml_name_decl("_CRS", rbuf));
- acpi_dsdt_add_pci_osc(dev);
+ acpi_dsdt_add_host_bridge_methods(dev, cfg->pci_native_hotplug);
Aml *dev_res0 = aml_device("%s", "RES0");
aml_append(dev_res0, aml_name_decl("_HID", aml_string("PNP0C02")));
diff --git a/hw/pci-host/gt64120.c b/hw/pci-host/gt64120.c
index 56a6ef9..b1d96f6 100644
--- a/hw/pci-host/gt64120.c
+++ b/hw/pci-host/gt64120.c
@@ -28,6 +28,7 @@
#include "qapi/error.h"
#include "qemu/units.h"
#include "qemu/log.h"
+#include "qemu/bswap.h"
#include "hw/qdev-properties.h"
#include "hw/registerfields.h"
#include "hw/pci/pci_device.h"
@@ -320,38 +321,6 @@ static void gt64120_isd_mapping(GT64120State *s)
memory_region_transaction_commit();
}
-static void gt64120_update_pci_cfgdata_mapping(GT64120State *s)
-{
- /* Indexed on MByteSwap bit, see Table 158: PCI_0 Command, Offset: 0xc00 */
- static const MemoryRegionOps *pci_host_data_ops[] = {
- &pci_host_data_be_ops, &pci_host_data_le_ops
- };
- PCIHostState *phb = PCI_HOST_BRIDGE(s);
-
- memory_region_transaction_begin();
-
- /*
- * The setting of the MByteSwap bit and MWordSwap bit in the PCI Internal
- * Command Register determines how data transactions from the CPU to/from
- * PCI are handled along with the setting of the Endianness bit in the CPU
- * Configuration Register. See:
- * - Table 16: 32-bit PCI Transaction Endianness
- * - Table 158: PCI_0 Command, Offset: 0xc00
- */
-
- if (memory_region_is_mapped(&phb->data_mem)) {
- memory_region_del_subregion(&s->ISD_mem, &phb->data_mem);
- object_unparent(OBJECT(&phb->data_mem));
- }
- memory_region_init_io(&phb->data_mem, OBJECT(phb),
- pci_host_data_ops[s->regs[GT_PCI0_CMD] & 1],
- s, "pci-conf-data", 4);
- memory_region_add_subregion_overlap(&s->ISD_mem, GT_PCI0_CFGDATA << 2,
- &phb->data_mem, 1);
-
- memory_region_transaction_commit();
-}
-
static void gt64120_pci_mapping(GT64120State *s)
{
memory_region_transaction_begin();
@@ -645,7 +614,6 @@ static void gt64120_writel(void *opaque, hwaddr addr,
case GT_PCI0_CMD:
case GT_PCI1_CMD:
s->regs[saddr] = val & 0x0401fc0f;
- gt64120_update_pci_cfgdata_mapping(s);
break;
case GT_PCI0_TOR:
case GT_PCI0_BS_SCS10:
@@ -1024,6 +992,48 @@ static const MemoryRegionOps isd_mem_ops = {
},
};
+static bool bswap(const GT64120State *s)
+{
+ PCIHostState *phb = PCI_HOST_BRIDGE(s);
+ /*check for bus == 0 && device == 0, Bits 11:15 = Device , Bits 16:23 = Bus*/
+ bool is_phb_dev0 = extract32(phb->config_reg, 11, 13) == 0;
+ bool le_mode = FIELD_EX32(s->regs[GT_PCI0_CMD], GT_PCI0_CMD, MByteSwap);
+ /* Only swap for non-bridge devices in big-endian mode */
+ return !le_mode && !is_phb_dev0;
+}
+
+static uint64_t gt64120_pci_data_read(void *opaque, hwaddr addr, unsigned size)
+{
+ GT64120State *s = opaque;
+ uint32_t val = pci_host_data_le_ops.read(opaque, addr, size);
+
+ if (bswap(s)) {
+ val = bswap32(val);
+ }
+ return val;
+}
+
+static void gt64120_pci_data_write(void *opaque, hwaddr addr,
+ uint64_t val, unsigned size)
+{
+ GT64120State *s = opaque;
+
+ if (bswap(s)) {
+ val = bswap32(val);
+ }
+ pci_host_data_le_ops.write(opaque, addr, val, size);
+}
+
+static const MemoryRegionOps gt64120_pci_data_ops = {
+ .read = gt64120_pci_data_read,
+ .write = gt64120_pci_data_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid = {
+ .min_access_size = 4,
+ .max_access_size = 4,
+ },
+};
+
static void gt64120_reset(DeviceState *dev)
{
GT64120State *s = GT64120_PCI_HOST_BRIDGE(dev);
@@ -1178,7 +1188,6 @@ static void gt64120_reset(DeviceState *dev)
gt64120_isd_mapping(s);
gt64120_pci_mapping(s);
- gt64120_update_pci_cfgdata_mapping(s);
}
static void gt64120_realize(DeviceState *dev, Error **errp)
@@ -1202,6 +1211,12 @@ static void gt64120_realize(DeviceState *dev, Error **errp)
memory_region_add_subregion_overlap(&s->ISD_mem, GT_PCI0_CFGADDR << 2,
&phb->conf_mem, 1);
+ memory_region_init_io(&phb->data_mem, OBJECT(phb),
+ &gt64120_pci_data_ops,
+ s, "pci-conf-data", 4);
+ memory_region_add_subregion_overlap(&s->ISD_mem, GT_PCI0_CFGDATA << 2,
+ &phb->data_mem, 1);
+
/*
* The whole address space decoded by the GT-64120A doesn't generate
diff --git a/hw/pci-host/meson.build b/hw/pci-host/meson.build
index 937a0f7..86b754d 100644
--- a/hw/pci-host/meson.build
+++ b/hw/pci-host/meson.build
@@ -2,6 +2,7 @@ pci_ss = ss.source_set()
pci_ss.add(when: 'CONFIG_PAM', if_true: files('pam.c'))
pci_ss.add(when: 'CONFIG_PCI_BONITO', if_true: files('bonito.c'))
pci_ss.add(when: 'CONFIG_GT64120', if_true: files('gt64120.c'))
+pci_ss.add(when: 'CONFIG_PCI_EXPRESS_ASPEED', if_true: files('aspeed_pcie.c'))
pci_ss.add(when: 'CONFIG_PCI_EXPRESS_DESIGNWARE', if_true: files('designware.c'))
pci_ss.add(when: 'CONFIG_PCI_EXPRESS_GENERIC_BRIDGE', if_true: files('gpex.c'))
pci_ss.add(when: ['CONFIG_PCI_EXPRESS_GENERIC_BRIDGE', 'CONFIG_ACPI'], if_true: files('gpex-acpi.c'))
diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c
index a4335f4..5d8383f 100644
--- a/hw/pci-host/pnv_phb3.c
+++ b/hw/pci-host/pnv_phb3.c
@@ -8,6 +8,7 @@
*/
#include "qemu/osdep.h"
#include "qemu/log.h"
+#include "qemu/bswap.h"
#include "qapi/visitor.h"
#include "qapi/error.h"
#include "hw/pci-host/pnv_phb3_regs.h"
diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index 77ea352..1899205 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -8,6 +8,7 @@
*/
#include "qemu/osdep.h"
#include "qemu/log.h"
+#include "qemu/bswap.h"
#include "qapi/visitor.h"
#include "qapi/error.h"
#include "target/ppc/cpu.h"
diff --git a/hw/pci-host/ppce500.c b/hw/pci-host/ppce500.c
index e97a515..975d191 100644
--- a/hw/pci-host/ppce500.c
+++ b/hw/pci-host/ppce500.c
@@ -16,12 +16,10 @@
#include "qemu/osdep.h"
#include "hw/irq.h"
-#include "hw/ppc/e500-ccsr.h"
#include "hw/qdev-properties.h"
#include "migration/vmstate.h"
#include "hw/pci/pci_device.h"
#include "hw/pci/pci_host.h"
-#include "qemu/bswap.h"
#include "hw/pci-host/ppce500.h"
#include "qom/object.h"
@@ -418,11 +416,12 @@ static const VMStateDescription vmstate_ppce500_pci = {
static void e500_pcihost_bridge_realize(PCIDevice *d, Error **errp)
{
PPCE500PCIBridgeState *b = PPC_E500_PCI_BRIDGE(d);
- PPCE500CCSRState *ccsr = CCSR(
+ SysBusDevice *ccsr = SYS_BUS_DEVICE(
object_resolve_path_component(qdev_get_machine(), "e500-ccsr"));
+ MemoryRegion *ccsr_space = sysbus_mmio_get_region(ccsr, 0);
- memory_region_init_alias(&b->bar0, OBJECT(ccsr), "e500-pci-bar0", &ccsr->ccsr_space,
- 0, int128_get64(ccsr->ccsr_space.size));
+ memory_region_init_alias(&b->bar0, OBJECT(ccsr), "e500-pci-bar0",
+ ccsr_space, 0, int128_get64(ccsr_space->size));
pci_register_bar(d, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &b->bar0);
}
diff --git a/hw/pci-host/raven.c b/hw/pci-host/raven.c
index 21f7ca6..f8c0be5 100644
--- a/hw/pci-host/raven.c
+++ b/hw/pci-host/raven.c
@@ -24,7 +24,6 @@
*/
#include "qemu/osdep.h"
-#include "qemu/datadir.h"
#include "qemu/units.h"
#include "qemu/log.h"
#include "qapi/error.h"
@@ -35,9 +34,7 @@
#include "migration/vmstate.h"
#include "hw/intc/i8259.h"
#include "hw/irq.h"
-#include "hw/loader.h"
#include "hw/or-irq.h"
-#include "elf.h"
#include "qom/object.h"
#define TYPE_RAVEN_PCI_DEVICE "raven"
@@ -47,10 +44,6 @@ OBJECT_DECLARE_SIMPLE_TYPE(RavenPCIState, RAVEN_PCI_DEVICE)
struct RavenPCIState {
PCIDevice dev;
-
- uint32_t elf_machine;
- char *bios_name;
- MemoryRegion bios;
};
typedef struct PRePPCIState PREPPCIState;
@@ -75,11 +68,8 @@ struct PRePPCIState {
RavenPCIState pci_dev;
int contiguous_map;
- bool is_legacy_prep;
};
-#define BIOS_SIZE (1 * MiB)
-
#define PCI_IO_BASE_ADDR 0x80000000 /* Physical address on main bus */
static inline uint32_t raven_pci_io_config(hwaddr addr)
@@ -243,22 +233,18 @@ static void raven_pcihost_realizefn(DeviceState *d, Error **errp)
MemoryRegion *address_space_mem = get_system_memory();
int i;
- if (s->is_legacy_prep) {
- for (i = 0; i < PCI_NUM_PINS; i++) {
- sysbus_init_irq(dev, &s->pci_irqs[i]);
- }
- } else {
- /* According to PReP specification section 6.1.6 "System Interrupt
- * Assignments", all PCI interrupts are routed via IRQ 15 */
- s->or_irq = OR_IRQ(object_new(TYPE_OR_IRQ));
- object_property_set_int(OBJECT(s->or_irq), "num-lines", PCI_NUM_PINS,
- &error_fatal);
- qdev_realize(DEVICE(s->or_irq), NULL, &error_fatal);
- sysbus_init_irq(dev, &s->or_irq->out_irq);
-
- for (i = 0; i < PCI_NUM_PINS; i++) {
- s->pci_irqs[i] = qdev_get_gpio_in(DEVICE(s->or_irq), i);
- }
+ /*
+ * According to PReP specification section 6.1.6 "System Interrupt
+ * Assignments", all PCI interrupts are routed via IRQ 15
+ */
+ s->or_irq = OR_IRQ(object_new(TYPE_OR_IRQ));
+ object_property_set_int(OBJECT(s->or_irq), "num-lines", PCI_NUM_PINS,
+ &error_fatal);
+ qdev_realize(DEVICE(s->or_irq), NULL, &error_fatal);
+ sysbus_init_irq(dev, &s->or_irq->out_irq);
+
+ for (i = 0; i < PCI_NUM_PINS; i++) {
+ s->pci_irqs[i] = qdev_get_gpio_in(DEVICE(s->or_irq), i);
}
qdev_init_gpio_in(d, raven_change_gpio, 1);
@@ -338,48 +324,9 @@ static void raven_pcihost_initfn(Object *obj)
static void raven_realize(PCIDevice *d, Error **errp)
{
- RavenPCIState *s = RAVEN_PCI_DEVICE(d);
- char *filename;
- int bios_size = -1;
-
d->config[PCI_CACHE_LINE_SIZE] = 0x08;
d->config[PCI_LATENCY_TIMER] = 0x10;
d->config[PCI_CAPABILITY_LIST] = 0x00;
-
- if (!memory_region_init_rom_nomigrate(&s->bios, OBJECT(s), "bios",
- BIOS_SIZE, errp)) {
- return;
- }
- memory_region_add_subregion(get_system_memory(), (uint32_t)(-BIOS_SIZE),
- &s->bios);
- if (s->bios_name) {
- filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, s->bios_name);
- if (filename) {
- if (s->elf_machine != EM_NONE) {
- bios_size = load_elf(filename, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL,
- ELFDATA2MSB, s->elf_machine, 0, 0);
- }
- if (bios_size < 0) {
- bios_size = get_image_size(filename);
- if (bios_size > 0 && bios_size <= BIOS_SIZE) {
- hwaddr bios_addr;
- bios_size = (bios_size + 0xfff) & ~0xfff;
- bios_addr = (uint32_t)(-BIOS_SIZE);
- bios_size = load_image_targphys(filename, bios_addr,
- bios_size);
- }
- }
- }
- g_free(filename);
- if (bios_size < 0 || bios_size > BIOS_SIZE) {
- memory_region_del_subregion(get_system_memory(), &s->bios);
- error_setg(errp, "Could not load bios image '%s'", s->bios_name);
- return;
- }
- }
-
- vmstate_register_ram_global(&s->bios);
}
static const VMStateDescription vmstate_raven = {
@@ -422,22 +369,12 @@ static const TypeInfo raven_info = {
},
};
-static const Property raven_pcihost_properties[] = {
- DEFINE_PROP_UINT32("elf-machine", PREPPCIState, pci_dev.elf_machine,
- EM_NONE),
- DEFINE_PROP_STRING("bios-name", PREPPCIState, pci_dev.bios_name),
- /* Temporary workaround until legacy prep machine is removed */
- DEFINE_PROP_BOOL("is-legacy-prep", PREPPCIState, is_legacy_prep,
- false),
-};
-
static void raven_pcihost_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
dc->realize = raven_pcihost_realizefn;
- device_class_set_props(dc, raven_pcihost_properties);
dc->fw_name = "pci";
}
diff --git a/hw/pci-host/sh_pci.c b/hw/pci-host/sh_pci.c
index de8f6a8..62fb945 100644
--- a/hw/pci-host/sh_pci.c
+++ b/hw/pci-host/sh_pci.c
@@ -28,7 +28,6 @@
#include "hw/irq.h"
#include "hw/pci/pci_device.h"
#include "hw/pci/pci_host.h"
-#include "qemu/bswap.h"
#include "qemu/module.h"
#include "qom/object.h"
diff --git a/hw/pci-host/trace-events b/hw/pci-host/trace-events
index 0a816b9..a6fd88c 100644
--- a/hw/pci-host/trace-events
+++ b/hw/pci-host/trace-events
@@ -1,5 +1,16 @@
# See docs/devel/tracing.rst for syntax documentation.
+# aspeed_pcie.c
+aspeed_pcie_rc_intx_set_irq(uint32_t id, int num, int level) "%d: num %d set IRQ leve %d"
+aspeed_pcie_rc_msi_notify(uint32_t id, uint64_t addr, uint64_t data) "%d: 0x%" PRIx64 " data 0x%" PRIx64
+aspeed_pcie_rc_msi_set_irq(uint32_t id, uint64_t unm, int level) "%d: num 0x%" PRIx64 " set IRQ level %d"
+aspeed_pcie_rc_msi_clear_irq(uint32_t id, int level) "%d: clear IRQ level %d"
+aspeed_pcie_cfg_read(uint32_t id, uint64_t addr, uint32_t value) "%d: addr 0x%" PRIx64 " value 0x%" PRIx32
+aspeed_pcie_cfg_write(uint32_t id, uint64_t addr, uint32_t value) "%d: addr 0x%" PRIx64 " value 0x%" PRIx32
+aspeed_pcie_cfg_rw(uint32_t id, const char *dir, uint8_t bus, uint8_t devfn, uint64_t addr, uint64_t data) "%d: %s bus:0x%x devfn:0x%x addr 0x%" PRIx64 " data 0x%" PRIx64
+aspeed_pcie_phy_read(uint32_t id, uint64_t addr, uint32_t value) "%d: addr 0x%" PRIx64 " value 0x%" PRIx32
+aspeed_pcie_phy_write(uint32_t id, uint64_t addr, uint32_t value) "%d: addr 0x%" PRIx64 " value 0x%" PRIx32
+
# bonito.c
bonito_spciconf_small_access(uint64_t addr, unsigned size) "PCI config address is smaller then 32-bit, addr: 0x%"PRIx64", size: %u"
diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index 66f27b9..8c7f670 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -72,7 +72,7 @@ static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
return dev->msix_pba + vector / 8;
}
-static int msix_is_pending(PCIDevice *dev, int vector)
+int msix_is_pending(PCIDevice *dev, unsigned int vector)
{
return *msix_pending_byte(dev, vector) & msix_pending_mask(vector);
}
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index f5ab510..acc03fd 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -32,6 +32,7 @@
#include "hw/pci/pci_host.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
+#include "migration/cpr.h"
#include "migration/qemu-file-types.h"
#include "migration/vmstate.h"
#include "net/net.h"
@@ -128,6 +129,12 @@ static GSequence *pci_acpi_index_list(void)
return used_acpi_index_list;
}
+static void pci_set_master(PCIDevice *d, bool enable)
+{
+ memory_region_set_enabled(&d->bus_master_enable_region, enable);
+ d->is_master = enable; /* cache the status */
+}
+
static void pci_init_bus_master(PCIDevice *pci_dev)
{
AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
@@ -135,7 +142,7 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
memory_region_init_alias(&pci_dev->bus_master_enable_region,
OBJECT(pci_dev), "bus master",
dma_as->root, 0, memory_region_size(dma_as->root));
- memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
+ pci_set_master(pci_dev, false);
memory_region_add_subregion(&pci_dev->bus_master_container_region, 0,
&pci_dev->bus_master_enable_region);
}
@@ -531,6 +538,10 @@ static void pci_reset_regions(PCIDevice *dev)
static void pci_do_device_reset(PCIDevice *dev)
{
+ if ((dev->cap_present & QEMU_PCI_SKIP_RESET_ON_CPR) && cpr_is_incoming()) {
+ return;
+ }
+
pci_device_deassert_intx(dev);
assert(dev->irq_state == 0);
@@ -804,9 +815,8 @@ static int get_pci_config_device(QEMUFile *f, void *pv, size_t size,
pci_bridge_update_mappings(PCI_BRIDGE(s));
}
- memory_region_set_enabled(&s->bus_master_enable_region,
- pci_get_word(s->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER);
+ pci_set_master(s, pci_get_word(s->config + PCI_COMMAND)
+ & PCI_COMMAND_MASTER);
g_free(config);
return 0;
@@ -916,7 +926,7 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
* This makes us compatible with old devices
* which never set or clear this bit. */
s->config[PCI_STATUS] &= ~PCI_STATUS_INTERRUPT;
- vmstate_save_state(f, &vmstate_pci_device, s, NULL);
+ vmstate_save_state(f, &vmstate_pci_device, s, NULL, &error_fatal);
/* Restore the interrupt status bit. */
pci_update_irq_status(s);
}
@@ -924,7 +934,8 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
int pci_device_load(PCIDevice *s, QEMUFile *f)
{
int ret;
- ret = vmstate_load_state(f, &vmstate_pci_device, s, s->version_id);
+ ret = vmstate_load_state(f, &vmstate_pci_device, s, s->version_id,
+ &error_fatal);
/* Restore the interrupt status bit. */
pci_update_irq_status(s);
return ret;
@@ -974,14 +985,15 @@ static int pci_parse_devaddr(const char *addr, int *domp, int *busp,
slot = val;
- if (funcp != NULL) {
- if (*e != '.')
+ if (funcp != NULL && *e != '\0') {
+ if (*e != '.') {
return -1;
-
+ }
p = e + 1;
val = strtoul(p, &e, 16);
- if (e == p)
+ if (e == p) {
return -1;
+ }
func = val;
}
@@ -1480,9 +1492,6 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
: pci_get_bus(pci_dev)->address_space_mem;
if (pci_is_vf(pci_dev)) {
- PCIDevice *pf = pci_dev->exp.sriov_vf.pf;
- assert(!pf || type == pf->exp.sriov_pf.vf_bar_type[region_num]);
-
r->addr = pci_bar_address(pci_dev, region_num, r->type, r->size);
if (r->addr != PCI_BAR_UNMAPPED) {
memory_region_add_subregion_overlap(r->address_space,
@@ -1725,7 +1734,7 @@ static void pci_update_mappings(PCIDevice *d)
pci_update_vga(d);
}
-static inline int pci_irq_disabled(PCIDevice *d)
+int pci_irq_disabled(PCIDevice *d)
{
return pci_get_word(d->config + PCI_COMMAND) & PCI_COMMAND_INTX_DISABLE;
}
@@ -1787,9 +1796,8 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
if (ranges_overlap(addr, l, PCI_COMMAND, 2)) {
pci_update_irq_disabled(d, was_irq_disabled);
- memory_region_set_enabled(&d->bus_master_enable_region,
- (pci_get_word(d->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER) && d->enabled);
+ pci_set_master(d, (pci_get_word(d->config + PCI_COMMAND) &
+ PCI_COMMAND_MASTER) && d->enabled);
}
msi_write_config(d, addr, val_in, l);
@@ -2045,13 +2053,15 @@ bool pci_init_nic_in_slot(PCIBus *rootbus, const char *model,
int dom, busnr, devfn;
PCIDevice *pci_dev;
unsigned slot;
+ unsigned func;
+
PCIBus *bus;
if (!nd) {
return false;
}
- if (!devaddr || pci_parse_devaddr(devaddr, &dom, &busnr, &slot, NULL) < 0) {
+ if (!devaddr || pci_parse_devaddr(devaddr, &dom, &busnr, &slot, &func) < 0) {
error_report("Invalid PCI device address %s for device %s",
devaddr, model);
exit(1);
@@ -2062,7 +2072,7 @@ bool pci_init_nic_in_slot(PCIBus *rootbus, const char *model,
exit(1);
}
- devfn = PCI_DEVFN(slot, 0);
+ devfn = PCI_DEVFN(slot, func);
bus = pci_find_bus_nr(rootbus, busnr);
if (!bus) {
@@ -2900,6 +2910,19 @@ static void pci_device_get_iommu_bus_devfn(PCIDevice *dev,
}
}
+ /*
+ * When multiple PCI Express Root Buses are defined using pxb-pcie,
+ * the IOMMU configuration may be specific to each root bus. However,
+ * pxb-pcie acts as a special root complex whose parent is effectively
+ * the default root complex(pcie.0). Ensure that we retrieve the
+ * correct IOMMU ops(if any) in such cases.
+ */
+ if (pci_bus_is_express(iommu_bus) && pci_bus_is_root(iommu_bus)) {
+ if (parent_bus->iommu_per_bus) {
+ break;
+ }
+ }
+
iommu_bus = parent_bus;
}
@@ -2935,6 +2958,23 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
return &address_space_memory;
}
+int pci_iommu_init_iotlb_notifier(PCIDevice *dev, IOMMUNotifier *n,
+ IOMMUNotify fn, void *opaque)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ pci_device_get_iommu_bus_devfn(dev, &iommu_bus, &bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->init_iotlb_notifier) {
+ iommu_bus->iommu_ops->init_iotlb_notifier(bus, iommu_bus->iommu_opaque,
+ devfn, n, fn, opaque);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod,
Error **errp)
{
@@ -2966,6 +3006,168 @@ void pci_device_unset_iommu_device(PCIDevice *dev)
}
}
+int pci_pri_request_page(PCIDevice *dev, uint32_t pasid, bool priv_req,
+ bool exec_req, hwaddr addr, bool lpig,
+ uint16_t prgi, bool is_read, bool is_write)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if (!dev->is_master ||
+ ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) {
+ return -EPERM;
+ }
+
+ if (!pcie_pri_enabled(dev)) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &iommu_bus, &bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->pri_request_page) {
+ return iommu_bus->iommu_ops->pri_request_page(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid, priv_req,
+ exec_req, addr, lpig, prgi,
+ is_read, is_write);
+ }
+
+ return -ENODEV;
+}
+
+int pci_pri_register_notifier(PCIDevice *dev, uint32_t pasid,
+ IOMMUPRINotifier *notifier)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if (!dev->is_master ||
+ ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &iommu_bus, &bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->pri_register_notifier) {
+ iommu_bus->iommu_ops->pri_register_notifier(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid, notifier);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
+void pci_pri_unregister_notifier(PCIDevice *dev, uint32_t pasid)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ pci_device_get_iommu_bus_devfn(dev, &iommu_bus, &bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->pri_unregister_notifier) {
+ iommu_bus->iommu_ops->pri_unregister_notifier(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid);
+ }
+}
+
+ssize_t pci_ats_request_translation(PCIDevice *dev, uint32_t pasid,
+ bool priv_req, bool exec_req,
+ hwaddr addr, size_t length,
+ bool no_write, IOMMUTLBEntry *result,
+ size_t result_length,
+ uint32_t *err_count)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if (!dev->is_master ||
+ ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) {
+ return -EPERM;
+ }
+
+ if (result_length == 0) {
+ return -ENOSPC;
+ }
+
+ if (!pcie_ats_enabled(dev)) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &iommu_bus, &bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->ats_request_translation) {
+ return iommu_bus->iommu_ops->ats_request_translation(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid, priv_req,
+ exec_req, addr, length,
+ no_write, result,
+ result_length, err_count);
+ }
+
+ return -ENODEV;
+}
+
+int pci_iommu_register_iotlb_notifier(PCIDevice *dev, uint32_t pasid,
+ IOMMUNotifier *n)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev)) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &iommu_bus, &bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->register_iotlb_notifier) {
+ iommu_bus->iommu_ops->register_iotlb_notifier(bus,
+ iommu_bus->iommu_opaque, devfn,
+ pasid, n);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
+int pci_iommu_unregister_iotlb_notifier(PCIDevice *dev, uint32_t pasid,
+ IOMMUNotifier *n)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev)) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &iommu_bus, &bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->unregister_iotlb_notifier) {
+ iommu_bus->iommu_ops->unregister_iotlb_notifier(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid, n);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
+int pci_iommu_get_iotlb_info(PCIDevice *dev, uint8_t *addr_width,
+ uint32_t *min_page_size)
+{
+ PCIBus *iommu_bus;
+
+ pci_device_get_iommu_bus_devfn(dev, &iommu_bus, NULL, NULL);
+ if (iommu_bus && iommu_bus->iommu_ops->get_iotlb_info) {
+ iommu_bus->iommu_ops->get_iotlb_info(iommu_bus->iommu_opaque,
+ addr_width, min_page_size);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
{
/*
@@ -2979,6 +3181,24 @@ void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
bus->iommu_opaque = opaque;
}
+/*
+ * Similar to pci_setup_iommu(), but sets iommu_per_bus to true,
+ * indicating that the IOMMU is specific to this bus. This is used by
+ * IOMMU implementations that are tied to a specific PCIe root complex.
+ *
+ * In QEMU, pxb-pcie behaves as a special root complex whose parent is
+ * effectively the default root complex (pcie.0). The iommu_per_bus
+ * is checked in pci_device_get_iommu_bus_devfn() to ensure the correct
+ * IOMMU ops are returned, avoiding the use of the parent’s IOMMU when
+ * it's not appropriate.
+ */
+void pci_setup_iommu_per_bus(PCIBus *bus, const PCIIOMMUOps *ops,
+ void *opaque)
+{
+ pci_setup_iommu(bus, ops, opaque);
+ bus->iommu_per_bus = true;
+}
+
static void pci_dev_get_w64(PCIBus *b, PCIDevice *dev, void *opaque)
{
Range *range = opaque;
@@ -3100,9 +3320,8 @@ void pci_set_enabled(PCIDevice *d, bool state)
d->enabled = state;
pci_update_mappings(d);
- memory_region_set_enabled(&d->bus_master_enable_region,
- (pci_get_word(d->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER) && d->enabled);
+ pci_set_master(d, (pci_get_word(d->config + PCI_COMMAND)
+ & PCI_COMMAND_MASTER) && d->enabled);
if (qdev_is_realized(&d->qdev)) {
pci_device_reset(d);
}
diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c
index abe83bb..7179d99 100644
--- a/hw/pci/pci_host.c
+++ b/hw/pci/pci_host.c
@@ -217,12 +217,6 @@ const MemoryRegionOps pci_host_data_le_ops = {
.endianness = DEVICE_LITTLE_ENDIAN,
};
-const MemoryRegionOps pci_host_data_be_ops = {
- .read = pci_host_data_read,
- .write = pci_host_data_write,
- .endianness = DEVICE_BIG_ENDIAN,
-};
-
static bool pci_host_needed(void *opaque)
{
PCIHostState *s = opaque;
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 1b12db6..b302de6 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -1214,3 +1214,89 @@ void pcie_acs_reset(PCIDevice *dev)
pci_set_word(dev->config + dev->exp.acs_cap + PCI_ACS_CTRL, 0);
}
}
+
+/* PASID */
+void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint8_t pasid_width,
+ bool exec_perm, bool priv_mod)
+{
+ static const uint16_t control_reg_rw_mask = 0x07;
+ uint16_t capability_reg;
+
+ assert(pasid_width <= PCI_EXT_CAP_PASID_MAX_WIDTH);
+
+ pcie_add_capability(dev, PCI_EXT_CAP_ID_PASID, PCI_PASID_VER, offset,
+ PCI_EXT_CAP_PASID_SIZEOF);
+
+ capability_reg = ((uint16_t)pasid_width) << PCI_PASID_CAP_WIDTH_SHIFT;
+ capability_reg |= exec_perm ? PCI_PASID_CAP_EXEC : 0;
+ capability_reg |= priv_mod ? PCI_PASID_CAP_PRIV : 0;
+ pci_set_word(dev->config + offset + PCI_PASID_CAP, capability_reg);
+
+ /* Everything is disabled by default */
+ pci_set_word(dev->config + offset + PCI_PASID_CTRL, 0);
+
+ pci_set_word(dev->wmask + offset + PCI_PASID_CTRL, control_reg_rw_mask);
+
+ dev->exp.pasid_cap = offset;
+}
+
+/* PRI */
+void pcie_pri_init(PCIDevice *dev, uint16_t offset, uint32_t outstanding_pr_cap,
+ bool prg_response_pasid_req)
+{
+ static const uint16_t control_reg_rw_mask = 0x3;
+ static const uint16_t status_reg_rw1_mask = 0x3;
+ static const uint32_t pr_alloc_reg_rw_mask = 0xffffffff;
+ uint16_t status_reg;
+
+ status_reg = prg_response_pasid_req ? PCI_PRI_STATUS_PASID : 0;
+ status_reg |= PCI_PRI_STATUS_STOPPED; /* Stopped by default */
+
+ pcie_add_capability(dev, PCI_EXT_CAP_ID_PRI, PCI_PRI_VER, offset,
+ PCI_EXT_CAP_PRI_SIZEOF);
+ /* Disabled by default */
+
+ pci_set_word(dev->config + offset + PCI_PRI_STATUS, status_reg);
+ pci_set_long(dev->config + offset + PCI_PRI_MAX_REQ, outstanding_pr_cap);
+
+ pci_set_word(dev->wmask + offset + PCI_PRI_CTRL, control_reg_rw_mask);
+ pci_set_word(dev->w1cmask + offset + PCI_PRI_STATUS, status_reg_rw1_mask);
+ pci_set_long(dev->wmask + offset + PCI_PRI_ALLOC_REQ, pr_alloc_reg_rw_mask);
+
+ dev->exp.pri_cap = offset;
+}
+
+uint32_t pcie_pri_get_req_alloc(const PCIDevice *dev)
+{
+ if (!pcie_pri_enabled(dev)) {
+ return 0;
+ }
+ return pci_get_long(dev->config + dev->exp.pri_cap + PCI_PRI_ALLOC_REQ);
+}
+
+bool pcie_pri_enabled(const PCIDevice *dev)
+{
+ if (!pci_is_express(dev) || !dev->exp.pri_cap) {
+ return false;
+ }
+ return (pci_get_word(dev->config + dev->exp.pri_cap + PCI_PRI_CTRL) &
+ PCI_PRI_CTRL_ENABLE) != 0;
+}
+
+bool pcie_pasid_enabled(const PCIDevice *dev)
+{
+ if (!pci_is_express(dev) || !dev->exp.pasid_cap) {
+ return false;
+ }
+ return (pci_get_word(dev->config + dev->exp.pasid_cap + PCI_PASID_CTRL) &
+ PCI_PASID_CTRL_ENABLE) != 0;
+}
+
+bool pcie_ats_enabled(const PCIDevice *dev)
+{
+ if (!pci_is_express(dev) || !dev->exp.ats_cap) {
+ return false;
+ }
+ return (pci_get_word(dev->config + dev->exp.ats_cap + PCI_ATS_CTRL) &
+ PCI_ATS_CTRL_ENABLE) != 0;
+}
diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
index 3ad1874..c4f88f0 100644
--- a/hw/pci/pcie_sriov.c
+++ b/hw/pci/pcie_sriov.c
@@ -64,6 +64,27 @@ static void unregister_vfs(PCIDevice *dev)
pci_set_word(dev->wmask + dev->exp.sriov_cap + PCI_SRIOV_NUM_VF, 0xffff);
}
+static void consume_config(PCIDevice *dev)
+{
+ uint8_t *cfg = dev->config + dev->exp.sriov_cap;
+
+ if (pci_get_word(cfg + PCI_SRIOV_CTRL) & PCI_SRIOV_CTRL_VFE) {
+ register_vfs(dev);
+ } else {
+ uint8_t *wmask = dev->wmask + dev->exp.sriov_cap;
+ uint16_t num_vfs = pci_get_word(cfg + PCI_SRIOV_NUM_VF);
+ uint16_t wmask_val = PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI;
+
+ unregister_vfs(dev);
+
+ if (num_vfs <= pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)) {
+ wmask_val |= PCI_SRIOV_CTRL_VFE;
+ }
+
+ pci_set_word(wmask + PCI_SRIOV_CTRL, wmask_val);
+ }
+}
+
static bool pcie_sriov_pf_init_common(PCIDevice *dev, uint16_t offset,
uint16_t vf_dev_id, uint16_t init_vfs,
uint16_t total_vfs, uint16_t vf_offset,
@@ -174,7 +195,9 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
void pcie_sriov_pf_exit(PCIDevice *dev)
{
- uint8_t *cfg = dev->config + dev->exp.sriov_cap;
+ if (dev->exp.sriov_cap == 0) {
+ return;
+ }
if (dev->exp.sriov_pf.vf_user_created) {
uint16_t ven_id = pci_get_word(dev->config + PCI_VENDOR_ID);
@@ -190,6 +213,8 @@ void pcie_sriov_pf_exit(PCIDevice *dev)
pci_config_set_device_id(dev->exp.sriov_pf.vf[i]->config, vf_dev_id);
}
} else {
+ uint8_t *cfg = dev->config + dev->exp.sriov_cap;
+
unparent_vfs(dev, pci_get_word(cfg + PCI_SRIOV_TOTAL_VF));
}
}
@@ -221,17 +246,6 @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int region_num,
dev->exp.sriov_pf.vf_bar_type[region_num] = type;
}
-void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
- MemoryRegion *memory)
-{
- uint8_t type;
-
- assert(dev->exp.sriov_vf.pf);
- type = dev->exp.sriov_vf.pf->exp.sriov_pf.vf_bar_type[region_num];
-
- return pci_register_bar(dev, region_num, type, memory);
-}
-
static gint compare_vf_devfns(gconstpointer a, gconstpointer b)
{
return (*(PCIDevice **)a)->devfn - (*(PCIDevice **)b)->devfn;
@@ -416,30 +430,13 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address,
trace_sriov_config_write(dev->name, PCI_SLOT(dev->devfn),
PCI_FUNC(dev->devfn), off, val, len);
- if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) {
- if (val & PCI_SRIOV_CTRL_VFE) {
- register_vfs(dev);
- } else {
- unregister_vfs(dev);
- }
- } else if (range_covers_byte(off, len, PCI_SRIOV_NUM_VF)) {
- uint8_t *cfg = dev->config + sriov_cap;
- uint8_t *wmask = dev->wmask + sriov_cap;
- uint16_t num_vfs = pci_get_word(cfg + PCI_SRIOV_NUM_VF);
- uint16_t wmask_val = PCI_SRIOV_CTRL_MSE | PCI_SRIOV_CTRL_ARI;
-
- if (num_vfs <= pci_get_word(cfg + PCI_SRIOV_TOTAL_VF)) {
- wmask_val |= PCI_SRIOV_CTRL_VFE;
- }
-
- pci_set_word(wmask + PCI_SRIOV_CTRL, wmask_val);
- }
+ consume_config(dev);
}
void pcie_sriov_pf_post_load(PCIDevice *dev)
{
if (dev->exp.sriov_cap) {
- register_vfs(dev);
+ consume_config(dev);
}
}
diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig
index ced6bbc..7091d72 100644
--- a/hw/ppc/Kconfig
+++ b/hw/ppc/Kconfig
@@ -44,6 +44,11 @@ config POWERNV
select SSI_M25P80
select PNV_SPI
+config PPC405
+ bool
+ default y
+ depends on PPC
+
config PPC440
bool
default y
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index 809078a..723c97f 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -79,8 +79,6 @@
#define MPC85XX_ESDHC_IRQ 72
#define RTC_REGS_OFFSET 0x68
-#define PLATFORM_CLK_FREQ_HZ (400 * 1000 * 1000)
-
struct boot_info
{
uint32_t dt_base;
@@ -120,7 +118,7 @@ static uint32_t *pci_map_create(void *fdt, uint32_t mpic, int first_slot,
}
static void dt_serial_create(void *fdt, unsigned long long offset,
- const char *soc, const char *mpic,
+ const char *soc, uint32_t freq, const char *mpic,
const char *alias, int idx, bool defcon)
{
char *ser;
@@ -131,7 +129,7 @@ static void dt_serial_create(void *fdt, unsigned long long offset,
qemu_fdt_setprop_string(fdt, ser, "compatible", "ns16550");
qemu_fdt_setprop_cells(fdt, ser, "reg", offset, 0x100);
qemu_fdt_setprop_cell(fdt, ser, "cell-index", idx);
- qemu_fdt_setprop_cell(fdt, ser, "clock-frequency", PLATFORM_CLK_FREQ_HZ);
+ qemu_fdt_setprop_cell(fdt, ser, "clock-frequency", freq);
qemu_fdt_setprop_cells(fdt, ser, "interrupts", 42, 2);
qemu_fdt_setprop_phandle(fdt, ser, "interrupt-parent", mpic);
qemu_fdt_setprop_string(fdt, "/aliases", alias, ser);
@@ -382,8 +380,7 @@ static int ppce500_load_device_tree(PPCE500MachineState *pms,
int fdt_size;
void *fdt;
uint8_t hypercall[16];
- uint32_t clock_freq = PLATFORM_CLK_FREQ_HZ;
- uint32_t tb_freq = PLATFORM_CLK_FREQ_HZ;
+ uint32_t clock_freq, tb_freq;
int i;
char compatible_sb[] = "fsl,mpc8544-immr\0simple-bus";
char *soc;
@@ -411,7 +408,7 @@ static int ppce500_load_device_tree(PPCE500MachineState *pms,
if (dtb_file) {
char *filename;
- filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, dtb_file);
+ filename = qemu_find_file(QEMU_FILE_TYPE_DTB, dtb_file);
if (!filename) {
goto out;
}
@@ -484,6 +481,9 @@ static int ppce500_load_device_tree(PPCE500MachineState *pms,
if (kvmppc_get_hasidle(env)) {
qemu_fdt_setprop(fdt, "/hypervisor", "has-idle", NULL, 0);
}
+ } else {
+ clock_freq = pmc->clock_freq;
+ tb_freq = pmc->tb_freq;
}
/* Create CPU nodes */
@@ -564,12 +564,12 @@ static int ppce500_load_device_tree(PPCE500MachineState *pms,
*/
if (serial_hd(1)) {
dt_serial_create(fdt, MPC8544_SERIAL1_REGS_OFFSET,
- soc, mpic, "serial1", 1, false);
+ soc, pmc->clock_freq, mpic, "serial1", 1, false);
}
if (serial_hd(0)) {
dt_serial_create(fdt, MPC8544_SERIAL0_REGS_OFFSET,
- soc, mpic, "serial0", 0, true);
+ soc, pmc->clock_freq, mpic, "serial0", 0, true);
}
/* i2c */
@@ -931,7 +931,6 @@ void ppce500_init(MachineState *machine)
CPUPPCState *firstenv = NULL;
MemoryRegion *ccsr_addr_space;
SysBusDevice *s;
- PPCE500CCSRState *ccsr;
I2CBus *i2c;
irqs = g_new0(IrqLines, smp_cpus);
@@ -968,7 +967,7 @@ void ppce500_init(MachineState *machine)
env->spr_cb[SPR_BOOKE_PIR].default_value = cs->cpu_index = i;
env->mpic_iack = pmc->ccsrbar_base + MPC8544_MPIC_REGS_OFFSET + 0xa0;
- ppc_booke_timers_init(cpu, PLATFORM_CLK_FREQ_HZ, PPC_TIMER_E500);
+ ppc_booke_timers_init(cpu, pmc->tb_freq, PPC_TIMER_E500);
/* Register reset handler */
if (!i) {
@@ -993,10 +992,10 @@ void ppce500_init(MachineState *machine)
memory_region_add_subregion(address_space_mem, 0, machine->ram);
dev = qdev_new("e500-ccsr");
+ s = SYS_BUS_DEVICE(dev);
object_property_add_child(OBJECT(machine), "e500-ccsr", OBJECT(dev));
- sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
- ccsr = CCSR(dev);
- ccsr_addr_space = &ccsr->ccsr_space;
+ sysbus_realize_and_unref(s, &error_fatal);
+ ccsr_addr_space = sysbus_mmio_get_region(s, 0);
memory_region_add_subregion(address_space_mem, pmc->ccsrbar_base,
ccsr_addr_space);
@@ -1284,6 +1283,7 @@ static void e500_ccsr_initfn(Object *obj)
PPCE500CCSRState *ccsr = CCSR(obj);
memory_region_init(&ccsr->ccsr_space, obj, "e500-ccsr",
MPC8544_CCSRBAR_SIZE);
+ sysbus_init_mmio(SYS_BUS_DEVICE(ccsr), &ccsr->ccsr_space);
}
static const TypeInfo e500_ccsr_info = {
diff --git a/hw/ppc/e500.h b/hw/ppc/e500.h
index 01db102..00f4905 100644
--- a/hw/ppc/e500.h
+++ b/hw/ppc/e500.h
@@ -5,6 +5,8 @@
#include "hw/platform-bus.h"
#include "qom/object.h"
+#define PLATFORM_CLK_FREQ_HZ (400 * 1000 * 1000)
+
struct PPCE500MachineState {
/*< private >*/
MachineState parent_obj;
@@ -37,6 +39,8 @@ struct PPCE500MachineClass {
hwaddr pci_mmio_base;
hwaddr pci_mmio_bus_base;
hwaddr spin_base;
+ uint32_t clock_freq;
+ uint32_t tb_freq;
};
void ppce500_init(MachineState *machine);
diff --git a/hw/ppc/e500plat.c b/hw/ppc/e500plat.c
index 775b9d8..4f1d659 100644
--- a/hw/ppc/e500plat.c
+++ b/hw/ppc/e500plat.c
@@ -93,6 +93,8 @@ static void e500plat_machine_class_init(ObjectClass *oc, const void *data)
pmc->pci_mmio_base = 0xC00000000ULL;
pmc->pci_mmio_bus_base = 0xE0000000ULL;
pmc->spin_base = 0xFEF000000ULL;
+ pmc->clock_freq = PLATFORM_CLK_FREQ_HZ;
+ pmc->tb_freq = PLATFORM_CLK_FREQ_HZ;
mc->desc = "generic paravirt e500 platform";
mc->init = e500plat_init;
diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build
index 9893f8a..170b90a 100644
--- a/hw/ppc/meson.build
+++ b/hw/ppc/meson.build
@@ -57,6 +57,8 @@ ppc_ss.add(when: 'CONFIG_POWERNV', if_true: files(
'pnv_n1_chiplet.c',
))
# PowerPC 4xx boards
+ppc_ss.add(when: 'CONFIG_PPC405', if_true: files(
+ 'ppe42_machine.c'))
ppc_ss.add(when: 'CONFIG_PPC440', if_true: files(
'ppc440_bamboo.c',
'ppc440_uc.c'))
diff --git a/hw/ppc/mpc8544ds.c b/hw/ppc/mpc8544ds.c
index 97fb0f3..5826985 100644
--- a/hw/ppc/mpc8544ds.c
+++ b/hw/ppc/mpc8544ds.c
@@ -55,6 +55,8 @@ static void mpc8544ds_machine_class_init(ObjectClass *oc, const void *data)
pmc->pci_mmio_bus_base = 0xC0000000ULL;
pmc->pci_pio_base = 0xE1000000ULL;
pmc->spin_base = 0xEF000000ULL;
+ pmc->clock_freq = PLATFORM_CLK_FREQ_HZ;
+ pmc->tb_freq = PLATFORM_CLK_FREQ_HZ;
mc->desc = "mpc8544ds";
mc->init = mpc8544ds_init;
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 4a49e9d..f0469cd 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -21,6 +21,7 @@
#include "qemu/osdep.h"
#include "qemu/datadir.h"
+#include "qemu/log.h"
#include "qemu/units.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
@@ -490,6 +491,37 @@ static void pnv_chip_power10_dt_populate(PnvChip *chip, void *fdt)
pnv_dt_lpc(chip, fdt, 0, PNV10_LPCM_BASE(chip), PNV10_LPCM_SIZE);
}
+static void pnv_chip_power11_dt_populate(PnvChip *chip, void *fdt)
+{
+ static const char compat[] = "ibm,power11-xscom\0ibm,xscom";
+ int i;
+
+ pnv_dt_xscom(chip, fdt, 0,
+ cpu_to_be64(PNV11_XSCOM_BASE(chip)),
+ cpu_to_be64(PNV11_XSCOM_SIZE),
+ compat, sizeof(compat));
+
+ for (i = 0; i < chip->nr_cores; i++) {
+ PnvCore *pnv_core = chip->cores[i];
+ int offset;
+
+ offset = pnv_dt_core(chip, pnv_core, fdt);
+
+ _FDT((fdt_setprop(fdt, offset, "ibm,pa-features",
+ pa_features_31, sizeof(pa_features_31))));
+
+ if (pnv_core->big_core) {
+ i++; /* Big-core groups two QEMU cores */
+ }
+ }
+
+ if (chip->ram_size) {
+ pnv_dt_memory(fdt, chip->chip_id, chip->ram_start, chip->ram_size);
+ }
+
+ pnv_dt_lpc(chip, fdt, 0, PNV11_LPCM_BASE(chip), PNV11_LPCM_SIZE);
+}
+
static void pnv_dt_rtc(ISADevice *d, void *fdt, int lpc_off)
{
uint32_t io_base = d->ioport_id;
@@ -822,6 +854,26 @@ static ISABus *pnv_chip_power10_isa_create(PnvChip *chip, Error **errp)
return pnv_lpc_isa_create(&chip10->lpc, false, errp);
}
+static ISABus *pnv_chip_power11_isa_create(PnvChip *chip, Error **errp)
+{
+ Pnv11Chip *chip11 = PNV11_CHIP(chip);
+ qemu_irq irq;
+
+ irq = qdev_get_gpio_in(DEVICE(&chip11->psi), PSIHB9_IRQ_LPCHC);
+ qdev_connect_gpio_out_named(DEVICE(&chip11->lpc), "LPCHC", 0, irq);
+
+ irq = qdev_get_gpio_in(DEVICE(&chip11->psi), PSIHB9_IRQ_LPC_SIRQ0);
+ qdev_connect_gpio_out_named(DEVICE(&chip11->lpc), "SERIRQ", 0, irq);
+ irq = qdev_get_gpio_in(DEVICE(&chip11->psi), PSIHB9_IRQ_LPC_SIRQ1);
+ qdev_connect_gpio_out_named(DEVICE(&chip11->lpc), "SERIRQ", 1, irq);
+ irq = qdev_get_gpio_in(DEVICE(&chip11->psi), PSIHB9_IRQ_LPC_SIRQ2);
+ qdev_connect_gpio_out_named(DEVICE(&chip11->lpc), "SERIRQ", 2, irq);
+ irq = qdev_get_gpio_in(DEVICE(&chip11->psi), PSIHB9_IRQ_LPC_SIRQ3);
+ qdev_connect_gpio_out_named(DEVICE(&chip11->lpc), "SERIRQ", 3, irq);
+
+ return pnv_lpc_isa_create(&chip11->lpc, false, errp);
+}
+
static ISABus *pnv_isa_create(PnvChip *chip, Error **errp)
{
return PNV_CHIP_GET_CLASS(chip)->isa_create(chip, errp);
@@ -885,6 +937,12 @@ static uint64_t pnv_chip_power10_xscom_core_base(PnvChip *chip,
return PNV10_XSCOM_EC_BASE(core_id);
}
+static uint64_t pnv_chip_power11_xscom_core_base(PnvChip *chip,
+ uint32_t core_id)
+{
+ return PNV11_XSCOM_EC_BASE(core_id);
+}
+
static bool pnv_match_cpu(const char *default_type, const char *cpu_type)
{
PowerPCCPUClass *ppc_default =
@@ -914,6 +972,16 @@ static void pnv_chip_power10_pic_print_info(PnvChip *chip, GString *buf)
pnv_chip_power9_pic_print_info_child, buf);
}
+static void pnv_chip_power11_pic_print_info(PnvChip *chip, GString *buf)
+{
+ Pnv11Chip *chip11 = PNV11_CHIP(chip);
+
+ pnv_xive2_pic_print_info(&chip11->xive, buf);
+ pnv_psi_pic_print_info(&chip11->psi, buf);
+ object_child_foreach_recursive(OBJECT(chip),
+ pnv_chip_power9_pic_print_info_child, buf);
+}
+
/* Always give the first 1GB to chip 0 else we won't boot */
static uint64_t pnv_chip_get_ram_size(PnvMachineState *pnv, int chip_id)
{
@@ -1421,6 +1489,60 @@ static void pnv_chip_power10_intc_print_info(PnvChip *chip, PowerPCCPU *cpu,
xive_tctx_pic_print_info(XIVE_TCTX(pnv_cpu_state(cpu)->intc), buf);
}
+static void *pnv_chip_power10_intc_get(PnvChip *chip)
+{
+ return &PNV10_CHIP(chip)->xive;
+}
+
+static void pnv_chip_power11_intc_create(PnvChip *chip, PowerPCCPU *cpu,
+ Error **errp)
+{
+ Pnv11Chip *chip11 = PNV11_CHIP(chip);
+ Error *local_err = NULL;
+ Object *obj;
+ PnvCPUState *pnv_cpu = pnv_cpu_state(cpu);
+
+ /*
+ * The core creates its interrupt presenter but the XIVE2 interrupt
+ * controller object is initialized afterwards. Hopefully, it's
+ * only used at runtime.
+ */
+ obj = xive_tctx_create(OBJECT(cpu), XIVE_PRESENTER(&chip11->xive),
+ &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ pnv_cpu->intc = obj;
+}
+
+static void pnv_chip_power11_intc_reset(PnvChip *chip, PowerPCCPU *cpu)
+{
+ PnvCPUState *pnv_cpu = pnv_cpu_state(cpu);
+
+ xive_tctx_reset(XIVE_TCTX(pnv_cpu->intc));
+}
+
+static void pnv_chip_power11_intc_destroy(PnvChip *chip, PowerPCCPU *cpu)
+{
+ PnvCPUState *pnv_cpu = pnv_cpu_state(cpu);
+
+ xive_tctx_destroy(XIVE_TCTX(pnv_cpu->intc));
+ pnv_cpu->intc = NULL;
+}
+
+static void pnv_chip_power11_intc_print_info(PnvChip *chip, PowerPCCPU *cpu,
+ GString *buf)
+{
+ xive_tctx_pic_print_info(XIVE_TCTX(pnv_cpu_state(cpu)->intc), buf);
+}
+
+static void *pnv_chip_power11_intc_get(PnvChip *chip)
+{
+ return &PNV11_CHIP(chip)->xive;
+}
+
/*
* Allowed core identifiers on a POWER8 Processor Chip :
*
@@ -1451,6 +1573,8 @@ static void pnv_chip_power10_intc_print_info(PnvChip *chip, PowerPCCPU *cpu,
#define POWER10_CORE_MASK (0xffffffffffffffull)
+#define POWER11_CORE_MASK (0xffffffffffffffull)
+
static void pnv_chip_power8_instance_init(Object *obj)
{
Pnv8Chip *chip8 = PNV8_CHIP(obj);
@@ -1794,12 +1918,83 @@ static void pnv_chip_power9_pec_realize(PnvChip *chip, Error **errp)
}
}
+static uint64_t pnv_handle_sprd_load(CPUPPCState *env)
+{
+ PowerPCCPU *cpu = env_archcpu(env);
+ PnvCore *pc = pnv_cpu_state(cpu)->pnv_core;
+ uint64_t sprc = env->spr[SPR_POWER_SPRC];
+
+ if (pc->big_core) {
+ pc = pnv_chip_find_core(pc->chip, CPU_CORE(pc)->core_id & ~0x1);
+ }
+
+ switch (sprc & 0x3e0) {
+ case 0: /* SCRATCH0-3 */
+ case 1: /* SCRATCH4-7 */
+ return pc->scratch[(sprc >> 3) & 0x7];
+
+ case 0x1e0: /* core thread state */
+ if (env->excp_model == POWERPC_EXCP_POWER9) {
+ /*
+ * Only implement for POWER9 because skiboot uses it to check
+ * big-core mode. Other bits are unimplemented so we would
+ * prefer to get unimplemented message on POWER10 if it were
+ * used anywhere.
+ */
+ if (pc->big_core) {
+ return PPC_BIT(63);
+ } else {
+ return 0;
+ }
+ }
+ /* fallthru */
+
+ default:
+ qemu_log_mask(LOG_UNIMP, "mfSPRD: Unimplemented SPRC:0x"
+ TARGET_FMT_lx"\n", sprc);
+ break;
+ }
+ return 0;
+}
+
+static void pnv_handle_sprd_store(CPUPPCState *env, uint64_t val)
+{
+ PowerPCCPU *cpu = env_archcpu(env);
+ uint64_t sprc = env->spr[SPR_POWER_SPRC];
+ PnvCore *pc = pnv_cpu_state(cpu)->pnv_core;
+ int nr;
+
+ if (pc->big_core) {
+ pc = pnv_chip_find_core(pc->chip, CPU_CORE(pc)->core_id & ~0x1);
+ }
+
+ switch (sprc & 0x3e0) {
+ case 0: /* SCRATCH0-3 */
+ case 1: /* SCRATCH4-7 */
+ /*
+ * Log stores to SCRATCH, because some firmware uses these for
+ * debugging and logging, but they would normally be read by the BMC,
+ * which is not implemented in QEMU yet. This gives a way to get at the
+ * information. Could also dump these upon checkstop.
+ */
+ nr = (sprc >> 3) & 0x7;
+ pc->scratch[nr] = val;
+ break;
+ default:
+ qemu_log_mask(LOG_UNIMP, "mtSPRD: Unimplemented SPRC:0x"
+ TARGET_FMT_lx"\n", sprc);
+ break;
+ }
+}
+
static void pnv_chip_power9_realize(DeviceState *dev, Error **errp)
{
PnvChipClass *pcc = PNV_CHIP_GET_CLASS(dev);
Pnv9Chip *chip9 = PNV9_CHIP(dev);
PnvChip *chip = PNV_CHIP(dev);
Pnv9Psi *psi9 = &chip9->psi;
+ PowerPCCPU *cpu;
+ PowerPCCPUClass *cpu_class;
Error *local_err = NULL;
int i;
@@ -1827,6 +2022,12 @@ static void pnv_chip_power9_realize(DeviceState *dev, Error **errp)
return;
}
+ /* Set handlers for Special registers, such as SPRD */
+ cpu = chip->cores[0]->threads[0];
+ cpu_class = POWERPC_CPU_GET_CLASS(cpu);
+ cpu_class->load_sprd = pnv_handle_sprd_load;
+ cpu_class->store_sprd = pnv_handle_sprd_store;
+
/* XIVE interrupt controller (POWER9) */
object_property_set_int(OBJECT(&chip9->xive), "ic-bar",
PNV9_XIVE_IC_BASE(chip), &error_fatal);
@@ -2078,6 +2279,8 @@ static void pnv_chip_power10_realize(DeviceState *dev, Error **errp)
PnvChipClass *pcc = PNV_CHIP_GET_CLASS(dev);
PnvChip *chip = PNV_CHIP(dev);
Pnv10Chip *chip10 = PNV10_CHIP(dev);
+ PowerPCCPU *cpu;
+ PowerPCCPUClass *cpu_class;
Error *local_err = NULL;
int i;
@@ -2105,6 +2308,12 @@ static void pnv_chip_power10_realize(DeviceState *dev, Error **errp)
return;
}
+ /* Set handlers for Special registers, such as SPRD */
+ cpu = chip->cores[0]->threads[0];
+ cpu_class = POWERPC_CPU_GET_CLASS(cpu);
+ cpu_class->load_sprd = pnv_handle_sprd_load;
+ cpu_class->store_sprd = pnv_handle_sprd_store;
+
/* XIVE2 interrupt controller (POWER10) */
object_property_set_int(OBJECT(&chip10->xive), "ic-bar",
PNV10_XIVE2_IC_BASE(chip), &error_fatal);
@@ -2264,6 +2473,302 @@ static void pnv_chip_power10_realize(DeviceState *dev, Error **errp)
}
}
+static void pnv_chip_power11_instance_init(Object *obj)
+{
+ PnvChip *chip = PNV_CHIP(obj);
+ Pnv11Chip *chip11 = PNV11_CHIP(obj);
+ PnvChipClass *pcc = PNV_CHIP_GET_CLASS(obj);
+ int i;
+
+ object_initialize_child(obj, "adu", &chip11->adu, TYPE_PNV_ADU);
+
+ /*
+ * Use Power10 device models for PSI/LPC/OCC/SBE/HOMER as corresponding
+ * device models for Power11 are same
+ */
+ object_initialize_child(obj, "psi", &chip11->psi, TYPE_PNV10_PSI);
+ object_initialize_child(obj, "lpc", &chip11->lpc, TYPE_PNV10_LPC);
+ object_initialize_child(obj, "occ", &chip11->occ, TYPE_PNV10_OCC);
+ object_initialize_child(obj, "sbe", &chip11->sbe, TYPE_PNV10_SBE);
+ object_initialize_child(obj, "homer", &chip11->homer, TYPE_PNV10_HOMER);
+
+ object_initialize_child(obj, "xive", &chip11->xive, TYPE_PNV_XIVE2);
+ object_property_add_alias(obj, "xive-fabric", OBJECT(&chip11->xive),
+ "xive-fabric");
+ object_initialize_child(obj, "chiptod", &chip11->chiptod,
+ TYPE_PNV11_CHIPTOD);
+ object_initialize_child(obj, "n1-chiplet", &chip11->n1_chiplet,
+ TYPE_PNV_N1_CHIPLET);
+
+ chip->num_pecs = pcc->num_pecs;
+
+ for (i = 0; i < chip->num_pecs; i++) {
+ object_initialize_child(obj, "pec[*]", &chip11->pecs[i],
+ TYPE_PNV_PHB5_PEC);
+ }
+
+ for (i = 0; i < pcc->i2c_num_engines; i++) {
+ object_initialize_child(obj, "i2c[*]", &chip11->i2c[i], TYPE_PNV_I2C);
+ }
+
+ for (i = 0; i < PNV10_CHIP_MAX_PIB_SPIC; i++) {
+ object_initialize_child(obj, "pib_spic[*]", &chip11->pib_spic[i],
+ TYPE_PNV_SPI);
+ }
+}
+
+static void pnv_chip_power11_quad_realize(Pnv11Chip *chip11, Error **errp)
+{
+ PnvChip *chip = PNV_CHIP(chip11);
+ int i;
+
+ chip11->nr_quads = DIV_ROUND_UP(chip->nr_cores, 4);
+ chip11->quads = g_new0(PnvQuad, chip11->nr_quads);
+
+ for (i = 0; i < chip11->nr_quads; i++) {
+ PnvQuad *eq = &chip11->quads[i];
+
+ pnv_chip_quad_realize_one(chip, eq, chip->cores[i * 4],
+ PNV_QUAD_TYPE_NAME("power11"));
+
+ pnv_xscom_add_subregion(chip, PNV11_XSCOM_EQ_BASE(eq->quad_id),
+ &eq->xscom_regs);
+
+ pnv_xscom_add_subregion(chip, PNV11_XSCOM_QME_BASE(eq->quad_id),
+ &eq->xscom_qme_regs);
+ }
+}
+
+static void pnv_chip_power11_phb_realize(PnvChip *chip, Error **errp)
+{
+ Pnv11Chip *chip11 = PNV11_CHIP(chip);
+ int i;
+
+ for (i = 0; i < chip->num_pecs; i++) {
+ PnvPhb4PecState *pec = &chip11->pecs[i];
+ PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(pec);
+ uint32_t pec_cplt_base;
+ uint32_t pec_nest_base;
+ uint32_t pec_pci_base;
+
+ object_property_set_int(OBJECT(pec), "index", i, &error_fatal);
+ object_property_set_int(OBJECT(pec), "chip-id", chip->chip_id,
+ &error_fatal);
+ object_property_set_link(OBJECT(pec), "chip", OBJECT(chip),
+ &error_fatal);
+ if (!qdev_realize(DEVICE(pec), NULL, errp)) {
+ return;
+ }
+
+ pec_cplt_base = pecc->xscom_cplt_base(pec);
+ pec_nest_base = pecc->xscom_nest_base(pec);
+ pec_pci_base = pecc->xscom_pci_base(pec);
+
+ pnv_xscom_add_subregion(chip, pec_cplt_base,
+ &pec->nest_pervasive.xscom_ctrl_regs_mr);
+ pnv_xscom_add_subregion(chip, pec_nest_base, &pec->nest_regs_mr);
+ pnv_xscom_add_subregion(chip, pec_pci_base, &pec->pci_regs_mr);
+ }
+}
+
+static void pnv_chip_power11_realize(DeviceState *dev, Error **errp)
+{
+ PnvChipClass *pcc = PNV_CHIP_GET_CLASS(dev);
+ PnvChip *chip = PNV_CHIP(dev);
+ Pnv11Chip *chip11 = PNV11_CHIP(dev);
+ PowerPCCPU *cpu;
+ PowerPCCPUClass *cpu_class;
+ Error *local_err = NULL;
+ int i;
+
+ /* XSCOM bridge is first */
+ pnv_xscom_init(chip, PNV11_XSCOM_SIZE, PNV11_XSCOM_BASE(chip));
+
+ pcc->parent_realize(dev, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ /* Set handlers for Special registers, such as SPRD */
+ cpu = chip->cores[0]->threads[0];
+ cpu_class = POWERPC_CPU_GET_CLASS(cpu);
+ cpu_class->load_sprd = pnv_handle_sprd_load;
+ cpu_class->store_sprd = pnv_handle_sprd_store;
+
+ /* ADU */
+ object_property_set_link(OBJECT(&chip11->adu), "lpc", OBJECT(&chip11->lpc),
+ &error_abort);
+ if (!qdev_realize(DEVICE(&chip11->adu), NULL, errp)) {
+ return;
+ }
+ pnv_xscom_add_subregion(chip, PNV11_XSCOM_ADU_BASE,
+ &chip11->adu.xscom_regs);
+
+ pnv_chip_power11_quad_realize(chip11, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ /* XIVE2 interrupt controller */
+ object_property_set_int(OBJECT(&chip11->xive), "ic-bar",
+ PNV11_XIVE2_IC_BASE(chip), &error_fatal);
+ object_property_set_int(OBJECT(&chip11->xive), "esb-bar",
+ PNV11_XIVE2_ESB_BASE(chip), &error_fatal);
+ object_property_set_int(OBJECT(&chip11->xive), "end-bar",
+ PNV11_XIVE2_END_BASE(chip), &error_fatal);
+ object_property_set_int(OBJECT(&chip11->xive), "nvpg-bar",
+ PNV11_XIVE2_NVPG_BASE(chip), &error_fatal);
+ object_property_set_int(OBJECT(&chip11->xive), "nvc-bar",
+ PNV11_XIVE2_NVC_BASE(chip), &error_fatal);
+ object_property_set_int(OBJECT(&chip11->xive), "tm-bar",
+ PNV11_XIVE2_TM_BASE(chip), &error_fatal);
+ object_property_set_link(OBJECT(&chip11->xive), "chip", OBJECT(chip),
+ &error_abort);
+ if (!sysbus_realize(SYS_BUS_DEVICE(&chip11->xive), errp)) {
+ return;
+ }
+ pnv_xscom_add_subregion(chip, PNV11_XSCOM_XIVE2_BASE,
+ &chip11->xive.xscom_regs);
+
+ /* Processor Service Interface (PSI) Host Bridge */
+ object_property_set_int(OBJECT(&chip11->psi), "bar",
+ PNV11_PSIHB_BASE(chip), &error_fatal);
+ /* PSI can be configured to use 64k ESB pages on Power11 */
+ object_property_set_int(OBJECT(&chip11->psi), "shift", XIVE_ESB_64K,
+ &error_fatal);
+ if (!qdev_realize(DEVICE(&chip11->psi), NULL, errp)) {
+ return;
+ }
+ pnv_xscom_add_subregion(chip, PNV11_XSCOM_PSIHB_BASE,
+ &PNV_PSI(&chip11->psi)->xscom_regs);
+
+ /* LPC */
+ if (!qdev_realize(DEVICE(&chip11->lpc), NULL, errp)) {
+ return;
+ }
+ memory_region_add_subregion(get_system_memory(), PNV11_LPCM_BASE(chip),
+ &chip11->lpc.xscom_regs);
+
+ chip->fw_mr = &chip11->lpc.isa_fw;
+ chip->dt_isa_nodename = g_strdup_printf("/lpcm-opb@%" PRIx64 "/lpc@0",
+ (uint64_t) PNV11_LPCM_BASE(chip));
+
+ /* ChipTOD */
+ object_property_set_bool(OBJECT(&chip11->chiptod), "primary",
+ chip->chip_id == 0, &error_abort);
+ object_property_set_bool(OBJECT(&chip11->chiptod), "secondary",
+ chip->chip_id == 1, &error_abort);
+ object_property_set_link(OBJECT(&chip11->chiptod), "chip", OBJECT(chip),
+ &error_abort);
+ if (!qdev_realize(DEVICE(&chip11->chiptod), NULL, errp)) {
+ return;
+ }
+ pnv_xscom_add_subregion(chip, PNV11_XSCOM_CHIPTOD_BASE,
+ &chip11->chiptod.xscom_regs);
+
+ /* HOMER (must be created before OCC) */
+ object_property_set_link(OBJECT(&chip11->homer), "chip", OBJECT(chip),
+ &error_abort);
+ if (!qdev_realize(DEVICE(&chip11->homer), NULL, errp)) {
+ return;
+ }
+ /* Homer Xscom region */
+ pnv_xscom_add_subregion(chip, PNV11_XSCOM_PBA_BASE,
+ &chip11->homer.pba_regs);
+ /* Homer RAM region */
+ memory_region_add_subregion(get_system_memory(), chip11->homer.base,
+ &chip11->homer.mem);
+
+ /* Create the simplified OCC model */
+ object_property_set_link(OBJECT(&chip11->occ), "homer",
+ OBJECT(&chip11->homer), &error_abort);
+ if (!qdev_realize(DEVICE(&chip11->occ), NULL, errp)) {
+ return;
+ }
+ pnv_xscom_add_subregion(chip, PNV11_XSCOM_OCC_BASE,
+ &chip11->occ.xscom_regs);
+ qdev_connect_gpio_out(DEVICE(&chip11->occ), 0, qdev_get_gpio_in(
+ DEVICE(&chip11->psi), PSIHB9_IRQ_OCC));
+
+ /* OCC SRAM model */
+ memory_region_add_subregion(get_system_memory(),
+ PNV11_OCC_SENSOR_BASE(chip),
+ &chip11->occ.sram_regs);
+
+ /* SBE */
+ if (!qdev_realize(DEVICE(&chip11->sbe), NULL, errp)) {
+ return;
+ }
+ pnv_xscom_add_subregion(chip, PNV11_XSCOM_SBE_CTRL_BASE,
+ &chip11->sbe.xscom_ctrl_regs);
+ pnv_xscom_add_subregion(chip, PNV11_XSCOM_SBE_MBOX_BASE,
+ &chip11->sbe.xscom_mbox_regs);
+ qdev_connect_gpio_out(DEVICE(&chip11->sbe), 0, qdev_get_gpio_in(
+ DEVICE(&chip11->psi), PSIHB9_IRQ_PSU));
+
+ /* N1 chiplet */
+ if (!qdev_realize(DEVICE(&chip11->n1_chiplet), NULL, errp)) {
+ return;
+ }
+ pnv_xscom_add_subregion(chip, PNV11_XSCOM_N1_CHIPLET_CTRL_REGS_BASE,
+ &chip11->n1_chiplet.nest_pervasive.xscom_ctrl_regs_mr);
+
+ pnv_xscom_add_subregion(chip, PNV11_XSCOM_N1_PB_SCOM_EQ_BASE,
+ &chip11->n1_chiplet.xscom_pb_eq_mr);
+
+ pnv_xscom_add_subregion(chip, PNV11_XSCOM_N1_PB_SCOM_ES_BASE,
+ &chip11->n1_chiplet.xscom_pb_es_mr);
+
+ /* PHBs */
+ pnv_chip_power11_phb_realize(chip, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ /*
+ * I2C
+ */
+ for (i = 0; i < pcc->i2c_num_engines; i++) {
+ Object *obj = OBJECT(&chip11->i2c[i]);
+
+ object_property_set_int(obj, "engine", i + 1, &error_fatal);
+ object_property_set_int(obj, "num-busses",
+ pcc->i2c_ports_per_engine[i],
+ &error_fatal);
+ object_property_set_link(obj, "chip", OBJECT(chip), &error_abort);
+ if (!qdev_realize(DEVICE(obj), NULL, errp)) {
+ return;
+ }
+ pnv_xscom_add_subregion(chip, PNV11_XSCOM_I2CM_BASE +
+ (chip11->i2c[i].engine - 1) *
+ PNV11_XSCOM_I2CM_SIZE,
+ &chip11->i2c[i].xscom_regs);
+ qdev_connect_gpio_out(DEVICE(&chip11->i2c[i]), 0,
+ qdev_get_gpio_in(DEVICE(&chip11->psi),
+ PSIHB9_IRQ_SBE_I2C));
+ }
+ /* PIB SPI Controller */
+ for (i = 0; i < PNV10_CHIP_MAX_PIB_SPIC; i++) {
+ object_property_set_int(OBJECT(&chip11->pib_spic[i]), "spic_num",
+ i, &error_fatal);
+ /* pib_spic[2] connected to 25csm04 which implements 1 byte transfer */
+ object_property_set_int(OBJECT(&chip11->pib_spic[i]), "transfer_len",
+ (i == 2) ? 1 : 4, &error_fatal);
+ object_property_set_int(OBJECT(&chip11->pib_spic[i]), "chip-id",
+ chip->chip_id, &error_fatal);
+ if (!sysbus_realize(SYS_BUS_DEVICE(OBJECT
+ (&chip11->pib_spic[i])), errp)) {
+ return;
+ }
+ pnv_xscom_add_subregion(chip, PNV11_XSCOM_PIB_SPIC_BASE +
+ i * PNV11_XSCOM_PIB_SPIC_SIZE,
+ &chip11->pib_spic[i].xscom_spic_regs);
+ }
+}
+
static void pnv_rainier_i2c_init(PnvMachineState *pnv)
{
int i;
@@ -2315,6 +2820,7 @@ static void pnv_chip_power10_class_init(ObjectClass *klass, const void *data)
k->intc_reset = pnv_chip_power10_intc_reset;
k->intc_destroy = pnv_chip_power10_intc_destroy;
k->intc_print_info = pnv_chip_power10_intc_print_info;
+ k->intc_get = pnv_chip_power10_intc_get;
k->isa_create = pnv_chip_power10_isa_create;
k->dt_populate = pnv_chip_power10_dt_populate;
k->pic_print_info = pnv_chip_power10_pic_print_info;
@@ -2329,6 +2835,40 @@ static void pnv_chip_power10_class_init(ObjectClass *klass, const void *data)
&k->parent_realize);
}
+static uint32_t pnv_chip_power11_xscom_pcba(PnvChip *chip, uint64_t addr)
+{
+ addr &= (PNV11_XSCOM_SIZE - 1);
+ return addr >> 3;
+}
+
+static void pnv_chip_power11_class_init(ObjectClass *klass, const void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ PnvChipClass *k = PNV_CHIP_CLASS(klass);
+ static const int i2c_ports_per_engine[PNV10_CHIP_MAX_I2C] = {14, 14, 2, 16};
+
+ k->chip_cfam_id = 0x220da04980000000ull; /* P11 DD2.0 (with NX) */
+ k->cores_mask = POWER11_CORE_MASK;
+ k->get_pir_tir = pnv_get_pir_tir_p10;
+ k->intc_create = pnv_chip_power11_intc_create;
+ k->intc_reset = pnv_chip_power11_intc_reset;
+ k->intc_destroy = pnv_chip_power11_intc_destroy;
+ k->intc_print_info = pnv_chip_power11_intc_print_info;
+ k->intc_get = pnv_chip_power11_intc_get;
+ k->isa_create = pnv_chip_power11_isa_create;
+ k->dt_populate = pnv_chip_power11_dt_populate;
+ k->pic_print_info = pnv_chip_power11_pic_print_info;
+ k->xscom_core_base = pnv_chip_power11_xscom_core_base;
+ k->xscom_pcba = pnv_chip_power11_xscom_pcba;
+ dc->desc = "PowerNV Chip Power11";
+ k->num_pecs = PNV10_CHIP_MAX_PEC;
+ k->i2c_num_engines = PNV10_CHIP_MAX_I2C;
+ k->i2c_ports_per_engine = i2c_ports_per_engine;
+
+ device_class_set_parent_realize(dc, pnv_chip_power11_realize,
+ &k->parent_realize);
+}
+
static void pnv_chip_core_sanitize(PnvMachineState *pnv, PnvChip *chip,
Error **errp)
{
@@ -2608,65 +3148,88 @@ static void pnv_pic_print_info(InterruptStatsProvider *obj, GString *buf)
}
}
-static int pnv_match_nvt(XiveFabric *xfb, uint8_t format,
- uint8_t nvt_blk, uint32_t nvt_idx,
- bool crowd, bool cam_ignore, uint8_t priority,
- uint32_t logic_serv,
- XiveTCTXMatch *match)
+static bool pnv_match_nvt(XiveFabric *xfb, uint8_t format,
+ uint8_t nvt_blk, uint32_t nvt_idx,
+ bool crowd, bool cam_ignore, uint8_t priority,
+ uint32_t logic_serv,
+ XiveTCTXMatch *match)
{
PnvMachineState *pnv = PNV_MACHINE(xfb);
- int total_count = 0;
int i;
for (i = 0; i < pnv->num_chips; i++) {
Pnv9Chip *chip9 = PNV9_CHIP(pnv->chips[i]);
XivePresenter *xptr = XIVE_PRESENTER(&chip9->xive);
XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr);
- int count;
- count = xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, crowd,
- cam_ignore, priority, logic_serv, match);
+ xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, crowd,
+ cam_ignore, priority, logic_serv, match);
+ }
- if (count < 0) {
- return count;
- }
+ return !!match->count;
+}
+
+static bool pnv10_xive_match_nvt(XiveFabric *xfb, uint8_t format,
+ uint8_t nvt_blk, uint32_t nvt_idx,
+ bool crowd, bool cam_ignore, uint8_t priority,
+ uint32_t logic_serv,
+ XiveTCTXMatch *match)
+{
+ PnvMachineState *pnv = PNV_MACHINE(xfb);
+ int i;
+
+ for (i = 0; i < pnv->num_chips; i++) {
+ Pnv10Chip *chip10 = PNV10_CHIP(pnv->chips[i]);
+ XivePresenter *xptr = XIVE_PRESENTER(&chip10->xive);
+ XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr);
- total_count += count;
+ xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, crowd,
+ cam_ignore, priority, logic_serv, match);
}
- return total_count;
+ return !!match->count;
}
-static int pnv10_xive_match_nvt(XiveFabric *xfb, uint8_t format,
+static int pnv10_xive_broadcast(XiveFabric *xfb,
uint8_t nvt_blk, uint32_t nvt_idx,
- bool crowd, bool cam_ignore, uint8_t priority,
- uint32_t logic_serv,
- XiveTCTXMatch *match)
+ bool crowd, bool cam_ignore,
+ uint8_t priority)
{
PnvMachineState *pnv = PNV_MACHINE(xfb);
- int total_count = 0;
int i;
for (i = 0; i < pnv->num_chips; i++) {
Pnv10Chip *chip10 = PNV10_CHIP(pnv->chips[i]);
XivePresenter *xptr = XIVE_PRESENTER(&chip10->xive);
XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr);
- int count;
- count = xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, crowd,
- cam_ignore, priority, logic_serv, match);
+ xpc->broadcast(xptr, nvt_blk, nvt_idx, crowd, cam_ignore, priority);
+ }
+ return 0;
+}
- if (count < 0) {
- return count;
- }
+static bool pnv11_xive_match_nvt(XiveFabric *xfb, uint8_t format,
+ uint8_t nvt_blk, uint32_t nvt_idx,
+ bool crowd, bool cam_ignore, uint8_t priority,
+ uint32_t logic_serv,
+ XiveTCTXMatch *match)
+{
+ PnvMachineState *pnv = PNV_MACHINE(xfb);
+ int i;
+
+ for (i = 0; i < pnv->num_chips; i++) {
+ Pnv11Chip *chip11 = PNV11_CHIP(pnv->chips[i]);
+ XivePresenter *xptr = XIVE_PRESENTER(&chip11->xive);
+ XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr);
- total_count += count;
+ xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, crowd,
+ cam_ignore, priority, logic_serv, match);
}
- return total_count;
+ return !!match->count;
}
-static int pnv10_xive_broadcast(XiveFabric *xfb,
+static int pnv11_xive_broadcast(XiveFabric *xfb,
uint8_t nvt_blk, uint32_t nvt_idx,
bool crowd, bool cam_ignore,
uint8_t priority)
@@ -2675,8 +3238,8 @@ static int pnv10_xive_broadcast(XiveFabric *xfb,
int i;
for (i = 0; i < pnv->num_chips; i++) {
- Pnv10Chip *chip10 = PNV10_CHIP(pnv->chips[i]);
- XivePresenter *xptr = XIVE_PRESENTER(&chip10->xive);
+ Pnv11Chip *chip11 = PNV11_CHIP(pnv->chips[i]);
+ XivePresenter *xptr = XIVE_PRESENTER(&chip11->xive);
XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr);
xpc->broadcast(xptr, nvt_blk, nvt_idx, crowd, cam_ignore, priority);
@@ -2858,6 +3421,46 @@ static void pnv_machine_p10_rainier_class_init(ObjectClass *oc,
pmc->i2c_init = pnv_rainier_i2c_init;
}
+static void pnv_machine_power11_class_init(ObjectClass *oc, const void *data)
+{
+ MachineClass *mc = MACHINE_CLASS(oc);
+ PnvMachineClass *pmc = PNV_MACHINE_CLASS(oc);
+ XiveFabricClass *xfc = XIVE_FABRIC_CLASS(oc);
+ static const char compat[] = "qemu,powernv11\0ibm,powernv";
+
+ static GlobalProperty phb_compat[] = {
+ { TYPE_PNV_PHB, "version", "5" },
+ { TYPE_PNV_PHB_ROOT_PORT, "version", "5" },
+ };
+
+ compat_props_add(mc->compat_props, phb_compat, G_N_ELEMENTS(phb_compat));
+
+ pmc->compat = compat;
+ pmc->compat_size = sizeof(compat);
+ pmc->max_smt_threads = 4;
+ pmc->has_lpar_per_thread = true;
+ pmc->quirk_tb_big_core = true;
+ pmc->dt_power_mgt = pnv_dt_power_mgt;
+
+ xfc->match_nvt = pnv11_xive_match_nvt;
+ xfc->broadcast = pnv11_xive_broadcast;
+
+ mc->desc = "IBM PowerNV (Non-Virtualized) Power11";
+ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power11_v2.0");
+
+ object_class_property_add_bool(oc, "big-core",
+ pnv_machine_get_big_core,
+ pnv_machine_set_big_core);
+ object_class_property_set_description(oc, "big-core",
+ "Use big-core (aka fused-core) mode");
+
+ object_class_property_add_bool(oc, "lpar-per-core",
+ pnv_machine_get_lpar_per_core,
+ pnv_machine_set_lpar_per_core);
+ object_class_property_set_description(oc, "lpar-per-core",
+ "Use 1 LPAR per core mode");
+}
+
static void pnv_cpu_do_nmi_on_cpu(CPUState *cs, run_on_cpu_data arg)
{
CPUPPCState *env = cpu_env(cs);
@@ -2963,8 +3566,24 @@ static void pnv_machine_class_init(ObjectClass *oc, const void *data)
.parent = TYPE_PNV10_CHIP, \
}
+#define DEFINE_PNV11_CHIP_TYPE(type, class_initfn) \
+ { \
+ .name = type, \
+ .class_init = class_initfn, \
+ .parent = TYPE_PNV11_CHIP, \
+ }
+
static const TypeInfo types[] = {
{
+ .name = MACHINE_TYPE_NAME("powernv11"),
+ .parent = TYPE_PNV_MACHINE,
+ .class_init = pnv_machine_power11_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_XIVE_FABRIC },
+ { },
+ },
+ },
+ {
.name = MACHINE_TYPE_NAME("powernv10-rainier"),
.parent = MACHINE_TYPE_NAME("powernv10"),
.class_init = pnv_machine_p10_rainier_class_init,
@@ -3019,6 +3638,17 @@ static const TypeInfo types[] = {
},
/*
+ * P11 chip and variants
+ */
+ {
+ .name = TYPE_PNV11_CHIP,
+ .parent = TYPE_PNV_CHIP,
+ .instance_init = pnv_chip_power11_instance_init,
+ .instance_size = sizeof(Pnv11Chip),
+ },
+ DEFINE_PNV11_CHIP_TYPE(TYPE_PNV_CHIP_POWER11, pnv_chip_power11_class_init),
+
+ /*
* P10 chip and variants
*/
{
diff --git a/hw/ppc/pnv_chiptod.c b/hw/ppc/pnv_chiptod.c
index b9e9c7b..f887a18 100644
--- a/hw/ppc/pnv_chiptod.c
+++ b/hw/ppc/pnv_chiptod.c
@@ -210,6 +210,22 @@ static void chiptod_power10_broadcast_ttype(PnvChipTOD *sender,
}
}
+static void chiptod_power11_broadcast_ttype(PnvChipTOD *sender,
+ uint32_t trigger)
+{
+ PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
+ int i;
+
+ for (i = 0; i < pnv->num_chips; i++) {
+ Pnv11Chip *chip11 = PNV11_CHIP(pnv->chips[i]);
+ PnvChipTOD *chiptod = &chip11->chiptod;
+
+ if (chiptod != sender) {
+ chiptod_receive_ttype(chiptod, trigger);
+ }
+ }
+}
+
static PnvCore *pnv_chip_get_core_by_xscom_base(PnvChip *chip,
uint32_t xscom_base)
{
@@ -283,6 +299,12 @@ static PnvCore *chiptod_power10_tx_ttype_target(PnvChipTOD *chiptod,
}
}
+static PnvCore *chiptod_power11_tx_ttype_target(PnvChipTOD *chiptod,
+ uint64_t val)
+{
+ return chiptod_power10_tx_ttype_target(chiptod, val);
+}
+
static void pnv_chiptod_xscom_write(void *opaque, hwaddr addr,
uint64_t val, unsigned size)
{
@@ -520,6 +542,42 @@ static const TypeInfo pnv_chiptod_power10_type_info = {
}
};
+static int pnv_chiptod_power11_dt_xscom(PnvXScomInterface *dev, void *fdt,
+ int xscom_offset)
+{
+ const char compat[] = "ibm,power-chiptod\0ibm,power11-chiptod";
+
+ return pnv_chiptod_dt_xscom(dev, fdt, xscom_offset, compat, sizeof(compat));
+}
+
+static void pnv_chiptod_power11_class_init(ObjectClass *klass, const void *data)
+{
+ PnvChipTODClass *pctc = PNV_CHIPTOD_CLASS(klass);
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ PnvXScomInterfaceClass *xdc = PNV_XSCOM_INTERFACE_CLASS(klass);
+
+ dc->desc = "PowerNV ChipTOD Controller (Power11)";
+ device_class_set_props(dc, pnv_chiptod_properties);
+
+ xdc->dt_xscom = pnv_chiptod_power11_dt_xscom;
+
+ pctc->broadcast_ttype = chiptod_power11_broadcast_ttype;
+ pctc->tx_ttype_target = chiptod_power11_tx_ttype_target;
+
+ pctc->xscom_size = PNV_XSCOM_CHIPTOD_SIZE;
+}
+
+static const TypeInfo pnv_chiptod_power11_type_info = {
+ .name = TYPE_PNV11_CHIPTOD,
+ .parent = TYPE_PNV_CHIPTOD,
+ .instance_size = sizeof(PnvChipTOD),
+ .class_init = pnv_chiptod_power11_class_init,
+ .interfaces = (const InterfaceInfo[]) {
+ { TYPE_PNV_XSCOM_INTERFACE },
+ { }
+ }
+};
+
static void pnv_chiptod_reset(void *dev)
{
PnvChipTOD *chiptod = PNV_CHIPTOD(dev);
@@ -579,6 +637,7 @@ static void pnv_chiptod_register_types(void)
type_register_static(&pnv_chiptod_type_info);
type_register_static(&pnv_chiptod_power9_type_info);
type_register_static(&pnv_chiptod_power10_type_info);
+ type_register_static(&pnv_chiptod_power11_type_info);
}
type_init(pnv_chiptod_register_types);
diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index 08c2022..fb2dfc7 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -473,6 +473,11 @@ static void pnv_core_power10_class_init(ObjectClass *oc, const void *data)
pcc->xscom_size = PNV10_XSCOM_EC_SIZE;
}
+static void pnv_core_power11_class_init(ObjectClass *oc, const void *data)
+{
+ pnv_core_power10_class_init(oc, data);
+}
+
static void pnv_core_class_init(ObjectClass *oc, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
@@ -504,6 +509,7 @@ static const TypeInfo pnv_core_infos[] = {
DEFINE_PNV_CORE_TYPE(power8, "power8nvl_v1.0"),
DEFINE_PNV_CORE_TYPE(power9, "power9_v2.2"),
DEFINE_PNV_CORE_TYPE(power10, "power10_v2.0"),
+ DEFINE_PNV_CORE_TYPE(power11, "power11_v2.0"),
};
DEFINE_TYPES(pnv_core_infos)
@@ -725,6 +731,12 @@ static void pnv_quad_power10_class_init(ObjectClass *oc, const void *data)
pqc->xscom_qme_size = PNV10_XSCOM_QME_SIZE;
}
+static void pnv_quad_power11_class_init(ObjectClass *oc, const void *data)
+{
+ /* Power11 quad is similar to Power10 quad */
+ pnv_quad_power10_class_init(oc, data);
+}
+
static void pnv_quad_class_init(ObjectClass *oc, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
@@ -752,6 +764,11 @@ static const TypeInfo pnv_quad_infos[] = {
.name = PNV_QUAD_TYPE_NAME("power10"),
.class_init = pnv_quad_power10_class_init,
},
+ {
+ .parent = TYPE_PNV_QUAD,
+ .name = PNV_QUAD_TYPE_NAME("power11"),
+ .class_init = pnv_quad_power11_class_init,
+ },
};
DEFINE_TYPES(pnv_quad_infos);
diff --git a/hw/ppc/pnv_occ.c b/hw/ppc/pnv_occ.c
index fa6f31c..24b789c 100644
--- a/hw/ppc/pnv_occ.c
+++ b/hw/ppc/pnv_occ.c
@@ -789,7 +789,7 @@ static bool occ_opal_process_command(PnvOCC *occ,
static bool occ_model_tick(PnvOCC *occ)
{
- struct occ_dynamic_data dynamic_data;
+ QEMU_UNINITIALIZED struct occ_dynamic_data dynamic_data;
if (!occ_read_dynamic_data(occ, &dynamic_data, NULL)) {
/* Can't move OCC state field to safe because we can't map it! */
diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
index 43d0d0e..3e436c7 100644
--- a/hw/ppc/ppc.c
+++ b/hw/ppc/ppc.c
@@ -190,6 +190,7 @@ static void ppc970_set_irq(void *opaque, int pin, int level)
if (level) {
trace_ppc_irq_cpu("stop");
cs->halted = 1;
+ cpu_exit(cs);
} else {
trace_ppc_irq_cpu("restart");
cs->halted = 0;
@@ -386,6 +387,7 @@ static void ppc40x_set_irq(void *opaque, int pin, int level)
if (level) {
trace_ppc_irq_cpu("stop");
cs->halted = 1;
+ cpu_exit(cs);
} else {
trace_ppc_irq_cpu("restart");
cs->halted = 0;
diff --git a/hw/ppc/ppc_booke.c b/hw/ppc/ppc_booke.c
index 3872ae2..13403a5 100644
--- a/hw/ppc/ppc_booke.c
+++ b/hw/ppc/ppc_booke.c
@@ -352,7 +352,12 @@ void ppc_booke_timers_init(PowerPCCPU *cpu, uint32_t freq, uint32_t flags)
booke_timer = g_new0(booke_timer_t, 1);
cpu->env.tb_env = tb_env;
- tb_env->flags = flags | PPC_TIMER_BOOKE | PPC_DECR_ZERO_TRIGGERED;
+ if (flags & PPC_TIMER_PPE) {
+ /* PPE's use a modified version of the booke behavior */
+ tb_env->flags = flags | PPC_DECR_UNDERFLOW_TRIGGERED;
+ } else {
+ tb_env->flags = flags | PPC_TIMER_BOOKE | PPC_DECR_ZERO_TRIGGERED;
+ }
tb_env->tb_freq = freq;
tb_env->decr_freq = freq;
diff --git a/hw/ppc/ppce500_spin.c b/hw/ppc/ppce500_spin.c
index 2310f62..bc70e50 100644
--- a/hw/ppc/ppce500_spin.c
+++ b/hw/ppc/ppce500_spin.c
@@ -99,8 +99,7 @@ static void spin_kick(CPUState *cs, run_on_cpu_data data)
cs->halted = 0;
cs->exception_index = -1;
- cs->stopped = false;
- qemu_cpu_kick(cs);
+ cpu_resume(cs);
}
static void spin_write(void *opaque, hwaddr addr, uint64_t value,
diff --git a/hw/ppc/ppe42_machine.c b/hw/ppc/ppe42_machine.c
new file mode 100644
index 0000000..f14a91b
--- /dev/null
+++ b/hw/ppc/ppe42_machine.c
@@ -0,0 +1,101 @@
+/*
+ * Test Machine for the IBM PPE42 processor
+ *
+ * Copyright (c) 2025, IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qemu/error-report.h"
+#include "system/address-spaces.h"
+#include "hw/boards.h"
+#include "hw/ppc/ppc.h"
+#include "system/system.h"
+#include "system/reset.h"
+#include "system/kvm.h"
+#include "qapi/error.h"
+
+#define TYPE_PPE42_MACHINE MACHINE_TYPE_NAME("ppe42_machine")
+typedef MachineClass Ppe42MachineClass;
+typedef struct Ppe42MachineState Ppe42MachineState;
+DECLARE_OBJ_CHECKERS(Ppe42MachineState, Ppe42MachineClass,
+ PPE42_MACHINE, TYPE_PPE42_MACHINE)
+
+struct Ppe42MachineState {
+ MachineState parent_obj;
+
+ PowerPCCPU cpu;
+};
+
+static void main_cpu_reset(void *opaque)
+{
+ PowerPCCPU *cpu = opaque;
+
+ cpu_reset(CPU(cpu));
+}
+
+static void ppe42_machine_init(MachineState *machine)
+{
+ Ppe42MachineState *pms = PPE42_MACHINE(machine);
+ PowerPCCPU *cpu = &pms->cpu;
+
+ if (kvm_enabled()) {
+ error_report("machine %s does not support the KVM accelerator",
+ MACHINE_GET_CLASS(machine)->name);
+ exit(EXIT_FAILURE);
+ }
+ if (machine->ram_size > 512 * KiB) {
+ error_report("RAM size more than 512 KiB is not supported");
+ exit(1);
+ }
+
+ /* init CPU */
+ object_initialize_child(OBJECT(pms), "cpu", cpu, machine->cpu_type);
+ if (!qdev_realize(DEVICE(cpu), NULL, &error_fatal)) {
+ return;
+ }
+
+ qemu_register_reset(main_cpu_reset, cpu);
+
+ /* This sets the decrementer timebase */
+ ppc_booke_timers_init(cpu, 37500000, PPC_TIMER_PPE);
+
+ /* RAM */
+ memory_region_add_subregion(get_system_memory(), 0xfff80000, machine->ram);
+}
+
+
+static void ppe42_machine_class_init(ObjectClass *oc, const void *data)
+{
+ MachineClass *mc = MACHINE_CLASS(oc);
+ static const char * const valid_cpu_types[] = {
+ POWERPC_CPU_TYPE_NAME("PPE42"),
+ POWERPC_CPU_TYPE_NAME("PPE42X"),
+ POWERPC_CPU_TYPE_NAME("PPE42XM"),
+ NULL,
+ };
+
+ mc->desc = "PPE42 Test Machine";
+ mc->init = ppe42_machine_init;
+ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("PPE42XM");
+ mc->valid_cpu_types = valid_cpu_types;
+ mc->default_ram_id = "ram";
+ mc->default_ram_size = 512 * KiB;
+}
+
+static const TypeInfo ppe42_machine_info = {
+ .name = TYPE_PPE42_MACHINE,
+ .parent = TYPE_MACHINE,
+ .instance_size = sizeof(Ppe42MachineState),
+ .class_init = ppe42_machine_class_init,
+ .class_size = sizeof(Ppe42MachineClass),
+};
+
+static void ppe42_machine_register_types(void)
+{
+ type_register_static(&ppe42_machine_info);
+}
+
+type_init(ppe42_machine_register_types);
diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c
index 7395263..982e40e 100644
--- a/hw/ppc/prep.c
+++ b/hw/ppc/prep.c
@@ -35,6 +35,7 @@
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/log.h"
+#include "qemu/datadir.h"
#include "hw/loader.h"
#include "hw/rtc/mc146818rtc.h"
#include "hw/isa/pc87312.h"
@@ -55,6 +56,8 @@
#define KERNEL_LOAD_ADDR 0x01000000
#define INITRD_LOAD_ADDR 0x01800000
+#define BIOS_ADDR 0xfff00000
+#define BIOS_SIZE (1 * MiB)
#define NVRAM_SIZE 0x2000
static void fw_cfg_boot_set(void *opaque, const char *boot_device,
@@ -241,6 +244,9 @@ static void ibm_40p_init(MachineState *machine)
ISADevice *isa_dev;
ISABus *isa_bus;
void *fw_cfg;
+ MemoryRegion *bios = g_new(MemoryRegion, 1);
+ char *filename;
+ ssize_t bios_size = -1;
uint32_t kernel_base = 0, initrd_base = 0;
long kernel_size = 0, initrd_size = 0;
char boot_device;
@@ -263,10 +269,27 @@ static void ibm_40p_init(MachineState *machine)
cpu_ppc_tb_init(env, 100UL * 1000UL * 1000UL);
qemu_register_reset(ppc_prep_reset, cpu);
+ /* allocate and load firmware */
+ filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
+ if (!filename) {
+ error_report("Could not find bios image '%s'", bios_name);
+ exit(1);
+ }
+ memory_region_init_rom(bios, NULL, "bios", BIOS_SIZE, &error_fatal);
+ memory_region_add_subregion(get_system_memory(), BIOS_ADDR, bios);
+ bios_size = load_elf(filename, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ ELFDATA2MSB, PPC_ELF_MACHINE, 0, 0);
+ if (bios_size < 0) {
+ bios_size = load_image_targphys(filename, BIOS_ADDR, BIOS_SIZE);
+ }
+ if (bios_size < 0 || bios_size > BIOS_SIZE) {
+ error_report("Could not load bios image '%s'", filename);
+ return;
+ }
+ g_free(filename);
+
/* PCI host */
dev = qdev_new("raven-pcihost");
- qdev_prop_set_string(dev, "bios-name", bios_name);
- qdev_prop_set_uint32(dev, "elf-machine", PPC_ELF_MACHINE);
pcihost = SYS_BUS_DEVICE(dev);
object_property_add_child(qdev_get_machine(), "raven", OBJECT(dev));
sysbus_realize_and_unref(pcihost, &error_fatal);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 702f774..97ab6be 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -77,7 +77,6 @@
#include "hw/virtio/virtio-scsi.h"
#include "hw/virtio/vhost-scsi-common.h"
-#include "system/ram_addr.h"
#include "system/confidential-guest-support.h"
#include "hw/usb.h"
#include "qemu/config-file.h"
@@ -577,7 +576,7 @@ static int spapr_dt_dynamic_memory(SpaprMachineState *spapr, void *fdt,
/*
* Adds ibm,dynamic-reconfiguration-memory node.
- * Refer to docs/specs/ppc-spapr-hotplug.txt for the documentation
+ * Refer to docs/specs/ppc-spapr-hotplug.rst for the documentation
* of this device tree node.
*/
static int spapr_dt_dynamic_reconfiguration_memory(SpaprMachineState *spapr,
@@ -907,6 +906,7 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
int rtas;
GString *hypertas = g_string_sized_new(256);
GString *qemu_hypertas = g_string_sized_new(256);
+ uint64_t max_device_addr = 0;
uint32_t lrdr_capacity[] = {
0,
0,
@@ -917,13 +917,15 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
/* Do we have device memory? */
if (MACHINE(spapr)->device_memory) {
- uint64_t max_device_addr = MACHINE(spapr)->device_memory->base +
+ max_device_addr = MACHINE(spapr)->device_memory->base +
memory_region_size(&MACHINE(spapr)->device_memory->mr);
-
- lrdr_capacity[0] = cpu_to_be32(max_device_addr >> 32);
- lrdr_capacity[1] = cpu_to_be32(max_device_addr & 0xffffffff);
+ } else if (ms->ram_size == ms->maxram_size) {
+ max_device_addr = ms->ram_size;
}
+ lrdr_capacity[0] = cpu_to_be32(max_device_addr >> 32);
+ lrdr_capacity[1] = cpu_to_be32(max_device_addr & 0xffffffff);
+
_FDT(rtas = fdt_add_subnode(fdt, 0, "rtas"));
/* hypertas */
@@ -2518,7 +2520,7 @@ static void htab_save_cleanup(void *opaque)
static SaveVMHandlers savevm_htab_handlers = {
.save_setup = htab_save_setup,
.save_live_iterate = htab_save_iterate,
- .save_live_complete_precopy = htab_save_complete,
+ .save_complete = htab_save_complete,
.save_cleanup = htab_save_cleanup,
.load_state = htab_load,
};
@@ -4468,21 +4470,14 @@ static void spapr_pic_print_info(InterruptStatsProvider *obj, GString *buf)
/*
* This is a XIVE only operation
*/
-static int spapr_match_nvt(XiveFabric *xfb, uint8_t format,
- uint8_t nvt_blk, uint32_t nvt_idx,
- bool crowd, bool cam_ignore, uint8_t priority,
- uint32_t logic_serv, XiveTCTXMatch *match)
+static bool spapr_match_nvt(XiveFabric *xfb, uint8_t format,
+ uint8_t nvt_blk, uint32_t nvt_idx,
+ bool crowd, bool cam_ignore, uint8_t priority,
+ uint32_t logic_serv, XiveTCTXMatch *match)
{
SpaprMachineState *spapr = SPAPR_MACHINE(xfb);
XivePresenter *xptr = XIVE_PRESENTER(spapr->active_intc);
XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr);
- int count;
-
- count = xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, crowd, cam_ignore,
- priority, logic_serv, match);
- if (count < 0) {
- return count;
- }
/*
* When we implement the save and restore of the thread interrupt
@@ -4493,12 +4488,14 @@ static int spapr_match_nvt(XiveFabric *xfb, uint8_t format,
* Until this is done, the sPAPR machine should find at least one
* matching context always.
*/
- if (count == 0) {
+ if (!xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, crowd, cam_ignore,
+ priority, logic_serv, match)) {
qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVT %x/%x is not dispatched\n",
nvt_blk, nvt_idx);
+ return false;
}
- return count;
+ return true;
}
int spapr_get_vcpu_id(PowerPCCPU *cpu)
@@ -4767,14 +4764,25 @@ static void spapr_machine_latest_class_options(MachineClass *mc)
DEFINE_SPAPR_MACHINE_IMPL(false, major, minor)
/*
+ * pseries-10.2
+ */
+static void spapr_machine_10_2_class_options(MachineClass *mc)
+{
+ /* Defaults for the latest behaviour inherited from the base class */
+}
+
+DEFINE_SPAPR_MACHINE_AS_LATEST(10, 2);
+
+/*
* pseries-10.1
*/
static void spapr_machine_10_1_class_options(MachineClass *mc)
{
- /* Defaults for the latest behaviour inherited from the base class */
+ spapr_machine_10_2_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_10_1, hw_compat_10_1_len);
}
-DEFINE_SPAPR_MACHINE_AS_LATEST(10, 1);
+DEFINE_SPAPR_MACHINE(10, 1);
/*
* pseries-10.0
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index f2f5722..0f94c19 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -27,7 +27,6 @@
#include "qapi/error.h"
#include "qapi/visitor.h"
#include "system/hw_accel.h"
-#include "system/ram_addr.h"
#include "target/ppc/cpu.h"
#include "target/ppc/mmu-hash64.h"
#include "cpu-models.h"
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 1e936f3..8c1e0a4 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -8,7 +8,7 @@
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "qemu/error-report.h"
-#include "exec/tb-flush.h"
+#include "exec/translation-block.h"
#include "exec/target_page.h"
#include "helper_regs.h"
#include "hw/ppc/ppc.h"
@@ -301,7 +301,7 @@ static target_ulong h_page_init(PowerPCCPU *cpu, SpaprMachineState *spapr,
if (kvm_enabled()) {
kvmppc_icbi_range(cpu, pdst, len);
} else if (tcg_enabled()) {
- tb_flush(CPU(cpu));
+ tb_invalidate_phys_range(CPU(cpu), dst, dst + len - 1);
} else {
g_assert_not_reached();
}
@@ -509,8 +509,8 @@ static target_ulong h_cede(PowerPCCPU *cpu, SpaprMachineState *spapr,
if (!cpu_has_work(cs)) {
cs->halted = 1;
cs->exception_index = EXCP_HLT;
- cs->exit_request = 1;
ppc_maybe_interrupt(env);
+ cpu_exit(cs);
}
return H_SUCCESS;
@@ -531,8 +531,8 @@ static target_ulong h_confer_self(PowerPCCPU *cpu)
}
cs->halted = 1;
cs->exception_index = EXCP_HALTED;
- cs->exit_request = 1;
ppc_maybe_interrupt(&cpu->env);
+ cpu_exit(cs);
return H_SUCCESS;
}
@@ -624,8 +624,7 @@ static target_ulong h_confer(PowerPCCPU *cpu, SpaprMachineState *spapr,
}
cs->exception_index = EXCP_YIELD;
- cs->exit_request = 1;
- cpu_loop_exit(cs);
+ cpu_exit(cs);
return H_SUCCESS;
}
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 1ac1185..f909555 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -34,7 +34,6 @@
#include "hw/pci/pci_host.h"
#include "hw/ppc/spapr.h"
#include "hw/pci-host/spapr.h"
-#include "system/ram_addr.h"
#include <libfdt.h>
#include "trace.h"
#include "qemu/error-report.h"
diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
index e318d0d..a748a0b 100644
--- a/hw/ppc/spapr_pci_vfio.c
+++ b/hw/ppc/spapr_pci_vfio.c
@@ -24,7 +24,7 @@
#include "hw/pci-host/spapr.h"
#include "hw/pci/msix.h"
#include "hw/pci/pci_device.h"
-#include "hw/vfio/vfio-container.h"
+#include "hw/vfio/vfio-container-legacy.h"
#include "qemu/error-report.h"
#include CONFIG_DEVICES /* CONFIG_VFIO_PCI */
@@ -32,7 +32,7 @@
* Interfaces for IBM EEH (Enhanced Error Handling)
*/
#ifdef CONFIG_VFIO_PCI
-static bool vfio_eeh_container_ok(VFIOContainer *container)
+static bool vfio_eeh_container_ok(VFIOLegacyContainer *container)
{
/*
* As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO
@@ -60,7 +60,7 @@ static bool vfio_eeh_container_ok(VFIOContainer *container)
return true;
}
-static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op)
+static int vfio_eeh_container_op(VFIOLegacyContainer *container, uint32_t op)
{
struct vfio_eeh_pe_op pe_op = {
.argsz = sizeof(pe_op),
@@ -83,10 +83,10 @@ static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op)
return ret;
}
-static VFIOContainer *vfio_eeh_as_container(AddressSpace *as)
+static VFIOLegacyContainer *vfio_eeh_as_container(AddressSpace *as)
{
VFIOAddressSpace *space = vfio_address_space_get(as);
- VFIOContainerBase *bcontainer = NULL;
+ VFIOContainer *bcontainer = NULL;
if (QLIST_EMPTY(&space->containers)) {
/* No containers to act on */
@@ -106,19 +106,19 @@ static VFIOContainer *vfio_eeh_as_container(AddressSpace *as)
out:
vfio_address_space_put(space);
- return container_of(bcontainer, VFIOContainer, bcontainer);
+ return VFIO_IOMMU_LEGACY(bcontainer);
}
static bool vfio_eeh_as_ok(AddressSpace *as)
{
- VFIOContainer *container = vfio_eeh_as_container(as);
+ VFIOLegacyContainer *container = vfio_eeh_as_container(as);
return (container != NULL) && vfio_eeh_container_ok(container);
}
static int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
{
- VFIOContainer *container = vfio_eeh_as_container(as);
+ VFIOLegacyContainer *container = vfio_eeh_as_container(as);
if (!container) {
return -ENODEV;
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 78309db..143bc8c 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -221,7 +221,7 @@ static void rtas_stop_self(PowerPCCPU *cpu, SpaprMachineState *spapr,
cs->halted = 1;
ppc_store_lpcr(cpu, env->spr[SPR_LPCR] & ~pcc->lpcr_pm);
kvmppc_set_reg_ppc_online(cpu, 0);
- qemu_cpu_kick(cs);
+ cpu_exit(cs);
}
static void rtas_ibm_suspend_me(PowerPCCPU *cpu, SpaprMachineState *spapr,
diff --git a/hw/ppc/spapr_tpm_proxy.c b/hw/ppc/spapr_tpm_proxy.c
index 862eeaa..1297b3a 100644
--- a/hw/ppc/spapr_tpm_proxy.c
+++ b/hw/ppc/spapr_tpm_proxy.c
@@ -41,8 +41,8 @@ static ssize_t tpm_execute(SpaprTpmProxy *tpm_proxy, target_ulong *args)
target_ulong data_in_size = args[2];
uint64_t data_out = ppc64_phys_to_real(args[3]);
target_ulong data_out_size = args[4];
- uint8_t buf_in[TPM_SPAPR_BUFSIZE];
- uint8_t buf_out[TPM_SPAPR_BUFSIZE];
+ QEMU_UNINITIALIZED uint8_t buf_in[TPM_SPAPR_BUFSIZE];
+ QEMU_UNINITIALIZED uint8_t buf_out[TPM_SPAPR_BUFSIZE];
ssize_t ret;
trace_spapr_tpm_execute(data_in, data_in_size, data_out, data_out_size);
diff --git a/hw/remote/memory.c b/hw/remote/memory.c
index 00193a5..8195aa5 100644
--- a/hw/remote/memory.c
+++ b/hw/remote/memory.c
@@ -11,7 +11,6 @@
#include "qemu/osdep.h"
#include "hw/remote/memory.h"
-#include "system/ram_addr.h"
#include "qapi/error.h"
static void remote_sysmem_reset(void)
diff --git a/hw/remote/proxy-memory-listener.c b/hw/remote/proxy-memory-listener.c
index 30ac749..e1a52d2 100644
--- a/hw/remote/proxy-memory-listener.c
+++ b/hw/remote/proxy-memory-listener.c
@@ -12,7 +12,6 @@
#include "qemu/range.h"
#include "system/memory.h"
#include "exec/cpu-common.h"
-#include "system/ram_addr.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "hw/remote/mpqemu-link.h"
diff --git a/hw/remote/proxy.c b/hw/remote/proxy.c
index b0165aa..18e0f7a 100644
--- a/hw/remote/proxy.c
+++ b/hw/remote/proxy.c
@@ -112,8 +112,12 @@ static void pci_proxy_dev_realize(PCIDevice *device, Error **errp)
return;
}
+ if (!qio_channel_set_blocking(dev->ioc, true, errp)) {
+ object_unref(dev->ioc);
+ return;
+ }
+
qemu_mutex_init(&dev->io_mutex);
- qio_channel_set_blocking(dev->ioc, true, NULL);
pci_conf[PCI_LATENCY_TIMER] = 0xff;
pci_conf[PCI_INTERRUPT_PIN] = 0x01;
diff --git a/hw/remote/remote-obj.c b/hw/remote/remote-obj.c
index 8588290..3402068 100644
--- a/hw/remote/remote-obj.c
+++ b/hw/remote/remote-obj.c
@@ -107,7 +107,11 @@ static void remote_object_machine_done(Notifier *notifier, void *data)
error_report_err(err);
return;
}
- qio_channel_set_blocking(ioc, false, NULL);
+ if (!qio_channel_set_blocking(ioc, false, &err)) {
+ error_report_err(err);
+ object_unref(OBJECT(ioc));
+ return;
+ }
o->dev = dev;
diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
index ea6165e..216b487 100644
--- a/hw/remote/vfio-user-obj.c
+++ b/hw/remote/vfio-user-obj.c
@@ -75,12 +75,17 @@ OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
*/
#define VFU_OBJECT_ERROR(o, fmt, ...) \
{ \
+ error_report((fmt), ## __VA_ARGS__); \
if (vfu_object_auto_shutdown()) { \
- error_setg(&error_abort, (fmt), ## __VA_ARGS__); \
- } else { \
- error_report((fmt), ## __VA_ARGS__); \
+ /* \
+ * FIXME This looks inappropriate. The error is serious \
+ * enough programming error to warrant aborting the process \
+ * when auto-shutdown is enabled, yet harmless enough to \
+ * permit carrying on when it's disabled. Makes no sense. \
+ */ \
+ abort(); \
} \
- } \
+ }
struct VfuObjectClass {
ObjectClass parent_class;
diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
index e6a0ac1..fc9c35b 100644
--- a/hw/riscv/Kconfig
+++ b/hw/riscv/Kconfig
@@ -119,3 +119,12 @@ config SPIKE
select HTIF
select RISCV_ACLINT
select SIFIVE_PLIC
+
+config XIANGSHAN_KUNMINGHU
+ bool
+ default y
+ depends on RISCV64
+ select RISCV_ACLINT
+ select RISCV_APLIC
+ select RISCV_IMSIC
+ select SERIAL_MM
diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
index 765b9e2..828a867 100644
--- a/hw/riscv/boot.c
+++ b/hw/riscv/boot.c
@@ -37,7 +37,7 @@
bool riscv_is_32bit(RISCVHartArrayState *harts)
{
RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(&harts->harts[0]);
- return mcc->misa_mxl_max == MXL_RV32;
+ return mcc->def->misa_mxl_max == MXL_RV32;
}
/*
diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build
index c22f3a7..2a8d5b1 100644
--- a/hw/riscv/meson.build
+++ b/hw/riscv/meson.build
@@ -13,5 +13,6 @@ riscv_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c'))
riscv_ss.add(when: 'CONFIG_RISCV_IOMMU', if_true: files(
'riscv-iommu.c', 'riscv-iommu-pci.c', 'riscv-iommu-sys.c', 'riscv-iommu-hpm.c'))
riscv_ss.add(when: 'CONFIG_MICROBLAZE_V', if_true: files('microblaze-v-generic.c'))
+riscv_ss.add(when: 'CONFIG_XIANGSHAN_KUNMINGHU', if_true: files('xiangshan_kmh.c'))
hw_arch += {'riscv': riscv_ss}
diff --git a/hw/riscv/riscv-iommu-bits.h b/hw/riscv/riscv-iommu-bits.h
index 1017d73..47fe01b 100644
--- a/hw/riscv/riscv-iommu-bits.h
+++ b/hw/riscv/riscv-iommu-bits.h
@@ -79,6 +79,7 @@ struct riscv_iommu_pq_record {
#define RISCV_IOMMU_CAP_SV39 BIT_ULL(9)
#define RISCV_IOMMU_CAP_SV48 BIT_ULL(10)
#define RISCV_IOMMU_CAP_SV57 BIT_ULL(11)
+#define RISCV_IOMMU_CAP_SVRSW60T59B BIT_ULL(14)
#define RISCV_IOMMU_CAP_SV32X4 BIT_ULL(16)
#define RISCV_IOMMU_CAP_SV39X4 BIT_ULL(17)
#define RISCV_IOMMU_CAP_SV48X4 BIT_ULL(18)
diff --git a/hw/riscv/riscv-iommu-pci.c b/hw/riscv/riscv-iommu-pci.c
index 1f44eef..cdb4a7a 100644
--- a/hw/riscv/riscv-iommu-pci.c
+++ b/hw/riscv/riscv-iommu-pci.c
@@ -68,12 +68,6 @@ typedef struct RISCVIOMMUStatePci {
RISCVIOMMUState iommu; /* common IOMMU state */
} RISCVIOMMUStatePci;
-struct RISCVIOMMUPciClass {
- /*< public >*/
- DeviceRealize parent_realize;
- ResettablePhases parent_phases;
-};
-
/* interrupt delivery callback */
static void riscv_iommu_pci_notify(RISCVIOMMUState *iommu, unsigned vector)
{
diff --git a/hw/riscv/riscv-iommu-sys.c b/hw/riscv/riscv-iommu-sys.c
index 74e76b9..e34d00a 100644
--- a/hw/riscv/riscv-iommu-sys.c
+++ b/hw/riscv/riscv-iommu-sys.c
@@ -53,12 +53,6 @@ struct RISCVIOMMUStateSys {
uint8_t *msix_pba;
};
-struct RISCVIOMMUSysClass {
- /*< public >*/
- DeviceRealize parent_realize;
- ResettablePhases parent_phases;
-};
-
static uint64_t msix_table_mmio_read(void *opaque, hwaddr addr,
unsigned size)
{
diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c
index a877e5d..b33c7fe 100644
--- a/hw/riscv/riscv-iommu.c
+++ b/hw/riscv/riscv-iommu.c
@@ -558,6 +558,7 @@ static MemTxResult riscv_iommu_msi_write(RISCVIOMMUState *s,
MemTxResult res;
dma_addr_t addr;
uint64_t intn;
+ size_t offset;
uint32_t n190;
uint64_t pte[2];
int fault_type = RISCV_IOMMU_FQ_TTYPE_UADDR_WR;
@@ -565,16 +566,18 @@ static MemTxResult riscv_iommu_msi_write(RISCVIOMMUState *s,
/* Interrupt File Number */
intn = riscv_iommu_pext_u64(PPN_DOWN(gpa), ctx->msi_addr_mask);
- if (intn >= 256) {
+ offset = intn * sizeof(pte);
+
+ /* fetch MSI PTE */
+ addr = PPN_PHYS(get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_PPN));
+ if (addr & offset) {
/* Interrupt file number out of range */
res = MEMTX_ACCESS_ERROR;
cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT;
goto err;
}
- /* fetch MSI PTE */
- addr = PPN_PHYS(get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_PPN));
- addr = addr | (intn * sizeof(pte));
+ addr |= offset;
res = dma_memory_read(s->target_as, addr, &pte, sizeof(pte),
MEMTXATTRS_UNSPECIFIED);
if (res != MEMTX_OK) {
@@ -866,6 +869,145 @@ static bool riscv_iommu_validate_process_ctx(RISCVIOMMUState *s,
return true;
}
+/**
+ * pdt_memory_read: PDT wrapper of dma_memory_read.
+ *
+ * @s: IOMMU Device State
+ * @ctx: Device Translation Context with devid and pasid set
+ * @addr: address within that address space
+ * @buf: buffer with the data transferred
+ * @len: length of the data transferred
+ * @attrs: memory transaction attributes
+ */
+static MemTxResult pdt_memory_read(RISCVIOMMUState *s,
+ RISCVIOMMUContext *ctx,
+ dma_addr_t addr,
+ void *buf, dma_addr_t len,
+ MemTxAttrs attrs)
+{
+ uint64_t gatp_mode, pte;
+ struct {
+ unsigned char step;
+ unsigned char levels;
+ unsigned char ptidxbits;
+ unsigned char ptesize;
+ } sc;
+ MemTxResult ret;
+ dma_addr_t base = addr;
+
+ /* G stages translation mode */
+ gatp_mode = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD);
+ if (gatp_mode == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) {
+ goto out;
+ }
+
+ /* G stages translation tables root pointer */
+ base = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD));
+
+ /* Start at step 0 */
+ sc.step = 0;
+
+ if (s->fctl & RISCV_IOMMU_FCTL_GXL) {
+ /* 32bit mode for GXL == 1 */
+ switch (gatp_mode) {
+ case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4:
+ if (!(s->cap & RISCV_IOMMU_CAP_SV32X4)) {
+ return MEMTX_ACCESS_ERROR;
+ }
+ sc.levels = 2;
+ sc.ptidxbits = 10;
+ sc.ptesize = 4;
+ break;
+ default:
+ return MEMTX_ACCESS_ERROR;
+ }
+ } else {
+ /* 64bit mode for GXL == 0 */
+ switch (gatp_mode) {
+ case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4:
+ if (!(s->cap & RISCV_IOMMU_CAP_SV39X4)) {
+ return MEMTX_ACCESS_ERROR;
+ }
+ sc.levels = 3;
+ sc.ptidxbits = 9;
+ sc.ptesize = 8;
+ break;
+ case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4:
+ if (!(s->cap & RISCV_IOMMU_CAP_SV48X4)) {
+ return MEMTX_ACCESS_ERROR;
+ }
+ sc.levels = 4;
+ sc.ptidxbits = 9;
+ sc.ptesize = 8;
+ break;
+ case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4:
+ if (!(s->cap & RISCV_IOMMU_CAP_SV57X4)) {
+ return MEMTX_ACCESS_ERROR;
+ }
+ sc.levels = 5;
+ sc.ptidxbits = 9;
+ sc.ptesize = 8;
+ break;
+ default:
+ return MEMTX_ACCESS_ERROR;
+ }
+ }
+
+ do {
+ const unsigned va_bits = (sc.step ? 0 : 2) + sc.ptidxbits;
+ const unsigned va_skip = TARGET_PAGE_BITS + sc.ptidxbits *
+ (sc.levels - 1 - sc.step);
+ const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1);
+ const dma_addr_t pte_addr = base + idx * sc.ptesize;
+
+ /* Address range check before first level lookup */
+ if (!sc.step) {
+ const uint64_t va_mask = (1ULL << (va_skip + va_bits)) - 1;
+ if ((addr & va_mask) != addr) {
+ return MEMTX_ACCESS_ERROR;
+ }
+ }
+
+ /* Read page table entry */
+ if (sc.ptesize == 4) {
+ uint32_t pte32 = 0;
+ ret = ldl_le_dma(s->target_as, pte_addr, &pte32, attrs);
+ pte = pte32;
+ } else {
+ ret = ldq_le_dma(s->target_as, pte_addr, &pte, attrs);
+ }
+ if (ret != MEMTX_OK) {
+ return ret;
+ }
+
+ sc.step++;
+ hwaddr ppn = pte >> PTE_PPN_SHIFT;
+
+ if (!(pte & PTE_V)) {
+ return MEMTX_ACCESS_ERROR; /* Invalid PTE */
+ } else if (!(pte & (PTE_R | PTE_W | PTE_X))) {
+ base = PPN_PHYS(ppn); /* Inner PTE, continue walking */
+ } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) {
+ return MEMTX_ACCESS_ERROR; /* Reserved leaf PTE flags: PTE_W */
+ } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) {
+ return MEMTX_ACCESS_ERROR; /* Reserved leaf PTE flags: PTE_W + PTE_X */
+ } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) {
+ return MEMTX_ACCESS_ERROR; /* Misaligned PPN */
+ } else {
+ /* Leaf PTE, translation completed. */
+ base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1));
+ break;
+ }
+
+ if (sc.step == sc.levels) {
+ return MEMTX_ACCESS_ERROR; /* Can't find leaf PTE */
+ }
+ } while (1);
+
+out:
+ return dma_memory_read(s->target_as, base, buf, len, attrs);
+}
+
/*
* RISC-V IOMMU Device Context Loopkup - Device Directory Tree Walk
*
@@ -1038,7 +1180,7 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
*/
const int split = depth * 9 + 8;
addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK;
- if (dma_memory_read(s->target_as, addr, &de, sizeof(de),
+ if (pdt_memory_read(s, ctx, addr, &de, sizeof(de),
MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
}
@@ -1053,7 +1195,7 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
/* Leaf entry in PDT */
addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK;
- if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2,
+ if (pdt_memory_read(s, ctx, addr, &dc.ta, sizeof(uint64_t) * 2,
MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) {
return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT;
}
@@ -1935,11 +2077,7 @@ static void riscv_iommu_process_dbg(RISCVIOMMUState *s)
iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10);
} else {
iova = iotlb.translated_addr & ~iotlb.addr_mask;
- iova >>= TARGET_PAGE_BITS;
- iova &= RISCV_IOMMU_TR_RESPONSE_PPN;
-
- /* We do not support superpages (> 4kbs) for now */
- iova &= ~RISCV_IOMMU_TR_RESPONSE_S;
+ iova = set_field(0, RISCV_IOMMU_TR_RESPONSE_PPN, PPN_DOWN(iova));
}
riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova);
}
@@ -2355,7 +2493,8 @@ static void riscv_iommu_realize(DeviceState *dev, Error **errp)
}
if (s->enable_g_stage) {
s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 |
- RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4;
+ RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4 |
+ RISCV_IOMMU_CAP_SVRSW60T59B;
}
if (s->hpm_cntrs > 0) {
diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c
index 1eef2fb..f1406cb 100644
--- a/hw/riscv/virt-acpi-build.c
+++ b/hw/riscv/virt-acpi-build.c
@@ -270,11 +270,8 @@ spcr_setup(GArray *table_data, BIOSLinker *linker, RISCVVirtState *s)
#define RHCT_NODE_ARRAY_OFFSET 56
/*
- * ACPI spec, Revision 6.5+
- * 5.2.36 RISC-V Hart Capabilities Table (RHCT)
- * REF: https://github.com/riscv-non-isa/riscv-acpi/issues/16
- * https://drive.google.com/file/d/1nP3nFiH4jkPMp6COOxP6123DCZKR-tia/view
- * https://drive.google.com/file/d/1sKbOa8m1UZw1JkquZYe3F1zQBN1xXsaf/view
+ * ACPI spec, Revision 6.6
+ * 5.2.37 RISC-V Hart Capabilities Table (RHCT)
*/
static void build_rhct(GArray *table_data,
BIOSLinker *linker,
@@ -287,7 +284,7 @@ static void build_rhct(GArray *table_data,
uint32_t isa_offset, num_rhct_nodes, cmo_offset = 0;
RISCVCPU *cpu = &s->soc[0].harts[0];
uint32_t mmu_offset = 0;
- uint8_t satp_mode_max;
+ bool rv32 = riscv_cpu_is_32bit(cpu);
g_autofree char *isa = NULL;
AcpiTable table = { .sig = "RHCT", .rev = 1, .oem_id = s->oem_id,
@@ -307,7 +304,7 @@ static void build_rhct(GArray *table_data,
num_rhct_nodes++;
}
- if (cpu->cfg.satp_mode.supported != 0) {
+ if (!rv32 && cpu->cfg.max_satp_mode >= VM_1_10_SV39) {
num_rhct_nodes++;
}
@@ -367,22 +364,21 @@ static void build_rhct(GArray *table_data,
}
/* MMU node structure */
- if (cpu->cfg.satp_mode.supported != 0) {
- satp_mode_max = satp_mode_max_from_map(cpu->cfg.satp_mode.map);
+ if (!rv32 && cpu->cfg.max_satp_mode >= VM_1_10_SV39) {
mmu_offset = table_data->len - table.table_offset;
build_append_int_noprefix(table_data, 2, 2); /* Type */
build_append_int_noprefix(table_data, 8, 2); /* Length */
build_append_int_noprefix(table_data, 0x1, 2); /* Revision */
build_append_int_noprefix(table_data, 0, 1); /* Reserved */
/* MMU Type */
- if (satp_mode_max == VM_1_10_SV57) {
+ if (cpu->cfg.max_satp_mode == VM_1_10_SV57) {
build_append_int_noprefix(table_data, 2, 1); /* Sv57 */
- } else if (satp_mode_max == VM_1_10_SV48) {
+ } else if (cpu->cfg.max_satp_mode == VM_1_10_SV48) {
build_append_int_noprefix(table_data, 1, 1); /* Sv48 */
- } else if (satp_mode_max == VM_1_10_SV39) {
+ } else if (cpu->cfg.max_satp_mode == VM_1_10_SV39) {
build_append_int_noprefix(table_data, 0, 1); /* Sv39 */
} else {
- assert(1);
+ g_assert_not_reached();
}
}
@@ -422,7 +418,10 @@ static void build_rhct(GArray *table_data,
acpi_table_end(linker, &table);
}
-/* FADT */
+/*
+ * ACPI spec, Revision 6.6
+ * 5.2.9 Fixed ACPI Description Table (MADT)
+ */
static void build_fadt_rev6(GArray *table_data,
BIOSLinker *linker,
RISCVVirtState *s,
@@ -430,7 +429,7 @@ static void build_fadt_rev6(GArray *table_data,
{
AcpiFadtData fadt = {
.rev = 6,
- .minor_ver = 5,
+ .minor_ver = 6,
.flags = 1 << ACPI_FADT_F_HW_REDUCED_ACPI,
.xdsdt_tbl_offset = &dsdt_tbl_offset,
};
@@ -509,11 +508,8 @@ static void build_dsdt(GArray *table_data,
}
/*
- * ACPI spec, Revision 6.5+
+ * ACPI spec, Revision 6.6
* 5.2.12 Multiple APIC Description Table (MADT)
- * REF: https://github.com/riscv-non-isa/riscv-acpi/issues/15
- * https://drive.google.com/file/d/1R6k4MshhN3WTT-hwqAquu5nX6xSEqK2l/view
- * https://drive.google.com/file/d/1oMGPyOD58JaPgMl1pKasT-VKsIKia7zR/view
*/
static void build_madt(GArray *table_data,
BIOSLinker *linker,
@@ -538,7 +534,7 @@ static void build_madt(GArray *table_data,
hart_index_bits = imsic_num_bits(imsic_max_hart_per_socket);
- AcpiTable table = { .sig = "APIC", .rev = 6, .oem_id = s->oem_id,
+ AcpiTable table = { .sig = "APIC", .rev = 7, .oem_id = s->oem_id,
.oem_table_id = s->oem_table_id };
acpi_table_begin(&table, table_data);
@@ -813,10 +809,8 @@ static void build_rimt(GArray *table_data, BIOSLinker *linker,
}
/*
- * ACPI spec, Revision 6.5+
+ * ACPI spec, Revision 6.6
* 5.2.16 System Resource Affinity Table (SRAT)
- * REF: https://github.com/riscv-non-isa/riscv-acpi/issues/25
- * https://drive.google.com/file/d/1YTdDx2IPm5IeZjAW932EYU-tUtgS08tX/view
*/
static void
build_srat(GArray *table_data, BIOSLinker *linker, RISCVVirtState *vms)
@@ -895,7 +889,10 @@ static void virt_acpi_build(RISCVVirtState *s, AcpiBuildTables *tables)
}
acpi_add_table(table_offsets, tables_blob);
- spcr_setup(tables_blob, tables->linker, s);
+
+ if (ms->acpi_spcr_enabled) {
+ spcr_setup(tables_blob, tables->linker, s);
+ }
acpi_add_table(table_offsets, tables_blob);
{
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 0dcced1..47e573f 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -237,10 +237,10 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int socket,
uint32_t cpu_phandle;
MachineState *ms = MACHINE(s);
bool is_32_bit = riscv_is_32bit(&s->soc[0]);
- uint8_t satp_mode_max;
for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) {
RISCVCPU *cpu_ptr = &s->soc[socket].harts[cpu];
+ int8_t satp_mode_max = cpu_ptr->cfg.max_satp_mode;
g_autofree char *cpu_name = NULL;
g_autofree char *core_name = NULL;
g_autofree char *intc_name = NULL;
@@ -252,8 +252,7 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int socket,
s->soc[socket].hartid_base + cpu);
qemu_fdt_add_subnode(ms->fdt, cpu_name);
- if (cpu_ptr->cfg.satp_mode.supported != 0) {
- satp_mode_max = satp_mode_max_from_map(cpu_ptr->cfg.satp_mode.map);
+ if (satp_mode_max != -1) {
sv_name = g_strdup_printf("riscv,%s",
satp_mode_str(satp_mode_max, is_32_bit));
qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type", sv_name);
@@ -312,8 +311,7 @@ static void create_fdt_socket_memory(RISCVVirtState *s, int socket)
size = riscv_socket_mem_size(ms, socket);
mem_name = g_strdup_printf("/memory@%"HWADDR_PRIx, addr);
qemu_fdt_add_subnode(ms->fdt, mem_name);
- qemu_fdt_setprop_cells(ms->fdt, mem_name, "reg",
- addr >> 32, addr, size >> 32, size);
+ qemu_fdt_setprop_sized_cells(ms->fdt, mem_name, "reg", 2, addr, 2, size);
qemu_fdt_setprop_string(ms->fdt, mem_name, "device_type", "memory");
riscv_socket_fdt_write_id(ms, mem_name, socket);
}
@@ -325,7 +323,7 @@ static void create_fdt_socket_clint(RISCVVirtState *s,
int cpu;
g_autofree char *clint_name = NULL;
g_autofree uint32_t *clint_cells = NULL;
- unsigned long clint_addr;
+ hwaddr clint_addr;
MachineState *ms = MACHINE(s);
static const char * const clint_compat[2] = {
"sifive,clint0", "riscv,clint0"
@@ -341,14 +339,14 @@ static void create_fdt_socket_clint(RISCVVirtState *s,
}
clint_addr = s->memmap[VIRT_CLINT].base +
- (s->memmap[VIRT_CLINT].size * socket);
- clint_name = g_strdup_printf("/soc/clint@%lx", clint_addr);
+ s->memmap[VIRT_CLINT].size * socket;
+ clint_name = g_strdup_printf("/soc/clint@%"HWADDR_PRIx, clint_addr);
qemu_fdt_add_subnode(ms->fdt, clint_name);
qemu_fdt_setprop_string_array(ms->fdt, clint_name, "compatible",
(char **)&clint_compat,
ARRAY_SIZE(clint_compat));
- qemu_fdt_setprop_cells(ms->fdt, clint_name, "reg",
- 0x0, clint_addr, 0x0, s->memmap[VIRT_CLINT].size);
+ qemu_fdt_setprop_sized_cells(ms->fdt, clint_name, "reg",
+ 2, clint_addr, 2, s->memmap[VIRT_CLINT].size);
qemu_fdt_setprop(ms->fdt, clint_name, "interrupts-extended",
clint_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4);
riscv_socket_fdt_write_id(ms, clint_name, socket);
@@ -389,8 +387,8 @@ static void create_fdt_socket_aclint(RISCVVirtState *s,
qemu_fdt_add_subnode(ms->fdt, name);
qemu_fdt_setprop_string(ms->fdt, name, "compatible",
"riscv,aclint-mswi");
- qemu_fdt_setprop_cells(ms->fdt, name, "reg",
- 0x0, addr, 0x0, RISCV_ACLINT_SWI_SIZE);
+ qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg",
+ 2, addr, 2, RISCV_ACLINT_SWI_SIZE);
qemu_fdt_setprop(ms->fdt, name, "interrupts-extended",
aclint_mswi_cells, aclint_cells_size);
qemu_fdt_setprop(ms->fdt, name, "interrupt-controller", NULL, 0);
@@ -412,11 +410,11 @@ static void create_fdt_socket_aclint(RISCVVirtState *s,
qemu_fdt_add_subnode(ms->fdt, name);
qemu_fdt_setprop_string(ms->fdt, name, "compatible",
"riscv,aclint-mtimer");
- qemu_fdt_setprop_cells(ms->fdt, name, "reg",
- 0x0, addr + RISCV_ACLINT_DEFAULT_MTIME,
- 0x0, size - RISCV_ACLINT_DEFAULT_MTIME,
- 0x0, addr + RISCV_ACLINT_DEFAULT_MTIMECMP,
- 0x0, RISCV_ACLINT_DEFAULT_MTIME);
+ qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg",
+ 2, addr + RISCV_ACLINT_DEFAULT_MTIME,
+ 2, size - RISCV_ACLINT_DEFAULT_MTIME,
+ 2, addr + RISCV_ACLINT_DEFAULT_MTIMECMP,
+ 2, RISCV_ACLINT_DEFAULT_MTIME);
qemu_fdt_setprop(ms->fdt, name, "interrupts-extended",
aclint_mtimer_cells, aclint_cells_size);
riscv_socket_fdt_write_id(ms, name, socket);
@@ -430,8 +428,8 @@ static void create_fdt_socket_aclint(RISCVVirtState *s,
qemu_fdt_add_subnode(ms->fdt, name);
qemu_fdt_setprop_string(ms->fdt, name, "compatible",
"riscv,aclint-sswi");
- qemu_fdt_setprop_cells(ms->fdt, name, "reg",
- 0x0, addr, 0x0, s->memmap[VIRT_ACLINT_SSWI].size);
+ qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg",
+ 2, addr, 2, s->memmap[VIRT_ACLINT_SSWI].size);
qemu_fdt_setprop(ms->fdt, name, "interrupts-extended",
aclint_sswi_cells, aclint_cells_size);
qemu_fdt_setprop(ms->fdt, name, "interrupt-controller", NULL, 0);
@@ -495,8 +493,8 @@ static void create_fdt_socket_plic(RISCVVirtState *s,
s->soc[socket].num_harts * sizeof(uint32_t) * 4);
}
- qemu_fdt_setprop_cells(ms->fdt, plic_name, "reg",
- 0x0, plic_addr, 0x0, s->memmap[VIRT_PLIC].size);
+ qemu_fdt_setprop_sized_cells(ms->fdt, plic_name, "reg",
+ 2, plic_addr, 2, s->memmap[VIRT_PLIC].size);
qemu_fdt_setprop_cell(ms->fdt, plic_name, "riscv,ndev",
VIRT_IRQCHIP_NUM_SOURCES - 1);
riscv_socket_fdt_write_id(ms, plic_name, socket);
@@ -657,8 +655,8 @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket,
qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", msi_phandle);
}
- qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg",
- 0x0, aplic_addr, 0x0, aplic_size);
+ qemu_fdt_setprop_sized_cells(ms->fdt, aplic_name, "reg",
+ 2, aplic_addr, 2, aplic_size);
qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources",
VIRT_IRQCHIP_NUM_SOURCES);
@@ -858,9 +856,7 @@ static void create_fdt_virtio(RISCVVirtState *s, uint32_t irq_virtio_phandle)
qemu_fdt_add_subnode(ms->fdt, name);
qemu_fdt_setprop_string(ms->fdt, name, "compatible", "virtio,mmio");
- qemu_fdt_setprop_cells(ms->fdt, name, "reg",
- 0x0, addr,
- 0x0, size);
+ qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", 2, addr, 2, size);
qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent",
irq_virtio_phandle);
if (s->aia_type == VIRT_AIA_TYPE_NONE) {
@@ -898,8 +894,8 @@ static void create_fdt_pcie(RISCVVirtState *s,
if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) {
qemu_fdt_setprop_cell(ms->fdt, name, "msi-parent", msi_pcie_phandle);
}
- qemu_fdt_setprop_cells(ms->fdt, name, "reg", 0,
- s->memmap[VIRT_PCIE_ECAM].base, 0, s->memmap[VIRT_PCIE_ECAM].size);
+ qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", 2,
+ s->memmap[VIRT_PCIE_ECAM].base, 2, s->memmap[VIRT_PCIE_ECAM].size);
qemu_fdt_setprop_sized_cells(ms->fdt, name, "ranges",
1, FDT_PCI_RANGE_IOPORT, 2, 0,
2, s->memmap[VIRT_PCIE_PIO].base, 2, s->memmap[VIRT_PCIE_PIO].size,
@@ -936,8 +932,9 @@ static void create_fdt_reset(RISCVVirtState *s, uint32_t *phandle)
qemu_fdt_setprop_string_array(ms->fdt, name, "compatible",
(char **)&compat, ARRAY_SIZE(compat));
}
- qemu_fdt_setprop_cells(ms->fdt, name, "reg",
- 0x0, s->memmap[VIRT_TEST].base, 0x0, s->memmap[VIRT_TEST].size);
+ qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg",
+ 2, s->memmap[VIRT_TEST].base,
+ 2, s->memmap[VIRT_TEST].size);
qemu_fdt_setprop_cell(ms->fdt, name, "phandle", test_phandle);
test_phandle = qemu_fdt_get_phandle(ms->fdt, name);
g_free(name);
@@ -969,9 +966,9 @@ static void create_fdt_uart(RISCVVirtState *s,
s->memmap[VIRT_UART0].base);
qemu_fdt_add_subnode(ms->fdt, name);
qemu_fdt_setprop_string(ms->fdt, name, "compatible", "ns16550a");
- qemu_fdt_setprop_cells(ms->fdt, name, "reg",
- 0x0, s->memmap[VIRT_UART0].base,
- 0x0, s->memmap[VIRT_UART0].size);
+ qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg",
+ 2, s->memmap[VIRT_UART0].base,
+ 2, s->memmap[VIRT_UART0].size);
qemu_fdt_setprop_cell(ms->fdt, name, "clock-frequency", 3686400);
qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent", irq_mmio_phandle);
if (s->aia_type == VIRT_AIA_TYPE_NONE) {
@@ -995,8 +992,9 @@ static void create_fdt_rtc(RISCVVirtState *s,
qemu_fdt_add_subnode(ms->fdt, name);
qemu_fdt_setprop_string(ms->fdt, name, "compatible",
"google,goldfish-rtc");
- qemu_fdt_setprop_cells(ms->fdt, name, "reg",
- 0x0, s->memmap[VIRT_RTC].base, 0x0, s->memmap[VIRT_RTC].size);
+ qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg",
+ 2, s->memmap[VIRT_RTC].base,
+ 2, s->memmap[VIRT_RTC].size);
qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent",
irq_mmio_phandle);
if (s->aia_type == VIRT_AIA_TYPE_NONE) {
@@ -1090,8 +1088,7 @@ static void create_fdt_iommu_sys(RISCVVirtState *s, uint32_t irq_chip,
qemu_fdt_setprop_cell(fdt, iommu_node, "#iommu-cells", 1);
qemu_fdt_setprop_cell(fdt, iommu_node, "phandle", iommu_phandle);
- qemu_fdt_setprop_cells(fdt, iommu_node, "reg",
- addr >> 32, addr, size >> 32, size);
+ qemu_fdt_setprop_sized_cells(fdt, iommu_node, "reg", 2, addr, 2, size);
qemu_fdt_setprop_cell(fdt, iommu_node, "interrupt-parent", irq_chip);
qemu_fdt_setprop_cells(fdt, iommu_node, "interrupts",
diff --git a/hw/riscv/xiangshan_kmh.c b/hw/riscv/xiangshan_kmh.c
new file mode 100644
index 0000000..a95fd61
--- /dev/null
+++ b/hw/riscv/xiangshan_kmh.c
@@ -0,0 +1,220 @@
+/*
+ * QEMU RISC-V Board Compatible with the Xiangshan Kunminghu
+ * FPGA prototype platform
+ *
+ * Copyright (c) 2025 Beijing Institute of Open Source Chip (BOSC)
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Provides a board compatible with the Xiangshan Kunminghu
+ * FPGA prototype platform:
+ *
+ * 0) UART (16550A)
+ * 1) CLINT (Core-Local Interruptor)
+ * 2) IMSIC (Incoming MSI Controller)
+ * 3) APLIC (Advanced Platform-Level Interrupt Controller)
+ *
+ * More information can be found in our Github repository:
+ * https://github.com/OpenXiangShan/XiangShan
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "system/address-spaces.h"
+#include "hw/boards.h"
+#include "hw/char/serial-mm.h"
+#include "hw/intc/riscv_aclint.h"
+#include "hw/intc/riscv_aplic.h"
+#include "hw/intc/riscv_imsic.h"
+#include "hw/qdev-properties.h"
+#include "hw/riscv/boot.h"
+#include "hw/riscv/xiangshan_kmh.h"
+#include "hw/riscv/riscv_hart.h"
+#include "system/system.h"
+
+static const MemMapEntry xiangshan_kmh_memmap[] = {
+ [XIANGSHAN_KMH_ROM] = { 0x1000, 0xF000 },
+ [XIANGSHAN_KMH_UART0] = { 0x310B0000, 0x10000 },
+ [XIANGSHAN_KMH_CLINT] = { 0x38000000, 0x10000 },
+ [XIANGSHAN_KMH_APLIC_M] = { 0x31100000, 0x4000 },
+ [XIANGSHAN_KMH_APLIC_S] = { 0x31120000, 0x4000 },
+ [XIANGSHAN_KMH_IMSIC_M] = { 0x3A800000, 0x10000 },
+ [XIANGSHAN_KMH_IMSIC_S] = { 0x3B000000, 0x80000 },
+ [XIANGSHAN_KMH_DRAM] = { 0x80000000, 0x0 },
+};
+
+static DeviceState *xiangshan_kmh_create_aia(uint32_t num_harts)
+{
+ int i;
+ const MemMapEntry *memmap = xiangshan_kmh_memmap;
+ hwaddr addr = 0;
+ DeviceState *aplic_m = NULL;
+
+ /* M-level IMSICs */
+ addr = memmap[XIANGSHAN_KMH_IMSIC_M].base;
+ for (i = 0; i < num_harts; i++) {
+ riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0), i, true,
+ 1, XIANGSHAN_KMH_IMSIC_NUM_IDS);
+ }
+
+ /* S-level IMSICs */
+ addr = memmap[XIANGSHAN_KMH_IMSIC_S].base;
+ for (i = 0; i < num_harts; i++) {
+ riscv_imsic_create(addr +
+ i * IMSIC_HART_SIZE(XIANGSHAN_KMH_IMSIC_GUEST_BITS),
+ i, false, 1 + XIANGSHAN_KMH_IMSIC_GUEST_BITS,
+ XIANGSHAN_KMH_IMSIC_NUM_IDS);
+ }
+
+ /* M-level APLIC */
+ aplic_m = riscv_aplic_create(memmap[XIANGSHAN_KMH_APLIC_M].base,
+ memmap[XIANGSHAN_KMH_APLIC_M].size,
+ 0, 0, XIANGSHAN_KMH_APLIC_NUM_SOURCES,
+ 1, true, true, NULL);
+
+ /* S-level APLIC */
+ riscv_aplic_create(memmap[XIANGSHAN_KMH_APLIC_S].base,
+ memmap[XIANGSHAN_KMH_APLIC_S].size,
+ 0, 0, XIANGSHAN_KMH_APLIC_NUM_SOURCES,
+ 1, true, false, aplic_m);
+
+ return aplic_m;
+}
+
+static void xiangshan_kmh_soc_realize(DeviceState *dev, Error **errp)
+{
+ MachineState *ms = MACHINE(qdev_get_machine());
+ XiangshanKmhSoCState *s = XIANGSHAN_KMH_SOC(dev);
+ const MemMapEntry *memmap = xiangshan_kmh_memmap;
+ MemoryRegion *system_memory = get_system_memory();
+ uint32_t num_harts = ms->smp.cpus;
+
+ qdev_prop_set_uint32(DEVICE(&s->cpus), "num-harts", num_harts);
+ qdev_prop_set_uint32(DEVICE(&s->cpus), "hartid-base", 0);
+ qdev_prop_set_string(DEVICE(&s->cpus), "cpu-type",
+ TYPE_RISCV_CPU_XIANGSHAN_KMH);
+ sysbus_realize(SYS_BUS_DEVICE(&s->cpus), &error_fatal);
+
+ /* AIA */
+ s->irqchip = xiangshan_kmh_create_aia(num_harts);
+
+ /* UART */
+ serial_mm_init(system_memory, memmap[XIANGSHAN_KMH_UART0].base, 2,
+ qdev_get_gpio_in(s->irqchip, XIANGSHAN_KMH_UART0_IRQ),
+ 115200, serial_hd(0), DEVICE_LITTLE_ENDIAN);
+
+ /* CLINT */
+ riscv_aclint_swi_create(memmap[XIANGSHAN_KMH_CLINT].base,
+ 0, num_harts, false);
+ riscv_aclint_mtimer_create(memmap[XIANGSHAN_KMH_CLINT].base +
+ RISCV_ACLINT_SWI_SIZE,
+ RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
+ 0, num_harts, RISCV_ACLINT_DEFAULT_MTIMECMP,
+ RISCV_ACLINT_DEFAULT_MTIME,
+ XIANGSHAN_KMH_CLINT_TIMEBASE_FREQ, true);
+
+ /* ROM */
+ memory_region_init_rom(&s->rom, OBJECT(dev), "xiangshan.kunminghu.rom",
+ memmap[XIANGSHAN_KMH_ROM].size, &error_fatal);
+ memory_region_add_subregion(system_memory,
+ memmap[XIANGSHAN_KMH_ROM].base, &s->rom);
+}
+
+static void xiangshan_kmh_soc_class_init(ObjectClass *klass, const void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->realize = xiangshan_kmh_soc_realize;
+ dc->user_creatable = false;
+}
+
+static void xiangshan_kmh_soc_instance_init(Object *obj)
+{
+ XiangshanKmhSoCState *s = XIANGSHAN_KMH_SOC(obj);
+
+ object_initialize_child(obj, "cpus", &s->cpus, TYPE_RISCV_HART_ARRAY);
+}
+
+static const TypeInfo xiangshan_kmh_soc_info = {
+ .name = TYPE_XIANGSHAN_KMH_SOC,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(XiangshanKmhSoCState),
+ .instance_init = xiangshan_kmh_soc_instance_init,
+ .class_init = xiangshan_kmh_soc_class_init,
+};
+
+static void xiangshan_kmh_soc_register_types(void)
+{
+ type_register_static(&xiangshan_kmh_soc_info);
+}
+type_init(xiangshan_kmh_soc_register_types)
+
+static void xiangshan_kmh_machine_init(MachineState *machine)
+{
+ XiangshanKmhState *s = XIANGSHAN_KMH_MACHINE(machine);
+ const MemMapEntry *memmap = xiangshan_kmh_memmap;
+ MemoryRegion *system_memory = get_system_memory();
+ hwaddr start_addr = memmap[XIANGSHAN_KMH_DRAM].base;
+
+ /* Initialize SoC */
+ object_initialize_child(OBJECT(machine), "soc", &s->soc,
+ TYPE_XIANGSHAN_KMH_SOC);
+ qdev_realize(DEVICE(&s->soc), NULL, &error_fatal);
+
+ /* Register RAM */
+ memory_region_add_subregion(system_memory,
+ memmap[XIANGSHAN_KMH_DRAM].base,
+ machine->ram);
+
+ /* ROM reset vector */
+ riscv_setup_rom_reset_vec(machine, &s->soc.cpus,
+ start_addr,
+ memmap[XIANGSHAN_KMH_ROM].base,
+ memmap[XIANGSHAN_KMH_ROM].size, 0, 0);
+ if (machine->firmware) {
+ riscv_load_firmware(machine->firmware, &start_addr, NULL);
+ }
+
+ /* Note: dtb has been integrated into firmware(OpenSBI) when compiling */
+}
+
+static void xiangshan_kmh_machine_class_init(ObjectClass *klass, const void *data)
+{
+ MachineClass *mc = MACHINE_CLASS(klass);
+ static const char *const valid_cpu_types[] = {
+ TYPE_RISCV_CPU_XIANGSHAN_KMH,
+ NULL
+ };
+
+ mc->desc = "RISC-V Board compatible with the Xiangshan " \
+ "Kunminghu FPGA prototype platform";
+ mc->init = xiangshan_kmh_machine_init;
+ mc->max_cpus = XIANGSHAN_KMH_MAX_CPUS;
+ mc->default_cpu_type = TYPE_RISCV_CPU_XIANGSHAN_KMH;
+ mc->valid_cpu_types = valid_cpu_types;
+ mc->default_ram_id = "xiangshan.kunminghu.ram";
+}
+
+static const TypeInfo xiangshan_kmh_machine_info = {
+ .name = TYPE_XIANGSHAN_KMH_MACHINE,
+ .parent = TYPE_MACHINE,
+ .instance_size = sizeof(XiangshanKmhState),
+ .class_init = xiangshan_kmh_machine_class_init,
+};
+
+static void xiangshan_kmh_machine_register_types(void)
+{
+ type_register_static(&xiangshan_kmh_machine_info);
+}
+type_init(xiangshan_kmh_machine_register_types)
diff --git a/hw/s390x/ap-stub.c b/hw/s390x/ap-stub.c
new file mode 100644
index 0000000..001fe5f
--- /dev/null
+++ b/hw/s390x/ap-stub.c
@@ -0,0 +1,21 @@
+/*
+ * VFIO based AP matrix device assignment
+ *
+ * Copyright 2025 IBM Corp.
+ * Author(s): Rorie Reyes <rreyes@linux.ibm.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/s390x/ap-bridge.h"
+
+int ap_chsc_sei_nt0_get_event(void *res)
+{
+ return EVENT_INFORMATION_NOT_STORED;
+}
+
+bool ap_chsc_sei_nt0_have_event(void)
+{
+ return false;
+}
diff --git a/hw/s390x/ccw-device.c b/hw/s390x/ccw-device.c
index 19c2238..8be1813 100644
--- a/hw/s390x/ccw-device.c
+++ b/hw/s390x/ccw-device.c
@@ -57,7 +57,7 @@ static void ccw_device_set_loadparm(Object *obj, Visitor *v,
Error **errp)
{
CcwDevice *dev = CCW_DEVICE(obj);
- char *val;
+ g_autofree char *val = NULL;
int index;
index = object_property_get_int(obj, "bootindex", NULL);
diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
index 7d4e1f5..b513f89 100644
--- a/hw/s390x/cpu-topology.c
+++ b/hw/s390x/cpu-topology.c
@@ -23,8 +23,8 @@
#include "target/s390x/cpu.h"
#include "hw/s390x/s390-virtio-ccw.h"
#include "hw/s390x/cpu-topology.h"
-#include "qapi/qapi-commands-machine-target.h"
-#include "qapi/qapi-events-machine-target.h"
+#include "qapi/qapi-commands-machine-s390x.h"
+#include "qapi/qapi-events-machine-s390x.h"
/*
* s390_topology is used to keep the topology information.
diff --git a/hw/s390x/event-facility.c b/hw/s390x/event-facility.c
index 7b7bf23..fee286e 100644
--- a/hw/s390x/event-facility.c
+++ b/hw/s390x/event-facility.c
@@ -4,6 +4,7 @@
* handles SCLP event types
* - Signal Quiesce - system power down
* - ASCII Console Data - VT220 read and write
+ * - Control-Program Identification - Send OS data from guest to host
*
* Copyright IBM, Corp. 2012
*
@@ -40,6 +41,7 @@ struct SCLPEventFacility {
SysBusDevice parent_obj;
SCLPEventsBus sbus;
SCLPEvent quiesce, cpu_hotplug;
+ SCLPEventCPI cpi;
/* guest's receive mask */
union {
uint32_t receive_mask_pieces[2];
diff --git a/hw/s390x/meson.build b/hw/s390x/meson.build
index 3bbebfd..1bc8583 100644
--- a/hw/s390x/meson.build
+++ b/hw/s390x/meson.build
@@ -13,6 +13,7 @@ s390x_ss.add(files(
's390-skeys.c',
's390-stattrib.c',
'sclp.c',
+ 'sclpcpi.c',
'sclpcpu.c',
'sclpquiesce.c',
'tod.c',
@@ -33,6 +34,7 @@ s390x_ss.add(when: 'CONFIG_S390_CCW_VIRTIO', if_true: files(
))
s390x_ss.add(when: 'CONFIG_TERMINAL3270', if_true: files('3270-ccw.c'))
s390x_ss.add(when: 'CONFIG_VFIO', if_true: files('s390-pci-vfio.c'))
+s390x_ss.add(when: 'CONFIG_VFIO_AP', if_false: files('ap-stub.c'))
virtio_ss = ss.source_set()
virtio_ss.add(files('virtio-ccw.c'))
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index e6aa445..5282089 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -384,9 +384,9 @@ static uint64_t get_table_index(uint64_t iova, int8_t ett)
return calc_sx(iova);
case ZPCI_ETT_RT:
return calc_rtx(iova);
+ default:
+ g_assert_not_reached();
}
-
- return -1;
}
static bool entry_isvalid(uint64_t entry, int8_t ett)
@@ -397,22 +397,24 @@ static bool entry_isvalid(uint64_t entry, int8_t ett)
case ZPCI_ETT_ST:
case ZPCI_ETT_RT:
return rt_entry_isvalid(entry);
+ default:
+ g_assert_not_reached();
}
-
- return false;
}
/* Return true if address translation is done */
static bool translate_iscomplete(uint64_t entry, int8_t ett)
{
switch (ett) {
- case 0:
+ case ZPCI_ETT_ST:
return (entry & ZPCI_TABLE_FC) ? true : false;
- case 1:
+ case ZPCI_ETT_RT:
return false;
+ case ZPCI_ETT_PT:
+ return true;
+ default:
+ g_assert_not_reached();
}
-
- return true;
}
static uint64_t get_frame_size(int8_t ett)
@@ -424,9 +426,9 @@ static uint64_t get_frame_size(int8_t ett)
return 1ULL << 20;
case ZPCI_ETT_RT:
return 1ULL << 31;
+ default:
+ g_assert_not_reached();
}
-
- return 0;
}
static uint64_t get_next_table_origin(uint64_t entry, int8_t ett)
@@ -438,9 +440,9 @@ static uint64_t get_next_table_origin(uint64_t entry, int8_t ett)
return get_st_pto(entry);
case ZPCI_ETT_RT:
return get_rt_sto(entry);
+ default:
+ g_assert_not_reached();
}
-
- return 0;
}
/**
@@ -650,7 +652,16 @@ static const PCIIOMMUOps s390_iommu_ops = {
.get_address_space = s390_pci_dma_iommu,
};
-static uint8_t set_ind_atomic(uint64_t ind_loc, uint8_t to_be_set)
+/**
+ * set_ind_bit_atomic - Atomically set a bit in an indicator
+ *
+ * @ind_loc: Address of the indicator
+ * @to_be_set: Bit to set
+ *
+ * Returns true if the bit was set by this function, false if it was
+ * already set or mapping failed.
+ */
+static bool set_ind_bit_atomic(uint64_t ind_loc, uint8_t to_be_set)
{
uint8_t expected, actual;
hwaddr len = 1;
@@ -660,7 +671,7 @@ static uint8_t set_ind_atomic(uint64_t ind_loc, uint8_t to_be_set)
ind_addr = cpu_physical_memory_map(ind_loc, &len, true);
if (!ind_addr) {
s390_pci_generate_error_event(ERR_EVENT_AIRERR, 0, 0, 0, 0);
- return -1;
+ return false;
}
actual = *ind_addr;
do {
@@ -669,7 +680,7 @@ static uint8_t set_ind_atomic(uint64_t ind_loc, uint8_t to_be_set)
} while (actual != expected);
cpu_physical_memory_unmap((void *)ind_addr, len, 1, len);
- return actual;
+ return (actual & to_be_set) ? false : true;
}
static void s390_msi_ctrl_write(void *opaque, hwaddr addr, uint64_t data,
@@ -691,10 +702,10 @@ static void s390_msi_ctrl_write(void *opaque, hwaddr addr, uint64_t data,
ind_bit = pbdev->routes.adapter.ind_offset;
sum_bit = pbdev->routes.adapter.summary_offset;
- set_ind_atomic(pbdev->routes.adapter.ind_addr + (ind_bit + vec) / 8,
+ set_ind_bit_atomic(pbdev->routes.adapter.ind_addr + (ind_bit + vec) / 8,
0x80 >> ((ind_bit + vec) % 8));
- if (!set_ind_atomic(pbdev->routes.adapter.summary_addr + sum_bit / 8,
- 0x80 >> (sum_bit % 8))) {
+ if (set_ind_bit_atomic(pbdev->routes.adapter.summary_addr + sum_bit / 8,
+ 0x80 >> (sum_bit % 8))) {
css_adapter_interrupt(CSS_IO_ADAPTER_PCI, pbdev->isc);
}
}
@@ -889,6 +900,7 @@ static void s390_pcihost_realize(DeviceState *dev, Error **errp)
s390_pci_init_default_group();
css_register_io_adapters(CSS_IO_ADAPTER_PCI, true, false,
S390_ADAPTER_SUPPRESSIBLE, errp);
+ s390_pcihost_kvm_realize();
}
static void s390_pcihost_unrealize(DeviceState *dev)
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index b5dddb2..a3bb5aa 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -16,6 +16,7 @@
#include "exec/target_page.h"
#include "system/memory.h"
#include "qemu/error-report.h"
+#include "qemu/bswap.h"
#include "system/hw_accel.h"
#include "hw/boards.h"
#include "hw/pci/pci_device.h"
diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
index aaf9131..9e31029 100644
--- a/hw/s390x/s390-pci-vfio.c
+++ b/hw/s390x/s390-pci-vfio.c
@@ -20,7 +20,7 @@
#include "hw/s390x/s390-pci-clp.h"
#include "hw/s390x/s390-pci-vfio.h"
#include "hw/vfio/pci.h"
-#include "hw/vfio/vfio-container.h"
+#include "hw/vfio/vfio-container-legacy.h"
#include "hw/vfio/vfio-helpers.h"
/*
@@ -62,7 +62,7 @@ S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
{
S390PCIDMACount *cnt;
uint32_t avail;
- VFIOPCIDevice *vpdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
+ VFIOPCIDevice *vpdev = VFIO_PCI_DEVICE(pbdev->pdev);
int id;
assert(vpdev);
@@ -108,7 +108,7 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev,
{
struct vfio_info_cap_header *hdr;
struct vfio_device_info_cap_zpci_base *cap;
- VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
+ VFIOPCIDevice *vpci = VFIO_PCI_DEVICE(pbdev->pdev);
uint64_t vfio_size;
hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE);
@@ -162,7 +162,7 @@ static bool get_host_fh(S390PCIBusDevice *pbdev, struct vfio_device_info *info,
{
struct vfio_info_cap_header *hdr;
struct vfio_device_info_cap_zpci_base *cap;
- VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
+ VFIOPCIDevice *vpci = VFIO_PCI_DEVICE(pbdev->pdev);
hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE);
@@ -185,7 +185,7 @@ static void s390_pci_read_group(S390PCIBusDevice *pbdev,
struct vfio_device_info_cap_zpci_group *cap;
S390pciState *s = s390_get_phb();
ClpRspQueryPciGrp *resgrp;
- VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
+ VFIOPCIDevice *vpci = VFIO_PCI_DEVICE(pbdev->pdev);
uint8_t start_gid = pbdev->zpci_fn.pfgid;
hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_GROUP);
@@ -264,7 +264,7 @@ static void s390_pci_read_util(S390PCIBusDevice *pbdev,
{
struct vfio_info_cap_header *hdr;
struct vfio_device_info_cap_zpci_util *cap;
- VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
+ VFIOPCIDevice *vpci = VFIO_PCI_DEVICE(pbdev->pdev);
hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_UTIL);
@@ -291,7 +291,7 @@ static void s390_pci_read_pfip(S390PCIBusDevice *pbdev,
{
struct vfio_info_cap_header *hdr;
struct vfio_device_info_cap_zpci_pfip *cap;
- VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
+ VFIOPCIDevice *vpci = VFIO_PCI_DEVICE(pbdev->pdev);
hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_PFIP);
@@ -314,7 +314,7 @@ static void s390_pci_read_pfip(S390PCIBusDevice *pbdev,
static struct vfio_device_info *get_device_info(S390PCIBusDevice *pbdev)
{
- VFIOPCIDevice *vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
+ VFIOPCIDevice *vfio_pci = VFIO_PCI_DEVICE(pbdev->pdev);
return vfio_get_device_info(vfio_pci->vbasedev.fd);
}
diff --git a/hw/s390x/s390-skeys.c b/hw/s390x/s390-skeys.c
index aedb62b..8eeecfd 100644
--- a/hw/s390x/s390-skeys.c
+++ b/hw/s390x/s390-skeys.c
@@ -17,7 +17,6 @@
#include "hw/s390x/storage-keys.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-machine.h"
-#include "qapi/qapi-commands-misc-target.h"
#include "qobject/qdict.h"
#include "qemu/error-report.h"
#include "system/memory_mapping.h"
diff --git a/hw/s390x/s390-stattrib-kvm.c b/hw/s390x/s390-stattrib-kvm.c
index e1fee36..73df1f6 100644
--- a/hw/s390x/s390-stattrib-kvm.c
+++ b/hw/s390x/s390-stattrib-kvm.c
@@ -10,13 +10,13 @@
*/
#include "qemu/osdep.h"
+#include "exec/target_page.h"
#include "hw/s390x/s390-virtio-ccw.h"
#include "migration/qemu-file.h"
#include "hw/s390x/storage-attributes.h"
#include "qemu/error-report.h"
#include "system/kvm.h"
#include "system/memory_mapping.h"
-#include "system/ram_addr.h"
#include "kvm/kvm_s390x.h"
#include "qapi/error.h"
diff --git a/hw/s390x/s390-stattrib.c b/hw/s390x/s390-stattrib.c
index f74cf32..aa18537 100644
--- a/hw/s390x/s390-stattrib.c
+++ b/hw/s390x/s390-stattrib.c
@@ -11,12 +11,12 @@
#include "qemu/osdep.h"
#include "qemu/units.h"
+#include "exec/target_page.h"
#include "migration/qemu-file.h"
#include "migration/register.h"
#include "hw/qdev-properties.h"
#include "hw/s390x/storage-attributes.h"
#include "qemu/error-report.h"
-#include "system/ram_addr.h"
#include "qapi/error.h"
#include "qobject/qdict.h"
#include "cpu.h"
@@ -338,7 +338,7 @@ static const TypeInfo qemu_s390_stattrib_info = {
static SaveVMHandlers savevm_s390_stattrib_handlers = {
.save_setup = cmma_save_setup,
.save_live_iterate = cmma_save_iterate,
- .save_live_complete_precopy = cmma_save_complete,
+ .save_complete = cmma_save_complete,
.state_pending_exact = cmma_state_pending,
.state_pending_estimate = cmma_state_pending,
.save_cleanup = cmma_save_cleanup,
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index f69a4d8..ad2c481 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -13,7 +13,6 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
-#include "system/ram_addr.h"
#include "system/confidential-guest-support.h"
#include "hw/boards.h"
#include "hw/s390x/sclp.h"
@@ -260,9 +259,21 @@ static void s390_create_sclpconsole(SCLPDevice *sclp,
qdev_realize_and_unref(dev, ev_fac_bus, &error_fatal);
}
+static void s390_create_sclpcpi(SCLPDevice *sclp)
+{
+ SCLPEventFacility *ef = sclp->event_facility;
+ BusState *ev_fac_bus = sclp_get_event_facility_bus(ef);
+ DeviceState *dev;
+
+ dev = qdev_new(TYPE_SCLP_EVENT_CPI);
+ object_property_add_child(OBJECT(ef), "sclpcpi", OBJECT(dev));
+ qdev_realize_and_unref(dev, ev_fac_bus, &error_fatal);
+}
+
static void ccw_init(MachineState *machine)
{
MachineClass *mc = MACHINE_GET_CLASS(machine);
+ S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc);
S390CcwMachineState *ms = S390_CCW_MACHINE(machine);
int ret;
VirtualCssBus *css_bus;
@@ -323,6 +334,12 @@ static void ccw_init(MachineState *machine)
/* init the TOD clock */
s390_init_tod();
+
+ /* init SCLP event Control-Program Identification */
+ if (s390mc->use_cpi) {
+ s390_create_sclpcpi(ms->sclp);
+ }
+
}
static void s390_cpu_plug(HotplugHandler *hotplug_dev,
@@ -783,6 +800,7 @@ static void ccw_machine_class_init(ObjectClass *oc, const void *data)
DumpSKeysInterface *dsi = DUMP_SKEYS_INTERFACE_CLASS(oc);
s390mc->max_threads = 1;
+ s390mc->use_cpi = true;
mc->reset = s390_machine_reset;
mc->block_default_type = IF_VIRTIO;
mc->no_cdrom = 1;
@@ -892,14 +910,26 @@ static const TypeInfo ccw_machine_info = {
DEFINE_CCW_MACHINE_IMPL(false, major, minor)
+static void ccw_machine_10_2_instance_options(MachineState *machine)
+{
+}
+
+static void ccw_machine_10_2_class_options(MachineClass *mc)
+{
+}
+DEFINE_CCW_MACHINE_AS_LATEST(10, 2);
+
static void ccw_machine_10_1_instance_options(MachineState *machine)
{
+ ccw_machine_10_2_instance_options(machine);
}
static void ccw_machine_10_1_class_options(MachineClass *mc)
{
+ ccw_machine_10_2_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_10_1, hw_compat_10_1_len);
}
-DEFINE_CCW_MACHINE_AS_LATEST(10, 1);
+DEFINE_CCW_MACHINE(10, 1);
static void ccw_machine_10_0_instance_options(MachineState *machine)
{
@@ -908,6 +938,9 @@ static void ccw_machine_10_0_instance_options(MachineState *machine)
static void ccw_machine_10_0_class_options(MachineClass *mc)
{
+ S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc);
+ s390mc->use_cpi = false;
+
ccw_machine_10_1_class_options(mc);
compat_props_add(mc->compat_props, hw_compat_10_0, hw_compat_10_0_len);
}
@@ -1145,20 +1178,6 @@ static void ccw_machine_4_2_class_options(MachineClass *mc)
}
DEFINE_CCW_MACHINE(4, 2);
-static void ccw_machine_4_1_instance_options(MachineState *machine)
-{
- static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V4_1 };
- ccw_machine_4_2_instance_options(machine);
- s390_set_qemu_cpu_model(0x2964, 13, 2, qemu_cpu_feat);
-}
-
-static void ccw_machine_4_1_class_options(MachineClass *mc)
-{
- ccw_machine_4_2_class_options(mc);
- compat_props_add(mc->compat_props, hw_compat_4_1, hw_compat_4_1_len);
-}
-DEFINE_CCW_MACHINE(4, 1);
-
static void ccw_machine_register_types(void)
{
type_register_static(&ccw_machine_info);
diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c
index 9718564..8602a56 100644
--- a/hw/s390x/sclp.c
+++ b/hw/s390x/sclp.c
@@ -16,6 +16,7 @@
#include "qemu/units.h"
#include "qapi/error.h"
#include "hw/boards.h"
+#include "system/memory.h"
#include "hw/s390x/sclp.h"
#include "hw/s390x/event-facility.h"
#include "hw/s390x/s390-pci-bus.h"
@@ -303,12 +304,15 @@ int sclp_service_call(S390CPU *cpu, uint64_t sccb, uint32_t code)
SCLPDeviceClass *sclp_c = SCLP_GET_CLASS(sclp);
SCCBHeader header;
g_autofree SCCB *work_sccb = NULL;
+ AddressSpace *as = CPU(cpu)->as;
+ const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED;
+ MemTxResult ret;
/* first some basic checks on program checks */
if (env->psw.mask & PSW_MASK_PSTATE) {
return -PGM_PRIVILEGED;
}
- if (cpu_physical_memory_is_io(sccb)) {
+ if (address_space_is_io(CPU(cpu)->as, sccb)) {
return -PGM_ADDRESSING;
}
if ((sccb & ~0x1fffUL) == 0 || (sccb & ~0x1fffUL) == env->psa
@@ -317,7 +321,10 @@ int sclp_service_call(S390CPU *cpu, uint64_t sccb, uint32_t code)
}
/* the header contains the actual length of the sccb */
- cpu_physical_memory_read(sccb, &header, sizeof(SCCBHeader));
+ ret = address_space_read(as, sccb, attrs, &header, sizeof(SCCBHeader));
+ if (ret != MEMTX_OK) {
+ return -PGM_ADDRESSING;
+ }
/* Valid sccb sizes */
if (be16_to_cpu(header.length) < sizeof(SCCBHeader)) {
@@ -330,7 +337,11 @@ int sclp_service_call(S390CPU *cpu, uint64_t sccb, uint32_t code)
* the host has checked the values
*/
work_sccb = g_malloc0(be16_to_cpu(header.length));
- cpu_physical_memory_read(sccb, work_sccb, be16_to_cpu(header.length));
+ ret = address_space_read(as, sccb, attrs,
+ work_sccb, be16_to_cpu(header.length));
+ if (ret != MEMTX_OK) {
+ return -PGM_ADDRESSING;
+ }
if (!sclp_command_code_valid(code)) {
work_sccb->h.response_code = cpu_to_be16(SCLP_RC_INVALID_SCLP_COMMAND);
@@ -344,8 +355,11 @@ int sclp_service_call(S390CPU *cpu, uint64_t sccb, uint32_t code)
sclp_c->execute(sclp, work_sccb, code);
out_write:
- cpu_physical_memory_write(sccb, work_sccb,
- be16_to_cpu(work_sccb->h.length));
+ ret = address_space_write(as, sccb, attrs,
+ work_sccb, be16_to_cpu(header.length));
+ if (ret != MEMTX_OK) {
+ return -PGM_PROTECTION;
+ }
sclp_c->service_interrupt(sclp, sccb);
diff --git a/hw/s390x/sclpcpi.c b/hw/s390x/sclpcpi.c
new file mode 100644
index 0000000..7aa039d
--- /dev/null
+++ b/hw/s390x/sclpcpi.c
@@ -0,0 +1,212 @@
+ /*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * SCLP event type 11 - Control-Program Identification (CPI):
+ * CPI is used to send program identifiers from the guest to the
+ * Service-Call Logical Processor (SCLP). It is not sent by the SCLP.
+ *
+ * Control-program identifiers provide data about the guest operating
+ * system. The control-program identifiers are: system type, system name,
+ * system level and sysplex name.
+ *
+ * In Linux, all the control-program identifiers are user configurable. The
+ * system type, system name, and sysplex name use EBCDIC characters from
+ * this set: capital A-Z, 0-9, $, @, #, and blank. In Linux, the system
+ * type, system name and sysplex name are arbitrary free-form texts.
+ *
+ * In Linux, the 8-byte hexadecimal system-level has the format
+ * 0x<a><b><cc><dd><eeee><ff><gg><hh>, where:
+ * <a>: is a 4-bit digit, its most significant bit indicates hypervisor use
+ * <b>: is one digit that represents Linux distributions as follows
+ * 0: generic Linux
+ * 1: Red Hat Enterprise Linux
+ * 2: SUSE Linux Enterprise Server
+ * 3: Canonical Ubuntu
+ * 4: Fedora
+ * 5: openSUSE Leap
+ * 6: Debian GNU/Linux
+ * 7: Red Hat Enterprise Linux CoreOS
+ * <cc>: are two digits for a distribution-specific encoding of the major
+ * version of the distribution
+ * <dd>: are two digits for a distribution-specific encoding of the minor
+ * version of the distribution
+ * <eeee>: are four digits for the patch level of the distribution
+ * <ff>: are two digits for the major version of the kernel
+ * <gg>: are two digits for the minor version of the kernel
+ * <hh>: are two digits for the stable version of the kernel
+ * (e.g. 74872343805430528, when converted to hex is 0x010a000000060b00). On
+ * machines prior to z16, some of the values are not available to display.
+ *
+ * Sysplex refers to a cluster of logical partitions that communicates and
+ * co-operates with each other.
+ *
+ * The CPI feature is supported since 10.1.
+ *
+ * Copyright IBM, Corp. 2024
+ *
+ * Authors:
+ * Shalini Chellathurai Saroja <shalini@linux.ibm.com>
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/timer.h"
+#include "hw/s390x/event-facility.h"
+#include "hw/s390x/ebcdic.h"
+#include "qapi/qapi-visit-machine.h"
+#include "migration/vmstate.h"
+
+typedef struct Data {
+ uint8_t id_format;
+ uint8_t reserved0;
+ uint8_t system_type[8];
+ uint64_t reserved1;
+ uint8_t system_name[8];
+ uint64_t reserved2;
+ uint64_t system_level;
+ uint64_t reserved3;
+ uint8_t sysplex_name[8];
+ uint8_t reserved4[16];
+} QEMU_PACKED Data;
+
+typedef struct ControlProgramIdMsg {
+ EventBufferHeader ebh;
+ Data data;
+} QEMU_PACKED ControlProgramIdMsg;
+
+static bool can_handle_event(uint8_t type)
+{
+ return type == SCLP_EVENT_CTRL_PGM_ID;
+}
+
+static sccb_mask_t send_mask(void)
+{
+ return 0;
+}
+
+/* Enable SCLP to accept buffers of event type CPI from the control-program. */
+static sccb_mask_t receive_mask(void)
+{
+ return SCLP_EVENT_MASK_CTRL_PGM_ID;
+}
+
+static int write_event_data(SCLPEvent *event, EventBufferHeader *evt_buf_hdr)
+{
+ ControlProgramIdMsg *cpim = container_of(evt_buf_hdr, ControlProgramIdMsg,
+ ebh);
+ SCLPEventCPI *e = SCLP_EVENT_CPI(event);
+
+ ascii_put(e->system_type, (char *)cpim->data.system_type,
+ sizeof(cpim->data.system_type));
+ ascii_put(e->system_name, (char *)cpim->data.system_name,
+ sizeof(cpim->data.system_name));
+ ascii_put(e->sysplex_name, (char *)cpim->data.sysplex_name,
+ sizeof(cpim->data.sysplex_name));
+ e->system_level = ldq_be_p(&cpim->data.system_level);
+ e->timestamp = qemu_clock_get_ns(QEMU_CLOCK_HOST);
+
+ cpim->ebh.flags = SCLP_EVENT_BUFFER_ACCEPTED;
+ return SCLP_RC_NORMAL_COMPLETION;
+}
+
+static char *get_system_type(Object *obj, Error **errp)
+{
+ SCLPEventCPI *e = SCLP_EVENT_CPI(obj);
+
+ return g_strndup((char *) e->system_type, sizeof(e->system_type));
+}
+
+static char *get_system_name(Object *obj, Error **errp)
+{
+ SCLPEventCPI *e = SCLP_EVENT_CPI(obj);
+
+ return g_strndup((char *) e->system_name, sizeof(e->system_name));
+}
+
+static char *get_sysplex_name(Object *obj, Error **errp)
+{
+ SCLPEventCPI *e = SCLP_EVENT_CPI(obj);
+
+ return g_strndup((char *) e->sysplex_name, sizeof(e->sysplex_name));
+}
+
+static void get_system_level(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ SCLPEventCPI *e = SCLP_EVENT_CPI(obj);
+
+ visit_type_uint64(v, name, &e->system_level, errp);
+}
+
+static void get_timestamp(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ SCLPEventCPI *e = SCLP_EVENT_CPI(obj);
+
+ visit_type_uint64(v, name, &e->timestamp, errp);
+}
+
+static const VMStateDescription vmstate_sclpcpi = {
+ .name = "s390_control_program_id",
+ .version_id = 0,
+ .fields = (const VMStateField[]) {
+ VMSTATE_UINT8_ARRAY(system_type, SCLPEventCPI, 8),
+ VMSTATE_UINT8_ARRAY(system_name, SCLPEventCPI, 8),
+ VMSTATE_UINT64(system_level, SCLPEventCPI),
+ VMSTATE_UINT8_ARRAY(sysplex_name, SCLPEventCPI, 8),
+ VMSTATE_UINT64(timestamp, SCLPEventCPI),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static void cpi_class_init(ObjectClass *klass, const void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ SCLPEventClass *k = SCLP_EVENT_CLASS(klass);
+
+ dc->user_creatable = false;
+ dc->vmsd = &vmstate_sclpcpi;
+
+ k->can_handle_event = can_handle_event;
+ k->get_send_mask = send_mask;
+ k->get_receive_mask = receive_mask;
+ k->write_event_data = write_event_data;
+
+ object_class_property_add_str(klass, "system_type", get_system_type, NULL);
+ object_class_property_set_description(klass, "system_type",
+ "operating system e.g. \"LINUX \"");
+
+ object_class_property_add_str(klass, "system_name", get_system_name, NULL);
+ object_class_property_set_description(klass, "system_name",
+ "user configurable name of the VM e.g. \"TESTVM \"");
+
+ object_class_property_add_str(klass, "sysplex_name", get_sysplex_name,
+ NULL);
+ object_class_property_set_description(klass, "sysplex_name",
+ "name of the cluster which the VM belongs to, if any"
+ " e.g. \"PLEX \"");
+
+ object_class_property_add(klass, "system_level", "uint64", get_system_level,
+ NULL, NULL, NULL);
+ object_class_property_set_description(klass, "system_level",
+ "distribution and kernel version in Linux e.g. 74872343805430528");
+
+ object_class_property_add(klass, "timestamp", "uint64", get_timestamp,
+ NULL, NULL, NULL);
+ object_class_property_set_description(klass, "timestamp",
+ "latest update of CPI data in nanoseconds since the UNIX EPOCH");
+}
+
+static const TypeInfo sclp_cpi_info = {
+ .name = TYPE_SCLP_EVENT_CPI,
+ .parent = TYPE_SCLP_EVENT,
+ .instance_size = sizeof(SCLPEventCPI),
+ .class_init = cpi_class_init,
+};
+
+static void sclp_cpi_register_types(void)
+{
+ type_register_static(&sclp_cpi_info);
+}
+
+type_init(sclp_cpi_register_types)
diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index d2f85b3..4cb1ced 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -1130,13 +1130,13 @@ static int virtio_ccw_load_queue(DeviceState *d, int n, QEMUFile *f)
static void virtio_ccw_save_config(DeviceState *d, QEMUFile *f)
{
VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d);
- vmstate_save_state(f, &vmstate_virtio_ccw_dev, dev, NULL);
+ vmstate_save_state(f, &vmstate_virtio_ccw_dev, dev, NULL, &error_fatal);
}
static int virtio_ccw_load_config(DeviceState *d, QEMUFile *f)
{
VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d);
- return vmstate_load_state(f, &vmstate_virtio_ccw_dev, dev, 1);
+ return vmstate_load_state(f, &vmstate_virtio_ccw_dev, dev, 1, &error_fatal);
}
static void virtio_ccw_pre_plugged(DeviceState *d, Error **errp)
diff --git a/hw/scsi/esp.c b/hw/scsi/esp.c
index f24991f..1d264c4 100644
--- a/hw/scsi/esp.c
+++ b/hw/scsi/esp.c
@@ -275,6 +275,7 @@ static int esp_select(ESPState *s)
if (!s->current_dev) {
/* No such drive */
s->rregs[ESP_RSTAT] = 0;
+ s->asc_mode = ESP_ASC_MODE_DIS;
s->rregs[ESP_RINTR] = INTR_DC;
esp_raise_irq(s);
return -1;
@@ -284,6 +285,7 @@ static int esp_select(ESPState *s)
* Note that we deliberately don't raise the IRQ here: this will be done
* either in esp_transfer_data() or esp_command_complete()
*/
+ s->asc_mode = ESP_ASC_MODE_INI;
return 0;
}
@@ -308,6 +310,7 @@ static void do_command_phase(ESPState *s)
if (!current_lun) {
/* No such drive */
s->rregs[ESP_RSTAT] = 0;
+ s->asc_mode = ESP_ASC_MODE_DIS;
s->rregs[ESP_RINTR] = INTR_DC;
s->rregs[ESP_RSEQ] = SEQ_0;
esp_raise_irq(s);
@@ -487,8 +490,10 @@ static void esp_do_dma(ESPState *s)
case STAT_MO:
if (s->dma_memory_read) {
len = MIN(len, fifo8_num_free(&s->cmdfifo));
- s->dma_memory_read(s->dma_opaque, buf, len);
- esp_set_tc(s, esp_get_tc(s) - len);
+ if (len) {
+ s->dma_memory_read(s->dma_opaque, buf, len);
+ esp_set_tc(s, esp_get_tc(s) - len);
+ }
} else {
len = esp_fifo_pop_buf(s, buf, fifo8_num_used(&s->fifo));
len = MIN(fifo8_num_free(&s->cmdfifo), len);
@@ -541,9 +546,11 @@ static void esp_do_dma(ESPState *s)
trace_esp_do_dma(cmdlen, len);
if (s->dma_memory_read) {
len = MIN(len, fifo8_num_free(&s->cmdfifo));
- s->dma_memory_read(s->dma_opaque, buf, len);
- fifo8_push_all(&s->cmdfifo, buf, len);
- esp_set_tc(s, esp_get_tc(s) - len);
+ if (len) {
+ s->dma_memory_read(s->dma_opaque, buf, len);
+ fifo8_push_all(&s->cmdfifo, buf, len);
+ esp_set_tc(s, esp_get_tc(s) - len);
+ }
} else {
len = esp_fifo_pop_buf(s, buf, fifo8_num_used(&s->fifo));
len = MIN(fifo8_num_free(&s->cmdfifo), len);
@@ -572,8 +579,10 @@ static void esp_do_dma(ESPState *s)
switch (s->rregs[ESP_CMD]) {
case CMD_TI | CMD_DMA:
if (s->dma_memory_read) {
- s->dma_memory_read(s->dma_opaque, s->async_buf, len);
- esp_set_tc(s, esp_get_tc(s) - len);
+ if (len) {
+ s->dma_memory_read(s->dma_opaque, s->async_buf, len);
+ esp_set_tc(s, esp_get_tc(s) - len);
+ }
} else {
/* Copy FIFO data to device */
len = MIN(s->async_len, ESP_FIFO_SZ);
@@ -625,7 +634,9 @@ static void esp_do_dma(ESPState *s)
switch (s->rregs[ESP_CMD]) {
case CMD_TI | CMD_DMA:
if (s->dma_memory_write) {
- s->dma_memory_write(s->dma_opaque, s->async_buf, len);
+ if (len) {
+ s->dma_memory_write(s->dma_opaque, s->async_buf, len);
+ }
} else {
/* Copy device data to FIFO */
len = MIN(len, fifo8_num_free(&s->fifo));
@@ -675,6 +686,7 @@ static void esp_do_dma(ESPState *s)
buf[0] = s->status;
if (s->dma_memory_write) {
+ /* Length already non-zero */
s->dma_memory_write(s->dma_opaque, buf, len);
} else {
esp_fifo_push_buf(s, buf, len);
@@ -709,6 +721,7 @@ static void esp_do_dma(ESPState *s)
buf[0] = 0;
if (s->dma_memory_write) {
+ /* Length already non-zero */
s->dma_memory_write(s->dma_opaque, buf, len);
} else {
esp_fifo_push_buf(s, buf, len);
@@ -1012,6 +1025,7 @@ void esp_transfer_data(SCSIRequest *req, uint32_t len)
*/
s->rregs[ESP_RINTR] |= INTR_BS | INTR_FC;
s->rregs[ESP_RSEQ] = SEQ_CD;
+ esp_raise_irq(s);
break;
case CMD_SELATNS | CMD_DMA:
@@ -1022,20 +1036,21 @@ void esp_transfer_data(SCSIRequest *req, uint32_t len)
*/
s->rregs[ESP_RINTR] |= INTR_BS;
s->rregs[ESP_RSEQ] = SEQ_MO;
+ esp_raise_irq(s);
break;
case CMD_TI | CMD_DMA:
case CMD_TI:
/*
- * Bus service interrupt raised because of initial change to
- * DATA phase
+ * If the final COMMAND phase data was transferred using a TI
+ * command, clear ESP_CMD to terminate the TI command and raise
+ * the completion interrupt
*/
s->rregs[ESP_CMD] = 0;
s->rregs[ESP_RINTR] |= INTR_BS;
+ esp_raise_irq(s);
break;
}
-
- esp_raise_irq(s);
}
/*
@@ -1090,6 +1105,7 @@ void esp_hard_reset(ESPState *s)
fifo8_reset(&s->cmdfifo);
s->dma = 0;
s->dma_cb = NULL;
+ s->asc_mode = ESP_ASC_MODE_DIS;
s->rregs[ESP_CFG1] = 7;
}
@@ -1113,6 +1129,38 @@ static void parent_esp_reset(ESPState *s, int irq, int level)
}
}
+static bool esp_cmd_is_valid(ESPState *s, uint8_t cmd)
+{
+ uint8_t cmd_group = (cmd & CMD_GRP_MASK) >> 4;
+
+ /* Always allow misc commands */
+ if (cmd_group == CMD_GRP_MISC) {
+ return true;
+ }
+
+ switch (s->asc_mode) {
+ case ESP_ASC_MODE_DIS:
+ /* Disconnected mode: only allow disconnected commands */
+ if (cmd_group == CMD_GRP_DISC) {
+ return true;
+ }
+ break;
+
+ case ESP_ASC_MODE_INI:
+ /* Initiator mode: allow initiator commands */
+ if (cmd_group == CMD_GRP_INIT) {
+ return true;
+ }
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+
+ trace_esp_invalid_cmd(cmd, s->asc_mode);
+ return false;
+}
+
static void esp_run_cmd(ESPState *s)
{
uint8_t cmd = s->rregs[ESP_CMD];
@@ -1158,6 +1206,7 @@ static void esp_run_cmd(ESPState *s)
break;
case CMD_MSGACC:
trace_esp_mem_writeb_cmd_msgacc(cmd);
+ s->asc_mode = ESP_ASC_MODE_DIS;
s->rregs[ESP_RINTR] |= INTR_DC;
s->rregs[ESP_RSEQ] = 0;
s->rregs[ESP_RFLAGS] = 0;
@@ -1268,6 +1317,11 @@ void esp_reg_write(ESPState *s, uint32_t saddr, uint64_t val)
break;
case ESP_CMD:
s->rregs[saddr] = val;
+ if (!esp_cmd_is_valid(s, s->rregs[saddr])) {
+ s->rregs[ESP_RSTAT] |= INTR_IL;
+ esp_raise_irq(s);
+ break;
+ }
esp_run_cmd(s);
break;
case ESP_WBUSID ... ESP_WSYNO:
@@ -1325,6 +1379,14 @@ static bool esp_is_between_version_5_and_6(void *opaque, int version_id)
return version_id >= 5 && version_id <= 6;
}
+static bool esp_is_version_8(void *opaque, int version_id)
+{
+ ESPState *s = ESP(opaque);
+
+ version_id = MIN(version_id, s->mig_version_id);
+ return version_id >= 8;
+}
+
int esp_pre_save(void *opaque)
{
ESPState *s = ESP(object_resolve_path_component(
@@ -1356,13 +1418,18 @@ static int esp_post_load(void *opaque, int version_id)
}
}
+ if (version_id < 8) {
+ /* Assume initiator mode to allow all commands to continue */
+ s->asc_mode = ESP_ASC_MODE_INI;
+ }
+
s->mig_version_id = vmstate_esp.version_id;
return 0;
}
const VMStateDescription vmstate_esp = {
.name = "esp",
- .version_id = 7,
+ .version_id = 8,
.minimum_version_id = 3,
.post_load = esp_post_load,
.fields = (const VMStateField[]) {
@@ -1394,6 +1461,7 @@ const VMStateDescription vmstate_esp = {
esp_is_between_version_5_and_6),
VMSTATE_UINT8_TEST(lun, ESPState, esp_is_version_6),
VMSTATE_BOOL(drq_state, ESPState),
+ VMSTATE_UINT8_TEST(asc_mode, ESPState, esp_is_version_8),
VMSTATE_END_OF_LIST()
},
};
diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
index f4f2ef3..9ea4aa0 100644
--- a/hw/scsi/lsi53c895a.c
+++ b/hw/scsi/lsi53c895a.c
@@ -1112,7 +1112,7 @@ bad:
static void lsi_memcpy(LSIState *s, uint32_t dest, uint32_t src, int count)
{
int n;
- uint8_t buf[LSI_BUF_SIZE];
+ QEMU_UNINITIALIZED uint8_t buf[LSI_BUF_SIZE];
trace_lsi_memcpy(dest, src, count);
while (count) {
diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
index 55cd188..844643d 100644
--- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c
@@ -981,13 +981,11 @@ static int megasas_event_wait(MegasasState *s, MegasasCmd *cmd)
static int megasas_dcmd_pd_get_list(MegasasState *s, MegasasCmd *cmd)
{
- struct mfi_pd_list info;
- size_t dcmd_size = sizeof(info);
+ struct mfi_pd_list info = {};
BusChild *kid;
uint32_t offset, dcmd_limit, num_pd_disks = 0, max_pd_disks;
dma_addr_t residual;
- memset(&info, 0, dcmd_size);
offset = 8;
dcmd_limit = offset + sizeof(struct mfi_pd_address);
if (cmd->iov_size < dcmd_limit) {
@@ -1429,11 +1427,10 @@ static int megasas_dcmd_cfg_read(MegasasState *s, MegasasCmd *cmd)
static int megasas_dcmd_get_properties(MegasasState *s, MegasasCmd *cmd)
{
- struct mfi_ctrl_props info;
+ struct mfi_ctrl_props info = {};
size_t dcmd_size = sizeof(info);
dma_addr_t residual;
- memset(&info, 0x0, dcmd_size);
if (cmd->iov_size < dcmd_size) {
trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size,
dcmd_size);
diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c
index 1ebe0b8..4ada35b 100644
--- a/hw/scsi/mptsas.c
+++ b/hw/scsi/mptsas.c
@@ -579,11 +579,11 @@ static void mptsas_process_ioc_init(MPTSASState *s, MPIMsgIOCInit *req)
}
memset(&reply, 0, sizeof(reply));
- reply.WhoInit = s->who_init;
+ reply.WhoInit = req->WhoInit;
reply.MsgLength = sizeof(reply) / 4;
reply.Function = req->Function;
- reply.MaxDevices = s->max_devices;
- reply.MaxBuses = s->max_buses;
+ reply.MaxDevices = req->MaxDevices;
+ reply.MaxBuses = req->MaxBuses;
reply.MsgContext = req->MsgContext;
mptsas_fix_ioc_init_reply_endianness(&reply);
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index cb4af1b..b4782c6 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -74,7 +74,7 @@ struct SCSIDiskClass {
*/
DMAIOFunc *dma_readv;
DMAIOFunc *dma_writev;
- bool (*need_fua_emulation)(SCSICommand *cmd);
+ bool (*need_fua)(SCSICommand *cmd);
void (*update_sense)(SCSIRequest *r);
};
@@ -85,7 +85,7 @@ typedef struct SCSIDiskReq {
uint32_t sector_count;
uint32_t buflen;
bool started;
- bool need_fua_emulation;
+ bool need_fua;
struct iovec iov;
QEMUIOVector qiov;
BlockAcctCookie acct;
@@ -389,24 +389,6 @@ static bool scsi_is_cmd_fua(SCSICommand *cmd)
}
}
-static void scsi_write_do_fua(SCSIDiskReq *r)
-{
- SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
-
- assert(r->req.aiocb == NULL);
- assert(!r->req.io_canceled);
-
- if (r->need_fua_emulation) {
- block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
- BLOCK_ACCT_FLUSH);
- r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
- return;
- }
-
- scsi_req_complete(&r->req, GOOD);
- scsi_req_unref(&r->req);
-}
-
static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret)
{
assert(r->req.aiocb == NULL);
@@ -416,12 +398,7 @@ static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret)
r->sector += r->sector_count;
r->sector_count = 0;
- if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
- scsi_write_do_fua(r);
- return;
- } else {
- scsi_req_complete(&r->req, GOOD);
- }
+ scsi_req_complete(&r->req, GOOD);
done:
scsi_req_unref(&r->req);
@@ -564,7 +541,7 @@ static void scsi_read_data(SCSIRequest *req)
first = !r->started;
r->started = true;
- if (first && r->need_fua_emulation) {
+ if (first && r->need_fua) {
block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
BLOCK_ACCT_FLUSH);
r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_do_read_cb, r);
@@ -589,8 +566,7 @@ static void scsi_write_complete_noio(SCSIDiskReq *r, int ret)
r->sector += n;
r->sector_count -= n;
if (r->sector_count == 0) {
- scsi_write_do_fua(r);
- return;
+ scsi_req_complete(&r->req, GOOD);
} else {
scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
trace_scsi_disk_write_complete_noio(r->req.tag, r->qiov.size);
@@ -623,6 +599,7 @@ static void scsi_write_data(SCSIRequest *req)
SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
+ BlockCompletionFunc *cb;
/* No data transfer may already be in progress */
assert(r->req.aiocb == NULL);
@@ -648,11 +625,10 @@ static void scsi_write_data(SCSIRequest *req)
if (r->req.cmd.buf[0] == VERIFY_10 || r->req.cmd.buf[0] == VERIFY_12 ||
r->req.cmd.buf[0] == VERIFY_16) {
- if (r->req.sg) {
- scsi_dma_complete_noio(r, 0);
- } else {
- scsi_write_complete_noio(r, 0);
- }
+ block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
+ BLOCK_ACCT_FLUSH);
+ cb = r->req.sg ? scsi_dma_complete : scsi_write_complete;
+ r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, cb, r);
return;
}
@@ -2391,7 +2367,7 @@ static int32_t scsi_disk_dma_command(SCSIRequest *req, uint8_t *buf)
scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
return 0;
}
- r->need_fua_emulation = sdc->need_fua_emulation(&r->req.cmd);
+ r->need_fua = sdc->need_fua(&r->req.cmd);
if (r->sector_count == 0) {
scsi_req_complete(&r->req, GOOD);
}
@@ -3137,7 +3113,8 @@ BlockAIOCB *scsi_dma_writev(int64_t offset, QEMUIOVector *iov,
{
SCSIDiskReq *r = opaque;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
- return blk_aio_pwritev(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque);
+ int flags = r->need_fua ? BDRV_REQ_FUA : 0;
+ return blk_aio_pwritev(s->qdev.conf.blk, offset, iov, flags, cb, cb_opaque);
}
static char *scsi_property_get_loadparm(Object *obj, Error **errp)
@@ -3186,7 +3163,7 @@ static void scsi_disk_base_class_initfn(ObjectClass *klass, const void *data)
device_class_set_legacy_reset(dc, scsi_disk_reset);
sdc->dma_readv = scsi_dma_readv;
sdc->dma_writev = scsi_dma_writev;
- sdc->need_fua_emulation = scsi_is_cmd_fua;
+ sdc->need_fua = scsi_is_cmd_fua;
}
static const TypeInfo scsi_disk_base_info = {
@@ -3215,7 +3192,7 @@ static const Property scsi_hd_properties[] = {
DEFINE_PROP_BIT("removable", SCSIDiskState, features,
SCSI_DISK_F_REMOVABLE, false),
DEFINE_PROP_BIT("dpofua", SCSIDiskState, features,
- SCSI_DISK_F_DPOFUA, false),
+ SCSI_DISK_F_DPOFUA, true),
DEFINE_PROP_UINT64("wwn", SCSIDiskState, qdev.wwn, 0),
DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, qdev.port_wwn, 0),
DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0),
@@ -3338,7 +3315,7 @@ static void scsi_block_class_initfn(ObjectClass *klass, const void *data)
sdc->dma_readv = scsi_block_dma_readv;
sdc->dma_writev = scsi_block_dma_writev;
sdc->update_sense = scsi_block_update_sense;
- sdc->need_fua_emulation = scsi_block_no_fua;
+ sdc->need_fua = scsi_block_no_fua;
dc->desc = "SCSI block device passthrough";
device_class_set_props(dc, scsi_block_properties);
dc->vmsd = &vmstate_scsi_disk_state;
diff --git a/hw/scsi/spapr_vscsi.c b/hw/scsi/spapr_vscsi.c
index 20f70fb..f0a7dd2 100644
--- a/hw/scsi/spapr_vscsi.c
+++ b/hw/scsi/spapr_vscsi.c
@@ -630,7 +630,7 @@ static void vscsi_save_request(QEMUFile *f, SCSIRequest *sreq)
vscsi_req *req = sreq->hba_private;
assert(req->active);
- vmstate_save_state(f, &vmstate_spapr_vscsi_req, req, NULL);
+ vmstate_save_state(f, &vmstate_spapr_vscsi_req, req, NULL, &error_fatal);
trace_spapr_vscsi_save_request(req->qtag, req->cur_desc_num,
req->cur_desc_offset);
@@ -642,15 +642,17 @@ static void *vscsi_load_request(QEMUFile *f, SCSIRequest *sreq)
VSCSIState *s = VIO_SPAPR_VSCSI_DEVICE(bus->qbus.parent);
vscsi_req *req;
int rc;
+ Error *local_err = NULL;
assert(sreq->tag < VSCSI_REQ_LIMIT);
req = &s->reqs[sreq->tag];
assert(!req->active);
memset(req, 0, sizeof(*req));
- rc = vmstate_load_state(f, &vmstate_spapr_vscsi_req, req, 1);
+ rc = vmstate_load_state(f, &vmstate_spapr_vscsi_req, req, 1, &local_err);
if (rc) {
fprintf(stderr, "VSCSI: failed loading request tag#%u\n", sreq->tag);
+ error_report_err(local_err);
return NULL;
}
assert(req->active);
diff --git a/hw/scsi/trace-events b/hw/scsi/trace-events
index f0f2a98..6c2788e 100644
--- a/hw/scsi/trace-events
+++ b/hw/scsi/trace-events
@@ -198,6 +198,7 @@ esp_mem_writeb_cmd_ensel(uint32_t val) "Enable selection (0x%2.2x)"
esp_mem_writeb_cmd_dissel(uint32_t val) "Disable selection (0x%2.2x)"
esp_mem_writeb_cmd_ti(uint32_t val) "Transfer Information (0x%2.2x)"
esp_set_phase(const char *phase) "setting bus phase to %s"
+esp_invalid_cmd(uint8_t cmd, uint8_t asc_mode) "command 0x%x asc_mode 0x%x"
# esp-pci.c
esp_pci_error_invalid_dma_direction(void) "invalid DMA transfer direction"
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 34ae14f..d817fc4 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -116,11 +116,7 @@ static void virtio_scsi_complete_req(VirtIOSCSIReq *req, QemuMutex *vq_lock)
}
virtqueue_push(vq, &req->elem, req->qsgl.size + req->resp_iov.size);
- if (s->dataplane_started && !s->dataplane_fenced) {
- virtio_notify_irqfd(vdev, vq);
- } else {
- virtio_notify(vdev, vq);
- }
+ virtio_notify(vdev, vq);
if (vq_lock) {
qemu_mutex_unlock(vq_lock);
diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c
index d5825b6..7c98b1b 100644
--- a/hw/scsi/vmw_pvscsi.c
+++ b/hw/scsi/vmw_pvscsi.c
@@ -68,18 +68,7 @@ struct PVSCSIClass {
OBJECT_DECLARE_TYPE(PVSCSIState, PVSCSIClass, PVSCSI)
-/* Compatibility flags for migration */
-#define PVSCSI_COMPAT_OLD_PCI_CONFIGURATION_BIT 0
-#define PVSCSI_COMPAT_OLD_PCI_CONFIGURATION \
- (1 << PVSCSI_COMPAT_OLD_PCI_CONFIGURATION_BIT)
-#define PVSCSI_COMPAT_DISABLE_PCIE_BIT 1
-#define PVSCSI_COMPAT_DISABLE_PCIE \
- (1 << PVSCSI_COMPAT_DISABLE_PCIE_BIT)
-
-#define PVSCSI_USE_OLD_PCI_CONFIGURATION(s) \
- ((s)->compat_flags & PVSCSI_COMPAT_OLD_PCI_CONFIGURATION)
-#define PVSCSI_MSI_OFFSET(s) \
- (PVSCSI_USE_OLD_PCI_CONFIGURATION(s) ? 0x50 : 0x7c)
+#define PVSCSI_MSI_OFFSET (0x7c)
#define PVSCSI_EXP_EP_OFFSET (0x40)
typedef struct PVSCSIRingInfo {
@@ -129,8 +118,6 @@ struct PVSCSIState {
uint8_t msi_used; /* For migration compatibility */
PVSCSIRingInfo rings; /* Data transfer rings manager */
uint32_t resetting; /* Reset in progress */
-
- uint32_t compat_flags;
};
typedef struct PVSCSIRequest {
@@ -1110,7 +1097,7 @@ pvscsi_init_msi(PVSCSIState *s)
int res;
PCIDevice *d = PCI_DEVICE(s);
- res = msi_init(d, PVSCSI_MSI_OFFSET(s), PVSCSI_MSIX_NUM_VECTORS,
+ res = msi_init(d, PVSCSI_MSI_OFFSET, PVSCSI_MSIX_NUM_VECTORS,
PVSCSI_USE_64BIT, PVSCSI_PER_VECTOR_MASK, NULL);
if (res < 0) {
trace_pvscsi_init_msi_fail(res);
@@ -1158,15 +1145,11 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp)
trace_pvscsi_state("init");
/* PCI subsystem ID, subsystem vendor ID, revision */
- if (PVSCSI_USE_OLD_PCI_CONFIGURATION(s)) {
- pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID, 0x1000);
- } else {
- pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID,
- PCI_VENDOR_ID_VMWARE);
- pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID,
- PCI_DEVICE_ID_VMWARE_PVSCSI);
- pci_config_set_revision(pci_dev->config, 0x2);
- }
+ pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID,
+ PCI_VENDOR_ID_VMWARE);
+ pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID,
+ PCI_DEVICE_ID_VMWARE_PVSCSI);
+ pci_config_set_revision(pci_dev->config, 0x2);
/* PCI latency timer = 255 */
pci_dev->config[PCI_LATENCY_TIMER] = 0xff;
@@ -1234,21 +1217,8 @@ pvscsi_post_load(void *opaque, int version_id)
return 0;
}
-static bool pvscsi_vmstate_need_pcie_device(void *opaque)
-{
- PVSCSIState *s = PVSCSI(opaque);
-
- return !(s->compat_flags & PVSCSI_COMPAT_DISABLE_PCIE);
-}
-
-static bool pvscsi_vmstate_test_pci_device(void *opaque, int version_id)
-{
- return !pvscsi_vmstate_need_pcie_device(opaque);
-}
-
static const VMStateDescription vmstate_pvscsi_pcie_device = {
.name = "pvscsi/pcie",
- .needed = pvscsi_vmstate_need_pcie_device,
.fields = (const VMStateField[]) {
VMSTATE_PCI_DEVICE(parent_obj, PVSCSIState),
VMSTATE_END_OF_LIST()
@@ -1262,9 +1232,6 @@ static const VMStateDescription vmstate_pvscsi = {
.pre_save = pvscsi_pre_save,
.post_load = pvscsi_post_load,
.fields = (const VMStateField[]) {
- VMSTATE_STRUCT_TEST(parent_obj, PVSCSIState,
- pvscsi_vmstate_test_pci_device, 0,
- vmstate_pci_device, PCIDevice),
VMSTATE_UINT8(msi_used, PVSCSIState),
VMSTATE_UINT32(resetting, PVSCSIState),
VMSTATE_UINT64(reg_interrupt_status, PVSCSIState),
@@ -1298,30 +1265,17 @@ static const VMStateDescription vmstate_pvscsi = {
static const Property pvscsi_properties[] = {
DEFINE_PROP_UINT8("use_msg", PVSCSIState, use_msg, 1),
- DEFINE_PROP_BIT("x-old-pci-configuration", PVSCSIState, compat_flags,
- PVSCSI_COMPAT_OLD_PCI_CONFIGURATION_BIT, false),
- DEFINE_PROP_BIT("x-disable-pcie", PVSCSIState, compat_flags,
- PVSCSI_COMPAT_DISABLE_PCIE_BIT, false),
};
-static void pvscsi_realize(DeviceState *qdev, Error **errp)
+static void pvscsi_instance_init(Object *obj)
{
- PVSCSIClass *pvs_c = PVSCSI_GET_CLASS(qdev);
- PCIDevice *pci_dev = PCI_DEVICE(qdev);
- PVSCSIState *s = PVSCSI(qdev);
-
- if (!(s->compat_flags & PVSCSI_COMPAT_DISABLE_PCIE)) {
- pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
- }
-
- pvs_c->parent_dc_realize(qdev, errp);
+ PCI_DEVICE(obj)->cap_present |= QEMU_PCI_CAP_EXPRESS;
}
static void pvscsi_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
- PVSCSIClass *pvs_k = PVSCSI_CLASS(klass);
HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
k->realize = pvscsi_realizefn;
@@ -1330,8 +1284,6 @@ static void pvscsi_class_init(ObjectClass *klass, const void *data)
k->device_id = PCI_DEVICE_ID_VMWARE_PVSCSI;
k->class_id = PCI_CLASS_STORAGE_SCSI;
k->subsystem_id = 0x1000;
- device_class_set_parent_realize(dc, pvscsi_realize,
- &pvs_k->parent_dc_realize);
device_class_set_legacy_reset(dc, pvscsi_reset);
dc->vmsd = &vmstate_pvscsi;
device_class_set_props(dc, pvscsi_properties);
@@ -1346,6 +1298,7 @@ static const TypeInfo pvscsi_info = {
.class_size = sizeof(PVSCSIClass),
.instance_size = sizeof(PVSCSIState),
.class_init = pvscsi_class_init,
+ .instance_init = pvscsi_instance_init,
.interfaces = (const InterfaceInfo[]) {
{ TYPE_HOTPLUG_HANDLER },
{ INTERFACE_PCIE_DEVICE },
diff --git a/hw/scsi/vmw_pvscsi.h b/hw/scsi/vmw_pvscsi.h
index 17fcf66..a3ae517 100644
--- a/hw/scsi/vmw_pvscsi.h
+++ b/hw/scsi/vmw_pvscsi.h
@@ -14,8 +14,8 @@
* details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ * along with this program; if not, see
+ * <https://www.gnu.org/licenses/>.
*
* Maintained by: Arvind Kumar <arvindkumar@vmware.com>
*
diff --git a/hw/sd/allwinner-sdhost.c b/hw/sd/allwinner-sdhost.c
index b31da5c..9d61b37 100644
--- a/hw/sd/allwinner-sdhost.c
+++ b/hw/sd/allwinner-sdhost.c
@@ -233,7 +233,7 @@ static void allwinner_sdhost_send_command(AwSdHostState *s)
{
SDRequest request;
uint8_t resp[16];
- int rlen;
+ size_t rlen;
/* Auto clear load flag */
s->command &= ~SD_CMDR_LOAD;
@@ -246,10 +246,7 @@ static void allwinner_sdhost_send_command(AwSdHostState *s)
request.arg = s->command_arg;
/* Send request to SD bus */
- rlen = sdbus_do_command(&s->sdbus, &request, resp);
- if (rlen < 0) {
- goto error;
- }
+ rlen = sdbus_do_command(&s->sdbus, &request, resp, sizeof(resp));
/* If the command has a response, store it in the response registers */
if ((s->command & SD_CMDR_RESPONSE)) {
diff --git a/hw/sd/bcm2835_sdhost.c b/hw/sd/bcm2835_sdhost.c
index 29debdf..f7cef7b 100644
--- a/hw/sd/bcm2835_sdhost.c
+++ b/hw/sd/bcm2835_sdhost.c
@@ -113,15 +113,12 @@ static void bcm2835_sdhost_send_command(BCM2835SDHostState *s)
{
SDRequest request;
uint8_t rsp[16];
- int rlen;
+ size_t rlen;
request.cmd = s->cmd & SDCMD_CMD_MASK;
request.arg = s->cmdarg;
- rlen = sdbus_do_command(&s->sdbus, &request, rsp);
- if (rlen < 0) {
- goto error;
- }
+ rlen = sdbus_do_command(&s->sdbus, &request, rsp, sizeof(rsp));
if (!(s->cmd & SDCMD_NO_RESPONSE)) {
if (rlen == 0 || (rlen == 4 && (s->cmd & SDCMD_LONG_RESPONSE))) {
goto error;
diff --git a/hw/sd/core.c b/hw/sd/core.c
index 4b30218..d3c9017 100644
--- a/hw/sd/core.c
+++ b/hw/sd/core.c
@@ -90,7 +90,8 @@ void sdbus_set_voltage(SDBus *sdbus, uint16_t millivolts)
}
}
-int sdbus_do_command(SDBus *sdbus, SDRequest *req, uint8_t *response)
+size_t sdbus_do_command(SDBus *sdbus, SDRequest *req,
+ uint8_t *resp, size_t respsz)
{
SDState *card = get_card(sdbus);
@@ -98,7 +99,7 @@ int sdbus_do_command(SDBus *sdbus, SDRequest *req, uint8_t *response)
if (card) {
SDCardClass *sc = SDMMC_COMMON_GET_CLASS(card);
- return sc->do_command(card, req, response);
+ return sc->do_command(card, req, resp, respsz);
}
return 0;
diff --git a/hw/sd/omap_mmc.c b/hw/sd/omap_mmc.c
index b7648d4..5a1d25d 100644
--- a/hw/sd/omap_mmc.c
+++ b/hw/sd/omap_mmc.c
@@ -130,7 +130,8 @@ static void omap_mmc_command(OMAPMMCState *host, int cmd, int dir,
sd_rsp_type_t resptype, int init)
{
uint32_t rspstatus, mask;
- int rsplen, timeout;
+ size_t rsplen;
+ int timeout;
SDRequest request;
uint8_t response[16];
@@ -157,7 +158,7 @@ static void omap_mmc_command(OMAPMMCState *host, int cmd, int dir,
request.arg = host->arg;
request.crc = 0; /* FIXME */
- rsplen = sdbus_do_command(&host->sdbus, &request, response);
+ rsplen = sdbus_do_command(&host->sdbus, &request, response, sizeof(response));
/* TODO: validate CRCs */
switch (resptype) {
diff --git a/hw/sd/pl181.c b/hw/sd/pl181.c
index b8fc9f8..5d56ead 100644
--- a/hw/sd/pl181.c
+++ b/hw/sd/pl181.c
@@ -173,14 +173,12 @@ static void pl181_do_command(PL181State *s)
{
SDRequest request;
uint8_t response[16];
- int rlen;
+ size_t rlen;
request.cmd = s->cmd & PL181_CMD_INDEX;
request.arg = s->cmdarg;
trace_pl181_command_send(request.cmd, request.arg);
- rlen = sdbus_do_command(&s->sdbus, &request, response);
- if (rlen < 0)
- goto error;
+ rlen = sdbus_do_command(&s->sdbus, &request, response, sizeof(response));
if (s->cmd & PL181_CMD_RESPONSE) {
if (rlen == 0 || (rlen == 4 && (s->cmd & PL181_CMD_LONGRESP)))
goto error;
diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index c275fdd..d7a496d 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -61,6 +61,7 @@
typedef enum {
sd_r0 = 0, /* no response */
sd_r1, /* normal response command */
+ spi_r2, /* STATUS */
sd_r2_i, /* CID register */
sd_r2_s, /* CSD register */
sd_r3, /* OCR register */
@@ -146,7 +147,6 @@ struct SDState {
/* Runtime changeables */
- uint32_t mode; /* current card mode, one of SDCardModes */
int32_t state; /* current card state, one of SDCardStates */
uint32_t vhs;
bool wp_switch;
@@ -195,7 +195,6 @@ static bool sd_is_emmc(SDState *sd)
static const char *sd_version_str(enum SDPhySpecificationVersion version)
{
static const char *sdphy_version[] = {
- [SD_PHY_SPECv1_10_VERS] = "v1.10",
[SD_PHY_SPECv2_00_VERS] = "v2.00",
[SD_PHY_SPECv3_01_VERS] = "v3.01",
};
@@ -247,6 +246,7 @@ static const char *sd_response_name(sd_rsp_type_t rsp)
static const char *response_name[] = {
[sd_r0] = "RESP#0 (no response)",
[sd_r1] = "RESP#1 (normal cmd)",
+ [spi_r2] = "RESP#2 (STATUS reg)",
[sd_r2_i] = "RESP#2 (CID reg)",
[sd_r2_s] = "RESP#2 (CSD reg)",
[sd_r3] = "RESP#3 (OCR reg)",
@@ -313,27 +313,24 @@ static void sd_set_voltage(SDState *sd, uint16_t millivolts)
}
}
-static void sd_set_mode(SDState *sd)
+static enum SDCardModes sd_mode(SDState *sd)
{
switch (sd->state) {
case sd_inactive_state:
- sd->mode = sd_inactive;
- break;
-
+ return sd_inactive;
case sd_idle_state:
case sd_ready_state:
case sd_identification_state:
- sd->mode = sd_card_identification_mode;
- break;
-
+ return sd_card_identification_mode;
case sd_standby_state:
case sd_transfer_state:
case sd_sendingdata_state:
case sd_receivingdata_state:
case sd_programming_state:
case sd_disconnect_state:
- sd->mode = sd_data_transfer_mode;
- break;
+ return sd_data_transfer_mode;
+ default:
+ g_assert_not_reached();
}
}
@@ -409,11 +406,7 @@ static void sd_set_ocr(SDState *sd)
static void sd_set_scr(SDState *sd)
{
sd->scr[0] = 0 << 4; /* SCR structure version 1.0 */
- if (sd->spec_version == SD_PHY_SPECv1_10_VERS) {
- sd->scr[0] |= 1; /* Spec Version 1.10 */
- } else {
- sd->scr[0] |= 2; /* Spec Version 2.00 or Version 3.0X */
- }
+ sd->scr[0] |= 2; /* Spec Version 2.00 or Version 3.0X */
sd->scr[1] = (2 << 4) /* SDSC Card (Security Version 1.01) */
| 0b0101; /* 1-bit or 4-bit width bus modes */
sd->scr[2] = 0x00; /* Extended Security is not supported. */
@@ -729,16 +722,82 @@ static int sd_req_crc_validate(SDRequest *req)
return sd_crc7(buffer, 5) != req->crc; /* TODO */
}
+static size_t sd_response_size(SDState *sd, sd_rsp_type_t rtype)
+{
+ switch (rtype) {
+ case sd_r1:
+ case sd_r1b:
+ return sd_is_spi(sd) ? 1 : 4;
+
+ case spi_r2:
+ assert(sd_is_spi(sd));
+ return 2;
+
+ case sd_r2_i:
+ case sd_r2_s:
+ assert(!sd_is_spi(sd));
+ return 16;
+
+ case sd_r3:
+ case sd_r7:
+ return sd_is_spi(sd) ? 5 : 4;
+
+ case sd_r6:
+ assert(!sd_is_spi(sd));
+ return 4;
+
+ case sd_r0:
+ case sd_illegal:
+ return sd_is_spi(sd) ? 1 : 0;
+
+ default:
+ g_assert_not_reached();
+ }
+}
+
static void sd_response_r1_make(SDState *sd, uint8_t *response)
{
- stl_be_p(response, sd->card_status);
+ if (sd_is_spi(sd)) {
+ response[0] = sd->state == sd_idle_state
+ && !FIELD_EX32(sd->ocr, OCR, CARD_POWER_UP);
+ response[0] |= FIELD_EX32(sd->card_status, CSR, ERASE_RESET) << 1;
+ response[0] |= FIELD_EX32(sd->card_status, CSR, ILLEGAL_COMMAND) << 2;
+ response[0] |= FIELD_EX32(sd->card_status, CSR, COM_CRC_ERROR) << 3;
+ response[0] |= FIELD_EX32(sd->card_status, CSR, ERASE_SEQ_ERROR) << 4;
+ response[0] |= FIELD_EX32(sd->card_status, CSR, ADDRESS_ERROR) << 5;
+ response[0] |= FIELD_EX32(sd->card_status, CSR, BLOCK_LEN_ERROR) << 6;
+ response[0] |= 0 << 7;
+ } else {
+ stl_be_p(response, sd->card_status);
+ }
/* Clear the "clear on read" status bits */
sd->card_status &= ~CARD_STATUS_C;
}
+static void spi_response_r2_make(SDState *sd, uint8_t *resp)
+{
+ /* Prepend R1 */
+ sd_response_r1_make(sd, resp);
+
+ resp[1] = FIELD_EX32(sd->card_status, CSR, CARD_IS_LOCKED) << 0;
+ resp[1] |= (FIELD_EX32(sd->card_status, CSR, LOCK_UNLOCK_FAILED)
+ || FIELD_EX32(sd->card_status, CSR, WP_ERASE_SKIP)) << 1;
+ resp[1] |= FIELD_EX32(sd->card_status, CSR, ERROR) << 2;
+ resp[1] |= FIELD_EX32(sd->card_status, CSR, CC_ERROR) << 3;
+ resp[1] |= FIELD_EX32(sd->card_status, CSR, CARD_ECC_FAILED) << 4;
+ resp[1] |= FIELD_EX32(sd->card_status, CSR, WP_VIOLATION) << 5;
+ resp[1] |= FIELD_EX32(sd->card_status, CSR, ERASE_PARAM) << 6;
+ resp[1] |= FIELD_EX32(sd->card_status, CSR, OUT_OF_RANGE) << 7;
+}
+
static void sd_response_r3_make(SDState *sd, uint8_t *response)
{
+ if (sd_is_spi(sd)) {
+ /* Prepend R1 */
+ sd_response_r1_make(sd, response);
+ response++;
+ }
stl_be_p(response, sd->ocr & ACMD41_R3_MASK);
}
@@ -756,6 +815,11 @@ static void sd_response_r6_make(SDState *sd, uint8_t *response)
static void sd_response_r7_make(SDState *sd, uint8_t *response)
{
+ if (sd_is_spi(sd)) {
+ /* Prepend R1 */
+ sd_response_r1_make(sd, response);
+ response++;
+ }
stl_be_p(response, sd->vhs);
}
@@ -769,14 +833,14 @@ static uint32_t sd_blk_len(SDState *sd)
/*
* This requires a disk image that has two boot partitions inserted at the
- * beginning of it. The size of the boot partitions is the "boot-size"
- * property.
+ * beginning of it, followed by an RPMB partition. The size of the boot
+ * partitions is the "boot-partition-size" property.
*/
-static uint32_t sd_bootpart_offset(SDState *sd)
+static uint32_t sd_part_offset(SDState *sd)
{
unsigned partition_access;
- if (!sd->boot_part_size || !sd_is_emmc(sd)) {
+ if (!sd_is_emmc(sd)) {
return 0;
}
@@ -785,9 +849,9 @@ static uint32_t sd_bootpart_offset(SDState *sd)
switch (partition_access) {
case EXT_CSD_PART_CONFIG_ACC_DEFAULT:
return sd->boot_part_size * 2;
- case EXT_CSD_PART_CONFIG_ACC_BOOT0:
+ case EXT_CSD_PART_CONFIG_ACC_BOOT1:
return 0;
- case EXT_CSD_PART_CONFIG_ACC_BOOT0 + 1:
+ case EXT_CSD_PART_CONFIG_ACC_BOOT2:
return sd->boot_part_size * 1;
default:
g_assert_not_reached();
@@ -952,7 +1016,7 @@ static const VMStateDescription sd_vmstate = {
.minimum_version_id = 2,
.pre_load = sd_vmstate_pre_load,
.fields = (const VMStateField[]) {
- VMSTATE_UINT32(mode, SDState),
+ VMSTATE_UNUSED(4),
VMSTATE_INT32(state, SDState),
VMSTATE_UINT8_ARRAY(cid, SDState, 16),
VMSTATE_UINT8_ARRAY(csd, SDState, 16),
@@ -988,7 +1052,7 @@ static const VMStateDescription sd_vmstate = {
static void sd_blk_read(SDState *sd, uint64_t addr, uint32_t len)
{
trace_sdcard_read_block(addr, len);
- addr += sd_bootpart_offset(sd);
+ addr += sd_part_offset(sd);
if (!sd->blk || blk_pread(sd->blk, addr, len, sd->data, 0) < 0) {
fprintf(stderr, "sd_blk_read: read error on host side\n");
}
@@ -997,7 +1061,7 @@ static void sd_blk_read(SDState *sd, uint64_t addr, uint32_t len)
static void sd_blk_write(SDState *sd, uint64_t addr, uint32_t len)
{
trace_sdcard_write_block(addr, len);
- addr += sd_bootpart_offset(sd);
+ addr += sd_part_offset(sd);
if (!sd->blk || blk_pwrite(sd->blk, addr, len, sd->data, 0) < 0) {
fprintf(stderr, "sd_blk_write: write error on host side\n");
}
@@ -1252,7 +1316,7 @@ static sd_rsp_type_t sd_invalid_state_for_cmd(SDState *sd, SDRequest req)
static sd_rsp_type_t sd_invalid_mode_for_cmd(SDState *sd, SDRequest req)
{
qemu_log_mask(LOG_GUEST_ERROR, "%s: CMD%i in a wrong mode: %s (spec %s)\n",
- sd->proto->name, req.cmd, sd_mode_name(sd->mode),
+ sd->proto->name, req.cmd, sd_mode_name(sd_mode(sd)),
sd_version_str(sd->spec_version));
return sd_illegal;
@@ -1305,7 +1369,7 @@ static sd_rsp_type_t sd_cmd_to_sendingdata(SDState *sd, SDRequest req,
const void *data, size_t size)
{
if (sd->state != sd_transfer_state) {
- sd_invalid_state_for_cmd(sd, req);
+ return sd_invalid_state_for_cmd(sd, req);
}
sd->state = sd_sendingdata_state;
@@ -1341,14 +1405,6 @@ static sd_rsp_type_t sd_cmd_GO_IDLE_STATE(SDState *sd, SDRequest req)
return sd_is_spi(sd) ? sd_r1 : sd_r0;
}
-/* CMD1 */
-static sd_rsp_type_t spi_cmd_SEND_OP_COND(SDState *sd, SDRequest req)
-{
- sd->state = sd_transfer_state;
-
- return sd_r1;
-}
-
/* CMD2 */
static sd_rsp_type_t sd_cmd_ALL_SEND_CID(SDState *sd, SDRequest req)
{
@@ -1420,11 +1476,17 @@ static sd_rsp_type_t emmc_cmd_sleep_awake(SDState *sd, SDRequest req)
/* CMD6 */
static sd_rsp_type_t sd_cmd_SWITCH_FUNCTION(SDState *sd, SDRequest req)
{
- if (sd->mode != sd_data_transfer_mode) {
+ if (sd_mode(sd) != sd_data_transfer_mode) {
return sd_invalid_mode_for_cmd(sd, req);
}
- if (sd->state != sd_transfer_state) {
- return sd_invalid_state_for_cmd(sd, req);
+ if (sd_is_spi(sd)) {
+ if (sd->state == sd_idle_state) {
+ return sd_invalid_state_for_cmd(sd, req);
+ }
+ } else {
+ if (sd->state != sd_transfer_state) {
+ return sd_invalid_state_for_cmd(sd, req);
+ }
}
sd_function_switch(sd, req.arg);
@@ -1488,9 +1550,6 @@ static sd_rsp_type_t sd_cmd_DE_SELECT_CARD(SDState *sd, SDRequest req)
/* CMD8 */
static sd_rsp_type_t sd_cmd_SEND_IF_COND(SDState *sd, SDRequest req)
{
- if (sd->spec_version < SD_PHY_SPECv2_00_VERS) {
- return sd_cmd_illegal(sd, req);
- }
if (sd->state != sd_idle_state) {
return sd_invalid_state_for_cmd(sd, req);
}
@@ -1517,14 +1576,30 @@ static sd_rsp_type_t emmc_cmd_SEND_EXT_CSD(SDState *sd, SDRequest req)
sd->ext_csd, sizeof(sd->ext_csd));
}
-/* CMD9 */
-static sd_rsp_type_t spi_cmd_SEND_CSD(SDState *sd, SDRequest req)
+static sd_rsp_type_t spi_cmd_SEND_CxD(SDState *sd, SDRequest req,
+ const void *data, size_t size)
{
+ /*
+ * XXX as of v10.1.0-rc1 command is reached in sd_idle_state,
+ * so disable this check.
if (sd->state != sd_standby_state) {
return sd_invalid_state_for_cmd(sd, req);
}
- return sd_cmd_to_sendingdata(sd, req, sd_req_get_address(sd, req),
- sd->csd, 16);
+ */
+
+ /*
+ * Since SPI returns CSD and CID on the DAT lines,
+ * switch to sd_transfer_state.
+ */
+ sd->state = sd_transfer_state;
+
+ return sd_cmd_to_sendingdata(sd, req, 0, data, size);
+}
+
+/* CMD9 */
+static sd_rsp_type_t spi_cmd_SEND_CSD(SDState *sd, SDRequest req)
+{
+ return spi_cmd_SEND_CxD(sd, req, sd->csd, sizeof(sd->csd));
}
static sd_rsp_type_t sd_cmd_SEND_CSD(SDState *sd, SDRequest req)
@@ -1539,11 +1614,7 @@ static sd_rsp_type_t sd_cmd_SEND_CSD(SDState *sd, SDRequest req)
/* CMD10 */
static sd_rsp_type_t spi_cmd_SEND_CID(SDState *sd, SDRequest req)
{
- if (sd->state != sd_standby_state) {
- return sd_invalid_state_for_cmd(sd, req);
- }
- return sd_cmd_to_sendingdata(sd, req, sd_req_get_address(sd, req),
- sd->cid, 16);
+ return spi_cmd_SEND_CxD(sd, req, sd->cid, sizeof(sd->cid));
}
static sd_rsp_type_t sd_cmd_SEND_CID(SDState *sd, SDRequest req)
@@ -1575,7 +1646,7 @@ static sd_rsp_type_t sd_cmd_STOP_TRANSMISSION(SDState *sd, SDRequest req)
/* CMD13 */
static sd_rsp_type_t sd_cmd_SEND_STATUS(SDState *sd, SDRequest req)
{
- if (sd->mode != sd_data_transfer_mode) {
+ if (sd_mode(sd) != sd_data_transfer_mode) {
return sd_invalid_mode_for_cmd(sd, req);
}
@@ -1592,7 +1663,7 @@ static sd_rsp_type_t sd_cmd_SEND_STATUS(SDState *sd, SDRequest req)
}
if (sd_is_spi(sd)) {
- return sd_r2_s;
+ return spi_r2;
}
return sd_req_rca_same(sd, req) ? sd_r1 : sd_r0;
@@ -1601,7 +1672,7 @@ static sd_rsp_type_t sd_cmd_SEND_STATUS(SDState *sd, SDRequest req)
/* CMD15 */
static sd_rsp_type_t sd_cmd_GO_INACTIVE_STATE(SDState *sd, SDRequest req)
{
- if (sd->mode != sd_data_transfer_mode) {
+ if (sd_mode(sd) != sd_data_transfer_mode) {
return sd_invalid_mode_for_cmd(sd, req);
}
switch (sd->state) {
@@ -1906,8 +1977,14 @@ static sd_rsp_type_t sd_acmd_SET_BUS_WIDTH(SDState *sd, SDRequest req)
/* ACMD13 */
static sd_rsp_type_t sd_acmd_SD_STATUS(SDState *sd, SDRequest req)
{
- return sd_cmd_to_sendingdata(sd, req, 0,
- sd->sd_status, sizeof(sd->sd_status));
+ sd_rsp_type_t rsp;
+
+ rsp = sd_cmd_to_sendingdata(sd, req, 0,
+ sd->sd_status, sizeof(sd->sd_status));
+ if (sd_is_spi(sd) && rsp != sd_illegal) {
+ return spi_r2;
+ }
+ return rsp;
}
/* ACMD22 */
@@ -1967,6 +2044,9 @@ static sd_rsp_type_t sd_cmd_SEND_OP_COND(SDState *sd, SDRequest req)
sd->state = sd_ready_state;
}
+ if (sd_is_spi(sd)) {
+ return sd_r1;
+ }
return sd_r3;
}
@@ -1998,7 +2078,9 @@ static sd_rsp_type_t sd_normal_command(SDState *sd, SDRequest req)
if (req.cmd != 55 || sd->expecting_acmd) {
trace_sdcard_normal_command(sd->proto->name,
sd->last_cmd_name, req.cmd,
- req.arg, sd_state_name(sd->state));
+ req.arg,
+ sd_mode_name(sd_mode(sd)),
+ sd_state_name(sd->state));
}
/* Not interpreting this as an app command */
@@ -2084,7 +2166,9 @@ static sd_rsp_type_t sd_app_command(SDState *sd,
{
sd->last_cmd_name = sd_acmd_name(sd, req.cmd);
trace_sdcard_app_command(sd->proto->name, sd->last_cmd_name,
- req.cmd, req.arg, sd_state_name(sd->state));
+ req.cmd, req.arg,
+ sd_mode_name(sd_mode(sd)),
+ sd_state_name(sd->state));
sd->card_status |= APP_CMD;
if (sd->proto->acmd[req.cmd].handler) {
@@ -2139,8 +2223,9 @@ static bool cmd_valid_while_locked(SDState *sd, unsigned cmd)
return cmd_class == 0 || cmd_class == 7;
}
-static int sd_do_command(SDState *sd, SDRequest *req,
- uint8_t *response) {
+static size_t sd_do_command(SDState *sd, SDRequest *req,
+ uint8_t *response, size_t respsz)
+{
int last_state;
sd_rsp_type_t rtype;
int rsplen;
@@ -2183,7 +2268,6 @@ static int sd_do_command(SDState *sd, SDRequest *req,
}
last_state = sd->state;
- sd_set_mode(sd);
if (sd->expecting_acmd) {
sd->expecting_acmd = false;
@@ -2203,36 +2287,37 @@ static int sd_do_command(SDState *sd, SDRequest *req,
}
send_response:
+ rsplen = sd_response_size(sd, rtype);
+ assert(rsplen <= respsz);
+
switch (rtype) {
case sd_r1:
case sd_r1b:
sd_response_r1_make(sd, response);
- rsplen = 4;
+ break;
+
+ case spi_r2:
+ spi_response_r2_make(sd, response);
break;
case sd_r2_i:
memcpy(response, sd->cid, sizeof(sd->cid));
- rsplen = 16;
break;
case sd_r2_s:
memcpy(response, sd->csd, sizeof(sd->csd));
- rsplen = 16;
break;
case sd_r3:
sd_response_r3_make(sd, response);
- rsplen = 4;
break;
case sd_r6:
sd_response_r6_make(sd, response);
- rsplen = 4;
break;
case sd_r7:
sd_response_r7_make(sd, response);
- rsplen = 4;
break;
case sd_r0:
@@ -2244,7 +2329,6 @@ send_response:
sd->data_offset = 0;
/* fall-through */
case sd_illegal:
- rsplen = 0;
break;
default:
g_assert_not_reached();
@@ -2510,7 +2594,7 @@ static const SDProto sd_proto_spi = {
.name = "SPI",
.cmd = {
[0] = {0, sd_spi, "GO_IDLE_STATE", sd_cmd_GO_IDLE_STATE},
- [1] = {0, sd_spi, "SEND_OP_COND", spi_cmd_SEND_OP_COND},
+ [1] = {0, sd_spi, "SEND_OP_COND", sd_cmd_SEND_OP_COND},
[5] = {9, sd_spi, "IO_SEND_OP_COND", sd_cmd_optional},
[6] = {10, sd_spi, "SWITCH_FUNCTION", sd_cmd_SWITCH_FUNCTION},
[8] = {0, sd_spi, "SEND_IF_COND", sd_cmd_SEND_IF_COND},
@@ -2546,7 +2630,7 @@ static const SDProto sd_proto_spi = {
[13] = {8, sd_spi, "SD_STATUS", sd_acmd_SD_STATUS},
[22] = {8, sd_spi, "SEND_NUM_WR_BLOCKS", sd_acmd_SEND_NUM_WR_BLOCKS},
[23] = {8, sd_spi, "SET_WR_BLK_ERASE_COUNT", sd_acmd_SET_WR_BLK_ERASE_COUNT},
- [41] = {8, sd_spi, "SEND_OP_COND", spi_cmd_SEND_OP_COND},
+ [41] = {8, sd_spi, "SEND_OP_COND", sd_cmd_SEND_OP_COND},
[42] = {8, sd_spi, "SET_CLR_CARD_DETECT", sd_acmd_SET_CLR_CARD_DETECT},
[51] = {8, sd_spi, "SEND_SCR", sd_acmd_SEND_SCR},
},
@@ -2681,7 +2765,7 @@ static void sd_realize(DeviceState *dev, Error **errp)
int ret;
switch (sd->spec_version) {
- case SD_PHY_SPECv1_10_VERS
+ case SD_PHY_SPECv2_00_VERS
... SD_PHY_SPECv3_01_VERS:
break;
default:
@@ -2726,6 +2810,15 @@ static void sd_realize(DeviceState *dev, Error **errp)
}
blk_set_dev_ops(sd->blk, &sd_block_ops, sd);
}
+ if (sd->boot_part_size % (128 * KiB) ||
+ sd->boot_part_size > 255 * 128 * KiB) {
+ g_autofree char *size_str = size_to_str(sd->boot_part_size);
+
+ error_setg(errp, "Invalid boot partition size: %s", size_str);
+ error_append_hint(errp,
+ "The boot partition size must be multiples of 128K"
+ "and not larger than 32640K.\n");
+ }
}
static void emmc_realize(DeviceState *dev, Error **errp)
diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
index 226ff13..89b595c 100644
--- a/hw/sd/sdhci.c
+++ b/hw/sd/sdhci.c
@@ -337,7 +337,7 @@ static void sdhci_send_command(SDHCIState *s)
{
SDRequest request;
uint8_t response[16];
- int rlen;
+ size_t rlen;
bool timeout = false;
s->errintsts = 0;
@@ -346,7 +346,7 @@ static void sdhci_send_command(SDHCIState *s)
request.arg = s->argument;
trace_sdhci_send_command(request.cmd, request.arg);
- rlen = sdbus_do_command(&s->sdbus, &request, response);
+ rlen = sdbus_do_command(&s->sdbus, &request, response, sizeof(response));
if (s->cmdreg & SDHC_CMD_RESPONSE) {
if (rlen == 4) {
@@ -400,7 +400,7 @@ static void sdhci_end_transfer(SDHCIState *s)
request.cmd = 0x0C;
request.arg = 0;
trace_sdhci_end_transfer(request.cmd, request.arg);
- sdbus_do_command(&s->sdbus, &request, response);
+ sdbus_do_command(&s->sdbus, &request, response, sizeof(response));
/* Auto CMD12 response goes to the upper Response register */
s->rspreg[3] = ldl_be_p(response);
}
@@ -1578,10 +1578,6 @@ static void sdhci_sysbus_finalize(Object *obj)
{
SDHCIState *s = SYSBUS_SDHCI(obj);
- if (s->dma_mr) {
- object_unparent(OBJECT(s->dma_mr));
- }
-
sdhci_uninitfn(s);
}
diff --git a/hw/sd/sdmmc-internal.h b/hw/sd/sdmmc-internal.h
index 91eb5b6..ce6bc4e 100644
--- a/hw/sd/sdmmc-internal.h
+++ b/hw/sd/sdmmc-internal.h
@@ -116,7 +116,8 @@ DECLARE_OBJ_CHECKERS(SDState, SDCardClass, SDMMC_COMMON, TYPE_SDMMC_COMMON)
#define EXT_CSD_PART_CONFIG_ACC_MASK (0x7)
#define EXT_CSD_PART_CONFIG_ACC_DEFAULT (0x0)
-#define EXT_CSD_PART_CONFIG_ACC_BOOT0 (0x1)
+#define EXT_CSD_PART_CONFIG_ACC_BOOT1 (0x1)
+#define EXT_CSD_PART_CONFIG_ACC_BOOT2 (0x2)
#define EXT_CSD_PART_CONFIG_EN_MASK (0x7 << 3)
#define EXT_CSD_PART_CONFIG_EN_BOOT0 (0x1 << 3)
diff --git a/hw/sd/ssi-sd.c b/hw/sd/ssi-sd.c
index 6c90a86..3aacbd0 100644
--- a/hw/sd/ssi-sd.c
+++ b/hw/sd/ssi-sd.c
@@ -70,23 +70,6 @@ struct ssi_sd_state {
#define TYPE_SSI_SD "ssi-sd"
OBJECT_DECLARE_SIMPLE_TYPE(ssi_sd_state, SSI_SD)
-/* State word bits. */
-#define SSI_SDR_LOCKED 0x0001
-#define SSI_SDR_WP_ERASE 0x0002
-#define SSI_SDR_ERROR 0x0004
-#define SSI_SDR_CC_ERROR 0x0008
-#define SSI_SDR_ECC_FAILED 0x0010
-#define SSI_SDR_WP_VIOLATION 0x0020
-#define SSI_SDR_ERASE_PARAM 0x0040
-#define SSI_SDR_OUT_OF_RANGE 0x0080
-#define SSI_SDR_IDLE 0x0100
-#define SSI_SDR_ERASE_RESET 0x0200
-#define SSI_SDR_ILLEGAL_COMMAND 0x0400
-#define SSI_SDR_COM_CRC_ERROR 0x0800
-#define SSI_SDR_ERASE_SEQ_ERROR 0x1000
-#define SSI_SDR_ADDRESS_ERROR 0x2000
-#define SSI_SDR_PARAMETER_ERROR 0x4000
-
/* multiple block write */
#define SSI_TOKEN_MULTI_WRITE 0xfc
/* terminate multiple block write */
@@ -104,7 +87,11 @@ static uint32_t ssi_sd_transfer(SSIPeripheral *dev, uint32_t val)
{
ssi_sd_state *s = SSI_SD(dev);
SDRequest request;
- uint8_t longresp[16];
+ uint8_t longresp[5];
+
+ if (!sdbus_get_inserted(&s->sdbus)) {
+ return SSI_DUMMY;
+ }
/*
* Special case: allow CMD12 (STOP TRANSMISSION) while reading data.
@@ -146,8 +133,9 @@ static uint32_t ssi_sd_transfer(SSIPeripheral *dev, uint32_t val)
/* manually issue cmd12 to stop the transfer */
request.cmd = 12;
request.arg = 0;
- s->arglen = sdbus_do_command(&s->sdbus, &request, longresp);
- if (s->arglen <= 0) {
+ s->arglen = sdbus_do_command(&s->sdbus, &request,
+ longresp, sizeof(longresp));
+ if (s->arglen == 0) {
s->arglen = 1;
/* a zero value indicates the card is busy */
s->response[0] = 0;
@@ -170,73 +158,15 @@ static uint32_t ssi_sd_transfer(SSIPeripheral *dev, uint32_t val)
/* FIXME: Check CRC. */
request.cmd = s->cmd;
request.arg = ldl_be_p(s->cmdarg);
- DPRINTF("CMD%d arg 0x%08x\n", s->cmd, request.arg);
- s->arglen = sdbus_do_command(&s->sdbus, &request, longresp);
- if (s->arglen <= 0) {
- s->arglen = 1;
- s->response[0] = 4;
- DPRINTF("SD command failed\n");
- } else if (s->cmd == 8 || s->cmd == 58) {
- /* CMD8/CMD58 returns R3/R7 response */
- DPRINTF("Returned R3/R7\n");
- s->arglen = 5;
- s->response[0] = 1;
- memcpy(&s->response[1], longresp, 4);
- } else if (s->arglen != 4) {
- BADF("Unexpected response to cmd %d\n", s->cmd);
- /* Illegal command is about as near as we can get. */
- s->arglen = 1;
- s->response[0] = 4;
- } else {
- /* All other commands return status. */
- uint32_t cardstatus;
- uint16_t status;
- /* CMD13 returns a 2-byte statuse work. Other commands
- only return the first byte. */
- s->arglen = (s->cmd == 13) ? 2 : 1;
-
- /* handle R1b */
- if (s->cmd == 28 || s->cmd == 29 || s->cmd == 38) {
- s->stopping = 1;
- }
+ s->arglen = sdbus_do_command(&s->sdbus, &request,
+ longresp, sizeof(longresp));
+ DPRINTF("CMD%d arg 0x%08x = %d\n", s->cmd, request.arg, s->arglen);
+ assert(s->arglen > 0);
+ memcpy(s->response, longresp, s->arglen);
- cardstatus = ldl_be_p(longresp);
- status = 0;
- if (((cardstatus >> 9) & 0xf) < 4)
- status |= SSI_SDR_IDLE;
- if (cardstatus & ERASE_RESET)
- status |= SSI_SDR_ERASE_RESET;
- if (cardstatus & ILLEGAL_COMMAND)
- status |= SSI_SDR_ILLEGAL_COMMAND;
- if (cardstatus & COM_CRC_ERROR)
- status |= SSI_SDR_COM_CRC_ERROR;
- if (cardstatus & ERASE_SEQ_ERROR)
- status |= SSI_SDR_ERASE_SEQ_ERROR;
- if (cardstatus & ADDRESS_ERROR)
- status |= SSI_SDR_ADDRESS_ERROR;
- if (cardstatus & CARD_IS_LOCKED)
- status |= SSI_SDR_LOCKED;
- if (cardstatus & (LOCK_UNLOCK_FAILED | WP_ERASE_SKIP))
- status |= SSI_SDR_WP_ERASE;
- if (cardstatus & SD_ERROR)
- status |= SSI_SDR_ERROR;
- if (cardstatus & CC_ERROR)
- status |= SSI_SDR_CC_ERROR;
- if (cardstatus & CARD_ECC_FAILED)
- status |= SSI_SDR_ECC_FAILED;
- if (cardstatus & WP_VIOLATION)
- status |= SSI_SDR_WP_VIOLATION;
- if (cardstatus & ERASE_PARAM)
- status |= SSI_SDR_ERASE_PARAM;
- if (cardstatus & (OUT_OF_RANGE | CID_CSD_OVERWRITE))
- status |= SSI_SDR_OUT_OF_RANGE;
- /* ??? Don't know what Parameter Error really means, so
- assume it's set if the second byte is nonzero. */
- if (status & 0xff)
- status |= SSI_SDR_PARAMETER_ERROR;
- s->response[0] = status >> 8;
- s->response[1] = status;
- DPRINTF("Card status 0x%02x\n", status);
+ /* handle R1b (busy signal) */
+ if (s->cmd == 28 || s->cmd == 29 || s->cmd == 38) {
+ s->stopping = 1;
}
s->mode = SSI_SD_PREP_RESP;
s->response_pos = 0;
@@ -333,7 +263,7 @@ static int ssi_sd_post_load(void *opaque, int version_id)
return -EINVAL;
}
if (s->mode == SSI_SD_CMDARG &&
- (s->arglen < 0 || s->arglen >= ARRAY_SIZE(s->cmdarg))) {
+ (s->arglen >= ARRAY_SIZE(s->cmdarg))) {
return -EINVAL;
}
if (s->mode == SSI_SD_RESPONSE &&
diff --git a/hw/sd/trace-events b/hw/sd/trace-events
index db06442..8d49840 100644
--- a/hw/sd/trace-events
+++ b/hw/sd/trace-events
@@ -37,8 +37,8 @@ sdhci_write_dataport(uint16_t data_count) "write buffer filled with %u bytes of
sdhci_capareg(const char *desc, uint16_t val) "%s: %u"
# sd.c
-sdcard_normal_command(const char *proto, const char *cmd_desc, uint8_t cmd, uint32_t arg, const char *state) "%s %20s/ CMD%02d arg 0x%08x (state %s)"
-sdcard_app_command(const char *proto, const char *acmd_desc, uint8_t acmd, uint32_t arg, const char *state) "%s %23s/ACMD%02d arg 0x%08x (state %s)"
+sdcard_normal_command(const char *proto, const char *cmd_desc, uint8_t cmd, uint32_t arg, const char *mode, const char *state) "%s %20s/ CMD%02d arg 0x%08x (mode %s, state %s)"
+sdcard_app_command(const char *proto, const char *acmd_desc, uint8_t acmd, uint32_t arg, const char *mode, const char *state) "%s %23s/ACMD%02d arg 0x%08x (mode %s, state %s)"
sdcard_response(const char *rspdesc, int rsplen) "%s (sz:%d)"
sdcard_powerup(void) ""
sdcard_inquiry_cmd41(void) ""
diff --git a/hw/sensor/lsm303dlhc_mag.c b/hw/sensor/lsm303dlhc_mag.c
index f9e501d..cd5773a 100644
--- a/hw/sensor/lsm303dlhc_mag.c
+++ b/hw/sensor/lsm303dlhc_mag.c
@@ -28,7 +28,6 @@
#include "qapi/visitor.h"
#include "qemu/module.h"
#include "qemu/log.h"
-#include "qemu/bswap.h"
enum LSM303DLHCMagReg {
LSM303DLHC_MAG_REG_CRA = 0x00,
diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c
index ad4cd67..13e21a9 100644
--- a/hw/smbios/smbios.c
+++ b/hw/smbios/smbios.c
@@ -17,6 +17,7 @@
#include "qemu/osdep.h"
#include "qemu/units.h"
+#include "qemu/bswap.h"
#include "qapi/error.h"
#include "qemu/config-file.h"
#include "qemu/module.h"
@@ -178,6 +179,10 @@ static const QemuOptDesc qemu_smbios_type0_opts[] = {
.name = "uefi",
.type = QEMU_OPT_BOOL,
.help = "uefi support",
+ },{
+ .name = "vm",
+ .type = QEMU_OPT_BOOL,
+ .help = "virtual machine",
},
{ /* end of list */ }
};
@@ -573,10 +578,14 @@ static void smbios_build_type_0_table(void)
t->bios_characteristics = cpu_to_le64(0x08); /* Not supported */
t->bios_characteristics_extension_bytes[0] = 0;
- t->bios_characteristics_extension_bytes[1] = 0x14; /* TCD/SVVP | VM */
+
+ t->bios_characteristics_extension_bytes[1] = 0x04; /* TCD/SVVP */
if (smbios_type0.uefi) {
t->bios_characteristics_extension_bytes[1] |= 0x08; /* |= UEFI */
}
+ if (smbios_type0.vm) {
+ t->bios_characteristics_extension_bytes[1] |= 0x10; /* |= VM */
+ }
if (smbios_type0.have_major_minor) {
t->system_bios_major_release = smbios_type0.major;
@@ -1404,6 +1413,7 @@ void smbios_entry_add(QemuOpts *opts, Error **errp)
save_opt(&smbios_type0.version, opts, "version");
save_opt(&smbios_type0.date, opts, "date");
smbios_type0.uefi = qemu_opt_get_bool(opts, "uefi", false);
+ smbios_type0.vm = qemu_opt_get_bool(opts, "vm", true);
val = qemu_opt_get(opts, "release");
if (val) {
diff --git a/hw/sparc/leon3.c b/hw/sparc/leon3.c
index 0aeaad3..09d2cec 100644
--- a/hw/sparc/leon3.c
+++ b/hw/sparc/leon3.c
@@ -192,7 +192,7 @@ static void leon3_cache_control_int(CPUSPARCState *env)
static void leon3_irq_ack(CPUSPARCState *env, int intno)
{
- CPUState *cpu = CPU(env_cpu(env));
+ CPUState *cpu = env_cpu(env);
grlib_irqmp_ack(env->irq_manager, cpu->cpu_index, intno);
}
diff --git a/hw/ssi/aspeed_smc.c b/hw/ssi/aspeed_smc.c
index 614528b..e33496f 100644
--- a/hw/ssi/aspeed_smc.c
+++ b/hw/ssi/aspeed_smc.c
@@ -1857,7 +1857,8 @@ static void aspeed_1030_fmc_class_init(ObjectClass *klass, const void *data)
asc->resets = aspeed_1030_fmc_resets;
asc->flash_window_base = 0x80000000;
asc->flash_window_size = 0x10000000;
- asc->features = ASPEED_SMC_FEATURE_DMA;
+ asc->features = ASPEED_SMC_FEATURE_DMA |
+ ASPEED_SMC_FEATURE_WDT_CONTROL;
asc->dma_flash_mask = 0x0FFFFFFC;
asc->dma_dram_mask = 0x000BFFFC;
asc->dma_start_length = 1;
diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index d1b7bc5..1acba4f 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -38,6 +38,9 @@
#include "hw/timer/i8254.h"
#include "system/address-spaces.h"
#include "qom/object.h"
+#include "qemu/lockable.h"
+#include "qemu/seqlock.h"
+#include "qemu/main-loop.h"
#include "trace.h"
struct hpet_fw_config hpet_fw_cfg = {.count = UINT8_MAX};
@@ -69,9 +72,11 @@ struct HPETState {
SysBusDevice parent_obj;
/*< public >*/
+ QemuMutex lock;
MemoryRegion iomem;
uint64_t hpet_offset;
bool hpet_offset_saved;
+ QemuSeqLock state_version;
qemu_irq irqs[HPET_NUM_IRQ_ROUTES];
uint32_t flags;
uint8_t rtc_irq_level;
@@ -218,12 +223,15 @@ static void update_irq(struct HPETTimer *timer, int set)
timer->fsb & 0xffffffff, MEMTXATTRS_UNSPECIFIED,
NULL);
} else if (timer->config & HPET_TN_TYPE_LEVEL) {
+ BQL_LOCK_GUARD();
qemu_irq_raise(s->irqs[route]);
} else {
+ BQL_LOCK_GUARD();
qemu_irq_pulse(s->irqs[route]);
}
} else {
if (!timer_fsb_route(timer)) {
+ BQL_LOCK_GUARD();
qemu_irq_lower(s->irqs[route]);
}
}
@@ -328,16 +336,16 @@ static const VMStateDescription vmstate_hpet_timer = {
static const VMStateDescription vmstate_hpet = {
.name = "hpet",
.version_id = 2,
- .minimum_version_id = 1,
+ .minimum_version_id = 2,
.pre_save = hpet_pre_save,
.post_load = hpet_post_load,
.fields = (const VMStateField[]) {
VMSTATE_UINT64(config, HPETState),
VMSTATE_UINT64(isr, HPETState),
VMSTATE_UINT64(hpet_counter, HPETState),
- VMSTATE_UINT8_V(num_timers_save, HPETState, 2),
+ VMSTATE_UINT8(num_timers_save, HPETState),
VMSTATE_VALIDATE("num_timers must match", hpet_validate_num_timers),
- VMSTATE_STRUCT_VARRAY_UINT8(timer, HPETState, num_timers, 0,
+ VMSTATE_STRUCT_VARRAY_UINT8(timer, HPETState, num_timers_save, 0,
vmstate_hpet_timer, HPETTimer),
VMSTATE_END_OF_LIST()
},
@@ -426,9 +434,41 @@ static uint64_t hpet_ram_read(void *opaque, hwaddr addr,
uint64_t cur_tick;
trace_hpet_ram_read(addr);
+ addr &= ~4;
+
+ if (addr == HPET_COUNTER) {
+ unsigned version;
+
+ /*
+ * Write update is rare, so busywait here is unlikely to happen
+ */
+ do {
+ version = seqlock_read_begin(&s->state_version);
+ if (unlikely(!hpet_enabled(s))) {
+ cur_tick = s->hpet_counter;
+ } else {
+ cur_tick = hpet_get_ticks(s);
+ }
+ } while (seqlock_read_retry(&s->state_version, version));
+ trace_hpet_ram_read_reading_counter(addr & 4, cur_tick);
+ return cur_tick >> shift;
+ }
- /*address range of all TN regs*/
- if (addr >= 0x100 && addr <= 0x3ff) {
+ QEMU_LOCK_GUARD(&s->lock);
+ /*address range of all global regs*/
+ if (addr <= 0xff) {
+ switch (addr) {
+ case HPET_ID: // including HPET_PERIOD
+ return s->capability >> shift;
+ case HPET_CFG:
+ return s->config >> shift;
+ case HPET_STATUS:
+ return s->isr >> shift;
+ default:
+ trace_hpet_ram_read_invalid();
+ break;
+ }
+ } else {
uint8_t timer_id = (addr - 0x100) / 0x20;
HPETTimer *timer = &s->timer[timer_id];
@@ -437,7 +477,7 @@ static uint64_t hpet_ram_read(void *opaque, hwaddr addr,
return 0;
}
- switch (addr & 0x18) {
+ switch (addr & 0x1f) {
case HPET_TN_CFG: // including interrupt capabilities
return timer->config >> shift;
case HPET_TN_CMP: // comparator register
@@ -448,26 +488,6 @@ static uint64_t hpet_ram_read(void *opaque, hwaddr addr,
trace_hpet_ram_read_invalid();
break;
}
- } else {
- switch (addr & ~4) {
- case HPET_ID: // including HPET_PERIOD
- return s->capability >> shift;
- case HPET_CFG:
- return s->config >> shift;
- case HPET_COUNTER:
- if (hpet_enabled(s)) {
- cur_tick = hpet_get_ticks(s);
- } else {
- cur_tick = s->hpet_counter;
- }
- trace_hpet_ram_read_reading_counter(addr & 4, cur_tick);
- return cur_tick >> shift;
- case HPET_STATUS:
- return s->isr >> shift;
- default:
- trace_hpet_ram_read_invalid();
- break;
- }
}
return 0;
}
@@ -481,10 +501,74 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
int len = MIN(size * 8, 64 - shift);
uint64_t old_val, new_val, cleared;
+ QEMU_LOCK_GUARD(&s->lock);
trace_hpet_ram_write(addr, value);
+ addr &= ~4;
- /*address range of all TN regs*/
- if (addr >= 0x100 && addr <= 0x3ff) {
+ /*address range of all global regs*/
+ if (addr <= 0xff) {
+ switch (addr) {
+ case HPET_ID:
+ return;
+ case HPET_CFG:
+ old_val = s->config;
+ new_val = deposit64(old_val, shift, len, value);
+ new_val = hpet_fixup_reg(new_val, old_val, HPET_CFG_WRITE_MASK);
+ seqlock_write_begin(&s->state_version);
+ s->config = new_val;
+ if (activating_bit(old_val, new_val, HPET_CFG_ENABLE)) {
+ /* Enable main counter and interrupt generation. */
+ s->hpet_offset =
+ ticks_to_ns(s->hpet_counter) - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+ for (i = 0; i < s->num_timers; i++) {
+ if (timer_enabled(&s->timer[i]) && (s->isr & (1 << i))) {
+ update_irq(&s->timer[i], 1);
+ }
+ hpet_set_timer(&s->timer[i]);
+ }
+ } else if (deactivating_bit(old_val, new_val, HPET_CFG_ENABLE)) {
+ /* Halt main counter and disable interrupt generation. */
+ s->hpet_counter = hpet_get_ticks(s);
+ for (i = 0; i < s->num_timers; i++) {
+ hpet_del_timer(&s->timer[i]);
+ }
+ }
+ seqlock_write_end(&s->state_version);
+
+ /* i8254 and RTC output pins are disabled
+ * when HPET is in legacy mode */
+ if (activating_bit(old_val, new_val, HPET_CFG_LEGACY)) {
+ BQL_LOCK_GUARD();
+ qemu_set_irq(s->pit_enabled, 0);
+ qemu_irq_lower(s->irqs[0]);
+ qemu_irq_lower(s->irqs[RTC_ISA_IRQ]);
+ } else if (deactivating_bit(old_val, new_val, HPET_CFG_LEGACY)) {
+ BQL_LOCK_GUARD();
+ qemu_irq_lower(s->irqs[0]);
+ qemu_set_irq(s->pit_enabled, 1);
+ qemu_set_irq(s->irqs[RTC_ISA_IRQ], s->rtc_irq_level);
+ }
+ break;
+ case HPET_STATUS:
+ new_val = value << shift;
+ cleared = new_val & s->isr;
+ for (i = 0; i < s->num_timers; i++) {
+ if (cleared & (1 << i)) {
+ update_irq(&s->timer[i], 0);
+ }
+ }
+ break;
+ case HPET_COUNTER:
+ if (hpet_enabled(s)) {
+ trace_hpet_ram_write_counter_write_while_enabled();
+ }
+ s->hpet_counter = deposit64(s->hpet_counter, shift, len, value);
+ break;
+ default:
+ trace_hpet_ram_write_invalid();
+ break;
+ }
+ } else {
uint8_t timer_id = (addr - 0x100) / 0x20;
HPETTimer *timer = &s->timer[timer_id];
@@ -550,63 +634,6 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
break;
}
return;
- } else {
- switch (addr & ~4) {
- case HPET_ID:
- return;
- case HPET_CFG:
- old_val = s->config;
- new_val = deposit64(old_val, shift, len, value);
- new_val = hpet_fixup_reg(new_val, old_val, HPET_CFG_WRITE_MASK);
- s->config = new_val;
- if (activating_bit(old_val, new_val, HPET_CFG_ENABLE)) {
- /* Enable main counter and interrupt generation. */
- s->hpet_offset =
- ticks_to_ns(s->hpet_counter) - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
- for (i = 0; i < s->num_timers; i++) {
- if (timer_enabled(&s->timer[i]) && (s->isr & (1 << i))) {
- update_irq(&s->timer[i], 1);
- }
- hpet_set_timer(&s->timer[i]);
- }
- } else if (deactivating_bit(old_val, new_val, HPET_CFG_ENABLE)) {
- /* Halt main counter and disable interrupt generation. */
- s->hpet_counter = hpet_get_ticks(s);
- for (i = 0; i < s->num_timers; i++) {
- hpet_del_timer(&s->timer[i]);
- }
- }
- /* i8254 and RTC output pins are disabled
- * when HPET is in legacy mode */
- if (activating_bit(old_val, new_val, HPET_CFG_LEGACY)) {
- qemu_set_irq(s->pit_enabled, 0);
- qemu_irq_lower(s->irqs[0]);
- qemu_irq_lower(s->irqs[RTC_ISA_IRQ]);
- } else if (deactivating_bit(old_val, new_val, HPET_CFG_LEGACY)) {
- qemu_irq_lower(s->irqs[0]);
- qemu_set_irq(s->pit_enabled, 1);
- qemu_set_irq(s->irqs[RTC_ISA_IRQ], s->rtc_irq_level);
- }
- break;
- case HPET_STATUS:
- new_val = value << shift;
- cleared = new_val & s->isr;
- for (i = 0; i < s->num_timers; i++) {
- if (cleared & (1 << i)) {
- update_irq(&s->timer[i], 0);
- }
- }
- break;
- case HPET_COUNTER:
- if (hpet_enabled(s)) {
- trace_hpet_ram_write_counter_write_while_enabled();
- }
- s->hpet_counter = deposit64(s->hpet_counter, shift, len, value);
- break;
- default:
- trace_hpet_ram_write_invalid();
- break;
- }
}
}
@@ -662,11 +689,13 @@ static void hpet_handle_legacy_irq(void *opaque, int n, int level)
if (n == HPET_LEGACY_PIT_INT) {
if (!hpet_in_legacy_mode(s)) {
+ BQL_LOCK_GUARD();
qemu_set_irq(s->irqs[0], level);
}
} else {
s->rtc_irq_level = level;
if (!hpet_in_legacy_mode(s)) {
+ BQL_LOCK_GUARD();
qemu_set_irq(s->irqs[RTC_ISA_IRQ], level);
}
}
@@ -677,8 +706,11 @@ static void hpet_init(Object *obj)
SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
HPETState *s = HPET(obj);
+ qemu_mutex_init(&s->lock);
+ seqlock_init(&s->state_version);
/* HPET Area */
memory_region_init_io(&s->iomem, obj, &hpet_ram_ops, s, "hpet", HPET_LEN);
+ memory_region_enable_lockless_io(&s->iomem);
sysbus_init_mmio(sbd, &s->iomem);
}
@@ -689,8 +721,14 @@ static void hpet_realize(DeviceState *dev, Error **errp)
int i;
HPETTimer *timer;
+ if (s->num_timers < HPET_MIN_TIMERS || s->num_timers > HPET_MAX_TIMERS) {
+ error_setg(errp, "hpet.num_timers must be between %d and %d",
+ HPET_MIN_TIMERS, HPET_MAX_TIMERS);
+ return;
+ }
if (!s->intcap) {
- warn_report("Hpet's intcap not initialized");
+ error_setg(errp, "hpet.hpet-intcap not initialized");
+ return;
}
if (hpet_fw_cfg.count == UINT8_MAX) {
/* first instance */
@@ -698,7 +736,7 @@ static void hpet_realize(DeviceState *dev, Error **errp)
}
if (hpet_fw_cfg.count == 8) {
- error_setg(errp, "Only 8 instances of HPET is allowed");
+ error_setg(errp, "Only 8 instances of HPET are allowed");
return;
}
@@ -708,11 +746,6 @@ static void hpet_realize(DeviceState *dev, Error **errp)
sysbus_init_irq(sbd, &s->irqs[i]);
}
- if (s->num_timers < HPET_MIN_TIMERS) {
- s->num_timers = HPET_MIN_TIMERS;
- } else if (s->num_timers > HPET_MAX_TIMERS) {
- s->num_timers = HPET_MAX_TIMERS;
- }
for (i = 0; i < HPET_MAX_TIMERS; i++) {
timer = &s->timer[i];
timer->qemu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, hpet_timer, timer);
diff --git a/hw/uefi/trace.h b/hw/uefi/trace.h
new file mode 100644
index 0000000..6aa1c93
--- /dev/null
+++ b/hw/uefi/trace.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#include "trace/trace-hw_uefi.h"
diff --git a/hw/uefi/var-service-core.c b/hw/uefi/var-service-core.c
index 4836a0c..6ab8df0 100644
--- a/hw/uefi/var-service-core.c
+++ b/hw/uefi/var-service-core.c
@@ -12,7 +12,7 @@
#include "hw/uefi/var-service-api.h"
#include "hw/uefi/var-service-edk2.h"
-#include "trace/trace-hw_uefi.h"
+#include "trace.h"
static int uefi_vars_pre_load(void *opaque)
{
@@ -259,8 +259,8 @@ static void uefi_vars_write(void *opaque, hwaddr addr, uint64_t val, unsigned si
uv->buf_size = val;
g_free(uv->buffer);
g_free(uv->pio_xfer_buffer);
- uv->buffer = g_malloc(uv->buf_size);
- uv->pio_xfer_buffer = g_malloc(uv->buf_size);
+ uv->buffer = g_malloc0(uv->buf_size);
+ uv->pio_xfer_buffer = g_malloc0(uv->buf_size);
break;
case UEFI_VARS_REG_DMA_BUFFER_ADDR_LO:
uv->buf_addr_lo = val;
diff --git a/hw/uefi/var-service-json.c b/hw/uefi/var-service-json.c
index ad3462c..f5f1556 100644
--- a/hw/uefi/var-service-json.c
+++ b/hw/uefi/var-service-json.c
@@ -172,7 +172,7 @@ static GString *uefi_vars_to_json(uefi_vars_state *uv)
void uefi_vars_json_init(uefi_vars_state *uv, Error **errp)
{
if (uv->jsonfile) {
- uv->jsonfd = qemu_create(uv->jsonfile, O_RDWR, 0666, errp);
+ uv->jsonfd = qemu_create(uv->jsonfile, O_RDWR | O_BINARY, 0666, errp);
}
}
diff --git a/hw/uefi/var-service-policy.c b/hw/uefi/var-service-policy.c
index 3b1155f..58da4ad 100644
--- a/hw/uefi/var-service-policy.c
+++ b/hw/uefi/var-service-policy.c
@@ -14,7 +14,7 @@
#include "hw/uefi/var-service-api.h"
#include "hw/uefi/var-service-edk2.h"
-#include "trace/trace-hw_uefi.h"
+#include "trace.h"
static void calc_policy(uefi_var_policy *pol);
diff --git a/hw/uefi/var-service-utils.c b/hw/uefi/var-service-utils.c
index c9ef465..258013f 100644
--- a/hw/uefi/var-service-utils.c
+++ b/hw/uefi/var-service-utils.c
@@ -8,7 +8,7 @@
#include "hw/uefi/var-service.h"
-#include "trace/trace-hw_uefi.h"
+#include "trace.h"
/* ------------------------------------------------------------------ */
diff --git a/hw/uefi/var-service-vars.c b/hw/uefi/var-service-vars.c
index 7f98d77..8533533 100644
--- a/hw/uefi/var-service-vars.c
+++ b/hw/uefi/var-service-vars.c
@@ -12,7 +12,7 @@
#include "hw/uefi/var-service-api.h"
#include "hw/uefi/var-service-edk2.h"
-#include "trace/trace-hw_uefi.h"
+#include "trace.h"
#define EFI_VARIABLE_ATTRIBUTE_SUPPORTED \
(EFI_VARIABLE_NON_VOLATILE | \
@@ -357,6 +357,9 @@ uefi_vars_mm_get_next_variable(uefi_vars_state *uv, mm_header *mhdr,
if (uefi_strlen(name, nv->name_size) == 0) {
/* empty string -> first */
var = QTAILQ_FIRST(&uv->variables);
+ while (var && !check_access(uv, var)) {
+ var = QTAILQ_NEXT(var, next);
+ }
if (!var) {
return uefi_vars_mm_error(mhdr, mvar, EFI_NOT_FOUND);
}
@@ -702,12 +705,14 @@ uint32_t uefi_vars_mm_vars_proto(uefi_vars_state *uv)
case SMM_VARIABLE_FUNCTION_READY_TO_BOOT:
trace_uefi_event("ready-to-boot");
uv->ready_to_boot = true;
+ mvar->status = EFI_SUCCESS;
length = 0;
break;
case SMM_VARIABLE_FUNCTION_EXIT_BOOT_SERVICE:
trace_uefi_event("exit-boot-service");
uv->exit_boot_service = true;
+ mvar->status = EFI_SUCCESS;
length = 0;
break;
diff --git a/hw/ufs/lu.c b/hw/ufs/lu.c
index 57b307e..2d8ffd7 100644
--- a/hw/ufs/lu.c
+++ b/hw/ufs/lu.c
@@ -194,7 +194,7 @@ static int ufs_emulate_wlun_inquiry(UfsRequest *req, uint8_t *outbuf,
static UfsReqResult ufs_emulate_scsi_cmd(UfsLu *lu, UfsRequest *req)
{
uint8_t lun = lu->lun;
- uint8_t outbuf[4096];
+ QEMU_UNINITIALIZED uint8_t outbuf[4096];
uint8_t sense_buf[UFS_SENSE_SIZE];
uint8_t scsi_status;
int len = 0;
diff --git a/hw/usb/dev-hid.c b/hw/usb/dev-hid.c
index 54d064e..96623aa 100644
--- a/hw/usb/dev-hid.c
+++ b/hw/usb/dev-hid.c
@@ -491,14 +491,14 @@ static const uint8_t qemu_tablet_hid_report_descriptor[] = {
0xa1, 0x00, /* Collection (Physical) */
0x05, 0x09, /* Usage Page (Button) */
0x19, 0x01, /* Usage Minimum (1) */
- 0x29, 0x03, /* Usage Maximum (3) */
+ 0x29, 0x05, /* Usage Maximum (5) */
0x15, 0x00, /* Logical Minimum (0) */
0x25, 0x01, /* Logical Maximum (1) */
- 0x95, 0x03, /* Report Count (3) */
+ 0x95, 0x05, /* Report Count (5) */
0x75, 0x01, /* Report Size (1) */
0x81, 0x02, /* Input (Data, Variable, Absolute) */
0x95, 0x01, /* Report Count (1) */
- 0x75, 0x05, /* Report Size (5) */
+ 0x75, 0x03, /* Report Size (3) */
0x81, 0x01, /* Input (Constant) */
0x05, 0x01, /* Usage Page (Generic Desktop) */
0x09, 0x30, /* Usage (X) */
diff --git a/hw/usb/dev-network.c b/hw/usb/dev-network.c
index 81cc09d..1df2454 100644
--- a/hw/usb/dev-network.c
+++ b/hw/usb/dev-network.c
@@ -1383,7 +1383,7 @@ static void usb_net_realize(USBDevice *dev, Error **errp)
qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a);
snprintf(s->usbstring_mac, sizeof(s->usbstring_mac),
"%02x%02x%02x%02x%02x%02x",
- 0x40,
+ s->conf.macaddr.a[0],
s->conf.macaddr.a[1],
s->conf.macaddr.a[2],
s->conf.macaddr.a[3],
diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
index 71b5491..72a9f9f 100644
--- a/hw/usb/hcd-ohci.c
+++ b/hw/usb/hcd-ohci.c
@@ -577,7 +577,7 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed)
USBDevice *dev;
USBEndpoint *ep;
USBPacket *pkt;
- uint8_t buf[8192];
+ QEMU_UNINITIALIZED uint8_t buf[8192];
bool int_req;
struct ohci_iso_td iso_td;
uint32_t addr;
diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index 4822c70..e207d05 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -735,6 +735,7 @@ static int uhci_handle_td(UHCIState *s, UHCIQueue *q, uint32_t qh_addr,
bool spd;
bool queuing = (q != NULL);
uint8_t pid = td->token & 0xff;
+ uint8_t ep_id = (td->token >> 15) & 0xf;
UHCIAsync *async;
async = uhci_async_find_td(s, td_addr);
@@ -778,9 +779,14 @@ static int uhci_handle_td(UHCIState *s, UHCIQueue *q, uint32_t qh_addr,
switch (pid) {
case USB_TOKEN_OUT:
- case USB_TOKEN_SETUP:
case USB_TOKEN_IN:
break;
+ case USB_TOKEN_SETUP:
+ /* SETUP is only valid to endpoint 0 */
+ if (ep_id == 0) {
+ break;
+ }
+ /* fallthrough */
default:
/* invalid pid : frame interrupted */
s->status |= UHCI_STS_HCPERR;
@@ -829,7 +835,7 @@ static int uhci_handle_td(UHCIState *s, UHCIQueue *q, uint32_t qh_addr,
return uhci_handle_td_error(s, td, td_addr, USB_RET_NODEV,
int_mask);
}
- ep = usb_ep_get(dev, pid, (td->token >> 15) & 0xf);
+ ep = usb_ep_get(dev, pid, ep_id);
q = uhci_queue_new(s, qh_addr, td, ep);
}
async = uhci_async_alloc(q, td_addr);
diff --git a/hw/vfio-user/Kconfig b/hw/vfio-user/Kconfig
new file mode 100644
index 0000000..24bdf7a
--- /dev/null
+++ b/hw/vfio-user/Kconfig
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+config VFIO_USER
+ bool
+ default y
+ depends on VFIO_PCI
+
diff --git a/hw/vfio-user/container.c b/hw/vfio-user/container.c
new file mode 100644
index 0000000..e45192f
--- /dev/null
+++ b/hw/vfio-user/container.c
@@ -0,0 +1,353 @@
+/*
+ * Container for vfio-user IOMMU type: rather than communicating with the kernel
+ * vfio driver, we communicate over a socket to a server using the vfio-user
+ * protocol.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <sys/ioctl.h>
+#include <linux/vfio.h>
+#include "qemu/osdep.h"
+
+#include "hw/vfio-user/container.h"
+#include "hw/vfio-user/device.h"
+#include "hw/vfio-user/trace.h"
+#include "hw/vfio/vfio-device.h"
+#include "hw/vfio/vfio-listener.h"
+#include "qapi/error.h"
+
+/*
+ * When DMA space is the physical address space, the region add/del listeners
+ * will fire during memory update transactions. These depend on BQL being held,
+ * so do any resulting map/demap ops async while keeping BQL.
+ */
+static void vfio_user_listener_begin(VFIOContainer *bcontainer)
+{
+ VFIOUserContainer *container = VFIO_IOMMU_USER(bcontainer);
+
+ container->proxy->async_ops = true;
+}
+
+static void vfio_user_listener_commit(VFIOContainer *bcontainer)
+{
+ VFIOUserContainer *container = VFIO_IOMMU_USER(bcontainer);
+
+ /* wait here for any async requests sent during the transaction */
+ container->proxy->async_ops = false;
+ vfio_user_wait_reqs(container->proxy);
+}
+
+static int vfio_user_dma_unmap(const VFIOContainer *bcontainer,
+ hwaddr iova, uint64_t size,
+ IOMMUTLBEntry *iotlb, bool unmap_all)
+{
+ VFIOUserContainer *container = VFIO_IOMMU_USER(bcontainer);
+
+ Error *local_err = NULL;
+ int ret = 0;
+
+ VFIOUserDMAUnmap *msgp = g_malloc(sizeof(*msgp));
+
+ vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_UNMAP, sizeof(*msgp), 0);
+ msgp->argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
+ msgp->flags = unmap_all ? VFIO_DMA_UNMAP_FLAG_ALL : 0;
+ msgp->iova = iova;
+ msgp->size = size;
+ trace_vfio_user_dma_unmap(msgp->iova, msgp->size, msgp->flags,
+ container->proxy->async_ops);
+
+ if (container->proxy->async_ops) {
+ if (!vfio_user_send_nowait(container->proxy, &msgp->hdr, NULL,
+ 0, &local_err)) {
+ error_report_err(local_err);
+ ret = -EFAULT;
+ }
+ } else {
+ if (!vfio_user_send_wait(container->proxy, &msgp->hdr, NULL,
+ 0, &local_err)) {
+ error_report_err(local_err);
+ ret = -EFAULT;
+ }
+
+ if (msgp->hdr.flags & VFIO_USER_ERROR) {
+ ret = -msgp->hdr.error_reply;
+ }
+
+ g_free(msgp);
+ }
+
+ return ret;
+}
+
+static int vfio_user_dma_map(const VFIOContainer *bcontainer, hwaddr iova,
+ uint64_t size, void *vaddr, bool readonly,
+ MemoryRegion *mrp)
+{
+ VFIOUserContainer *container = VFIO_IOMMU_USER(bcontainer);
+
+ int fd = memory_region_get_fd(mrp);
+ Error *local_err = NULL;
+ int ret = 0;
+
+ VFIOUserFDs *fds = NULL;
+ VFIOUserDMAMap *msgp = g_malloc0(sizeof(*msgp));
+
+ vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_MAP, sizeof(*msgp), 0);
+ msgp->argsz = sizeof(struct vfio_iommu_type1_dma_map);
+ msgp->flags = VFIO_DMA_MAP_FLAG_READ;
+ msgp->offset = 0;
+ msgp->iova = iova;
+ msgp->size = size;
+
+ /*
+ * vaddr enters as a QEMU process address; make it either a file offset
+ * for mapped areas or leave as 0.
+ */
+ if (fd != -1) {
+ msgp->offset = qemu_ram_block_host_offset(mrp->ram_block, vaddr);
+ }
+
+ if (!readonly) {
+ msgp->flags |= VFIO_DMA_MAP_FLAG_WRITE;
+ }
+
+ trace_vfio_user_dma_map(msgp->iova, msgp->size, msgp->offset, msgp->flags,
+ container->proxy->async_ops);
+
+ /*
+ * The async_ops case sends without blocking. They're later waited for in
+ * vfio_send_wait_reqs.
+ */
+ if (container->proxy->async_ops) {
+ /* can't use auto variable since we don't block */
+ if (fd != -1) {
+ fds = vfio_user_getfds(1);
+ fds->send_fds = 1;
+ fds->fds[0] = fd;
+ }
+
+ if (!vfio_user_send_nowait(container->proxy, &msgp->hdr, fds,
+ 0, &local_err)) {
+ error_report_err(local_err);
+ ret = -EFAULT;
+ }
+ } else {
+ VFIOUserFDs local_fds = { 1, 0, &fd };
+
+ fds = fd != -1 ? &local_fds : NULL;
+
+ if (!vfio_user_send_wait(container->proxy, &msgp->hdr, fds,
+ 0, &local_err)) {
+ error_report_err(local_err);
+ ret = -EFAULT;
+ }
+
+ if (msgp->hdr.flags & VFIO_USER_ERROR) {
+ ret = -msgp->hdr.error_reply;
+ }
+
+ g_free(msgp);
+ }
+
+ return ret;
+}
+
+static int
+vfio_user_set_dirty_page_tracking(const VFIOContainer *bcontainer,
+ bool start, Error **errp)
+{
+ error_setg_errno(errp, ENOTSUP, "Not supported");
+ return -ENOTSUP;
+}
+
+static int vfio_user_query_dirty_bitmap(const VFIOContainer *bcontainer,
+ VFIOBitmap *vbmap, hwaddr iova,
+ hwaddr size, Error **errp)
+{
+ error_setg_errno(errp, ENOTSUP, "Not supported");
+ return -ENOTSUP;
+}
+
+static bool vfio_user_setup(VFIOContainer *bcontainer, Error **errp)
+{
+ VFIOUserContainer *container = VFIO_IOMMU_USER(bcontainer);
+
+ assert(container->proxy->dma_pgsizes != 0);
+ bcontainer->pgsizes = container->proxy->dma_pgsizes;
+ bcontainer->dma_max_mappings = container->proxy->max_dma;
+
+ /* No live migration support yet. */
+ bcontainer->dirty_pages_supported = false;
+ bcontainer->max_dirty_bitmap_size = container->proxy->max_bitmap;
+ bcontainer->dirty_pgsizes = container->proxy->migr_pgsize;
+
+ return true;
+}
+
+static VFIOUserContainer *vfio_user_create_container(VFIODevice *vbasedev,
+ Error **errp)
+{
+ VFIOUserContainer *container;
+
+ container = VFIO_IOMMU_USER(object_new(TYPE_VFIO_IOMMU_USER));
+ container->proxy = vbasedev->proxy;
+ return container;
+}
+
+/*
+ * Try to mirror vfio_container_connect() as much as possible.
+ */
+static VFIOUserContainer *
+vfio_user_container_connect(AddressSpace *as, VFIODevice *vbasedev,
+ Error **errp)
+{
+ VFIOContainer *bcontainer;
+ VFIOUserContainer *container;
+ VFIOAddressSpace *space;
+ VFIOIOMMUClass *vioc;
+ int ret;
+
+ space = vfio_address_space_get(as);
+
+ container = vfio_user_create_container(vbasedev, errp);
+ if (!container) {
+ goto put_space_exit;
+ }
+
+ bcontainer = VFIO_IOMMU(container);
+
+ ret = ram_block_uncoordinated_discard_disable(true);
+ if (ret) {
+ error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
+ goto free_container_exit;
+ }
+
+ vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+ assert(vioc->setup);
+
+ if (!vioc->setup(bcontainer, errp)) {
+ goto enable_discards_exit;
+ }
+
+ vfio_address_space_insert(space, bcontainer);
+
+ if (!vfio_listener_register(bcontainer, errp)) {
+ goto listener_release_exit;
+ }
+
+ bcontainer->initialized = true;
+
+ return container;
+
+listener_release_exit:
+ vfio_listener_unregister(bcontainer);
+ if (vioc->release) {
+ vioc->release(bcontainer);
+ }
+
+enable_discards_exit:
+ ram_block_uncoordinated_discard_disable(false);
+
+free_container_exit:
+ object_unref(container);
+
+put_space_exit:
+ vfio_address_space_put(space);
+
+ return NULL;
+}
+
+static void vfio_user_container_disconnect(VFIOUserContainer *container)
+{
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+ VFIOAddressSpace *space = bcontainer->space;
+
+ ram_block_uncoordinated_discard_disable(false);
+
+ vfio_listener_unregister(bcontainer);
+ if (vioc->release) {
+ vioc->release(bcontainer);
+ }
+
+ object_unref(container);
+
+ vfio_address_space_put(space);
+}
+
+static bool vfio_user_device_get(VFIOUserContainer *container,
+ VFIODevice *vbasedev, Error **errp)
+{
+ struct vfio_device_info info = { .argsz = sizeof(info) };
+
+
+ if (!vfio_user_get_device_info(vbasedev->proxy, &info, errp)) {
+ return false;
+ }
+
+ vbasedev->fd = -1;
+
+ vfio_device_prepare(vbasedev, VFIO_IOMMU(container), &info);
+
+ return true;
+}
+
+/*
+ * vfio_user_device_attach: attach a device to a new container.
+ */
+static bool vfio_user_device_attach(const char *name, VFIODevice *vbasedev,
+ AddressSpace *as, Error **errp)
+{
+ VFIOUserContainer *container;
+
+ container = vfio_user_container_connect(as, vbasedev, errp);
+ if (container == NULL) {
+ error_prepend(errp, "failed to connect proxy");
+ return false;
+ }
+
+ return vfio_user_device_get(container, vbasedev, errp);
+}
+
+static void vfio_user_device_detach(VFIODevice *vbasedev)
+{
+ VFIOUserContainer *container = VFIO_IOMMU_USER(vbasedev->bcontainer);
+
+ vfio_device_unprepare(vbasedev);
+
+ vfio_user_container_disconnect(container);
+}
+
+static int vfio_user_pci_hot_reset(VFIODevice *vbasedev, bool single)
+{
+ /* ->needs_reset is always false for vfio-user. */
+ return 0;
+}
+
+static void vfio_iommu_user_class_init(ObjectClass *klass, const void *data)
+{
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
+
+ vioc->setup = vfio_user_setup;
+ vioc->listener_begin = vfio_user_listener_begin,
+ vioc->listener_commit = vfio_user_listener_commit,
+ vioc->dma_map = vfio_user_dma_map;
+ vioc->dma_unmap = vfio_user_dma_unmap;
+ vioc->attach_device = vfio_user_device_attach;
+ vioc->detach_device = vfio_user_device_detach;
+ vioc->set_dirty_page_tracking = vfio_user_set_dirty_page_tracking;
+ vioc->query_dirty_bitmap = vfio_user_query_dirty_bitmap;
+ vioc->pci_hot_reset = vfio_user_pci_hot_reset;
+};
+
+static const TypeInfo types[] = {
+ {
+ .name = TYPE_VFIO_IOMMU_USER,
+ .parent = TYPE_VFIO_IOMMU,
+ .instance_size = sizeof(VFIOUserContainer),
+ .class_init = vfio_iommu_user_class_init,
+ },
+};
+
+DEFINE_TYPES(types)
diff --git a/hw/vfio-user/container.h b/hw/vfio-user/container.h
new file mode 100644
index 0000000..a2b42e3
--- /dev/null
+++ b/hw/vfio-user/container.h
@@ -0,0 +1,24 @@
+/*
+ * vfio-user specific definitions.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_VFIO_USER_CONTAINER_H
+#define HW_VFIO_USER_CONTAINER_H
+
+#include "qemu/osdep.h"
+
+#include "hw/vfio/vfio-container.h"
+#include "hw/vfio-user/proxy.h"
+
+/* MMU container sub-class for vfio-user. */
+struct VFIOUserContainer {
+ VFIOContainer parent_obj;
+
+ VFIOUserProxy *proxy;
+};
+
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserContainer, VFIO_IOMMU_USER);
+
+#endif /* HW_VFIO_USER_CONTAINER_H */
diff --git a/hw/vfio-user/device.c b/hw/vfio-user/device.c
new file mode 100644
index 0000000..0609a7d
--- /dev/null
+++ b/hw/vfio-user/device.c
@@ -0,0 +1,441 @@
+/*
+ * vfio protocol over a UNIX socket device handling.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/lockable.h"
+#include "qemu/thread.h"
+
+#include "hw/vfio-user/device.h"
+#include "hw/vfio-user/trace.h"
+
+/*
+ * These are to defend against a malign server trying
+ * to force us to run out of memory.
+ */
+#define VFIO_USER_MAX_REGIONS 100
+#define VFIO_USER_MAX_IRQS 50
+
+bool vfio_user_get_device_info(VFIOUserProxy *proxy,
+ struct vfio_device_info *info, Error **errp)
+{
+ VFIOUserDeviceInfo msg;
+ uint32_t argsz = sizeof(msg) - sizeof(msg.hdr);
+
+ memset(&msg, 0, sizeof(msg));
+ vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_INFO, sizeof(msg), 0);
+ msg.argsz = argsz;
+
+ if (!vfio_user_send_wait(proxy, &msg.hdr, NULL, 0, errp)) {
+ return false;
+ }
+
+ if (msg.hdr.flags & VFIO_USER_ERROR) {
+ error_setg_errno(errp, -msg.hdr.error_reply,
+ "VFIO_USER_DEVICE_GET_INFO failed");
+ return false;
+ }
+
+ trace_vfio_user_get_info(msg.num_regions, msg.num_irqs);
+
+ memcpy(info, &msg.argsz, argsz);
+
+ /* defend against a malicious server */
+ if (info->num_regions > VFIO_USER_MAX_REGIONS ||
+ info->num_irqs > VFIO_USER_MAX_IRQS) {
+ error_setg_errno(errp, EINVAL, "invalid reply");
+ return false;
+ }
+
+ return true;
+}
+
+void vfio_user_device_reset(VFIOUserProxy *proxy)
+{
+ Error *local_err = NULL;
+ VFIOUserHdr hdr;
+
+ vfio_user_request_msg(&hdr, VFIO_USER_DEVICE_RESET, sizeof(hdr), 0);
+
+ if (!vfio_user_send_wait(proxy, &hdr, NULL, 0, &local_err)) {
+ error_prepend(&local_err, "%s: ", __func__);
+ error_report_err(local_err);
+ return;
+ }
+
+ if (hdr.flags & VFIO_USER_ERROR) {
+ error_printf("reset reply error %d\n", hdr.error_reply);
+ }
+}
+
+static int vfio_user_get_region_info(VFIOUserProxy *proxy,
+ struct vfio_region_info *info,
+ VFIOUserFDs *fds)
+{
+ g_autofree VFIOUserRegionInfo *msgp = NULL;
+ Error *local_err = NULL;
+ uint32_t size;
+
+ /* data returned can be larger than vfio_region_info */
+ if (info->argsz < sizeof(*info)) {
+ error_printf("vfio_user_get_region_info argsz too small\n");
+ return -E2BIG;
+ }
+ if (fds != NULL && fds->send_fds != 0) {
+ error_printf("vfio_user_get_region_info can't send FDs\n");
+ return -EINVAL;
+ }
+
+ size = info->argsz + sizeof(VFIOUserHdr);
+ msgp = g_malloc0(size);
+
+ vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_GET_REGION_INFO,
+ sizeof(*msgp), 0);
+ msgp->argsz = info->argsz;
+ msgp->index = info->index;
+
+ if (!vfio_user_send_wait(proxy, &msgp->hdr, fds, size, &local_err)) {
+ error_prepend(&local_err, "%s: ", __func__);
+ error_report_err(local_err);
+ return -EFAULT;
+ }
+
+ if (msgp->hdr.flags & VFIO_USER_ERROR) {
+ return -msgp->hdr.error_reply;
+ }
+ trace_vfio_user_get_region_info(msgp->index, msgp->flags, msgp->size);
+
+ memcpy(info, &msgp->argsz, info->argsz);
+
+ /*
+ * If at least one region is directly mapped into the VM, then we can no
+ * longer rely on the sequential nature of vfio-user request handling to
+ * ensure that posted writes are completed before a subsequent read. In this
+ * case, disable posted write support. This is a per-device property, not
+ * per-region.
+ */
+ if (info->flags & VFIO_REGION_INFO_FLAG_MMAP) {
+ vfio_user_disable_posted_writes(proxy);
+ }
+
+ return 0;
+}
+
+static int vfio_user_device_io_get_region_info(VFIODevice *vbasedev,
+ struct vfio_region_info *info,
+ int *fd)
+{
+ VFIOUserFDs fds = { 0, 1, fd};
+ int ret;
+
+ if (info->index > vbasedev->num_regions) {
+ return -EINVAL;
+ }
+
+ ret = vfio_user_get_region_info(vbasedev->proxy, info, &fds);
+ if (ret) {
+ return ret;
+ }
+
+ /* cap_offset in valid area */
+ if ((info->flags & VFIO_REGION_INFO_FLAG_CAPS) &&
+ (info->cap_offset < sizeof(*info) || info->cap_offset > info->argsz)) {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int vfio_user_device_io_get_irq_info(VFIODevice *vbasedev,
+ struct vfio_irq_info *info)
+{
+ VFIOUserProxy *proxy = vbasedev->proxy;
+ Error *local_err = NULL;
+ VFIOUserIRQInfo msg;
+
+ memset(&msg, 0, sizeof(msg));
+ vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_IRQ_INFO,
+ sizeof(msg), 0);
+ msg.argsz = info->argsz;
+ msg.index = info->index;
+
+ if (!vfio_user_send_wait(proxy, &msg.hdr, NULL, 0, &local_err)) {
+ error_prepend(&local_err, "%s: ", __func__);
+ error_report_err(local_err);
+ return -EFAULT;
+ }
+
+ if (msg.hdr.flags & VFIO_USER_ERROR) {
+ return -msg.hdr.error_reply;
+ }
+ trace_vfio_user_get_irq_info(msg.index, msg.flags, msg.count);
+
+ memcpy(info, &msg.argsz, sizeof(*info));
+ return 0;
+}
+
+static int irq_howmany(int *fdp, uint32_t cur, uint32_t max)
+{
+ int n = 0;
+
+ if (fdp[cur] != -1) {
+ do {
+ n++;
+ } while (n < max && fdp[cur + n] != -1);
+ } else {
+ do {
+ n++;
+ } while (n < max && fdp[cur + n] == -1);
+ }
+
+ return n;
+}
+
+static int vfio_user_device_io_set_irqs(VFIODevice *vbasedev,
+ struct vfio_irq_set *irq)
+{
+ VFIOUserProxy *proxy = vbasedev->proxy;
+ g_autofree VFIOUserIRQSet *msgp = NULL;
+ uint32_t size, nfds, send_fds, sent_fds, max;
+ Error *local_err = NULL;
+
+ if (irq->argsz < sizeof(*irq)) {
+ error_printf("vfio_user_set_irqs argsz too small\n");
+ return -EINVAL;
+ }
+
+ /*
+ * Handle simple case
+ */
+ if ((irq->flags & VFIO_IRQ_SET_DATA_EVENTFD) == 0) {
+ size = sizeof(VFIOUserHdr) + irq->argsz;
+ msgp = g_malloc0(size);
+
+ vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS, size, 0);
+ msgp->argsz = irq->argsz;
+ msgp->flags = irq->flags;
+ msgp->index = irq->index;
+ msgp->start = irq->start;
+ msgp->count = irq->count;
+ trace_vfio_user_set_irqs(msgp->index, msgp->start, msgp->count,
+ msgp->flags);
+
+ if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, &local_err)) {
+ error_prepend(&local_err, "%s: ", __func__);
+ error_report_err(local_err);
+ return -EFAULT;
+ }
+
+ if (msgp->hdr.flags & VFIO_USER_ERROR) {
+ return -msgp->hdr.error_reply;
+ }
+
+ return 0;
+ }
+
+ /*
+ * Calculate the number of FDs to send
+ * and adjust argsz
+ */
+ nfds = (irq->argsz - sizeof(*irq)) / sizeof(int);
+ irq->argsz = sizeof(*irq);
+ msgp = g_malloc0(sizeof(*msgp));
+ /*
+ * Send in chunks if over max_send_fds
+ */
+ for (sent_fds = 0; nfds > sent_fds; sent_fds += send_fds) {
+ VFIOUserFDs *arg_fds, loop_fds;
+
+ /* must send all valid FDs or all invalid FDs in single msg */
+ max = nfds - sent_fds;
+ if (max > proxy->max_send_fds) {
+ max = proxy->max_send_fds;
+ }
+ send_fds = irq_howmany((int *)irq->data, sent_fds, max);
+
+ vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS,
+ sizeof(*msgp), 0);
+ msgp->argsz = irq->argsz;
+ msgp->flags = irq->flags;
+ msgp->index = irq->index;
+ msgp->start = irq->start + sent_fds;
+ msgp->count = send_fds;
+ trace_vfio_user_set_irqs(msgp->index, msgp->start, msgp->count,
+ msgp->flags);
+
+ loop_fds.send_fds = send_fds;
+ loop_fds.recv_fds = 0;
+ loop_fds.fds = (int *)irq->data + sent_fds;
+ arg_fds = loop_fds.fds[0] != -1 ? &loop_fds : NULL;
+
+ if (!vfio_user_send_wait(proxy, &msgp->hdr, arg_fds, 0, &local_err)) {
+ error_prepend(&local_err, "%s: ", __func__);
+ error_report_err(local_err);
+ return -EFAULT;
+ }
+
+ if (msgp->hdr.flags & VFIO_USER_ERROR) {
+ return -msgp->hdr.error_reply;
+ }
+ }
+
+ return 0;
+}
+
+static int vfio_user_device_io_region_read(VFIODevice *vbasedev, uint8_t index,
+ off_t off, uint32_t count,
+ void *data)
+{
+ g_autofree VFIOUserRegionRW *msgp = NULL;
+ VFIOUserProxy *proxy = vbasedev->proxy;
+ int size = sizeof(*msgp) + count;
+ Error *local_err = NULL;
+
+ if (count > proxy->max_xfer_size) {
+ return -EINVAL;
+ }
+
+ msgp = g_malloc0(size);
+ vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_READ, sizeof(*msgp), 0);
+ msgp->offset = off;
+ msgp->region = index;
+ msgp->count = count;
+ trace_vfio_user_region_rw(msgp->region, msgp->offset, msgp->count);
+
+ if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, size, &local_err)) {
+ error_prepend(&local_err, "%s: ", __func__);
+ error_report_err(local_err);
+ return -EFAULT;
+ }
+
+ if (msgp->hdr.flags & VFIO_USER_ERROR) {
+ return -msgp->hdr.error_reply;
+ } else if (msgp->count > count) {
+ return -E2BIG;
+ } else {
+ memcpy(data, &msgp->data, msgp->count);
+ }
+
+ return msgp->count;
+}
+
+/*
+ * If this is a posted write, and VFIO_PROXY_NO_POST is not set, then we are OK
+ * to send the write to the socket without waiting for the server's reply:
+ * a subsequent read (of any region) will not pass the posted write, as all
+ * messages are handled sequentially.
+ */
+static int vfio_user_device_io_region_write(VFIODevice *vbasedev, uint8_t index,
+ off_t off, unsigned count,
+ void *data, bool post)
+{
+ VFIOUserRegionRW *msgp = NULL;
+ VFIOUserProxy *proxy = vbasedev->proxy;
+ int size = sizeof(*msgp) + count;
+ Error *local_err = NULL;
+ bool can_multi;
+ int flags = 0;
+ int ret;
+
+ if (count > proxy->max_xfer_size) {
+ return -EINVAL;
+ }
+
+ if (proxy->flags & VFIO_PROXY_NO_POST) {
+ post = false;
+ }
+
+ if (post) {
+ flags |= VFIO_USER_NO_REPLY;
+ }
+
+ /* write eligible to be in a WRITE_MULTI msg ? */
+ can_multi = (proxy->flags & VFIO_PROXY_USE_MULTI) && post &&
+ count <= VFIO_USER_MULTI_DATA;
+
+ /*
+ * This should be a rare case, so first check without the lock,
+ * if we're wrong, vfio_send_queued() will flush any posted writes
+ * we missed here
+ */
+ if (proxy->wr_multi != NULL ||
+ (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi)) {
+
+ /*
+ * re-check with lock
+ *
+ * if already building a WRITE_MULTI msg,
+ * add this one if possible else flush pending before
+ * sending the current one
+ *
+ * else if outgoing queue is over the highwater,
+ * start a new WRITE_MULTI message
+ */
+ WITH_QEMU_LOCK_GUARD(&proxy->lock) {
+ if (proxy->wr_multi != NULL) {
+ if (can_multi) {
+ vfio_user_add_multi(proxy, index, off, count, data);
+ return count;
+ }
+ vfio_user_flush_multi(proxy);
+ } else if (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi) {
+ vfio_user_create_multi(proxy);
+ vfio_user_add_multi(proxy, index, off, count, data);
+ return count;
+ }
+ }
+ }
+
+ msgp = g_malloc0(size);
+ vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_WRITE, size, flags);
+ msgp->offset = off;
+ msgp->region = index;
+ msgp->count = count;
+ memcpy(&msgp->data, data, count);
+ trace_vfio_user_region_rw(msgp->region, msgp->offset, msgp->count);
+
+ /* async send will free msg after it's sent */
+ if (post) {
+ if (!vfio_user_send_async(proxy, &msgp->hdr, NULL, &local_err)) {
+ error_prepend(&local_err, "%s: ", __func__);
+ error_report_err(local_err);
+ return -EFAULT;
+ }
+
+ return count;
+ }
+
+ if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, &local_err)) {
+ error_prepend(&local_err, "%s: ", __func__);
+ error_report_err(local_err);
+ g_free(msgp);
+ return -EFAULT;
+ }
+
+ if (msgp->hdr.flags & VFIO_USER_ERROR) {
+ ret = -msgp->hdr.error_reply;
+ } else {
+ ret = count;
+ }
+
+ g_free(msgp);
+ return ret;
+}
+
+/*
+ * Socket-based io_ops
+ */
+VFIODeviceIOOps vfio_user_device_io_ops_sock = {
+ .get_region_info = vfio_user_device_io_get_region_info,
+ .get_irq_info = vfio_user_device_io_get_irq_info,
+ .set_irqs = vfio_user_device_io_set_irqs,
+ .region_read = vfio_user_device_io_region_read,
+ .region_write = vfio_user_device_io_region_write,
+
+};
diff --git a/hw/vfio-user/device.h b/hw/vfio-user/device.h
new file mode 100644
index 0000000..d183a39
--- /dev/null
+++ b/hw/vfio-user/device.h
@@ -0,0 +1,24 @@
+#ifndef VFIO_USER_DEVICE_H
+#define VFIO_USER_DEVICE_H
+
+/*
+ * vfio protocol over a UNIX socket device handling.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "linux/vfio.h"
+
+#include "hw/vfio-user/proxy.h"
+
+bool vfio_user_get_device_info(VFIOUserProxy *proxy,
+ struct vfio_device_info *info, Error **errp);
+
+void vfio_user_device_reset(VFIOUserProxy *proxy);
+
+extern VFIODeviceIOOps vfio_user_device_io_ops_sock;
+
+#endif /* VFIO_USER_DEVICE_H */
diff --git a/hw/vfio-user/meson.build b/hw/vfio-user/meson.build
new file mode 100644
index 0000000..2ed0ae5
--- /dev/null
+++ b/hw/vfio-user/meson.build
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+vfio_user_ss = ss.source_set()
+vfio_user_ss.add(files(
+ 'container.c',
+ 'device.c',
+ 'pci.c',
+ 'proxy.c',
+))
+
+system_ss.add_all(when: 'CONFIG_VFIO_USER', if_true: vfio_user_ss)
diff --git a/hw/vfio-user/pci.c b/hw/vfio-user/pci.c
new file mode 100644
index 0000000..b53ed3b
--- /dev/null
+++ b/hw/vfio-user/pci.c
@@ -0,0 +1,480 @@
+/*
+ * vfio PCI device over a UNIX socket.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <sys/ioctl.h>
+#include "qemu/osdep.h"
+#include "qapi-visit-sockets.h"
+#include "qemu/error-report.h"
+
+#include "hw/qdev-properties.h"
+#include "hw/vfio/pci.h"
+#include "hw/vfio-user/device.h"
+#include "hw/vfio-user/proxy.h"
+
+#define TYPE_VFIO_USER_PCI "vfio-user-pci"
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserPCIDevice, VFIO_USER_PCI)
+
+struct VFIOUserPCIDevice {
+ VFIOPCIDevice parent_obj;
+
+ SocketAddress *socket;
+ bool send_queued; /* all sends are queued */
+ uint32_t wait_time; /* timeout for message replies */
+ bool no_post; /* all region writes are sync */
+};
+
+/*
+ * The server maintains the device's pending interrupts,
+ * via its MSIX table and PBA, so we treat these accesses
+ * like PCI config space and forward them.
+ */
+static uint64_t vfio_user_pba_read(void *opaque, hwaddr addr,
+ unsigned size)
+{
+ VFIOPCIDevice *vdev = opaque;
+ VFIORegion *region = &vdev->bars[vdev->msix->pba_bar].region;
+ uint64_t data;
+
+ /* server copy is what matters */
+ data = vfio_region_read(region, addr + vdev->msix->pba_offset, size);
+ return data;
+}
+
+static void vfio_user_pba_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size)
+{
+ /* dropped */
+}
+
+static const MemoryRegionOps vfio_user_pba_ops = {
+ .read = vfio_user_pba_read,
+ .write = vfio_user_pba_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static void vfio_user_msix_setup(VFIOPCIDevice *vdev)
+{
+ MemoryRegion *vfio_reg, *msix_reg, *pba_reg;
+
+ pba_reg = g_new0(MemoryRegion, 1);
+ vdev->msix->pba_region = pba_reg;
+
+ vfio_reg = vdev->bars[vdev->msix->pba_bar].mr;
+ msix_reg = &PCI_DEVICE(vdev)->msix_pba_mmio;
+ memory_region_init_io(pba_reg, OBJECT(vdev), &vfio_user_pba_ops, vdev,
+ "VFIO MSIX PBA", int128_get64(msix_reg->size));
+ memory_region_add_subregion_overlap(vfio_reg, vdev->msix->pba_offset,
+ pba_reg, 1);
+}
+
+static void vfio_user_msix_teardown(VFIOPCIDevice *vdev)
+{
+ MemoryRegion *mr, *sub;
+
+ mr = vdev->bars[vdev->msix->pba_bar].mr;
+ sub = vdev->msix->pba_region;
+ memory_region_del_subregion(mr, sub);
+
+ g_free(vdev->msix->pba_region);
+ vdev->msix->pba_region = NULL;
+}
+
+static void vfio_user_dma_read(VFIOPCIDevice *vdev, VFIOUserDMARW *msg)
+{
+ PCIDevice *pdev = PCI_DEVICE(vdev);
+ VFIOUserProxy *proxy = vdev->vbasedev.proxy;
+ VFIOUserDMARW *res;
+ MemTxResult r;
+ size_t size;
+
+ if (msg->hdr.size < sizeof(*msg)) {
+ vfio_user_send_error(proxy, &msg->hdr, EINVAL);
+ return;
+ }
+ if (msg->count > proxy->max_xfer_size) {
+ vfio_user_send_error(proxy, &msg->hdr, E2BIG);
+ return;
+ }
+
+ /* switch to our own message buffer */
+ size = msg->count + sizeof(VFIOUserDMARW);
+ res = g_malloc0(size);
+ memcpy(res, msg, sizeof(*res));
+ g_free(msg);
+
+ r = pci_dma_read(pdev, res->offset, &res->data, res->count);
+
+ switch (r) {
+ case MEMTX_OK:
+ if (res->hdr.flags & VFIO_USER_NO_REPLY) {
+ g_free(res);
+ return;
+ }
+ vfio_user_send_reply(proxy, &res->hdr, size);
+ break;
+ case MEMTX_ERROR:
+ vfio_user_send_error(proxy, &res->hdr, EFAULT);
+ break;
+ case MEMTX_DECODE_ERROR:
+ vfio_user_send_error(proxy, &res->hdr, ENODEV);
+ break;
+ case MEMTX_ACCESS_ERROR:
+ vfio_user_send_error(proxy, &res->hdr, EPERM);
+ break;
+ default:
+ error_printf("vfio_user_dma_read unknown error %d\n", r);
+ vfio_user_send_error(vdev->vbasedev.proxy, &res->hdr, EINVAL);
+ }
+}
+
+static void vfio_user_dma_write(VFIOPCIDevice *vdev, VFIOUserDMARW *msg)
+{
+ PCIDevice *pdev = PCI_DEVICE(vdev);
+ VFIOUserProxy *proxy = vdev->vbasedev.proxy;
+ MemTxResult r;
+
+ if (msg->hdr.size < sizeof(*msg)) {
+ vfio_user_send_error(proxy, &msg->hdr, EINVAL);
+ return;
+ }
+ /* make sure transfer count isn't larger than the message data */
+ if (msg->count > msg->hdr.size - sizeof(*msg)) {
+ vfio_user_send_error(proxy, &msg->hdr, E2BIG);
+ return;
+ }
+
+ r = pci_dma_write(pdev, msg->offset, &msg->data, msg->count);
+
+ switch (r) {
+ case MEMTX_OK:
+ if ((msg->hdr.flags & VFIO_USER_NO_REPLY) == 0) {
+ vfio_user_send_reply(proxy, &msg->hdr, sizeof(msg->hdr));
+ } else {
+ g_free(msg);
+ }
+ break;
+ case MEMTX_ERROR:
+ vfio_user_send_error(proxy, &msg->hdr, EFAULT);
+ break;
+ case MEMTX_DECODE_ERROR:
+ vfio_user_send_error(proxy, &msg->hdr, ENODEV);
+ break;
+ case MEMTX_ACCESS_ERROR:
+ vfio_user_send_error(proxy, &msg->hdr, EPERM);
+ break;
+ default:
+ error_printf("vfio_user_dma_write unknown error %d\n", r);
+ vfio_user_send_error(vdev->vbasedev.proxy, &msg->hdr, EINVAL);
+ }
+}
+
+/*
+ * Incoming request message callback.
+ *
+ * Runs off main loop, so BQL held.
+ */
+static void vfio_user_pci_process_req(void *opaque, VFIOUserMsg *msg)
+{
+ VFIOPCIDevice *vdev = opaque;
+ VFIOUserHdr *hdr = msg->hdr;
+
+ /* no incoming PCI requests pass FDs */
+ if (msg->fds != NULL) {
+ vfio_user_send_error(vdev->vbasedev.proxy, hdr, EINVAL);
+ vfio_user_putfds(msg);
+ return;
+ }
+
+ switch (hdr->command) {
+ case VFIO_USER_DMA_READ:
+ vfio_user_dma_read(vdev, (VFIOUserDMARW *)hdr);
+ break;
+ case VFIO_USER_DMA_WRITE:
+ vfio_user_dma_write(vdev, (VFIOUserDMARW *)hdr);
+ break;
+ default:
+ error_printf("vfio_user_pci_process_req unknown cmd %d\n",
+ hdr->command);
+ vfio_user_send_error(vdev->vbasedev.proxy, hdr, ENOSYS);
+ }
+}
+
+/*
+ * Emulated devices don't use host hot reset
+ */
+static void vfio_user_compute_needs_reset(VFIODevice *vbasedev)
+{
+ vbasedev->needs_reset = false;
+}
+
+static Object *vfio_user_pci_get_object(VFIODevice *vbasedev)
+{
+ VFIOUserPCIDevice *vdev = VFIO_USER_PCI(container_of(vbasedev,
+ VFIOPCIDevice,
+ vbasedev));
+
+ return OBJECT(vdev);
+}
+
+static VFIODeviceOps vfio_user_pci_ops = {
+ .vfio_compute_needs_reset = vfio_user_compute_needs_reset,
+ .vfio_eoi = vfio_pci_intx_eoi,
+ .vfio_get_object = vfio_user_pci_get_object,
+ /* No live migration support yet. */
+ .vfio_save_config = NULL,
+ .vfio_load_config = NULL,
+};
+
+static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
+{
+ ERRP_GUARD();
+ VFIOUserPCIDevice *udev = VFIO_USER_PCI(pdev);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
+ VFIODevice *vbasedev = &vdev->vbasedev;
+ const char *sock_name;
+
+ AddressSpace *as;
+ SocketAddress addr;
+ VFIOUserProxy *proxy;
+
+ if (!udev->socket) {
+ error_setg(errp, "No socket specified");
+ error_append_hint(errp, "e.g. -device '{"
+ "\"driver\":\"vfio-user-pci\", "
+ "\"socket\": {\"path\": \"/tmp/vfio-user.sock\", "
+ "\"type\": \"unix\"}'"
+ "}'\n");
+ return;
+ }
+
+ sock_name = udev->socket->u.q_unix.path;
+
+ vbasedev->name = g_strdup_printf("vfio-user:%s", sock_name);
+
+ memset(&addr, 0, sizeof(addr));
+ addr.type = SOCKET_ADDRESS_TYPE_UNIX;
+ addr.u.q_unix.path = (char *)sock_name;
+ proxy = vfio_user_connect_dev(&addr, errp);
+ if (!proxy) {
+ return;
+ }
+ vbasedev->proxy = proxy;
+ vfio_user_set_handler(vbasedev, vfio_user_pci_process_req, vdev);
+
+ vbasedev->name = g_strdup_printf("vfio-user:%s", sock_name);
+
+ if (udev->send_queued) {
+ proxy->flags |= VFIO_PROXY_FORCE_QUEUED;
+ }
+
+ if (udev->no_post) {
+ proxy->flags |= VFIO_PROXY_NO_POST;
+ }
+
+ /* user specified or 5 sec default */
+ proxy->wait_time = udev->wait_time;
+
+ if (!vfio_user_validate_version(proxy, errp)) {
+ goto error;
+ }
+
+ /*
+ * Use socket-based device I/O instead of vfio kernel driver.
+ */
+ vbasedev->io_ops = &vfio_user_device_io_ops_sock;
+
+ /*
+ * vfio-user devices are effectively mdevs (don't use a host iommu).
+ */
+ vbasedev->mdev = true;
+
+ /*
+ * Enable per-region fds.
+ */
+ vbasedev->use_region_fds = true;
+
+ as = pci_device_iommu_address_space(pdev);
+ if (!vfio_device_attach_by_iommu_type(TYPE_VFIO_IOMMU_USER,
+ vbasedev->name, vbasedev,
+ as, errp)) {
+ goto error;
+ }
+
+ if (!vfio_pci_populate_device(vdev, errp)) {
+ goto error;
+ }
+
+ if (!vfio_pci_config_setup(vdev, errp)) {
+ goto error;
+ }
+
+ /*
+ * vfio_pci_config_setup will have registered the device's BARs
+ * and setup any MSIX BARs, so errors after it succeeds must
+ * use out_teardown
+ */
+
+ if (!vfio_pci_add_capabilities(vdev, errp)) {
+ goto out_teardown;
+ }
+
+ if (vdev->msix != NULL) {
+ vfio_user_msix_setup(vdev);
+ }
+
+ if (!vfio_pci_interrupt_setup(vdev, errp)) {
+ goto out_teardown;
+ }
+
+ vfio_pci_register_err_notifier(vdev);
+ vfio_pci_register_req_notifier(vdev);
+
+ return;
+
+out_teardown:
+ vfio_pci_teardown_msi(vdev);
+ vfio_pci_bars_exit(vdev);
+error:
+ error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
+ vfio_pci_put_device(vdev);
+}
+
+static void vfio_user_pci_init(Object *obj)
+{
+ PCIDevice *pci_dev = PCI_DEVICE(obj);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(obj);
+ VFIODevice *vbasedev = &vdev->vbasedev;
+
+ device_add_bootindex_property(obj, &vdev->bootindex,
+ "bootindex", NULL,
+ &pci_dev->qdev);
+ vdev->host.domain = ~0U;
+ vdev->host.bus = ~0U;
+ vdev->host.slot = ~0U;
+ vdev->host.function = ~0U;
+
+ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_user_pci_ops,
+ DEVICE(vdev), false);
+
+ vdev->nv_gpudirect_clique = 0xFF;
+
+ /*
+ * QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command
+ * line, therefore, no need to wait to realize like other devices.
+ */
+ pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
+}
+
+static void vfio_user_pci_finalize(Object *obj)
+{
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(obj);
+ VFIODevice *vbasedev = &vdev->vbasedev;
+
+ if (vdev->msix != NULL) {
+ vfio_user_msix_teardown(vdev);
+ }
+
+ vfio_pci_put_device(vdev);
+
+ if (vbasedev->proxy != NULL) {
+ vfio_user_disconnect(vbasedev->proxy);
+ }
+}
+
+static void vfio_user_pci_reset(DeviceState *dev)
+{
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(dev);
+ VFIODevice *vbasedev = &vdev->vbasedev;
+
+ vfio_pci_pre_reset(vdev);
+
+ if (vbasedev->reset_works) {
+ vfio_user_device_reset(vbasedev->proxy);
+ }
+
+ vfio_pci_post_reset(vdev);
+}
+
+static const Property vfio_user_pci_properties[] = {
+ DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice,
+ vendor_id, PCI_ANY_ID),
+ DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice,
+ device_id, PCI_ANY_ID),
+ DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice,
+ sub_vendor_id, PCI_ANY_ID),
+ DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
+ sub_device_id, PCI_ANY_ID),
+ DEFINE_PROP_UINT32("x-pci-class-code", VFIOPCIDevice,
+ class_code, PCI_ANY_ID),
+ DEFINE_PROP_BOOL("x-send-queued", VFIOUserPCIDevice, send_queued, false),
+ DEFINE_PROP_UINT32("x-msg-timeout", VFIOUserPCIDevice, wait_time, 5000),
+ DEFINE_PROP_BOOL("x-no-posted-writes", VFIOUserPCIDevice, no_post, false),
+};
+
+static void vfio_user_pci_set_socket(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ VFIOUserPCIDevice *udev = VFIO_USER_PCI(obj);
+ bool success;
+
+ if (VFIO_PCI_DEVICE(udev)->vbasedev.proxy) {
+ error_setg(errp, "Proxy is connected");
+ return;
+ }
+
+ qapi_free_SocketAddress(udev->socket);
+
+ udev->socket = NULL;
+
+ success = visit_type_SocketAddress(v, name, &udev->socket, errp);
+
+ if (!success) {
+ return;
+ }
+
+ if (udev->socket->type != SOCKET_ADDRESS_TYPE_UNIX) {
+ error_setg(errp, "Unsupported socket type %s",
+ SocketAddressType_str(udev->socket->type));
+ qapi_free_SocketAddress(udev->socket);
+ udev->socket = NULL;
+ return;
+ }
+}
+
+static void vfio_user_pci_class_init(ObjectClass *klass, const void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);
+
+ device_class_set_legacy_reset(dc, vfio_user_pci_reset);
+ device_class_set_props(dc, vfio_user_pci_properties);
+
+ object_class_property_add(klass, "socket", "SocketAddress", NULL,
+ vfio_user_pci_set_socket, NULL, NULL);
+ object_class_property_set_description(klass, "socket",
+ "SocketAddress (UNIX sockets only)");
+
+ dc->desc = "VFIO over socket PCI device assignment";
+ pdc->realize = vfio_user_pci_realize;
+}
+
+static const TypeInfo vfio_user_pci_info = {
+ .name = TYPE_VFIO_USER_PCI,
+ .parent = TYPE_VFIO_PCI_DEVICE,
+ .instance_size = sizeof(VFIOUserPCIDevice),
+ .class_init = vfio_user_pci_class_init,
+ .instance_init = vfio_user_pci_init,
+ .instance_finalize = vfio_user_pci_finalize,
+};
+
+static void register_vfio_user_dev_type(void)
+{
+ type_register_static(&vfio_user_pci_info);
+}
+
+type_init(register_vfio_user_dev_type)
diff --git a/hw/vfio-user/protocol.h b/hw/vfio-user/protocol.h
new file mode 100644
index 0000000..3249a4a
--- /dev/null
+++ b/hw/vfio-user/protocol.h
@@ -0,0 +1,242 @@
+#ifndef VFIO_USER_PROTOCOL_H
+#define VFIO_USER_PROTOCOL_H
+
+/*
+ * vfio protocol over a UNIX socket.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * Each message has a standard header that describes the command
+ * being sent, which is almost always a VFIO ioctl().
+ *
+ * The header may be followed by command-specific data, such as the
+ * region and offset info for read and write commands.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+typedef struct {
+ uint16_t id;
+ uint16_t command;
+ uint32_t size;
+ uint32_t flags;
+ uint32_t error_reply;
+} VFIOUserHdr;
+
+/* VFIOUserHdr commands */
+enum vfio_user_command {
+ VFIO_USER_VERSION = 1,
+ VFIO_USER_DMA_MAP = 2,
+ VFIO_USER_DMA_UNMAP = 3,
+ VFIO_USER_DEVICE_GET_INFO = 4,
+ VFIO_USER_DEVICE_GET_REGION_INFO = 5,
+ VFIO_USER_DEVICE_GET_REGION_IO_FDS = 6,
+ VFIO_USER_DEVICE_GET_IRQ_INFO = 7,
+ VFIO_USER_DEVICE_SET_IRQS = 8,
+ VFIO_USER_REGION_READ = 9,
+ VFIO_USER_REGION_WRITE = 10,
+ VFIO_USER_DMA_READ = 11,
+ VFIO_USER_DMA_WRITE = 12,
+ VFIO_USER_DEVICE_RESET = 13,
+ VFIO_USER_DIRTY_PAGES = 14,
+ VFIO_USER_REGION_WRITE_MULTI = 15,
+ VFIO_USER_MAX,
+};
+
+/* VFIOUserHdr flags */
+#define VFIO_USER_REQUEST 0x0
+#define VFIO_USER_REPLY 0x1
+#define VFIO_USER_TYPE 0xF
+
+#define VFIO_USER_NO_REPLY 0x10
+#define VFIO_USER_ERROR 0x20
+
+
+/*
+ * VFIO_USER_VERSION
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint16_t major;
+ uint16_t minor;
+ char capabilities[];
+} VFIOUserVersion;
+
+#define VFIO_USER_MAJOR_VER 0
+#define VFIO_USER_MINOR_VER 0
+
+#define VFIO_USER_CAP "capabilities"
+
+/* "capabilities" members */
+#define VFIO_USER_CAP_MAX_FDS "max_msg_fds"
+#define VFIO_USER_CAP_MAX_XFER "max_data_xfer_size"
+#define VFIO_USER_CAP_PGSIZES "pgsizes"
+#define VFIO_USER_CAP_MAP_MAX "max_dma_maps"
+#define VFIO_USER_CAP_MIGR "migration"
+#define VFIO_USER_CAP_MULTI "write_multiple"
+
+/* "migration" members */
+#define VFIO_USER_CAP_PGSIZE "pgsize"
+#define VFIO_USER_CAP_MAX_BITMAP "max_bitmap_size"
+
+/*
+ * Max FDs mainly comes into play when a device supports multiple interrupts
+ * where each ones uses an eventfd to inject it into the guest.
+ * It is clamped by the the number of FDs the qio channel supports in a
+ * single message.
+ */
+#define VFIO_USER_DEF_MAX_FDS 8
+#define VFIO_USER_MAX_MAX_FDS 16
+
+/*
+ * Max transfer limits the amount of data in region and DMA messages.
+ * Region R/W will be very small (limited by how much a single instruction
+ * can process) so just use a reasonable limit here.
+ */
+#define VFIO_USER_DEF_MAX_XFER (1024 * 1024)
+#define VFIO_USER_MAX_MAX_XFER (64 * 1024 * 1024)
+
+/*
+ * Default pagesizes supported is 4k.
+ */
+#define VFIO_USER_DEF_PGSIZE 4096
+
+/*
+ * Default max number of DMA mappings is stolen from the
+ * linux kernel "dma_entry_limit"
+ */
+#define VFIO_USER_DEF_MAP_MAX 65535
+
+/*
+ * Default max bitmap size is also take from the linux kernel,
+ * where usage of signed ints limits the VA range to 2^31 bytes.
+ * Dividing that by the number of bits per byte yields 256MB
+ */
+#define VFIO_USER_DEF_MAX_BITMAP (256 * 1024 * 1024)
+
+/*
+ * VFIO_USER_DMA_MAP
+ * imported from struct vfio_iommu_type1_dma_map
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint32_t argsz;
+ uint32_t flags;
+ uint64_t offset; /* FD offset */
+ uint64_t iova;
+ uint64_t size;
+} VFIOUserDMAMap;
+
+/*
+ * VFIO_USER_DMA_UNMAP
+ * imported from struct vfio_iommu_type1_dma_unmap
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint32_t argsz;
+ uint32_t flags;
+ uint64_t iova;
+ uint64_t size;
+} VFIOUserDMAUnmap;
+
+/*
+ * VFIO_USER_DEVICE_GET_INFO
+ * imported from struct vfio_device_info
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint32_t argsz;
+ uint32_t flags;
+ uint32_t num_regions;
+ uint32_t num_irqs;
+} VFIOUserDeviceInfo;
+
+/*
+ * VFIO_USER_DEVICE_GET_REGION_INFO
+ * imported from struct vfio_region_info
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint32_t argsz;
+ uint32_t flags;
+ uint32_t index;
+ uint32_t cap_offset;
+ uint64_t size;
+ uint64_t offset;
+} VFIOUserRegionInfo;
+
+/*
+ * VFIO_USER_DEVICE_GET_IRQ_INFO
+ * imported from struct vfio_irq_info
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint32_t argsz;
+ uint32_t flags;
+ uint32_t index;
+ uint32_t count;
+} VFIOUserIRQInfo;
+
+/*
+ * VFIO_USER_DEVICE_SET_IRQS
+ * imported from struct vfio_irq_set
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint32_t argsz;
+ uint32_t flags;
+ uint32_t index;
+ uint32_t start;
+ uint32_t count;
+} VFIOUserIRQSet;
+
+/*
+ * VFIO_USER_REGION_READ
+ * VFIO_USER_REGION_WRITE
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint64_t offset;
+ uint32_t region;
+ uint32_t count;
+ char data[];
+} VFIOUserRegionRW;
+
+/*
+ * VFIO_USER_DMA_READ
+ * VFIO_USER_DMA_WRITE
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint64_t offset;
+ uint32_t count;
+ char data[];
+} VFIOUserDMARW;
+
+/* imported from struct vfio_bitmap */
+typedef struct {
+ uint64_t pgsize;
+ uint64_t size;
+ char data[];
+} VFIOUserBitmap;
+
+/*
+ * VFIO_USER_REGION_WRITE_MULTI
+ */
+#define VFIO_USER_MULTI_DATA 8
+#define VFIO_USER_MULTI_MAX 200
+
+typedef struct {
+ uint64_t offset;
+ uint32_t region;
+ uint32_t count;
+ char data[VFIO_USER_MULTI_DATA];
+} VFIOUserWROne;
+
+typedef struct {
+ VFIOUserHdr hdr;
+ uint64_t wr_cnt;
+ VFIOUserWROne wrs[VFIO_USER_MULTI_MAX];
+} VFIOUserWRMulti;
+
+#endif /* VFIO_USER_PROTOCOL_H */
diff --git a/hw/vfio-user/proxy.c b/hw/vfio-user/proxy.c
new file mode 100644
index 0000000..bbd7ec2
--- /dev/null
+++ b/hw/vfio-user/proxy.c
@@ -0,0 +1,1363 @@
+/*
+ * vfio protocol over a UNIX socket.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include <sys/ioctl.h>
+
+#include "hw/vfio/vfio-device.h"
+#include "hw/vfio-user/proxy.h"
+#include "hw/vfio-user/trace.h"
+#include "qapi/error.h"
+#include "qobject/qbool.h"
+#include "qobject/qdict.h"
+#include "qobject/qjson.h"
+#include "qobject/qnum.h"
+#include "qemu/error-report.h"
+#include "qemu/lockable.h"
+#include "qemu/main-loop.h"
+#include "qemu/thread.h"
+#include "system/iothread.h"
+
+static IOThread *vfio_user_iothread;
+
+static void vfio_user_shutdown(VFIOUserProxy *proxy);
+static VFIOUserMsg *vfio_user_getmsg(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+ VFIOUserFDs *fds);
+static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg);
+
+static void vfio_user_recv(void *opaque);
+static void vfio_user_send(void *opaque);
+
+static void vfio_user_request(void *opaque);
+
+static inline void vfio_user_set_error(VFIOUserHdr *hdr, uint32_t err)
+{
+ hdr->flags |= VFIO_USER_ERROR;
+ hdr->error_reply = err;
+}
+
+/*
+ * Functions called by main, CPU, or iothread threads
+ */
+
+static void vfio_user_shutdown(VFIOUserProxy *proxy)
+{
+ qio_channel_shutdown(proxy->ioc, QIO_CHANNEL_SHUTDOWN_READ, NULL);
+ qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, NULL,
+ proxy->ctx, NULL, NULL);
+}
+
+/*
+ * Same return values as qio_channel_writev_full():
+ *
+ * QIO_CHANNEL_ERR_BLOCK: *errp not set
+ * -1: *errp will be populated
+ * otherwise: bytes written
+ */
+static ssize_t vfio_user_send_qio(VFIOUserProxy *proxy, VFIOUserMsg *msg,
+ Error **errp)
+{
+ VFIOUserFDs *fds = msg->fds;
+ struct iovec iov = {
+ .iov_base = msg->hdr,
+ .iov_len = msg->hdr->size,
+ };
+ size_t numfds = 0;
+ int *fdp = NULL;
+ ssize_t ret;
+
+ if (fds != NULL && fds->send_fds != 0) {
+ numfds = fds->send_fds;
+ fdp = fds->fds;
+ }
+
+ ret = qio_channel_writev_full(proxy->ioc, &iov, 1, fdp, numfds, 0, errp);
+
+ if (ret == -1) {
+ vfio_user_set_error(msg->hdr, EIO);
+ vfio_user_shutdown(proxy);
+ }
+ trace_vfio_user_send_write(msg->hdr->id, ret);
+
+ return ret;
+}
+
+static VFIOUserMsg *vfio_user_getmsg(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+ VFIOUserFDs *fds)
+{
+ VFIOUserMsg *msg;
+
+ msg = QTAILQ_FIRST(&proxy->free);
+ if (msg != NULL) {
+ QTAILQ_REMOVE(&proxy->free, msg, next);
+ } else {
+ msg = g_malloc0(sizeof(*msg));
+ qemu_cond_init(&msg->cv);
+ }
+
+ msg->hdr = hdr;
+ msg->fds = fds;
+ return msg;
+}
+
+/*
+ * Recycle a message list entry to the free list.
+ */
+static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg)
+{
+ if (msg->type == VFIO_MSG_NONE) {
+ error_printf("vfio_user_recycle - freeing free msg\n");
+ return;
+ }
+
+ /* free msg buffer if no one is waiting to consume the reply */
+ if (msg->type == VFIO_MSG_NOWAIT || msg->type == VFIO_MSG_ASYNC) {
+ g_free(msg->hdr);
+ if (msg->fds != NULL) {
+ g_free(msg->fds);
+ }
+ }
+
+ msg->type = VFIO_MSG_NONE;
+ msg->hdr = NULL;
+ msg->fds = NULL;
+ msg->complete = false;
+ msg->pending = false;
+ QTAILQ_INSERT_HEAD(&proxy->free, msg, next);
+}
+
+VFIOUserFDs *vfio_user_getfds(int numfds)
+{
+ VFIOUserFDs *fds = g_malloc0(sizeof(*fds) + (numfds * sizeof(int)));
+
+ fds->fds = (int *)((char *)fds + sizeof(*fds));
+
+ return fds;
+}
+
+/*
+ * Functions only called by iothread
+ */
+
+/*
+ * Process a received message.
+ */
+static void vfio_user_process(VFIOUserProxy *proxy, VFIOUserMsg *msg,
+ bool isreply)
+{
+
+ /*
+ * Replies signal a waiter, if none just check for errors
+ * and free the message buffer.
+ *
+ * Requests get queued for the BH.
+ */
+ if (isreply) {
+ msg->complete = true;
+ if (msg->type == VFIO_MSG_WAIT) {
+ qemu_cond_signal(&msg->cv);
+ } else {
+ if (msg->hdr->flags & VFIO_USER_ERROR) {
+ error_printf("vfio_user_process: error reply on async ");
+ error_printf("request command %x error %s\n",
+ msg->hdr->command,
+ strerror(msg->hdr->error_reply));
+ }
+ /* youngest nowait msg has been ack'd */
+ if (proxy->last_nowait == msg) {
+ proxy->last_nowait = NULL;
+ }
+ vfio_user_recycle(proxy, msg);
+ }
+ } else {
+ QTAILQ_INSERT_TAIL(&proxy->incoming, msg, next);
+ qemu_bh_schedule(proxy->req_bh);
+ }
+}
+
+/*
+ * Complete a partial message read
+ */
+static int vfio_user_complete(VFIOUserProxy *proxy, Error **errp)
+{
+ VFIOUserMsg *msg = proxy->part_recv;
+ size_t msgleft = proxy->recv_left;
+ bool isreply;
+ char *data;
+ int ret;
+
+ data = (char *)msg->hdr + (msg->hdr->size - msgleft);
+ while (msgleft > 0) {
+ ret = qio_channel_read(proxy->ioc, data, msgleft, errp);
+
+ /* error or would block */
+ if (ret <= 0) {
+ /* try for rest on next iternation */
+ if (ret == QIO_CHANNEL_ERR_BLOCK) {
+ proxy->recv_left = msgleft;
+ }
+ return ret;
+ }
+ trace_vfio_user_recv_read(msg->hdr->id, ret);
+
+ msgleft -= ret;
+ data += ret;
+ }
+
+ /*
+ * Read complete message, process it.
+ */
+ proxy->part_recv = NULL;
+ proxy->recv_left = 0;
+ isreply = (msg->hdr->flags & VFIO_USER_TYPE) == VFIO_USER_REPLY;
+ vfio_user_process(proxy, msg, isreply);
+
+ /* return positive value */
+ return 1;
+}
+
+/*
+ * Receive and process one incoming message.
+ *
+ * For replies, find matching outgoing request and wake any waiters.
+ * For requests, queue in incoming list and run request BH.
+ */
+static int vfio_user_recv_one(VFIOUserProxy *proxy, Error **errp)
+{
+ VFIOUserMsg *msg = NULL;
+ g_autofree int *fdp = NULL;
+ VFIOUserFDs *reqfds;
+ VFIOUserHdr hdr;
+ struct iovec iov = {
+ .iov_base = &hdr,
+ .iov_len = sizeof(hdr),
+ };
+ bool isreply = false;
+ int i, ret;
+ size_t msgleft, numfds = 0;
+ char *data = NULL;
+ char *buf = NULL;
+
+ /*
+ * Complete any partial reads
+ */
+ if (proxy->part_recv != NULL) {
+ ret = vfio_user_complete(proxy, errp);
+
+ /* still not complete, try later */
+ if (ret == QIO_CHANNEL_ERR_BLOCK) {
+ return ret;
+ }
+
+ if (ret <= 0) {
+ goto fatal;
+ }
+ /* else fall into reading another msg */
+ }
+
+ /*
+ * Read header
+ */
+ ret = qio_channel_readv_full(proxy->ioc, &iov, 1, &fdp, &numfds, 0,
+ errp);
+ if (ret == QIO_CHANNEL_ERR_BLOCK) {
+ return ret;
+ }
+
+ /* read error or other side closed connection */
+ if (ret <= 0) {
+ goto fatal;
+ }
+
+ if (ret < sizeof(hdr)) {
+ error_setg(errp, "short read of header");
+ goto fatal;
+ }
+
+ /*
+ * Validate header
+ */
+ if (hdr.size < sizeof(VFIOUserHdr)) {
+ error_setg(errp, "bad header size");
+ goto fatal;
+ }
+ switch (hdr.flags & VFIO_USER_TYPE) {
+ case VFIO_USER_REQUEST:
+ isreply = false;
+ break;
+ case VFIO_USER_REPLY:
+ isreply = true;
+ break;
+ default:
+ error_setg(errp, "unknown message type");
+ goto fatal;
+ }
+ trace_vfio_user_recv_hdr(proxy->sockname, hdr.id, hdr.command, hdr.size,
+ hdr.flags);
+
+ /*
+ * For replies, find the matching pending request.
+ * For requests, reap incoming FDs.
+ */
+ if (isreply) {
+ QTAILQ_FOREACH(msg, &proxy->pending, next) {
+ if (hdr.id == msg->id) {
+ break;
+ }
+ }
+ if (msg == NULL) {
+ error_setg(errp, "unexpected reply");
+ goto err;
+ }
+ QTAILQ_REMOVE(&proxy->pending, msg, next);
+
+ /*
+ * Process any received FDs
+ */
+ if (numfds != 0) {
+ if (msg->fds == NULL || msg->fds->recv_fds < numfds) {
+ error_setg(errp, "unexpected FDs");
+ goto err;
+ }
+ msg->fds->recv_fds = numfds;
+ memcpy(msg->fds->fds, fdp, numfds * sizeof(int));
+ }
+ } else {
+ if (numfds != 0) {
+ reqfds = vfio_user_getfds(numfds);
+ memcpy(reqfds->fds, fdp, numfds * sizeof(int));
+ } else {
+ reqfds = NULL;
+ }
+ }
+
+ /*
+ * Put the whole message into a single buffer.
+ */
+ if (isreply) {
+ if (hdr.size > msg->rsize) {
+ error_setg(errp, "reply larger than recv buffer");
+ goto err;
+ }
+ *msg->hdr = hdr;
+ data = (char *)msg->hdr + sizeof(hdr);
+ } else {
+ if (hdr.size > proxy->max_xfer_size + sizeof(VFIOUserDMARW)) {
+ error_setg(errp, "vfio_user_recv request larger than max");
+ goto err;
+ }
+ buf = g_malloc0(hdr.size);
+ memcpy(buf, &hdr, sizeof(hdr));
+ data = buf + sizeof(hdr);
+ msg = vfio_user_getmsg(proxy, (VFIOUserHdr *)buf, reqfds);
+ msg->type = VFIO_MSG_REQ;
+ }
+
+ /*
+ * Read rest of message.
+ */
+ msgleft = hdr.size - sizeof(hdr);
+ while (msgleft > 0) {
+ ret = qio_channel_read(proxy->ioc, data, msgleft, errp);
+
+ /* prepare to complete read on next iternation */
+ if (ret == QIO_CHANNEL_ERR_BLOCK) {
+ proxy->part_recv = msg;
+ proxy->recv_left = msgleft;
+ return ret;
+ }
+
+ if (ret <= 0) {
+ goto fatal;
+ }
+ trace_vfio_user_recv_read(hdr.id, ret);
+
+ msgleft -= ret;
+ data += ret;
+ }
+
+ vfio_user_process(proxy, msg, isreply);
+ return 0;
+
+ /*
+ * fatal means the other side closed or we don't trust the stream
+ * err means this message is corrupt
+ */
+fatal:
+ vfio_user_shutdown(proxy);
+ proxy->state = VFIO_PROXY_ERROR;
+
+ /* set error if server side closed */
+ if (ret == 0) {
+ error_setg(errp, "server closed socket");
+ }
+
+err:
+ for (i = 0; i < numfds; i++) {
+ close(fdp[i]);
+ }
+ if (isreply && msg != NULL) {
+ /* force an error to keep sending thread from hanging */
+ vfio_user_set_error(msg->hdr, EINVAL);
+ msg->complete = true;
+ qemu_cond_signal(&msg->cv);
+ }
+ return -1;
+}
+
+static void vfio_user_recv(void *opaque)
+{
+ VFIOUserProxy *proxy = opaque;
+
+ QEMU_LOCK_GUARD(&proxy->lock);
+
+ if (proxy->state == VFIO_PROXY_CONNECTED) {
+ Error *local_err = NULL;
+
+ while (vfio_user_recv_one(proxy, &local_err) == 0) {
+ ;
+ }
+
+ if (local_err != NULL) {
+ error_report_err(local_err);
+ }
+ }
+}
+
+/*
+ * Send a single message, same return semantics as vfio_user_send_qio().
+ *
+ * Sent async messages are freed, others are moved to pending queue.
+ */
+static ssize_t vfio_user_send_one(VFIOUserProxy *proxy, Error **errp)
+{
+ VFIOUserMsg *msg;
+ ssize_t ret;
+
+ msg = QTAILQ_FIRST(&proxy->outgoing);
+ ret = vfio_user_send_qio(proxy, msg, errp);
+ if (ret < 0) {
+ return ret;
+ }
+
+ QTAILQ_REMOVE(&proxy->outgoing, msg, next);
+ proxy->num_outgoing--;
+ if (msg->type == VFIO_MSG_ASYNC) {
+ vfio_user_recycle(proxy, msg);
+ } else {
+ QTAILQ_INSERT_TAIL(&proxy->pending, msg, next);
+ msg->pending = true;
+ }
+
+ return ret;
+}
+
+/*
+ * Send messages from outgoing queue when the socket buffer has space.
+ * If we deplete 'outgoing', remove ourselves from the poll list.
+ */
+static void vfio_user_send(void *opaque)
+{
+ VFIOUserProxy *proxy = opaque;
+
+ QEMU_LOCK_GUARD(&proxy->lock);
+
+ if (proxy->state == VFIO_PROXY_CONNECTED) {
+ while (!QTAILQ_EMPTY(&proxy->outgoing)) {
+ Error *local_err = NULL;
+ int ret;
+
+ ret = vfio_user_send_one(proxy, &local_err);
+
+ if (ret == QIO_CHANNEL_ERR_BLOCK) {
+ return;
+ } else if (ret == -1) {
+ error_report_err(local_err);
+ return;
+ }
+ }
+ qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx,
+ vfio_user_recv, NULL, NULL, proxy);
+
+ /* queue empty - send any pending multi write msgs */
+ if (proxy->wr_multi != NULL) {
+ vfio_user_flush_multi(proxy);
+ }
+ }
+}
+
+static void vfio_user_close_cb(void *opaque)
+{
+ VFIOUserProxy *proxy = opaque;
+
+ QEMU_LOCK_GUARD(&proxy->lock);
+
+ proxy->state = VFIO_PROXY_CLOSED;
+ qemu_cond_signal(&proxy->close_cv);
+}
+
+
+/*
+ * Functions called by main or CPU threads
+ */
+
+/*
+ * Process incoming requests.
+ *
+ * The bus-specific callback has the form:
+ * request(opaque, msg)
+ * where 'opaque' was specified in vfio_user_set_handler
+ * and 'msg' is the inbound message.
+ *
+ * The callback is responsible for disposing of the message buffer,
+ * usually by re-using it when calling vfio_send_reply or vfio_send_error,
+ * both of which free their message buffer when the reply is sent.
+ *
+ * If the callback uses a new buffer, it needs to free the old one.
+ */
+static void vfio_user_request(void *opaque)
+{
+ VFIOUserProxy *proxy = opaque;
+ VFIOUserMsgQ new, free;
+ VFIOUserMsg *msg, *m1;
+
+ /* reap all incoming */
+ QTAILQ_INIT(&new);
+ WITH_QEMU_LOCK_GUARD(&proxy->lock) {
+ QTAILQ_FOREACH_SAFE(msg, &proxy->incoming, next, m1) {
+ QTAILQ_REMOVE(&proxy->incoming, msg, next);
+ QTAILQ_INSERT_TAIL(&new, msg, next);
+ }
+ }
+
+ /* process list */
+ QTAILQ_INIT(&free);
+ QTAILQ_FOREACH_SAFE(msg, &new, next, m1) {
+ QTAILQ_REMOVE(&new, msg, next);
+ trace_vfio_user_recv_request(msg->hdr->command);
+ proxy->request(proxy->req_arg, msg);
+ QTAILQ_INSERT_HEAD(&free, msg, next);
+ }
+
+ /* free list */
+ WITH_QEMU_LOCK_GUARD(&proxy->lock) {
+ QTAILQ_FOREACH_SAFE(msg, &free, next, m1) {
+ vfio_user_recycle(proxy, msg);
+ }
+ }
+}
+
+/*
+ * Messages are queued onto the proxy's outgoing list.
+ *
+ * It handles 3 types of messages:
+ *
+ * async messages - replies and posted writes
+ *
+ * There will be no reply from the server, so message
+ * buffers are freed after they're sent.
+ *
+ * nowait messages - map/unmap during address space transactions
+ *
+ * These are also sent async, but a reply is expected so that
+ * vfio_wait_reqs() can wait for the youngest nowait request.
+ * They transition from the outgoing list to the pending list
+ * when sent, and are freed when the reply is received.
+ *
+ * wait messages - all other requests
+ *
+ * The reply to these messages is waited for by their caller.
+ * They also transition from outgoing to pending when sent, but
+ * the message buffer is returned to the caller with the reply
+ * contents. The caller is responsible for freeing these messages.
+ *
+ * As an optimization, if the outgoing list and the socket send
+ * buffer are empty, the message is sent inline instead of being
+ * added to the outgoing list. The rest of the transitions are
+ * unchanged.
+ */
+static bool vfio_user_send_queued(VFIOUserProxy *proxy, VFIOUserMsg *msg,
+ Error **errp)
+{
+ int ret;
+
+ /* older coalesced writes go first */
+ if (proxy->wr_multi != NULL &&
+ ((msg->hdr->flags & VFIO_USER_TYPE) == VFIO_USER_REQUEST)) {
+ vfio_user_flush_multi(proxy);
+ }
+
+ /*
+ * Unsent outgoing msgs - add to tail
+ */
+ if (!QTAILQ_EMPTY(&proxy->outgoing)) {
+ QTAILQ_INSERT_TAIL(&proxy->outgoing, msg, next);
+ proxy->num_outgoing++;
+ return true;
+ }
+
+ /*
+ * Try inline - if blocked, queue it and kick send poller
+ */
+ if (proxy->flags & VFIO_PROXY_FORCE_QUEUED) {
+ ret = QIO_CHANNEL_ERR_BLOCK;
+ } else {
+ ret = vfio_user_send_qio(proxy, msg, errp);
+ }
+
+ if (ret == QIO_CHANNEL_ERR_BLOCK) {
+ QTAILQ_INSERT_HEAD(&proxy->outgoing, msg, next);
+ proxy->num_outgoing = 1;
+ qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx,
+ vfio_user_recv, proxy->ctx,
+ vfio_user_send, proxy);
+ return true;
+ }
+ if (ret == -1) {
+ return false;
+ }
+
+ /*
+ * Sent - free async, add others to pending
+ */
+ if (msg->type == VFIO_MSG_ASYNC) {
+ vfio_user_recycle(proxy, msg);
+ } else {
+ QTAILQ_INSERT_TAIL(&proxy->pending, msg, next);
+ msg->pending = true;
+ }
+
+ return true;
+}
+
+/*
+ * nowait send - vfio_wait_reqs() can wait for it later
+ *
+ * Returns false if we did not successfully receive a reply message, in which
+ * case @errp will be populated.
+ *
+ * In either case, ownership of @hdr and @fds is taken, and the caller must
+ * *not* free them itself.
+ */
+bool vfio_user_send_nowait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+ VFIOUserFDs *fds, int rsize, Error **errp)
+{
+ VFIOUserMsg *msg;
+
+ QEMU_LOCK_GUARD(&proxy->lock);
+
+ msg = vfio_user_getmsg(proxy, hdr, fds);
+ msg->id = hdr->id;
+ msg->rsize = rsize ? rsize : hdr->size;
+ msg->type = VFIO_MSG_NOWAIT;
+
+ if (hdr->flags & VFIO_USER_NO_REPLY) {
+ error_setg_errno(errp, EINVAL, "%s on NO_REPLY message", __func__);
+ vfio_user_recycle(proxy, msg);
+ return false;
+ }
+
+ if (!vfio_user_send_queued(proxy, msg, errp)) {
+ vfio_user_recycle(proxy, msg);
+ return false;
+ }
+
+ proxy->last_nowait = msg;
+
+ return true;
+}
+
+/*
+ * Returns false if we did not successfully receive a reply message, in which
+ * case @errp will be populated.
+ *
+ * In either case, the caller must free @hdr and @fds if needed.
+ */
+bool vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+ VFIOUserFDs *fds, int rsize, Error **errp)
+{
+ VFIOUserMsg *msg;
+ bool ok = false;
+
+ if (hdr->flags & VFIO_USER_NO_REPLY) {
+ error_setg_errno(errp, EINVAL, "%s on NO_REPLY message", __func__);
+ return false;
+ }
+
+ qemu_mutex_lock(&proxy->lock);
+
+ msg = vfio_user_getmsg(proxy, hdr, fds);
+ msg->id = hdr->id;
+ msg->rsize = rsize ? rsize : hdr->size;
+ msg->type = VFIO_MSG_WAIT;
+
+ ok = vfio_user_send_queued(proxy, msg, errp);
+
+ if (ok) {
+ while (!msg->complete) {
+ if (!qemu_cond_timedwait(&msg->cv, &proxy->lock,
+ proxy->wait_time)) {
+ VFIOUserMsgQ *list;
+
+ list = msg->pending ? &proxy->pending : &proxy->outgoing;
+ QTAILQ_REMOVE(list, msg, next);
+ error_setg_errno(errp, ETIMEDOUT,
+ "timed out waiting for reply");
+ ok = false;
+ break;
+ }
+ }
+ }
+
+ vfio_user_recycle(proxy, msg);
+
+ qemu_mutex_unlock(&proxy->lock);
+
+ return ok;
+}
+
+/*
+ * async send - msg can be queued, but will be freed when sent
+ *
+ * Returns false on failure, in which case @errp will be populated.
+ *
+ * In either case, ownership of @hdr and @fds is taken, and the caller must
+ * *not* free them itself.
+ */
+bool vfio_user_send_async(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+ VFIOUserFDs *fds, Error **errp)
+{
+ VFIOUserMsg *msg;
+
+ QEMU_LOCK_GUARD(&proxy->lock);
+
+ msg = vfio_user_getmsg(proxy, hdr, fds);
+ msg->id = hdr->id;
+ msg->rsize = 0;
+ msg->type = VFIO_MSG_ASYNC;
+
+ if (!(hdr->flags & (VFIO_USER_NO_REPLY | VFIO_USER_REPLY))) {
+ error_setg_errno(errp, EINVAL, "%s on sync message", __func__);
+ vfio_user_recycle(proxy, msg);
+ return false;
+ }
+
+ if (!vfio_user_send_queued(proxy, msg, errp)) {
+ vfio_user_recycle(proxy, msg);
+ return false;
+ }
+
+ return true;
+}
+
+void vfio_user_wait_reqs(VFIOUserProxy *proxy)
+{
+ VFIOUserMsg *msg;
+
+ /*
+ * Any DMA map/unmap requests sent in the middle
+ * of a memory region transaction were sent nowait.
+ * Wait for them here.
+ */
+ qemu_mutex_lock(&proxy->lock);
+ if (proxy->last_nowait != NULL) {
+ /*
+ * Change type to WAIT to wait for reply
+ */
+ msg = proxy->last_nowait;
+ msg->type = VFIO_MSG_WAIT;
+ proxy->last_nowait = NULL;
+ while (!msg->complete) {
+ if (!qemu_cond_timedwait(&msg->cv, &proxy->lock,
+ proxy->wait_time)) {
+ VFIOUserMsgQ *list;
+
+ list = msg->pending ? &proxy->pending : &proxy->outgoing;
+ QTAILQ_REMOVE(list, msg, next);
+ error_printf("vfio_wait_reqs - timed out\n");
+ break;
+ }
+ }
+
+ if (msg->hdr->flags & VFIO_USER_ERROR) {
+ error_printf("vfio_user_wait_reqs - error reply on async ");
+ error_printf("request: command %x error %s\n", msg->hdr->command,
+ strerror(msg->hdr->error_reply));
+ }
+
+ /*
+ * Change type back to NOWAIT to free
+ */
+ msg->type = VFIO_MSG_NOWAIT;
+ vfio_user_recycle(proxy, msg);
+ }
+
+ qemu_mutex_unlock(&proxy->lock);
+}
+
+/*
+ * Reply to an incoming request.
+ */
+void vfio_user_send_reply(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int size)
+{
+ Error *local_err = NULL;
+
+ if (size < sizeof(VFIOUserHdr)) {
+ error_printf("%s: size too small", __func__);
+ g_free(hdr);
+ return;
+ }
+
+ /*
+ * convert header to associated reply
+ */
+ hdr->flags = VFIO_USER_REPLY;
+ hdr->size = size;
+
+ if (!vfio_user_send_async(proxy, hdr, NULL, &local_err)) {
+ error_report_err(local_err);
+ }
+}
+
+/*
+ * Send an error reply to an incoming request.
+ */
+void vfio_user_send_error(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int error)
+{
+ Error *local_err = NULL;
+
+ /*
+ * convert header to associated reply
+ */
+ hdr->flags = VFIO_USER_REPLY;
+ hdr->flags |= VFIO_USER_ERROR;
+ hdr->error_reply = error;
+ hdr->size = sizeof(*hdr);
+
+ if (!vfio_user_send_async(proxy, hdr, NULL, &local_err)) {
+ error_report_err(local_err);
+ }
+}
+
+/*
+ * Close FDs erroneously received in an incoming request.
+ */
+void vfio_user_putfds(VFIOUserMsg *msg)
+{
+ VFIOUserFDs *fds = msg->fds;
+ int i;
+
+ for (i = 0; i < fds->recv_fds; i++) {
+ close(fds->fds[i]);
+ }
+ g_free(fds);
+ msg->fds = NULL;
+}
+
+void
+vfio_user_disable_posted_writes(VFIOUserProxy *proxy)
+{
+ WITH_QEMU_LOCK_GUARD(&proxy->lock) {
+ proxy->flags |= VFIO_PROXY_NO_POST;
+ }
+}
+
+static QLIST_HEAD(, VFIOUserProxy) vfio_user_sockets =
+ QLIST_HEAD_INITIALIZER(vfio_user_sockets);
+
+VFIOUserProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp)
+{
+ VFIOUserProxy *proxy;
+ QIOChannelSocket *sioc;
+ QIOChannel *ioc;
+ char *sockname;
+
+ if (addr->type != SOCKET_ADDRESS_TYPE_UNIX) {
+ error_setg(errp, "vfio_user_connect - bad address family");
+ return NULL;
+ }
+ sockname = addr->u.q_unix.path;
+
+ sioc = qio_channel_socket_new();
+ ioc = QIO_CHANNEL(sioc);
+ if (qio_channel_socket_connect_sync(sioc, addr, errp) < 0) {
+ goto fail;
+ }
+ if (!qio_channel_set_blocking(ioc, false, errp)) {
+ goto fail;
+ }
+
+ proxy = g_malloc0(sizeof(VFIOUserProxy));
+ proxy->sockname = g_strdup_printf("unix:%s", sockname);
+ proxy->ioc = ioc;
+
+ /* init defaults */
+ proxy->max_xfer_size = VFIO_USER_DEF_MAX_XFER;
+ proxy->max_send_fds = VFIO_USER_DEF_MAX_FDS;
+ proxy->max_dma = VFIO_USER_DEF_MAP_MAX;
+ proxy->dma_pgsizes = VFIO_USER_DEF_PGSIZE;
+ proxy->max_bitmap = VFIO_USER_DEF_MAX_BITMAP;
+ proxy->migr_pgsize = VFIO_USER_DEF_PGSIZE;
+
+ proxy->flags = VFIO_PROXY_CLIENT;
+ proxy->state = VFIO_PROXY_CONNECTED;
+
+ qemu_mutex_init(&proxy->lock);
+ qemu_cond_init(&proxy->close_cv);
+
+ if (vfio_user_iothread == NULL) {
+ vfio_user_iothread = iothread_create("VFIO user", errp);
+ }
+
+ proxy->ctx = iothread_get_aio_context(vfio_user_iothread);
+ proxy->req_bh = qemu_bh_new(vfio_user_request, proxy);
+
+ QTAILQ_INIT(&proxy->outgoing);
+ QTAILQ_INIT(&proxy->incoming);
+ QTAILQ_INIT(&proxy->free);
+ QTAILQ_INIT(&proxy->pending);
+ QLIST_INSERT_HEAD(&vfio_user_sockets, proxy, next);
+
+ return proxy;
+
+fail:
+ object_unref(OBJECT(ioc));
+ return NULL;
+}
+
+void vfio_user_set_handler(VFIODevice *vbasedev,
+ void (*handler)(void *opaque, VFIOUserMsg *msg),
+ void *req_arg)
+{
+ VFIOUserProxy *proxy = vbasedev->proxy;
+
+ proxy->request = handler;
+ proxy->req_arg = req_arg;
+ qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx,
+ vfio_user_recv, NULL, NULL, proxy);
+}
+
+void vfio_user_disconnect(VFIOUserProxy *proxy)
+{
+ VFIOUserMsg *r1, *r2;
+
+ qemu_mutex_lock(&proxy->lock);
+
+ /* our side is quitting */
+ if (proxy->state == VFIO_PROXY_CONNECTED) {
+ vfio_user_shutdown(proxy);
+ if (!QTAILQ_EMPTY(&proxy->pending)) {
+ error_printf("vfio_user_disconnect: outstanding requests\n");
+ }
+ }
+ object_unref(OBJECT(proxy->ioc));
+ proxy->ioc = NULL;
+ qemu_bh_delete(proxy->req_bh);
+ proxy->req_bh = NULL;
+
+ proxy->state = VFIO_PROXY_CLOSING;
+ QTAILQ_FOREACH_SAFE(r1, &proxy->outgoing, next, r2) {
+ qemu_cond_destroy(&r1->cv);
+ QTAILQ_REMOVE(&proxy->outgoing, r1, next);
+ g_free(r1);
+ }
+ QTAILQ_FOREACH_SAFE(r1, &proxy->incoming, next, r2) {
+ qemu_cond_destroy(&r1->cv);
+ QTAILQ_REMOVE(&proxy->incoming, r1, next);
+ g_free(r1);
+ }
+ QTAILQ_FOREACH_SAFE(r1, &proxy->pending, next, r2) {
+ qemu_cond_destroy(&r1->cv);
+ QTAILQ_REMOVE(&proxy->pending, r1, next);
+ g_free(r1);
+ }
+ QTAILQ_FOREACH_SAFE(r1, &proxy->free, next, r2) {
+ qemu_cond_destroy(&r1->cv);
+ QTAILQ_REMOVE(&proxy->free, r1, next);
+ g_free(r1);
+ }
+
+ /*
+ * Make sure the iothread isn't blocking anywhere
+ * with a ref to this proxy by waiting for a BH
+ * handler to run after the proxy fd handlers were
+ * deleted above.
+ */
+ aio_bh_schedule_oneshot(proxy->ctx, vfio_user_close_cb, proxy);
+
+ while (proxy->state != VFIO_PROXY_CLOSED) {
+ qemu_cond_wait(&proxy->close_cv, &proxy->lock);
+ }
+
+ /* we now hold the only ref to proxy */
+ qemu_mutex_unlock(&proxy->lock);
+ qemu_cond_destroy(&proxy->close_cv);
+ qemu_mutex_destroy(&proxy->lock);
+
+ QLIST_REMOVE(proxy, next);
+ if (QLIST_EMPTY(&vfio_user_sockets)) {
+ iothread_destroy(vfio_user_iothread);
+ vfio_user_iothread = NULL;
+ }
+
+ g_free(proxy->sockname);
+ g_free(proxy);
+}
+
+void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd,
+ uint32_t size, uint32_t flags)
+{
+ static uint16_t next_id;
+
+ hdr->id = qatomic_fetch_inc(&next_id);
+ hdr->command = cmd;
+ hdr->size = size;
+ hdr->flags = (flags & ~VFIO_USER_TYPE) | VFIO_USER_REQUEST;
+ hdr->error_reply = 0;
+}
+
+struct cap_entry {
+ const char *name;
+ bool (*check)(VFIOUserProxy *proxy, QObject *qobj, Error **errp);
+};
+
+static bool caps_parse(VFIOUserProxy *proxy, QDict *qdict,
+ struct cap_entry caps[], Error **errp)
+{
+ QObject *qobj;
+ struct cap_entry *p;
+
+ for (p = caps; p->name != NULL; p++) {
+ qobj = qdict_get(qdict, p->name);
+ if (qobj != NULL) {
+ if (!p->check(proxy, qobj, errp)) {
+ return false;
+ }
+ qdict_del(qdict, p->name);
+ }
+ }
+
+ /* warning, for now */
+ if (qdict_size(qdict) != 0) {
+ warn_report("spurious capabilities");
+ }
+ return true;
+}
+
+static bool check_migr_pgsize(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
+{
+ QNum *qn = qobject_to(QNum, qobj);
+ uint64_t pgsize;
+
+ if (qn == NULL || !qnum_get_try_uint(qn, &pgsize)) {
+ error_setg(errp, "malformed %s", VFIO_USER_CAP_PGSIZE);
+ return false;
+ }
+
+ /* must be larger than default */
+ if (pgsize & (VFIO_USER_DEF_PGSIZE - 1)) {
+ error_setg(errp, "pgsize 0x%"PRIx64" too small", pgsize);
+ return false;
+ }
+
+ proxy->migr_pgsize = pgsize;
+ return true;
+}
+
+static bool check_bitmap(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
+{
+ QNum *qn = qobject_to(QNum, qobj);
+ uint64_t bitmap_size;
+
+ if (qn == NULL || !qnum_get_try_uint(qn, &bitmap_size)) {
+ error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_BITMAP);
+ return false;
+ }
+
+ /* can only lower it */
+ if (bitmap_size > VFIO_USER_DEF_MAX_BITMAP) {
+ error_setg(errp, "%s too large", VFIO_USER_CAP_MAX_BITMAP);
+ return false;
+ }
+
+ proxy->max_bitmap = bitmap_size;
+ return true;
+}
+
+static struct cap_entry caps_migr[] = {
+ { VFIO_USER_CAP_PGSIZE, check_migr_pgsize },
+ { VFIO_USER_CAP_MAX_BITMAP, check_bitmap },
+ { NULL }
+};
+
+static bool check_max_fds(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
+{
+ QNum *qn = qobject_to(QNum, qobj);
+ uint64_t max_send_fds;
+
+ if (qn == NULL || !qnum_get_try_uint(qn, &max_send_fds) ||
+ max_send_fds > VFIO_USER_MAX_MAX_FDS) {
+ error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_FDS);
+ return false;
+ }
+ proxy->max_send_fds = max_send_fds;
+ return true;
+}
+
+static bool check_max_xfer(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
+{
+ QNum *qn = qobject_to(QNum, qobj);
+ uint64_t max_xfer_size;
+
+ if (qn == NULL || !qnum_get_try_uint(qn, &max_xfer_size) ||
+ max_xfer_size > VFIO_USER_MAX_MAX_XFER) {
+ error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_XFER);
+ return false;
+ }
+ proxy->max_xfer_size = max_xfer_size;
+ return true;
+}
+
+static bool check_pgsizes(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
+{
+ QNum *qn = qobject_to(QNum, qobj);
+ uint64_t pgsizes;
+
+ if (qn == NULL || !qnum_get_try_uint(qn, &pgsizes)) {
+ error_setg(errp, "malformed %s", VFIO_USER_CAP_PGSIZES);
+ return false;
+ }
+
+ /* must be larger than default */
+ if (pgsizes & (VFIO_USER_DEF_PGSIZE - 1)) {
+ error_setg(errp, "pgsize 0x%"PRIx64" too small", pgsizes);
+ return false;
+ }
+
+ proxy->dma_pgsizes = pgsizes;
+ return true;
+}
+
+static bool check_max_dma(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
+{
+ QNum *qn = qobject_to(QNum, qobj);
+ uint64_t max_dma;
+
+ if (qn == NULL || !qnum_get_try_uint(qn, &max_dma)) {
+ error_setg(errp, "malformed %s", VFIO_USER_CAP_MAP_MAX);
+ return false;
+ }
+
+ /* can only lower it */
+ if (max_dma > VFIO_USER_DEF_MAP_MAX) {
+ error_setg(errp, "%s too large", VFIO_USER_CAP_MAP_MAX);
+ return false;
+ }
+
+ proxy->max_dma = max_dma;
+ return true;
+}
+
+static bool check_migr(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
+{
+ QDict *qdict = qobject_to(QDict, qobj);
+
+ if (qdict == NULL) {
+ error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_FDS);
+ return true;
+ }
+ return caps_parse(proxy, qdict, caps_migr, errp);
+}
+
+static bool check_multi(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
+{
+ QBool *qb = qobject_to(QBool, qobj);
+
+ if (qb == NULL) {
+ error_setg(errp, "malformed %s", VFIO_USER_CAP_MULTI);
+ return false;
+ }
+ if (qbool_get_bool(qb)) {
+ proxy->flags |= VFIO_PROXY_USE_MULTI;
+ }
+ return true;
+}
+
+static struct cap_entry caps_cap[] = {
+ { VFIO_USER_CAP_MAX_FDS, check_max_fds },
+ { VFIO_USER_CAP_MAX_XFER, check_max_xfer },
+ { VFIO_USER_CAP_PGSIZES, check_pgsizes },
+ { VFIO_USER_CAP_MAP_MAX, check_max_dma },
+ { VFIO_USER_CAP_MIGR, check_migr },
+ { VFIO_USER_CAP_MULTI, check_multi },
+ { NULL }
+};
+
+static bool check_cap(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
+{
+ QDict *qdict = qobject_to(QDict, qobj);
+
+ if (qdict == NULL) {
+ error_setg(errp, "malformed %s", VFIO_USER_CAP);
+ return false;
+ }
+ return caps_parse(proxy, qdict, caps_cap, errp);
+}
+
+static struct cap_entry ver_0_0[] = {
+ { VFIO_USER_CAP, check_cap },
+ { NULL }
+};
+
+static bool caps_check(VFIOUserProxy *proxy, int minor, const char *caps,
+ Error **errp)
+{
+ QObject *qobj;
+ QDict *qdict;
+ bool ret;
+
+ qobj = qobject_from_json(caps, NULL);
+ if (qobj == NULL) {
+ error_setg(errp, "malformed capabilities %s", caps);
+ return false;
+ }
+ qdict = qobject_to(QDict, qobj);
+ if (qdict == NULL) {
+ error_setg(errp, "capabilities %s not an object", caps);
+ qobject_unref(qobj);
+ return false;
+ }
+ ret = caps_parse(proxy, qdict, ver_0_0, errp);
+
+ qobject_unref(qobj);
+ return ret;
+}
+
+static GString *caps_json(void)
+{
+ QDict *dict = qdict_new();
+ QDict *capdict = qdict_new();
+ QDict *migdict = qdict_new();
+ GString *str;
+
+ qdict_put_int(migdict, VFIO_USER_CAP_PGSIZE, VFIO_USER_DEF_PGSIZE);
+ qdict_put_int(migdict, VFIO_USER_CAP_MAX_BITMAP, VFIO_USER_DEF_MAX_BITMAP);
+ qdict_put_obj(capdict, VFIO_USER_CAP_MIGR, QOBJECT(migdict));
+
+ qdict_put_int(capdict, VFIO_USER_CAP_MAX_FDS, VFIO_USER_MAX_MAX_FDS);
+ qdict_put_int(capdict, VFIO_USER_CAP_MAX_XFER, VFIO_USER_DEF_MAX_XFER);
+ qdict_put_int(capdict, VFIO_USER_CAP_PGSIZES, VFIO_USER_DEF_PGSIZE);
+ qdict_put_int(capdict, VFIO_USER_CAP_MAP_MAX, VFIO_USER_DEF_MAP_MAX);
+ qdict_put_bool(capdict, VFIO_USER_CAP_MULTI, true);
+
+ qdict_put_obj(dict, VFIO_USER_CAP, QOBJECT(capdict));
+
+ str = qobject_to_json(QOBJECT(dict));
+ qobject_unref(dict);
+ return str;
+}
+
+bool vfio_user_validate_version(VFIOUserProxy *proxy, Error **errp)
+{
+ g_autofree VFIOUserVersion *msgp = NULL;
+ GString *caps;
+ char *reply;
+ int size, caplen;
+
+ caps = caps_json();
+ caplen = caps->len + 1;
+ size = sizeof(*msgp) + caplen;
+ msgp = g_malloc0(size);
+
+ vfio_user_request_msg(&msgp->hdr, VFIO_USER_VERSION, size, 0);
+ msgp->major = VFIO_USER_MAJOR_VER;
+ msgp->minor = VFIO_USER_MINOR_VER;
+ memcpy(&msgp->capabilities, caps->str, caplen);
+ g_string_free(caps, true);
+ trace_vfio_user_version(msgp->major, msgp->minor, msgp->capabilities);
+
+ if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, errp)) {
+ return false;
+ }
+
+ if (msgp->hdr.flags & VFIO_USER_ERROR) {
+ error_setg_errno(errp, msgp->hdr.error_reply, "version reply");
+ return false;
+ }
+
+ if (msgp->major != VFIO_USER_MAJOR_VER ||
+ msgp->minor > VFIO_USER_MINOR_VER) {
+ error_setg(errp, "incompatible server version");
+ return false;
+ }
+
+ reply = msgp->capabilities;
+ if (reply[msgp->hdr.size - sizeof(*msgp) - 1] != '\0') {
+ error_setg(errp, "corrupt version reply");
+ return false;
+ }
+
+ if (!caps_check(proxy, msgp->minor, reply, errp)) {
+ return false;
+ }
+
+ trace_vfio_user_version(msgp->major, msgp->minor, msgp->capabilities);
+ return true;
+}
+
+void vfio_user_flush_multi(VFIOUserProxy *proxy)
+{
+ VFIOUserMsg *msg;
+ VFIOUserWRMulti *wm = proxy->wr_multi;
+ Error *local_err = NULL;
+
+ proxy->wr_multi = NULL;
+
+ /* adjust size for actual # of writes */
+ wm->hdr.size -= (VFIO_USER_MULTI_MAX - wm->wr_cnt) * sizeof(VFIOUserWROne);
+
+ msg = vfio_user_getmsg(proxy, &wm->hdr, NULL);
+ msg->id = wm->hdr.id;
+ msg->rsize = 0;
+ msg->type = VFIO_MSG_ASYNC;
+ trace_vfio_user_wrmulti("flush", wm->wr_cnt);
+
+ if (!vfio_user_send_queued(proxy, msg, &local_err)) {
+ error_report_err(local_err);
+ vfio_user_recycle(proxy, msg);
+ }
+}
+
+void vfio_user_create_multi(VFIOUserProxy *proxy)
+{
+ VFIOUserWRMulti *wm;
+
+ wm = g_malloc0(sizeof(*wm));
+ vfio_user_request_msg(&wm->hdr, VFIO_USER_REGION_WRITE_MULTI,
+ sizeof(*wm), VFIO_USER_NO_REPLY);
+ proxy->wr_multi = wm;
+}
+
+void vfio_user_add_multi(VFIOUserProxy *proxy, uint8_t index,
+ off_t offset, uint32_t count, void *data)
+{
+ VFIOUserWRMulti *wm = proxy->wr_multi;
+ VFIOUserWROne *w1 = &wm->wrs[wm->wr_cnt];
+
+ w1->offset = offset;
+ w1->region = index;
+ w1->count = count;
+ memcpy(&w1->data, data, count);
+
+ wm->wr_cnt++;
+ trace_vfio_user_wrmulti("add", wm->wr_cnt);
+ if (wm->wr_cnt == VFIO_USER_MULTI_MAX ||
+ proxy->num_outgoing < VFIO_USER_OUT_LOW) {
+ vfio_user_flush_multi(proxy);
+ }
+}
diff --git a/hw/vfio-user/proxy.h b/hw/vfio-user/proxy.h
new file mode 100644
index 0000000..61e64a0
--- /dev/null
+++ b/hw/vfio-user/proxy.h
@@ -0,0 +1,135 @@
+#ifndef VFIO_USER_PROXY_H
+#define VFIO_USER_PROXY_H
+
+/*
+ * vfio protocol over a UNIX socket.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "io/channel.h"
+#include "io/channel-socket.h"
+
+#include "qemu/queue.h"
+#include "qemu/sockets.h"
+#include "qemu/thread.h"
+#include "hw/vfio/vfio-device.h"
+#include "hw/vfio-user/protocol.h"
+
+typedef struct {
+ int send_fds;
+ int recv_fds;
+ int *fds;
+} VFIOUserFDs;
+
+enum msg_type {
+ VFIO_MSG_NONE,
+ VFIO_MSG_ASYNC,
+ VFIO_MSG_WAIT,
+ VFIO_MSG_NOWAIT,
+ VFIO_MSG_REQ,
+};
+
+typedef struct VFIOUserMsg {
+ QTAILQ_ENTRY(VFIOUserMsg) next;
+ VFIOUserHdr *hdr;
+ VFIOUserFDs *fds;
+ uint32_t rsize;
+ uint32_t id;
+ QemuCond cv;
+ bool complete;
+ bool pending;
+ enum msg_type type;
+} VFIOUserMsg;
+
+
+enum proxy_state {
+ VFIO_PROXY_CONNECTED = 1,
+ VFIO_PROXY_ERROR = 2,
+ VFIO_PROXY_CLOSING = 3,
+ VFIO_PROXY_CLOSED = 4,
+};
+
+typedef QTAILQ_HEAD(VFIOUserMsgQ, VFIOUserMsg) VFIOUserMsgQ;
+
+typedef struct VFIOUserProxy {
+ QLIST_ENTRY(VFIOUserProxy) next;
+ char *sockname;
+ struct QIOChannel *ioc;
+ void (*request)(void *opaque, VFIOUserMsg *msg);
+ void *req_arg;
+ uint64_t max_xfer_size;
+ uint64_t max_send_fds;
+ uint64_t max_dma;
+ uint64_t dma_pgsizes;
+ uint64_t max_bitmap;
+ uint64_t migr_pgsize;
+ int flags;
+ uint32_t wait_time;
+ QemuCond close_cv;
+ AioContext *ctx;
+ QEMUBH *req_bh;
+ bool async_ops;
+
+ /*
+ * above only changed when BQL is held
+ * below are protected by per-proxy lock
+ */
+ QemuMutex lock;
+ VFIOUserMsgQ free;
+ VFIOUserMsgQ pending;
+ VFIOUserMsgQ incoming;
+ VFIOUserMsgQ outgoing;
+ VFIOUserMsg *last_nowait;
+ VFIOUserMsg *part_recv;
+ size_t recv_left;
+ VFIOUserWRMulti *wr_multi;
+ int num_outgoing;
+ enum proxy_state state;
+} VFIOUserProxy;
+
+/* VFIOProxy flags */
+#define VFIO_PROXY_CLIENT 0x1
+#define VFIO_PROXY_FORCE_QUEUED 0x4
+#define VFIO_PROXY_NO_POST 0x8
+#define VFIO_PROXY_USE_MULTI 0x16
+
+/* coalescing high and low water marks for VFIOProxy num_outgoing */
+#define VFIO_USER_OUT_HIGH 1024
+#define VFIO_USER_OUT_LOW 128
+
+typedef struct VFIODevice VFIODevice;
+
+VFIOUserProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp);
+void vfio_user_disconnect(VFIOUserProxy *proxy);
+void vfio_user_set_handler(VFIODevice *vbasedev,
+ void (*handler)(void *opaque, VFIOUserMsg *msg),
+ void *reqarg);
+bool vfio_user_validate_version(VFIOUserProxy *proxy, Error **errp);
+
+VFIOUserFDs *vfio_user_getfds(int numfds);
+void vfio_user_putfds(VFIOUserMsg *msg);
+
+void vfio_user_disable_posted_writes(VFIOUserProxy *proxy);
+
+void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd,
+ uint32_t size, uint32_t flags);
+void vfio_user_wait_reqs(VFIOUserProxy *proxy);
+bool vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+ VFIOUserFDs *fds, int rsize, Error **errp);
+bool vfio_user_send_nowait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+ VFIOUserFDs *fds, int rsize, Error **errp);
+bool vfio_user_send_async(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+ VFIOUserFDs *fds, Error **errp);
+
+void vfio_user_send_reply(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int size);
+void vfio_user_send_error(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int error);
+
+void vfio_user_flush_multi(VFIOUserProxy *proxy);
+void vfio_user_create_multi(VFIOUserProxy *proxy);
+void vfio_user_add_multi(VFIOUserProxy *proxy, uint8_t index,
+ off_t offset, uint32_t count, void *data);
+
+#endif /* VFIO_USER_PROXY_H */
diff --git a/hw/vfio-user/trace-events b/hw/vfio-user/trace-events
new file mode 100644
index 0000000..abb67f4
--- /dev/null
+++ b/hw/vfio-user/trace-events
@@ -0,0 +1,20 @@
+# See docs/devel/tracing.rst for syntax documentation.
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+# common.c
+vfio_user_recv_hdr(const char *name, uint16_t id, uint16_t cmd, uint32_t size, uint32_t flags) " (%s) id 0x%x cmd 0x%x size 0x%x flags 0x%x"
+vfio_user_recv_read(uint16_t id, int read) " id 0x%x read 0x%x"
+vfio_user_recv_request(uint16_t cmd) " command 0x%x"
+vfio_user_send_write(uint16_t id, int wrote) " id 0x%x wrote 0x%x"
+vfio_user_version(uint16_t major, uint16_t minor, const char *caps) " major %d minor %d caps: %s"
+vfio_user_get_info(uint32_t nregions, uint32_t nirqs) " #regions %d #irqs %d"
+vfio_user_get_region_info(uint32_t index, uint32_t flags, uint64_t size) " index %d flags 0x%x size 0x%"PRIx64
+vfio_user_region_rw(uint32_t region, uint64_t off, uint32_t count) " region %d offset 0x%"PRIx64" count %d"
+vfio_user_get_irq_info(uint32_t index, uint32_t flags, uint32_t count) " index %d flags 0x%x count %d"
+vfio_user_set_irqs(uint32_t index, uint32_t start, uint32_t count, uint32_t flags) " index %d start %d count %d flags 0x%x"
+vfio_user_wrmulti(const char *s, uint64_t wr_cnt) " %s count 0x%"PRIx64
+
+# container.c
+vfio_user_dma_map(uint64_t iova, uint64_t size, uint64_t off, uint32_t flags, bool async_ops) " iova 0x%"PRIx64" size 0x%"PRIx64" off 0x%"PRIx64" flags 0x%x async_ops %d"
+vfio_user_dma_unmap(uint64_t iova, uint64_t size, uint32_t flags, bool async_ops) " iova 0x%"PRIx64" size 0x%"PRIx64" flags 0x%x async_ops %d"
diff --git a/hw/vfio-user/trace.h b/hw/vfio-user/trace.h
new file mode 100644
index 0000000..9cf02d9
--- /dev/null
+++ b/hw/vfio-user/trace.h
@@ -0,0 +1,4 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include "trace/trace-hw_vfio_user.h"
diff --git a/hw/vfio/Kconfig b/hw/vfio/Kconfig
index 7cdba05..27de24e 100644
--- a/hw/vfio/Kconfig
+++ b/hw/vfio/Kconfig
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
config VFIO
bool
depends on LINUX
@@ -15,22 +17,6 @@ config VFIO_CCW
select VFIO
depends on LINUX && S390_CCW_VIRTIO
-config VFIO_PLATFORM
- bool
- default y
- select VFIO
- depends on LINUX && PLATFORM_BUS
-
-config VFIO_XGMAC
- bool
- default y
- depends on VFIO_PLATFORM
-
-config VFIO_AMD_XGBE
- bool
- default y
- depends on VFIO_PLATFORM
-
config VFIO_AP
bool
default y
diff --git a/hw/vfio/amd-xgbe.c b/hw/vfio/amd-xgbe.c
deleted file mode 100644
index 58f590e..0000000
--- a/hw/vfio/amd-xgbe.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * AMD XGBE VFIO device
- *
- * Copyright Linaro Limited, 2015
- *
- * Authors:
- * Eric Auger <eric.auger@linaro.org>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include "hw/vfio/vfio-amd-xgbe.h"
-#include "migration/vmstate.h"
-#include "qemu/module.h"
-#include "qemu/error-report.h"
-
-static void amd_xgbe_realize(DeviceState *dev, Error **errp)
-{
- VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev);
- VFIOAmdXgbeDeviceClass *k = VFIO_AMD_XGBE_DEVICE_GET_CLASS(dev);
-
- warn_report("-device vfio-amd-xgbe is deprecated");
- vdev->compat = g_strdup("amd,xgbe-seattle-v1a");
- vdev->num_compat = 1;
-
- k->parent_realize(dev, errp);
-}
-
-static const VMStateDescription vfio_platform_amd_xgbe_vmstate = {
- .name = "vfio-amd-xgbe",
- .unmigratable = 1,
-};
-
-static void vfio_amd_xgbe_class_init(ObjectClass *klass, const void *data)
-{
- DeviceClass *dc = DEVICE_CLASS(klass);
- VFIOAmdXgbeDeviceClass *vcxc =
- VFIO_AMD_XGBE_DEVICE_CLASS(klass);
- device_class_set_parent_realize(dc, amd_xgbe_realize,
- &vcxc->parent_realize);
- dc->desc = "VFIO AMD XGBE";
- dc->vmsd = &vfio_platform_amd_xgbe_vmstate;
-}
-
-static const TypeInfo vfio_amd_xgbe_dev_info = {
- .name = TYPE_VFIO_AMD_XGBE,
- .parent = TYPE_VFIO_PLATFORM,
- .instance_size = sizeof(VFIOAmdXgbeDevice),
- .class_init = vfio_amd_xgbe_class_init,
- .class_size = sizeof(VFIOAmdXgbeDeviceClass),
-};
-
-static void register_amd_xgbe_dev_type(void)
-{
- type_register_static(&vfio_amd_xgbe_dev_info);
-}
-
-type_init(register_amd_xgbe_dev_type)
diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
index 785c0a0..7719f24 100644
--- a/hw/vfio/ap.c
+++ b/hw/vfio/ap.c
@@ -10,6 +10,7 @@
* directory.
*/
+#include <stdbool.h>
#include "qemu/osdep.h"
#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
#include <linux/vfio.h>
@@ -18,8 +19,10 @@
#include "hw/vfio/vfio-device.h"
#include "system/iommufd.h"
#include "hw/s390x/ap-device.h"
+#include "hw/s390x/css.h"
#include "qemu/error-report.h"
#include "qemu/event_notifier.h"
+#include "qemu/lockable.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "qemu/option.h"
@@ -37,8 +40,23 @@ struct VFIOAPDevice {
APDevice apdev;
VFIODevice vdev;
EventNotifier req_notifier;
+ EventNotifier cfg_notifier;
};
+typedef struct APConfigChgEvent {
+ QTAILQ_ENTRY(APConfigChgEvent) next;
+} APConfigChgEvent;
+
+static QTAILQ_HEAD(, APConfigChgEvent) cfg_chg_events =
+ QTAILQ_HEAD_INITIALIZER(cfg_chg_events);
+
+static QemuMutex cfg_chg_events_lock;
+
+static void __attribute__((constructor)) vfio_ap_global_init(void)
+{
+ qemu_mutex_init(&cfg_chg_events_lock);
+}
+
OBJECT_DECLARE_SIMPLE_TYPE(VFIOAPDevice, VFIO_AP_DEVICE)
static void vfio_ap_compute_needs_reset(VFIODevice *vdev)
@@ -70,6 +88,57 @@ static void vfio_ap_req_notifier_handler(void *opaque)
}
}
+static void vfio_ap_cfg_chg_notifier_handler(void *opaque)
+{
+ APConfigChgEvent *cfg_chg_event;
+ VFIOAPDevice *vapdev = opaque;
+
+ if (!event_notifier_test_and_clear(&vapdev->cfg_notifier)) {
+ return;
+ }
+
+ cfg_chg_event = g_new0(APConfigChgEvent, 1);
+
+ WITH_QEMU_LOCK_GUARD(&cfg_chg_events_lock) {
+ QTAILQ_INSERT_TAIL(&cfg_chg_events, cfg_chg_event, next);
+ }
+
+ css_generate_css_crws(0);
+
+}
+
+int ap_chsc_sei_nt0_get_event(void *res)
+{
+ ChscSeiNt0Res *nt0_res = (ChscSeiNt0Res *)res;
+ APConfigChgEvent *cfg_chg_event;
+
+ WITH_QEMU_LOCK_GUARD(&cfg_chg_events_lock) {
+ if (QTAILQ_EMPTY(&cfg_chg_events)) {
+ return EVENT_INFORMATION_NOT_STORED;
+ }
+
+ cfg_chg_event = QTAILQ_FIRST(&cfg_chg_events);
+ QTAILQ_REMOVE(&cfg_chg_events, cfg_chg_event, next);
+ }
+
+ memset(nt0_res, 0, sizeof(*nt0_res));
+ g_free(cfg_chg_event);
+ nt0_res->flags |= PENDING_EVENT_INFO_BITMASK;
+ nt0_res->length = sizeof(ChscSeiNt0Res);
+ nt0_res->code = NT0_RES_RESPONSE_CODE;
+ nt0_res->nt = NT0_RES_NT_DEFAULT;
+ nt0_res->rs = NT0_RES_RS_AP_CHANGE;
+ nt0_res->cc = NT0_RES_CC_AP_CHANGE;
+
+ return EVENT_INFORMATION_STORED;
+}
+
+bool ap_chsc_sei_nt0_have_event(void)
+{
+ QEMU_LOCK_GUARD(&cfg_chg_events_lock);
+ return !QTAILQ_EMPTY(&cfg_chg_events);
+}
+
static bool vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev,
unsigned int irq, Error **errp)
{
@@ -85,6 +154,10 @@ static bool vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev,
notifier = &vapdev->req_notifier;
fd_read = vfio_ap_req_notifier_handler;
break;
+ case VFIO_AP_CFG_CHG_IRQ_INDEX:
+ notifier = &vapdev->cfg_notifier;
+ fd_read = vfio_ap_cfg_chg_notifier_handler;
+ break;
default:
error_setg(errp, "vfio: Unsupported device irq(%d)", irq);
return false;
@@ -137,6 +210,9 @@ static void vfio_ap_unregister_irq_notifier(VFIOAPDevice *vapdev,
case VFIO_AP_REQ_IRQ_INDEX:
notifier = &vapdev->req_notifier;
break;
+ case VFIO_AP_CFG_CHG_IRQ_INDEX:
+ notifier = &vapdev->cfg_notifier;
+ break;
default:
error_report("vfio: Unsupported device irq(%d)", irq);
return;
@@ -176,11 +252,20 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp)
warn_report_err(err);
}
+ if (!vfio_ap_register_irq_notifier(vapdev, VFIO_AP_CFG_CHG_IRQ_INDEX, &err))
+ {
+ /*
+ * Report this error, but do not make it a failing condition.
+ * Lack of this IRQ in the host does not prevent normal operation.
+ */
+ warn_report_err(err);
+ }
+
return;
error:
error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name);
- g_free(vbasedev->name);
+ vfio_device_free_name(vbasedev);
}
static void vfio_ap_unrealize(DeviceState *dev)
@@ -188,8 +273,9 @@ static void vfio_ap_unrealize(DeviceState *dev)
VFIOAPDevice *vapdev = VFIO_AP_DEVICE(dev);
vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX);
+ vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_CFG_CHG_IRQ_INDEX);
vfio_device_detach(&vapdev->vdev);
- g_free(vapdev->vdev.name);
+ vfio_device_free_name(&vapdev->vdev);
}
static const Property vfio_ap_properties[] = {
diff --git a/hw/vfio/calxeda-xgmac.c b/hw/vfio/calxeda-xgmac.c
deleted file mode 100644
index 03f2ff5..0000000
--- a/hw/vfio/calxeda-xgmac.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * calxeda xgmac VFIO device
- *
- * Copyright Linaro Limited, 2014
- *
- * Authors:
- * Eric Auger <eric.auger@linaro.org>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include "hw/vfio/vfio-calxeda-xgmac.h"
-#include "migration/vmstate.h"
-#include "qemu/module.h"
-#include "qemu/error-report.h"
-
-static void calxeda_xgmac_realize(DeviceState *dev, Error **errp)
-{
- VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev);
- VFIOCalxedaXgmacDeviceClass *k = VFIO_CALXEDA_XGMAC_DEVICE_GET_CLASS(dev);
-
- warn_report("-device vfio-calxeda-xgmac is deprecated");
- vdev->compat = g_strdup("calxeda,hb-xgmac");
- vdev->num_compat = 1;
-
- k->parent_realize(dev, errp);
-}
-
-static const VMStateDescription vfio_platform_calxeda_xgmac_vmstate = {
- .name = "vfio-calxeda-xgmac",
- .unmigratable = 1,
-};
-
-static void vfio_calxeda_xgmac_class_init(ObjectClass *klass, const void *data)
-{
- DeviceClass *dc = DEVICE_CLASS(klass);
- VFIOCalxedaXgmacDeviceClass *vcxc =
- VFIO_CALXEDA_XGMAC_DEVICE_CLASS(klass);
- device_class_set_parent_realize(dc, calxeda_xgmac_realize,
- &vcxc->parent_realize);
- dc->desc = "VFIO Calxeda XGMAC";
- dc->vmsd = &vfio_platform_calxeda_xgmac_vmstate;
-}
-
-static const TypeInfo vfio_calxeda_xgmac_dev_info = {
- .name = TYPE_VFIO_CALXEDA_XGMAC,
- .parent = TYPE_VFIO_PLATFORM,
- .instance_size = sizeof(VFIOCalxedaXgmacDevice),
- .class_init = vfio_calxeda_xgmac_class_init,
- .class_size = sizeof(VFIOCalxedaXgmacDeviceClass),
-};
-
-static void register_calxeda_xgmac_dev_type(void)
-{
- type_register_static(&vfio_calxeda_xgmac_dev_info);
-}
-
-type_init(register_calxeda_xgmac_dev_type)
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index cea9d6e..9560b8d 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -619,7 +619,7 @@ out_io_notifier_err:
out_region_err:
vfio_device_detach(vbasedev);
out_attach_dev_err:
- g_free(vbasedev->name);
+ vfio_device_free_name(vbasedev);
out_unrealize:
if (cdc->unrealize) {
cdc->unrealize(cdev);
@@ -637,7 +637,7 @@ static void vfio_ccw_unrealize(DeviceState *dev)
vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX);
vfio_ccw_put_region(vcdev);
vfio_device_detach(&vcdev->vdev);
- g_free(vcdev->vdev.name);
+ vfio_device_free_name(&vcdev->vdev);
if (cdc->unrealize) {
cdc->unrealize(cdev);
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
deleted file mode 100644
index 1c6ca94..0000000
--- a/hw/vfio/container-base.c
+++ /dev/null
@@ -1,338 +0,0 @@
-/*
- * VFIO BASE CONTAINER
- *
- * Copyright (C) 2023 Intel Corporation.
- * Copyright Red Hat, Inc. 2023
- *
- * Authors: Yi Liu <yi.l.liu@intel.com>
- * Eric Auger <eric.auger@redhat.com>
- *
- * SPDX-License-Identifier: GPL-2.0-or-later
- */
-
-#include <sys/ioctl.h>
-#include <linux/vfio.h>
-
-#include "qemu/osdep.h"
-#include "system/tcg.h"
-#include "system/ram_addr.h"
-#include "qapi/error.h"
-#include "qemu/error-report.h"
-#include "hw/vfio/vfio-container-base.h"
-#include "hw/vfio/vfio-device.h" /* vfio_device_reset_handler */
-#include "system/reset.h"
-#include "vfio-helpers.h"
-
-#include "trace.h"
-
-static QLIST_HEAD(, VFIOAddressSpace) vfio_address_spaces =
- QLIST_HEAD_INITIALIZER(vfio_address_spaces);
-
-VFIOAddressSpace *vfio_address_space_get(AddressSpace *as)
-{
- VFIOAddressSpace *space;
-
- QLIST_FOREACH(space, &vfio_address_spaces, list) {
- if (space->as == as) {
- return space;
- }
- }
-
- /* No suitable VFIOAddressSpace, create a new one */
- space = g_malloc0(sizeof(*space));
- space->as = as;
- QLIST_INIT(&space->containers);
-
- if (QLIST_EMPTY(&vfio_address_spaces)) {
- qemu_register_reset(vfio_device_reset_handler, NULL);
- }
-
- QLIST_INSERT_HEAD(&vfio_address_spaces, space, list);
-
- return space;
-}
-
-void vfio_address_space_put(VFIOAddressSpace *space)
-{
- if (!QLIST_EMPTY(&space->containers)) {
- return;
- }
-
- QLIST_REMOVE(space, list);
- g_free(space);
-
- if (QLIST_EMPTY(&vfio_address_spaces)) {
- qemu_unregister_reset(vfio_device_reset_handler, NULL);
- }
-}
-
-void vfio_address_space_insert(VFIOAddressSpace *space,
- VFIOContainerBase *bcontainer)
-{
- QLIST_INSERT_HEAD(&space->containers, bcontainer, next);
- bcontainer->space = space;
-}
-
-int vfio_container_dma_map(VFIOContainerBase *bcontainer,
- hwaddr iova, ram_addr_t size,
- void *vaddr, bool readonly)
-{
- VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
-
- g_assert(vioc->dma_map);
- return vioc->dma_map(bcontainer, iova, size, vaddr, readonly);
-}
-
-int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
- hwaddr iova, ram_addr_t size,
- IOMMUTLBEntry *iotlb, bool unmap_all)
-{
- VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
-
- g_assert(vioc->dma_unmap);
- return vioc->dma_unmap(bcontainer, iova, size, iotlb, unmap_all);
-}
-
-bool vfio_container_add_section_window(VFIOContainerBase *bcontainer,
- MemoryRegionSection *section,
- Error **errp)
-{
- VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
-
- if (!vioc->add_window) {
- return true;
- }
-
- return vioc->add_window(bcontainer, section, errp);
-}
-
-void vfio_container_del_section_window(VFIOContainerBase *bcontainer,
- MemoryRegionSection *section)
-{
- VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
-
- if (!vioc->del_window) {
- return;
- }
-
- return vioc->del_window(bcontainer, section);
-}
-
-int vfio_container_set_dirty_page_tracking(VFIOContainerBase *bcontainer,
- bool start, Error **errp)
-{
- VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
- int ret;
-
- if (!bcontainer->dirty_pages_supported) {
- return 0;
- }
-
- g_assert(vioc->set_dirty_page_tracking);
- if (bcontainer->dirty_pages_started == start) {
- return 0;
- }
-
- ret = vioc->set_dirty_page_tracking(bcontainer, start, errp);
- if (!ret) {
- bcontainer->dirty_pages_started = start;
- }
-
- return ret;
-}
-
-static bool vfio_container_devices_dirty_tracking_is_started(
- const VFIOContainerBase *bcontainer)
-{
- VFIODevice *vbasedev;
-
- QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
- if (!vbasedev->dirty_tracking) {
- return false;
- }
- }
-
- return true;
-}
-
-bool vfio_container_dirty_tracking_is_started(
- const VFIOContainerBase *bcontainer)
-{
- return vfio_container_devices_dirty_tracking_is_started(bcontainer) ||
- bcontainer->dirty_pages_started;
-}
-
-bool vfio_container_devices_dirty_tracking_is_supported(
- const VFIOContainerBase *bcontainer)
-{
- VFIODevice *vbasedev;
-
- QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
- if (vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) {
- return false;
- }
- if (!vbasedev->dirty_pages_supported) {
- return false;
- }
- }
-
- return true;
-}
-
-static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova,
- hwaddr size, void *bitmap)
-{
- uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
- sizeof(struct vfio_device_feature_dma_logging_report),
- sizeof(uint64_t))] = {};
- struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
- struct vfio_device_feature_dma_logging_report *report =
- (struct vfio_device_feature_dma_logging_report *)feature->data;
-
- report->iova = iova;
- report->length = size;
- report->page_size = qemu_real_host_page_size();
- report->bitmap = (uintptr_t)bitmap;
-
- feature->argsz = sizeof(buf);
- feature->flags = VFIO_DEVICE_FEATURE_GET |
- VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;
-
- return vbasedev->io_ops->device_feature(vbasedev, feature);
-}
-
-static int vfio_container_iommu_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
- VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp)
-{
- VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
-
- g_assert(vioc->query_dirty_bitmap);
- return vioc->query_dirty_bitmap(bcontainer, vbmap, iova, size,
- errp);
-}
-
-static int vfio_container_devices_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
- VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp)
-{
- VFIODevice *vbasedev;
- int ret;
-
- QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
- ret = vfio_device_dma_logging_report(vbasedev, iova, size,
- vbmap->bitmap);
- if (ret) {
- error_setg_errno(errp, -ret,
- "%s: Failed to get DMA logging report, iova: "
- "0x%" HWADDR_PRIx ", size: 0x%" HWADDR_PRIx,
- vbasedev->name, iova, size);
-
- return ret;
- }
- }
-
- return 0;
-}
-
-int vfio_container_query_dirty_bitmap(const VFIOContainerBase *bcontainer, uint64_t iova,
- uint64_t size, ram_addr_t ram_addr, Error **errp)
-{
- bool all_device_dirty_tracking =
- vfio_container_devices_dirty_tracking_is_supported(bcontainer);
- uint64_t dirty_pages;
- VFIOBitmap vbmap;
- int ret;
-
- if (!bcontainer->dirty_pages_supported && !all_device_dirty_tracking) {
- cpu_physical_memory_set_dirty_range(ram_addr, size,
- tcg_enabled() ? DIRTY_CLIENTS_ALL :
- DIRTY_CLIENTS_NOCODE);
- return 0;
- }
-
- ret = vfio_bitmap_alloc(&vbmap, size);
- if (ret) {
- error_setg_errno(errp, -ret,
- "Failed to allocate dirty tracking bitmap");
- return ret;
- }
-
- if (all_device_dirty_tracking) {
- ret = vfio_container_devices_query_dirty_bitmap(bcontainer, &vbmap, iova, size,
- errp);
- } else {
- ret = vfio_container_iommu_query_dirty_bitmap(bcontainer, &vbmap, iova, size,
- errp);
- }
-
- if (ret) {
- goto out;
- }
-
- dirty_pages = cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap, ram_addr,
- vbmap.pages);
-
- trace_vfio_container_query_dirty_bitmap(iova, size, vbmap.size, ram_addr,
- dirty_pages);
-out:
- g_free(vbmap.bitmap);
-
- return ret;
-}
-
-static gpointer copy_iova_range(gconstpointer src, gpointer data)
-{
- Range *source = (Range *)src;
- Range *dest = g_new(Range, 1);
-
- range_set_bounds(dest, range_lob(source), range_upb(source));
- return dest;
-}
-
-GList *vfio_container_get_iova_ranges(const VFIOContainerBase *bcontainer)
-{
- assert(bcontainer);
- return g_list_copy_deep(bcontainer->iova_ranges, copy_iova_range, NULL);
-}
-
-static void vfio_container_instance_finalize(Object *obj)
-{
- VFIOContainerBase *bcontainer = VFIO_IOMMU(obj);
- VFIOGuestIOMMU *giommu, *tmp;
-
- QLIST_SAFE_REMOVE(bcontainer, next);
-
- QLIST_FOREACH_SAFE(giommu, &bcontainer->giommu_list, giommu_next, tmp) {
- memory_region_unregister_iommu_notifier(
- MEMORY_REGION(giommu->iommu_mr), &giommu->n);
- QLIST_REMOVE(giommu, giommu_next);
- g_free(giommu);
- }
-
- g_list_free_full(bcontainer->iova_ranges, g_free);
-}
-
-static void vfio_container_instance_init(Object *obj)
-{
- VFIOContainerBase *bcontainer = VFIO_IOMMU(obj);
-
- bcontainer->error = NULL;
- bcontainer->dirty_pages_supported = false;
- bcontainer->dma_max_mappings = 0;
- bcontainer->iova_ranges = NULL;
- QLIST_INIT(&bcontainer->giommu_list);
- QLIST_INIT(&bcontainer->vrdl_list);
-}
-
-static const TypeInfo types[] = {
- {
- .name = TYPE_VFIO_IOMMU,
- .parent = TYPE_OBJECT,
- .instance_init = vfio_container_instance_init,
- .instance_finalize = vfio_container_instance_finalize,
- .instance_size = sizeof(VFIOContainerBase),
- .class_size = sizeof(VFIOIOMMUClass),
- .abstract = true,
- },
-};
-
-DEFINE_TYPES(types)
diff --git a/hw/vfio/container-legacy.c b/hw/vfio/container-legacy.c
new file mode 100644
index 0000000..a3615d7
--- /dev/null
+++ b/hw/vfio/container-legacy.c
@@ -0,0 +1,1260 @@
+/*
+ * generic functions used by VFIO devices
+ *
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ * Alex Williamson <alex.williamson@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Based on qemu-kvm device-assignment:
+ * Adapted for KVM by Qumranet.
+ * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
+ * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
+ * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
+ * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
+ * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
+ */
+
+#include "qemu/osdep.h"
+#include <sys/ioctl.h>
+#include <linux/vfio.h>
+
+#include "hw/vfio/vfio-device.h"
+#include "system/address-spaces.h"
+#include "system/memory.h"
+#include "system/physmem.h"
+#include "qemu/error-report.h"
+#include "qemu/range.h"
+#include "system/reset.h"
+#include "trace.h"
+#include "qapi/error.h"
+#include "migration/cpr.h"
+#include "migration/blocker.h"
+#include "pci.h"
+#include "hw/vfio/vfio-container-legacy.h"
+#include "vfio-helpers.h"
+#include "vfio-listener.h"
+
+#define TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE "-legacy-vfio"
+
+typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
+static VFIOGroupList vfio_group_list =
+ QLIST_HEAD_INITIALIZER(vfio_group_list);
+
+static int vfio_ram_block_discard_disable(VFIOLegacyContainer *container,
+ bool state)
+{
+ switch (container->iommu_type) {
+ case VFIO_TYPE1v2_IOMMU:
+ case VFIO_TYPE1_IOMMU:
+ /*
+ * We support coordinated discarding of RAM via the RamDiscardManager.
+ */
+ return ram_block_uncoordinated_discard_disable(state);
+ default:
+ /*
+ * VFIO_SPAPR_TCE_IOMMU most probably works just fine with
+ * RamDiscardManager, however, it is completely untested.
+ *
+ * VFIO_SPAPR_TCE_v2_IOMMU with "DMA memory preregistering" does
+ * completely the opposite of managing mapping/pinning dynamically as
+ * required by RamDiscardManager. We would have to special-case sections
+ * with a RamDiscardManager.
+ */
+ return ram_block_discard_disable(state);
+ }
+}
+
+static int vfio_dma_unmap_bitmap(const VFIOLegacyContainer *container,
+ hwaddr iova, uint64_t size,
+ IOMMUTLBEntry *iotlb)
+{
+ const VFIOContainer *bcontainer = VFIO_IOMMU(container);
+ struct vfio_iommu_type1_dma_unmap *unmap;
+ struct vfio_bitmap *bitmap;
+ VFIOBitmap vbmap;
+ int ret;
+
+ ret = vfio_bitmap_alloc(&vbmap, size);
+ if (ret) {
+ return ret;
+ }
+
+ unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap));
+
+ unmap->argsz = sizeof(*unmap) + sizeof(*bitmap);
+ unmap->iova = iova;
+ unmap->size = size;
+ unmap->flags |= VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP;
+ bitmap = (struct vfio_bitmap *)&unmap->data;
+
+ /*
+ * physical_memory_set_dirty_lebitmap() supports pages in bitmap of
+ * qemu_real_host_page_size to mark those dirty. Hence set bitmap_pgsize
+ * to qemu_real_host_page_size.
+ */
+ bitmap->pgsize = qemu_real_host_page_size();
+ bitmap->size = vbmap.size;
+ bitmap->data = (__u64 *)vbmap.bitmap;
+
+ if (vbmap.size > bcontainer->max_dirty_bitmap_size) {
+ error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, vbmap.size);
+ ret = -E2BIG;
+ goto unmap_exit;
+ }
+
+ ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
+ if (!ret) {
+ physical_memory_set_dirty_lebitmap(vbmap.bitmap,
+ iotlb->translated_addr, vbmap.pages);
+ } else {
+ error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m");
+ }
+
+unmap_exit:
+ g_free(unmap);
+ g_free(vbmap.bitmap);
+
+ return ret;
+}
+
+static int vfio_legacy_dma_unmap_one(const VFIOContainer *bcontainer,
+ hwaddr iova, uint64_t size,
+ IOMMUTLBEntry *iotlb)
+{
+ const VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
+ struct vfio_iommu_type1_dma_unmap unmap = {
+ .argsz = sizeof(unmap),
+ .flags = 0,
+ .iova = iova,
+ .size = size,
+ };
+ bool need_dirty_sync = false;
+ int ret;
+ Error *local_err = NULL;
+
+ g_assert(!cpr_is_incoming());
+
+ if (iotlb && vfio_container_dirty_tracking_is_started(bcontainer)) {
+ if (!vfio_container_devices_dirty_tracking_is_supported(bcontainer) &&
+ bcontainer->dirty_pages_supported) {
+ return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
+ }
+
+ need_dirty_sync = true;
+ }
+
+ if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
+ return -errno;
+ }
+
+ if (need_dirty_sync) {
+ ret = vfio_container_query_dirty_bitmap(bcontainer, iova, size,
+ iotlb->translated_addr, &local_err);
+ if (ret) {
+ error_report_err(local_err);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
+ */
+static int vfio_legacy_dma_unmap(const VFIOContainer *bcontainer,
+ hwaddr iova, uint64_t size,
+ IOMMUTLBEntry *iotlb, bool unmap_all)
+{
+ int ret;
+
+ if (unmap_all) {
+ /* The unmap ioctl doesn't accept a full 64-bit span. */
+ Int128 llsize = int128_rshift(int128_2_64(), 1);
+
+ ret = vfio_legacy_dma_unmap_one(bcontainer, 0, int128_get64(llsize),
+ iotlb);
+
+ if (ret == 0) {
+ ret = vfio_legacy_dma_unmap_one(bcontainer, int128_get64(llsize),
+ int128_get64(llsize), iotlb);
+ }
+
+ } else {
+ ret = vfio_legacy_dma_unmap_one(bcontainer, iova, size, iotlb);
+ }
+
+ return ret;
+}
+
+static int vfio_legacy_dma_map(const VFIOContainer *bcontainer, hwaddr iova,
+ uint64_t size, void *vaddr, bool readonly,
+ MemoryRegion *mr)
+{
+ const VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
+ struct vfio_iommu_type1_dma_map map = {
+ .argsz = sizeof(map),
+ .flags = VFIO_DMA_MAP_FLAG_READ,
+ .vaddr = (__u64)(uintptr_t)vaddr,
+ .iova = iova,
+ .size = size,
+ };
+
+ if (!readonly) {
+ map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
+ }
+
+ /*
+ * Try the mapping, if it fails with EBUSY, unmap the region and try
+ * again. This shouldn't be necessary, but we sometimes see it in
+ * the VGA ROM space.
+ */
+ if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
+ (errno == EBUSY &&
+ vfio_legacy_dma_unmap(bcontainer, iova, size, NULL, false) == 0 &&
+ ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
+ return 0;
+ }
+
+ return -errno;
+}
+
+static int
+vfio_legacy_set_dirty_page_tracking(const VFIOContainer *bcontainer,
+ bool start, Error **errp)
+{
+ const VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
+ int ret;
+ struct vfio_iommu_type1_dirty_bitmap dirty = {
+ .argsz = sizeof(dirty),
+ };
+
+ if (start) {
+ dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START;
+ } else {
+ dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP;
+ }
+
+ ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty);
+ if (ret) {
+ ret = -errno;
+ error_setg_errno(errp, errno, "Failed to set dirty tracking flag 0x%x",
+ dirty.flags);
+ }
+
+ return ret;
+}
+
+static int vfio_legacy_query_dirty_bitmap(const VFIOContainer *bcontainer,
+ VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp)
+{
+ const VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
+ struct vfio_iommu_type1_dirty_bitmap *dbitmap;
+ struct vfio_iommu_type1_dirty_bitmap_get *range;
+ int ret;
+
+ dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range));
+
+ dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range);
+ dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP;
+ range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data;
+ range->iova = iova;
+ range->size = size;
+
+ /*
+ * physical_memory_set_dirty_lebitmap() supports pages in bitmap of
+ * qemu_real_host_page_size to mark those dirty. Hence set bitmap's pgsize
+ * to qemu_real_host_page_size.
+ */
+ range->bitmap.pgsize = qemu_real_host_page_size();
+ range->bitmap.size = vbmap->size;
+ range->bitmap.data = (__u64 *)vbmap->bitmap;
+
+ ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap);
+ if (ret) {
+ ret = -errno;
+ error_setg_errno(errp, errno,
+ "Failed to get dirty bitmap for iova: 0x%"PRIx64
+ " size: 0x%"PRIx64, (uint64_t)range->iova,
+ (uint64_t)range->size);
+ }
+
+ g_free(dbitmap);
+
+ return ret;
+}
+
+static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info,
+ VFIOContainer *bcontainer)
+{
+ struct vfio_info_cap_header *hdr;
+ struct vfio_iommu_type1_info_cap_iova_range *cap;
+
+ hdr = vfio_get_iommu_type1_info_cap(info,
+ VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
+ if (!hdr) {
+ return false;
+ }
+
+ cap = (void *)hdr;
+
+ for (int i = 0; i < cap->nr_iovas; i++) {
+ Range *range = g_new(Range, 1);
+
+ range_set_bounds(range, cap->iova_ranges[i].start,
+ cap->iova_ranges[i].end);
+ bcontainer->iova_ranges =
+ range_list_insert(bcontainer->iova_ranges, range);
+ }
+
+ return true;
+}
+
+static void vfio_group_add_kvm_device(VFIOGroup *group)
+{
+ Error *err = NULL;
+
+ if (vfio_kvm_device_add_fd(group->fd, &err)) {
+ error_reportf_err(err, "group ID %d: ", group->groupid);
+ }
+}
+
+static void vfio_group_del_kvm_device(VFIOGroup *group)
+{
+ Error *err = NULL;
+
+ if (vfio_kvm_device_del_fd(group->fd, &err)) {
+ error_reportf_err(err, "group ID %d: ", group->groupid);
+ }
+}
+
+/*
+ * vfio_get_iommu_type - selects the richest iommu_type (v2 first)
+ */
+static int vfio_get_iommu_type(int container_fd,
+ Error **errp)
+{
+ int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
+ VFIO_SPAPR_TCE_v2_IOMMU, VFIO_SPAPR_TCE_IOMMU };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(iommu_types); i++) {
+ if (ioctl(container_fd, VFIO_CHECK_EXTENSION, iommu_types[i])) {
+ return iommu_types[i];
+ }
+ }
+ error_setg(errp, "No available IOMMU models");
+ return -EINVAL;
+}
+
+/*
+ * vfio_get_iommu_ops - get a VFIOIOMMUClass associated with a type
+ */
+static const char *vfio_get_iommu_class_name(int iommu_type)
+{
+ switch (iommu_type) {
+ case VFIO_TYPE1v2_IOMMU:
+ case VFIO_TYPE1_IOMMU:
+ return TYPE_VFIO_IOMMU_LEGACY;
+ break;
+ case VFIO_SPAPR_TCE_v2_IOMMU:
+ case VFIO_SPAPR_TCE_IOMMU:
+ return TYPE_VFIO_IOMMU_SPAPR;
+ break;
+ default:
+ g_assert_not_reached();
+ };
+}
+
+static bool vfio_set_iommu(int container_fd, int group_fd,
+ int *iommu_type, Error **errp)
+{
+ if (ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container_fd)) {
+ error_setg_errno(errp, errno, "Failed to set group container");
+ return false;
+ }
+
+ while (ioctl(container_fd, VFIO_SET_IOMMU, *iommu_type)) {
+ if (*iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
+ /*
+ * On sPAPR, despite the IOMMU subdriver always advertises v1 and
+ * v2, the running platform may not support v2 and there is no
+ * way to guess it until an IOMMU group gets added to the container.
+ * So in case it fails with v2, try v1 as a fallback.
+ */
+ *iommu_type = VFIO_SPAPR_TCE_IOMMU;
+ continue;
+ }
+ error_setg_errno(errp, errno, "Failed to set iommu for container");
+ return false;
+ }
+
+ return true;
+}
+
+static VFIOLegacyContainer *vfio_create_container(int fd, VFIOGroup *group,
+ Error **errp)
+{
+ int iommu_type;
+ const char *vioc_name;
+ VFIOLegacyContainer *container;
+
+ iommu_type = vfio_get_iommu_type(fd, errp);
+ if (iommu_type < 0) {
+ return NULL;
+ }
+
+ /*
+ * During CPR, just set the container type and skip the ioctls, as the
+ * container and group are already configured in the kernel.
+ */
+ if (!cpr_is_incoming() &&
+ !vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
+ return NULL;
+ }
+
+ vioc_name = vfio_get_iommu_class_name(iommu_type);
+
+ container = VFIO_IOMMU_LEGACY(object_new(vioc_name));
+ container->fd = fd;
+ container->iommu_type = iommu_type;
+ return container;
+}
+
+static int vfio_get_iommu_info(VFIOLegacyContainer *container,
+ struct vfio_iommu_type1_info **info)
+{
+
+ size_t argsz = sizeof(struct vfio_iommu_type1_info);
+
+ *info = g_new0(struct vfio_iommu_type1_info, 1);
+again:
+ (*info)->argsz = argsz;
+
+ if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) {
+ g_free(*info);
+ *info = NULL;
+ return -errno;
+ }
+
+ if (((*info)->argsz > argsz)) {
+ argsz = (*info)->argsz;
+ *info = g_realloc(*info, argsz);
+ goto again;
+ }
+
+ return 0;
+}
+
+static struct vfio_info_cap_header *
+vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id)
+{
+ struct vfio_info_cap_header *hdr;
+ void *ptr = info;
+
+ if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) {
+ return NULL;
+ }
+
+ for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
+ if (hdr->id == id) {
+ return hdr;
+ }
+ }
+
+ return NULL;
+}
+
+static void vfio_get_iommu_info_migration(VFIOLegacyContainer *container,
+ struct vfio_iommu_type1_info *info)
+{
+ struct vfio_info_cap_header *hdr;
+ struct vfio_iommu_type1_info_cap_migration *cap_mig;
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
+
+ hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION);
+ if (!hdr) {
+ return;
+ }
+
+ cap_mig = container_of(hdr, struct vfio_iommu_type1_info_cap_migration,
+ header);
+
+ /*
+ * physical_memory_set_dirty_lebitmap() supports pages in bitmap of
+ * qemu_real_host_page_size to mark those dirty.
+ */
+ if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) {
+ bcontainer->dirty_pages_supported = true;
+ bcontainer->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size;
+ bcontainer->dirty_pgsizes = cap_mig->pgsize_bitmap;
+ }
+}
+
+static bool vfio_legacy_setup(VFIOContainer *bcontainer, Error **errp)
+{
+ VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
+ g_autofree struct vfio_iommu_type1_info *info = NULL;
+ int ret;
+
+ ret = vfio_get_iommu_info(container, &info);
+ if (ret) {
+ error_setg_errno(errp, -ret, "Failed to get VFIO IOMMU info");
+ return false;
+ }
+
+ if (info->flags & VFIO_IOMMU_INFO_PGSIZES) {
+ bcontainer->pgsizes = info->iova_pgsizes;
+ } else {
+ bcontainer->pgsizes = qemu_real_host_page_size();
+ }
+
+ if (!vfio_get_info_dma_avail(info, &bcontainer->dma_max_mappings)) {
+ bcontainer->dma_max_mappings = 65535;
+ }
+
+ vfio_get_info_iova_range(info, bcontainer);
+
+ vfio_get_iommu_info_migration(container, info);
+ return true;
+}
+
+static bool vfio_container_attach_discard_disable(
+ VFIOLegacyContainer *container, VFIOGroup *group, Error **errp)
+{
+ int ret;
+
+ /*
+ * VFIO is currently incompatible with discarding of RAM insofar as the
+ * madvise to purge (zap) the page from QEMU's address space does not
+ * interact with the memory API and therefore leaves stale virtual to
+ * physical mappings in the IOMMU if the page was previously pinned. We
+ * therefore set discarding broken for each group added to a container,
+ * whether the container is used individually or shared. This provides
+ * us with options to allow devices within a group to opt-in and allow
+ * discarding, so long as it is done consistently for a group (for instance
+ * if the device is an mdev device where it is known that the host vendor
+ * driver will never pin pages outside of the working set of the guest
+ * driver, which would thus not be discarding candidates).
+ *
+ * The first opportunity to induce pinning occurs here where we attempt to
+ * attach the group to existing containers within the AddressSpace. If any
+ * pages are already zapped from the virtual address space, such as from
+ * previous discards, new pinning will cause valid mappings to be
+ * re-established. Likewise, when the overall MemoryListener for a new
+ * container is registered, a replay of mappings within the AddressSpace
+ * will occur, re-establishing any previously zapped pages as well.
+ *
+ * Especially virtio-balloon is currently only prevented from discarding
+ * new memory, it will not yet set ram_block_discard_set_required() and
+ * therefore, neither stops us here or deals with the sudden memory
+ * consumption of inflated memory.
+ *
+ * We do support discarding of memory coordinated via the RamDiscardManager
+ * with some IOMMU types. vfio_ram_block_discard_disable() handles the
+ * details once we know which type of IOMMU we are using.
+ */
+
+ ret = vfio_ram_block_discard_disable(container, true);
+ if (ret) {
+ error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
+ if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) {
+ error_report("vfio: error disconnecting group %d from"
+ " container", group->groupid);
+ }
+ }
+ return !ret;
+}
+
+static bool vfio_container_group_add(VFIOLegacyContainer *container,
+ VFIOGroup *group, Error **errp)
+{
+ if (!vfio_container_attach_discard_disable(container, group, errp)) {
+ return false;
+ }
+ group->container = container;
+ QLIST_INSERT_HEAD(&container->group_list, group, container_next);
+ vfio_group_add_kvm_device(group);
+ /*
+ * Remember the container fd for each group, so we can attach to the same
+ * container after CPR.
+ */
+ cpr_resave_fd("vfio_container_for_group", group->groupid, container->fd);
+ return true;
+}
+
+static void vfio_container_group_del(VFIOLegacyContainer *container,
+ VFIOGroup *group)
+{
+ QLIST_REMOVE(group, container_next);
+ group->container = NULL;
+ vfio_group_del_kvm_device(group);
+ vfio_ram_block_discard_disable(container, false);
+ cpr_delete_fd("vfio_container_for_group", group->groupid);
+}
+
+static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as,
+ Error **errp)
+{
+ VFIOLegacyContainer *container;
+ VFIOContainer *bcontainer;
+ int ret, fd = -1;
+ VFIOAddressSpace *space;
+ VFIOIOMMUClass *vioc = NULL;
+ bool new_container = false;
+ bool group_was_added = false;
+
+ space = vfio_address_space_get(as);
+ fd = cpr_find_fd("vfio_container_for_group", group->groupid);
+
+ if (!cpr_is_incoming()) {
+ QLIST_FOREACH(bcontainer, &space->containers, next) {
+ container = VFIO_IOMMU_LEGACY(bcontainer);
+ if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
+ return vfio_container_group_add(container, group, errp);
+ }
+ }
+
+ fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp);
+ if (fd < 0) {
+ goto fail;
+ }
+ } else {
+ /*
+ * For incoming CPR, the group is already attached in the kernel.
+ * If a container with matching fd is found, then update the
+ * userland group list and return. If not, then after the loop,
+ * create the container struct and group list.
+ */
+ QLIST_FOREACH(bcontainer, &space->containers, next) {
+ container = VFIO_IOMMU_LEGACY(bcontainer);
+
+ if (vfio_cpr_container_match(container, group, fd)) {
+ return vfio_container_group_add(container, group, errp);
+ }
+ }
+ }
+
+ ret = ioctl(fd, VFIO_GET_API_VERSION);
+ if (ret != VFIO_API_VERSION) {
+ error_setg(errp, "supported vfio version: %d, "
+ "reported version: %d", VFIO_API_VERSION, ret);
+ goto fail;
+ }
+
+ container = vfio_create_container(fd, group, errp);
+ if (!container) {
+ goto fail;
+ }
+ new_container = true;
+ bcontainer = VFIO_IOMMU(container);
+
+ if (!vfio_legacy_cpr_register_container(container, errp)) {
+ goto fail;
+ }
+
+ vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+ assert(vioc->setup);
+
+ if (!vioc->setup(bcontainer, errp)) {
+ goto fail;
+ }
+
+ vfio_address_space_insert(space, bcontainer);
+
+ if (!vfio_container_group_add(container, group, errp)) {
+ goto fail;
+ }
+ group_was_added = true;
+
+ /*
+ * If CPR, register the listener later, after all state that may
+ * affect regions and mapping boundaries has been cpr load'ed. Later,
+ * the listener will invoke its callback on each flat section and call
+ * dma_map to supply the new vaddr, and the calls will match the mappings
+ * remembered by the kernel.
+ */
+ if (!cpr_is_incoming()) {
+ if (!vfio_listener_register(bcontainer, errp)) {
+ goto fail;
+ }
+ }
+
+ bcontainer->initialized = true;
+
+ return true;
+
+fail:
+ if (new_container) {
+ vfio_listener_unregister(bcontainer);
+ }
+
+ if (group_was_added) {
+ vfio_container_group_del(container, group);
+ }
+ if (vioc && vioc->release) {
+ vioc->release(bcontainer);
+ }
+ if (new_container) {
+ vfio_legacy_cpr_unregister_container(container);
+ object_unref(container);
+ }
+ if (fd >= 0) {
+ close(fd);
+ }
+ vfio_address_space_put(space);
+
+ return false;
+}
+
+static void vfio_container_disconnect(VFIOGroup *group)
+{
+ VFIOLegacyContainer *container = group->container;
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+
+ QLIST_REMOVE(group, container_next);
+ group->container = NULL;
+ cpr_delete_fd("vfio_container_for_group", group->groupid);
+
+ /*
+ * Explicitly release the listener first before unset container,
+ * since unset may destroy the backend container if it's the last
+ * group.
+ */
+ if (QLIST_EMPTY(&container->group_list)) {
+ vfio_listener_unregister(bcontainer);
+ if (vioc->release) {
+ vioc->release(bcontainer);
+ }
+ }
+
+ if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) {
+ error_report("vfio: error disconnecting group %d from container",
+ group->groupid);
+ }
+
+ if (QLIST_EMPTY(&container->group_list)) {
+ VFIOAddressSpace *space = bcontainer->space;
+
+ trace_vfio_container_disconnect(container->fd);
+ vfio_legacy_cpr_unregister_container(container);
+ close(container->fd);
+ object_unref(container);
+
+ vfio_address_space_put(space);
+ }
+}
+
+static VFIOGroup *vfio_group_get(int groupid, AddressSpace *as, Error **errp)
+{
+ ERRP_GUARD();
+ VFIOGroup *group;
+ char path[32];
+ struct vfio_group_status status = { .argsz = sizeof(status) };
+
+ QLIST_FOREACH(group, &vfio_group_list, next) {
+ if (group->groupid == groupid) {
+ /* Found it. Now is it already in the right context? */
+ if (VFIO_IOMMU(group->container)->space->as == as) {
+ return group;
+ } else {
+ error_setg(errp, "group %d used in multiple address spaces",
+ group->groupid);
+ return NULL;
+ }
+ }
+ }
+
+ group = g_malloc0(sizeof(*group));
+
+ snprintf(path, sizeof(path), "/dev/vfio/%d", groupid);
+ group->fd = cpr_open_fd(path, O_RDWR, "vfio_group", groupid, errp);
+ if (group->fd < 0) {
+ goto free_group_exit;
+ }
+
+ if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) {
+ error_setg_errno(errp, errno, "failed to get group %d status", groupid);
+ goto close_fd_exit;
+ }
+
+ if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
+ error_setg(errp, "group %d is not viable", groupid);
+ error_append_hint(errp,
+ "Please ensure all devices within the iommu_group "
+ "are bound to their vfio bus driver.\n");
+ goto close_fd_exit;
+ }
+
+ group->groupid = groupid;
+ QLIST_INIT(&group->device_list);
+
+ if (!vfio_container_connect(group, as, errp)) {
+ error_prepend(errp, "failed to setup container for group %d: ",
+ groupid);
+ goto close_fd_exit;
+ }
+
+ QLIST_INSERT_HEAD(&vfio_group_list, group, next);
+
+ return group;
+
+close_fd_exit:
+ cpr_delete_fd("vfio_group", groupid);
+ close(group->fd);
+
+free_group_exit:
+ g_free(group);
+
+ return NULL;
+}
+
+static void vfio_group_put(VFIOGroup *group)
+{
+ if (!group || !QLIST_EMPTY(&group->device_list)) {
+ return;
+ }
+
+ if (!group->ram_block_discard_allowed) {
+ vfio_ram_block_discard_disable(group->container, false);
+ }
+ vfio_group_del_kvm_device(group);
+ vfio_container_disconnect(group);
+ QLIST_REMOVE(group, next);
+ trace_vfio_group_put(group->fd);
+ cpr_delete_fd("vfio_group", group->groupid);
+ close(group->fd);
+ g_free(group);
+}
+
+static bool vfio_device_get(VFIOGroup *group, const char *name,
+ VFIODevice *vbasedev, Error **errp)
+{
+ g_autofree struct vfio_device_info *info = NULL;
+ int fd;
+
+ fd = vfio_cpr_group_get_device_fd(group->fd, name);
+ if (fd < 0) {
+ error_setg_errno(errp, errno, "error getting device from group %d",
+ group->groupid);
+ error_append_hint(errp,
+ "Verify all devices in group %d are bound to vfio-<bus> "
+ "or pci-stub and not already in use\n", group->groupid);
+ return false;
+ }
+
+ info = vfio_get_device_info(fd);
+ if (!info) {
+ error_setg_errno(errp, errno, "error getting device info");
+ goto fail;
+ }
+
+ /*
+ * Set discarding of RAM as not broken for this group if the driver knows
+ * the device operates compatibly with discarding. Setting must be
+ * consistent per group, but since compatibility is really only possible
+ * with mdev currently, we expect singleton groups.
+ */
+ if (vbasedev->ram_block_discard_allowed !=
+ group->ram_block_discard_allowed) {
+ if (!QLIST_EMPTY(&group->device_list)) {
+ error_setg(errp, "Inconsistent setting of support for discarding "
+ "RAM (e.g., balloon) within group");
+ goto fail;
+ }
+
+ if (!group->ram_block_discard_allowed) {
+ group->ram_block_discard_allowed = true;
+ vfio_ram_block_discard_disable(group->container, false);
+ }
+ }
+
+ vfio_device_prepare(vbasedev, VFIO_IOMMU(group->container), info);
+
+ vbasedev->fd = fd;
+ vbasedev->group = group;
+ QLIST_INSERT_HEAD(&group->device_list, vbasedev, next);
+
+ trace_vfio_device_get(name, info->flags, info->num_regions, info->num_irqs);
+
+ return true;
+
+fail:
+ close(fd);
+ cpr_delete_fd(name, 0);
+ return false;
+}
+
+static void vfio_device_put(VFIODevice *vbasedev)
+{
+ if (!vbasedev->group) {
+ return;
+ }
+ QLIST_REMOVE(vbasedev, next);
+ vbasedev->group = NULL;
+ trace_vfio_device_put(vbasedev->fd);
+ cpr_delete_fd(vbasedev->name, 0);
+ close(vbasedev->fd);
+}
+
+static int vfio_device_get_groupid(VFIODevice *vbasedev, Error **errp)
+{
+ char *tmp, group_path[PATH_MAX];
+ g_autofree char *group_name = NULL;
+ int ret, groupid;
+ ssize_t len;
+
+ tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
+ len = readlink(tmp, group_path, sizeof(group_path));
+ g_free(tmp);
+
+ if (len <= 0 || len >= sizeof(group_path)) {
+ ret = len < 0 ? -errno : -ENAMETOOLONG;
+ error_setg_errno(errp, -ret, "no iommu_group found");
+ return ret;
+ }
+
+ group_path[len] = 0;
+
+ group_name = g_path_get_basename(group_path);
+ if (sscanf(group_name, "%d", &groupid) != 1) {
+ error_setg_errno(errp, errno, "failed to read %s", group_path);
+ return -errno;
+ }
+ return groupid;
+}
+
+/*
+ * vfio_device_attach: attach a device to a security context
+ * @name and @vbasedev->name are likely to be different depending
+ * on the type of the device, hence the need for passing @name
+ */
+static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev,
+ AddressSpace *as, Error **errp)
+{
+ int groupid = vfio_device_get_groupid(vbasedev, errp);
+ VFIODevice *vbasedev_iter;
+ VFIOGroup *group;
+
+ if (groupid < 0) {
+ return false;
+ }
+
+ trace_vfio_device_attach(vbasedev->name, groupid);
+
+ group = vfio_group_get(groupid, as, errp);
+ if (!group) {
+ return false;
+ }
+
+ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
+ if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
+ error_setg(errp, "device is already attached");
+ goto group_put_exit;
+ }
+ }
+ if (!vfio_device_get(group, name, vbasedev, errp)) {
+ goto group_put_exit;
+ }
+
+ if (!vfio_device_hiod_create_and_realize(vbasedev,
+ TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO,
+ errp)) {
+ goto device_put_exit;
+ }
+
+ if (vbasedev->mdev) {
+ error_setg(&vbasedev->cpr.mdev_blocker,
+ "CPR does not support vfio mdev %s", vbasedev->name);
+ if (migrate_add_blocker_modes(&vbasedev->cpr.mdev_blocker, errp,
+ MIG_MODE_CPR_TRANSFER, MIG_MODE_CPR_EXEC,
+ -1) < 0) {
+ goto hiod_unref_exit;
+ }
+ }
+
+ return true;
+
+hiod_unref_exit:
+ object_unref(vbasedev->hiod);
+device_put_exit:
+ vfio_device_put(vbasedev);
+group_put_exit:
+ vfio_group_put(group);
+ return false;
+}
+
+static void vfio_legacy_detach_device(VFIODevice *vbasedev)
+{
+ VFIOGroup *group = vbasedev->group;
+
+ trace_vfio_device_detach(vbasedev->name, group->groupid);
+
+ vfio_device_unprepare(vbasedev);
+
+ migrate_del_blocker(&vbasedev->cpr.mdev_blocker);
+ object_unref(vbasedev->hiod);
+ vfio_device_put(vbasedev);
+ vfio_group_put(group);
+}
+
+static int vfio_legacy_pci_hot_reset(VFIODevice *vbasedev, bool single)
+{
+ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+ VFIOGroup *group;
+ struct vfio_pci_hot_reset_info *info = NULL;
+ struct vfio_pci_dependent_device *devices;
+ struct vfio_pci_hot_reset *reset;
+ int32_t *fds;
+ int ret, i, count;
+ bool multi = false;
+
+ trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
+
+ if (!single) {
+ vfio_pci_pre_reset(vdev);
+ }
+ vdev->vbasedev.needs_reset = false;
+
+ ret = vfio_pci_get_pci_hot_reset_info(vdev, &info);
+
+ if (ret) {
+ goto out_single;
+ }
+ devices = &info->devices[0];
+
+ trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
+
+ /* Verify that we have all the groups required */
+ for (i = 0; i < info->count; i++) {
+ PCIHostDeviceAddress host;
+ VFIOPCIDevice *tmp;
+ VFIODevice *vbasedev_iter;
+
+ host.domain = devices[i].segment;
+ host.bus = devices[i].bus;
+ host.slot = PCI_SLOT(devices[i].devfn);
+ host.function = PCI_FUNC(devices[i].devfn);
+
+ trace_vfio_pci_hot_reset_dep_devices(host.domain,
+ host.bus, host.slot, host.function, devices[i].group_id);
+
+ if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
+ continue;
+ }
+
+ QLIST_FOREACH(group, &vfio_group_list, next) {
+ if (group->groupid == devices[i].group_id) {
+ break;
+ }
+ }
+
+ if (!group) {
+ if (!vdev->has_pm_reset) {
+ error_report("vfio: Cannot reset device %s, "
+ "depends on group %d which is not owned.",
+ vdev->vbasedev.name, devices[i].group_id);
+ }
+ ret = -EPERM;
+ goto out;
+ }
+
+ /* Prep dependent devices for reset and clear our marker. */
+ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
+ if (!vbasedev_iter->dev->realized ||
+ !vfio_pci_from_vfio_device(vbasedev_iter)) {
+ continue;
+ }
+ tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
+ if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
+ if (single) {
+ ret = -EINVAL;
+ goto out_single;
+ }
+ vfio_pci_pre_reset(tmp);
+ tmp->vbasedev.needs_reset = false;
+ multi = true;
+ break;
+ }
+ }
+ }
+
+ if (!single && !multi) {
+ ret = -EINVAL;
+ goto out_single;
+ }
+
+ /* Determine how many group fds need to be passed */
+ count = 0;
+ QLIST_FOREACH(group, &vfio_group_list, next) {
+ for (i = 0; i < info->count; i++) {
+ if (group->groupid == devices[i].group_id) {
+ count++;
+ break;
+ }
+ }
+ }
+
+ reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
+ reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
+ fds = &reset->group_fds[0];
+
+ /* Fill in group fds */
+ QLIST_FOREACH(group, &vfio_group_list, next) {
+ for (i = 0; i < info->count; i++) {
+ if (group->groupid == devices[i].group_id) {
+ fds[reset->count++] = group->fd;
+ break;
+ }
+ }
+ }
+
+ /* Bus reset! */
+ ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
+ g_free(reset);
+ if (ret) {
+ ret = -errno;
+ }
+
+ trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
+ ret ? strerror(errno) : "Success");
+
+out:
+ /* Re-enable INTx on affected devices */
+ for (i = 0; i < info->count; i++) {
+ PCIHostDeviceAddress host;
+ VFIOPCIDevice *tmp;
+ VFIODevice *vbasedev_iter;
+
+ host.domain = devices[i].segment;
+ host.bus = devices[i].bus;
+ host.slot = PCI_SLOT(devices[i].devfn);
+ host.function = PCI_FUNC(devices[i].devfn);
+
+ if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
+ continue;
+ }
+
+ QLIST_FOREACH(group, &vfio_group_list, next) {
+ if (group->groupid == devices[i].group_id) {
+ break;
+ }
+ }
+
+ if (!group) {
+ break;
+ }
+
+ QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
+ if (!vbasedev_iter->dev->realized ||
+ !vfio_pci_from_vfio_device(vbasedev_iter)) {
+ continue;
+ }
+ tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
+ if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
+ vfio_pci_post_reset(tmp);
+ break;
+ }
+ }
+ }
+out_single:
+ if (!single) {
+ vfio_pci_post_reset(vdev);
+ }
+ g_free(info);
+
+ return ret;
+}
+
+static void vfio_iommu_legacy_class_init(ObjectClass *klass, const void *data)
+{
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
+
+ vioc->setup = vfio_legacy_setup;
+ vioc->dma_map = vfio_legacy_dma_map;
+ vioc->dma_unmap = vfio_legacy_dma_unmap;
+ vioc->attach_device = vfio_legacy_attach_device;
+ vioc->detach_device = vfio_legacy_detach_device;
+ vioc->set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking;
+ vioc->query_dirty_bitmap = vfio_legacy_query_dirty_bitmap;
+ vioc->pci_hot_reset = vfio_legacy_pci_hot_reset;
+};
+
+static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
+ Error **errp)
+{
+ VFIODevice *vdev = opaque;
+
+ hiod->name = g_strdup(vdev->name);
+ hiod->agent = opaque;
+
+ return true;
+}
+
+static int hiod_legacy_vfio_get_cap(HostIOMMUDevice *hiod, int cap,
+ Error **errp)
+{
+ switch (cap) {
+ case HOST_IOMMU_DEVICE_CAP_AW_BITS:
+ return vfio_device_get_aw_bits(hiod->agent);
+ default:
+ error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
+ return -EINVAL;
+ }
+}
+
+static GList *
+hiod_legacy_vfio_get_iova_ranges(HostIOMMUDevice *hiod)
+{
+ VFIODevice *vdev = hiod->agent;
+
+ g_assert(vdev);
+ return vfio_container_get_iova_ranges(vdev->bcontainer);
+}
+
+static uint64_t
+hiod_legacy_vfio_get_page_size_mask(HostIOMMUDevice *hiod)
+{
+ VFIODevice *vdev = hiod->agent;
+
+ g_assert(vdev);
+ return vfio_container_get_page_size_mask(vdev->bcontainer);
+}
+
+static void vfio_iommu_legacy_instance_init(Object *obj)
+{
+ VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(obj);
+
+ QLIST_INIT(&container->group_list);
+}
+
+static void hiod_legacy_vfio_class_init(ObjectClass *oc, const void *data)
+{
+ HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc);
+
+ hioc->realize = hiod_legacy_vfio_realize;
+ hioc->get_cap = hiod_legacy_vfio_get_cap;
+ hioc->get_iova_ranges = hiod_legacy_vfio_get_iova_ranges;
+ hioc->get_page_size_mask = hiod_legacy_vfio_get_page_size_mask;
+};
+
+static const TypeInfo types[] = {
+ {
+ .name = TYPE_VFIO_IOMMU_LEGACY,
+ .parent = TYPE_VFIO_IOMMU,
+ .instance_init = vfio_iommu_legacy_instance_init,
+ .instance_size = sizeof(VFIOLegacyContainer),
+ .class_init = vfio_iommu_legacy_class_init,
+ }, {
+ .name = TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO,
+ .parent = TYPE_HOST_IOMMU_DEVICE,
+ .class_init = hiod_legacy_vfio_class_init,
+ }
+};
+
+DEFINE_TYPES(types)
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index a9f0dba..9ddec30 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -1,1218 +1,352 @@
/*
- * generic functions used by VFIO devices
+ * VFIO BASE CONTAINER
*
- * Copyright Red Hat, Inc. 2012
+ * Copyright (C) 2023 Intel Corporation.
+ * Copyright Red Hat, Inc. 2023
*
- * Authors:
- * Alex Williamson <alex.williamson@redhat.com>
+ * Authors: Yi Liu <yi.l.liu@intel.com>
+ * Eric Auger <eric.auger@redhat.com>
*
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- * Based on qemu-kvm device-assignment:
- * Adapted for KVM by Qumranet.
- * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
- * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
- * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
- * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
- * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
+ * SPDX-License-Identifier: GPL-2.0-or-later
*/
-#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <linux/vfio.h>
-#include "hw/vfio/vfio-device.h"
-#include "system/address-spaces.h"
-#include "system/memory.h"
+#include "qemu/osdep.h"
+#include "system/tcg.h"
#include "system/ram_addr.h"
-#include "qemu/error-report.h"
-#include "qemu/range.h"
-#include "system/reset.h"
-#include "trace.h"
#include "qapi/error.h"
-#include "pci.h"
+#include "qemu/error-report.h"
#include "hw/vfio/vfio-container.h"
+#include "hw/vfio/vfio-device.h" /* vfio_device_reset_handler */
+#include "system/physmem.h"
+#include "system/reset.h"
#include "vfio-helpers.h"
-#include "vfio-cpr.h"
-#include "vfio-listener.h"
-
-#define TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE "-legacy-vfio"
-
-typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
-static VFIOGroupList vfio_group_list =
- QLIST_HEAD_INITIALIZER(vfio_group_list);
-
-static int vfio_ram_block_discard_disable(VFIOContainer *container, bool state)
-{
- switch (container->iommu_type) {
- case VFIO_TYPE1v2_IOMMU:
- case VFIO_TYPE1_IOMMU:
- /*
- * We support coordinated discarding of RAM via the RamDiscardManager.
- */
- return ram_block_uncoordinated_discard_disable(state);
- default:
- /*
- * VFIO_SPAPR_TCE_IOMMU most probably works just fine with
- * RamDiscardManager, however, it is completely untested.
- *
- * VFIO_SPAPR_TCE_v2_IOMMU with "DMA memory preregistering" does
- * completely the opposite of managing mapping/pinning dynamically as
- * required by RamDiscardManager. We would have to special-case sections
- * with a RamDiscardManager.
- */
- return ram_block_discard_disable(state);
- }
-}
-
-static int vfio_dma_unmap_bitmap(const VFIOContainer *container,
- hwaddr iova, ram_addr_t size,
- IOMMUTLBEntry *iotlb)
-{
- const VFIOContainerBase *bcontainer = &container->bcontainer;
- struct vfio_iommu_type1_dma_unmap *unmap;
- struct vfio_bitmap *bitmap;
- VFIOBitmap vbmap;
- int ret;
-
- ret = vfio_bitmap_alloc(&vbmap, size);
- if (ret) {
- return ret;
- }
-
- unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap));
-
- unmap->argsz = sizeof(*unmap) + sizeof(*bitmap);
- unmap->iova = iova;
- unmap->size = size;
- unmap->flags |= VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP;
- bitmap = (struct vfio_bitmap *)&unmap->data;
-
- /*
- * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of
- * qemu_real_host_page_size to mark those dirty. Hence set bitmap_pgsize
- * to qemu_real_host_page_size.
- */
- bitmap->pgsize = qemu_real_host_page_size();
- bitmap->size = vbmap.size;
- bitmap->data = (__u64 *)vbmap.bitmap;
-
- if (vbmap.size > bcontainer->max_dirty_bitmap_size) {
- error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, vbmap.size);
- ret = -E2BIG;
- goto unmap_exit;
- }
-
- ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
- if (!ret) {
- cpu_physical_memory_set_dirty_lebitmap(vbmap.bitmap,
- iotlb->translated_addr, vbmap.pages);
- } else {
- error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m");
- }
-
-unmap_exit:
- g_free(unmap);
- g_free(vbmap.bitmap);
-
- return ret;
-}
-
-static int vfio_legacy_dma_unmap_one(const VFIOContainerBase *bcontainer,
- hwaddr iova, ram_addr_t size,
- IOMMUTLBEntry *iotlb)
-{
- const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
- bcontainer);
- struct vfio_iommu_type1_dma_unmap unmap = {
- .argsz = sizeof(unmap),
- .flags = 0,
- .iova = iova,
- .size = size,
- };
- bool need_dirty_sync = false;
- int ret;
- Error *local_err = NULL;
- if (iotlb && vfio_container_dirty_tracking_is_started(bcontainer)) {
- if (!vfio_container_devices_dirty_tracking_is_supported(bcontainer) &&
- bcontainer->dirty_pages_supported) {
- return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
- }
-
- need_dirty_sync = true;
- }
-
- while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
- /*
- * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
- * v4.15) where an overflow in its wrap-around check prevents us from
- * unmapping the last page of the address space. Test for the error
- * condition and re-try the unmap excluding the last page. The
- * expectation is that we've never mapped the last page anyway and this
- * unmap request comes via vIOMMU support which also makes it unlikely
- * that this page is used. This bug was introduced well after type1 v2
- * support was introduced, so we shouldn't need to test for v1. A fix
- * is queued for kernel v5.0 so this workaround can be removed once
- * affected kernels are sufficiently deprecated.
- */
- if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) &&
- container->iommu_type == VFIO_TYPE1v2_IOMMU) {
- trace_vfio_legacy_dma_unmap_overflow_workaround();
- unmap.size -= 1ULL << ctz64(bcontainer->pgsizes);
- continue;
- }
- return -errno;
- }
-
- if (need_dirty_sync) {
- ret = vfio_container_query_dirty_bitmap(bcontainer, iova, size,
- iotlb->translated_addr, &local_err);
- if (ret) {
- error_report_err(local_err);
- return ret;
- }
- }
+#include "trace.h"
- return 0;
-}
+static QLIST_HEAD(, VFIOAddressSpace) vfio_address_spaces =
+ QLIST_HEAD_INITIALIZER(vfio_address_spaces);
-/*
- * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
- */
-static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
- hwaddr iova, ram_addr_t size,
- IOMMUTLBEntry *iotlb, bool unmap_all)
+VFIOAddressSpace *vfio_address_space_get(AddressSpace *as)
{
- int ret;
-
- if (unmap_all) {
- /* The unmap ioctl doesn't accept a full 64-bit span. */
- Int128 llsize = int128_rshift(int128_2_64(), 1);
-
- ret = vfio_legacy_dma_unmap_one(bcontainer, 0, int128_get64(llsize),
- iotlb);
+ VFIOAddressSpace *space;
- if (ret == 0) {
- ret = vfio_legacy_dma_unmap_one(bcontainer, int128_get64(llsize),
- int128_get64(llsize), iotlb);
+ QLIST_FOREACH(space, &vfio_address_spaces, list) {
+ if (space->as == as) {
+ return space;
}
-
- } else {
- ret = vfio_legacy_dma_unmap_one(bcontainer, iova, size, iotlb);
- }
-
- return ret;
-}
-
-static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
- ram_addr_t size, void *vaddr, bool readonly)
-{
- const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
- bcontainer);
- struct vfio_iommu_type1_dma_map map = {
- .argsz = sizeof(map),
- .flags = VFIO_DMA_MAP_FLAG_READ,
- .vaddr = (__u64)(uintptr_t)vaddr,
- .iova = iova,
- .size = size,
- };
-
- if (!readonly) {
- map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
- }
-
- /*
- * Try the mapping, if it fails with EBUSY, unmap the region and try
- * again. This shouldn't be necessary, but we sometimes see it in
- * the VGA ROM space.
- */
- if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
- (errno == EBUSY &&
- vfio_legacy_dma_unmap(bcontainer, iova, size, NULL, false) == 0 &&
- ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
- return 0;
- }
-
- return -errno;
-}
-
-static int
-vfio_legacy_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
- bool start, Error **errp)
-{
- const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
- bcontainer);
- int ret;
- struct vfio_iommu_type1_dirty_bitmap dirty = {
- .argsz = sizeof(dirty),
- };
-
- if (start) {
- dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START;
- } else {
- dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP;
- }
-
- ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty);
- if (ret) {
- ret = -errno;
- error_setg_errno(errp, errno, "Failed to set dirty tracking flag 0x%x",
- dirty.flags);
}
- return ret;
-}
-
-static int vfio_legacy_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
- VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp)
-{
- const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
- bcontainer);
- struct vfio_iommu_type1_dirty_bitmap *dbitmap;
- struct vfio_iommu_type1_dirty_bitmap_get *range;
- int ret;
-
- dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range));
-
- dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range);
- dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP;
- range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data;
- range->iova = iova;
- range->size = size;
-
- /*
- * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of
- * qemu_real_host_page_size to mark those dirty. Hence set bitmap's pgsize
- * to qemu_real_host_page_size.
- */
- range->bitmap.pgsize = qemu_real_host_page_size();
- range->bitmap.size = vbmap->size;
- range->bitmap.data = (__u64 *)vbmap->bitmap;
-
- ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap);
- if (ret) {
- ret = -errno;
- error_setg_errno(errp, errno,
- "Failed to get dirty bitmap for iova: 0x%"PRIx64
- " size: 0x%"PRIx64, (uint64_t)range->iova,
- (uint64_t)range->size);
- }
-
- g_free(dbitmap);
-
- return ret;
-}
-
-static bool vfio_get_info_iova_range(struct vfio_iommu_type1_info *info,
- VFIOContainerBase *bcontainer)
-{
- struct vfio_info_cap_header *hdr;
- struct vfio_iommu_type1_info_cap_iova_range *cap;
-
- hdr = vfio_get_iommu_type1_info_cap(info,
- VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
- if (!hdr) {
- return false;
- }
-
- cap = (void *)hdr;
+ /* No suitable VFIOAddressSpace, create a new one */
+ space = g_malloc0(sizeof(*space));
+ space->as = as;
+ QLIST_INIT(&space->containers);
- for (int i = 0; i < cap->nr_iovas; i++) {
- Range *range = g_new(Range, 1);
-
- range_set_bounds(range, cap->iova_ranges[i].start,
- cap->iova_ranges[i].end);
- bcontainer->iova_ranges =
- range_list_insert(bcontainer->iova_ranges, range);
+ if (QLIST_EMPTY(&vfio_address_spaces)) {
+ qemu_register_reset(vfio_device_reset_handler, NULL);
}
- return true;
-}
-
-static void vfio_group_add_kvm_device(VFIOGroup *group)
-{
- Error *err = NULL;
+ QLIST_INSERT_HEAD(&vfio_address_spaces, space, list);
- if (vfio_kvm_device_add_fd(group->fd, &err)) {
- error_reportf_err(err, "group ID %d: ", group->groupid);
- }
+ return space;
}
-static void vfio_group_del_kvm_device(VFIOGroup *group)
+void vfio_address_space_put(VFIOAddressSpace *space)
{
- Error *err = NULL;
-
- if (vfio_kvm_device_del_fd(group->fd, &err)) {
- error_reportf_err(err, "group ID %d: ", group->groupid);
+ if (!QLIST_EMPTY(&space->containers)) {
+ return;
}
-}
-/*
- * vfio_get_iommu_type - selects the richest iommu_type (v2 first)
- */
-static int vfio_get_iommu_type(int container_fd,
- Error **errp)
-{
- int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
- VFIO_SPAPR_TCE_v2_IOMMU, VFIO_SPAPR_TCE_IOMMU };
- int i;
+ QLIST_REMOVE(space, list);
+ g_free(space);
- for (i = 0; i < ARRAY_SIZE(iommu_types); i++) {
- if (ioctl(container_fd, VFIO_CHECK_EXTENSION, iommu_types[i])) {
- return iommu_types[i];
- }
+ if (QLIST_EMPTY(&vfio_address_spaces)) {
+ qemu_unregister_reset(vfio_device_reset_handler, NULL);
}
- error_setg(errp, "No available IOMMU models");
- return -EINVAL;
}
-/*
- * vfio_get_iommu_ops - get a VFIOIOMMUClass associated with a type
- */
-static const char *vfio_get_iommu_class_name(int iommu_type)
+void vfio_address_space_insert(VFIOAddressSpace *space,
+ VFIOContainer *bcontainer)
{
- switch (iommu_type) {
- case VFIO_TYPE1v2_IOMMU:
- case VFIO_TYPE1_IOMMU:
- return TYPE_VFIO_IOMMU_LEGACY;
- break;
- case VFIO_SPAPR_TCE_v2_IOMMU:
- case VFIO_SPAPR_TCE_IOMMU:
- return TYPE_VFIO_IOMMU_SPAPR;
- break;
- default:
- g_assert_not_reached();
- };
+ QLIST_INSERT_HEAD(&space->containers, bcontainer, next);
+ bcontainer->space = space;
}
-static bool vfio_set_iommu(int container_fd, int group_fd,
- int *iommu_type, Error **errp)
+int vfio_container_dma_map(VFIOContainer *bcontainer,
+ hwaddr iova, uint64_t size,
+ void *vaddr, bool readonly, MemoryRegion *mr)
{
- if (ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container_fd)) {
- error_setg_errno(errp, errno, "Failed to set group container");
- return false;
- }
-
- while (ioctl(container_fd, VFIO_SET_IOMMU, *iommu_type)) {
- if (*iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
- /*
- * On sPAPR, despite the IOMMU subdriver always advertises v1 and
- * v2, the running platform may not support v2 and there is no
- * way to guess it until an IOMMU group gets added to the container.
- * So in case it fails with v2, try v1 as a fallback.
- */
- *iommu_type = VFIO_SPAPR_TCE_IOMMU;
- continue;
- }
- error_setg_errno(errp, errno, "Failed to set iommu for container");
- return false;
- }
-
- return true;
-}
-
-static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group,
- Error **errp)
-{
- int iommu_type;
- const char *vioc_name;
- VFIOContainer *container;
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+ RAMBlock *rb = mr->ram_block;
+ int mfd = rb ? qemu_ram_get_fd(rb) : -1;
- iommu_type = vfio_get_iommu_type(fd, errp);
- if (iommu_type < 0) {
- return NULL;
- }
+ if (mfd >= 0 && vioc->dma_map_file) {
+ unsigned long start = vaddr - qemu_ram_get_host_addr(rb);
+ unsigned long offset = qemu_ram_get_fd_offset(rb);
- if (!vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
- return NULL;
+ return vioc->dma_map_file(bcontainer, iova, size, mfd, start + offset,
+ readonly);
}
-
- vioc_name = vfio_get_iommu_class_name(iommu_type);
-
- container = VFIO_IOMMU_LEGACY(object_new(vioc_name));
- container->fd = fd;
- container->iommu_type = iommu_type;
- return container;
+ g_assert(vioc->dma_map);
+ return vioc->dma_map(bcontainer, iova, size, vaddr, readonly, mr);
}
-static int vfio_get_iommu_info(VFIOContainer *container,
- struct vfio_iommu_type1_info **info)
+int vfio_container_dma_unmap(VFIOContainer *bcontainer,
+ hwaddr iova, uint64_t size,
+ IOMMUTLBEntry *iotlb, bool unmap_all)
{
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
- size_t argsz = sizeof(struct vfio_iommu_type1_info);
-
- *info = g_new0(struct vfio_iommu_type1_info, 1);
-again:
- (*info)->argsz = argsz;
-
- if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) {
- g_free(*info);
- *info = NULL;
- return -errno;
- }
-
- if (((*info)->argsz > argsz)) {
- argsz = (*info)->argsz;
- *info = g_realloc(*info, argsz);
- goto again;
- }
-
- return 0;
+ g_assert(vioc->dma_unmap);
+ return vioc->dma_unmap(bcontainer, iova, size, iotlb, unmap_all);
}
-static struct vfio_info_cap_header *
-vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id)
+bool vfio_container_add_section_window(VFIOContainer *bcontainer,
+ MemoryRegionSection *section,
+ Error **errp)
{
- struct vfio_info_cap_header *hdr;
- void *ptr = info;
-
- if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) {
- return NULL;
- }
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
- for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) {
- if (hdr->id == id) {
- return hdr;
- }
+ if (!vioc->add_window) {
+ return true;
}
- return NULL;
+ return vioc->add_window(bcontainer, section, errp);
}
-static void vfio_get_iommu_info_migration(VFIOContainer *container,
- struct vfio_iommu_type1_info *info)
+void vfio_container_del_section_window(VFIOContainer *bcontainer,
+ MemoryRegionSection *section)
{
- struct vfio_info_cap_header *hdr;
- struct vfio_iommu_type1_info_cap_migration *cap_mig;
- VFIOContainerBase *bcontainer = &container->bcontainer;
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
- hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION);
- if (!hdr) {
+ if (!vioc->del_window) {
return;
}
- cap_mig = container_of(hdr, struct vfio_iommu_type1_info_cap_migration,
- header);
-
- /*
- * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of
- * qemu_real_host_page_size to mark those dirty.
- */
- if (cap_mig->pgsize_bitmap & qemu_real_host_page_size()) {
- bcontainer->dirty_pages_supported = true;
- bcontainer->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size;
- bcontainer->dirty_pgsizes = cap_mig->pgsize_bitmap;
- }
+ return vioc->del_window(bcontainer, section);
}
-static bool vfio_legacy_setup(VFIOContainerBase *bcontainer, Error **errp)
+int vfio_container_set_dirty_page_tracking(VFIOContainer *bcontainer,
+ bool start, Error **errp)
{
- VFIOContainer *container = container_of(bcontainer, VFIOContainer,
- bcontainer);
- g_autofree struct vfio_iommu_type1_info *info = NULL;
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
int ret;
- ret = vfio_get_iommu_info(container, &info);
- if (ret) {
- error_setg_errno(errp, -ret, "Failed to get VFIO IOMMU info");
- return false;
- }
-
- if (info->flags & VFIO_IOMMU_INFO_PGSIZES) {
- bcontainer->pgsizes = info->iova_pgsizes;
- } else {
- bcontainer->pgsizes = qemu_real_host_page_size();
- }
-
- if (!vfio_get_info_dma_avail(info, &bcontainer->dma_max_mappings)) {
- bcontainer->dma_max_mappings = 65535;
+ if (!bcontainer->dirty_pages_supported) {
+ return 0;
}
- vfio_get_info_iova_range(info, bcontainer);
-
- vfio_get_iommu_info_migration(container, info);
- return true;
-}
-
-static bool vfio_container_attach_discard_disable(VFIOContainer *container,
- VFIOGroup *group, Error **errp)
-{
- int ret;
-
- /*
- * VFIO is currently incompatible with discarding of RAM insofar as the
- * madvise to purge (zap) the page from QEMU's address space does not
- * interact with the memory API and therefore leaves stale virtual to
- * physical mappings in the IOMMU if the page was previously pinned. We
- * therefore set discarding broken for each group added to a container,
- * whether the container is used individually or shared. This provides
- * us with options to allow devices within a group to opt-in and allow
- * discarding, so long as it is done consistently for a group (for instance
- * if the device is an mdev device where it is known that the host vendor
- * driver will never pin pages outside of the working set of the guest
- * driver, which would thus not be discarding candidates).
- *
- * The first opportunity to induce pinning occurs here where we attempt to
- * attach the group to existing containers within the AddressSpace. If any
- * pages are already zapped from the virtual address space, such as from
- * previous discards, new pinning will cause valid mappings to be
- * re-established. Likewise, when the overall MemoryListener for a new
- * container is registered, a replay of mappings within the AddressSpace
- * will occur, re-establishing any previously zapped pages as well.
- *
- * Especially virtio-balloon is currently only prevented from discarding
- * new memory, it will not yet set ram_block_discard_set_required() and
- * therefore, neither stops us here or deals with the sudden memory
- * consumption of inflated memory.
- *
- * We do support discarding of memory coordinated via the RamDiscardManager
- * with some IOMMU types. vfio_ram_block_discard_disable() handles the
- * details once we know which type of IOMMU we are using.
- */
-
- ret = vfio_ram_block_discard_disable(container, true);
- if (ret) {
- error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
- if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) {
- error_report("vfio: error disconnecting group %d from"
- " container", group->groupid);
- }
+ g_assert(vioc->set_dirty_page_tracking);
+ if (bcontainer->dirty_pages_started == start) {
+ return 0;
}
- return !ret;
-}
-static bool vfio_container_group_add(VFIOContainer *container, VFIOGroup *group,
- Error **errp)
-{
- if (!vfio_container_attach_discard_disable(container, group, errp)) {
- return false;
+ ret = vioc->set_dirty_page_tracking(bcontainer, start, errp);
+ if (!ret) {
+ bcontainer->dirty_pages_started = start;
}
- group->container = container;
- QLIST_INSERT_HEAD(&container->group_list, group, container_next);
- vfio_group_add_kvm_device(group);
- return true;
-}
-static void vfio_container_group_del(VFIOContainer *container, VFIOGroup *group)
-{
- QLIST_REMOVE(group, container_next);
- group->container = NULL;
- vfio_group_del_kvm_device(group);
- vfio_ram_block_discard_disable(container, false);
+ return ret;
}
-static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as,
- Error **errp)
+static bool vfio_container_devices_dirty_tracking_is_started(
+ const VFIOContainer *bcontainer)
{
- VFIOContainer *container;
- VFIOContainerBase *bcontainer;
- int ret, fd = -1;
- VFIOAddressSpace *space;
- VFIOIOMMUClass *vioc = NULL;
- bool new_container = false;
- bool group_was_added = false;
-
- space = vfio_address_space_get(as);
+ VFIODevice *vbasedev;
- QLIST_FOREACH(bcontainer, &space->containers, next) {
- container = container_of(bcontainer, VFIOContainer, bcontainer);
- if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
- return vfio_container_group_add(container, group, errp);
+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
+ if (!vbasedev->dirty_tracking) {
+ return false;
}
}
- fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp);
- if (fd < 0) {
- goto fail;
- }
-
- ret = ioctl(fd, VFIO_GET_API_VERSION);
- if (ret != VFIO_API_VERSION) {
- error_setg(errp, "supported vfio version: %d, "
- "reported version: %d", VFIO_API_VERSION, ret);
- goto fail;
- }
-
- container = vfio_create_container(fd, group, errp);
- if (!container) {
- goto fail;
- }
- new_container = true;
- bcontainer = &container->bcontainer;
-
- if (!vfio_cpr_register_container(bcontainer, errp)) {
- goto fail;
- }
-
- vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
- assert(vioc->setup);
-
- if (!vioc->setup(bcontainer, errp)) {
- goto fail;
- }
-
- vfio_address_space_insert(space, bcontainer);
-
- if (!vfio_container_group_add(container, group, errp)) {
- goto fail;
- }
- group_was_added = true;
-
- if (!vfio_listener_register(bcontainer, errp)) {
- goto fail;
- }
-
- bcontainer->initialized = true;
-
return true;
-
-fail:
- vfio_listener_unregister(bcontainer);
-
- if (group_was_added) {
- vfio_container_group_del(container, group);
- }
- if (vioc && vioc->release) {
- vioc->release(bcontainer);
- }
- if (new_container) {
- vfio_cpr_unregister_container(bcontainer);
- object_unref(container);
- }
- if (fd >= 0) {
- close(fd);
- }
- vfio_address_space_put(space);
-
- return false;
}
-static void vfio_container_disconnect(VFIOGroup *group)
+bool vfio_container_dirty_tracking_is_started(
+ const VFIOContainer *bcontainer)
{
- VFIOContainer *container = group->container;
- VFIOContainerBase *bcontainer = &container->bcontainer;
- VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
-
- QLIST_REMOVE(group, container_next);
- group->container = NULL;
-
- /*
- * Explicitly release the listener first before unset container,
- * since unset may destroy the backend container if it's the last
- * group.
- */
- if (QLIST_EMPTY(&container->group_list)) {
- vfio_listener_unregister(bcontainer);
- if (vioc->release) {
- vioc->release(bcontainer);
- }
- }
-
- if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) {
- error_report("vfio: error disconnecting group %d from container",
- group->groupid);
- }
-
- if (QLIST_EMPTY(&container->group_list)) {
- VFIOAddressSpace *space = bcontainer->space;
-
- trace_vfio_container_disconnect(container->fd);
- vfio_cpr_unregister_container(bcontainer);
- close(container->fd);
- object_unref(container);
-
- vfio_address_space_put(space);
- }
+ return vfio_container_devices_dirty_tracking_is_started(bcontainer) ||
+ bcontainer->dirty_pages_started;
}
-static VFIOGroup *vfio_group_get(int groupid, AddressSpace *as, Error **errp)
+bool vfio_container_devices_dirty_tracking_is_supported(
+ const VFIOContainer *bcontainer)
{
- ERRP_GUARD();
- VFIOGroup *group;
- char path[32];
- struct vfio_group_status status = { .argsz = sizeof(status) };
-
- QLIST_FOREACH(group, &vfio_group_list, next) {
- if (group->groupid == groupid) {
- /* Found it. Now is it already in the right context? */
- if (group->container->bcontainer.space->as == as) {
- return group;
- } else {
- error_setg(errp, "group %d used in multiple address spaces",
- group->groupid);
- return NULL;
- }
- }
- }
-
- group = g_malloc0(sizeof(*group));
-
- snprintf(path, sizeof(path), "/dev/vfio/%d", groupid);
- group->fd = qemu_open(path, O_RDWR, errp);
- if (group->fd < 0) {
- goto free_group_exit;
- }
-
- if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) {
- error_setg_errno(errp, errno, "failed to get group %d status", groupid);
- goto close_fd_exit;
- }
-
- if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
- error_setg(errp, "group %d is not viable", groupid);
- error_append_hint(errp,
- "Please ensure all devices within the iommu_group "
- "are bound to their vfio bus driver.\n");
- goto close_fd_exit;
- }
-
- group->groupid = groupid;
- QLIST_INIT(&group->device_list);
-
- if (!vfio_container_connect(group, as, errp)) {
- error_prepend(errp, "failed to setup container for group %d: ",
- groupid);
- goto close_fd_exit;
- }
-
- QLIST_INSERT_HEAD(&vfio_group_list, group, next);
-
- return group;
-
-close_fd_exit:
- close(group->fd);
+ VFIODevice *vbasedev;
-free_group_exit:
- g_free(group);
-
- return NULL;
-}
-
-static void vfio_group_put(VFIOGroup *group)
-{
- if (!group || !QLIST_EMPTY(&group->device_list)) {
- return;
- }
-
- if (!group->ram_block_discard_allowed) {
- vfio_ram_block_discard_disable(group->container, false);
- }
- vfio_group_del_kvm_device(group);
- vfio_container_disconnect(group);
- QLIST_REMOVE(group, next);
- trace_vfio_group_put(group->fd);
- close(group->fd);
- g_free(group);
-}
-
-static bool vfio_device_get(VFIOGroup *group, const char *name,
- VFIODevice *vbasedev, Error **errp)
-{
- g_autofree struct vfio_device_info *info = NULL;
- int fd;
-
- fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
- if (fd < 0) {
- error_setg_errno(errp, errno, "error getting device from group %d",
- group->groupid);
- error_append_hint(errp,
- "Verify all devices in group %d are bound to vfio-<bus> "
- "or pci-stub and not already in use\n", group->groupid);
- return false;
- }
-
- info = vfio_get_device_info(fd);
- if (!info) {
- error_setg_errno(errp, errno, "error getting device info");
- close(fd);
- return false;
- }
-
- /*
- * Set discarding of RAM as not broken for this group if the driver knows
- * the device operates compatibly with discarding. Setting must be
- * consistent per group, but since compatibility is really only possible
- * with mdev currently, we expect singleton groups.
- */
- if (vbasedev->ram_block_discard_allowed !=
- group->ram_block_discard_allowed) {
- if (!QLIST_EMPTY(&group->device_list)) {
- error_setg(errp, "Inconsistent setting of support for discarding "
- "RAM (e.g., balloon) within group");
- close(fd);
+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
+ if (vbasedev->device_dirty_page_tracking == ON_OFF_AUTO_OFF) {
return false;
}
-
- if (!group->ram_block_discard_allowed) {
- group->ram_block_discard_allowed = true;
- vfio_ram_block_discard_disable(group->container, false);
+ if (!vbasedev->dirty_pages_supported) {
+ return false;
}
}
- vfio_device_prepare(vbasedev, &group->container->bcontainer, info);
-
- vbasedev->fd = fd;
- vbasedev->group = group;
- QLIST_INSERT_HEAD(&group->device_list, vbasedev, next);
-
- trace_vfio_device_get(name, info->flags, info->num_regions, info->num_irqs);
-
return true;
}
-static void vfio_device_put(VFIODevice *vbasedev)
+static int vfio_device_dma_logging_report(VFIODevice *vbasedev, hwaddr iova,
+ hwaddr size, void *bitmap)
{
- if (!vbasedev->group) {
- return;
- }
- QLIST_REMOVE(vbasedev, next);
- vbasedev->group = NULL;
- trace_vfio_device_put(vbasedev->fd);
- close(vbasedev->fd);
-}
+ uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
+ sizeof(struct vfio_device_feature_dma_logging_report),
+ sizeof(uint64_t))] = {};
+ struct vfio_device_feature *feature = (struct vfio_device_feature *)buf;
+ struct vfio_device_feature_dma_logging_report *report =
+ (struct vfio_device_feature_dma_logging_report *)feature->data;
-static int vfio_device_get_groupid(VFIODevice *vbasedev, Error **errp)
-{
- char *tmp, group_path[PATH_MAX];
- g_autofree char *group_name = NULL;
- int ret, groupid;
- ssize_t len;
-
- tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
- len = readlink(tmp, group_path, sizeof(group_path));
- g_free(tmp);
-
- if (len <= 0 || len >= sizeof(group_path)) {
- ret = len < 0 ? -errno : -ENAMETOOLONG;
- error_setg_errno(errp, -ret, "no iommu_group found");
- return ret;
- }
+ report->iova = iova;
+ report->length = size;
+ report->page_size = qemu_real_host_page_size();
+ report->bitmap = (uintptr_t)bitmap;
- group_path[len] = 0;
+ feature->argsz = sizeof(buf);
+ feature->flags = VFIO_DEVICE_FEATURE_GET |
+ VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT;
- group_name = g_path_get_basename(group_path);
- if (sscanf(group_name, "%d", &groupid) != 1) {
- error_setg_errno(errp, errno, "failed to read %s", group_path);
- return -errno;
- }
- return groupid;
+ return vbasedev->io_ops->device_feature(vbasedev, feature);
}
-/*
- * vfio_device_attach: attach a device to a security context
- * @name and @vbasedev->name are likely to be different depending
- * on the type of the device, hence the need for passing @name
- */
-static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev,
- AddressSpace *as, Error **errp)
+static int vfio_container_iommu_query_dirty_bitmap(
+ const VFIOContainer *bcontainer, VFIOBitmap *vbmap, hwaddr iova,
+ hwaddr size, Error **errp)
{
- int groupid = vfio_device_get_groupid(vbasedev, errp);
- VFIODevice *vbasedev_iter;
- VFIOGroup *group;
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
- if (groupid < 0) {
- return false;
- }
+ g_assert(vioc->query_dirty_bitmap);
+ return vioc->query_dirty_bitmap(bcontainer, vbmap, iova, size,
+ errp);
+}
- trace_vfio_device_attach(vbasedev->name, groupid);
+static int vfio_container_devices_query_dirty_bitmap(
+ const VFIOContainer *bcontainer, VFIOBitmap *vbmap, hwaddr iova,
+ hwaddr size, Error **errp)
+{
+ VFIODevice *vbasedev;
+ int ret;
- group = vfio_group_get(groupid, as, errp);
- if (!group) {
- return false;
- }
+ QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
+ ret = vfio_device_dma_logging_report(vbasedev, iova, size,
+ vbmap->bitmap);
+ if (ret) {
+ error_setg_errno(errp, -ret,
+ "%s: Failed to get DMA logging report, iova: "
+ "0x%" HWADDR_PRIx ", size: 0x%" HWADDR_PRIx,
+ vbasedev->name, iova, size);
- QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
- if (strcmp(vbasedev_iter->name, vbasedev->name) == 0) {
- error_setg(errp, "device is already attached");
- goto group_put_exit;
+ return ret;
}
}
- if (!vfio_device_get(group, name, vbasedev, errp)) {
- goto group_put_exit;
- }
-
- if (!vfio_device_hiod_create_and_realize(vbasedev,
- TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO,
- errp)) {
- goto device_put_exit;
- }
-
- return true;
-device_put_exit:
- vfio_device_put(vbasedev);
-group_put_exit:
- vfio_group_put(group);
- return false;
+ return 0;
}
-static void vfio_legacy_detach_device(VFIODevice *vbasedev)
+int vfio_container_query_dirty_bitmap(const VFIOContainer *bcontainer,
+ uint64_t iova, uint64_t size,
+ hwaddr translated_addr, Error **errp)
{
- VFIOGroup *group = vbasedev->group;
-
- trace_vfio_device_detach(vbasedev->name, group->groupid);
-
- vfio_device_unprepare(vbasedev);
-
- object_unref(vbasedev->hiod);
- vfio_device_put(vbasedev);
- vfio_group_put(group);
-}
+ bool all_device_dirty_tracking =
+ vfio_container_devices_dirty_tracking_is_supported(bcontainer);
+ uint64_t dirty_pages;
+ VFIOBitmap vbmap;
+ int ret;
-static int vfio_legacy_pci_hot_reset(VFIODevice *vbasedev, bool single)
-{
- VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
- VFIOGroup *group;
- struct vfio_pci_hot_reset_info *info = NULL;
- struct vfio_pci_dependent_device *devices;
- struct vfio_pci_hot_reset *reset;
- int32_t *fds;
- int ret, i, count;
- bool multi = false;
-
- trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
-
- if (!single) {
- vfio_pci_pre_reset(vdev);
+ if (!bcontainer->dirty_pages_supported && !all_device_dirty_tracking) {
+ physical_memory_set_dirty_range(translated_addr, size,
+ tcg_enabled() ? DIRTY_CLIENTS_ALL :
+ DIRTY_CLIENTS_NOCODE);
+ return 0;
}
- vdev->vbasedev.needs_reset = false;
-
- ret = vfio_pci_get_pci_hot_reset_info(vdev, &info);
+ ret = vfio_bitmap_alloc(&vbmap, size);
if (ret) {
- goto out_single;
- }
- devices = &info->devices[0];
-
- trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
-
- /* Verify that we have all the groups required */
- for (i = 0; i < info->count; i++) {
- PCIHostDeviceAddress host;
- VFIOPCIDevice *tmp;
- VFIODevice *vbasedev_iter;
-
- host.domain = devices[i].segment;
- host.bus = devices[i].bus;
- host.slot = PCI_SLOT(devices[i].devfn);
- host.function = PCI_FUNC(devices[i].devfn);
-
- trace_vfio_pci_hot_reset_dep_devices(host.domain,
- host.bus, host.slot, host.function, devices[i].group_id);
-
- if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
- continue;
- }
-
- QLIST_FOREACH(group, &vfio_group_list, next) {
- if (group->groupid == devices[i].group_id) {
- break;
- }
- }
-
- if (!group) {
- if (!vdev->has_pm_reset) {
- error_report("vfio: Cannot reset device %s, "
- "depends on group %d which is not owned.",
- vdev->vbasedev.name, devices[i].group_id);
- }
- ret = -EPERM;
- goto out;
- }
-
- /* Prep dependent devices for reset and clear our marker. */
- QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
- if (!vbasedev_iter->dev->realized ||
- vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
- continue;
- }
- tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
- if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
- if (single) {
- ret = -EINVAL;
- goto out_single;
- }
- vfio_pci_pre_reset(tmp);
- tmp->vbasedev.needs_reset = false;
- multi = true;
- break;
- }
- }
- }
-
- if (!single && !multi) {
- ret = -EINVAL;
- goto out_single;
- }
-
- /* Determine how many group fds need to be passed */
- count = 0;
- QLIST_FOREACH(group, &vfio_group_list, next) {
- for (i = 0; i < info->count; i++) {
- if (group->groupid == devices[i].group_id) {
- count++;
- break;
- }
- }
+ error_setg_errno(errp, -ret,
+ "Failed to allocate dirty tracking bitmap");
+ return ret;
}
- reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
- reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
- fds = &reset->group_fds[0];
-
- /* Fill in group fds */
- QLIST_FOREACH(group, &vfio_group_list, next) {
- for (i = 0; i < info->count; i++) {
- if (group->groupid == devices[i].group_id) {
- fds[reset->count++] = group->fd;
- break;
- }
- }
+ if (all_device_dirty_tracking) {
+ ret = vfio_container_devices_query_dirty_bitmap(bcontainer, &vbmap, iova, size,
+ errp);
+ } else {
+ ret = vfio_container_iommu_query_dirty_bitmap(bcontainer, &vbmap, iova, size,
+ errp);
}
- /* Bus reset! */
- ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
- g_free(reset);
if (ret) {
- ret = -errno;
+ goto out;
}
- trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
- ret ? strerror(errno) : "Success");
+ dirty_pages = physical_memory_set_dirty_lebitmap(vbmap.bitmap,
+ translated_addr,
+ vbmap.pages);
+ trace_vfio_container_query_dirty_bitmap(iova, size, vbmap.size,
+ translated_addr, dirty_pages);
out:
- /* Re-enable INTx on affected devices */
- for (i = 0; i < info->count; i++) {
- PCIHostDeviceAddress host;
- VFIOPCIDevice *tmp;
- VFIODevice *vbasedev_iter;
-
- host.domain = devices[i].segment;
- host.bus = devices[i].bus;
- host.slot = PCI_SLOT(devices[i].devfn);
- host.function = PCI_FUNC(devices[i].devfn);
-
- if (vfio_pci_host_match(&host, vdev->vbasedev.name)) {
- continue;
- }
-
- QLIST_FOREACH(group, &vfio_group_list, next) {
- if (group->groupid == devices[i].group_id) {
- break;
- }
- }
-
- if (!group) {
- break;
- }
-
- QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
- if (!vbasedev_iter->dev->realized ||
- vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
- continue;
- }
- tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
- if (vfio_pci_host_match(&host, tmp->vbasedev.name)) {
- vfio_pci_post_reset(tmp);
- break;
- }
- }
- }
-out_single:
- if (!single) {
- vfio_pci_post_reset(vdev);
- }
- g_free(info);
+ g_free(vbmap.bitmap);
return ret;
}
-static void vfio_iommu_legacy_class_init(ObjectClass *klass, const void *data)
+static gpointer copy_iova_range(gconstpointer src, gpointer data)
{
- VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
-
- vioc->setup = vfio_legacy_setup;
- vioc->dma_map = vfio_legacy_dma_map;
- vioc->dma_unmap = vfio_legacy_dma_unmap;
- vioc->attach_device = vfio_legacy_attach_device;
- vioc->detach_device = vfio_legacy_detach_device;
- vioc->set_dirty_page_tracking = vfio_legacy_set_dirty_page_tracking;
- vioc->query_dirty_bitmap = vfio_legacy_query_dirty_bitmap;
- vioc->pci_hot_reset = vfio_legacy_pci_hot_reset;
-};
-
-static bool hiod_legacy_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
- Error **errp)
-{
- VFIODevice *vdev = opaque;
+ Range *source = (Range *)src;
+ Range *dest = g_new(Range, 1);
- hiod->name = g_strdup(vdev->name);
- hiod->agent = opaque;
-
- return true;
+ range_set_bounds(dest, range_lob(source), range_upb(source));
+ return dest;
}
-static int hiod_legacy_vfio_get_cap(HostIOMMUDevice *hiod, int cap,
- Error **errp)
+GList *vfio_container_get_iova_ranges(const VFIOContainer *bcontainer)
{
- switch (cap) {
- case HOST_IOMMU_DEVICE_CAP_AW_BITS:
- return vfio_device_get_aw_bits(hiod->agent);
- default:
- error_setg(errp, "%s: unsupported capability %x", hiod->name, cap);
- return -EINVAL;
- }
+ assert(bcontainer);
+ return g_list_copy_deep(bcontainer->iova_ranges, copy_iova_range, NULL);
}
-static GList *
-hiod_legacy_vfio_get_iova_ranges(HostIOMMUDevice *hiod)
+static void vfio_container_instance_finalize(Object *obj)
{
- VFIODevice *vdev = hiod->agent;
+ VFIOContainer *bcontainer = VFIO_IOMMU(obj);
+ VFIOGuestIOMMU *giommu, *tmp;
- g_assert(vdev);
- return vfio_container_get_iova_ranges(vdev->bcontainer);
-}
+ QLIST_SAFE_REMOVE(bcontainer, next);
-static uint64_t
-hiod_legacy_vfio_get_page_size_mask(HostIOMMUDevice *hiod)
-{
- VFIODevice *vdev = hiod->agent;
+ QLIST_FOREACH_SAFE(giommu, &bcontainer->giommu_list, giommu_next, tmp) {
+ memory_region_unregister_iommu_notifier(
+ MEMORY_REGION(giommu->iommu_mr), &giommu->n);
+ QLIST_REMOVE(giommu, giommu_next);
+ g_free(giommu);
+ }
- g_assert(vdev);
- return vfio_container_get_page_size_mask(vdev->bcontainer);
+ g_list_free_full(bcontainer->iova_ranges, g_free);
}
-static void vfio_iommu_legacy_instance_init(Object *obj)
+static void vfio_container_instance_init(Object *obj)
{
- VFIOContainer *container = VFIO_IOMMU_LEGACY(obj);
+ VFIOContainer *bcontainer = VFIO_IOMMU(obj);
- QLIST_INIT(&container->group_list);
+ bcontainer->error = NULL;
+ bcontainer->dirty_pages_supported = false;
+ bcontainer->dma_max_mappings = 0;
+ bcontainer->iova_ranges = NULL;
+ QLIST_INIT(&bcontainer->giommu_list);
+ QLIST_INIT(&bcontainer->vrdl_list);
}
-static void hiod_legacy_vfio_class_init(ObjectClass *oc, const void *data)
-{
- HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc);
-
- hioc->realize = hiod_legacy_vfio_realize;
- hioc->get_cap = hiod_legacy_vfio_get_cap;
- hioc->get_iova_ranges = hiod_legacy_vfio_get_iova_ranges;
- hioc->get_page_size_mask = hiod_legacy_vfio_get_page_size_mask;
-};
-
static const TypeInfo types[] = {
{
- .name = TYPE_VFIO_IOMMU_LEGACY,
- .parent = TYPE_VFIO_IOMMU,
- .instance_init = vfio_iommu_legacy_instance_init,
+ .name = TYPE_VFIO_IOMMU,
+ .parent = TYPE_OBJECT,
+ .instance_init = vfio_container_instance_init,
+ .instance_finalize = vfio_container_instance_finalize,
.instance_size = sizeof(VFIOContainer),
- .class_init = vfio_iommu_legacy_class_init,
- }, {
- .name = TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO,
- .parent = TYPE_HOST_IOMMU_DEVICE,
- .class_init = hiod_legacy_vfio_class_init,
- }
+ .class_size = sizeof(VFIOIOMMUClass),
+ .abstract = true,
+ },
};
DEFINE_TYPES(types)
diff --git a/hw/vfio/cpr-iommufd.c b/hw/vfio/cpr-iommufd.c
new file mode 100644
index 0000000..8a4d65d
--- /dev/null
+++ b/hw/vfio/cpr-iommufd.c
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2024-2025 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "hw/vfio/vfio-cpr.h"
+#include "hw/vfio/vfio-device.h"
+#include "migration/blocker.h"
+#include "migration/cpr.h"
+#include "migration/migration.h"
+#include "migration/vmstate.h"
+#include "system/iommufd.h"
+#include "vfio-iommufd.h"
+#include "trace.h"
+
+typedef struct CprVFIODevice {
+ char *name;
+ unsigned int namelen;
+ uint32_t ioas_id;
+ int devid;
+ uint32_t hwpt_id;
+ QLIST_ENTRY(CprVFIODevice) next;
+} CprVFIODevice;
+
+static const VMStateDescription vmstate_cpr_vfio_device = {
+ .name = "cpr vfio device",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32(namelen, CprVFIODevice),
+ VMSTATE_VBUFFER_ALLOC_UINT32(name, CprVFIODevice, 0, NULL, namelen),
+ VMSTATE_INT32(devid, CprVFIODevice),
+ VMSTATE_UINT32(ioas_id, CprVFIODevice),
+ VMSTATE_UINT32(hwpt_id, CprVFIODevice),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+const VMStateDescription vmstate_cpr_vfio_devices = {
+ .name = CPR_STATE "/vfio devices",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (const VMStateField[]){
+ VMSTATE_QLIST_V(vfio_devices, CprState, 1, vmstate_cpr_vfio_device,
+ CprVFIODevice, next),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static void vfio_cpr_save_device(VFIODevice *vbasedev)
+{
+ CprVFIODevice *elem = g_new0(CprVFIODevice, 1);
+
+ elem->name = g_strdup(vbasedev->name);
+ elem->namelen = strlen(vbasedev->name) + 1;
+ elem->ioas_id = vbasedev->cpr.ioas_id;
+ elem->devid = vbasedev->devid;
+ elem->hwpt_id = vbasedev->cpr.hwpt_id;
+ QLIST_INSERT_HEAD(&cpr_state.vfio_devices, elem, next);
+}
+
+static CprVFIODevice *find_device(const char *name)
+{
+ CprVFIODeviceList *head = &cpr_state.vfio_devices;
+ CprVFIODevice *elem;
+
+ QLIST_FOREACH(elem, head, next) {
+ if (!strcmp(elem->name, name)) {
+ return elem;
+ }
+ }
+ return NULL;
+}
+
+static void vfio_cpr_delete_device(const char *name)
+{
+ CprVFIODevice *elem = find_device(name);
+
+ if (elem) {
+ QLIST_REMOVE(elem, next);
+ g_free(elem->name);
+ g_free(elem);
+ }
+}
+
+static bool vfio_cpr_find_device(VFIODevice *vbasedev)
+{
+ CprVFIODevice *elem = find_device(vbasedev->name);
+
+ if (elem) {
+ vbasedev->cpr.ioas_id = elem->ioas_id;
+ vbasedev->devid = elem->devid;
+ vbasedev->cpr.hwpt_id = elem->hwpt_id;
+ trace_vfio_cpr_find_device(elem->ioas_id, elem->devid, elem->hwpt_id);
+ return true;
+ }
+ return false;
+}
+
+static bool vfio_cpr_supported(IOMMUFDBackend *be, Error **errp)
+{
+ if (!iommufd_change_process_capable(be)) {
+ if (errp) {
+ error_setg(errp, "vfio iommufd backend does not support "
+ "IOMMU_IOAS_CHANGE_PROCESS");
+ }
+ return false;
+ }
+ return true;
+}
+
+static int iommufd_cpr_pre_save(void *opaque)
+{
+ IOMMUFDBackend *be = opaque;
+
+ /*
+ * The process has not changed yet, but proactively try the ioctl,
+ * and it will fail if any DMA mappings are not supported.
+ */
+ if (!iommufd_change_process_capable(be)) {
+ error_report("some memory regions do not support "
+ "IOMMU_IOAS_CHANGE_PROCESS");
+ return -1;
+ }
+ return 0;
+}
+
+static int iommufd_cpr_post_load(void *opaque, int version_id)
+{
+ IOMMUFDBackend *be = opaque;
+ Error *local_err = NULL;
+
+ if (!iommufd_change_process(be, &local_err)) {
+ error_report_err(local_err);
+ return -1;
+ }
+ return 0;
+}
+
+static const VMStateDescription iommufd_cpr_vmstate = {
+ .name = "iommufd",
+ .version_id = 0,
+ .minimum_version_id = 0,
+ .pre_save = iommufd_cpr_pre_save,
+ .post_load = iommufd_cpr_post_load,
+ .needed = cpr_incoming_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+bool vfio_iommufd_cpr_register_iommufd(IOMMUFDBackend *be, Error **errp)
+{
+ Error **cpr_blocker = &be->cpr_blocker;
+
+ if (!vfio_cpr_supported(be, cpr_blocker)) {
+ return migrate_add_blocker_modes(cpr_blocker, errp,
+ MIG_MODE_CPR_TRANSFER,
+ MIG_MODE_CPR_EXEC, -1) == 0;
+ }
+
+ vmstate_register(NULL, -1, &iommufd_cpr_vmstate, be);
+
+ return true;
+}
+
+void vfio_iommufd_cpr_unregister_iommufd(IOMMUFDBackend *be)
+{
+ vmstate_unregister(NULL, &iommufd_cpr_vmstate, be);
+ migrate_del_blocker(&be->cpr_blocker);
+}
+
+bool vfio_iommufd_cpr_register_container(VFIOIOMMUFDContainer *container,
+ Error **errp)
+{
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
+
+ migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier,
+ vfio_cpr_reboot_notifier,
+ MIG_MODE_CPR_REBOOT);
+
+ vfio_cpr_add_kvm_notifier();
+
+ return true;
+}
+
+void vfio_iommufd_cpr_unregister_container(VFIOIOMMUFDContainer *container)
+{
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
+
+ migration_remove_notifier(&bcontainer->cpr_reboot_notifier);
+}
+
+void vfio_iommufd_cpr_register_device(VFIODevice *vbasedev)
+{
+ if (!cpr_is_incoming()) {
+ /*
+ * Beware fd may have already been saved by vfio_device_set_fd,
+ * so call resave to avoid a duplicate entry.
+ */
+ cpr_resave_fd(vbasedev->name, 0, vbasedev->fd);
+ vfio_cpr_save_device(vbasedev);
+ }
+}
+
+void vfio_iommufd_cpr_unregister_device(VFIODevice *vbasedev)
+{
+ cpr_delete_fd(vbasedev->name, 0);
+ vfio_cpr_delete_device(vbasedev->name);
+}
+
+void vfio_cpr_load_device(VFIODevice *vbasedev)
+{
+ if (cpr_is_incoming()) {
+ bool ret = vfio_cpr_find_device(vbasedev);
+ g_assert(ret);
+
+ if (vbasedev->fd < 0) {
+ vbasedev->fd = cpr_find_fd(vbasedev->name, 0);
+ }
+ }
+}
diff --git a/hw/vfio/cpr-legacy.c b/hw/vfio/cpr-legacy.c
new file mode 100644
index 0000000..80af746
--- /dev/null
+++ b/hw/vfio/cpr-legacy.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 2021-2025 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <sys/ioctl.h>
+#include <linux/vfio.h>
+#include "qemu/osdep.h"
+#include "hw/vfio/vfio-container-legacy.h"
+#include "hw/vfio/vfio-device.h"
+#include "hw/vfio/vfio-listener.h"
+#include "migration/blocker.h"
+#include "migration/cpr.h"
+#include "migration/migration.h"
+#include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+
+static bool vfio_dma_unmap_vaddr_all(VFIOLegacyContainer *container,
+ Error **errp)
+{
+ struct vfio_iommu_type1_dma_unmap unmap = {
+ .argsz = sizeof(unmap),
+ .flags = VFIO_DMA_UNMAP_FLAG_VADDR | VFIO_DMA_UNMAP_FLAG_ALL,
+ .iova = 0,
+ .size = 0,
+ };
+ if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
+ error_setg_errno(errp, errno, "vfio_dma_unmap_vaddr_all");
+ return false;
+ }
+ container->cpr.vaddr_unmapped = true;
+ return true;
+}
+
+/*
+ * Set the new @vaddr for any mappings registered during cpr load.
+ * The incoming state is cleared thereafter.
+ */
+static int vfio_legacy_cpr_dma_map(const VFIOContainer *bcontainer,
+ hwaddr iova, uint64_t size, void *vaddr,
+ bool readonly, MemoryRegion *mr)
+{
+ const VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
+
+ struct vfio_iommu_type1_dma_map map = {
+ .argsz = sizeof(map),
+ .flags = VFIO_DMA_MAP_FLAG_VADDR,
+ .vaddr = (__u64)(uintptr_t)vaddr,
+ .iova = iova,
+ .size = size,
+ };
+
+ g_assert(cpr_is_incoming());
+
+ if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map)) {
+ return -errno;
+ }
+
+ return 0;
+}
+
+static void vfio_region_remap(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ VFIOLegacyContainer *container = container_of(listener,
+ VFIOLegacyContainer,
+ cpr.remap_listener);
+ vfio_container_region_add(VFIO_IOMMU(container), section, true);
+}
+
+static bool vfio_cpr_supported(VFIOLegacyContainer *container, Error **errp)
+{
+ if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UPDATE_VADDR)) {
+ error_setg(errp, "VFIO container does not support VFIO_UPDATE_VADDR");
+ return false;
+
+ } else if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UNMAP_ALL)) {
+ error_setg(errp, "VFIO container does not support VFIO_UNMAP_ALL");
+ return false;
+
+ } else {
+ return true;
+ }
+}
+
+static int vfio_container_pre_save(void *opaque)
+{
+ VFIOLegacyContainer *container = opaque;
+ Error *local_err = NULL;
+
+ if (!vfio_dma_unmap_vaddr_all(container, &local_err)) {
+ error_report_err(local_err);
+ return -1;
+ }
+ return 0;
+}
+
+static int vfio_container_post_load(void *opaque, int version_id)
+{
+ VFIOLegacyContainer *container = opaque;
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+ dma_map_fn saved_dma_map = vioc->dma_map;
+ Error *local_err = NULL;
+
+ /* During incoming CPR, divert calls to dma_map. */
+ vioc->dma_map = vfio_legacy_cpr_dma_map;
+
+ if (!vfio_listener_register(bcontainer, &local_err)) {
+ error_report_err(local_err);
+ return -1;
+ }
+
+ /* Restore original dma_map function */
+ vioc->dma_map = saved_dma_map;
+
+ return 0;
+}
+
+static const VMStateDescription vfio_container_vmstate = {
+ .name = "vfio-container",
+ .version_id = 0,
+ .minimum_version_id = 0,
+ .priority = MIG_PRI_LOW, /* Must happen after devices and groups */
+ .pre_save = vfio_container_pre_save,
+ .post_load = vfio_container_post_load,
+ .needed = cpr_incoming_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier,
+ MigrationEvent *e, Error **errp)
+{
+ VFIOLegacyContainer *container =
+ container_of(notifier, VFIOLegacyContainer, cpr.transfer_notifier);
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
+
+ if (e->type != MIG_EVENT_PRECOPY_FAILED) {
+ return 0;
+ }
+
+ if (container->cpr.vaddr_unmapped) {
+ /*
+ * Force a call to vfio_region_remap for each mapped section by
+ * temporarily registering a listener, and temporarily diverting
+ * dma_map to vfio_legacy_cpr_dma_map. The latter restores vaddr.
+ */
+
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+ dma_map_fn saved_dma_map = vioc->dma_map;
+ vioc->dma_map = vfio_legacy_cpr_dma_map;
+
+ container->cpr.remap_listener = (MemoryListener) {
+ .name = "vfio cpr recover",
+ .region_add = vfio_region_remap
+ };
+ memory_listener_register(&container->cpr.remap_listener,
+ bcontainer->space->as);
+ memory_listener_unregister(&container->cpr.remap_listener);
+ container->cpr.vaddr_unmapped = false;
+ vioc->dma_map = saved_dma_map;
+ }
+ return 0;
+}
+
+bool vfio_legacy_cpr_register_container(VFIOLegacyContainer *container,
+ Error **errp)
+{
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
+ Error **cpr_blocker = &container->cpr.blocker;
+
+ migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier,
+ vfio_cpr_reboot_notifier,
+ MIG_MODE_CPR_REBOOT);
+
+ if (!vfio_cpr_supported(container, cpr_blocker)) {
+ return migrate_add_blocker_modes(cpr_blocker, errp,
+ MIG_MODE_CPR_TRANSFER,
+ MIG_MODE_CPR_EXEC, -1) == 0;
+ }
+
+ vfio_cpr_add_kvm_notifier();
+
+ vmstate_register(NULL, -1, &vfio_container_vmstate, container);
+
+ migration_add_notifier_modes(&container->cpr.transfer_notifier,
+ vfio_cpr_fail_notifier,
+ MIG_MODE_CPR_TRANSFER, MIG_MODE_CPR_EXEC, -1);
+ return true;
+}
+
+void vfio_legacy_cpr_unregister_container(VFIOLegacyContainer *container)
+{
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
+
+ migration_remove_notifier(&bcontainer->cpr_reboot_notifier);
+ migrate_del_blocker(&container->cpr.blocker);
+ vmstate_unregister(NULL, &vfio_container_vmstate, container);
+ migration_remove_notifier(&container->cpr.transfer_notifier);
+}
+
+/*
+ * In old QEMU, VFIO_DMA_UNMAP_FLAG_VADDR may fail on some mapping after
+ * succeeding for others, so the latter have lost their vaddr. Call this
+ * to restore vaddr for a section with a giommu.
+ *
+ * The giommu already exists. Find it and replay it, which calls
+ * vfio_legacy_cpr_dma_map further down the stack.
+ */
+void vfio_cpr_giommu_remap(VFIOContainer *bcontainer,
+ MemoryRegionSection *section)
+{
+ VFIOGuestIOMMU *giommu = NULL;
+ hwaddr as_offset = section->offset_within_address_space;
+ hwaddr iommu_offset = as_offset - section->offset_within_region;
+
+ QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) {
+ if (giommu->iommu_mr == IOMMU_MEMORY_REGION(section->mr) &&
+ giommu->iommu_offset == iommu_offset) {
+ break;
+ }
+ }
+ g_assert(giommu);
+ memory_region_iommu_replay(giommu->iommu_mr, &giommu->n);
+}
+
+/*
+ * In old QEMU, VFIO_DMA_UNMAP_FLAG_VADDR may fail on some mapping after
+ * succeeding for others, so the latter have lost their vaddr. Call this
+ * to restore vaddr for a section with a RamDiscardManager.
+ *
+ * The ram discard listener already exists. Call its populate function
+ * directly, which calls vfio_legacy_cpr_dma_map.
+ */
+bool vfio_cpr_ram_discard_register_listener(VFIOContainer *bcontainer,
+ MemoryRegionSection *section)
+{
+ VFIORamDiscardListener *vrdl =
+ vfio_find_ram_discard_listener(bcontainer, section);
+
+ g_assert(vrdl);
+ return vrdl->listener.notify_populate(&vrdl->listener, section) == 0;
+}
+
+int vfio_cpr_group_get_device_fd(int d, const char *name)
+{
+ const int id = 0;
+ int fd = cpr_find_fd(name, id);
+
+ if (fd < 0) {
+ fd = ioctl(d, VFIO_GROUP_GET_DEVICE_FD, name);
+ if (fd >= 0) {
+ cpr_save_fd(name, id, fd);
+ }
+ }
+ return fd;
+}
+
+static bool same_device(int fd1, int fd2)
+{
+ struct stat st1, st2;
+
+ return !fstat(fd1, &st1) && !fstat(fd2, &st2) && st1.st_dev == st2.st_dev;
+}
+
+bool vfio_cpr_container_match(VFIOLegacyContainer *container, VFIOGroup *group,
+ int fd)
+{
+ if (container->fd == fd) {
+ return true;
+ }
+ if (!same_device(container->fd, fd)) {
+ return false;
+ }
+ /*
+ * Same device, different fd. This occurs when the container fd is
+ * cpr_save'd multiple times, once for each groupid, so SCM_RIGHTS
+ * produces duplicates. De-dup it.
+ */
+ cpr_delete_fd("vfio_container_for_group", group->groupid);
+ close(fd);
+ cpr_save_fd("vfio_container_for_group", group->groupid, container->fd);
+ return true;
+}
diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c
index 3214184..db462aa 100644
--- a/hw/vfio/cpr.c
+++ b/hw/vfio/cpr.c
@@ -7,13 +7,16 @@
#include "qemu/osdep.h"
#include "hw/vfio/vfio-device.h"
-#include "migration/misc.h"
+#include "hw/vfio/vfio-cpr.h"
+#include "hw/vfio/pci.h"
+#include "hw/pci/msix.h"
+#include "hw/pci/msi.h"
+#include "migration/cpr.h"
#include "qapi/error.h"
#include "system/runstate.h"
-#include "vfio-cpr.h"
-static int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier,
- MigrationEvent *e, Error **errp)
+int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier,
+ MigrationEvent *e, Error **errp)
{
if (e->type == MIG_EVENT_PRECOPY_SETUP &&
!runstate_check(RUN_STATE_SUSPENDED) && !vm_get_suspended()) {
@@ -26,15 +29,266 @@ static int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier,
return 0;
}
-bool vfio_cpr_register_container(VFIOContainerBase *bcontainer, Error **errp)
+#define STRDUP_VECTOR_FD_NAME(vdev, name) \
+ g_strdup_printf("%s_%s", (vdev)->vbasedev.name, (name))
+
+void vfio_cpr_save_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr,
+ int fd)
+{
+ g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name);
+ cpr_save_fd(fdname, nr, fd);
+}
+
+int vfio_cpr_load_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr)
+{
+ g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name);
+ return cpr_find_fd(fdname, nr);
+}
+
+void vfio_cpr_delete_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr)
+{
+ g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name);
+ cpr_delete_fd(fdname, nr);
+}
+
+static void vfio_cpr_claim_vectors(VFIOPCIDevice *vdev, int nr_vectors,
+ bool msix)
+{
+ int i, fd;
+ bool pending = false;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
+
+ vdev->nr_vectors = nr_vectors;
+ vdev->msi_vectors = g_new0(VFIOMSIVector, nr_vectors);
+ vdev->interrupt = msix ? VFIO_INT_MSIX : VFIO_INT_MSI;
+
+ vfio_pci_prepare_kvm_msi_virq_batch(vdev);
+
+ for (i = 0; i < nr_vectors; i++) {
+ VFIOMSIVector *vector = &vdev->msi_vectors[i];
+
+ fd = vfio_cpr_load_vector_fd(vdev, "interrupt", i);
+ if (fd >= 0) {
+ vfio_pci_vector_init(vdev, i);
+ vfio_pci_msi_set_handler(vdev, i, true);
+ }
+
+ if (vfio_cpr_load_vector_fd(vdev, "kvm_interrupt", i) >= 0) {
+ vfio_pci_add_kvm_msi_virq(vdev, vector, i, msix);
+ } else {
+ vdev->msi_vectors[i].virq = -1;
+ }
+
+ if (msix && msix_is_pending(pdev, i) && msix_is_masked(pdev, i)) {
+ set_bit(i, vdev->msix->pending);
+ pending = true;
+ }
+ }
+
+ vfio_pci_commit_kvm_msi_virq_batch(vdev);
+
+ if (msix) {
+ memory_region_set_enabled(&pdev->msix_pba_mmio, pending);
+ }
+}
+
+/*
+ * The kernel may change non-emulated config bits. Exclude them from the
+ * changed-bits check in get_pci_config_device.
+ */
+static int vfio_cpr_pci_pre_load(void *opaque)
+{
+ VFIOPCIDevice *vdev = opaque;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
+ int size = MIN(pci_config_size(pdev), vdev->config_size);
+ int i;
+
+ for (i = 0; i < size; i++) {
+ pdev->cmask[i] &= vdev->emulated_config_bits[i];
+ }
+
+ return 0;
+}
+
+static int vfio_cpr_pci_post_load(void *opaque, int version_id)
+{
+ VFIOPCIDevice *vdev = opaque;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
+ int nr_vectors;
+
+ vfio_sub_page_bar_update_mappings(vdev);
+
+ if (msix_enabled(pdev)) {
+ vfio_pci_msix_set_notifiers(vdev);
+ nr_vectors = vdev->msix->entries;
+ vfio_cpr_claim_vectors(vdev, nr_vectors, true);
+
+ } else if (msi_enabled(pdev)) {
+ nr_vectors = msi_nr_vectors_allocated(pdev);
+ vfio_cpr_claim_vectors(vdev, nr_vectors, false);
+
+ } else if (vfio_pci_read_config(pdev, PCI_INTERRUPT_PIN, 1)) {
+ Error *local_err = NULL;
+ if (!vfio_pci_intx_enable(vdev, &local_err)) {
+ error_report_err(local_err);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static bool pci_msix_present(void *opaque, int version_id)
+{
+ PCIDevice *pdev = opaque;
+
+ return msix_present(pdev);
+}
+
+static const VMStateDescription vfio_intx_vmstate = {
+ .name = "vfio-cpr-intx",
+ .version_id = 0,
+ .minimum_version_id = 0,
+ .fields = (VMStateField[]) {
+ VMSTATE_BOOL(pending, VFIOINTx),
+ VMSTATE_UINT32(route.mode, VFIOINTx),
+ VMSTATE_INT32(route.irq, VFIOINTx),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+#define VMSTATE_VFIO_INTX(_field, _state) { \
+ .name = (stringify(_field)), \
+ .size = sizeof(VFIOINTx), \
+ .vmsd = &vfio_intx_vmstate, \
+ .flags = VMS_STRUCT, \
+ .offset = vmstate_offset_value(_state, _field, VFIOINTx), \
+}
+
+const VMStateDescription vfio_cpr_pci_vmstate = {
+ .name = "vfio-cpr-pci",
+ .version_id = 0,
+ .minimum_version_id = 0,
+ .pre_load = vfio_cpr_pci_pre_load,
+ .post_load = vfio_cpr_pci_post_load,
+ .needed = cpr_incoming_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_PCI_DEVICE(parent_obj, VFIOPCIDevice),
+ VMSTATE_MSIX_TEST(parent_obj, VFIOPCIDevice, pci_msix_present),
+ VMSTATE_VFIO_INTX(intx, VFIOPCIDevice),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static NotifierWithReturn kvm_close_notifier;
+
+static int vfio_cpr_kvm_close_notifier(NotifierWithReturn *notifier,
+ MigrationEvent *e,
+ Error **errp)
+{
+ if (e->type == MIG_EVENT_PRECOPY_DONE) {
+ vfio_kvm_device_close();
+ }
+ return 0;
+}
+
+void vfio_cpr_add_kvm_notifier(void)
+{
+ if (!kvm_close_notifier.notify) {
+ migration_add_notifier_modes(&kvm_close_notifier,
+ vfio_cpr_kvm_close_notifier,
+ MIG_MODE_CPR_TRANSFER, MIG_MODE_CPR_EXEC,
+ -1);
+ }
+}
+
+static int set_irqfd_notifier_gsi(KVMState *s, EventNotifier *n,
+ EventNotifier *rn, int virq, bool enable)
+{
+ if (enable) {
+ return kvm_irqchip_add_irqfd_notifier_gsi(s, n, rn, virq);
+ } else {
+ return kvm_irqchip_remove_irqfd_notifier_gsi(s, n, virq);
+ }
+}
+
+static int vfio_cpr_set_msi_virq(VFIOPCIDevice *vdev, Error **errp, bool enable)
+{
+ const char *op = (enable ? "enable" : "disable");
+ PCIDevice *pdev = PCI_DEVICE(vdev);
+ int i, nr_vectors, ret = 0;
+
+ if (msix_enabled(pdev)) {
+ nr_vectors = vdev->msix->entries;
+
+ } else if (msi_enabled(pdev)) {
+ nr_vectors = msi_nr_vectors_allocated(pdev);
+
+ } else if (vfio_pci_read_config(pdev, PCI_INTERRUPT_PIN, 1)) {
+ ret = set_irqfd_notifier_gsi(kvm_state, &vdev->intx.interrupt,
+ &vdev->intx.unmask, vdev->intx.route.irq,
+ enable);
+ if (ret) {
+ error_setg_errno(errp, -ret, "failed to %s INTx irq %d",
+ op, vdev->intx.route.irq);
+ return ret;
+ }
+ vfio_pci_intx_set_handler(vdev, enable);
+ return ret;
+
+ } else {
+ return 0;
+ }
+
+ for (i = 0; i < nr_vectors; i++) {
+ VFIOMSIVector *vector = &vdev->msi_vectors[i];
+ if (vector->use) {
+ ret = set_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt,
+ NULL, vector->virq, enable);
+ if (ret) {
+ error_setg_errno(errp, -ret,
+ "failed to %s msi vector %d virq %d",
+ op, i, vector->virq);
+ return ret;
+ }
+ vfio_pci_msi_set_handler(vdev, i, enable);
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * When CPR starts, detach IRQs from the VFIO device so future interrupts
+ * are posted to kvm_interrupt, which is preserved in new QEMU. Interrupts
+ * that were already posted to the old KVM instance, but not delivered to the
+ * VCPU, are recovered via KVM_GET_LAPIC and pushed to the new KVM instance
+ * in new QEMU.
+ *
+ * If CPR fails, reattach the IRQs.
+ */
+static int vfio_cpr_pci_notifier(NotifierWithReturn *notifier,
+ MigrationEvent *e, Error **errp)
+{
+ VFIOPCIDevice *vdev =
+ container_of(notifier, VFIOPCIDevice, cpr.transfer_notifier);
+
+ if (e->type == MIG_EVENT_PRECOPY_SETUP) {
+ return vfio_cpr_set_msi_virq(vdev, errp, false);
+ } else if (e->type == MIG_EVENT_PRECOPY_FAILED) {
+ return vfio_cpr_set_msi_virq(vdev, errp, true);
+ }
+ return 0;
+}
+
+void vfio_cpr_pci_register_device(VFIOPCIDevice *vdev)
{
- migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier,
- vfio_cpr_reboot_notifier,
- MIG_MODE_CPR_REBOOT);
- return true;
+ migration_add_notifier_modes(&vdev->cpr.transfer_notifier,
+ vfio_cpr_pci_notifier,
+ MIG_MODE_CPR_TRANSFER, MIG_MODE_CPR_EXEC, -1);
}
-void vfio_cpr_unregister_container(VFIOContainerBase *bcontainer)
+void vfio_cpr_pci_unregister_device(VFIOPCIDevice *vdev)
{
- migration_remove_notifier(&bcontainer->cpr_reboot_notifier);
+ migration_remove_notifier(&vdev->cpr.transfer_notifier);
}
diff --git a/hw/vfio/device.c b/hw/vfio/device.c
index 9fba2c7..64f8750 100644
--- a/hw/vfio/device.c
+++ b/hw/vfio/device.c
@@ -28,6 +28,8 @@
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/units.h"
+#include "migration/cpr.h"
+#include "migration/blocker.h"
#include "monitor/monitor.h"
#include "vfio-helpers.h"
@@ -127,7 +129,7 @@ static inline const char *action_to_str(int action)
static const char *index_to_str(VFIODevice *vbasedev, int index)
{
- if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
+ if (!vfio_pci_from_vfio_device(vbasedev)) {
return NULL;
}
@@ -200,6 +202,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
struct vfio_region_info **info)
{
size_t argsz = sizeof(struct vfio_region_info);
+ int fd = -1;
int ret;
/* check cache */
@@ -214,7 +217,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
retry:
(*info)->argsz = argsz;
- ret = vbasedev->io_ops->get_region_info(vbasedev, *info);
+ ret = vbasedev->io_ops->get_region_info(vbasedev, *info, &fd);
if (ret != 0) {
g_free(*info);
*info = NULL;
@@ -225,15 +228,30 @@ retry:
argsz = (*info)->argsz;
*info = g_realloc(*info, argsz);
+ if (fd != -1) {
+ close(fd);
+ fd = -1;
+ }
+
goto retry;
}
/* fill cache */
vbasedev->reginfo[index] = *info;
+ if (vbasedev->region_fds != NULL) {
+ vbasedev->region_fds[index] = fd;
+ }
return 0;
}
+int vfio_device_get_region_fd(VFIODevice *vbasedev, int index)
+{
+ return vbasedev->region_fds ?
+ vbasedev->region_fds[index] :
+ vbasedev->fd;
+}
+
int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type,
uint32_t subtype, struct vfio_region_info **info)
{
@@ -300,28 +318,40 @@ bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
error_setg(errp, "Use FD passing only with iommufd backend");
return false;
}
- /*
- * Give a name with fd so any function printing out vbasedev->name
- * will not break.
- */
if (!vbasedev->name) {
- vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
+
+ if (vbasedev->dev->id) {
+ vbasedev->name = g_strdup(vbasedev->dev->id);
+ return true;
+ } else {
+ /*
+ * Assign a name so any function printing it will not break.
+ * The fd number changes across processes, so this cannot be
+ * used as an invariant name for CPR.
+ */
+ vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
+ error_setg(&vbasedev->cpr.id_blocker,
+ "vfio device with fd=%d needs an id property",
+ vbasedev->fd);
+ return migrate_add_blocker_modes(&vbasedev->cpr.id_blocker,
+ errp, MIG_MODE_CPR_TRANSFER,
+ -1) == 0;
+ }
}
}
return true;
}
-void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
+void vfio_device_free_name(VFIODevice *vbasedev)
{
- ERRP_GUARD();
- int fd = monitor_fd_param(monitor_cur(), str, errp);
+ g_clear_pointer(&vbasedev->name, g_free);
+ migrate_del_blocker(&vbasedev->cpr.id_blocker);
+}
- if (fd < 0) {
- error_prepend(errp, "Could not parse remote object fd %s:", str);
- return;
- }
- vbasedev->fd = fd;
+void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
+{
+ vbasedev->fd = cpr_get_fd_param(vbasedev->dev->id, str, 0, errp);
}
static VFIODeviceIOOps vfio_device_io_ops_ioctl;
@@ -334,6 +364,7 @@ void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
vbasedev->io_ops = &vfio_device_io_ops_ioctl;
vbasedev->dev = dev;
vbasedev->fd = -1;
+ vbasedev->use_region_fds = false;
vbasedev->ram_block_discard_allowed = ram_discard;
}
@@ -392,7 +423,7 @@ bool vfio_device_hiod_create_and_realize(VFIODevice *vbasedev,
VFIODevice *vfio_get_vfio_device(Object *obj)
{
if (object_dynamic_cast(obj, TYPE_VFIO_PCI)) {
- return &VFIO_PCI_BASE(obj)->vbasedev;
+ return &VFIO_PCI_DEVICE(obj)->vbasedev;
} else {
return NULL;
}
@@ -429,9 +460,11 @@ void vfio_device_detach(VFIODevice *vbasedev)
VFIO_IOMMU_GET_CLASS(vbasedev->bcontainer)->detach_device(vbasedev);
}
-void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
+void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainer *bcontainer,
struct vfio_device_info *info)
{
+ int i;
+
vbasedev->num_irqs = info->num_irqs;
vbasedev->num_regions = info->num_regions;
vbasedev->flags = info->flags;
@@ -444,6 +477,12 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
vbasedev->reginfo = g_new0(struct vfio_region_info *,
vbasedev->num_regions);
+ if (vbasedev->use_region_fds) {
+ vbasedev->region_fds = g_new0(int, vbasedev->num_regions);
+ for (i = 0; i < vbasedev->num_regions; i++) {
+ vbasedev->region_fds[i] = -1;
+ }
+ }
}
void vfio_device_unprepare(VFIODevice *vbasedev)
@@ -452,9 +491,13 @@ void vfio_device_unprepare(VFIODevice *vbasedev)
for (i = 0; i < vbasedev->num_regions; i++) {
g_free(vbasedev->reginfo[i]);
+ if (vbasedev->region_fds != NULL && vbasedev->region_fds[i] != -1) {
+ close(vbasedev->region_fds[i]);
+ }
}
- g_free(vbasedev->reginfo);
- vbasedev->reginfo = NULL;
+
+ g_clear_pointer(&vbasedev->reginfo, g_free);
+ g_clear_pointer(&vbasedev->region_fds, g_free);
QLIST_REMOVE(vbasedev, container_next);
QLIST_REMOVE(vbasedev, global_next);
@@ -476,10 +519,13 @@ static int vfio_device_io_device_feature(VFIODevice *vbasedev,
}
static int vfio_device_io_get_region_info(VFIODevice *vbasedev,
- struct vfio_region_info *info)
+ struct vfio_region_info *info,
+ int *fd)
{
int ret;
+ *fd = -1;
+
ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info);
return ret < 0 ? -errno : ret;
@@ -522,7 +568,8 @@ static int vfio_device_io_region_read(VFIODevice *vbasedev, uint8_t index,
}
static int vfio_device_io_region_write(VFIODevice *vbasedev, uint8_t index,
- off_t off, uint32_t size, void *data)
+ off_t off, uint32_t size, void *data,
+ bool post)
{
struct vfio_region_info *info;
int ret;
diff --git a/hw/vfio/display.c b/hw/vfio/display.c
index 9c6f5aa..faacd90 100644
--- a/hw/vfio/display.c
+++ b/hw/vfio/display.c
@@ -365,7 +365,7 @@ static bool vfio_display_dmabuf_init(VFIOPCIDevice *vdev, Error **errp)
&vfio_display_dmabuf_ops,
vdev);
if (vdev->enable_ramfb) {
- vdev->dpy->ramfb = ramfb_setup(errp);
+ vdev->dpy->ramfb = ramfb_setup(vdev->use_legacy_x86_rom, errp);
if (!vdev->dpy->ramfb) {
return false;
}
@@ -494,7 +494,7 @@ static bool vfio_display_region_init(VFIOPCIDevice *vdev, Error **errp)
&vfio_display_region_ops,
vdev);
if (vdev->enable_ramfb) {
- vdev->dpy->ramfb = ramfb_setup(errp);
+ vdev->dpy->ramfb = ramfb_setup(vdev->use_legacy_x86_rom, errp);
if (!vdev->dpy->ramfb) {
return false;
}
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index d0dbab1..23d13e5 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -117,6 +117,17 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
int vfio_kvm_device_fd = -1;
#endif
+void vfio_kvm_device_close(void)
+{
+#ifdef CONFIG_KVM
+ kvm_close();
+ if (vfio_kvm_device_fd != -1) {
+ close(vfio_kvm_device_fd);
+ vfio_kvm_device_fd = -1;
+ }
+#endif
+}
+
int vfio_kvm_device_add_fd(int fd, Error **errp)
{
#ifdef CONFIG_KVM
@@ -198,3 +209,20 @@ retry:
return info;
}
+
+bool vfio_arch_wants_loading_config_after_iter(void)
+{
+ /*
+ * Starting the config load only after all iterables were loaded (during
+ * non-iterables loading phase) is required for ARM64 due to this platform
+ * VFIO dependency on interrupt controller being loaded first.
+ *
+ * See commit d329f5032e17 ("vfio: Move the saving of the config space to
+ * the right place in VFIO migration").
+ */
+#if defined(TARGET_ARM)
+ return true;
+#else
+ return false;
+#endif
+}
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
index e7952d1..4bfa2e0 100644
--- a/hw/vfio/igd.c
+++ b/hw/vfio/igd.c
@@ -113,6 +113,7 @@ static int igd_gen(VFIOPCIDevice *vdev)
#define IGD_BDSM 0x5c /* Base Data of Stolen Memory */
#define IGD_BDSM_GEN11 0xc0 /* Base Data of Stolen Memory of gen 11 and later */
+#define IGD_GMCH_VGA_DISABLE BIT(1)
#define IGD_GMCH_GEN6_GMS_SHIFT 3 /* SNB_GMCH in i915 */
#define IGD_GMCH_GEN6_GMS_MASK 0x1f
#define IGD_GMCH_GEN8_GMS_SHIFT 8 /* BDW_GMCH in i915 */
@@ -187,23 +188,21 @@ static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
}
static bool vfio_pci_igd_opregion_detect(VFIOPCIDevice *vdev,
- struct vfio_region_info **opregion,
- Error **errp)
+ struct vfio_region_info **opregion)
{
int ret;
- /* Hotplugging is not supported for opregion access */
- if (vdev->pdev.qdev.hotplugged) {
- error_setg(errp, "IGD OpRegion is not supported on hotplugged device");
- return false;
- }
-
ret = vfio_device_get_region_info_type(&vdev->vbasedev,
VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, opregion);
if (ret) {
- error_setg_errno(errp, -ret,
- "Device does not supports IGD OpRegion feature");
+ return false;
+ }
+
+ /* Hotplugging is not supported for opregion access */
+ if (DEVICE(vdev)->hotplugged) {
+ warn_report("IGD device detected, but OpRegion is not supported "
+ "on hotplugged device.");
return false;
}
@@ -261,11 +260,12 @@ static int vfio_pci_igd_copy(VFIOPCIDevice *vdev, PCIDevice *pdev,
static int vfio_pci_igd_host_init(VFIOPCIDevice *vdev,
struct vfio_region_info *info)
{
+ PCIDevice *pdev = PCI_DEVICE(vdev);
PCIBus *bus;
PCIDevice *host_bridge;
int ret;
- bus = pci_device_root_bus(&vdev->pdev);
+ bus = pci_device_root_bus(pdev);
host_bridge = pci_find_device(bus, 0, PCI_DEVFN(0, 0));
if (!host_bridge) {
@@ -328,13 +328,14 @@ type_init(vfio_pci_igd_register_types)
static int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev,
struct vfio_region_info *info)
{
+ PCIDevice *pdev = PCI_DEVICE(vdev);
PCIDevice *lpc_bridge;
int ret;
- lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev),
+ lpc_bridge = pci_find_device(pci_device_root_bus(pdev),
0, PCI_DEVFN(0x1f, 0));
if (!lpc_bridge) {
- lpc_bridge = pci_create_simple(pci_device_root_bus(&vdev->pdev),
+ lpc_bridge = pci_create_simple(pci_device_root_bus(pdev),
PCI_DEVFN(0x1f, 0), "vfio-pci-igd-lpc-bridge");
}
@@ -351,13 +352,14 @@ static bool vfio_pci_igd_setup_lpc_bridge(VFIOPCIDevice *vdev, Error **errp)
{
struct vfio_region_info *host = NULL;
struct vfio_region_info *lpc = NULL;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
PCIDevice *lpc_bridge;
int ret;
/*
* Copying IDs or creating new devices are not supported on hotplug
*/
- if (vdev->pdev.qdev.hotplugged) {
+ if (DEVICE(vdev)->hotplugged) {
error_setg(errp, "IGD LPC is not supported on hotplugged device");
return false;
}
@@ -367,7 +369,7 @@ static bool vfio_pci_igd_setup_lpc_bridge(VFIOPCIDevice *vdev, Error **errp)
* can stuff host values into, so if there's already one there and it's not
* one we can hack on, this quirk is no-go. Sorry Q35.
*/
- lpc_bridge = pci_find_device(pci_device_root_bus(&vdev->pdev),
+ lpc_bridge = pci_find_device(pci_device_root_bus(pdev),
0, PCI_DEVFN(0x1f, 0));
if (lpc_bridge && !object_dynamic_cast(OBJECT(lpc_bridge),
"vfio-pci-igd-lpc-bridge")) {
@@ -461,7 +463,7 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr)
int gen;
if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
- !vfio_is_vga(vdev) || nr != 0) {
+ !vfio_is_base_display(vdev) || nr != 0) {
return;
}
@@ -511,6 +513,7 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr)
static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
{
struct vfio_region_info *opregion = NULL;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
int ret, gen;
uint64_t gms_size = 0;
uint64_t *bdsm_size;
@@ -519,30 +522,33 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
Error *err = NULL;
if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
- !vfio_is_vga(vdev)) {
+ !vfio_is_base_display(vdev)) {
return true;
}
/* IGD device always comes with OpRegion */
- if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) {
+ if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) {
return true;
}
info_report("OpRegion detected on Intel display %x.", vdev->device_id);
gen = igd_gen(vdev);
- gmch = vfio_pci_read_config(&vdev->pdev, IGD_GMCH, 4);
+ gmch = vfio_pci_read_config(pdev, IGD_GMCH, 4);
/*
* For backward compatibility, enable legacy mode when
* - Device geneation is 6 to 9 (including both)
+ * - IGD exposes itself as VGA controller and claims VGA cycles on host
* - Machine type is i440fx (pc_piix)
* - IGD device is at guest BDF 00:02.0
* - Not manually disabled by x-igd-legacy-mode=off
*/
if ((vdev->igd_legacy_mode != ON_OFF_AUTO_OFF) &&
+ vfio_is_vga(vdev) &&
(gen >= 6 && gen <= 9) &&
+ !(gmch & IGD_GMCH_VGA_DISABLE) &&
!strcmp(MACHINE_GET_CLASS(qdev_get_machine())->family, "pc_piix") &&
- (&vdev->pdev == pci_find_device(pci_device_root_bus(&vdev->pdev),
+ (pdev == pci_find_device(pci_device_root_bus(pdev),
0, PCI_DEVFN(0x2, 0)))) {
/*
* IGD legacy mode requires:
@@ -564,20 +570,22 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
*/
ret = vfio_device_get_region_info(&vdev->vbasedev,
VFIO_PCI_ROM_REGION_INDEX, &rom);
- if ((ret || !rom->size) && !vdev->pdev.romfile) {
+ if ((ret || !rom->size) && !pdev->romfile) {
error_setg(&err, "Device has no ROM");
goto error;
}
/*
- * If IGD VGA Disable is clear (expected) and VGA is not already
- * enabled, try to enable it. Probably shouldn't be using legacy mode
- * without VGA, but also no point in us enabling VGA if disabled in
- * hardware.
+ * If VGA is not already enabled, try to enable it. We shouldn't be
+ * using legacy mode without VGA.
*/
- if (!(gmch & 0x2) && !vdev->vga && !vfio_populate_vga(vdev, &err)) {
- error_setg(&err, "Unable to enable VGA access");
- goto error;
+ if (!vdev->vga) {
+ if (vfio_populate_vga(vdev, &err)) {
+ vfio_pci_config_register_vga(vdev);
+ } else {
+ error_setg(&err, "Unable to enable VGA access");
+ goto error;
+ }
}
/* Enable OpRegion and LPC bridge quirk */
@@ -607,8 +615,8 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
* ASLS (OpRegion address) is read-only, emulated
* It contains HPA, guest firmware need to reprogram it with GPA.
*/
- pci_set_long(vdev->pdev.config + IGD_ASLS, 0);
- pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0);
+ pci_set_long(pdev->config + IGD_ASLS, 0);
+ pci_set_long(pdev->wmask + IGD_ASLS, ~0);
pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0);
/*
@@ -622,8 +630,8 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
}
/* GMCH is read-only, emulated */
- pci_set_long(vdev->pdev.config + IGD_GMCH, gmch);
- pci_set_long(vdev->pdev.wmask + IGD_GMCH, 0);
+ pci_set_long(pdev->config + IGD_GMCH, gmch);
+ pci_set_long(pdev->wmask + IGD_GMCH, 0);
pci_set_long(vdev->emulated_config_bits + IGD_GMCH, ~0);
}
@@ -632,12 +640,12 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
/* BDSM is read-write, emulated. BIOS needs to be able to write it */
if (gen < 11) {
- pci_set_long(vdev->pdev.config + IGD_BDSM, 0);
- pci_set_long(vdev->pdev.wmask + IGD_BDSM, ~0);
+ pci_set_long(pdev->config + IGD_BDSM, 0);
+ pci_set_long(pdev->wmask + IGD_BDSM, ~0);
pci_set_long(vdev->emulated_config_bits + IGD_BDSM, ~0);
} else {
- pci_set_quad(vdev->pdev.config + IGD_BDSM_GEN11, 0);
- pci_set_quad(vdev->pdev.wmask + IGD_BDSM_GEN11, ~0);
+ pci_set_quad(pdev->config + IGD_BDSM_GEN11, 0);
+ pci_set_quad(pdev->wmask + IGD_BDSM_GEN11, ~0);
pci_set_quad(vdev->emulated_config_bits + IGD_BDSM_GEN11, ~0);
}
}
@@ -695,7 +703,7 @@ static bool vfio_pci_kvmgt_config_quirk(VFIOPCIDevice *vdev, Error **errp)
return true;
}
- if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) {
+ if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) {
/* Should never reach here, KVMGT always emulates OpRegion */
return false;
}
diff --git a/hw/vfio/iommufd-stubs.c b/hw/vfio/iommufd-stubs.c
new file mode 100644
index 0000000..0be5276
--- /dev/null
+++ b/hw/vfio/iommufd-stubs.c
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2025 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "migration/cpr.h"
+#include "migration/vmstate.h"
+
+const VMStateDescription vmstate_cpr_vfio_devices = {
+ .name = CPR_STATE "/vfio devices",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (const VMStateField[]){
+ VMSTATE_END_OF_LIST()
+ }
+};
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index af1c7ab..68470d5 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -21,35 +21,46 @@
#include "qapi/error.h"
#include "system/iommufd.h"
#include "hw/qdev-core.h"
+#include "hw/vfio/vfio-cpr.h"
#include "system/reset.h"
#include "qemu/cutils.h"
#include "qemu/chardev_open.h"
+#include "migration/cpr.h"
#include "pci.h"
#include "vfio-iommufd.h"
#include "vfio-helpers.h"
-#include "vfio-cpr.h"
#include "vfio-listener.h"
#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO \
TYPE_HOST_IOMMU_DEVICE_IOMMUFD "-vfio"
-static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
- ram_addr_t size, void *vaddr, bool readonly)
+static int iommufd_cdev_map(const VFIOContainer *bcontainer, hwaddr iova,
+ uint64_t size, void *vaddr, bool readonly,
+ MemoryRegion *mr)
{
- const VFIOIOMMUFDContainer *container =
- container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
+ const VFIOIOMMUFDContainer *container = VFIO_IOMMU_IOMMUFD(bcontainer);
return iommufd_backend_map_dma(container->be,
container->ioas_id,
iova, size, vaddr, readonly);
}
-static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer,
- hwaddr iova, ram_addr_t size,
+static int iommufd_cdev_map_file(const VFIOContainer *bcontainer,
+ hwaddr iova, uint64_t size,
+ int fd, unsigned long start, bool readonly)
+{
+ const VFIOIOMMUFDContainer *container = VFIO_IOMMU_IOMMUFD(bcontainer);
+
+ return iommufd_backend_map_file_dma(container->be,
+ container->ioas_id,
+ iova, size, fd, start, readonly);
+}
+
+static int iommufd_cdev_unmap(const VFIOContainer *bcontainer,
+ hwaddr iova, uint64_t size,
IOMMUTLBEntry *iotlb, bool unmap_all)
{
- const VFIOIOMMUFDContainer *container =
- container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
+ const VFIOIOMMUFDContainer *container = VFIO_IOMMU_IOMMUFD(bcontainer);
/* unmap in halves */
if (unmap_all) {
@@ -108,6 +119,10 @@ static bool iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp)
goto err_kvm_device_add;
}
+ if (cpr_is_incoming()) {
+ goto skip_bind;
+ }
+
/* Bind device to iommufd */
bind.iommufd = iommufd->fd;
if (ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind)) {
@@ -119,6 +134,8 @@ static bool iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp)
vbasedev->devid = bind.out_devid;
trace_iommufd_cdev_connect_and_bind(bind.iommufd, vbasedev->name,
vbasedev->fd, vbasedev->devid);
+
+skip_bind:
return true;
err_bind:
iommufd_cdev_kvm_device_del(vbasedev);
@@ -139,11 +156,10 @@ static bool iommufd_hwpt_dirty_tracking(VFIOIOASHwpt *hwpt)
return hwpt && hwpt->hwpt_flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
}
-static int iommufd_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
+static int iommufd_set_dirty_page_tracking(const VFIOContainer *bcontainer,
bool start, Error **errp)
{
- const VFIOIOMMUFDContainer *container =
- container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
+ const VFIOIOMMUFDContainer *container = VFIO_IOMMU_IOMMUFD(bcontainer);
VFIOIOASHwpt *hwpt;
QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
@@ -170,13 +186,11 @@ err:
return -EINVAL;
}
-static int iommufd_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
+static int iommufd_query_dirty_bitmap(const VFIOContainer *bcontainer,
VFIOBitmap *vbmap, hwaddr iova,
hwaddr size, Error **errp)
{
- VFIOIOMMUFDContainer *container = container_of(bcontainer,
- VFIOIOMMUFDContainer,
- bcontainer);
+ VFIOIOMMUFDContainer *container = VFIO_IOMMU_IOMMUFD(bcontainer);
unsigned long page_size = qemu_real_host_page_size();
VFIOIOASHwpt *hwpt;
@@ -304,6 +318,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
{
ERRP_GUARD();
IOMMUFDBackend *iommufd = vbasedev->iommufd;
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
uint32_t type, flags = 0;
uint64_t hw_caps;
VFIOIOASHwpt *hwpt;
@@ -312,7 +327,14 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
/* Try to find a domain */
QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
- ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
+ if (!cpr_is_incoming()) {
+ ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
+ } else if (vbasedev->cpr.hwpt_id == hwpt->hwpt_id) {
+ ret = 0;
+ } else {
+ continue;
+ }
+
if (ret) {
/* -EINVAL means the domain is incompatible with the device. */
if (ret == -EINVAL) {
@@ -329,6 +351,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
return false;
} else {
vbasedev->hwpt = hwpt;
+ vbasedev->cpr.hwpt_id = hwpt->hwpt_id;
QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
return true;
@@ -351,6 +374,11 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
}
+ if (cpr_is_incoming()) {
+ hwpt_id = vbasedev->cpr.hwpt_id;
+ goto skip_alloc;
+ }
+
if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid,
container->ioas_id, flags,
IOMMU_HWPT_DATA_NONE, 0, NULL,
@@ -358,25 +386,26 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
return false;
}
+ ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp);
+ if (ret) {
+ iommufd_backend_free_id(container->be, hwpt_id);
+ return false;
+ }
+
+skip_alloc:
hwpt = g_malloc0(sizeof(*hwpt));
hwpt->hwpt_id = hwpt_id;
hwpt->hwpt_flags = flags;
QLIST_INIT(&hwpt->device_list);
- ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
- if (ret) {
- iommufd_backend_free_id(container->be, hwpt->hwpt_id);
- g_free(hwpt);
- return false;
- }
-
vbasedev->hwpt = hwpt;
+ vbasedev->cpr.hwpt_id = hwpt->hwpt_id;
vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next);
- container->bcontainer.dirty_pages_supported |=
+ bcontainer->dirty_pages_supported |=
vbasedev->iommu_dirty_tracking;
- if (container->bcontainer.dirty_pages_supported &&
+ if (bcontainer->dirty_pages_supported &&
!vbasedev->iommu_dirty_tracking) {
warn_report("IOMMU instance for device %s doesn't support dirty tracking",
vbasedev->name);
@@ -408,7 +437,9 @@ static bool iommufd_cdev_attach_container(VFIODevice *vbasedev,
return iommufd_cdev_autodomains_get(vbasedev, container, errp);
}
- return !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
+ /* If CPR, we are already attached to ioas_id. */
+ return cpr_is_incoming() ||
+ !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
}
static void iommufd_cdev_detach_container(VFIODevice *vbasedev,
@@ -428,12 +459,12 @@ static void iommufd_cdev_detach_container(VFIODevice *vbasedev,
static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container)
{
- VFIOContainerBase *bcontainer = &container->bcontainer;
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
if (!QLIST_EMPTY(&bcontainer->device_list)) {
return;
}
- vfio_cpr_unregister_container(bcontainer);
+ vfio_iommufd_cpr_unregister_container(container);
vfio_listener_unregister(bcontainer);
iommufd_backend_free_id(container->be, container->ioas_id);
object_unref(container);
@@ -450,7 +481,7 @@ static int iommufd_cdev_ram_block_discard_disable(bool state)
static bool iommufd_cdev_get_info_iova_range(VFIOIOMMUFDContainer *container,
uint32_t ioas_id, Error **errp)
{
- VFIOContainerBase *bcontainer = &container->bcontainer;
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
g_autofree struct iommu_ioas_iova_ranges *info = NULL;
struct iommu_iova_range *iova_ranges;
int sz, fd = container->be->fd;
@@ -492,16 +523,19 @@ error:
static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
AddressSpace *as, Error **errp)
{
- VFIOContainerBase *bcontainer;
+ VFIOContainer *bcontainer;
VFIOIOMMUFDContainer *container;
VFIOAddressSpace *space;
struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
int ret, devfd;
+ bool res;
uint32_t ioas_id;
Error *err = NULL;
const VFIOIOMMUClass *iommufd_vioc =
VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
+ vfio_cpr_load_device(vbasedev);
+
if (vbasedev->fd < 0) {
devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
if (devfd < 0) {
@@ -520,12 +554,21 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
/* try to attach to an existing container in this space */
QLIST_FOREACH(bcontainer, &space->containers, next) {
- container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
+ container = VFIO_IOMMU_IOMMUFD(bcontainer);
if (VFIO_IOMMU_GET_CLASS(bcontainer) != iommufd_vioc ||
vbasedev->iommufd != container->be) {
continue;
}
- if (!iommufd_cdev_attach_container(vbasedev, container, &err)) {
+
+ if (!cpr_is_incoming()) {
+ res = iommufd_cdev_attach_container(vbasedev, container, &err);
+ } else if (vbasedev->cpr.ioas_id == container->ioas_id) {
+ res = true;
+ } else {
+ continue;
+ }
+
+ if (!res) {
const char *msg = error_get_pretty(err);
trace_iommufd_cdev_fail_attach_existing_container(msg);
@@ -542,6 +585,11 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
}
}
+ if (cpr_is_incoming()) {
+ ioas_id = vbasedev->cpr.ioas_id;
+ goto skip_ioas_alloc;
+ }
+
/* Need to allocate a new dedicated container */
if (!iommufd_backend_alloc_ioas(vbasedev->iommufd, &ioas_id, errp)) {
goto err_alloc_ioas;
@@ -549,12 +597,14 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id);
+skip_ioas_alloc:
container = VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD));
container->be = vbasedev->iommufd;
container->ioas_id = ioas_id;
QLIST_INIT(&container->hwpt_list);
+ vbasedev->cpr.ioas_id = ioas_id;
- bcontainer = &container->bcontainer;
+ bcontainer = VFIO_IOMMU(container);
vfio_address_space_insert(space, bcontainer);
if (!iommufd_cdev_attach_container(vbasedev, container, errp)) {
@@ -579,7 +629,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
goto err_listener_register;
}
- if (!vfio_cpr_register_container(bcontainer, errp)) {
+ if (!vfio_iommufd_cpr_register_container(container, errp)) {
goto err_listener_register;
}
@@ -592,6 +642,10 @@ found_container:
goto err_listener_register;
}
+ /*
+ * Do not move this code before attachment! The nested IOMMU support
+ * needs device and hwpt id which are generated only after attachment.
+ */
if (!vfio_device_hiod_create_and_realize(vbasedev,
TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO, errp)) {
goto err_listener_register;
@@ -606,6 +660,7 @@ found_container:
}
vfio_device_prepare(vbasedev, bcontainer, &dev_info);
+ vfio_iommufd_cpr_register_device(vbasedev);
trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs,
vbasedev->num_regions, vbasedev->flags);
@@ -627,11 +682,10 @@ err_connect_bind:
static void iommufd_cdev_detach(VFIODevice *vbasedev)
{
- VFIOContainerBase *bcontainer = vbasedev->bcontainer;
+ VFIOContainer *bcontainer = vbasedev->bcontainer;
VFIOAddressSpace *space = bcontainer->space;
- VFIOIOMMUFDContainer *container = container_of(bcontainer,
- VFIOIOMMUFDContainer,
- bcontainer);
+ VFIOIOMMUFDContainer *container = VFIO_IOMMU_IOMMUFD(bcontainer);
+
vfio_device_unprepare(vbasedev);
if (!vbasedev->ram_block_discard_allowed) {
@@ -643,6 +697,7 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev)
iommufd_cdev_container_destroy(container);
vfio_address_space_put(space);
+ vfio_iommufd_cpr_unregister_device(vbasedev);
iommufd_cdev_unbind_and_disconnect(vbasedev);
close(vbasedev->fd);
}
@@ -676,8 +731,8 @@ iommufd_cdev_dep_get_realized_vpdev(struct vfio_pci_dependent_device *dep_dev,
}
vbasedev_tmp = iommufd_cdev_pci_find_by_devid(dep_dev->devid);
- if (!vbasedev_tmp || !vbasedev_tmp->dev->realized ||
- vbasedev_tmp->type != VFIO_DEVICE_TYPE_PCI) {
+ if (!vfio_pci_from_vfio_device(vbasedev_tmp) ||
+ !vbasedev_tmp->dev->realized) {
return NULL;
}
@@ -802,6 +857,7 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data)
VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
vioc->dma_map = iommufd_cdev_map;
+ vioc->dma_map_file = iommufd_cdev_map_file;
vioc->dma_unmap = iommufd_cdev_unmap;
vioc->attach_device = iommufd_cdev_attach;
vioc->detach_device = iommufd_cdev_detach;
@@ -810,21 +866,38 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data)
vioc->query_dirty_bitmap = iommufd_query_dirty_bitmap;
};
+static bool
+host_iommu_device_iommufd_vfio_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ uint32_t hwpt_id, Error **errp)
+{
+ VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;
+
+ return !iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp);
+}
+
+static bool
+host_iommu_device_iommufd_vfio_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ Error **errp)
+{
+ VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;
+
+ return iommufd_cdev_detach_ioas_hwpt(vbasedev, errp);
+}
+
static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
Error **errp)
{
VFIODevice *vdev = opaque;
+ HostIOMMUDeviceIOMMUFD *idev;
HostIOMMUDeviceCaps *caps = &hiod->caps;
+ VendorCaps *vendor_caps = &caps->vendor_caps;
enum iommu_hw_info_type type;
- union {
- struct iommu_hw_info_vtd vtd;
- } data;
uint64_t hw_caps;
hiod->agent = opaque;
- if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid,
- &type, &data, sizeof(data),
+ if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, &type,
+ vendor_caps, sizeof(*vendor_caps),
&hw_caps, errp)) {
return false;
}
@@ -833,6 +906,11 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
caps->type = type;
caps->hw_caps = hw_caps;
+ idev = HOST_IOMMU_DEVICE_IOMMUFD(hiod);
+ idev->iommufd = vdev->iommufd;
+ idev->devid = vdev->devid;
+ idev->hwpt_id = vdev->hwpt->hwpt_id;
+
return true;
}
@@ -858,10 +936,14 @@ hiod_iommufd_vfio_get_page_size_mask(HostIOMMUDevice *hiod)
static void hiod_iommufd_vfio_class_init(ObjectClass *oc, const void *data)
{
HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_CLASS(oc);
+ HostIOMMUDeviceIOMMUFDClass *idevc = HOST_IOMMU_DEVICE_IOMMUFD_CLASS(oc);
hiodc->realize = hiod_iommufd_vfio_realize;
hiodc->get_iova_ranges = hiod_iommufd_vfio_get_iova_ranges;
hiodc->get_page_size_mask = hiod_iommufd_vfio_get_page_size_mask;
+
+ idevc->attach_hwpt = host_iommu_device_iommufd_vfio_attach_hwpt;
+ idevc->detach_hwpt = host_iommu_device_iommufd_vfio_detach_hwpt;
};
static const TypeInfo types[] = {
diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c
index bfacb3d..c6bb58f 100644
--- a/hw/vfio/listener.c
+++ b/hw/vfio/listener.c
@@ -25,11 +25,11 @@
#endif
#include <linux/vfio.h>
+#include "exec/target_page.h"
#include "hw/vfio/vfio-device.h"
#include "hw/vfio/pci.h"
#include "system/address-spaces.h"
#include "system/memory.h"
-#include "system/ram_addr.h"
#include "hw/hw.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
@@ -52,7 +52,7 @@
*/
-static bool vfio_log_sync_needed(const VFIOContainerBase *bcontainer)
+static bool vfio_log_sync_needed(const VFIOContainer *bcontainer)
{
VFIODevice *vbasedev;
@@ -90,16 +90,17 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
section->offset_within_address_space & (1ULL << 63);
}
-/* Called with rcu_read_lock held. */
-static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
- ram_addr_t *ram_addr, bool *read_only,
- Error **errp)
+/*
+ * Called with rcu_read_lock held.
+ * The returned MemoryRegion must not be accessed after calling rcu_read_unlock.
+ */
+static MemoryRegion *vfio_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p,
+ Error **errp)
{
- bool ret, mr_has_discard_manager;
+ MemoryRegion *mr;
- ret = memory_get_xlat_addr(iotlb, vaddr, ram_addr, read_only,
- &mr_has_discard_manager, errp);
- if (ret && mr_has_discard_manager) {
+ mr = memory_translate_iotlb(iotlb, xlat_p, errp);
+ if (mr && memory_region_has_ram_discard_manager(mr)) {
/*
* Malicious VMs might trigger discarding of IOMMU-mapped memory. The
* pages will remain pinned inside vfio until unmapped, resulting in a
@@ -118,14 +119,16 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
" intended via an IOMMU. It's possible to mitigate "
" by setting/adjusting RLIMIT_MEMLOCK.");
}
- return ret;
+ return mr;
}
static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
{
VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
- VFIOContainerBase *bcontainer = giommu->bcontainer;
+ VFIOContainer *bcontainer = giommu->bcontainer;
hwaddr iova = iotlb->iova + giommu->iommu_offset;
+ MemoryRegion *mr;
+ hwaddr xlat;
void *vaddr;
int ret;
Error *local_err = NULL;
@@ -150,10 +153,14 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
bool read_only;
- if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, &local_err)) {
+ mr = vfio_translate_iotlb(iotlb, &xlat, &local_err);
+ if (!mr) {
error_report_err(local_err);
goto out;
}
+ vaddr = memory_region_get_ram_ptr(mr) + xlat;
+ read_only = !(iotlb->perm & IOMMU_WO) || mr->readonly;
+
/*
* vaddr is only valid until rcu_read_unlock(). But after
* vfio_dma_map has set up the mapping the pages will be
@@ -163,7 +170,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
*/
ret = vfio_container_dma_map(bcontainer, iova,
iotlb->addr_mask + 1, vaddr,
- read_only);
+ read_only, mr);
if (ret) {
error_report("vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", "
"0x%"HWADDR_PRIx", %p) = %d (%s)",
@@ -195,7 +202,7 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
{
VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
listener);
- VFIOContainerBase *bcontainer = vrdl->bcontainer;
+ VFIOContainer *bcontainer = vrdl->bcontainer;
const hwaddr size = int128_get64(section->size);
const hwaddr iova = section->offset_within_address_space;
int ret;
@@ -213,7 +220,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
{
VFIORamDiscardListener *vrdl = container_of(rdl, VFIORamDiscardListener,
listener);
- VFIOContainerBase *bcontainer = vrdl->bcontainer;
+ VFIOContainer *bcontainer = vrdl->bcontainer;
const hwaddr end = section->offset_within_region +
int128_get64(section->size);
hwaddr start, next, iova;
@@ -233,7 +240,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
vaddr = memory_region_get_ram_ptr(section->mr) + start;
ret = vfio_container_dma_map(bcontainer, iova, next - start,
- vaddr, section->readonly);
+ vaddr, section->readonly, section->mr);
if (ret) {
/* Rollback */
vfio_ram_discard_notify_discard(rdl, section);
@@ -243,8 +250,9 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
return 0;
}
-static void vfio_ram_discard_register_listener(VFIOContainerBase *bcontainer,
- MemoryRegionSection *section)
+static bool vfio_ram_discard_register_listener(VFIOContainer *bcontainer,
+ MemoryRegionSection *section,
+ Error **errp)
{
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
int target_page_size = qemu_target_page_size();
@@ -309,16 +317,18 @@ static void vfio_ram_discard_register_listener(VFIOContainerBase *bcontainer,
if (vrdl_mappings + max_memslots - vrdl_count >
bcontainer->dma_max_mappings) {
- warn_report("%s: possibly running out of DMA mappings. E.g., try"
+ error_setg(errp, "%s: possibly running out of DMA mappings. E.g., try"
" increasing the 'block-size' of virtio-mem devies."
" Maximum possible DMA mappings: %d, Maximum possible"
" memslots: %d", __func__, bcontainer->dma_max_mappings,
max_memslots);
+ return false;
}
}
+ return true;
}
-static void vfio_ram_discard_unregister_listener(VFIOContainerBase *bcontainer,
+static void vfio_ram_discard_unregister_listener(VFIOContainer *bcontainer,
MemoryRegionSection *section)
{
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
@@ -386,7 +396,7 @@ static bool vfio_listener_valid_section(MemoryRegionSection *section,
return true;
}
-static bool vfio_get_section_iova_range(VFIOContainerBase *bcontainer,
+static bool vfio_get_section_iova_range(VFIOContainer *bcontainer,
MemoryRegionSection *section,
hwaddr *out_iova, hwaddr *out_end,
Int128 *out_llend)
@@ -413,9 +423,9 @@ static bool vfio_get_section_iova_range(VFIOContainerBase *bcontainer,
static void vfio_listener_begin(MemoryListener *listener)
{
- VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
- listener);
- void (*listener_begin)(VFIOContainerBase *bcontainer);
+ VFIOContainer *bcontainer = container_of(listener, VFIOContainer,
+ listener);
+ void (*listener_begin)(VFIOContainer *bcontainer);
listener_begin = VFIO_IOMMU_GET_CLASS(bcontainer)->listener_begin;
@@ -426,11 +436,11 @@ static void vfio_listener_begin(MemoryListener *listener)
static void vfio_listener_commit(MemoryListener *listener)
{
- VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
- listener);
- void (*listener_commit)(VFIOContainerBase *bcontainer);
+ VFIOContainer *bcontainer = container_of(listener, VFIOContainer,
+ listener);
+ void (*listener_commit)(VFIOContainer *bcontainer);
- listener_commit = VFIO_IOMMU_GET_CLASS(bcontainer)->listener_begin;
+ listener_commit = VFIO_IOMMU_GET_CLASS(bcontainer)->listener_commit;
if (listener_commit) {
listener_commit(bcontainer);
@@ -443,17 +453,44 @@ static void vfio_device_error_append(VFIODevice *vbasedev, Error **errp)
* MMIO region mapping failures are not fatal but in this case PCI
* peer-to-peer transactions are broken.
*/
- if (vbasedev && vbasedev->type == VFIO_DEVICE_TYPE_PCI) {
+ if (vfio_pci_from_vfio_device(vbasedev)) {
error_append_hint(errp, "%s: PCI peer-to-peer transactions "
"on BARs are not supported.\n", vbasedev->name);
}
}
+VFIORamDiscardListener *vfio_find_ram_discard_listener(
+ VFIOContainer *bcontainer, MemoryRegionSection *section)
+{
+ VFIORamDiscardListener *vrdl = NULL;
+
+ QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) {
+ if (vrdl->mr == section->mr &&
+ vrdl->offset_within_address_space ==
+ section->offset_within_address_space) {
+ break;
+ }
+ }
+
+ if (!vrdl) {
+ hw_error("vfio: Trying to sync missing RAM discard listener");
+ /* does not return */
+ }
+ return vrdl;
+}
+
static void vfio_listener_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
- VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
- listener);
+ VFIOContainer *bcontainer = container_of(listener, VFIOContainer,
+ listener);
+ vfio_container_region_add(bcontainer, section, false);
+}
+
+void vfio_container_region_add(VFIOContainer *bcontainer,
+ MemoryRegionSection *section,
+ bool cpr_remap)
+{
hwaddr iova, end;
Int128 llend, llsize;
void *vaddr;
@@ -489,6 +526,11 @@ static void vfio_listener_region_add(MemoryListener *listener,
int iommu_idx;
trace_vfio_listener_region_add_iommu(section->mr->name, iova, end);
+
+ if (cpr_remap) {
+ vfio_cpr_giommu_remap(bcontainer, section);
+ }
+
/*
* FIXME: For VFIO iommu types which have KVM acceleration to
* avoid bouncing all map/unmaps through qemu this way, this
@@ -531,7 +573,17 @@ static void vfio_listener_region_add(MemoryListener *listener,
* about changes.
*/
if (memory_region_has_ram_discard_manager(section->mr)) {
- vfio_ram_discard_register_listener(bcontainer, section);
+ if (!cpr_remap) {
+ if (!vfio_ram_discard_register_listener(bcontainer, section, &err)) {
+ goto fail;
+ }
+ } else if (!vfio_cpr_ram_discard_register_listener(bcontainer,
+ section)) {
+ error_setg(&err,
+ "vfio_cpr_ram_discard_register_listener for %s failed",
+ memory_region_name(section->mr));
+ goto fail;
+ }
return;
}
@@ -557,7 +609,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
}
ret = vfio_container_dma_map(bcontainer, iova, int128_get64(llsize),
- vaddr, section->readonly);
+ vaddr, section->readonly, section->mr);
if (ret) {
error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", "
"0x%"HWADDR_PRIx", %p) = %d (%s)",
@@ -604,8 +656,8 @@ fail:
static void vfio_listener_region_del(MemoryListener *listener,
MemoryRegionSection *section)
{
- VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
- listener);
+ VFIOContainer *bcontainer = container_of(listener, VFIOContainer,
+ listener);
hwaddr iova, end;
Int128 llend, llsize;
int ret;
@@ -692,13 +744,13 @@ typedef struct VFIODirtyRanges {
} VFIODirtyRanges;
typedef struct VFIODirtyRangesListener {
- VFIOContainerBase *bcontainer;
+ VFIOContainer *bcontainer;
VFIODirtyRanges ranges;
MemoryListener listener;
} VFIODirtyRangesListener;
static bool vfio_section_is_vfio_pci(MemoryRegionSection *section,
- VFIOContainerBase *bcontainer)
+ VFIOContainer *bcontainer)
{
VFIOPCIDevice *pcidev;
VFIODevice *vbasedev;
@@ -707,7 +759,7 @@ static bool vfio_section_is_vfio_pci(MemoryRegionSection *section,
owner = memory_region_owner(section->mr);
QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
- if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
+ if (!vfio_pci_from_vfio_device(vbasedev)) {
continue;
}
pcidev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
@@ -783,7 +835,7 @@ static const MemoryListener vfio_dirty_tracking_listener = {
.region_add = vfio_dirty_tracking_update,
};
-static void vfio_dirty_tracking_init(VFIOContainerBase *bcontainer,
+static void vfio_dirty_tracking_init(VFIOContainer *bcontainer,
VFIODirtyRanges *ranges)
{
VFIODirtyRangesListener dirty;
@@ -808,7 +860,7 @@ static void vfio_dirty_tracking_init(VFIOContainerBase *bcontainer,
memory_listener_unregister(&dirty.listener);
}
-static void vfio_devices_dma_logging_stop(VFIOContainerBase *bcontainer)
+static void vfio_devices_dma_logging_stop(VFIOContainer *bcontainer)
{
uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature),
sizeof(uint64_t))] = {};
@@ -837,7 +889,7 @@ static void vfio_devices_dma_logging_stop(VFIOContainerBase *bcontainer)
}
static struct vfio_device_feature *
-vfio_device_feature_dma_logging_start_create(VFIOContainerBase *bcontainer,
+vfio_device_feature_dma_logging_start_create(VFIOContainer *bcontainer,
VFIODirtyRanges *tracking)
{
struct vfio_device_feature *feature;
@@ -910,7 +962,7 @@ static void vfio_device_feature_dma_logging_start_destroy(
g_free(feature);
}
-static bool vfio_devices_dma_logging_start(VFIOContainerBase *bcontainer,
+static bool vfio_devices_dma_logging_start(VFIOContainer *bcontainer,
Error **errp)
{
struct vfio_device_feature *feature;
@@ -954,8 +1006,8 @@ static bool vfio_listener_log_global_start(MemoryListener *listener,
Error **errp)
{
ERRP_GUARD();
- VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
- listener);
+ VFIOContainer *bcontainer = container_of(listener, VFIOContainer,
+ listener);
bool ret;
if (vfio_container_devices_dirty_tracking_is_supported(bcontainer)) {
@@ -972,8 +1024,8 @@ static bool vfio_listener_log_global_start(MemoryListener *listener,
static void vfio_listener_log_global_stop(MemoryListener *listener)
{
- VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
- listener);
+ VFIOContainer *bcontainer = container_of(listener, VFIOContainer,
+ listener);
Error *local_err = NULL;
int ret = 0;
@@ -1005,11 +1057,13 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
vfio_giommu_dirty_notifier *gdn = container_of(n,
vfio_giommu_dirty_notifier, n);
VFIOGuestIOMMU *giommu = gdn->giommu;
- VFIOContainerBase *bcontainer = giommu->bcontainer;
+ VFIOContainer *bcontainer = giommu->bcontainer;
hwaddr iova = iotlb->iova + giommu->iommu_offset;
- ram_addr_t translated_addr;
+ hwaddr translated_addr;
Error *local_err = NULL;
int ret = -EINVAL;
+ MemoryRegion *mr;
+ hwaddr xlat;
trace_vfio_iommu_map_dirty_notify(iova, iova + iotlb->addr_mask);
@@ -1021,9 +1075,11 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
}
rcu_read_lock();
- if (!vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL, &local_err)) {
+ mr = vfio_translate_iotlb(iotlb, &xlat, &local_err);
+ if (!mr) {
goto out_unlock;
}
+ translated_addr = memory_region_get_ram_addr(mr) + xlat;
ret = vfio_container_query_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1,
translated_addr, &local_err);
@@ -1052,8 +1108,8 @@ static int vfio_ram_discard_query_dirty_bitmap(MemoryRegionSection *section,
{
const hwaddr size = int128_get64(section->size);
const hwaddr iova = section->offset_within_address_space;
- const ram_addr_t ram_addr = memory_region_get_ram_addr(section->mr) +
- section->offset_within_region;
+ const hwaddr translated_addr = memory_region_get_ram_addr(section->mr) +
+ section->offset_within_region;
VFIORamDiscardListener *vrdl = opaque;
Error *local_err = NULL;
int ret;
@@ -1062,8 +1118,8 @@ static int vfio_ram_discard_query_dirty_bitmap(MemoryRegionSection *section,
* Sync the whole mapped region (spanning multiple individual mappings)
* in one go.
*/
- ret = vfio_container_query_dirty_bitmap(vrdl->bcontainer, iova, size, ram_addr,
- &local_err);
+ ret = vfio_container_query_dirty_bitmap(vrdl->bcontainer, iova, size,
+ translated_addr, &local_err);
if (ret) {
error_report_err(local_err);
}
@@ -1071,23 +1127,12 @@ static int vfio_ram_discard_query_dirty_bitmap(MemoryRegionSection *section,
}
static int
-vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer,
+vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *bcontainer,
MemoryRegionSection *section)
{
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
- VFIORamDiscardListener *vrdl = NULL;
-
- QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) {
- if (vrdl->mr == section->mr &&
- vrdl->offset_within_address_space ==
- section->offset_within_address_space) {
- break;
- }
- }
-
- if (!vrdl) {
- hw_error("vfio: Trying to sync missing RAM discard listener");
- }
+ VFIORamDiscardListener *vrdl =
+ vfio_find_ram_discard_listener(bcontainer, section);
/*
* We only want/can synchronize the bitmap for actually mapped parts -
@@ -1098,7 +1143,7 @@ vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer,
&vrdl);
}
-static int vfio_sync_iommu_dirty_bitmap(VFIOContainerBase *bcontainer,
+static int vfio_sync_iommu_dirty_bitmap(VFIOContainer *bcontainer,
MemoryRegionSection *section)
{
VFIOGuestIOMMU *giommu;
@@ -1135,10 +1180,10 @@ static int vfio_sync_iommu_dirty_bitmap(VFIOContainerBase *bcontainer,
return 0;
}
-static int vfio_sync_dirty_bitmap(VFIOContainerBase *bcontainer,
+static int vfio_sync_dirty_bitmap(VFIOContainer *bcontainer,
MemoryRegionSection *section, Error **errp)
{
- ram_addr_t ram_addr;
+ hwaddr translated_addr;
if (memory_region_is_iommu(section->mr)) {
return vfio_sync_iommu_dirty_bitmap(bcontainer, section);
@@ -1153,19 +1198,19 @@ static int vfio_sync_dirty_bitmap(VFIOContainerBase *bcontainer,
return ret;
}
- ram_addr = memory_region_get_ram_addr(section->mr) +
- section->offset_within_region;
+ translated_addr = memory_region_get_ram_addr(section->mr) +
+ section->offset_within_region;
return vfio_container_query_dirty_bitmap(bcontainer,
REAL_HOST_PAGE_ALIGN(section->offset_within_address_space),
- int128_get64(section->size), ram_addr, errp);
+ int128_get64(section->size), translated_addr, errp);
}
static void vfio_listener_log_sync(MemoryListener *listener,
MemoryRegionSection *section)
{
- VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
- listener);
+ VFIOContainer *bcontainer = container_of(listener, VFIOContainer,
+ listener);
int ret;
Error *local_err = NULL;
@@ -1196,7 +1241,7 @@ static const MemoryListener vfio_memory_listener = {
.log_sync = vfio_listener_log_sync,
};
-bool vfio_listener_register(VFIOContainerBase *bcontainer, Error **errp)
+bool vfio_listener_register(VFIOContainer *bcontainer, Error **errp)
{
bcontainer->listener = vfio_memory_listener;
memory_listener_register(&bcontainer->listener, bcontainer->space->as);
@@ -1210,7 +1255,7 @@ bool vfio_listener_register(VFIOContainerBase *bcontainer, Error **errp)
return true;
}
-void vfio_listener_unregister(VFIOContainerBase *bcontainer)
+void vfio_listener_unregister(VFIOContainer *bcontainer)
{
memory_listener_unregister(&bcontainer->listener);
}
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
index bccb050..82f6869 100644
--- a/hw/vfio/meson.build
+++ b/hw/vfio/meson.build
@@ -1,8 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
vfio_ss = ss.source_set()
vfio_ss.add(files(
'listener.c',
- 'container-base.c',
'container.c',
+ 'container-legacy.c',
'helpers.c',
))
vfio_ss.add(when: 'CONFIG_PSERIES', if_true: files('spapr.c'))
@@ -11,16 +13,14 @@ vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
'pci.c',
))
vfio_ss.add(when: 'CONFIG_VFIO_CCW', if_true: files('ccw.c'))
-vfio_ss.add(when: 'CONFIG_VFIO_PLATFORM', if_true: files('platform.c'))
vfio_ss.add(when: 'CONFIG_VFIO_AP', if_true: files('ap.c'))
vfio_ss.add(when: 'CONFIG_VFIO_IGD', if_true: files('igd.c'))
specific_ss.add_all(when: 'CONFIG_VFIO', if_true: vfio_ss)
-system_ss.add(when: 'CONFIG_VFIO_XGMAC', if_true: files('calxeda-xgmac.c'))
-system_ss.add(when: 'CONFIG_VFIO_AMD_XGBE', if_true: files('amd-xgbe.c'))
system_ss.add(when: 'CONFIG_VFIO', if_true: files(
'cpr.c',
+ 'cpr-legacy.c',
'device.c',
'migration.c',
'migration-multifd.c',
@@ -28,7 +28,9 @@ system_ss.add(when: 'CONFIG_VFIO', if_true: files(
))
system_ss.add(when: ['CONFIG_VFIO', 'CONFIG_IOMMUFD'], if_true: files(
'iommufd.c',
+ 'cpr-iommufd.c',
))
+system_ss.add(when: 'CONFIG_IOMMUFD', if_false: files('iommufd-stubs.c'))
system_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
'display.c',
))
diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
index 850a319..e478503 100644
--- a/hw/vfio/migration-multifd.c
+++ b/hw/vfio/migration-multifd.c
@@ -13,7 +13,6 @@
#include "hw/vfio/vfio-device.h"
#include "migration/misc.h"
#include "qapi/error.h"
-#include "qemu/bswap.h"
#include "qemu/error-report.h"
#include "qemu/lockable.h"
#include "qemu/main-loop.h"
@@ -23,6 +22,7 @@
#include "migration-multifd.h"
#include "vfio-migration-internal.h"
#include "trace.h"
+#include "vfio-helpers.h"
#define VFIO_DEVICE_STATE_CONFIG_STATE (1)
@@ -35,6 +35,18 @@ typedef struct VFIODeviceStatePacket {
uint8_t data[0];
} QEMU_PACKED VFIODeviceStatePacket;
+bool vfio_load_config_after_iter(VFIODevice *vbasedev)
+{
+ if (vbasedev->migration_load_config_after_iter == ON_OFF_AUTO_ON) {
+ return true;
+ } else if (vbasedev->migration_load_config_after_iter == ON_OFF_AUTO_OFF) {
+ return false;
+ }
+
+ assert(vbasedev->migration_load_config_after_iter == ON_OFF_AUTO_AUTO);
+ return vfio_arch_wants_loading_config_after_iter();
+}
+
/* type safety */
typedef struct VFIOStateBuffers {
GArray *array;
@@ -50,12 +62,16 @@ typedef struct VFIOMultifd {
bool load_bufs_thread_running;
bool load_bufs_thread_want_exit;
+ bool load_bufs_iter_done;
+ QemuCond load_bufs_iter_done_cond;
+
VFIOStateBuffers load_bufs;
QemuCond load_bufs_buffer_ready_cond;
QemuCond load_bufs_thread_finished_cond;
QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */
uint32_t load_buf_idx;
uint32_t load_buf_idx_last;
+ size_t load_buf_queued_pending_buffers_size;
} VFIOMultifd;
static void vfio_state_buffer_clear(gpointer data)
@@ -112,6 +128,7 @@ static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev,
VFIOMigration *migration = vbasedev->migration;
VFIOMultifd *multifd = migration->multifd;
VFIOStateBuffer *lb;
+ size_t data_size = packet_total_size - sizeof(*packet);
vfio_state_buffers_assert_init(&multifd->load_bufs);
if (packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) {
@@ -127,8 +144,19 @@ static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev,
assert(packet->idx >= multifd->load_buf_idx);
- lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet));
- lb->len = packet_total_size - sizeof(*packet);
+ multifd->load_buf_queued_pending_buffers_size += data_size;
+ if (multifd->load_buf_queued_pending_buffers_size >
+ vbasedev->migration_max_queued_buffers_size) {
+ error_setg(errp,
+ "%s: queuing state buffer %" PRIu32
+ " would exceed the size max of %" PRIu64,
+ vbasedev->name, packet->idx,
+ vbasedev->migration_max_queued_buffers_size);
+ return false;
+ }
+
+ lb->data = g_memdup2(&packet->data, data_size);
+ lb->len = data_size;
lb->is_present = true;
return true;
@@ -312,6 +340,9 @@ static bool vfio_load_state_buffer_write(VFIODevice *vbasedev,
assert(wr_ret <= buf_len);
buf_len -= wr_ret;
buf_cur += wr_ret;
+
+ assert(multifd->load_buf_queued_pending_buffers_size >= wr_ret);
+ multifd->load_buf_queued_pending_buffers_size -= wr_ret;
}
trace_vfio_load_state_device_buffer_load_end(vbasedev->name,
@@ -394,6 +425,22 @@ static bool vfio_load_bufs_thread(void *opaque, bool *should_quit, Error **errp)
multifd->load_buf_idx++;
}
+ if (vfio_load_config_after_iter(vbasedev)) {
+ while (!multifd->load_bufs_iter_done) {
+ qemu_cond_wait(&multifd->load_bufs_iter_done_cond,
+ &multifd->load_bufs_mutex);
+
+ /*
+ * Need to re-check cancellation immediately after wait in case
+ * cond was signalled by vfio_load_cleanup_load_bufs_thread().
+ */
+ if (vfio_load_bufs_thread_want_exit(multifd, should_quit)) {
+ error_setg(errp, "operation cancelled");
+ goto thread_exit;
+ }
+ }
+ }
+
if (!vfio_load_bufs_thread_load_config(vbasedev, errp)) {
goto thread_exit;
}
@@ -413,6 +460,48 @@ thread_exit:
return ret;
}
+int vfio_load_state_config_load_ready(VFIODevice *vbasedev)
+{
+ VFIOMigration *migration = vbasedev->migration;
+ VFIOMultifd *multifd = migration->multifd;
+ int ret = 0;
+
+ if (!vfio_multifd_transfer_enabled(vbasedev)) {
+ error_report("%s: got DEV_CONFIG_LOAD_READY outside multifd transfer",
+ vbasedev->name);
+ return -EINVAL;
+ }
+
+ if (!vfio_load_config_after_iter(vbasedev)) {
+ error_report("%s: got DEV_CONFIG_LOAD_READY but was disabled",
+ vbasedev->name);
+ return -EINVAL;
+ }
+
+ assert(multifd);
+
+ /* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */
+ bql_unlock();
+ WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
+ if (multifd->load_bufs_iter_done) {
+ /* Can't print error here as we're outside BQL */
+ ret = -EINVAL;
+ break;
+ }
+
+ multifd->load_bufs_iter_done = true;
+ qemu_cond_signal(&multifd->load_bufs_iter_done_cond);
+ }
+ bql_lock();
+
+ if (ret) {
+ error_report("%s: duplicate DEV_CONFIG_LOAD_READY",
+ vbasedev->name);
+ }
+
+ return ret;
+}
+
static VFIOMultifd *vfio_multifd_new(void)
{
VFIOMultifd *multifd = g_new(VFIOMultifd, 1);
@@ -423,8 +512,12 @@ static VFIOMultifd *vfio_multifd_new(void)
multifd->load_buf_idx = 0;
multifd->load_buf_idx_last = UINT32_MAX;
+ multifd->load_buf_queued_pending_buffers_size = 0;
qemu_cond_init(&multifd->load_bufs_buffer_ready_cond);
+ multifd->load_bufs_iter_done = false;
+ qemu_cond_init(&multifd->load_bufs_iter_done_cond);
+
multifd->load_bufs_thread_running = false;
multifd->load_bufs_thread_want_exit = false;
qemu_cond_init(&multifd->load_bufs_thread_finished_cond);
@@ -448,6 +541,7 @@ static void vfio_load_cleanup_load_bufs_thread(VFIOMultifd *multifd)
multifd->load_bufs_thread_want_exit = true;
qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond);
+ qemu_cond_signal(&multifd->load_bufs_iter_done_cond);
qemu_cond_wait(&multifd->load_bufs_thread_finished_cond,
&multifd->load_bufs_mutex);
}
@@ -460,6 +554,7 @@ static void vfio_multifd_free(VFIOMultifd *multifd)
vfio_load_cleanup_load_bufs_thread(multifd);
qemu_cond_destroy(&multifd->load_bufs_thread_finished_cond);
+ qemu_cond_destroy(&multifd->load_bufs_iter_done_cond);
vfio_state_buffers_destroy(&multifd->load_bufs);
qemu_cond_destroy(&multifd->load_bufs_buffer_ready_cond);
qemu_mutex_destroy(&multifd->load_bufs_mutex);
@@ -583,7 +678,7 @@ vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev,
/*
* This thread is spawned by the migration core directly via
- * .save_live_complete_precopy_thread SaveVMHandler.
+ * .save_complete_precopy_thread SaveVMHandler.
*
* It exits after either:
* * completing saving the remaining device state and device config, OR:
@@ -592,7 +687,7 @@ vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev,
* multifd_device_state_save_thread_should_exit() returning true.
*/
bool
-vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
+vfio_multifd_save_complete_precopy_thread(SaveCompletePrecopyThreadData *d,
Error **errp)
{
VFIODevice *vbasedev = d->handler_opaque;
diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h
index 0bab632..82d2d3a 100644
--- a/hw/vfio/migration-multifd.h
+++ b/hw/vfio/migration-multifd.h
@@ -20,13 +20,16 @@ void vfio_multifd_cleanup(VFIODevice *vbasedev);
bool vfio_multifd_transfer_supported(void);
bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev);
+bool vfio_load_config_after_iter(VFIODevice *vbasedev);
bool vfio_multifd_load_state_buffer(void *opaque, char *data, size_t data_size,
Error **errp);
+int vfio_load_state_config_load_ready(VFIODevice *vbasedev);
+
void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f);
bool
-vfio_multifd_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
+vfio_multifd_save_complete_precopy_thread(SaveCompletePrecopyThreadData *d,
Error **errp);
int vfio_multifd_switchover_start(VFIODevice *vbasedev);
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index b76697bd..4c06e3d 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -675,7 +675,11 @@ static void vfio_save_state(QEMUFile *f, void *opaque)
int ret;
if (vfio_multifd_transfer_enabled(vbasedev)) {
- vfio_multifd_emit_dummy_eos(vbasedev, f);
+ if (vfio_load_config_after_iter(vbasedev)) {
+ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_LOAD_READY);
+ } else {
+ vfio_multifd_emit_dummy_eos(vbasedev, f);
+ }
return;
}
@@ -784,6 +788,10 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
return ret;
}
+ case VFIO_MIG_FLAG_DEV_CONFIG_LOAD_READY:
+ {
+ return vfio_load_state_config_load_ready(vbasedev);
+ }
default:
error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data);
return -EINVAL;
@@ -824,7 +832,7 @@ static const SaveVMHandlers savevm_vfio_handlers = {
.state_pending_exact = vfio_state_pending_exact,
.is_active_iterate = vfio_is_active_iterate,
.save_live_iterate = vfio_save_iterate,
- .save_live_complete_precopy = vfio_save_complete_precopy,
+ .save_complete = vfio_save_complete_precopy,
.save_state = vfio_save_state,
.load_setup = vfio_load_setup,
.load_cleanup = vfio_load_cleanup,
@@ -835,7 +843,7 @@ static const SaveVMHandlers savevm_vfio_handlers = {
*/
.load_state_buffer = vfio_multifd_load_state_buffer,
.switchover_start = vfio_switchover_start,
- .save_live_complete_precopy_thread = vfio_multifd_save_complete_precopy_thread,
+ .save_complete_precopy_thread = vfio_multifd_save_complete_precopy_thread,
};
/* ---------------------------------------------------------------------- */
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index 3f00225..b5da6af 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -113,6 +113,7 @@ static uint64_t vfio_generic_window_quirk_data_read(void *opaque,
{
VFIOConfigWindowQuirk *window = opaque;
VFIOPCIDevice *vdev = window->vdev;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
uint64_t data;
/* Always read data reg, discard if window enabled */
@@ -120,7 +121,7 @@ static uint64_t vfio_generic_window_quirk_data_read(void *opaque,
addr + window->data_offset, size);
if (window->window_enabled) {
- data = vfio_pci_read_config(&vdev->pdev, window->address_val, size);
+ data = vfio_pci_read_config(pdev, window->address_val, size);
trace_vfio_quirk_generic_window_data_read(vdev->vbasedev.name,
memory_region_name(window->data_mem), data);
}
@@ -133,9 +134,10 @@ static void vfio_generic_window_quirk_data_write(void *opaque, hwaddr addr,
{
VFIOConfigWindowQuirk *window = opaque;
VFIOPCIDevice *vdev = window->vdev;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
if (window->window_enabled) {
- vfio_pci_write_config(&vdev->pdev, window->address_val, data, size);
+ vfio_pci_write_config(pdev, window->address_val, data, size);
trace_vfio_quirk_generic_window_data_write(vdev->vbasedev.name,
memory_region_name(window->data_mem), data);
return;
@@ -156,6 +158,7 @@ static uint64_t vfio_generic_quirk_mirror_read(void *opaque,
{
VFIOConfigMirrorQuirk *mirror = opaque;
VFIOPCIDevice *vdev = mirror->vdev;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
uint64_t data;
/* Read and discard in case the hardware cares */
@@ -163,7 +166,7 @@ static uint64_t vfio_generic_quirk_mirror_read(void *opaque,
addr + mirror->offset, size);
addr += mirror->config_offset;
- data = vfio_pci_read_config(&vdev->pdev, addr, size);
+ data = vfio_pci_read_config(pdev, addr, size);
trace_vfio_quirk_generic_mirror_read(vdev->vbasedev.name,
memory_region_name(mirror->mem),
addr, data);
@@ -175,9 +178,10 @@ static void vfio_generic_quirk_mirror_write(void *opaque, hwaddr addr,
{
VFIOConfigMirrorQuirk *mirror = opaque;
VFIOPCIDevice *vdev = mirror->vdev;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
addr += mirror->config_offset;
- vfio_pci_write_config(&vdev->pdev, addr, data, size);
+ vfio_pci_write_config(pdev, addr, data, size);
trace_vfio_quirk_generic_mirror_write(vdev->vbasedev.name,
memory_region_name(mirror->mem),
addr, data);
@@ -211,7 +215,8 @@ static uint64_t vfio_ati_3c3_quirk_read(void *opaque,
hwaddr addr, unsigned size)
{
VFIOPCIDevice *vdev = opaque;
- uint64_t data = vfio_pci_read_config(&vdev->pdev,
+ PCIDevice *pdev = PCI_DEVICE(vdev);
+ uint64_t data = vfio_pci_read_config(pdev,
PCI_BASE_ADDRESS_4 + 1, size);
trace_vfio_quirk_ati_3c3_read(vdev->vbasedev.name, data);
@@ -563,6 +568,7 @@ static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque,
{
VFIONvidia3d0Quirk *quirk = opaque;
VFIOPCIDevice *vdev = quirk->vdev;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
VFIONvidia3d0State old_state = quirk->state;
uint64_t data = vfio_vga_read(&vdev->vga->region[QEMU_PCI_VGA_IO_HI],
addr + 0x10, size);
@@ -573,7 +579,7 @@ static uint64_t vfio_nvidia_3d0_quirk_read(void *opaque,
(quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);
- data = vfio_pci_read_config(&vdev->pdev, offset, size);
+ data = vfio_pci_read_config(pdev, offset, size);
trace_vfio_quirk_nvidia_3d0_read(vdev->vbasedev.name,
offset, size, data);
}
@@ -586,6 +592,7 @@ static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr,
{
VFIONvidia3d0Quirk *quirk = opaque;
VFIOPCIDevice *vdev = quirk->vdev;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
VFIONvidia3d0State old_state = quirk->state;
quirk->state = NONE;
@@ -599,7 +606,7 @@ static void vfio_nvidia_3d0_quirk_write(void *opaque, hwaddr addr,
if ((quirk->offset & ~(PCI_CONFIG_SPACE_SIZE - 1)) == 0x1800) {
uint8_t offset = quirk->offset & (PCI_CONFIG_SPACE_SIZE - 1);
- vfio_pci_write_config(&vdev->pdev, offset, data, size);
+ vfio_pci_write_config(pdev, offset, data, size);
trace_vfio_quirk_nvidia_3d0_write(vdev->vbasedev.name,
offset, data, size);
return;
@@ -815,7 +822,7 @@ static void vfio_nvidia_quirk_mirror_write(void *opaque, hwaddr addr,
{
VFIOConfigMirrorQuirk *mirror = opaque;
VFIOPCIDevice *vdev = mirror->vdev;
- PCIDevice *pdev = &vdev->pdev;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
LastDataSet *last = (LastDataSet *)&mirror->data;
vfio_generic_quirk_mirror_write(opaque, addr, data, size);
@@ -1005,6 +1012,7 @@ static void vfio_rtl8168_quirk_address_write(void *opaque, hwaddr addr,
{
VFIOrtl8168Quirk *rtl = opaque;
VFIOPCIDevice *vdev = rtl->vdev;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
rtl->enabled = false;
@@ -1013,7 +1021,7 @@ static void vfio_rtl8168_quirk_address_write(void *opaque, hwaddr addr,
rtl->addr = (uint32_t)data;
if (data & 0x80000000U) { /* Do write */
- if (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX) {
+ if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
hwaddr offset = data & 0xfff;
uint64_t val = rtl->data;
@@ -1021,7 +1029,7 @@ static void vfio_rtl8168_quirk_address_write(void *opaque, hwaddr addr,
(uint16_t)offset, val);
/* Write to the proper guest MSI-X table instead */
- memory_region_dispatch_write(&vdev->pdev.msix_table_mmio,
+ memory_region_dispatch_write(&pdev->msix_table_mmio,
offset, val,
size_memop(size) | MO_LE,
MEMTXATTRS_UNSPECIFIED);
@@ -1049,11 +1057,12 @@ static uint64_t vfio_rtl8168_quirk_data_read(void *opaque,
{
VFIOrtl8168Quirk *rtl = opaque;
VFIOPCIDevice *vdev = rtl->vdev;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
uint64_t data = vfio_region_read(&vdev->bars[2].region, addr + 0x70, size);
- if (rtl->enabled && (vdev->pdev.cap_present & QEMU_PCI_CAP_MSIX)) {
+ if (rtl->enabled && (pdev->cap_present & QEMU_PCI_CAP_MSIX)) {
hwaddr offset = rtl->addr & 0xfff;
- memory_region_dispatch_read(&vdev->pdev.msix_table_mmio, offset,
+ memory_region_dispatch_read(&pdev->msix_table_mmio, offset,
&data, size_memop(size) | MO_LE,
MEMTXATTRS_UNSPECIFIED);
trace_vfio_quirk_rtl8168_msix_read(vdev->vbasedev.name, offset, data);
@@ -1150,15 +1159,12 @@ void vfio_vga_quirk_exit(VFIOPCIDevice *vdev)
void vfio_vga_quirk_finalize(VFIOPCIDevice *vdev)
{
- int i, j;
+ int i;
for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
while (!QLIST_EMPTY(&vdev->vga->region[i].quirks)) {
VFIOQuirk *quirk = QLIST_FIRST(&vdev->vga->region[i].quirks);
QLIST_REMOVE(quirk, next);
- for (j = 0; j < quirk->nr_mem; j++) {
- object_unparent(OBJECT(&quirk->mem[j]));
- }
g_free(quirk->mem);
g_free(quirk->data);
g_free(quirk);
@@ -1198,14 +1204,10 @@ void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr)
void vfio_bar_quirk_finalize(VFIOPCIDevice *vdev, int nr)
{
VFIOBAR *bar = &vdev->bars[nr];
- int i;
while (!QLIST_EMPTY(&bar->quirks)) {
VFIOQuirk *quirk = QLIST_FIRST(&bar->quirks);
QLIST_REMOVE(quirk, next);
- for (i = 0; i < quirk->nr_mem; i++) {
- object_unparent(OBJECT(&quirk->mem[i]));
- }
g_free(quirk->mem);
g_free(quirk->data);
g_free(quirk);
@@ -1297,7 +1299,7 @@ static void vfio_radeon_set_gfx_only_reset(VFIOPCIDevice *vdev)
static int vfio_radeon_reset(VFIOPCIDevice *vdev)
{
- PCIDevice *pdev = &vdev->pdev;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
int i, ret = 0;
uint32_t data;
@@ -1454,7 +1456,7 @@ static bool is_valid_std_cap_offset(uint8_t pos)
static bool vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
{
ERRP_GUARD();
- PCIDevice *pdev = &vdev->pdev;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
int ret, pos;
bool c8_conflict = false, d4_conflict = false;
uint8_t tmp;
@@ -1547,6 +1549,7 @@ static bool vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp)
static bool vfio_add_vmd_shadow_cap(VFIOPCIDevice *vdev, Error **errp)
{
ERRP_GUARD();
+ PCIDevice *pdev = PCI_DEVICE(vdev);
uint8_t membar_phys[16];
int ret, pos = 0xE8;
@@ -1565,7 +1568,7 @@ static bool vfio_add_vmd_shadow_cap(VFIOPCIDevice *vdev, Error **errp)
return false;
}
- ret = pci_add_capability(&vdev->pdev, PCI_CAP_ID_VNDR, pos,
+ ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos,
VMD_SHADOW_CAP_LEN, errp);
if (ret < 0) {
error_prepend(errp, "Failed to add VMD MEMBAR Shadow cap: ");
@@ -1574,10 +1577,10 @@ static bool vfio_add_vmd_shadow_cap(VFIOPCIDevice *vdev, Error **errp)
memset(vdev->emulated_config_bits + pos, 0xFF, VMD_SHADOW_CAP_LEN);
pos += PCI_CAP_FLAGS;
- pci_set_byte(vdev->pdev.config + pos++, VMD_SHADOW_CAP_LEN);
- pci_set_byte(vdev->pdev.config + pos++, VMD_SHADOW_CAP_VER);
- pci_set_long(vdev->pdev.config + pos, 0x53484457); /* SHDW */
- memcpy(vdev->pdev.config + pos + 4, membar_phys, 16);
+ pci_set_byte(pdev->config + pos++, VMD_SHADOW_CAP_LEN);
+ pci_set_byte(pdev->config + pos++, VMD_SHADOW_CAP_VER);
+ pci_set_long(pdev->config + pos, 0x53484457); /* SHDW */
+ memcpy(pdev->config + pos + 4, membar_phys, 16);
return true;
}
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index a1bfdfe..06b06af 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -29,7 +29,9 @@
#include "hw/pci/pci_bridge.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
+#include "hw/vfio/vfio-cpr.h"
#include "migration/vmstate.h"
+#include "migration/cpr.h"
#include "qobject/qdict.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
@@ -47,8 +49,6 @@
#include "vfio-migration-internal.h"
#include "vfio-helpers.h"
-#define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
-
/* Protected by BQL */
static KVMRouteChange vfio_route_change;
@@ -56,6 +56,36 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
static void vfio_msi_disable_common(VFIOPCIDevice *vdev);
+/* Create new or reuse existing eventfd */
+static bool vfio_notifier_init(VFIOPCIDevice *vdev, EventNotifier *e,
+ const char *name, int nr, Error **errp)
+{
+ int fd, ret;
+
+ fd = vfio_cpr_load_vector_fd(vdev, name, nr);
+ if (fd >= 0) {
+ event_notifier_init_fd(e, fd);
+ return true;
+ }
+
+ ret = event_notifier_init(e, 0);
+ if (ret) {
+ error_setg_errno(errp, -ret, "vfio_notifier_init %s failed", name);
+ return false;
+ }
+
+ fd = event_notifier_get_fd(e);
+ vfio_cpr_save_vector_fd(vdev, name, nr, fd);
+ return true;
+}
+
+static void vfio_notifier_cleanup(VFIOPCIDevice *vdev, EventNotifier *e,
+ const char *name, int nr)
+{
+ vfio_cpr_delete_vector_fd(vdev, name, nr);
+ event_notifier_cleanup(e);
+}
+
/*
* Disabling BAR mmaping can be slow, but toggling it around INTx can
* also be a huge overhead. We try to get the best of both worlds by
@@ -87,6 +117,7 @@ static void vfio_intx_mmap_enable(void *opaque)
static void vfio_intx_interrupt(void *opaque)
{
VFIOPCIDevice *vdev = opaque;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
if (!event_notifier_test_and_clear(&vdev->intx.interrupt)) {
return;
@@ -95,7 +126,7 @@ static void vfio_intx_interrupt(void *opaque)
trace_vfio_intx_interrupt(vdev->vbasedev.name, 'A' + vdev->intx.pin);
vdev->intx.pending = true;
- pci_irq_assert(&vdev->pdev);
+ pci_irq_assert(pdev);
vfio_mmap_set_enabled(vdev, false);
if (vdev->intx.mmap_timeout) {
timer_mod(vdev->intx.mmap_timer,
@@ -103,24 +134,26 @@ static void vfio_intx_interrupt(void *opaque)
}
}
-static void vfio_intx_eoi(VFIODevice *vbasedev)
+void vfio_pci_intx_eoi(VFIODevice *vbasedev)
{
VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
+ PCIDevice *pdev = PCI_DEVICE(vdev);
if (!vdev->intx.pending) {
return;
}
- trace_vfio_intx_eoi(vbasedev->name);
+ trace_vfio_pci_intx_eoi(vbasedev->name);
vdev->intx.pending = false;
- pci_irq_deassert(&vdev->pdev);
+ pci_irq_deassert(pdev);
vfio_device_irq_unmask(vbasedev, VFIO_PCI_INTX_IRQ_INDEX);
}
static bool vfio_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp)
{
#ifdef CONFIG_KVM
+ PCIDevice *pdev = PCI_DEVICE(vdev);
int irq_fd = event_notifier_get_fd(&vdev->intx.interrupt);
if (vdev->no_kvm_intx || !kvm_irqfds_enabled() ||
@@ -133,11 +166,10 @@ static bool vfio_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp)
qemu_set_fd_handler(irq_fd, NULL, NULL, vdev);
vfio_device_irq_mask(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX);
vdev->intx.pending = false;
- pci_irq_deassert(&vdev->pdev);
+ pci_irq_deassert(pdev);
/* Get an eventfd for resample/unmask */
- if (event_notifier_init(&vdev->intx.unmask, 0)) {
- error_setg(errp, "event_notifier_init failed eoi");
+ if (!vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0, errp)) {
goto fail;
}
@@ -169,7 +201,7 @@ fail_vfio:
kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vdev->intx.interrupt,
vdev->intx.route.irq);
fail_irqfd:
- event_notifier_cleanup(&vdev->intx.unmask);
+ vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0);
fail:
qemu_set_fd_handler(irq_fd, vfio_intx_interrupt, NULL, vdev);
vfio_device_irq_unmask(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX);
@@ -179,9 +211,41 @@ fail:
#endif
}
+static bool vfio_cpr_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp)
+{
+#ifdef CONFIG_KVM
+ if (vdev->no_kvm_intx || !kvm_irqfds_enabled() ||
+ vdev->intx.route.mode != PCI_INTX_ENABLED ||
+ !kvm_resamplefds_enabled()) {
+ return true;
+ }
+
+ if (!vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0, errp)) {
+ return false;
+ }
+
+ if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
+ &vdev->intx.interrupt,
+ &vdev->intx.unmask,
+ vdev->intx.route.irq)) {
+ error_setg_errno(errp, errno, "failed to setup resample irqfd");
+ vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0);
+ return false;
+ }
+
+ vdev->intx.kvm_accel = true;
+ trace_vfio_intx_enable_kvm(vdev->vbasedev.name);
+ return true;
+#else
+ return true;
+#endif
+}
+
static void vfio_intx_disable_kvm(VFIOPCIDevice *vdev)
{
#ifdef CONFIG_KVM
+ PCIDevice *pdev = PCI_DEVICE(vdev);
+
if (!vdev->intx.kvm_accel) {
return;
}
@@ -192,7 +256,7 @@ static void vfio_intx_disable_kvm(VFIOPCIDevice *vdev)
*/
vfio_device_irq_mask(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX);
vdev->intx.pending = false;
- pci_irq_deassert(&vdev->pdev);
+ pci_irq_deassert(pdev);
/* Tell KVM to stop listening for an INTx irqfd */
if (kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vdev->intx.interrupt,
@@ -201,7 +265,7 @@ static void vfio_intx_disable_kvm(VFIOPCIDevice *vdev)
}
/* We only need to close the eventfd for VFIO to cleanup the kernel side */
- event_notifier_cleanup(&vdev->intx.unmask);
+ vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0);
/* QEMU starts listening for interrupt events. */
qemu_set_fd_handler(event_notifier_get_fd(&vdev->intx.interrupt),
@@ -236,19 +300,19 @@ static void vfio_intx_update(VFIOPCIDevice *vdev, PCIINTxRoute *route)
}
/* Re-enable the interrupt in cased we missed an EOI */
- vfio_intx_eoi(&vdev->vbasedev);
+ vfio_pci_intx_eoi(&vdev->vbasedev);
}
static void vfio_intx_routing_notifier(PCIDevice *pdev)
{
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
PCIINTxRoute route;
if (vdev->interrupt != VFIO_INT_INTx) {
return;
}
- route = pci_device_route_intx_to_irq(&vdev->pdev, vdev->intx.pin);
+ route = pci_device_route_intx_to_irq(pdev, vdev->intx.pin);
if (pci_intx_route_changed(&vdev->intx.route, &route)) {
vfio_intx_update(vdev, &route);
@@ -265,20 +329,26 @@ static void vfio_irqchip_change(Notifier *notify, void *data)
static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
{
- uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1);
+ PCIDevice *pdev = PCI_DEVICE(vdev);
+ uint8_t pin = vfio_pci_read_config(pdev, PCI_INTERRUPT_PIN, 1);
Error *err = NULL;
int32_t fd;
- int ret;
if (!pin) {
return true;
}
- vfio_disable_interrupts(vdev);
+ /*
+ * Do not alter interrupt state during vfio_realize and cpr load.
+ * The incoming state is cleared thereafter.
+ */
+ if (!cpr_is_incoming()) {
+ vfio_disable_interrupts(vdev);
+ }
vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */
- pci_config_set_interrupt_pin(vdev->pdev.config, pin);
+ pci_config_set_interrupt_pin(pdev->config, pin);
#ifdef CONFIG_KVM
/*
@@ -286,23 +356,30 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
* where we won't actually use the result anyway.
*/
if (kvm_irqfds_enabled() && kvm_resamplefds_enabled()) {
- vdev->intx.route = pci_device_route_intx_to_irq(&vdev->pdev,
+ vdev->intx.route = pci_device_route_intx_to_irq(pdev,
vdev->intx.pin);
}
#endif
- ret = event_notifier_init(&vdev->intx.interrupt, 0);
- if (ret) {
- error_setg_errno(errp, -ret, "event_notifier_init failed");
+ if (!vfio_notifier_init(vdev, &vdev->intx.interrupt, "intx-interrupt", 0,
+ errp)) {
return false;
}
fd = event_notifier_get_fd(&vdev->intx.interrupt);
qemu_set_fd_handler(fd, vfio_intx_interrupt, NULL, vdev);
+
+ if (cpr_is_incoming()) {
+ if (!vfio_cpr_intx_enable_kvm(vdev, &err)) {
+ warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
+ }
+ goto skip_signaling;
+ }
+
if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0,
VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) {
qemu_set_fd_handler(fd, NULL, NULL, vdev);
- event_notifier_cleanup(&vdev->intx.interrupt);
+ vfio_notifier_cleanup(vdev, &vdev->intx.interrupt, "intx-interrupt", 0);
return false;
}
@@ -310,6 +387,7 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
}
+skip_signaling:
vdev->interrupt = VFIO_INT_INTx;
trace_vfio_intx_enable(vdev->vbasedev.name);
@@ -318,24 +396,38 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
static void vfio_intx_disable(VFIOPCIDevice *vdev)
{
+ PCIDevice *pdev = PCI_DEVICE(vdev);
int fd;
timer_del(vdev->intx.mmap_timer);
vfio_intx_disable_kvm(vdev);
vfio_device_irq_disable(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX);
vdev->intx.pending = false;
- pci_irq_deassert(&vdev->pdev);
+ pci_irq_deassert(pdev);
vfio_mmap_set_enabled(vdev, true);
fd = event_notifier_get_fd(&vdev->intx.interrupt);
qemu_set_fd_handler(fd, NULL, NULL, vdev);
- event_notifier_cleanup(&vdev->intx.interrupt);
+ vfio_notifier_cleanup(vdev, &vdev->intx.interrupt, "intx-interrupt", 0);
vdev->interrupt = VFIO_INT_NONE;
trace_vfio_intx_disable(vdev->vbasedev.name);
}
+bool vfio_pci_intx_enable(VFIOPCIDevice *vdev, Error **errp)
+{
+ return vfio_intx_enable(vdev, errp);
+}
+
+void vfio_pci_intx_set_handler(VFIOPCIDevice *vdev, bool enable)
+{
+ int fd = event_notifier_get_fd(&vdev->intx.interrupt);
+ IOHandler *handler = (enable ? vfio_intx_interrupt : NULL);
+
+ qemu_set_fd_handler(fd, handler, NULL, vdev);
+}
+
/*
* MSI/X
*/
@@ -343,6 +435,7 @@ static void vfio_msi_interrupt(void *opaque)
{
VFIOMSIVector *vector = opaque;
VFIOPCIDevice *vdev = vector->vdev;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
MSIMessage (*get_msg)(PCIDevice *dev, unsigned vector);
void (*notify)(PCIDevice *dev, unsigned vector);
MSIMessage msg;
@@ -357,9 +450,9 @@ static void vfio_msi_interrupt(void *opaque)
notify = msix_notify;
/* A masked vector firing needs to use the PBA, enable it */
- if (msix_is_masked(&vdev->pdev, nr)) {
+ if (msix_is_masked(pdev, nr)) {
set_bit(nr, vdev->msix->pending);
- memory_region_set_enabled(&vdev->pdev.msix_pba_mmio, true);
+ memory_region_set_enabled(&pdev->msix_pba_mmio, true);
trace_vfio_msix_pba_enable(vdev->vbasedev.name);
}
} else if (vdev->interrupt == VFIO_INT_MSI) {
@@ -369,9 +462,18 @@ static void vfio_msi_interrupt(void *opaque)
abort();
}
- msg = get_msg(&vdev->pdev, nr);
+ msg = get_msg(pdev, nr);
trace_vfio_msi_interrupt(vdev->vbasedev.name, nr, msg.address, msg.data);
- notify(&vdev->pdev, nr);
+ notify(pdev, nr);
+}
+
+void vfio_pci_msi_set_handler(VFIOPCIDevice *vdev, int nr, bool enable)
+{
+ VFIOMSIVector *vector = &vdev->msi_vectors[nr];
+ int fd = event_notifier_get_fd(&vector->interrupt);
+ IOHandler *handler = (enable ? vfio_msi_interrupt : NULL);
+
+ qemu_set_fd_handler(fd, handler, NULL, vector);
}
/*
@@ -401,6 +503,7 @@ static int vfio_enable_msix_no_vec(VFIOPCIDevice *vdev)
static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix)
{
+ PCIDevice *pdev = PCI_DEVICE(vdev);
struct vfio_irq_set *irq_set;
int ret = 0, i, argsz;
int32_t *fds;
@@ -443,7 +546,7 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix)
*/
if (vdev->msi_vectors[i].use) {
if (vdev->msi_vectors[i].virq < 0 ||
- (msix && msix_is_masked(&vdev->pdev, i))) {
+ (msix && msix_is_masked(pdev, i))) {
fd = event_notifier_get_fd(&vdev->msi_vectors[i].interrupt);
} else {
fd = event_notifier_get_fd(&vdev->msi_vectors[i].kvm_interrupt);
@@ -460,24 +563,29 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix)
return ret;
}
-static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
- int vector_n, bool msix)
+void vfio_pci_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
+ int vector_n, bool msix)
{
+ PCIDevice *pdev = PCI_DEVICE(vdev);
+
if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi)) {
return;
}
vector->virq = kvm_irqchip_add_msi_route(&vfio_route_change,
- vector_n, &vdev->pdev);
+ vector_n, pdev);
}
-static void vfio_connect_kvm_msi_virq(VFIOMSIVector *vector)
+static void vfio_connect_kvm_msi_virq(VFIOMSIVector *vector, int nr)
{
+ const char *name = "kvm_interrupt";
+
if (vector->virq < 0) {
return;
}
- if (event_notifier_init(&vector->kvm_interrupt, 0)) {
+ if (!vfio_notifier_init(vector->vdev, &vector->kvm_interrupt, name, nr,
+ NULL)) {
goto fail_notifier;
}
@@ -489,19 +597,20 @@ static void vfio_connect_kvm_msi_virq(VFIOMSIVector *vector)
return;
fail_kvm:
- event_notifier_cleanup(&vector->kvm_interrupt);
+ vfio_notifier_cleanup(vector->vdev, &vector->kvm_interrupt, name, nr);
fail_notifier:
kvm_irqchip_release_virq(kvm_state, vector->virq);
vector->virq = -1;
}
-static void vfio_remove_kvm_msi_virq(VFIOMSIVector *vector)
+static void vfio_remove_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
+ int nr)
{
kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt,
vector->virq);
kvm_irqchip_release_virq(kvm_state, vector->virq);
vector->virq = -1;
- event_notifier_cleanup(&vector->kvm_interrupt);
+ vfio_notifier_cleanup(vdev, &vector->kvm_interrupt, "kvm_interrupt", nr);
}
static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg,
@@ -511,10 +620,47 @@ static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg,
kvm_irqchip_commit_routes(kvm_state);
}
+static void set_irq_signalling(VFIODevice *vbasedev, VFIOMSIVector *vector,
+ unsigned int nr)
+{
+ Error *err = NULL;
+ int32_t fd;
+
+ if (vector->virq >= 0) {
+ fd = event_notifier_get_fd(&vector->kvm_interrupt);
+ } else {
+ fd = event_notifier_get_fd(&vector->interrupt);
+ }
+
+ if (!vfio_device_irq_set_signaling(vbasedev, VFIO_PCI_MSIX_IRQ_INDEX, nr,
+ VFIO_IRQ_SET_ACTION_TRIGGER,
+ fd, &err)) {
+ error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name);
+ }
+}
+
+void vfio_pci_vector_init(VFIOPCIDevice *vdev, int nr)
+{
+ VFIOMSIVector *vector = &vdev->msi_vectors[nr];
+ PCIDevice *pdev = PCI_DEVICE(vdev);
+ Error *local_err = NULL;
+
+ vector->vdev = vdev;
+ vector->virq = -1;
+ if (!vfio_notifier_init(vdev, &vector->interrupt, "interrupt", nr,
+ &local_err)) {
+ error_report_err(local_err);
+ }
+ vector->use = true;
+ if (vdev->interrupt == VFIO_INT_MSIX) {
+ msix_vector_use(pdev, nr);
+ }
+}
+
static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
MSIMessage *msg, IOHandler *handler)
{
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
VFIOMSIVector *vector;
int ret;
bool resizing = !!(vdev->nr_vectors < nr + 1);
@@ -524,13 +670,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
vector = &vdev->msi_vectors[nr];
if (!vector->use) {
- vector->vdev = vdev;
- vector->virq = -1;
- if (event_notifier_init(&vector->interrupt, 0)) {
- error_report("vfio: Error: event_notifier_init failed");
- }
- vector->use = true;
- msix_vector_use(pdev, nr);
+ vfio_pci_vector_init(vdev, nr);
}
qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
@@ -542,19 +682,19 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
*/
if (vector->virq >= 0) {
if (!msg) {
- vfio_remove_kvm_msi_virq(vector);
+ vfio_remove_kvm_msi_virq(vdev, vector, nr);
} else {
vfio_update_kvm_msi_virq(vector, *msg, pdev);
}
} else {
if (msg) {
if (vdev->defer_kvm_irq_routing) {
- vfio_add_kvm_msi_virq(vdev, vector, nr, true);
+ vfio_pci_add_kvm_msi_virq(vdev, vector, nr, true);
} else {
vfio_route_change = kvm_irqchip_begin_route_changes(kvm_state);
- vfio_add_kvm_msi_virq(vdev, vector, nr, true);
+ vfio_pci_add_kvm_msi_virq(vdev, vector, nr, true);
kvm_irqchip_commit_route_changes(&vfio_route_change);
- vfio_connect_kvm_msi_virq(vector);
+ vfio_connect_kvm_msi_virq(vector, nr);
}
}
}
@@ -583,21 +723,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
strerror(-ret));
}
} else {
- Error *err = NULL;
- int32_t fd;
-
- if (vector->virq >= 0) {
- fd = event_notifier_get_fd(&vector->kvm_interrupt);
- } else {
- fd = event_notifier_get_fd(&vector->interrupt);
- }
-
- if (!vfio_device_irq_set_signaling(&vdev->vbasedev,
- VFIO_PCI_MSIX_IRQ_INDEX, nr,
- VFIO_IRQ_SET_ACTION_TRIGGER, fd,
- &err)) {
- error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
- }
+ set_irq_signalling(&vdev->vbasedev, vector, nr);
}
}
@@ -605,7 +731,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
clear_bit(nr, vdev->msix->pending);
if (find_first_bit(vdev->msix->pending,
vdev->nr_vectors) == vdev->nr_vectors) {
- memory_region_set_enabled(&vdev->pdev.msix_pba_mmio, false);
+ memory_region_set_enabled(&pdev->msix_pba_mmio, false);
trace_vfio_msix_pba_disable(vdev->vbasedev.name);
}
@@ -615,12 +741,21 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
static int vfio_msix_vector_use(PCIDevice *pdev,
unsigned int nr, MSIMessage msg)
{
+ /*
+ * Ignore the callback from msix_set_vector_notifiers during resume.
+ * The necessary subset of these actions is called from
+ * vfio_cpr_claim_vectors during post load.
+ */
+ if (cpr_is_incoming()) {
+ return 0;
+ }
+
return vfio_msix_vector_do_use(pdev, nr, &msg, vfio_msi_interrupt);
}
static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
{
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
VFIOMSIVector *vector = &vdev->msi_vectors[nr];
trace_vfio_msix_vector_release(vdev->vbasedev.name, nr);
@@ -645,14 +780,22 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
}
}
-static void vfio_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev)
+void vfio_pci_msix_set_notifiers(VFIOPCIDevice *vdev)
+{
+ PCIDevice *pdev = PCI_DEVICE(vdev);
+
+ msix_set_vector_notifiers(pdev, vfio_msix_vector_use,
+ vfio_msix_vector_release, NULL);
+}
+
+void vfio_pci_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev)
{
assert(!vdev->defer_kvm_irq_routing);
vdev->defer_kvm_irq_routing = true;
vfio_route_change = kvm_irqchip_begin_route_changes(kvm_state);
}
-static void vfio_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev)
+void vfio_pci_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev)
{
int i;
@@ -662,12 +805,13 @@ static void vfio_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev)
kvm_irqchip_commit_route_changes(&vfio_route_change);
for (i = 0; i < vdev->nr_vectors; i++) {
- vfio_connect_kvm_msi_virq(&vdev->msi_vectors[i]);
+ vfio_connect_kvm_msi_virq(&vdev->msi_vectors[i], i);
}
}
static void vfio_msix_enable(VFIOPCIDevice *vdev)
{
+ PCIDevice *pdev = PCI_DEVICE(vdev);
int ret;
vfio_disable_interrupts(vdev);
@@ -682,14 +826,14 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev)
* routes once rather than per vector provides a substantial
* performance improvement.
*/
- vfio_prepare_kvm_msi_virq_batch(vdev);
+ vfio_pci_prepare_kvm_msi_virq_batch(vdev);
- if (msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,
+ if (msix_set_vector_notifiers(pdev, vfio_msix_vector_use,
vfio_msix_vector_release, NULL)) {
error_report("vfio: msix_set_vector_notifiers failed");
}
- vfio_commit_kvm_msi_virq_batch(vdev);
+ vfio_pci_commit_kvm_msi_virq_batch(vdev);
if (vdev->nr_vectors) {
ret = vfio_enable_vectors(vdev, true);
@@ -722,30 +866,33 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev)
static void vfio_msi_enable(VFIOPCIDevice *vdev)
{
+ PCIDevice *pdev = PCI_DEVICE(vdev);
int ret, i;
vfio_disable_interrupts(vdev);
- vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev);
+ vdev->nr_vectors = msi_nr_vectors_allocated(pdev);
retry:
/*
* Setting vector notifiers needs to enable route for each vector.
* Deferring to commit the KVM routes once rather than per vector
* provides a substantial performance improvement.
*/
- vfio_prepare_kvm_msi_virq_batch(vdev);
+ vfio_pci_prepare_kvm_msi_virq_batch(vdev);
vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->nr_vectors);
for (i = 0; i < vdev->nr_vectors; i++) {
VFIOMSIVector *vector = &vdev->msi_vectors[i];
+ Error *local_err = NULL;
vector->vdev = vdev;
vector->virq = -1;
vector->use = true;
- if (event_notifier_init(&vector->interrupt, 0)) {
- error_report("vfio: Error: event_notifier_init failed");
+ if (!vfio_notifier_init(vdev, &vector->interrupt, "interrupt", i,
+ &local_err)) {
+ error_report_err(local_err);
}
qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
@@ -755,10 +902,10 @@ retry:
* Attempt to enable route through KVM irqchip,
* default to userspace handling if unavailable.
*/
- vfio_add_kvm_msi_virq(vdev, vector, i, false);
+ vfio_pci_add_kvm_msi_virq(vdev, vector, i, false);
}
- vfio_commit_kvm_msi_virq_batch(vdev);
+ vfio_pci_commit_kvm_msi_virq_batch(vdev);
/* Set interrupt type prior to possible interrupts */
vdev->interrupt = VFIO_INT_MSI;
@@ -801,11 +948,11 @@ static void vfio_msi_disable_common(VFIOPCIDevice *vdev)
VFIOMSIVector *vector = &vdev->msi_vectors[i];
if (vdev->msi_vectors[i].use) {
if (vector->virq >= 0) {
- vfio_remove_kvm_msi_virq(vector);
+ vfio_remove_kvm_msi_virq(vdev, vector, i);
}
qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
NULL, NULL, NULL);
- event_notifier_cleanup(&vector->interrupt);
+ vfio_notifier_cleanup(vdev, &vector->interrupt, "interrupt", i);
}
}
@@ -817,10 +964,11 @@ static void vfio_msi_disable_common(VFIOPCIDevice *vdev)
static void vfio_msix_disable(VFIOPCIDevice *vdev)
{
+ PCIDevice *pdev = PCI_DEVICE(vdev);
Error *err = NULL;
int i;
- msix_unset_vector_notifiers(&vdev->pdev);
+ msix_unset_vector_notifiers(pdev);
/*
* MSI-X will only release vectors if MSI-X is still enabled on the
@@ -828,8 +976,8 @@ static void vfio_msix_disable(VFIOPCIDevice *vdev)
*/
for (i = 0; i < vdev->nr_vectors; i++) {
if (vdev->msi_vectors[i].use) {
- vfio_msix_vector_release(&vdev->pdev, i);
- msix_vector_unuse(&vdev->pdev, i);
+ vfio_msix_vector_release(pdev, i);
+ msix_vector_unuse(pdev, i);
}
}
@@ -866,6 +1014,7 @@ static void vfio_msi_disable(VFIOPCIDevice *vdev)
static void vfio_update_msi(VFIOPCIDevice *vdev)
{
+ PCIDevice *pdev = PCI_DEVICE(vdev);
int i;
for (i = 0; i < vdev->nr_vectors; i++) {
@@ -876,8 +1025,8 @@ static void vfio_update_msi(VFIOPCIDevice *vdev)
continue;
}
- msg = msi_get_message(&vdev->pdev, i);
- vfio_update_kvm_msi_virq(vector, msg, &vdev->pdev);
+ msg = msi_get_message(pdev, i);
+ vfio_update_kvm_msi_virq(vector, msg, pdev);
}
}
@@ -984,7 +1133,7 @@ static int vfio_pci_config_space_write(VFIOPCIDevice *vdev, off_t offset,
{
return vdev->vbasedev.io_ops->region_write(&vdev->vbasedev,
VFIO_PCI_CONFIG_REGION_INDEX,
- offset, size, data);
+ offset, size, data, false);
}
static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
@@ -1039,13 +1188,14 @@ static const MemoryRegionOps vfio_rom_ops = {
static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
{
+ PCIDevice *pdev = PCI_DEVICE(vdev);
VFIODevice *vbasedev = &vdev->vbasedev;
uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK);
char *name;
- if (vdev->pdev.romfile || !vdev->pdev.rom_bar) {
+ if (pdev->romfile || !pdev->rom_bar) {
/* Since pci handles romfile, just print a message and return */
- if (vfio_opt_rom_in_denylist(vdev) && vdev->pdev.romfile) {
+ if (vfio_opt_rom_in_denylist(vdev) && pdev->romfile) {
warn_report("Device at %s is known to cause system instability"
" issues during option rom execution",
vdev->vbasedev.name);
@@ -1074,7 +1224,7 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
}
if (vfio_opt_rom_in_denylist(vdev)) {
- if (vdev->pdev.rom_bar > 0) {
+ if (pdev->rom_bar > 0) {
warn_report("Device at %s is known to cause system instability"
" issues during option rom execution",
vdev->vbasedev.name);
@@ -1093,12 +1243,12 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
name = g_strdup_printf("vfio[%s].rom", vdev->vbasedev.name);
- memory_region_init_io(&vdev->pdev.rom, OBJECT(vdev),
+ memory_region_init_io(&pdev->rom, OBJECT(vdev),
&vfio_rom_ops, vdev, name, size);
g_free(name);
- pci_register_bar(&vdev->pdev, PCI_ROM_SLOT,
- PCI_BASE_ADDRESS_SPACE_MEMORY, &vdev->pdev.rom);
+ pci_register_bar(pdev, PCI_ROM_SLOT,
+ PCI_BASE_ADDRESS_SPACE_MEMORY, &pdev->rom);
vdev->rom_read_failed = false;
}
@@ -1196,7 +1346,7 @@ static const MemoryRegionOps vfio_vga_ops = {
*/
static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar)
{
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
VFIORegion *region = &vdev->bars[bar].region;
MemoryRegion *mmap_mr, *region_mr, *base_mr;
PCIIORegion *r;
@@ -1242,7 +1392,7 @@ static void vfio_sub_page_bar_update_mapping(PCIDevice *pdev, int bar)
*/
uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
{
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
VFIODevice *vbasedev = &vdev->vbasedev;
uint32_t emu_bits = 0, emu_val = 0, phys_val = 0, val;
@@ -1276,7 +1426,7 @@ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
void vfio_pci_write_config(PCIDevice *pdev,
uint32_t addr, uint32_t val, int len)
{
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
VFIODevice *vbasedev = &vdev->vbasedev;
uint32_t val_le = cpu_to_le32(val);
int ret;
@@ -1371,6 +1521,7 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev)
static bool vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp)
{
+ PCIDevice *pdev = PCI_DEVICE(vdev);
uint16_t ctrl;
bool msi_64bit, msi_maskbit;
int ret, entries;
@@ -1391,7 +1542,7 @@ static bool vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp)
trace_vfio_msi_setup(vdev->vbasedev.name, pos);
- ret = msi_init(&vdev->pdev, pos, entries, msi_64bit, msi_maskbit, &err);
+ ret = msi_init(pdev, pos, entries, msi_64bit, msi_maskbit, &err);
if (ret < 0) {
if (ret == -ENOTSUP) {
return true;
@@ -1584,6 +1735,7 @@ static bool vfio_pci_relocate_msix(VFIOPCIDevice *vdev, Error **errp)
*/
static bool vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp)
{
+ PCIDevice *pdev = PCI_DEVICE(vdev);
uint8_t pos;
uint16_t ctrl;
uint32_t table, pba;
@@ -1591,7 +1743,7 @@ static bool vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp)
VFIOMSIXInfo *msix;
int ret;
- pos = pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSIX);
+ pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
if (!pos) {
return true;
}
@@ -1683,12 +1835,13 @@ static bool vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp)
static bool vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp)
{
+ PCIDevice *pdev = PCI_DEVICE(vdev);
int ret;
Error *err = NULL;
vdev->msix->pending = g_new0(unsigned long,
BITS_TO_LONGS(vdev->msix->entries));
- ret = msix_init(&vdev->pdev, vdev->msix->entries,
+ ret = msix_init(pdev, vdev->msix->entries,
vdev->bars[vdev->msix->table_bar].mr,
vdev->msix->table_bar, vdev->msix->table_offset,
vdev->bars[vdev->msix->pba_bar].mr,
@@ -1720,7 +1873,7 @@ static bool vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp)
* vector-use notifier is called, which occurs on unmask, we test whether
* PBA emulation is needed and again disable if not.
*/
- memory_region_set_enabled(&vdev->pdev.msix_pba_mmio, false);
+ memory_region_set_enabled(&pdev->msix_pba_mmio, false);
/*
* The emulated machine may provide a paravirt interface for MSIX setup
@@ -1732,18 +1885,20 @@ static bool vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp)
*/
if (object_property_get_bool(OBJECT(qdev_get_machine()),
"vfio-no-msix-emulation", NULL)) {
- memory_region_set_enabled(&vdev->pdev.msix_table_mmio, false);
+ memory_region_set_enabled(&pdev->msix_table_mmio, false);
}
return true;
}
-static void vfio_teardown_msi(VFIOPCIDevice *vdev)
+void vfio_pci_teardown_msi(VFIOPCIDevice *vdev)
{
- msi_uninit(&vdev->pdev);
+ PCIDevice *pdev = PCI_DEVICE(vdev);
+
+ msi_uninit(pdev);
if (vdev->msix) {
- msix_uninit(&vdev->pdev,
+ msix_uninit(pdev,
vdev->bars[vdev->msix->table_bar].mr,
vdev->bars[vdev->msix->pba_bar].mr);
g_free(vdev->msix->pending);
@@ -1788,6 +1943,9 @@ static void vfio_bar_prepare(VFIOPCIDevice *vdev, int nr)
bar->type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK :
~PCI_BASE_ADDRESS_MEM_MASK);
bar->size = bar->region.size;
+
+ /* IO regions are sync, memory can be async */
+ bar->region.post_wr = (bar->ioport == 0);
}
static void vfio_bars_prepare(VFIOPCIDevice *vdev)
@@ -1801,6 +1959,7 @@ static void vfio_bars_prepare(VFIOPCIDevice *vdev)
static void vfio_bar_register(VFIOPCIDevice *vdev, int nr)
{
+ PCIDevice *pdev = PCI_DEVICE(vdev);
VFIOBAR *bar = &vdev->bars[nr];
char *name;
@@ -1822,7 +1981,7 @@ static void vfio_bar_register(VFIOPCIDevice *vdev, int nr)
}
}
- pci_register_bar(&vdev->pdev, nr, bar->type, bar->mr);
+ pci_register_bar(pdev, nr, bar->type, bar->mr);
}
static void vfio_bars_register(VFIOPCIDevice *vdev)
@@ -1834,8 +1993,9 @@ static void vfio_bars_register(VFIOPCIDevice *vdev)
}
}
-static void vfio_bars_exit(VFIOPCIDevice *vdev)
+void vfio_pci_bars_exit(VFIOPCIDevice *vdev)
{
+ PCIDevice *pdev = PCI_DEVICE(vdev);
int i;
for (i = 0; i < PCI_ROM_SLOT; i++) {
@@ -1849,7 +2009,7 @@ static void vfio_bars_exit(VFIOPCIDevice *vdev)
}
if (vdev->vga) {
- pci_unregister_vga(&vdev->pdev);
+ pci_unregister_vga(pdev);
vfio_vga_quirk_exit(vdev);
}
}
@@ -1865,7 +2025,6 @@ static void vfio_bars_finalize(VFIOPCIDevice *vdev)
vfio_region_finalize(&bar->region);
if (bar->mr) {
assert(bar->size);
- object_unparent(OBJECT(bar->mr));
g_free(bar->mr);
bar->mr = NULL;
}
@@ -1873,9 +2032,6 @@ static void vfio_bars_finalize(VFIOPCIDevice *vdev)
if (vdev->vga) {
vfio_vga_quirk_finalize(vdev);
- for (i = 0; i < ARRAY_SIZE(vdev->vga->region); i++) {
- object_unparent(OBJECT(&vdev->vga->region[i].mem));
- }
g_free(vdev->vga);
}
}
@@ -1921,8 +2077,10 @@ static void vfio_set_word_bits(uint8_t *buf, uint16_t val, uint16_t mask)
static void vfio_add_emulated_word(VFIOPCIDevice *vdev, int pos,
uint16_t val, uint16_t mask)
{
- vfio_set_word_bits(vdev->pdev.config + pos, val, mask);
- vfio_set_word_bits(vdev->pdev.wmask + pos, ~mask, mask);
+ PCIDevice *pdev = PCI_DEVICE(vdev);
+
+ vfio_set_word_bits(pdev->config + pos, val, mask);
+ vfio_set_word_bits(pdev->wmask + pos, ~mask, mask);
vfio_set_word_bits(vdev->emulated_config_bits + pos, mask, mask);
}
@@ -1934,8 +2092,10 @@ static void vfio_set_long_bits(uint8_t *buf, uint32_t val, uint32_t mask)
static void vfio_add_emulated_long(VFIOPCIDevice *vdev, int pos,
uint32_t val, uint32_t mask)
{
- vfio_set_long_bits(vdev->pdev.config + pos, val, mask);
- vfio_set_long_bits(vdev->pdev.wmask + pos, ~mask, mask);
+ PCIDevice *pdev = PCI_DEVICE(vdev);
+
+ vfio_set_long_bits(pdev->config + pos, val, mask);
+ vfio_set_long_bits(pdev->wmask + pos, ~mask, mask);
vfio_set_long_bits(vdev->emulated_config_bits + pos, mask, mask);
}
@@ -1943,7 +2103,8 @@ static void vfio_pci_enable_rp_atomics(VFIOPCIDevice *vdev)
{
struct vfio_device_info_cap_pci_atomic_comp *cap;
g_autofree struct vfio_device_info *info = NULL;
- PCIBus *bus = pci_get_bus(&vdev->pdev);
+ PCIDevice *pdev = PCI_DEVICE(vdev);
+ PCIBus *bus = pci_get_bus(pdev);
PCIDevice *parent = bus->parent_dev;
struct vfio_info_cap_header *hdr;
uint32_t mask = 0;
@@ -1959,8 +2120,8 @@ static void vfio_pci_enable_rp_atomics(VFIOPCIDevice *vdev)
if (pci_bus_is_root(bus) || !parent || !parent->exp.exp_cap ||
pcie_cap_get_type(parent) != PCI_EXP_TYPE_ROOT_PORT ||
pcie_cap_get_version(parent) != PCI_EXP_FLAGS_VER2 ||
- vdev->pdev.devfn ||
- vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
+ pdev->devfn ||
+ pdev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
return;
}
@@ -2004,8 +2165,10 @@ static void vfio_pci_enable_rp_atomics(VFIOPCIDevice *vdev)
static void vfio_pci_disable_rp_atomics(VFIOPCIDevice *vdev)
{
+ PCIDevice *pdev = PCI_DEVICE(vdev);
+
if (vdev->clear_parent_atomics_on_exit) {
- PCIDevice *parent = pci_get_bus(&vdev->pdev)->parent_dev;
+ PCIDevice *parent = pci_get_bus(pdev)->parent_dev;
uint8_t *pos = parent->config + parent->exp.exp_cap + PCI_EXP_DEVCAP2;
pci_long_test_and_clear_mask(pos, PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
@@ -2017,10 +2180,11 @@ static void vfio_pci_disable_rp_atomics(VFIOPCIDevice *vdev)
static bool vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size,
Error **errp)
{
+ PCIDevice *pdev = PCI_DEVICE(vdev);
uint16_t flags;
uint8_t type;
- flags = pci_get_word(vdev->pdev.config + pos + PCI_CAP_FLAGS);
+ flags = pci_get_word(pdev->config + pos + PCI_CAP_FLAGS);
type = (flags & PCI_EXP_FLAGS_TYPE) >> 4;
if (type != PCI_EXP_TYPE_ENDPOINT &&
@@ -2032,8 +2196,8 @@ static bool vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size,
return false;
}
- if (!pci_bus_is_express(pci_get_bus(&vdev->pdev))) {
- PCIBus *bus = pci_get_bus(&vdev->pdev);
+ if (!pci_bus_is_express(pci_get_bus(pdev))) {
+ PCIBus *bus = pci_get_bus(pdev);
PCIDevice *bridge;
/*
@@ -2065,7 +2229,7 @@ static bool vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size,
return true;
}
- } else if (pci_bus_is_root(pci_get_bus(&vdev->pdev))) {
+ } else if (pci_bus_is_root(pci_get_bus(pdev))) {
/*
* On a Root Complex bus Endpoints become Root Complex Integrated
* Endpoints, which changes the type and clears the LNK & LNK2 fields.
@@ -2133,20 +2297,20 @@ static bool vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size,
1, PCI_EXP_FLAGS_VERS);
}
- pos = pci_add_capability(&vdev->pdev, PCI_CAP_ID_EXP, pos, size,
- errp);
+ pos = pci_add_capability(pdev, PCI_CAP_ID_EXP, pos, size, errp);
if (pos < 0) {
return false;
}
- vdev->pdev.exp.exp_cap = pos;
+ pdev->exp.exp_cap = pos;
return true;
}
static void vfio_check_pcie_flr(VFIOPCIDevice *vdev, uint8_t pos)
{
- uint32_t cap = pci_get_long(vdev->pdev.config + pos + PCI_EXP_DEVCAP);
+ PCIDevice *pdev = PCI_DEVICE(vdev);
+ uint32_t cap = pci_get_long(pdev->config + pos + PCI_EXP_DEVCAP);
if (cap & PCI_EXP_DEVCAP_FLR) {
trace_vfio_check_pcie_flr(vdev->vbasedev.name);
@@ -2156,7 +2320,8 @@ static void vfio_check_pcie_flr(VFIOPCIDevice *vdev, uint8_t pos)
static void vfio_check_pm_reset(VFIOPCIDevice *vdev, uint8_t pos)
{
- uint16_t csr = pci_get_word(vdev->pdev.config + pos + PCI_PM_CTRL);
+ PCIDevice *pdev = PCI_DEVICE(vdev);
+ uint16_t csr = pci_get_word(pdev->config + pos + PCI_PM_CTRL);
if (!(csr & PCI_PM_CTRL_NO_SOFT_RESET)) {
trace_vfio_check_pm_reset(vdev->vbasedev.name);
@@ -2166,7 +2331,8 @@ static void vfio_check_pm_reset(VFIOPCIDevice *vdev, uint8_t pos)
static void vfio_check_af_flr(VFIOPCIDevice *vdev, uint8_t pos)
{
- uint8_t cap = pci_get_byte(vdev->pdev.config + pos + PCI_AF_CAP);
+ PCIDevice *pdev = PCI_DEVICE(vdev);
+ uint8_t cap = pci_get_byte(pdev->config + pos + PCI_AF_CAP);
if ((cap & PCI_AF_CAP_TP) && (cap & PCI_AF_CAP_FLR)) {
trace_vfio_check_af_flr(vdev->vbasedev.name);
@@ -2177,7 +2343,7 @@ static void vfio_check_af_flr(VFIOPCIDevice *vdev, uint8_t pos)
static bool vfio_add_vendor_specific_cap(VFIOPCIDevice *vdev, int pos,
uint8_t size, Error **errp)
{
- PCIDevice *pdev = &vdev->pdev;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
pos = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, size, errp);
if (pos < 0) {
@@ -2199,7 +2365,7 @@ static bool vfio_add_vendor_specific_cap(VFIOPCIDevice *vdev, int pos,
static bool vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp)
{
ERRP_GUARD();
- PCIDevice *pdev = &vdev->pdev;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
uint8_t cap_id, next, size;
bool ret;
@@ -2285,17 +2451,18 @@ static bool vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp)
static int vfio_setup_rebar_ecap(VFIOPCIDevice *vdev, uint16_t pos)
{
+ PCIDevice *pdev = PCI_DEVICE(vdev);
uint32_t ctrl;
int i, nbar;
- ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL);
+ ctrl = pci_get_long(pdev->config + pos + PCI_REBAR_CTRL);
nbar = (ctrl & PCI_REBAR_CTRL_NBAR_MASK) >> PCI_REBAR_CTRL_NBAR_SHIFT;
for (i = 0; i < nbar; i++) {
uint32_t cap;
int size;
- ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL + (i * 8));
+ ctrl = pci_get_long(pdev->config + pos + PCI_REBAR_CTRL + (i * 8));
size = (ctrl & PCI_REBAR_CTRL_BAR_SIZE) >> PCI_REBAR_CTRL_BAR_SHIFT;
/* The cap register reports sizes 1MB to 128TB, with 4 reserved bits */
@@ -2333,7 +2500,7 @@ static int vfio_setup_rebar_ecap(VFIOPCIDevice *vdev, uint16_t pos)
static void vfio_add_ext_cap(VFIOPCIDevice *vdev)
{
- PCIDevice *pdev = &vdev->pdev;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
uint32_t header;
uint16_t cap_id, next, size;
uint8_t cap_ver;
@@ -2425,9 +2592,9 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev)
g_free(config);
}
-static bool vfio_add_capabilities(VFIOPCIDevice *vdev, Error **errp)
+bool vfio_pci_add_capabilities(VFIOPCIDevice *vdev, Error **errp)
{
- PCIDevice *pdev = &vdev->pdev;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST) ||
!pdev->config[PCI_CAPABILITY_LIST]) {
@@ -2444,7 +2611,7 @@ static bool vfio_add_capabilities(VFIOPCIDevice *vdev, Error **errp)
void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
{
- PCIDevice *pdev = &vdev->pdev;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
uint16_t cmd;
vfio_disable_interrupts(vdev);
@@ -2640,8 +2807,8 @@ static const VMStateDescription vmstate_vfio_pci_config = {
.version_id = 1,
.minimum_version_id = 1,
.fields = (const VMStateField[]) {
- VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice),
- VMSTATE_MSIX_TEST(pdev, VFIOPCIDevice, vfio_msix_present),
+ VMSTATE_PCI_DEVICE(parent_obj, VFIOPCIDevice),
+ VMSTATE_MSIX_TEST(parent_obj, VFIOPCIDevice, vfio_msix_present),
VMSTATE_END_OF_LIST()
},
.subsections = (const VMStateDescription * const []) {
@@ -2654,23 +2821,26 @@ static int vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f, Error **errp)
{
VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
- return vmstate_save_state_with_err(f, &vmstate_vfio_pci_config, vdev, NULL,
- errp);
+ return vmstate_save_state(f, &vmstate_vfio_pci_config, vdev, NULL,
+ errp);
}
static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f)
{
VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
- PCIDevice *pdev = &vdev->pdev;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
pcibus_t old_addr[PCI_NUM_REGIONS - 1];
int bar, ret;
+ Error *local_err = NULL;
for (bar = 0; bar < PCI_ROM_SLOT; bar++) {
old_addr[bar] = pdev->io_regions[bar].addr;
}
- ret = vmstate_load_state(f, &vmstate_vfio_pci_config, vdev, 1);
+ ret = vmstate_load_state(f, &vmstate_vfio_pci_config, vdev, 1,
+ &local_err);
if (ret) {
+ error_report_err(local_err);
return ret;
}
@@ -2698,10 +2868,33 @@ static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f)
return ret;
}
+/* Transform from VFIODevice to VFIOPCIDevice. Return NULL if fails. */
+VFIOPCIDevice *vfio_pci_from_vfio_device(VFIODevice *vbasedev)
+{
+ if (vbasedev && vbasedev->type == VFIO_DEVICE_TYPE_PCI) {
+ return container_of(vbasedev, VFIOPCIDevice, vbasedev);
+ }
+ return NULL;
+}
+
+void vfio_sub_page_bar_update_mappings(VFIOPCIDevice *vdev)
+{
+ PCIDevice *pdev = PCI_DEVICE(vdev);
+ int page_size = qemu_real_host_page_size();
+ int bar;
+
+ for (bar = 0; bar < PCI_ROM_SLOT; bar++) {
+ PCIIORegion *r = &pdev->io_regions[bar];
+ if (r->addr != PCI_BAR_UNMAPPED && r->size > 0 && r->size < page_size) {
+ vfio_sub_page_bar_update_mapping(pdev, bar);
+ }
+ }
+}
+
static VFIODeviceOps vfio_pci_ops = {
.vfio_compute_needs_reset = vfio_pci_compute_needs_reset,
.vfio_hot_reset_multi = vfio_pci_hot_reset_multi,
- .vfio_eoi = vfio_intx_eoi,
+ .vfio_eoi = vfio_pci_intx_eoi,
.vfio_get_object = vfio_pci_get_object,
.vfio_save_config = vfio_pci_save_config,
.vfio_load_config = vfio_pci_load_config,
@@ -2765,15 +2958,12 @@ bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
"vfio-vga-io@0x3c0",
QEMU_PCI_VGA_IO_HI_SIZE);
- pci_register_vga(&vdev->pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem,
- &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem,
- &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem);
-
return true;
}
-static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
+bool vfio_pci_populate_device(VFIOPCIDevice *vdev, Error **errp)
{
+ PCIDevice *pdev = PCI_DEVICE(vdev);
VFIODevice *vbasedev = &vdev->vbasedev;
struct vfio_region_info *reg_info = NULL;
struct vfio_irq_info irq_info;
@@ -2818,14 +3008,14 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
return false;
}
- trace_vfio_populate_device_config(vdev->vbasedev.name,
+ trace_vfio_pci_populate_device_config(vdev->vbasedev.name,
(unsigned long)reg_info->size,
(unsigned long)reg_info->offset,
(unsigned long)reg_info->flags);
vdev->config_size = reg_info->size;
if (vdev->config_size == PCI_CONFIG_SPACE_SIZE) {
- vdev->pdev.cap_present &= ~QEMU_PCI_CAP_EXPRESS;
+ pdev->cap_present &= ~QEMU_PCI_CAP_EXPRESS;
}
vdev->config_offset = reg_info->offset;
@@ -2840,7 +3030,7 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
ret = vfio_device_get_irq_info(vbasedev, VFIO_PCI_ERR_IRQ_INDEX, &irq_info);
if (ret) {
/* This can fail for an old kernel or legacy PCI dev */
- trace_vfio_populate_device_get_irq_info_failure(strerror(-ret));
+ trace_vfio_pci_populate_device_get_irq_info_failure(strerror(-ret));
} else if (irq_info.count == 1) {
vdev->pci_aer = true;
} else {
@@ -2852,11 +3042,24 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
return true;
}
-static void vfio_pci_put_device(VFIOPCIDevice *vdev)
+void vfio_pci_put_device(VFIOPCIDevice *vdev)
{
+ vfio_display_finalize(vdev);
+ vfio_bars_finalize(vdev);
+ vfio_cpr_pci_unregister_device(vdev);
+ g_free(vdev->emulated_config_bits);
+ g_free(vdev->rom);
+ /*
+ * XXX Leaking igd_opregion is not an oversight, we can't remove the
+ * fw_cfg entry therefore leaking this allocation seems like the safest
+ * option.
+ *
+ * g_free(vdev->igd_opregion);
+ */
+
vfio_device_detach(&vdev->vbasedev);
- g_free(vdev->vbasedev.name);
+ vfio_device_free_name(&vdev->vbasedev);
g_free(vdev->msix);
}
@@ -2888,7 +3091,7 @@ static void vfio_err_notifier_handler(void *opaque)
* and continue after disabling error recovery support for the
* device.
*/
-static void vfio_register_err_notifier(VFIOPCIDevice *vdev)
+void vfio_pci_register_err_notifier(VFIOPCIDevice *vdev)
{
Error *err = NULL;
int32_t fd;
@@ -2897,8 +3100,9 @@ static void vfio_register_err_notifier(VFIOPCIDevice *vdev)
return;
}
- if (event_notifier_init(&vdev->err_notifier, 0)) {
- error_report("vfio: Unable to init event notifier for error detection");
+ if (!vfio_notifier_init(vdev, &vdev->err_notifier, "err_notifier", 0,
+ &err)) {
+ error_report_err(err);
vdev->pci_aer = false;
return;
}
@@ -2906,11 +3110,16 @@ static void vfio_register_err_notifier(VFIOPCIDevice *vdev)
fd = event_notifier_get_fd(&vdev->err_notifier);
qemu_set_fd_handler(fd, vfio_err_notifier_handler, NULL, vdev);
+ /* Do not alter irq_signaling during vfio_realize for cpr */
+ if (cpr_is_incoming()) {
+ return;
+ }
+
if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_ERR_IRQ_INDEX, 0,
VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
qemu_set_fd_handler(fd, NULL, NULL, vdev);
- event_notifier_cleanup(&vdev->err_notifier);
+ vfio_notifier_cleanup(vdev, &vdev->err_notifier, "err_notifier", 0);
vdev->pci_aer = false;
}
}
@@ -2929,7 +3138,7 @@ static void vfio_unregister_err_notifier(VFIOPCIDevice *vdev)
}
qemu_set_fd_handler(event_notifier_get_fd(&vdev->err_notifier),
NULL, NULL, vdev);
- event_notifier_cleanup(&vdev->err_notifier);
+ vfio_notifier_cleanup(vdev, &vdev->err_notifier, "err_notifier", 0);
}
static void vfio_req_notifier_handler(void *opaque)
@@ -2947,7 +3156,7 @@ static void vfio_req_notifier_handler(void *opaque)
}
}
-static void vfio_register_req_notifier(VFIOPCIDevice *vdev)
+void vfio_pci_register_req_notifier(VFIOPCIDevice *vdev)
{
struct vfio_irq_info irq_info;
Error *err = NULL;
@@ -2964,19 +3173,26 @@ static void vfio_register_req_notifier(VFIOPCIDevice *vdev)
return;
}
- if (event_notifier_init(&vdev->req_notifier, 0)) {
- error_report("vfio: Unable to init event notifier for device request");
+ if (!vfio_notifier_init(vdev, &vdev->req_notifier, "req_notifier", 0,
+ &err)) {
+ error_report_err(err);
return;
}
fd = event_notifier_get_fd(&vdev->req_notifier);
qemu_set_fd_handler(fd, vfio_req_notifier_handler, NULL, vdev);
+ /* Do not alter irq_signaling during vfio_realize for cpr */
+ if (cpr_is_incoming()) {
+ vdev->req_enabled = true;
+ return;
+ }
+
if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_REQ_IRQ_INDEX, 0,
VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
qemu_set_fd_handler(fd, NULL, NULL, vdev);
- event_notifier_cleanup(&vdev->req_notifier);
+ vfio_notifier_cleanup(vdev, &vdev->req_notifier, "req_notifier", 0);
} else {
vdev->req_enabled = true;
}
@@ -2996,15 +3212,38 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
}
qemu_set_fd_handler(event_notifier_get_fd(&vdev->req_notifier),
NULL, NULL, vdev);
- event_notifier_cleanup(&vdev->req_notifier);
+ vfio_notifier_cleanup(vdev, &vdev->req_notifier, "req_notifier", 0);
vdev->req_enabled = false;
}
-static bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp)
+void vfio_pci_config_register_vga(VFIOPCIDevice *vdev)
{
- PCIDevice *pdev = &vdev->pdev;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
+ assert(vdev->vga != NULL);
+
+ pci_register_vga(pdev, &vdev->vga->region[QEMU_PCI_VGA_MEM].mem,
+ &vdev->vga->region[QEMU_PCI_VGA_IO_LO].mem,
+ &vdev->vga->region[QEMU_PCI_VGA_IO_HI].mem);
+}
+
+bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp)
+{
+ PCIDevice *pdev = PCI_DEVICE(vdev);
VFIODevice *vbasedev = &vdev->vbasedev;
+ uint32_t config_space_size;
+ int ret;
+
+ config_space_size = MIN(pci_config_size(pdev), vdev->config_size);
+
+ /* Get a copy of config space */
+ ret = vfio_pci_config_space_read(vdev, 0, config_space_size,
+ pdev->config);
+ if (ret < (int)config_space_size) {
+ ret = ret < 0 ? -ret : EFAULT;
+ error_setg_errno(errp, ret, "failed to read device config space");
+ return false;
+ }
/* vfio emulates a lot for us, but some bits need extra love */
vdev->emulated_config_bits = g_malloc0(vdev->config_size);
@@ -3062,15 +3301,32 @@ static bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp)
vdev->sub_device_id);
}
+ /*
+ * Class code is a 24-bit value at config space 0x09. Allow overriding it
+ * with any 24-bit value.
+ */
+ if (vdev->class_code != PCI_ANY_ID) {
+ if (vdev->class_code > 0xffffff) {
+ error_setg(errp, "invalid PCI class code provided");
+ return false;
+ }
+ /* Higher 24 bits of PCI_CLASS_REVISION are class code */
+ vfio_add_emulated_long(vdev, PCI_CLASS_REVISION,
+ vdev->class_code << 8, ~0xff);
+ trace_vfio_pci_emulated_class_code(vbasedev->name, vdev->class_code);
+ } else {
+ vdev->class_code = pci_get_long(pdev->config + PCI_CLASS_REVISION) >> 8;
+ }
+
/* QEMU can change multi-function devices to single function, or reverse */
vdev->emulated_config_bits[PCI_HEADER_TYPE] =
PCI_HEADER_TYPE_MULTI_FUNCTION;
/* Restore or clear multifunction, this is always controlled by QEMU */
- if (vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
- vdev->pdev.config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
+ if (pdev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
+ pdev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
} else {
- vdev->pdev.config[PCI_HEADER_TYPE] &= ~PCI_HEADER_TYPE_MULTI_FUNCTION;
+ pdev->config[PCI_HEADER_TYPE] &= ~PCI_HEADER_TYPE_MULTI_FUNCTION;
}
/*
@@ -3078,8 +3334,8 @@ static bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp)
* BAR, such as might be the case with the option ROM, we can get
* confusing, unwritable, residual addresses from the host here.
*/
- memset(&vdev->pdev.config[PCI_BASE_ADDRESS_0], 0, 24);
- memset(&vdev->pdev.config[PCI_ROM_ADDRESS], 0, 4);
+ memset(&pdev->config[PCI_BASE_ADDRESS_0], 0, 24);
+ memset(&pdev->config[PCI_ROM_ADDRESS], 0, 4);
vfio_pci_size_rom(vdev);
@@ -3091,12 +3347,16 @@ static bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp)
vfio_bars_register(vdev);
+ if (vdev->vga && vfio_is_vga(vdev)) {
+ vfio_pci_config_register_vga(vdev);
+ }
+
return true;
}
-static bool vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp)
+bool vfio_pci_interrupt_setup(VFIOPCIDevice *vdev, Error **errp)
{
- PCIDevice *pdev = &vdev->pdev;
+ PCIDevice *pdev = PCI_DEVICE(vdev);
/* QEMU emulates all of MSI & MSIX */
if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
@@ -3109,16 +3369,22 @@ static bool vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp)
vdev->msi_cap_size);
}
- if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) {
+ if (vfio_pci_read_config(pdev, PCI_INTERRUPT_PIN, 1)) {
vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
vfio_intx_mmap_enable, vdev);
- pci_device_set_intx_routing_notifier(&vdev->pdev,
+ pci_device_set_intx_routing_notifier(pdev,
vfio_intx_routing_notifier);
vdev->irqchip_change_notifier.notify = vfio_irqchip_change;
kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier);
- if (!vfio_intx_enable(vdev, errp)) {
+
+ /*
+ * During CPR, do not call vfio_intx_enable at this time. Instead,
+ * call it from vfio_pci_post_load after the intx routing data has
+ * been loaded from vmstate.
+ */
+ if (!cpr_is_incoming() && !vfio_intx_enable(vdev, errp)) {
timer_free(vdev->intx.mmap_timer);
- pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
+ pci_device_set_intx_routing_notifier(pdev, NULL);
kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
return false;
}
@@ -3126,15 +3392,14 @@ static bool vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp)
return true;
}
-static void vfio_realize(PCIDevice *pdev, Error **errp)
+static void vfio_pci_realize(PCIDevice *pdev, Error **errp)
{
ERRP_GUARD();
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
VFIODevice *vbasedev = &vdev->vbasedev;
- int i, ret;
+ int i;
char uuid[UUID_STR_LEN];
g_autofree char *name = NULL;
- uint32_t config_space_size;
if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
if (!(~vdev->host.domain || ~vdev->host.bus ||
@@ -3185,18 +3450,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
goto error;
}
- if (!vfio_populate_device(vdev, errp)) {
- goto error;
- }
-
- config_space_size = MIN(pci_config_size(&vdev->pdev), vdev->config_size);
-
- /* Get a copy of config space */
- ret = vfio_pci_config_space_read(vdev, 0, config_space_size,
- vdev->pdev.config);
- if (ret < (int)config_space_size) {
- ret = ret < 0 ? -ret : EFAULT;
- error_setg_errno(errp, ret, "failed to read device config space");
+ if (!vfio_pci_populate_device(vdev, errp)) {
goto error;
}
@@ -3210,7 +3464,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
goto out_teardown;
}
- if (!vfio_add_capabilities(vdev, errp)) {
+ if (!vfio_pci_add_capabilities(vdev, errp)) {
goto out_unset_idev;
}
@@ -3226,7 +3480,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
vfio_bar_quirk_setup(vdev, i);
}
- if (!vfio_interrupt_setup(vdev, errp)) {
+ if (!vfio_pci_interrupt_setup(vdev, errp)) {
goto out_unset_idev;
}
@@ -3270,9 +3524,10 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
}
}
- vfio_register_err_notifier(vdev);
- vfio_register_req_notifier(vdev);
+ vfio_pci_register_err_notifier(vdev);
+ vfio_pci_register_req_notifier(vdev);
vfio_setup_resetfn_quirk(vdev);
+ vfio_cpr_pci_register_device(vdev);
return;
@@ -3280,7 +3535,7 @@ out_deregister:
if (vdev->interrupt == VFIO_INT_INTx) {
vfio_intx_disable(vdev);
}
- pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
+ pci_device_set_intx_routing_notifier(pdev, NULL);
if (vdev->irqchip_change_notifier.notify) {
kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
}
@@ -3292,38 +3547,27 @@ out_unset_idev:
pci_device_unset_iommu_device(pdev);
}
out_teardown:
- vfio_teardown_msi(vdev);
- vfio_bars_exit(vdev);
+ vfio_pci_teardown_msi(vdev);
+ vfio_pci_bars_exit(vdev);
error:
error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name);
}
-static void vfio_instance_finalize(Object *obj)
+static void vfio_pci_finalize(Object *obj)
{
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(obj);
- vfio_display_finalize(vdev);
- vfio_bars_finalize(vdev);
- g_free(vdev->emulated_config_bits);
- g_free(vdev->rom);
- /*
- * XXX Leaking igd_opregion is not an oversight, we can't remove the
- * fw_cfg entry therefore leaking this allocation seems like the safest
- * option.
- *
- * g_free(vdev->igd_opregion);
- */
vfio_pci_put_device(vdev);
}
static void vfio_exitfn(PCIDevice *pdev)
{
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(pdev);
VFIODevice *vbasedev = &vdev->vbasedev;
vfio_unregister_req_notifier(vdev);
vfio_unregister_err_notifier(vdev);
- pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
+ pci_device_set_intx_routing_notifier(pdev, NULL);
if (vdev->irqchip_change_notifier.notify) {
kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
}
@@ -3331,9 +3575,9 @@ static void vfio_exitfn(PCIDevice *pdev)
if (vdev->intx.mmap_timer) {
timer_free(vdev->intx.mmap_timer);
}
- vfio_teardown_msi(vdev);
+ vfio_pci_teardown_msi(vdev);
vfio_pci_disable_rp_atomics(vdev);
- vfio_bars_exit(vdev);
+ vfio_pci_bars_exit(vdev);
vfio_migration_exit(vbasedev);
if (!vbasedev->mdev) {
pci_device_unset_iommu_device(pdev);
@@ -3342,7 +3586,12 @@ static void vfio_exitfn(PCIDevice *pdev)
static void vfio_pci_reset(DeviceState *dev)
{
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(dev);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(dev);
+
+ /* Do not reset the device during qemu_system_reset prior to cpr load */
+ if (cpr_is_incoming()) {
+ return;
+ }
trace_vfio_pci_reset(vdev->vbasedev.name);
@@ -3379,10 +3628,10 @@ post_reset:
vfio_pci_post_reset(vdev);
}
-static void vfio_instance_init(Object *obj)
+static void vfio_pci_init(Object *obj)
{
PCIDevice *pci_dev = PCI_DEVICE(obj);
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(obj);
VFIODevice *vbasedev = &vdev->vbasedev;
device_add_bootindex_property(obj, &vdev->bootindex,
@@ -3401,9 +3650,16 @@ static void vfio_instance_init(Object *obj)
/* QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command
* line, therefore, no need to wait to realize like other devices */
pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
+
+ /*
+ * A device that is resuming for cpr is already configured, so do not
+ * reset it during qemu_system_reset prior to cpr load, else interrupts
+ * may be lost.
+ */
+ pci_dev->cap_present |= QEMU_PCI_SKIP_RESET_ON_CPR;
}
-static void vfio_pci_base_dev_class_init(ObjectClass *klass, const void *data)
+static void vfio_pci_device_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);
@@ -3415,12 +3671,12 @@ static void vfio_pci_base_dev_class_init(ObjectClass *klass, const void *data)
pdc->config_write = vfio_pci_write_config;
}
-static const TypeInfo vfio_pci_base_dev_info = {
- .name = TYPE_VFIO_PCI_BASE,
+static const TypeInfo vfio_pci_device_info = {
+ .name = TYPE_VFIO_PCI_DEVICE,
.parent = TYPE_PCI_DEVICE,
- .instance_size = 0,
+ .instance_size = sizeof(VFIOPCIDevice),
.abstract = true,
- .class_init = vfio_pci_base_dev_class_init,
+ .class_init = vfio_pci_device_class_init,
.interfaces = (const InterfaceInfo[]) {
{ INTERFACE_PCIE_DEVICE },
{ INTERFACE_CONVENTIONAL_PCI_DEVICE },
@@ -3430,7 +3686,7 @@ static const TypeInfo vfio_pci_base_dev_info = {
static PropertyInfo vfio_pci_migration_multifd_transfer_prop;
-static const Property vfio_pci_dev_properties[] = {
+static const Property vfio_pci_properties[] = {
DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host),
DEFINE_PROP_UUID_NODEFAULT("vf-token", VFIOPCIDevice, vf_token),
DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev),
@@ -3462,6 +3718,11 @@ static const Property vfio_pci_dev_properties[] = {
vbasedev.migration_multifd_transfer,
vfio_pci_migration_multifd_transfer_prop, OnOffAuto,
.set_default = true, .defval.i = ON_OFF_AUTO_AUTO),
+ DEFINE_PROP_ON_OFF_AUTO("x-migration-load-config-after-iter", VFIOPCIDevice,
+ vbasedev.migration_load_config_after_iter,
+ ON_OFF_AUTO_AUTO),
+ DEFINE_PROP_SIZE("x-migration-max-queued-buffers-size", VFIOPCIDevice,
+ vbasedev.migration_max_queued_buffers_size, UINT64_MAX),
DEFINE_PROP_BOOL("migration-events", VFIOPCIDevice,
vbasedev.migration_events, false),
DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
@@ -3482,6 +3743,8 @@ static const Property vfio_pci_dev_properties[] = {
sub_vendor_id, PCI_ANY_ID),
DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
sub_device_id, PCI_ANY_ID),
+ DEFINE_PROP_UINT32("x-pci-class-code", VFIOPCIDevice,
+ class_code, PCI_ANY_ID),
DEFINE_PROP_UINT32("x-igd-gms", VFIOPCIDevice, igd_gms, 0),
DEFINE_PROP_UNSIGNED_NODEFAULT("x-nv-gpudirect-clique", VFIOPCIDevice,
nv_gpudirect_clique,
@@ -3498,23 +3761,24 @@ static const Property vfio_pci_dev_properties[] = {
#ifdef CONFIG_IOMMUFD
static void vfio_pci_set_fd(Object *obj, const char *str, Error **errp)
{
- VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
+ VFIOPCIDevice *vdev = VFIO_PCI_DEVICE(obj);
vfio_device_set_fd(&vdev->vbasedev, str, errp);
}
#endif
-static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data)
+static void vfio_pci_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);
device_class_set_legacy_reset(dc, vfio_pci_reset);
- device_class_set_props(dc, vfio_pci_dev_properties);
+ device_class_set_props(dc, vfio_pci_properties);
#ifdef CONFIG_IOMMUFD
object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd);
#endif
+ dc->vmsd = &vfio_cpr_pci_vmstate;
dc->desc = "VFIO-based PCI device assignment";
- pdc->realize = vfio_realize;
+ pdc->realize = vfio_pci_realize;
object_class_property_set_description(klass, /* 1.3 */
"host",
@@ -3635,29 +3899,44 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data)
"x-migration-multifd-transfer",
"Transfer this device state via "
"multifd channels when live migrating it");
-}
-
-static const TypeInfo vfio_pci_dev_info = {
+ object_class_property_set_description(klass, /* 10.1 */
+ "x-migration-load-config-after-iter",
+ "Start the config load only after "
+ "all iterables were loaded (during "
+ "non-iterables loading phase) when "
+ "doing live migration of device state "
+ "via multifd channels");
+ object_class_property_set_description(klass, /* 10.1 */
+ "x-migration-max-queued-buffers-size",
+ "Maximum size of in-flight VFIO "
+ "device state buffers queued at the "
+ "destination when doing live "
+ "migration of device state via "
+ "multifd channels");
+}
+
+static const TypeInfo vfio_pci_info = {
.name = TYPE_VFIO_PCI,
- .parent = TYPE_VFIO_PCI_BASE,
- .instance_size = sizeof(VFIOPCIDevice),
- .class_init = vfio_pci_dev_class_init,
- .instance_init = vfio_instance_init,
- .instance_finalize = vfio_instance_finalize,
+ .parent = TYPE_VFIO_PCI_DEVICE,
+ .class_init = vfio_pci_class_init,
+ .instance_init = vfio_pci_init,
+ .instance_finalize = vfio_pci_finalize,
};
-static const Property vfio_pci_dev_nohotplug_properties[] = {
+static const Property vfio_pci_nohotplug_properties[] = {
DEFINE_PROP_BOOL("ramfb", VFIOPCIDevice, enable_ramfb, false),
+ DEFINE_PROP_BOOL("use-legacy-x86-rom", VFIOPCIDevice,
+ use_legacy_x86_rom, false),
DEFINE_PROP_ON_OFF_AUTO("x-ramfb-migrate", VFIOPCIDevice, ramfb_migrate,
ON_OFF_AUTO_AUTO),
};
-static void vfio_pci_nohotplug_dev_class_init(ObjectClass *klass,
+static void vfio_pci_nohotplug_class_init(ObjectClass *klass,
const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
- device_class_set_props(dc, vfio_pci_dev_nohotplug_properties);
+ device_class_set_props(dc, vfio_pci_nohotplug_properties);
dc->hotpluggable = false;
object_class_property_set_description(klass, /* 3.1 */
@@ -3668,13 +3947,16 @@ static void vfio_pci_nohotplug_dev_class_init(ObjectClass *klass,
"x-ramfb-migrate",
"Override default migration support for ramfb support "
"(DEBUG)");
+ object_class_property_set_description(klass, /* 10.1 */
+ "use-legacy-x86-rom",
+ "Controls loading of a legacy VGA BIOS ROM");
}
-static const TypeInfo vfio_pci_nohotplug_dev_info = {
+static const TypeInfo vfio_pci_nohotplug_info = {
.name = TYPE_VFIO_PCI_NOHOTPLUG,
.parent = TYPE_VFIO_PCI,
.instance_size = sizeof(VFIOPCIDevice),
- .class_init = vfio_pci_nohotplug_dev_class_init,
+ .class_init = vfio_pci_nohotplug_class_init,
};
static void register_vfio_pci_dev_type(void)
@@ -3690,9 +3972,9 @@ static void register_vfio_pci_dev_type(void)
vfio_pci_migration_multifd_transfer_prop = qdev_prop_on_off_auto;
vfio_pci_migration_multifd_transfer_prop.realized_set_allowed = true;
- type_register_static(&vfio_pci_base_dev_info);
- type_register_static(&vfio_pci_dev_info);
- type_register_static(&vfio_pci_nohotplug_dev_info);
+ type_register_static(&vfio_pci_device_info);
+ type_register_static(&vfio_pci_info);
+ type_register_static(&vfio_pci_nohotplug_info);
}
type_init(register_vfio_pci_dev_type)
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 5ce0fb9..0f78cf9 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -14,6 +14,7 @@
#include "system/memory.h"
#include "hw/pci/pci_device.h"
+#include "hw/vfio/types.h"
#include "hw/vfio/vfio-device.h"
#include "hw/vfio/vfio-region.h"
#include "qemu/event_notifier.h"
@@ -116,21 +117,14 @@ typedef struct VFIOMSIXInfo {
uint32_t pba_offset;
unsigned long *pending;
bool noresize;
+ MemoryRegion *pba_region;
} VFIOMSIXInfo;
-/*
- * TYPE_VFIO_PCI_BASE is an abstract type used to share code
- * between VFIO implementations that use a kernel driver
- * with those that use user sockets.
- */
-#define TYPE_VFIO_PCI_BASE "vfio-pci-base"
-OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI_BASE)
-
-#define TYPE_VFIO_PCI "vfio-pci"
-/* TYPE_VFIO_PCI shares struct VFIOPCIDevice. */
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOPCIDevice, VFIO_PCI_DEVICE)
struct VFIOPCIDevice {
- PCIDevice pdev;
+ PCIDevice parent_obj;
+
VFIODevice vbasedev;
VFIOINTx intx;
unsigned int config_size;
@@ -156,6 +150,7 @@ struct VFIOPCIDevice {
uint32_t device_id;
uint32_t sub_vendor_id;
uint32_t sub_device_id;
+ uint32_t class_code;
uint32_t features;
#define VFIO_FEATURE_ENABLE_VGA_BIT 0
#define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
@@ -187,12 +182,14 @@ struct VFIOPCIDevice {
bool no_kvm_ioeventfd;
bool no_vfio_ioeventfd;
bool enable_ramfb;
+ bool use_legacy_x86_rom;
OnOffAuto ramfb_migrate;
bool defer_kvm_irq_routing;
bool clear_parent_atomics_on_exit;
bool skip_vsc_check;
VFIODisplay *dpy;
Notifier irqchip_change_notifier;
+ VFIOPCICPR cpr;
};
/* Use uin32_t for vendor & device so PCI_ANY_ID expands and cannot match hw */
@@ -204,12 +201,25 @@ static inline bool vfio_pci_is(VFIOPCIDevice *vdev, uint32_t vendor, uint32_t de
static inline bool vfio_is_vga(VFIOPCIDevice *vdev)
{
- PCIDevice *pdev = &vdev->pdev;
- uint16_t class = pci_get_word(pdev->config + PCI_CLASS_DEVICE);
+ return (vdev->class_code >> 8) == PCI_CLASS_DISPLAY_VGA;
+}
- return class == PCI_CLASS_DISPLAY_VGA;
+static inline bool vfio_is_base_display(VFIOPCIDevice *vdev)
+{
+ return (vdev->class_code >> 16) == PCI_BASE_CLASS_DISPLAY;
}
+/* MSI/MSI-X/INTx */
+void vfio_pci_vector_init(VFIOPCIDevice *vdev, int nr);
+void vfio_pci_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
+ int vector_n, bool msix);
+void vfio_pci_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev);
+void vfio_pci_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev);
+bool vfio_pci_intx_enable(VFIOPCIDevice *vdev, Error **errp);
+void vfio_pci_intx_set_handler(VFIOPCIDevice *vdev, bool enable);
+void vfio_pci_msix_set_notifiers(VFIOPCIDevice *vdev);
+void vfio_pci_msi_set_handler(VFIOPCIDevice *vdev, int nr, bool enable);
+
uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
void vfio_pci_write_config(PCIDevice *pdev,
uint32_t addr, uint32_t val, int len);
@@ -217,6 +227,19 @@ void vfio_pci_write_config(PCIDevice *pdev,
uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size);
void vfio_vga_write(void *opaque, hwaddr addr, uint64_t data, unsigned size);
+/**
+ * vfio_pci_from_vfio_device: Transform from VFIODevice to
+ * VFIOPCIDevice
+ *
+ * This function checks if the given @vbasedev is a VFIO PCI device.
+ * If it is, it returns the containing VFIOPCIDevice.
+ *
+ * @vbasedev: The VFIODevice to transform
+ *
+ * Return: The VFIOPCIDevice on success, NULL on failure.
+ */
+VFIOPCIDevice *vfio_pci_from_vfio_device(VFIODevice *vbasedev);
+void vfio_sub_page_bar_update_mappings(VFIOPCIDevice *vdev);
bool vfio_opt_rom_in_denylist(VFIOPCIDevice *vdev);
bool vfio_config_quirk_setup(VFIOPCIDevice *vdev, Error **errp);
void vfio_vga_quirk_setup(VFIOPCIDevice *vdev);
@@ -248,4 +271,16 @@ void vfio_display_finalize(VFIOPCIDevice *vdev);
extern const VMStateDescription vfio_display_vmstate;
+void vfio_pci_bars_exit(VFIOPCIDevice *vdev);
+bool vfio_pci_add_capabilities(VFIOPCIDevice *vdev, Error **errp);
+void vfio_pci_config_register_vga(VFIOPCIDevice *vdev);
+bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp);
+bool vfio_pci_interrupt_setup(VFIOPCIDevice *vdev, Error **errp);
+void vfio_pci_intx_eoi(VFIODevice *vbasedev);
+void vfio_pci_put_device(VFIOPCIDevice *vdev);
+bool vfio_pci_populate_device(VFIOPCIDevice *vdev, Error **errp);
+void vfio_pci_register_err_notifier(VFIOPCIDevice *vdev);
+void vfio_pci_register_req_notifier(VFIOPCIDevice *vdev);
+void vfio_pci_teardown_msi(VFIOPCIDevice *vdev);
+
#endif /* HW_VFIO_VFIO_PCI_H */
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
deleted file mode 100644
index 9a21f2e..0000000
--- a/hw/vfio/platform.c
+++ /dev/null
@@ -1,716 +0,0 @@
-/*
- * vfio based device assignment support - platform devices
- *
- * Copyright Linaro Limited, 2014
- *
- * Authors:
- * Kim Phillips <kim.phillips@linaro.org>
- * Eric Auger <eric.auger@linaro.org>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- * Based on vfio based PCI device assignment support:
- * Copyright Red Hat, Inc. 2012
- */
-
-#include "qemu/osdep.h"
-#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
-#include "qapi/error.h"
-#include <sys/ioctl.h>
-#include <linux/vfio.h>
-
-#include "hw/vfio/vfio-platform.h"
-#include "system/iommufd.h"
-#include "migration/vmstate.h"
-#include "qemu/error-report.h"
-#include "qemu/lockable.h"
-#include "qemu/main-loop.h"
-#include "qemu/module.h"
-#include "qemu/range.h"
-#include "system/memory.h"
-#include "system/address-spaces.h"
-#include "qemu/queue.h"
-#include "hw/sysbus.h"
-#include "trace.h"
-#include "hw/irq.h"
-#include "hw/platform-bus.h"
-#include "hw/qdev-properties.h"
-#include "system/kvm.h"
-#include "hw/vfio/vfio-region.h"
-
-/*
- * Functions used whatever the injection method
- */
-
-static inline bool vfio_irq_is_automasked(VFIOINTp *intp)
-{
- return intp->flags & VFIO_IRQ_INFO_AUTOMASKED;
-}
-
-/**
- * vfio_init_intp - allocate, initialize the IRQ struct pointer
- * and add it into the list of IRQs
- * @vbasedev: the VFIO device handle
- * @info: irq info struct retrieved from VFIO driver
- * @errp: error object
- */
-static VFIOINTp *vfio_init_intp(VFIODevice *vbasedev,
- struct vfio_irq_info info, Error **errp)
-{
- int ret;
- VFIOPlatformDevice *vdev =
- container_of(vbasedev, VFIOPlatformDevice, vbasedev);
- SysBusDevice *sbdev = SYS_BUS_DEVICE(vdev);
- VFIOINTp *intp;
-
- intp = g_malloc0(sizeof(*intp));
- intp->vdev = vdev;
- intp->pin = info.index;
- intp->flags = info.flags;
- intp->state = VFIO_IRQ_INACTIVE;
- intp->kvm_accel = false;
-
- sysbus_init_irq(sbdev, &intp->qemuirq);
-
- /* Get an eventfd for trigger */
- intp->interrupt = g_new0(EventNotifier, 1);
- ret = event_notifier_init(intp->interrupt, 0);
- if (ret) {
- g_free(intp->interrupt);
- g_free(intp);
- error_setg_errno(errp, -ret,
- "failed to initialize trigger eventfd notifier");
- return NULL;
- }
- if (vfio_irq_is_automasked(intp)) {
- /* Get an eventfd for resample/unmask */
- intp->unmask = g_new0(EventNotifier, 1);
- ret = event_notifier_init(intp->unmask, 0);
- if (ret) {
- g_free(intp->interrupt);
- g_free(intp->unmask);
- g_free(intp);
- error_setg_errno(errp, -ret,
- "failed to initialize resample eventfd notifier");
- return NULL;
- }
- }
-
- QLIST_INSERT_HEAD(&vdev->intp_list, intp, next);
- return intp;
-}
-
-/**
- * vfio_set_trigger_eventfd - set VFIO eventfd handling
- *
- * @intp: IRQ struct handle
- * @handler: handler to be called on eventfd signaling
- *
- * Setup VFIO signaling and attach an optional user-side handler
- * to the eventfd
- */
-static int vfio_set_trigger_eventfd(VFIOINTp *intp,
- eventfd_user_side_handler_t handler)
-{
- VFIODevice *vbasedev = &intp->vdev->vbasedev;
- int32_t fd = event_notifier_get_fd(intp->interrupt);
- Error *err = NULL;
-
- qemu_set_fd_handler(fd, (IOHandler *)handler, NULL, intp);
-
- if (!vfio_device_irq_set_signaling(vbasedev, intp->pin, 0,
- VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
- error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name);
- qemu_set_fd_handler(fd, NULL, NULL, NULL);
- return -EINVAL;
- }
-
- return 0;
-}
-
-/*
- * Functions only used when eventfds are handled on user-side
- * ie. without irqfd
- */
-
-/**
- * vfio_mmap_set_enabled - enable/disable the fast path mode
- * @vdev: the VFIO platform device
- * @enabled: the target mmap state
- *
- * enabled = true ~ fast path = MMIO region is mmaped (no KVM TRAP);
- * enabled = false ~ slow path = MMIO region is trapped and region callbacks
- * are called; slow path enables to trap the device IRQ status register reset
-*/
-
-static void vfio_mmap_set_enabled(VFIOPlatformDevice *vdev, bool enabled)
-{
- int i;
-
- for (i = 0; i < vdev->vbasedev.num_regions; i++) {
- vfio_region_mmaps_set_enabled(vdev->regions[i], enabled);
- }
-}
-
-/**
- * vfio_intp_mmap_enable - timer function, restores the fast path
- * if there is no more active IRQ
- * @opaque: actually points to the VFIO platform device
- *
- * Called on mmap timer timeout, this function checks whether the
- * IRQ is still active and if not, restores the fast path.
- * by construction a single eventfd is handled at a time.
- * if the IRQ is still active, the timer is re-programmed.
- */
-static void vfio_intp_mmap_enable(void *opaque)
-{
- VFIOINTp *tmp;
- VFIOPlatformDevice *vdev = (VFIOPlatformDevice *)opaque;
-
- QEMU_LOCK_GUARD(&vdev->intp_mutex);
- QLIST_FOREACH(tmp, &vdev->intp_list, next) {
- if (tmp->state == VFIO_IRQ_ACTIVE) {
- trace_vfio_platform_intp_mmap_enable(tmp->pin);
- /* re-program the timer to check active status later */
- timer_mod(vdev->mmap_timer,
- qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
- vdev->mmap_timeout);
- return;
- }
- }
- vfio_mmap_set_enabled(vdev, true);
-}
-
-/**
- * vfio_intp_inject_pending_lockheld - Injects a pending IRQ
- * @opaque: opaque pointer, in practice the VFIOINTp handle
- *
- * The function is called on a previous IRQ completion, from
- * vfio_platform_eoi, while the intp_mutex is locked.
- * Also in such situation, the slow path already is set and
- * the mmap timer was already programmed.
- */
-static void vfio_intp_inject_pending_lockheld(VFIOINTp *intp)
-{
- trace_vfio_platform_intp_inject_pending_lockheld(intp->pin,
- event_notifier_get_fd(intp->interrupt));
-
- intp->state = VFIO_IRQ_ACTIVE;
-
- /* trigger the virtual IRQ */
- qemu_set_irq(intp->qemuirq, 1);
-}
-
-/**
- * vfio_intp_interrupt - The user-side eventfd handler
- * @opaque: opaque pointer which in practice is the VFIOINTp handle
- *
- * the function is entered in event handler context:
- * the vIRQ is injected into the guest if there is no other active
- * or pending IRQ.
- */
-static void vfio_intp_interrupt(VFIOINTp *intp)
-{
- int ret;
- VFIOINTp *tmp;
- VFIOPlatformDevice *vdev = intp->vdev;
- bool delay_handling = false;
-
- QEMU_LOCK_GUARD(&vdev->intp_mutex);
- if (intp->state == VFIO_IRQ_INACTIVE) {
- QLIST_FOREACH(tmp, &vdev->intp_list, next) {
- if (tmp->state == VFIO_IRQ_ACTIVE ||
- tmp->state == VFIO_IRQ_PENDING) {
- delay_handling = true;
- break;
- }
- }
- }
- if (delay_handling) {
- /*
- * the new IRQ gets a pending status and is pushed in
- * the pending queue
- */
- intp->state = VFIO_IRQ_PENDING;
- trace_vfio_intp_interrupt_set_pending(intp->pin);
- QSIMPLEQ_INSERT_TAIL(&vdev->pending_intp_queue,
- intp, pqnext);
- event_notifier_test_and_clear(intp->interrupt);
- return;
- }
-
- trace_vfio_platform_intp_interrupt(intp->pin,
- event_notifier_get_fd(intp->interrupt));
-
- ret = event_notifier_test_and_clear(intp->interrupt);
- if (!ret) {
- error_report("Error when clearing fd=%d (ret = %d)",
- event_notifier_get_fd(intp->interrupt), ret);
- }
-
- intp->state = VFIO_IRQ_ACTIVE;
-
- /* sets slow path */
- vfio_mmap_set_enabled(vdev, false);
-
- /* trigger the virtual IRQ */
- qemu_set_irq(intp->qemuirq, 1);
-
- /*
- * Schedule the mmap timer which will restore fastpath when no IRQ
- * is active anymore
- */
- if (vdev->mmap_timeout) {
- timer_mod(vdev->mmap_timer,
- qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
- vdev->mmap_timeout);
- }
-}
-
-/**
- * vfio_platform_eoi - IRQ completion routine
- * @vbasedev: the VFIO device handle
- *
- * De-asserts the active virtual IRQ and unmasks the physical IRQ
- * (effective for level sensitive IRQ auto-masked by the VFIO driver).
- * Then it handles next pending IRQ if any.
- * eoi function is called on the first access to any MMIO region
- * after an IRQ was triggered, trapped since slow path was set.
- * It is assumed this access corresponds to the IRQ status
- * register reset. With such a mechanism, a single IRQ can be
- * handled at a time since there is no way to know which IRQ
- * was completed by the guest (we would need additional details
- * about the IRQ status register mask).
- */
-static void vfio_platform_eoi(VFIODevice *vbasedev)
-{
- VFIOINTp *intp;
- VFIOPlatformDevice *vdev =
- container_of(vbasedev, VFIOPlatformDevice, vbasedev);
-
- QEMU_LOCK_GUARD(&vdev->intp_mutex);
- QLIST_FOREACH(intp, &vdev->intp_list, next) {
- if (intp->state == VFIO_IRQ_ACTIVE) {
- trace_vfio_platform_eoi(intp->pin,
- event_notifier_get_fd(intp->interrupt));
- intp->state = VFIO_IRQ_INACTIVE;
-
- /* deassert the virtual IRQ */
- qemu_set_irq(intp->qemuirq, 0);
-
- if (vfio_irq_is_automasked(intp)) {
- /* unmasks the physical level-sensitive IRQ */
- vfio_device_irq_unmask(vbasedev, intp->pin);
- }
-
- /* a single IRQ can be active at a time */
- break;
- }
- }
- /* in case there are pending IRQs, handle the first one */
- if (!QSIMPLEQ_EMPTY(&vdev->pending_intp_queue)) {
- intp = QSIMPLEQ_FIRST(&vdev->pending_intp_queue);
- vfio_intp_inject_pending_lockheld(intp);
- QSIMPLEQ_REMOVE_HEAD(&vdev->pending_intp_queue, pqnext);
- }
-}
-
-/**
- * vfio_start_eventfd_injection - starts the virtual IRQ injection using
- * user-side handled eventfds
- * @sbdev: the sysbus device handle
- * @irq: the qemu irq handle
- */
-
-static void vfio_start_eventfd_injection(SysBusDevice *sbdev, qemu_irq irq)
-{
- VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
- VFIOINTp *intp;
-
- QLIST_FOREACH(intp, &vdev->intp_list, next) {
- if (intp->qemuirq == irq) {
- break;
- }
- }
- assert(intp);
-
- if (vfio_set_trigger_eventfd(intp, vfio_intp_interrupt)) {
- abort();
- }
-}
-
-/*
- * Functions used for irqfd
- */
-
-/**
- * vfio_set_resample_eventfd - sets the resamplefd for an IRQ
- * @intp: the IRQ struct handle
- * programs the VFIO driver to unmask this IRQ when the
- * intp->unmask eventfd is triggered
- */
-static int vfio_set_resample_eventfd(VFIOINTp *intp)
-{
- int32_t fd = event_notifier_get_fd(intp->unmask);
- VFIODevice *vbasedev = &intp->vdev->vbasedev;
- Error *err = NULL;
-
- qemu_set_fd_handler(fd, NULL, NULL, NULL);
- if (!vfio_device_irq_set_signaling(vbasedev, intp->pin, 0,
- VFIO_IRQ_SET_ACTION_UNMASK, fd, &err)) {
- error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name);
- return -EINVAL;
- }
- return 0;
-}
-
-/**
- * vfio_start_irqfd_injection - starts the virtual IRQ injection using
- * irqfd
- *
- * @sbdev: the sysbus device handle
- * @irq: the qemu irq handle
- *
- * In case the irqfd setup fails, we fallback to userspace handled eventfd
- */
-static void vfio_start_irqfd_injection(SysBusDevice *sbdev, qemu_irq irq)
-{
- VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
- VFIOINTp *intp;
-
- if (!kvm_irqfds_enabled() || !kvm_resamplefds_enabled() ||
- !vdev->irqfd_allowed) {
- goto fail_irqfd;
- }
-
- QLIST_FOREACH(intp, &vdev->intp_list, next) {
- if (intp->qemuirq == irq) {
- break;
- }
- }
- assert(intp);
-
- if (kvm_irqchip_add_irqfd_notifier(kvm_state, intp->interrupt,
- intp->unmask, irq) < 0) {
- goto fail_irqfd;
- }
-
- if (vfio_set_trigger_eventfd(intp, NULL) < 0) {
- goto fail_vfio;
- }
- if (vfio_irq_is_automasked(intp)) {
- if (vfio_set_resample_eventfd(intp) < 0) {
- goto fail_vfio;
- }
- trace_vfio_platform_start_level_irqfd_injection(intp->pin,
- event_notifier_get_fd(intp->interrupt),
- event_notifier_get_fd(intp->unmask));
- } else {
- trace_vfio_platform_start_edge_irqfd_injection(intp->pin,
- event_notifier_get_fd(intp->interrupt));
- }
-
- intp->kvm_accel = true;
-
- return;
-fail_vfio:
- kvm_irqchip_remove_irqfd_notifier(kvm_state, intp->interrupt, irq);
- abort();
-fail_irqfd:
- vfio_start_eventfd_injection(sbdev, irq);
-}
-
-/* VFIO skeleton */
-
-static void vfio_platform_compute_needs_reset(VFIODevice *vbasedev)
-{
- vbasedev->needs_reset = true;
-}
-
-/* not implemented yet */
-static int vfio_platform_hot_reset_multi(VFIODevice *vbasedev)
-{
- return -1;
-}
-
-/**
- * vfio_populate_device - Allocate and populate MMIO region
- * and IRQ structs according to driver returned information
- * @vbasedev: the VFIO device handle
- * @errp: error object
- *
- */
-static bool vfio_populate_device(VFIODevice *vbasedev, Error **errp)
-{
- VFIOINTp *intp, *tmp;
- int i, ret = -1;
- VFIOPlatformDevice *vdev =
- container_of(vbasedev, VFIOPlatformDevice, vbasedev);
-
- if (!(vbasedev->flags & VFIO_DEVICE_FLAGS_PLATFORM)) {
- error_setg(errp, "this isn't a platform device");
- return false;
- }
-
- vdev->regions = g_new0(VFIORegion *, vbasedev->num_regions);
-
- for (i = 0; i < vbasedev->num_regions; i++) {
- char *name = g_strdup_printf("VFIO %s region %d\n", vbasedev->name, i);
-
- vdev->regions[i] = g_new0(VFIORegion, 1);
- ret = vfio_region_setup(OBJECT(vdev), vbasedev,
- vdev->regions[i], i, name);
- g_free(name);
- if (ret) {
- error_setg_errno(errp, -ret, "failed to get region %d info", i);
- goto reg_error;
- }
- }
-
- vdev->mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
- vfio_intp_mmap_enable, vdev);
-
- QSIMPLEQ_INIT(&vdev->pending_intp_queue);
-
- for (i = 0; i < vbasedev->num_irqs; i++) {
- struct vfio_irq_info irq;
-
- ret = vfio_device_get_irq_info(vbasedev, i, &irq);
-
- if (ret) {
- error_setg_errno(errp, -ret, "failed to get device irq info");
- goto irq_err;
- } else {
- trace_vfio_platform_populate_interrupts(irq.index,
- irq.count,
- irq.flags);
- intp = vfio_init_intp(vbasedev, irq, errp);
- if (!intp) {
- goto irq_err;
- }
- }
- }
- return true;
-irq_err:
- timer_del(vdev->mmap_timer);
- QLIST_FOREACH_SAFE(intp, &vdev->intp_list, next, tmp) {
- QLIST_REMOVE(intp, next);
- g_free(intp);
- }
-reg_error:
- for (i = 0; i < vbasedev->num_regions; i++) {
- if (vdev->regions[i]) {
- vfio_region_finalize(vdev->regions[i]);
- }
- g_free(vdev->regions[i]);
- }
- g_free(vdev->regions);
- return false;
-}
-
-/* specialized functions for VFIO Platform devices */
-static VFIODeviceOps vfio_platform_ops = {
- .vfio_compute_needs_reset = vfio_platform_compute_needs_reset,
- .vfio_hot_reset_multi = vfio_platform_hot_reset_multi,
- .vfio_eoi = vfio_platform_eoi,
-};
-
-/**
- * vfio_base_device_init - perform preliminary VFIO setup
- * @vbasedev: the VFIO device handle
- * @errp: error object
- *
- * Implement the VFIO command sequence that allows to discover
- * assigned device resources: group extraction, device
- * fd retrieval, resource query.
- * Precondition: the device name must be initialized
- */
-static bool vfio_base_device_init(VFIODevice *vbasedev, Error **errp)
-{
- /* @fd takes precedence over @sysfsdev which takes precedence over @host */
- if (vbasedev->fd < 0 && vbasedev->sysfsdev) {
- g_free(vbasedev->name);
- vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
- } else if (vbasedev->fd < 0) {
- if (!vbasedev->name || strchr(vbasedev->name, '/')) {
- error_setg(errp, "wrong host device name");
- return false;
- }
-
- vbasedev->sysfsdev = g_strdup_printf("/sys/bus/platform/devices/%s",
- vbasedev->name);
- }
-
- if (!vfio_device_get_name(vbasedev, errp)) {
- return false;
- }
-
- if (!vfio_device_attach(vbasedev->name, vbasedev,
- &address_space_memory, errp)) {
- return false;
- }
-
- if (vfio_populate_device(vbasedev, errp)) {
- return true;
- }
-
- vfio_device_detach(vbasedev);
- return false;
-}
-
-/**
- * vfio_platform_realize - the device realize function
- * @dev: device state pointer
- * @errp: error
- *
- * initialize the device, its memory regions and IRQ structures
- * IRQ are started separately
- */
-static void vfio_platform_realize(DeviceState *dev, Error **errp)
-{
- ERRP_GUARD();
- VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(dev);
- SysBusDevice *sbdev = SYS_BUS_DEVICE(dev);
- VFIODevice *vbasedev = &vdev->vbasedev;
- int i;
-
- warn_report("-device vfio-platform is deprecated");
- qemu_mutex_init(&vdev->intp_mutex);
-
- trace_vfio_platform_realize(vbasedev->sysfsdev ?
- vbasedev->sysfsdev : vbasedev->name,
- vdev->compat);
-
- if (!vfio_base_device_init(vbasedev, errp)) {
- goto init_err;
- }
-
- if (!vdev->compat) {
- GError *gerr = NULL;
- gchar *contents;
- gsize length;
- char *path;
-
- path = g_strdup_printf("%s/of_node/compatible", vbasedev->sysfsdev);
- if (!g_file_get_contents(path, &contents, &length, &gerr)) {
- error_setg(errp, "%s", gerr->message);
- g_error_free(gerr);
- g_free(path);
- return;
- }
- g_free(path);
- vdev->compat = contents;
- for (vdev->num_compat = 0; length; vdev->num_compat++) {
- size_t skip = strlen(contents) + 1;
- contents += skip;
- length -= skip;
- }
- }
-
- for (i = 0; i < vbasedev->num_regions; i++) {
- if (vfio_region_mmap(vdev->regions[i])) {
- warn_report("%s mmap unsupported, performance may be slow",
- memory_region_name(vdev->regions[i]->mem));
- }
- sysbus_init_mmio(sbdev, vdev->regions[i]->mem);
- }
- return;
-
-init_err:
- if (vdev->vbasedev.name) {
- error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
- } else {
- error_prepend(errp, "vfio error: ");
- }
-}
-
-static const VMStateDescription vfio_platform_vmstate = {
- .name = "vfio-platform",
- .unmigratable = 1,
-};
-
-static const Property vfio_platform_dev_properties[] = {
- DEFINE_PROP_STRING("host", VFIOPlatformDevice, vbasedev.name),
- DEFINE_PROP_STRING("sysfsdev", VFIOPlatformDevice, vbasedev.sysfsdev),
- DEFINE_PROP_BOOL("x-no-mmap", VFIOPlatformDevice, vbasedev.no_mmap, false),
- DEFINE_PROP_UINT32("mmap-timeout-ms", VFIOPlatformDevice,
- mmap_timeout, 1100),
- DEFINE_PROP_BOOL("x-irqfd", VFIOPlatformDevice, irqfd_allowed, true),
-#ifdef CONFIG_IOMMUFD
- DEFINE_PROP_LINK("iommufd", VFIOPlatformDevice, vbasedev.iommufd,
- TYPE_IOMMUFD_BACKEND, IOMMUFDBackend *),
-#endif
-};
-
-static void vfio_platform_instance_init(Object *obj)
-{
- VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj);
- VFIODevice *vbasedev = &vdev->vbasedev;
-
- vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PLATFORM, &vfio_platform_ops,
- DEVICE(vdev), false);
-}
-
-#ifdef CONFIG_IOMMUFD
-static void vfio_platform_set_fd(Object *obj, const char *str, Error **errp)
-{
- vfio_device_set_fd(&VFIO_PLATFORM_DEVICE(obj)->vbasedev, str, errp);
-}
-#endif
-
-static void vfio_platform_class_init(ObjectClass *klass, const void *data)
-{
- DeviceClass *dc = DEVICE_CLASS(klass);
- SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
-
- dc->realize = vfio_platform_realize;
- device_class_set_props(dc, vfio_platform_dev_properties);
-#ifdef CONFIG_IOMMUFD
- object_class_property_add_str(klass, "fd", NULL, vfio_platform_set_fd);
-#endif
- dc->vmsd = &vfio_platform_vmstate;
- dc->desc = "VFIO-based platform device assignment";
- sbc->connect_irq_notifier = vfio_start_irqfd_injection;
- set_bit(DEVICE_CATEGORY_MISC, dc->categories);
-
- object_class_property_set_description(klass, /* 2.4 */
- "host",
- "Host device name of assigned device");
- object_class_property_set_description(klass, /* 2.4 and 2.5 */
- "x-no-mmap",
- "Disable MMAP for device. Allows to trace MMIO "
- "accesses (DEBUG)");
- object_class_property_set_description(klass, /* 2.4 */
- "mmap-timeout-ms",
- "When EOI is not provided by KVM/QEMU, wait time "
- "(milliseconds) to re-enable device direct access "
- "after level interrupt (DEBUG)");
- object_class_property_set_description(klass, /* 2.4 */
- "x-irqfd",
- "Allow disabling irqfd support (DEBUG)");
- object_class_property_set_description(klass, /* 2.6 */
- "sysfsdev",
- "Host sysfs path of assigned device");
-#ifdef CONFIG_IOMMUFD
- object_class_property_set_description(klass, /* 9.0 */
- "iommufd",
- "Set host IOMMUFD backend device");
-#endif
-}
-
-static const TypeInfo vfio_platform_dev_info = {
- .name = TYPE_VFIO_PLATFORM,
- .parent = TYPE_DYNAMIC_SYS_BUS_DEVICE,
- .instance_size = sizeof(VFIOPlatformDevice),
- .instance_init = vfio_platform_instance_init,
- .class_init = vfio_platform_class_init,
- .class_size = sizeof(VFIOPlatformDeviceClass),
-};
-
-static void register_vfio_platform_dev_type(void)
-{
- type_register_static(&vfio_platform_dev_info);
-}
-
-type_init(register_vfio_platform_dev_type)
diff --git a/hw/vfio/region.c b/hw/vfio/region.c
index 34752c3..b165ab0 100644
--- a/hw/vfio/region.c
+++ b/hw/vfio/region.c
@@ -66,7 +66,7 @@ void vfio_region_write(void *opaque, hwaddr addr,
}
ret = vbasedev->io_ops->region_write(vbasedev, region->nr,
- addr, size, &buf);
+ addr, size, &buf, region->post_wr);
if (ret != size) {
error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64
",%d) failed: %s",
@@ -200,6 +200,7 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
region->size = info->size;
region->fd_offset = info->offset;
region->nr = index;
+ region->post_wr = false;
if (region->size) {
region->mem = g_new0(MemoryRegion, 1);
@@ -241,6 +242,7 @@ int vfio_region_mmap(VFIORegion *region)
{
int i, ret, prot = 0;
char *name;
+ int fd;
if (!region->mem) {
return 0;
@@ -271,14 +273,15 @@ int vfio_region_mmap(VFIORegion *region)
goto no_mmap;
}
+ fd = vfio_device_get_region_fd(region->vbasedev, region->nr);
+
map_align = (void *)ROUND_UP((uintptr_t)map_base, (uintptr_t)align);
munmap(map_base, map_align - map_base);
munmap(map_align + region->mmaps[i].size,
align - (map_align - map_base));
region->mmaps[i].mmap = mmap(map_align, region->mmaps[i].size, prot,
- MAP_SHARED | MAP_FIXED,
- region->vbasedev->fd,
+ MAP_SHARED | MAP_FIXED, fd,
region->fd_offset +
region->mmaps[i].offset);
if (region->mmaps[i].mmap == MAP_FAILED) {
@@ -362,12 +365,9 @@ void vfio_region_finalize(VFIORegion *region)
for (i = 0; i < region->nr_mmaps; i++) {
if (region->mmaps[i].mmap) {
munmap(region->mmaps[i].mmap, region->mmaps[i].size);
- object_unparent(OBJECT(&region->mmaps[i].mem));
}
}
- object_unparent(OBJECT(region->mem));
-
g_free(region->mem);
g_free(region->mmaps);
diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
index 564b70e..0f23681 100644
--- a/hw/vfio/spapr.c
+++ b/hw/vfio/spapr.c
@@ -15,9 +15,8 @@
#include "system/hostmem.h"
#include "system/address-spaces.h"
-#include "hw/vfio/vfio-container.h"
+#include "hw/vfio/vfio-container-legacy.h"
#include "hw/hw.h"
-#include "system/ram_addr.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "trace.h"
@@ -30,12 +29,13 @@ typedef struct VFIOHostDMAWindow {
QLIST_ENTRY(VFIOHostDMAWindow) hostwin_next;
} VFIOHostDMAWindow;
-typedef struct VFIOSpaprContainer {
- VFIOContainer container;
+struct VFIOSpaprContainer {
+ VFIOLegacyContainer parent_obj;
+
MemoryListener prereg_listener;
QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
unsigned int levels;
-} VFIOSpaprContainer;
+};
OBJECT_DECLARE_SIMPLE_TYPE(VFIOSpaprContainer, VFIO_IOMMU_SPAPR);
@@ -61,8 +61,8 @@ static void vfio_prereg_listener_region_add(MemoryListener *listener,
{
VFIOSpaprContainer *scontainer = container_of(listener, VFIOSpaprContainer,
prereg_listener);
- VFIOContainer *container = &scontainer->container;
- VFIOContainerBase *bcontainer = &container->bcontainer;
+ VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(scontainer);
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
const hwaddr gpa = section->offset_within_address_space;
hwaddr end;
int ret;
@@ -121,7 +121,7 @@ static void vfio_prereg_listener_region_del(MemoryListener *listener,
{
VFIOSpaprContainer *scontainer = container_of(listener, VFIOSpaprContainer,
prereg_listener);
- VFIOContainer *container = &scontainer->container;
+ VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(scontainer);
const hwaddr gpa = section->offset_within_address_space;
hwaddr end;
int ret;
@@ -218,7 +218,7 @@ static VFIOHostDMAWindow *vfio_find_hostwin(VFIOSpaprContainer *container,
return hostwin_found ? hostwin : NULL;
}
-static int vfio_spapr_remove_window(VFIOContainer *container,
+static int vfio_spapr_remove_window(VFIOLegacyContainer *container,
hwaddr offset_within_address_space)
{
struct vfio_iommu_spapr_tce_remove remove = {
@@ -239,14 +239,13 @@ static int vfio_spapr_remove_window(VFIOContainer *container,
return 0;
}
-static bool vfio_spapr_create_window(VFIOContainer *container,
+static bool vfio_spapr_create_window(VFIOLegacyContainer *container,
MemoryRegionSection *section,
hwaddr *pgsize, Error **errp)
{
int ret = 0;
- VFIOContainerBase *bcontainer = &container->bcontainer;
- VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
- container);
+ VFIOContainer *bcontainer = VFIO_IOMMU(container);
+ VFIOSpaprContainer *scontainer = VFIO_IOMMU_SPAPR(bcontainer);
IOMMUMemoryRegion *iommu_mr = IOMMU_MEMORY_REGION(section->mr);
uint64_t pagesize = memory_region_iommu_get_min_page_size(iommu_mr), pgmask;
unsigned entries, bits_total, bits_per_level, max_levels, ddw_levels;
@@ -348,14 +347,12 @@ static bool vfio_spapr_create_window(VFIOContainer *container,
}
static bool
-vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer,
+vfio_spapr_container_add_section_window(VFIOContainer *bcontainer,
MemoryRegionSection *section,
Error **errp)
{
- VFIOContainer *container = container_of(bcontainer, VFIOContainer,
- bcontainer);
- VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
- container);
+ VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
+ VFIOSpaprContainer *scontainer = VFIO_IOMMU_SPAPR(container);
VFIOHostDMAWindow *hostwin;
hwaddr pgsize = 0;
int ret;
@@ -440,13 +437,11 @@ vfio_spapr_container_add_section_window(VFIOContainerBase *bcontainer,
}
static void
-vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer,
+vfio_spapr_container_del_section_window(VFIOContainer *bcontainer,
MemoryRegionSection *section)
{
- VFIOContainer *container = container_of(bcontainer, VFIOContainer,
- bcontainer);
- VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
- container);
+ VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
+ VFIOSpaprContainer *scontainer = VFIO_IOMMU_SPAPR(container);
if (container->iommu_type != VFIO_SPAPR_TCE_v2_IOMMU) {
return;
@@ -463,12 +458,10 @@ vfio_spapr_container_del_section_window(VFIOContainerBase *bcontainer,
}
}
-static void vfio_spapr_container_release(VFIOContainerBase *bcontainer)
+static void vfio_spapr_container_release(VFIOContainer *bcontainer)
{
- VFIOContainer *container = container_of(bcontainer, VFIOContainer,
- bcontainer);
- VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
- container);
+ VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
+ VFIOSpaprContainer *scontainer = VFIO_IOMMU_SPAPR(container);
VFIOHostDMAWindow *hostwin, *next;
if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
@@ -481,13 +474,11 @@ static void vfio_spapr_container_release(VFIOContainerBase *bcontainer)
}
}
-static bool vfio_spapr_container_setup(VFIOContainerBase *bcontainer,
+static bool vfio_spapr_container_setup(VFIOContainer *bcontainer,
Error **errp)
{
- VFIOContainer *container = container_of(bcontainer, VFIOContainer,
- bcontainer);
- VFIOSpaprContainer *scontainer = container_of(container, VFIOSpaprContainer,
- container);
+ VFIOLegacyContainer *container = VFIO_IOMMU_LEGACY(bcontainer);
+ VFIOSpaprContainer *scontainer = VFIO_IOMMU_SPAPR(container);
struct vfio_iommu_spapr_tce_info info;
bool v2 = container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU;
int ret, fd = container->fd;
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index e90ec9b..1e89544 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -1,8 +1,10 @@
# See docs/devel/tracing.rst for syntax documentation.
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
# pci.c
vfio_intx_interrupt(const char *name, char line) " (%s) Pin %c"
-vfio_intx_eoi(const char *name) " (%s) EOI"
+vfio_pci_intx_eoi(const char *name) " (%s) EOI"
vfio_intx_enable_kvm(const char *name) " (%s) KVM INTx accel enabled"
vfio_intx_disable_kvm(const char *name) " (%s) KVM INTx accel disabled"
vfio_intx_update(const char *name, int new_irq, int target_irq) " (%s) IRQ moved %d -> %d"
@@ -35,8 +37,8 @@ vfio_pci_hot_reset(const char *name, const char *type) " (%s) %s"
vfio_pci_hot_reset_has_dep_devices(const char *name) "%s: hot reset dependent devices:"
vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int group_id) "\t%04x:%02x:%02x.%x group %d"
vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s"
-vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device '%s' config: size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
-vfio_populate_device_get_irq_info_failure(const char *errstr) "VFIO_DEVICE_GET_IRQ_INFO failure: %s"
+vfio_pci_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device '%s' config: size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
+vfio_pci_populate_device_get_irq_info_failure(const char *errstr) "VFIO_DEVICE_GET_IRQ_INFO failure: %s"
vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d"
vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x"
vfio_pci_reset(const char *name) " (%s)"
@@ -46,6 +48,7 @@ vfio_pci_emulated_vendor_id(const char *name, uint16_t val) "%s 0x%04x"
vfio_pci_emulated_device_id(const char *name, uint16_t val) "%s 0x%04x"
vfio_pci_emulated_sub_vendor_id(const char *name, uint16_t val) "%s 0x%04x"
vfio_pci_emulated_sub_device_id(const char *name, uint16_t val) "%s 0x%04x"
+vfio_pci_emulated_class_code(const char *name, uint32_t val) "%s 0x%06x"
# pci-quirks.c
vfio_quirk_rom_in_denylist(const char *name, uint16_t vid, uint16_t did) "%s %04x:%04x"
@@ -101,15 +104,14 @@ vfio_device_dirty_tracking_update(uint64_t start, uint64_t end, uint64_t min, ui
vfio_device_dirty_tracking_start(int nr_ranges, uint64_t min32, uint64_t max32, uint64_t min64, uint64_t max64, uint64_t minpci, uint64_t maxpci) "nr_ranges %d 32:[0x%"PRIx64" - 0x%"PRIx64"], 64:[0x%"PRIx64" - 0x%"PRIx64"], pci64:[0x%"PRIx64" - 0x%"PRIx64"]"
vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64
-# container-base.c
-vfio_container_query_dirty_bitmap(uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) "iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64
-
# container.c
+vfio_container_query_dirty_bitmap(uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t translated_addr, uint64_t dirty_pages) "iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" gpa=0x%"PRIx64" dirty_pages=%"PRIu64
+
+# container-legacy.c
vfio_container_disconnect(int fd) "close container->fd=%d"
vfio_group_put(int fd) "close group->fd=%d"
vfio_device_get(const char * name, unsigned int flags, unsigned int num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: %u"
vfio_device_put(int fd) "close vdev->fd=%d"
-vfio_legacy_dma_unmap_overflow_workaround(void) ""
# region.c
vfio_region_write(const char *name, int index, uint64_t addr, uint64_t data, unsigned size) " (%s:region%d+0x%"PRIx64", 0x%"PRIx64 ", %d)"
@@ -124,17 +126,6 @@ vfio_region_unmap(const char *name, unsigned long offset, unsigned long end) "Re
vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Device %s region %d: %d sparse mmap entries"
vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]"
-# platform.c
-vfio_platform_realize(char *name, char *compat) "vfio device %s, compat = %s"
-vfio_platform_eoi(int pin, int fd) "EOI IRQ pin %d (fd=%d)"
-vfio_platform_intp_mmap_enable(int pin) "IRQ #%d still active, stay in slow path"
-vfio_platform_intp_interrupt(int pin, int fd) "Inject IRQ #%d (fd = %d)"
-vfio_platform_intp_inject_pending_lockheld(int pin, int fd) "Inject pending IRQ #%d (fd = %d)"
-vfio_platform_populate_interrupts(int pin, int count, int flags) "- IRQ index %d: count %d, flags=0x%x"
-vfio_intp_interrupt_set_pending(int index) "irq %d is set PENDING"
-vfio_platform_start_level_irqfd_injection(int index, int fd, int resamplefd) "IRQ index=%d, fd = %d, resamplefd = %d"
-vfio_platform_start_edge_irqfd_injection(int index, int fd) "IRQ index=%d, fd = %d"
-
# spapr.c
vfio_prereg_listener_region_add_skip(uint64_t start, uint64_t end) "0x%"PRIx64" - 0x%"PRIx64
vfio_prereg_listener_region_del_skip(uint64_t start, uint64_t end) "0x%"PRIx64" - 0x%"PRIx64
@@ -195,6 +186,9 @@ iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD con
iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d"
iommufd_cdev_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int dev_id) "\t%04x:%02x:%02x.%x devid %d"
+# cpr-iommufd.c
+vfio_cpr_find_device(uint32_t ioas_id, int devid, uint32_t hwpt_id) "ioas_id %u, devid %d, hwpt_id %u"
+
# device.c
vfio_device_get_region_info_type(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x"
vfio_device_reset_handler(void) ""
diff --git a/hw/vfio/trace.h b/hw/vfio/trace.h
index 5a343aa..b34b61d 100644
--- a/hw/vfio/trace.h
+++ b/hw/vfio/trace.h
@@ -1 +1,4 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
#include "trace/trace-hw_vfio.h"
diff --git a/hw/vfio/types.h b/hw/vfio/types.h
new file mode 100644
index 0000000..5482d90
--- /dev/null
+++ b/hw/vfio/types.h
@@ -0,0 +1,23 @@
+/*
+ * VFIO types definition
+ *
+ * Copyright Red Hat, Inc. 2025
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef HW_VFIO_VFIO_TYPES_H
+#define HW_VFIO_VFIO_TYPES_H
+
+/*
+ * TYPE_VFIO_PCI_DEVICE is an abstract type used to share code
+ * between VFIO implementations that use a kernel driver
+ * with those that use user sockets.
+ */
+#define TYPE_VFIO_PCI_DEVICE "vfio-pci-device"
+
+#define TYPE_VFIO_PCI "vfio-pci"
+/* TYPE_VFIO_PCI shares struct VFIOPCIDevice. */
+
+#define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
+
+#endif /* HW_VFIO_VFIO_TYPES_H */
diff --git a/hw/vfio/vfio-cpr.h b/hw/vfio/vfio-cpr.h
deleted file mode 100644
index 134b83a..0000000
--- a/hw/vfio/vfio-cpr.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * VFIO CPR
- *
- * Copyright (c) 2025 Oracle and/or its affiliates.
- *
- * SPDX-License-Identifier: GPL-2.0-or-later
- */
-
-#ifndef HW_VFIO_CPR_H
-#define HW_VFIO_CPR_H
-
-bool vfio_cpr_register_container(VFIOContainerBase *bcontainer, Error **errp);
-void vfio_cpr_unregister_container(VFIOContainerBase *bcontainer);
-
-#endif /* HW_VFIO_CPR_H */
diff --git a/hw/vfio/vfio-helpers.h b/hw/vfio/vfio-helpers.h
index 54a327f..ce31758 100644
--- a/hw/vfio/vfio-helpers.h
+++ b/hw/vfio/vfio-helpers.h
@@ -32,4 +32,6 @@ struct vfio_device_info *vfio_get_device_info(int fd);
int vfio_kvm_device_add_fd(int fd, Error **errp);
int vfio_kvm_device_del_fd(int fd, Error **errp);
+bool vfio_arch_wants_loading_config_after_iter(void);
+
#endif /* HW_VFIO_VFIO_HELPERS_H */
diff --git a/hw/vfio/vfio-iommufd.h b/hw/vfio/vfio-iommufd.h
index 07ea0f4..6b28e1f 100644
--- a/hw/vfio/vfio-iommufd.h
+++ b/hw/vfio/vfio-iommufd.h
@@ -9,7 +9,7 @@
#ifndef HW_VFIO_VFIO_IOMMUFD_H
#define HW_VFIO_VFIO_IOMMUFD_H
-#include "hw/vfio/vfio-container-base.h"
+#include "hw/vfio/vfio-container.h"
typedef struct VFIODevice VFIODevice;
@@ -22,12 +22,13 @@ typedef struct VFIOIOASHwpt {
typedef struct IOMMUFDBackend IOMMUFDBackend;
-typedef struct VFIOIOMMUFDContainer {
- VFIOContainerBase bcontainer;
+struct VFIOIOMMUFDContainer {
+ VFIOContainer parent_obj;
+
IOMMUFDBackend *be;
uint32_t ioas_id;
QLIST_HEAD(, VFIOIOASHwpt) hwpt_list;
-} VFIOIOMMUFDContainer;
+};
OBJECT_DECLARE_SIMPLE_TYPE(VFIOIOMMUFDContainer, VFIO_IOMMU_IOMMUFD);
diff --git a/hw/vfio/vfio-listener.h b/hw/vfio/vfio-listener.h
index eb69ddd..a90674c 100644
--- a/hw/vfio/vfio-listener.h
+++ b/hw/vfio/vfio-listener.h
@@ -9,7 +9,7 @@
#ifndef HW_VFIO_VFIO_LISTENER_H
#define HW_VFIO_VFIO_LISTENER_H
-bool vfio_listener_register(VFIOContainerBase *bcontainer, Error **errp);
-void vfio_listener_unregister(VFIOContainerBase *bcontainer);
+bool vfio_listener_register(VFIOContainer *bcontainer, Error **errp);
+void vfio_listener_unregister(VFIOContainer *bcontainer);
#endif /* HW_VFIO_VFIO_LISTENER_H */
diff --git a/hw/vfio/vfio-migration-internal.h b/hw/vfio/vfio-migration-internal.h
index a8b456b..814fbd9 100644
--- a/hw/vfio/vfio-migration-internal.h
+++ b/hw/vfio/vfio-migration-internal.h
@@ -13,7 +13,6 @@
#include <linux/vfio.h>
#endif
-#include "qemu/typedefs.h"
#include "qemu/notify.h"
/*
@@ -32,6 +31,7 @@
#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL)
#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL)
#define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL)
+#define VFIO_MIG_FLAG_DEV_CONFIG_LOAD_READY (0xffffffffef100006ULL)
typedef struct VFIODevice VFIODevice;
typedef struct VFIOMultifd VFIOMultifd;
diff --git a/hw/vfio/vfio-region.h b/hw/vfio/vfio-region.h
new file mode 100644
index 0000000..ede6e0c
--- /dev/null
+++ b/hw/vfio/vfio-region.h
@@ -0,0 +1,48 @@
+/*
+ * VFIO region
+ *
+ * Copyright Red Hat, Inc. 2025
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_VFIO_REGION_H
+#define HW_VFIO_REGION_H
+
+#include "system/memory.h"
+
+typedef struct VFIOMmap {
+ MemoryRegion mem;
+ void *mmap;
+ off_t offset;
+ size_t size;
+} VFIOMmap;
+
+typedef struct VFIODevice VFIODevice;
+
+typedef struct VFIORegion {
+ struct VFIODevice *vbasedev;
+ off_t fd_offset; /* offset of region within device fd */
+ MemoryRegion *mem; /* slow, read/write access */
+ size_t size;
+ uint32_t flags; /* VFIO region flags (rd/wr/mmap) */
+ uint32_t nr_mmaps;
+ VFIOMmap *mmaps;
+ uint8_t nr; /* cache the region number for debug */
+ bool post_wr; /* writes can be posted */
+} VFIORegion;
+
+
+void vfio_region_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size);
+uint64_t vfio_region_read(void *opaque,
+ hwaddr addr, unsigned size);
+int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
+ int index, const char *name);
+int vfio_region_mmap(VFIORegion *region);
+void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled);
+void vfio_region_unmap(VFIORegion *region);
+void vfio_region_exit(VFIORegion *region);
+void vfio_region_finalize(VFIORegion *region);
+
+#endif /* HW_VFIO_REGION_H */
diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig
index 7648a2d..10f5c53 100644
--- a/hw/virtio/Kconfig
+++ b/hw/virtio/Kconfig
@@ -126,3 +126,8 @@ config VHOST_USER_SCMI
bool
default y
depends on VIRTIO && VHOST_USER && ARM
+
+config VHOST_USER_TEST
+ bool
+ default y
+ depends on VIRTIO && VHOST_USER
diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
index 164f6fd..48b9fed 100644
--- a/hw/virtio/meson.build
+++ b/hw/virtio/meson.build
@@ -1,6 +1,7 @@
system_virtio_ss = ss.source_set()
system_virtio_ss.add(files('virtio-bus.c'))
system_virtio_ss.add(files('iothread-vq-mapping.c'))
+system_virtio_ss.add(files('virtio-config-io.c'))
system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('virtio-pci.c'))
system_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c'))
system_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c'))
@@ -10,18 +11,18 @@ system_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c')
specific_virtio_ss = ss.source_set()
specific_virtio_ss.add(files('virtio.c'))
-specific_virtio_ss.add(files('virtio-config-io.c', 'virtio-qmp.c'))
+specific_virtio_ss.add(files('virtio-qmp.c'))
if have_vhost
system_virtio_ss.add(files('vhost.c'))
- specific_virtio_ss.add(files('vhost-backend.c', 'vhost-iova-tree.c'))
+ system_virtio_ss.add(files('vhost-backend.c', 'vhost-iova-tree.c'))
if have_vhost_user
# fixme - this really should be generic
specific_virtio_ss.add(files('vhost-user.c'))
system_virtio_ss.add(files('vhost-user-base.c'))
# MMIO Stubs
- system_virtio_ss.add(files('vhost-user-device.c'))
+ system_virtio_ss.add(when: 'CONFIG_VHOST_USER_TEST', if_true: files('vhost-user-test-device.c'))
system_virtio_ss.add(when: 'CONFIG_VHOST_USER_GPIO', if_true: files('vhost-user-gpio.c'))
system_virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user-i2c.c'))
system_virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c'))
@@ -29,7 +30,8 @@ if have_vhost
system_virtio_ss.add(when: 'CONFIG_VHOST_USER_INPUT', if_true: files('vhost-user-input.c'))
# PCI Stubs
- system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('vhost-user-device-pci.c'))
+ system_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_TEST'],
+ if_true: files('vhost-user-test-device-pci.c'))
system_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_GPIO'],
if_true: files('vhost-user-gpio-pci.c'))
system_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_I2C'],
@@ -43,22 +45,22 @@ if have_vhost
endif
if have_vhost_vdpa
system_virtio_ss.add(files('vhost-vdpa.c'))
- specific_virtio_ss.add(files('vhost-shadow-virtqueue.c'))
+ system_virtio_ss.add(files('vhost-shadow-virtqueue.c'))
endif
else
system_virtio_ss.add(files('vhost-stub.c'))
endif
+system_virtio_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock.c'))
+system_virtio_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-rng.c'))
specific_virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c'))
specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_FS', if_true: files('vhost-user-fs.c'))
specific_virtio_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem.c'))
specific_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock.c'))
-specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock.c'))
-specific_virtio_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-rng.c'))
-specific_virtio_ss.add(when: 'CONFIG_VIRTIO_NSM', if_true: [files('virtio-nsm.c', 'cbor-helpers.c'), libcbor])
specific_virtio_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem.c'))
-specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_SCMI', if_true: files('vhost-user-scmi.c'))
-specific_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_SCMI'], if_true: files('vhost-user-scmi-pci.c'))
+system_virtio_ss.add(when: 'CONFIG_VIRTIO_NSM', if_true: files('virtio-nsm.c'))
+system_virtio_ss.add(when: 'CONFIG_VIRTIO_NSM', if_true: [files('cbor-helpers.c'), libcbor])
+system_virtio_ss.add(when: 'CONFIG_VHOST_USER_SCMI', if_true: files('vhost-user-scmi.c'))
virtio_pci_ss = ss.source_set()
virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c'))
@@ -67,6 +69,7 @@ virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_BLK', if_true: files('vhost-user-blk-
virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_SCSI', if_true: files('vhost-user-scsi-pci.c'))
virtio_pci_ss.add(when: 'CONFIG_VHOST_SCSI', if_true: files('vhost-scsi-pci.c'))
virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_FS', if_true: files('vhost-user-fs-pci.c'))
+virtio_pci_ss.add(when: 'CONFIG_VHOST_USER_SCMI', if_true: files('vhost-user-scmi-pci.c'))
virtio_pci_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto-pci.c'))
virtio_pci_ss.add(when: 'CONFIG_VIRTIO_INPUT_HOST', if_true: files('virtio-input-host-pci.c'))
@@ -85,7 +88,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem-pci.c'))
virtio_pci_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev-pci.c'))
virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MD', if_true: files('virtio-md-pci.c'))
-specific_virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss)
+system_virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss)
system_ss.add_all(when: 'CONFIG_VIRTIO', if_true: system_virtio_ss)
system_ss.add(when: 'CONFIG_VIRTIO', if_false: files('vhost-stub.c'))
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index 76f0d45..658cc36 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -75,7 +75,6 @@ virtqueue_flush(void *vq, unsigned int count) "vq %p count %u"
virtqueue_pop(void *vq, void *elem, unsigned int in_num, unsigned int out_num) "vq %p elem %p in_num %u out_num %u"
virtio_queue_notify(void *vdev, int n, void *vq) "vdev %p n %d vq %p"
virtio_notify_irqfd_deferred_fn(void *vdev, void *vq) "vdev %p vq %p"
-virtio_notify_irqfd(void *vdev, void *vq) "vdev %p vq %p"
virtio_notify(void *vdev, void *vq) "vdev %p vq %p"
virtio_set_status(void *vdev, uint8_t val) "vdev %p val %u"
diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
index d1da40a..4a7b970 100644
--- a/hw/virtio/vdpa-dev.c
+++ b/hw/virtio/vdpa-dev.c
@@ -338,6 +338,12 @@ static int vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status)
return 0;
}
+static struct vhost_dev *vhost_vdpa_device_get_vhost(VirtIODevice *vdev)
+{
+ VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev);
+ return &s->dev;
+}
+
static const Property vhost_vdpa_device_properties[] = {
DEFINE_PROP_STRING("vhostdev", VhostVdpaDevice, vhostdev),
DEFINE_PROP_UINT16("queue-size", VhostVdpaDevice, queue_size, 0),
@@ -369,6 +375,7 @@ static void vhost_vdpa_device_class_init(ObjectClass *klass, const void *data)
vdc->set_config = vhost_vdpa_device_set_config;
vdc->get_features = vhost_vdpa_device_get_features;
vdc->set_status = vhost_vdpa_device_set_status;
+ vdc->get_vhost = vhost_vdpa_device_get_vhost;
}
static void vhost_vdpa_device_instance_init(Object *obj)
diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
index 833804d..4367db0 100644
--- a/hw/virtio/vhost-backend.c
+++ b/hw/virtio/vhost-backend.c
@@ -20,6 +20,11 @@
#include <linux/vhost.h>
#include <sys/ioctl.h>
+struct vhost_features {
+ uint64_t count;
+ uint64_t features[VIRTIO_FEATURES_NU64S];
+};
+
static int vhost_kernel_call(struct vhost_dev *dev, unsigned long int request,
void *arg)
{
@@ -182,12 +187,6 @@ static int vhost_kernel_get_vring_worker(struct vhost_dev *dev,
return vhost_kernel_call(dev, VHOST_GET_VRING_WORKER, worker);
}
-static int vhost_kernel_set_features(struct vhost_dev *dev,
- uint64_t features)
-{
- return vhost_kernel_call(dev, VHOST_SET_FEATURES, &features);
-}
-
static int vhost_kernel_set_backend_cap(struct vhost_dev *dev)
{
uint64_t features;
@@ -210,10 +209,51 @@ static int vhost_kernel_set_backend_cap(struct vhost_dev *dev)
return 0;
}
-static int vhost_kernel_get_features(struct vhost_dev *dev,
- uint64_t *features)
+static int vhost_kernel_set_features(struct vhost_dev *dev,
+ const uint64_t *features)
{
- return vhost_kernel_call(dev, VHOST_GET_FEATURES, features);
+ struct vhost_features farray;
+ bool extended_in_use;
+ int r;
+
+ farray.count = VIRTIO_FEATURES_NU64S;
+ virtio_features_copy(farray.features, features);
+ extended_in_use = virtio_features_use_ex(farray.features);
+
+ /*
+ * Can't check for ENOTTY: for unknown ioctls the kernel interprets
+ * the argument as a virtio queue id and most likely errors out validating
+ * such id, instead of reporting an unknown operation.
+ */
+ r = vhost_kernel_call(dev, VHOST_SET_FEATURES_ARRAY, &farray);
+ if (!r) {
+ return 0;
+ }
+
+ if (extended_in_use) {
+ error_report("Trying to set extended features without kernel support");
+ return -EINVAL;
+ }
+ return vhost_kernel_call(dev, VHOST_SET_FEATURES, &farray.features[0]);
+}
+
+static int vhost_kernel_get_features(struct vhost_dev *dev, uint64_t *features)
+{
+ struct vhost_features farray;
+ int r;
+
+ farray.count = VIRTIO_FEATURES_NU64S;
+ r = vhost_kernel_call(dev, VHOST_GET_FEATURES_ARRAY, &farray);
+ if (r) {
+ memset(&farray, 0, sizeof(farray));
+ r = vhost_kernel_call(dev, VHOST_GET_FEATURES, &farray.features[0]);
+ }
+ if (r) {
+ return r;
+ }
+
+ virtio_features_copy(features, farray.features);
+ return 0;
}
static int vhost_kernel_set_owner(struct vhost_dev *dev)
@@ -341,8 +381,8 @@ const VhostOps kernel_ops = {
.vhost_attach_vring_worker = vhost_kernel_attach_vring_worker,
.vhost_new_worker = vhost_kernel_new_worker,
.vhost_free_worker = vhost_kernel_free_worker,
- .vhost_set_features = vhost_kernel_set_features,
- .vhost_get_features = vhost_kernel_get_features,
+ .vhost_set_features_ex = vhost_kernel_set_features,
+ .vhost_get_features_ex = vhost_kernel_get_features,
.vhost_set_backend_cap = vhost_kernel_set_backend_cap,
.vhost_set_owner = vhost_kernel_set_owner,
.vhost_get_vq_index = vhost_kernel_get_vq_index,
diff --git a/hw/virtio/vhost-user-device-pci.c b/hw/virtio/vhost-user-test-device-pci.c
index f10bac8..b4ed0ef 100644
--- a/hw/virtio/vhost-user-device-pci.c
+++ b/hw/virtio/vhost-user-test-device-pci.c
@@ -18,13 +18,13 @@ struct VHostUserDevicePCI {
VHostUserBase vub;
};
-#define TYPE_VHOST_USER_DEVICE_PCI "vhost-user-device-pci-base"
+#define TYPE_VHOST_USER_TEST_DEVICE_PCI "vhost-user-test-device-pci-base"
-OBJECT_DECLARE_SIMPLE_TYPE(VHostUserDevicePCI, VHOST_USER_DEVICE_PCI)
+OBJECT_DECLARE_SIMPLE_TYPE(VHostUserDevicePCI, VHOST_USER_TEST_DEVICE_PCI)
static void vhost_user_device_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
{
- VHostUserDevicePCI *dev = VHOST_USER_DEVICE_PCI(vpci_dev);
+ VHostUserDevicePCI *dev = VHOST_USER_TEST_DEVICE_PCI(vpci_dev);
DeviceState *vdev = DEVICE(&dev->vub);
vpci_dev->nvectors = 1;
@@ -38,9 +38,6 @@ static void vhost_user_device_pci_class_init(ObjectClass *klass,
VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
- /* Reason: stop users confusing themselves */
- dc->user_creatable = false;
-
k->realize = vhost_user_device_pci_realize;
set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
@@ -51,15 +48,15 @@ static void vhost_user_device_pci_class_init(ObjectClass *klass,
static void vhost_user_device_pci_instance_init(Object *obj)
{
- VHostUserDevicePCI *dev = VHOST_USER_DEVICE_PCI(obj);
+ VHostUserDevicePCI *dev = VHOST_USER_TEST_DEVICE_PCI(obj);
virtio_instance_init_common(obj, &dev->vub, sizeof(dev->vub),
- TYPE_VHOST_USER_DEVICE);
+ TYPE_VHOST_USER_TEST_DEVICE);
}
static const VirtioPCIDeviceTypeInfo vhost_user_device_pci_info = {
- .base_name = TYPE_VHOST_USER_DEVICE_PCI,
- .non_transitional_name = "vhost-user-device-pci",
+ .base_name = TYPE_VHOST_USER_TEST_DEVICE_PCI,
+ .non_transitional_name = "vhost-user-test-device-pci",
.instance_size = sizeof(VHostUserDevicePCI),
.instance_init = vhost_user_device_pci_instance_init,
.class_init = vhost_user_device_pci_class_init,
diff --git a/hw/virtio/vhost-user-device.c b/hw/virtio/vhost-user-test-device.c
index 3939bdf..1b98ea3 100644
--- a/hw/virtio/vhost-user-device.c
+++ b/hw/virtio/vhost-user-test-device.c
@@ -1,5 +1,5 @@
/*
- * Generic vhost-user-device implementation for any vhost-user-backend
+ * Generic vhost-user-test-device implementation for any vhost-user-backend
*
* This is a concrete implementation of vhost-user-base which can be
* configured via properties. It is useful for development and
@@ -25,7 +25,7 @@
*/
static const VMStateDescription vud_vmstate = {
- .name = "vhost-user-device",
+ .name = "vhost-user-test-device",
.unmigratable = 1,
};
@@ -41,16 +41,13 @@ static void vud_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
- /* Reason: stop inexperienced users confusing themselves */
- dc->user_creatable = false;
-
device_class_set_props(dc, vud_properties);
dc->vmsd = &vud_vmstate;
set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
}
static const TypeInfo vud_info = {
- .name = TYPE_VHOST_USER_DEVICE,
+ .name = TYPE_VHOST_USER_TEST_DEVICE,
.parent = TYPE_VHOST_USER_BASE,
.class_init = vud_class_init,
};
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 1e1d6b0..36c9c2e 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -2039,7 +2039,10 @@ static int vhost_user_postcopy_advise(struct vhost_dev *dev, Error **errp)
error_setg(errp, "%s: Failed to get ufd", __func__);
return -EIO;
}
- qemu_socket_set_nonblock(ufd);
+ if (!qemu_set_blocking(ufd, false, errp)) {
+ close(ufd);
+ return -EINVAL;
+ }
/* register ufd with userfault thread */
u->postcopy_fd.fd = ufd;
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 1ab2c11..7061b6e 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -209,6 +209,8 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
int ret;
Int128 llend;
Error *local_err = NULL;
+ MemoryRegion *mr;
+ hwaddr xlat;
if (iotlb->target_as != &address_space_memory) {
error_report("Wrong target AS \"%s\", only system memory is allowed",
@@ -228,11 +230,14 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
bool read_only;
- if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL,
- &local_err)) {
+ mr = memory_translate_iotlb(iotlb, &xlat, &local_err);
+ if (!mr) {
error_report_err(local_err);
return;
}
+ vaddr = memory_region_get_ram_ptr(mr) + xlat;
+ read_only = !(iotlb->perm & IOMMU_WO) || mr->readonly;
+
ret = vhost_vdpa_dma_map(s, VHOST_VDPA_GUEST_PA_ASID, iova,
iotlb->addr_mask + 1, vaddr, read_only);
if (ret) {
@@ -594,6 +599,36 @@ static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v)
v->shadow_vqs = g_steal_pointer(&shadow_vqs);
}
+static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
+{
+ struct vhost_vdpa *v = dev->opaque;
+
+ uint64_t features;
+ uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
+ 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
+ 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID |
+ 0x1ULL << VHOST_BACKEND_F_SUSPEND;
+ int r;
+
+ if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
+ return -EFAULT;
+ }
+
+ features &= f;
+
+ if (vhost_vdpa_first_dev(dev)) {
+ r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
+ if (r) {
+ return -EFAULT;
+ }
+ }
+
+ dev->backend_cap = features;
+ v->shared->backend_cap = features;
+
+ return 0;
+}
+
static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
{
struct vhost_vdpa *v = opaque;
@@ -603,7 +638,12 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
v->dev = dev;
dev->opaque = opaque ;
- v->shared->listener = vhost_vdpa_memory_listener;
+
+ ret = vhost_vdpa_set_backend_cap(dev);
+ if (unlikely(ret != 0)) {
+ return ret;
+ }
+
vhost_vdpa_init_svq(dev, v);
error_propagate(&dev->migration_blocker, v->migration_blocker);
@@ -639,6 +679,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
VIRTIO_CONFIG_S_DRIVER);
+ v->shared->listener = vhost_vdpa_memory_listener;
return 0;
}
@@ -841,36 +882,6 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev,
return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
}
-static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
-{
- struct vhost_vdpa *v = dev->opaque;
-
- uint64_t features;
- uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
- 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
- 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID |
- 0x1ULL << VHOST_BACKEND_F_SUSPEND;
- int r;
-
- if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
- return -EFAULT;
- }
-
- features &= f;
-
- if (vhost_vdpa_first_dev(dev)) {
- r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
- if (r) {
- return -EFAULT;
- }
- }
-
- dev->backend_cap = features;
- v->shared->backend_cap = features;
-
- return 0;
-}
-
static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
uint32_t *device_id)
{
@@ -888,8 +899,14 @@ static int vhost_vdpa_reset_device(struct vhost_dev *dev)
ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
trace_vhost_vdpa_reset_device(dev);
+ if (ret) {
+ return ret;
+ }
+
+ memory_listener_unregister(&v->shared->listener);
+ v->shared->listener_registered = false;
v->suspended = false;
- return ret;
+ return 0;
}
static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
@@ -1373,7 +1390,15 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
"IOMMU and try again");
return -1;
}
- memory_listener_register(&v->shared->listener, dev->vdev->dma_as);
+ if (v->shared->listener_registered &&
+ dev->vdev->dma_as != v->shared->listener.address_space) {
+ memory_listener_unregister(&v->shared->listener);
+ v->shared->listener_registered = false;
+ }
+ if (!v->shared->listener_registered) {
+ memory_listener_register(&v->shared->listener, dev->vdev->dma_as);
+ v->shared->listener_registered = true;
+ }
return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
}
@@ -1383,8 +1408,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
static void vhost_vdpa_reset_status(struct vhost_dev *dev)
{
- struct vhost_vdpa *v = dev->opaque;
-
if (!vhost_vdpa_last_dev(dev)) {
return;
}
@@ -1392,7 +1415,6 @@ static void vhost_vdpa_reset_status(struct vhost_dev *dev)
vhost_vdpa_reset_device(dev);
vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
VIRTIO_CONFIG_S_DRIVER);
- memory_listener_unregister(&v->shared->listener);
}
static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
@@ -1526,12 +1548,27 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev,
static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
+ int r;
+ struct vhost_vdpa *v;
+
if (!vhost_vdpa_first_dev(dev)) {
return 0;
}
trace_vhost_vdpa_set_owner(dev);
- return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
+ r = vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
+ if (unlikely(r < 0)) {
+ return r;
+ }
+
+ /*
+ * Being optimistic and listening address space memory. If the device
+ * uses vIOMMU, it is changed at vhost_vdpa_dev_start.
+ */
+ v = dev->opaque;
+ memory_listener_register(&v->shared->listener, &address_space_memory);
+ v->shared->listener_registered = true;
+ return 0;
}
static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
@@ -1563,7 +1600,6 @@ const VhostOps vdpa_ops = {
.vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
.vhost_set_vring_call = vhost_vdpa_set_vring_call,
.vhost_get_features = vhost_vdpa_get_features,
- .vhost_set_backend_cap = vhost_vdpa_set_backend_cap,
.vhost_set_owner = vhost_vdpa_set_owner,
.vhost_set_vring_endian = NULL,
.vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c
index 6e40888..107d88b 100644
--- a/hw/virtio/vhost-vsock.c
+++ b/hw/virtio/vhost-vsock.c
@@ -147,9 +147,7 @@ static void vhost_vsock_device_realize(DeviceState *dev, Error **errp)
return;
}
- if (!g_unix_set_fd_nonblocking(vhostfd, true, NULL)) {
- error_setg_errno(errp, errno,
- "vhost-vsock: unable to set non-blocking mode");
+ if (!qemu_set_blocking(vhostfd, false, errp)) {
return;
}
} else {
@@ -160,9 +158,7 @@ static void vhost_vsock_device_realize(DeviceState *dev, Error **errp)
return;
}
- if (!g_unix_set_fd_nonblocking(vhostfd, true, NULL)) {
- error_setg_errno(errp, errno,
- "Failed to set FD nonblocking");
+ if (!qemu_set_blocking(vhostfd, false, errp)) {
return;
}
}
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index fc43853..266a115 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -27,6 +27,7 @@
#include "migration/blocker.h"
#include "migration/qemu-file-types.h"
#include "system/dma.h"
+#include "system/memory.h"
#include "trace.h"
/* enabled until disconnected backend stabilizes */
@@ -47,12 +48,6 @@ static struct vhost_log *vhost_log[VHOST_BACKEND_TYPE_MAX];
static struct vhost_log *vhost_log_shm[VHOST_BACKEND_TYPE_MAX];
static QLIST_HEAD(, vhost_dev) vhost_log_devs[VHOST_BACKEND_TYPE_MAX];
-/* Memslots used by backends that support private memslots (without an fd). */
-static unsigned int used_memslots;
-
-/* Memslots used by backends that only support shared memslots (with an fd). */
-static unsigned int used_shared_memslots;
-
static QLIST_HEAD(, vhost_dev) vhost_devices =
QLIST_HEAD_INITIALIZER(vhost_devices);
@@ -74,15 +69,15 @@ unsigned int vhost_get_free_memslots(void)
QLIST_FOREACH(hdev, &vhost_devices, entry) {
unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev);
- unsigned int cur_free;
+ unsigned int cur_free = r - hdev->mem->nregions;
- if (hdev->vhost_ops->vhost_backend_no_private_memslots &&
- hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) {
- cur_free = r - used_shared_memslots;
+ if (unlikely(r < hdev->mem->nregions)) {
+ warn_report_once("used (%u) vhost backend memory slots exceed"
+ " the device limit (%u).", hdev->mem->nregions, r);
+ free = 0;
} else {
- cur_free = r - used_memslots;
+ free = MIN(free, cur_free);
}
- free = MIN(free, cur_free);
}
return free;
}
@@ -461,7 +456,8 @@ static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr,
hwaddr *plen, bool is_write)
{
if (!vhost_dev_has_iommu(dev)) {
- return cpu_physical_memory_map(addr, plen, is_write);
+ return address_space_map(dev->vdev->dma_as, addr, plen, is_write,
+ MEMTXATTRS_UNSPECIFIED);
} else {
return (void *)(uintptr_t)addr;
}
@@ -472,7 +468,8 @@ static void vhost_memory_unmap(struct vhost_dev *dev, void *buffer,
hwaddr access_len)
{
if (!vhost_dev_has_iommu(dev)) {
- cpu_physical_memory_unmap(buffer, len, is_write, access_len);
+ address_space_unmap(dev->vdev->dma_as, buffer, len, is_write,
+ access_len);
}
}
@@ -666,13 +663,6 @@ static void vhost_commit(MemoryListener *listener)
dev->mem = g_realloc(dev->mem, regions_size);
dev->mem->nregions = dev->n_mem_sections;
- if (dev->vhost_ops->vhost_backend_no_private_memslots &&
- dev->vhost_ops->vhost_backend_no_private_memslots(dev)) {
- used_shared_memslots = dev->mem->nregions;
- } else {
- used_memslots = dev->mem->nregions;
- }
-
for (i = 0; i < dev->n_mem_sections; i++) {
struct vhost_memory_region *cur_vmr = dev->mem->regions + i;
struct MemoryRegionSection *mrs = dev->mem_sections + i;
@@ -985,20 +975,34 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
static int vhost_dev_set_features(struct vhost_dev *dev,
bool enable_log)
{
- uint64_t features = dev->acked_features;
+ uint64_t features[VIRTIO_FEATURES_NU64S];
int r;
+
+ virtio_features_copy(features, dev->acked_features_ex);
if (enable_log) {
- features |= 0x1ULL << VHOST_F_LOG_ALL;
+ virtio_add_feature_ex(features, VHOST_F_LOG_ALL);
}
if (!vhost_dev_has_iommu(dev)) {
- features &= ~(0x1ULL << VIRTIO_F_IOMMU_PLATFORM);
+ virtio_clear_feature_ex(features, VIRTIO_F_IOMMU_PLATFORM);
}
if (dev->vhost_ops->vhost_force_iommu) {
if (dev->vhost_ops->vhost_force_iommu(dev) == true) {
- features |= 0x1ULL << VIRTIO_F_IOMMU_PLATFORM;
+ virtio_add_feature_ex(features, VIRTIO_F_IOMMU_PLATFORM);
}
}
- r = dev->vhost_ops->vhost_set_features(dev, features);
+
+ if (virtio_features_use_ex(features) &&
+ !dev->vhost_ops->vhost_set_features_ex) {
+ r = -EINVAL;
+ VHOST_OPS_DEBUG(r, "extended features without device support");
+ goto out;
+ }
+
+ if (dev->vhost_ops->vhost_set_features_ex) {
+ r = dev->vhost_ops->vhost_set_features_ex(dev, features);
+ } else {
+ r = dev->vhost_ops->vhost_set_features(dev, features[0]);
+ }
if (r < 0) {
VHOST_OPS_DEBUG(r, "vhost_set_features failed");
goto out;
@@ -1123,7 +1127,8 @@ static bool vhost_log_global_start(MemoryListener *listener, Error **errp)
r = vhost_migration_log(listener, true);
if (r < 0) {
- abort();
+ error_setg_errno(errp, -r, "vhost: Failed to start logging");
+ return false;
}
return true;
}
@@ -1134,7 +1139,8 @@ static void vhost_log_global_stop(MemoryListener *listener)
r = vhost_migration_log(listener, false);
if (r < 0) {
- abort();
+ /* Not fatal, so report it, but take no further action */
+ warn_report("vhost: Failed to stop logging");
}
}
@@ -1367,25 +1373,30 @@ fail_alloc_desc:
return r;
}
-int vhost_virtqueue_stop(struct vhost_dev *dev,
- struct VirtIODevice *vdev,
- struct vhost_virtqueue *vq,
- unsigned idx)
+static int do_vhost_virtqueue_stop(struct vhost_dev *dev,
+ struct VirtIODevice *vdev,
+ struct vhost_virtqueue *vq,
+ unsigned idx, bool force)
{
int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
struct vhost_vring_state state = {
.index = vhost_vq_index,
};
- int r;
+ int r = 0;
if (virtio_queue_get_desc_addr(vdev, idx) == 0) {
/* Don't stop the virtqueue which might have not been started */
return 0;
}
- r = dev->vhost_ops->vhost_get_vring_base(dev, &state);
- if (r < 0) {
- VHOST_OPS_DEBUG(r, "vhost VQ %u ring restore failed: %d", idx, r);
+ if (!force) {
+ r = dev->vhost_ops->vhost_get_vring_base(dev, &state);
+ if (r < 0) {
+ VHOST_OPS_DEBUG(r, "vhost VQ %u ring restore failed: %d", idx, r);
+ }
+ }
+
+ if (r < 0 || force) {
/* Connection to the backend is broken, so let's sync internal
* last avail idx to the device used idx.
*/
@@ -1414,6 +1425,14 @@ int vhost_virtqueue_stop(struct vhost_dev *dev,
return r;
}
+int vhost_virtqueue_stop(struct vhost_dev *dev,
+ struct VirtIODevice *vdev,
+ struct vhost_virtqueue *vq,
+ unsigned idx)
+{
+ return do_vhost_virtqueue_stop(dev, vdev, vq, idx, false);
+}
+
static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev,
int n, uint32_t timeout)
{
@@ -1506,12 +1525,27 @@ static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq)
}
}
+static int vhost_dev_get_features(struct vhost_dev *hdev,
+ uint64_t *features)
+{
+ uint64_t features64;
+ int r;
+
+ if (hdev->vhost_ops->vhost_get_features_ex) {
+ return hdev->vhost_ops->vhost_get_features_ex(hdev, features);
+ }
+
+ r = hdev->vhost_ops->vhost_get_features(hdev, &features64);
+ virtio_features_from_u64(features, features64);
+ return r;
+}
+
int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
VhostBackendType backend_type, uint32_t busyloop_timeout,
Error **errp)
{
+ uint64_t features[VIRTIO_FEATURES_NU64S];
unsigned int used, reserved, limit;
- uint64_t features;
int i, r, n_initialized_vqs = 0;
hdev->vdev = NULL;
@@ -1531,7 +1565,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
goto fail;
}
- r = hdev->vhost_ops->vhost_get_features(hdev, &features);
+ r = vhost_dev_get_features(hdev, features);
if (r < 0) {
error_setg_errno(errp, -r, "vhost_get_features failed");
goto fail;
@@ -1569,7 +1603,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
}
}
- hdev->features = features;
+ virtio_features_copy(hdev->features_ex, features);
hdev->memory_listener = (MemoryListener) {
.name = "vhost",
@@ -1592,7 +1626,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
};
if (hdev->migration_blocker == NULL) {
- if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) {
+ if (!virtio_has_feature_ex(hdev->features_ex, VHOST_F_LOG_ALL)) {
error_setg(&hdev->migration_blocker,
"Migration disabled: vhost lacks VHOST_F_LOG_ALL feature.");
} else if (vhost_dev_log_is_shared(hdev) && !qemu_memfd_alloc_check()) {
@@ -1619,15 +1653,11 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
QLIST_INSERT_HEAD(&vhost_devices, hdev, entry);
/*
- * The listener we registered properly updated the corresponding counter.
- * So we can trust that these values are accurate.
+ * The listener we registered properly setup the number of required
+ * memslots in vhost_commit().
*/
- if (hdev->vhost_ops->vhost_backend_no_private_memslots &&
- hdev->vhost_ops->vhost_backend_no_private_memslots(hdev)) {
- used = used_shared_memslots;
- } else {
- used = used_memslots;
- }
+ used = hdev->mem->nregions;
+
/*
* We assume that all reserved memslots actually require a real memslot
* in our vhost backend. This might not be true, for example, if the
@@ -1819,7 +1849,7 @@ void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask)
int r;
EventNotifier *notifier =
&hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier;
- EventNotifier *config_notifier = &vdev->config_notifier;
+ EventNotifier *config_notifier = virtio_config_get_guest_notifier(vdev);
assert(hdev->vhost_ops);
if ((hdev->started == false) ||
@@ -1850,39 +1880,40 @@ static void vhost_stop_config_intr(struct vhost_dev *dev)
static void vhost_start_config_intr(struct vhost_dev *dev)
{
int r;
+ EventNotifier *config_notifier =
+ virtio_config_get_guest_notifier(dev->vdev);
assert(dev->vhost_ops);
- int fd = event_notifier_get_fd(&dev->vdev->config_notifier);
+ int fd = event_notifier_get_fd(config_notifier);
if (dev->vhost_ops->vhost_set_config_call) {
r = dev->vhost_ops->vhost_set_config_call(dev, fd);
if (!r) {
- event_notifier_set(&dev->vdev->config_notifier);
+ event_notifier_set(config_notifier);
}
}
}
-uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
- uint64_t features)
+void vhost_get_features_ex(struct vhost_dev *hdev,
+ const int *feature_bits,
+ uint64_t *features)
{
const int *bit = feature_bits;
+
while (*bit != VHOST_INVALID_FEATURE_BIT) {
- uint64_t bit_mask = (1ULL << *bit);
- if (!(hdev->features & bit_mask)) {
- features &= ~bit_mask;
+ if (!virtio_has_feature_ex(hdev->features_ex, *bit)) {
+ virtio_clear_feature_ex(features, *bit);
}
bit++;
}
- return features;
}
-void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits,
- uint64_t features)
+void vhost_ack_features_ex(struct vhost_dev *hdev, const int *feature_bits,
+ const uint64_t *features)
{
const int *bit = feature_bits;
while (*bit != VHOST_INVALID_FEATURE_BIT) {
- uint64_t bit_mask = (1ULL << *bit);
- if (features & bit_mask) {
- hdev->acked_features |= bit_mask;
+ if (virtio_has_feature_ex(features, *bit)) {
+ virtio_add_feature_ex(hdev->acked_features_ex, *bit);
}
bit++;
}
@@ -2136,16 +2167,18 @@ fail_features:
}
/* Host notifiers must be enabled at this point. */
-int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
+static int do_vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev,
+ bool vrings, bool force)
{
int i;
int rc = 0;
+ EventNotifier *config_notifier = virtio_config_get_guest_notifier(vdev);
/* should only be called after backend is connected */
assert(hdev->vhost_ops);
event_notifier_test_and_clear(
&hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier);
- event_notifier_test_and_clear(&vdev->config_notifier);
+ event_notifier_test_and_clear(config_notifier);
event_notifier_cleanup(
&hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier);
@@ -2158,10 +2191,11 @@ int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
vhost_dev_set_vring_enable(hdev, false);
}
for (i = 0; i < hdev->nvqs; ++i) {
- rc |= vhost_virtqueue_stop(hdev,
- vdev,
- hdev->vqs + i,
- hdev->vq_index + i);
+ rc |= do_vhost_virtqueue_stop(hdev,
+ vdev,
+ hdev->vqs + i,
+ hdev->vq_index + i,
+ force);
}
if (hdev->vhost_ops->vhost_reset_status) {
hdev->vhost_ops->vhost_reset_status(hdev);
@@ -2181,6 +2215,17 @@ int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
return rc;
}
+int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
+{
+ return do_vhost_dev_stop(hdev, vdev, vrings, false);
+}
+
+int vhost_dev_force_stop(struct vhost_dev *hdev, VirtIODevice *vdev,
+ bool vrings)
+{
+ return do_vhost_dev_stop(hdev, vdev, vrings, true);
+}
+
int vhost_net_set_backend(struct vhost_dev *hdev,
struct vhost_vring_file *file)
{
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index db787d0..02cdd80 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -23,6 +23,7 @@
#include "hw/qdev-properties.h"
#include "hw/boards.h"
#include "system/balloon.h"
+#include "system/ramblock.h"
#include "hw/virtio/virtio-balloon.h"
#include "system/address-spaces.h"
#include "qapi/error.h"
diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
index 11adfbf..cef944e 100644
--- a/hw/virtio/virtio-bus.c
+++ b/hw/virtio/virtio-bus.c
@@ -62,9 +62,14 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp)
}
/* Get the features of the plugged device. */
- assert(vdc->get_features != NULL);
- vdev->host_features = vdc->get_features(vdev, vdev->host_features,
- &local_err);
+ if (vdc->get_features_ex) {
+ vdc->get_features_ex(vdev, vdev->host_features_ex, &local_err);
+ } else {
+ assert(vdc->get_features != NULL);
+ virtio_features_from_u64(vdev->host_features_ex,
+ vdc->get_features(vdev, vdev->host_features,
+ &local_err));
+ }
if (local_err) {
error_propagate(errp, local_err);
return;
diff --git a/hw/virtio/virtio-config-io.c b/hw/virtio/virtio-config-io.c
index ad78e0b..f58d90b 100644
--- a/hw/virtio/virtio-config-io.c
+++ b/hw/virtio/virtio-config-io.c
@@ -11,7 +11,6 @@
#include "qemu/osdep.h"
#include "hw/virtio/virtio.h"
-#include "cpu.h"
uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
{
diff --git a/hw/virtio/virtio-hmp-cmds.c b/hw/virtio/virtio-hmp-cmds.c
index 7d8677b..1daae48 100644
--- a/hw/virtio/virtio-hmp-cmds.c
+++ b/hw/virtio/virtio-hmp-cmds.c
@@ -74,7 +74,8 @@ static void hmp_virtio_dump_features(Monitor *mon,
}
if (features->has_unknown_dev_features) {
- monitor_printf(mon, " unknown-features(0x%016"PRIx64")\n",
+ monitor_printf(mon, " unknown-features(0x%016"PRIx64"%016"PRIx64")\n",
+ features->unknown_dev_features2,
features->unknown_dev_features);
}
}
diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index a3d1a67..15ba679 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -17,6 +17,7 @@
#include "qemu/units.h"
#include "system/numa.h"
#include "system/system.h"
+#include "system/ramblock.h"
#include "system/reset.h"
#include "system/runstate.h"
#include "hw/virtio/virtio.h"
@@ -24,7 +25,6 @@
#include "hw/virtio/virtio-mem.h"
#include "qapi/error.h"
#include "qapi/visitor.h"
-#include "system/ram_addr.h"
#include "migration/misc.h"
#include "hw/boards.h"
#include "hw/qdev-properties.h"
@@ -244,28 +244,6 @@ static int virtio_mem_for_each_plugged_range(VirtIOMEM *vmem, void *arg,
return ret;
}
-/*
- * Adjust the memory section to cover the intersection with the given range.
- *
- * Returns false if the intersection is empty, otherwise returns true.
- */
-static bool virtio_mem_intersect_memory_section(MemoryRegionSection *s,
- uint64_t offset, uint64_t size)
-{
- uint64_t start = MAX(s->offset_within_region, offset);
- uint64_t end = MIN(s->offset_within_region + int128_get64(s->size),
- offset + size);
-
- if (end <= start) {
- return false;
- }
-
- s->offset_within_address_space += start - s->offset_within_region;
- s->offset_within_region = start;
- s->size = int128_make64(end - start);
- return true;
-}
-
typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg);
static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
@@ -287,7 +265,7 @@ static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
first_bit + 1) - 1;
size = (last_bit - first_bit + 1) * vmem->block_size;
- if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
break;
}
ret = cb(&tmp, arg);
@@ -319,7 +297,7 @@ static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem,
first_bit + 1) - 1;
size = (last_bit - first_bit + 1) * vmem->block_size;
- if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
break;
}
ret = cb(&tmp, arg);
@@ -355,7 +333,7 @@ static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset,
QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
MemoryRegionSection tmp = *rdl->section;
- if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
continue;
}
rdl->notify_discard(rdl, &tmp);
@@ -371,7 +349,7 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
MemoryRegionSection tmp = *rdl->section;
- if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
continue;
}
ret = rdl->notify_populate(rdl, &tmp);
@@ -388,7 +366,7 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
if (rdl2 == rdl) {
break;
}
- if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
continue;
}
rdl2->notify_discard(rdl2, &tmp);
@@ -1070,6 +1048,17 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
}
/*
+ * Set ourselves as RamDiscardManager before the plug handler maps the
+ * memory region and exposes it via an address space.
+ */
+ if (memory_region_set_ram_discard_manager(&vmem->memdev->mr,
+ RAM_DISCARD_MANAGER(vmem))) {
+ error_setg(errp, "Failed to set RamDiscardManager");
+ ram_block_coordinated_discard_require(false);
+ return;
+ }
+
+ /*
* We don't know at this point whether shared RAM is migrated using
* QEMU or migrated using the file content. "x-ignore-shared" will be
* configured after realizing the device. So in case we have an
@@ -1083,6 +1072,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));
if (ret) {
error_setg_errno(errp, -ret, "Unexpected error discarding RAM");
+ memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
ram_block_coordinated_discard_require(false);
return;
}
@@ -1144,13 +1134,6 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
vmem->system_reset = VIRTIO_MEM_SYSTEM_RESET(obj);
vmem->system_reset->vmem = vmem;
qemu_register_resettable(obj);
-
- /*
- * Set ourselves as RamDiscardManager before the plug handler maps the
- * memory region and exposes it via an address space.
- */
- memory_region_set_ram_discard_manager(&vmem->memdev->mr,
- RAM_DISCARD_MANAGER(vmem));
}
static void virtio_mem_device_unrealize(DeviceState *dev)
@@ -1158,12 +1141,6 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VirtIOMEM *vmem = VIRTIO_MEM(dev);
- /*
- * The unplug handler unmapped the memory region, it cannot be
- * found via an address space anymore. Unset ourselves.
- */
- memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
-
qemu_unregister_resettable(OBJECT(vmem->system_reset));
object_unref(OBJECT(vmem->system_reset));
@@ -1176,6 +1153,11 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
virtio_del_queue(vdev, 0);
virtio_cleanup(vdev);
g_free(vmem->bitmap);
+ /*
+ * The unplug handler unmapped the memory region, it cannot be
+ * found via an address space anymore. Unset ourselves.
+ */
+ memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
ram_block_coordinated_discard_require(false);
}
@@ -1750,7 +1732,7 @@ static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm,
}
struct VirtIOMEMReplayData {
- void *fn;
+ ReplayRamDiscardState fn;
void *opaque;
};
@@ -1758,12 +1740,12 @@ static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg)
{
struct VirtIOMEMReplayData *data = arg;
- return ((ReplayRamPopulate)data->fn)(s, data->opaque);
+ return data->fn(s, data->opaque);
}
static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm,
MemoryRegionSection *s,
- ReplayRamPopulate replay_fn,
+ ReplayRamDiscardState replay_fn,
void *opaque)
{
const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
@@ -1782,14 +1764,13 @@ static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s,
{
struct VirtIOMEMReplayData *data = arg;
- ((ReplayRamDiscard)data->fn)(s, data->opaque);
- return 0;
+ return data->fn(s, data->opaque);
}
-static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
- MemoryRegionSection *s,
- ReplayRamDiscard replay_fn,
- void *opaque)
+static int virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
+ MemoryRegionSection *s,
+ ReplayRamDiscardState replay_fn,
+ void *opaque)
{
const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
struct VirtIOMEMReplayData data = {
@@ -1798,8 +1779,8 @@ static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
};
g_assert(s->mr == &vmem->memdev->mr);
- virtio_mem_for_each_unplugged_section(vmem, s, &data,
- virtio_mem_rdm_replay_discarded_cb);
+ return virtio_mem_for_each_unplugged_section(vmem, s, &data,
+ virtio_mem_rdm_replay_discarded_cb);
}
static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm,
diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
index 532c671..fb58c36 100644
--- a/hw/virtio/virtio-mmio.c
+++ b/hw/virtio/virtio-mmio.c
@@ -34,6 +34,7 @@
#include "qemu/error-report.h"
#include "qemu/log.h"
#include "trace.h"
+#include "qapi/error.h"
static bool virtio_mmio_ioeventfd_enabled(DeviceState *d)
{
@@ -612,14 +613,14 @@ static void virtio_mmio_save_extra_state(DeviceState *opaque, QEMUFile *f)
{
VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque);
- vmstate_save_state(f, &vmstate_virtio_mmio, proxy, NULL);
+ vmstate_save_state(f, &vmstate_virtio_mmio, proxy, NULL, &error_fatal);
}
static int virtio_mmio_load_extra_state(DeviceState *opaque, QEMUFile *f)
{
VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque);
- return vmstate_load_state(f, &vmstate_virtio_mmio, proxy, 1);
+ return vmstate_load_state(f, &vmstate_virtio_mmio, proxy, 1, &error_fatal);
}
static bool virtio_mmio_has_extra_state(DeviceState *opaque)
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 9b48aa8..937e22f 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -30,9 +30,11 @@
#include "qemu/error-report.h"
#include "qemu/log.h"
#include "qemu/module.h"
+#include "qemu/bswap.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "hw/loader.h"
+#include "system/accel-irq.h"
#include "system/kvm.h"
#include "hw/virtio/virtio-pci.h"
#include "qemu/range.h"
@@ -108,6 +110,29 @@ static const VMStateDescription vmstate_virtio_pci_modern_queue_state = {
}
};
+static bool virtio_pci_modern_state_features128_needed(void *opaque)
+{
+ VirtIOPCIProxy *proxy = opaque;
+ uint32_t features = 0;
+ int i;
+
+ for (i = 2; i < ARRAY_SIZE(proxy->guest_features); ++i) {
+ features |= proxy->guest_features[i];
+ }
+ return features;
+}
+
+static const VMStateDescription vmstate_virtio_pci_modern_state_features128 = {
+ .name = "virtio_pci/modern_state/features128",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = &virtio_pci_modern_state_features128_needed,
+ .fields = (const VMStateField[]) {
+ VMSTATE_UINT32_SUB_ARRAY(guest_features, VirtIOPCIProxy, 2, 2),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
static bool virtio_pci_modern_state_needed(void *opaque)
{
VirtIOPCIProxy *proxy = opaque;
@@ -115,6 +140,12 @@ static bool virtio_pci_modern_state_needed(void *opaque)
return virtio_pci_modern(proxy);
}
+/*
+ * Avoid silently breaking migration should the feature space increase
+ * even more in the (far away) future
+ */
+QEMU_BUILD_BUG_ON(VIRTIO_FEATURES_NU32S != 4);
+
static const VMStateDescription vmstate_virtio_pci_modern_state_sub = {
.name = "virtio_pci/modern_state",
.version_id = 1,
@@ -123,11 +154,15 @@ static const VMStateDescription vmstate_virtio_pci_modern_state_sub = {
.fields = (const VMStateField[]) {
VMSTATE_UINT32(dfselect, VirtIOPCIProxy),
VMSTATE_UINT32(gfselect, VirtIOPCIProxy),
- VMSTATE_UINT32_ARRAY(guest_features, VirtIOPCIProxy, 2),
+ VMSTATE_UINT32_SUB_ARRAY(guest_features, VirtIOPCIProxy, 0, 2),
VMSTATE_STRUCT_ARRAY(vqs, VirtIOPCIProxy, VIRTIO_QUEUE_MAX, 0,
vmstate_virtio_pci_modern_queue_state,
VirtIOPCIQueue),
VMSTATE_END_OF_LIST()
+ },
+ .subsections = (const VMStateDescription * const []) {
+ &vmstate_virtio_pci_modern_state_features128,
+ NULL
}
};
@@ -146,23 +181,21 @@ static const VMStateDescription vmstate_virtio_pci = {
static bool virtio_pci_has_extra_state(DeviceState *d)
{
- VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
-
- return proxy->flags & VIRTIO_PCI_FLAG_MIGRATE_EXTRA;
+ return true;
}
static void virtio_pci_save_extra_state(DeviceState *d, QEMUFile *f)
{
VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
- vmstate_save_state(f, &vmstate_virtio_pci, proxy, NULL);
+ vmstate_save_state(f, &vmstate_virtio_pci, proxy, NULL, &error_fatal);
}
static int virtio_pci_load_extra_state(DeviceState *d, QEMUFile *f)
{
VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
- return vmstate_load_state(f, &vmstate_virtio_pci, proxy, 1);
+ return vmstate_load_state(f, &vmstate_virtio_pci, proxy, 1, &error_fatal);
}
static void virtio_pci_save_queue(DeviceState *d, int n, QEMUFile *f)
@@ -826,11 +859,11 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
if (irqfd->users == 0) {
KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state);
- ret = kvm_irqchip_add_msi_route(&c, vector, &proxy->pci_dev);
+ ret = accel_irqchip_add_msi_route(&c, vector, &proxy->pci_dev);
if (ret < 0) {
return ret;
}
- kvm_irqchip_commit_route_changes(&c);
+ accel_irqchip_commit_route_changes(&c);
irqfd->virq = ret;
}
irqfd->users++;
@@ -842,7 +875,7 @@ static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
{
VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
if (--irqfd->users == 0) {
- kvm_irqchip_release_virq(kvm_state, irqfd->virq);
+ accel_irqchip_release_virq(irqfd->virq);
}
}
@@ -851,7 +884,7 @@ static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy,
unsigned int vector)
{
VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
- return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, irqfd->virq);
+ return accel_irqchip_add_irqfd_notifier_gsi(n, NULL, irqfd->virq);
}
static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy,
@@ -861,7 +894,7 @@ static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy,
VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
int ret;
- ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, irqfd->virq);
+ ret = accel_irqchip_remove_irqfd_notifier_gsi(n, irqfd->virq);
assert(ret == 0);
}
static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no,
@@ -996,12 +1029,12 @@ static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy,
if (proxy->vector_irqfd) {
irqfd = &proxy->vector_irqfd[vector];
if (irqfd->msg.data != msg.data || irqfd->msg.address != msg.address) {
- ret = kvm_irqchip_update_msi_route(kvm_state, irqfd->virq, msg,
- &proxy->pci_dev);
+ ret = accel_irqchip_update_msi_route(irqfd->virq, msg,
+ &proxy->pci_dev);
if (ret < 0) {
return ret;
}
- kvm_irqchip_commit_routes(kvm_state);
+ accel_irqchip_commit_routes();
}
}
@@ -1215,7 +1248,12 @@ static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign,
static bool virtio_pci_query_guest_notifiers(DeviceState *d)
{
VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
- return msix_enabled(&proxy->pci_dev);
+
+ if (msix_enabled(&proxy->pci_dev)) {
+ return true;
+ } else {
+ return pci_irq_disabled(&proxy->pci_dev);
+ }
}
static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
@@ -1225,7 +1263,7 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
int r, n;
bool with_irqfd = msix_enabled(&proxy->pci_dev) &&
- kvm_msi_via_irqfd_enabled();
+ accel_msi_via_irqfd_enabled() ;
nvqs = MIN(nvqs, VIRTIO_QUEUE_MAX);
@@ -1429,7 +1467,7 @@ static void virtio_pci_set_vector(VirtIODevice *vdev,
uint16_t new_vector)
{
bool kvm_irqfd = (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) &&
- msix_enabled(&proxy->pci_dev) && kvm_msi_via_irqfd_enabled();
+ msix_enabled(&proxy->pci_dev) && accel_msi_via_irqfd_enabled();
if (new_vector == old_vector) {
return;
@@ -1473,6 +1511,19 @@ int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy,
return virtio_pci_add_mem_cap(proxy, &cap.cap);
}
+static int virtio_pci_select_max(const VirtIODevice *vdev)
+{
+ int i;
+
+ for (i = VIRTIO_FEATURES_NU64S - 1; i > 0; i--) {
+ if (vdev->host_features_ex[i]) {
+ return (i + 1) * 2;
+ }
+ }
+
+ return 2;
+}
+
static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr,
unsigned size)
{
@@ -1490,18 +1541,21 @@ static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr,
val = proxy->dfselect;
break;
case VIRTIO_PCI_COMMON_DF:
- if (proxy->dfselect <= 1) {
+ if (proxy->dfselect < virtio_pci_select_max(vdev)) {
VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
- val = (vdev->host_features & ~vdc->legacy_features) >>
- (32 * proxy->dfselect);
+ val = vdev->host_features_ex[proxy->dfselect >> 1] >>
+ (32 * (proxy->dfselect & 1));
+ if (proxy->dfselect <= 1) {
+ val &= (~vdc->legacy_features) >> (32 * proxy->dfselect);
+ }
}
break;
case VIRTIO_PCI_COMMON_GFSELECT:
val = proxy->gfselect;
break;
case VIRTIO_PCI_COMMON_GF:
- if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) {
+ if (proxy->gfselect < virtio_pci_select_max(vdev)) {
val = proxy->guest_features[proxy->gfselect];
}
break;
@@ -1584,11 +1638,18 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr,
proxy->gfselect = val;
break;
case VIRTIO_PCI_COMMON_GF:
- if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) {
+ if (proxy->gfselect < virtio_pci_select_max(vdev)) {
+ uint64_t features[VIRTIO_FEATURES_NU64S];
+ int i;
+
proxy->guest_features[proxy->gfselect] = val;
- virtio_set_features(vdev,
- (((uint64_t)proxy->guest_features[1]) << 32) |
- proxy->guest_features[0]);
+ virtio_features_clear(features);
+ for (i = 0; i < ARRAY_SIZE(proxy->guest_features); ++i) {
+ uint64_t cur = proxy->guest_features[i];
+
+ features[i >> 1] |= cur << ((i & 1) * 32);
+ }
+ virtio_set_features_ex(vdev, features);
}
break;
case VIRTIO_PCI_COMMON_MSIX:
@@ -2307,6 +2368,8 @@ static void virtio_pci_reset(DeviceState *qdev)
virtio_bus_reset(bus);
msix_unuse_all_vectors(&proxy->pci_dev);
+ memset(proxy->guest_features, 0, sizeof(proxy->guest_features));
+
for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
proxy->vqs[i].enabled = 0;
proxy->vqs[i].reset = 0;
@@ -2363,12 +2426,8 @@ static void virtio_pci_bus_reset_hold(Object *obj, ResetType type)
static const Property virtio_pci_properties[] = {
DEFINE_PROP_BIT("virtio-pci-bus-master-bug-migration", VirtIOPCIProxy, flags,
VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, false),
- DEFINE_PROP_BIT("migrate-extra", VirtIOPCIProxy, flags,
- VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, true),
DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags,
VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, false),
- DEFINE_PROP_BIT("x-disable-pcie", VirtIOPCIProxy, flags,
- VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
DEFINE_PROP_BOOL("x-ignore-backend-features", VirtIOPCIProxy,
@@ -2397,8 +2456,7 @@ static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp)
VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
PCIDevice *pci_dev = &proxy->pci_dev;
- if (!(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_PCIE) &&
- virtio_pci_modern(proxy)) {
+ if (virtio_pci_modern(proxy)) {
pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
}
diff --git a/hw/virtio/virtio-qmp.c b/hw/virtio/virtio-qmp.c
index 3b6377c..b338344 100644
--- a/hw/virtio/virtio-qmp.c
+++ b/hw/virtio/virtio-qmp.c
@@ -325,6 +325,20 @@ static const qmp_virtio_feature_map_t virtio_net_feature_map[] = {
FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \
"VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features "
"negotiation supported"),
+ FEATURE_ENTRY(VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO, \
+ "VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO: Driver can receive GSO over "
+ "UDP tunnel packets"),
+ FEATURE_ENTRY(VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM, \
+ "VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO: Driver can receive GSO over "
+ "UDP tunnel packets requiring checksum offload for the outer "
+ "header"),
+ FEATURE_ENTRY(VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO, \
+ "VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO: Device can receive GSO over "
+ "UDP tunnel packets"),
+ FEATURE_ENTRY(VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM, \
+ "VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM: Device can receive GSO over "
+ "UDP tunnel packets requiring checksum offload for the outer "
+ "header"),
{ -1, "" }
};
#endif
@@ -510,6 +524,24 @@ static const qmp_virtio_feature_map_t virtio_gpio_feature_map[] = {
list; \
})
+#define CONVERT_FEATURES_EX(type, map, bitmap) \
+ ({ \
+ type *list = NULL; \
+ type *node; \
+ for (i = 0; map[i].virtio_bit != -1; i++) { \
+ bit = map[i].virtio_bit; \
+ if (!virtio_has_feature_ex(bitmap, bit)) { \
+ continue; \
+ } \
+ node = g_new0(type, 1); \
+ node->value = g_strdup(map[i].feature_desc); \
+ node->next = list; \
+ list = node; \
+ virtio_clear_feature_ex(bitmap, bit); \
+ } \
+ list; \
+ })
+
VirtioDeviceStatus *qmp_decode_status(uint8_t bitmap)
{
VirtioDeviceStatus *status;
@@ -545,109 +577,112 @@ VhostDeviceProtocols *qmp_decode_protocols(uint64_t bitmap)
return vhu_protocols;
}
-VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id, uint64_t bitmap)
+VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id,
+ const uint64_t *bmap)
{
+ uint64_t bitmap[VIRTIO_FEATURES_NU64S];
VirtioDeviceFeatures *features;
uint64_t bit;
int i;
+ virtio_features_copy(bitmap, bmap);
features = g_new0(VirtioDeviceFeatures, 1);
features->has_dev_features = true;
/* transport features */
- features->transports = CONVERT_FEATURES(strList, virtio_transport_map, 0,
- bitmap);
+ features->transports = CONVERT_FEATURES_EX(strList, virtio_transport_map,
+ bitmap);
/* device features */
switch (device_id) {
#ifdef CONFIG_VIRTIO_SERIAL
case VIRTIO_ID_CONSOLE:
features->dev_features =
- CONVERT_FEATURES(strList, virtio_serial_feature_map, 0, bitmap);
+ CONVERT_FEATURES_EX(strList, virtio_serial_feature_map, bitmap);
break;
#endif
#ifdef CONFIG_VIRTIO_BLK
case VIRTIO_ID_BLOCK:
features->dev_features =
- CONVERT_FEATURES(strList, virtio_blk_feature_map, 0, bitmap);
+ CONVERT_FEATURES_EX(strList, virtio_blk_feature_map, bitmap);
break;
#endif
#ifdef CONFIG_VIRTIO_GPU
case VIRTIO_ID_GPU:
features->dev_features =
- CONVERT_FEATURES(strList, virtio_gpu_feature_map, 0, bitmap);
+ CONVERT_FEATURES_EX(strList, virtio_gpu_feature_map, bitmap);
break;
#endif
#ifdef CONFIG_VIRTIO_NET
case VIRTIO_ID_NET:
features->dev_features =
- CONVERT_FEATURES(strList, virtio_net_feature_map, 0, bitmap);
+ CONVERT_FEATURES_EX(strList, virtio_net_feature_map, bitmap);
break;
#endif
#ifdef CONFIG_VIRTIO_SCSI
case VIRTIO_ID_SCSI:
features->dev_features =
- CONVERT_FEATURES(strList, virtio_scsi_feature_map, 0, bitmap);
+ CONVERT_FEATURES_EX(strList, virtio_scsi_feature_map, bitmap);
break;
#endif
#ifdef CONFIG_VIRTIO_BALLOON
case VIRTIO_ID_BALLOON:
features->dev_features =
- CONVERT_FEATURES(strList, virtio_balloon_feature_map, 0, bitmap);
+ CONVERT_FEATURES_EX(strList, virtio_balloon_feature_map, bitmap);
break;
#endif
#ifdef CONFIG_VIRTIO_IOMMU
case VIRTIO_ID_IOMMU:
features->dev_features =
- CONVERT_FEATURES(strList, virtio_iommu_feature_map, 0, bitmap);
+ CONVERT_FEATURES_EX(strList, virtio_iommu_feature_map, bitmap);
break;
#endif
#ifdef CONFIG_VIRTIO_INPUT
case VIRTIO_ID_INPUT:
features->dev_features =
- CONVERT_FEATURES(strList, virtio_input_feature_map, 0, bitmap);
+ CONVERT_FEATURES_EX(strList, virtio_input_feature_map, bitmap);
break;
#endif
#ifdef CONFIG_VHOST_USER_FS
case VIRTIO_ID_FS:
features->dev_features =
- CONVERT_FEATURES(strList, virtio_fs_feature_map, 0, bitmap);
+ CONVERT_FEATURES_EX(strList, virtio_fs_feature_map, bitmap);
break;
#endif
#ifdef CONFIG_VHOST_VSOCK
case VIRTIO_ID_VSOCK:
features->dev_features =
- CONVERT_FEATURES(strList, virtio_vsock_feature_map, 0, bitmap);
+ CONVERT_FEATURES_EX(strList, virtio_vsock_feature_map, bitmap);
break;
#endif
#ifdef CONFIG_VIRTIO_CRYPTO
case VIRTIO_ID_CRYPTO:
features->dev_features =
- CONVERT_FEATURES(strList, virtio_crypto_feature_map, 0, bitmap);
+ CONVERT_FEATURES_EX(strList, virtio_crypto_feature_map, bitmap);
break;
#endif
#ifdef CONFIG_VIRTIO_MEM
case VIRTIO_ID_MEM:
features->dev_features =
- CONVERT_FEATURES(strList, virtio_mem_feature_map, 0, bitmap);
+ CONVERT_FEATURES_EX(strList, virtio_mem_feature_map, bitmap);
break;
#endif
#ifdef CONFIG_VIRTIO_I2C_ADAPTER
case VIRTIO_ID_I2C_ADAPTER:
features->dev_features =
- CONVERT_FEATURES(strList, virtio_i2c_feature_map, 0, bitmap);
+ CONVERT_FEATURES_EX(strList, virtio_i2c_feature_map, bitmap);
break;
#endif
#ifdef CONFIG_VIRTIO_RNG
case VIRTIO_ID_RNG:
features->dev_features =
- CONVERT_FEATURES(strList, virtio_rng_feature_map, 0, bitmap);
+ CONVERT_FEATURES_EX(strList, virtio_rng_feature_map, bitmap);
break;
#endif
#ifdef CONFIG_VHOST_USER_GPIO
case VIRTIO_ID_GPIO:
features->dev_features =
- CONVERT_FEATURES(strList, virtio_gpio_feature_map, 0, bitmap);
+ CONVERT_FEATURES_EX(strList, virtio_gpio_feature_map, bitmap);
break;
#endif
/* No features */
@@ -680,10 +715,9 @@ VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id, uint64_t bitmap)
g_assert_not_reached();
}
- features->has_unknown_dev_features = bitmap != 0;
- if (features->has_unknown_dev_features) {
- features->unknown_dev_features = bitmap;
- }
+ features->has_unknown_dev_features = !virtio_features_empty(bitmap);
+ features->unknown_dev_features = bitmap[0];
+ features->unknown_dev_features2 = bitmap[1];
return features;
}
@@ -743,11 +777,11 @@ VirtioStatus *qmp_x_query_virtio_status(const char *path, Error **errp)
status->device_id = vdev->device_id;
status->vhost_started = vdev->vhost_started;
status->guest_features = qmp_decode_features(vdev->device_id,
- vdev->guest_features);
+ vdev->guest_features_ex);
status->host_features = qmp_decode_features(vdev->device_id,
- vdev->host_features);
+ vdev->host_features_ex);
status->backend_features = qmp_decode_features(vdev->device_id,
- vdev->backend_features);
+ vdev->backend_features_ex);
switch (vdev->device_endian) {
case VIRTIO_DEVICE_ENDIAN_LITTLE:
@@ -785,11 +819,12 @@ VirtioStatus *qmp_x_query_virtio_status(const char *path, Error **errp)
status->vhost_dev->nvqs = hdev->nvqs;
status->vhost_dev->vq_index = hdev->vq_index;
status->vhost_dev->features =
- qmp_decode_features(vdev->device_id, hdev->features);
+ qmp_decode_features(vdev->device_id, hdev->features_ex);
status->vhost_dev->acked_features =
- qmp_decode_features(vdev->device_id, hdev->acked_features);
+ qmp_decode_features(vdev->device_id, hdev->acked_features_ex);
status->vhost_dev->backend_features =
- qmp_decode_features(vdev->device_id, hdev->backend_features);
+ qmp_decode_features(vdev->device_id, hdev->backend_features_ex);
+
status->vhost_dev->protocol_features =
qmp_decode_protocols(hdev->protocol_features);
status->vhost_dev->max_queues = hdev->max_queues;
diff --git a/hw/virtio/virtio-qmp.h b/hw/virtio/virtio-qmp.h
index 245a446..e0a1e49 100644
--- a/hw/virtio/virtio-qmp.h
+++ b/hw/virtio/virtio-qmp.h
@@ -18,6 +18,7 @@
VirtIODevice *qmp_find_virtio_device(const char *path);
VirtioDeviceStatus *qmp_decode_status(uint8_t bitmap);
VhostDeviceProtocols *qmp_decode_protocols(uint64_t bitmap);
-VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id, uint64_t bitmap);
+VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id,
+ const uint64_t *bitmap);
#endif
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 2e98cec..153ee0a 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -20,7 +20,7 @@
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
-#include "exec/tswap.h"
+#include "qemu/target-info.h"
#include "qom/object_interfaces.h"
#include "hw/core/cpu.h"
#include "hw/virtio/virtio.h"
@@ -31,6 +31,8 @@
#include "hw/qdev-properties.h"
#include "hw/virtio/virtio-access.h"
#include "system/dma.h"
+#include "system/iothread.h"
+#include "system/memory.h"
#include "system/runstate.h"
#include "virtio-qmp.h"
@@ -205,6 +207,15 @@ static const char *virtio_id_to_name(uint16_t device_id)
return name;
}
+static void virtio_check_indirect_feature(VirtIODevice *vdev)
+{
+ if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "Device %s: indirect_desc was not negotiated!\n",
+ vdev->name);
+ }
+}
+
/* Called within call_rcu(). */
static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
{
@@ -247,7 +258,10 @@ void virtio_init_region_cache(VirtIODevice *vdev, int n)
len = address_space_cache_init(&new->desc, vdev->dma_as,
addr, size, packed);
if (len < size) {
- virtio_error(vdev, "Cannot map desc");
+ virtio_error(vdev,
+ "Failed to map descriptor ring for device %s: "
+ "invalid guest physical address or corrupted queue setup",
+ qdev_get_printable_name(DEVICE(vdev)));
goto err_desc;
}
@@ -255,7 +269,10 @@ void virtio_init_region_cache(VirtIODevice *vdev, int n)
len = address_space_cache_init(&new->used, vdev->dma_as,
vq->vring.used, size, true);
if (len < size) {
- virtio_error(vdev, "Cannot map used");
+ virtio_error(vdev,
+ "Failed to map used ring for device %s: "
+ "possible guest misconfiguration or insufficient memory",
+ qdev_get_printable_name(DEVICE(vdev)));
goto err_used;
}
@@ -263,7 +280,10 @@ void virtio_init_region_cache(VirtIODevice *vdev, int n)
len = address_space_cache_init(&new->avail, vdev->dma_as,
vq->vring.avail, size, false);
if (len < size) {
- virtio_error(vdev, "Cannot map avail");
+ virtio_error(vdev,
+ "Failed to map avalaible ring for device %s: "
+ "possible queue misconfiguration or overlapping memory region",
+ qdev_get_printable_name(DEVICE(vdev)));
goto err_avail;
}
@@ -929,18 +949,18 @@ static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
static void virtqueue_ordered_fill(VirtQueue *vq, const VirtQueueElement *elem,
unsigned int len)
{
- unsigned int i, steps, max_steps;
+ unsigned int i, steps, max_steps, ndescs;
i = vq->used_idx % vq->vring.num;
steps = 0;
/*
- * We shouldn't need to increase 'i' by more than the distance
- * between used_idx and last_avail_idx.
+ * We shouldn't need to increase 'i' by more than or equal to
+ * the distance between used_idx and last_avail_idx (max_steps).
*/
max_steps = (vq->last_avail_idx - vq->used_idx) % vq->vring.num;
/* Search for element in vq->used_elems */
- while (steps <= max_steps) {
+ while (steps < max_steps) {
/* Found element, set length and mark as filled */
if (vq->used_elems[i].index == elem->index) {
vq->used_elems[i].len = len;
@@ -948,8 +968,18 @@ static void virtqueue_ordered_fill(VirtQueue *vq, const VirtQueueElement *elem,
break;
}
- i += vq->used_elems[i].ndescs;
- steps += vq->used_elems[i].ndescs;
+ ndescs = vq->used_elems[i].ndescs;
+
+ /* Defensive sanity check */
+ if (unlikely(ndescs == 0 || ndescs > vq->vring.num)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: %s invalid ndescs %u at position %u\n",
+ __func__, vq->vdev->name, ndescs, i);
+ return;
+ }
+
+ i += ndescs;
+ steps += ndescs;
if (i >= vq->vring.num) {
i -= vq->vring.num;
@@ -1603,7 +1633,8 @@ out:
* virtqueue_unmap_sg() can't be used). Assumes buffers weren't written to
* yet.
*/
-static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
+static void virtqueue_undo_map_desc(AddressSpace *as,
+ unsigned int out_num, unsigned int in_num,
struct iovec *iov)
{
unsigned int i;
@@ -1611,7 +1642,7 @@ static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
for (i = 0; i < out_num + in_num; i++) {
int is_write = i >= out_num;
- cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
+ address_space_unmap(as, iov->iov_base, iov->iov_len, is_write, 0);
iov++;
}
}
@@ -1680,8 +1711,8 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
VirtIODevice *vdev = vq->vdev;
VirtQueueElement *elem = NULL;
unsigned out_num, in_num, elem_entries;
- hwaddr addr[VIRTQUEUE_MAX_SIZE];
- struct iovec iov[VIRTQUEUE_MAX_SIZE];
+ hwaddr QEMU_UNINITIALIZED addr[VIRTQUEUE_MAX_SIZE];
+ struct iovec QEMU_UNINITIALIZED iov[VIRTQUEUE_MAX_SIZE];
VRingDesc desc;
int rc;
@@ -1733,6 +1764,7 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
virtio_error(vdev, "Invalid size for indirect buffer table");
goto done;
}
+ virtio_check_indirect_feature(vdev);
/* loop over the indirect descriptor table */
len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
@@ -1812,7 +1844,7 @@ done:
return elem;
err_undo_map:
- virtqueue_undo_map_desc(out_num, in_num, iov);
+ virtqueue_undo_map_desc(vdev->dma_as, out_num, in_num, iov);
goto done;
}
@@ -1826,8 +1858,8 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
VirtIODevice *vdev = vq->vdev;
VirtQueueElement *elem = NULL;
unsigned out_num, in_num, elem_entries;
- hwaddr addr[VIRTQUEUE_MAX_SIZE];
- struct iovec iov[VIRTQUEUE_MAX_SIZE];
+ hwaddr QEMU_UNINITIALIZED addr[VIRTQUEUE_MAX_SIZE];
+ struct iovec QEMU_UNINITIALIZED iov[VIRTQUEUE_MAX_SIZE];
VRingPackedDesc desc;
uint16_t id;
int rc;
@@ -1870,6 +1902,7 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
virtio_error(vdev, "Invalid size for indirect buffer table");
goto done;
}
+ virtio_check_indirect_feature(vdev);
/* loop over the indirect descriptor table */
len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
@@ -1961,7 +1994,7 @@ done:
return elem;
err_undo_map:
- virtqueue_undo_map_desc(out_num, in_num, iov);
+ virtqueue_undo_map_desc(vdev->dma_as, out_num, in_num, iov);
goto done;
}
@@ -2633,16 +2666,8 @@ static void virtio_notify_irqfd_deferred_fn(void *opaque)
event_notifier_set(notifier);
}
-void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
+static void virtio_irq(VirtQueue *vq)
{
- WITH_RCU_READ_LOCK_GUARD() {
- if (!virtio_should_notify(vdev, vq)) {
- return;
- }
- }
-
- trace_virtio_notify_irqfd(vdev, vq);
-
/*
* virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
* windows drivers included in virtio-win 1.8.0 (circa 2015) are
@@ -2659,13 +2684,18 @@ void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
* to an atomic operation.
*/
virtio_set_isr(vq->vdev, 0x1);
- defer_call(virtio_notify_irqfd_deferred_fn, &vq->guest_notifier);
-}
-static void virtio_irq(VirtQueue *vq)
-{
- virtio_set_isr(vq->vdev, 0x1);
- virtio_notify_vector(vq->vdev, vq->vector);
+ /*
+ * The interrupt code path requires the Big QEMU Lock (BQL), so use the
+ * notifier instead when in an IOThread. This assumes that device models
+ * have already called ->set_guest_notifiers() sometime before calling this
+ * function.
+ */
+ if (qemu_in_iothread()) {
+ defer_call(virtio_notify_irqfd_deferred_fn, &vq->guest_notifier);
+ } else {
+ virtio_notify_vector(vq->vdev, vq->vector);
+ }
}
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
@@ -2687,7 +2717,12 @@ void virtio_notify_config(VirtIODevice *vdev)
virtio_set_isr(vdev, 0x3);
vdev->generation++;
- virtio_notify_vector(vdev, vdev->config_vector);
+
+ if (qemu_in_iothread()) {
+ defer_call(virtio_notify_irqfd_deferred_fn, &vdev->config_notifier);
+ } else {
+ virtio_notify_vector(vdev, vdev->config_vector);
+ }
}
static bool virtio_device_endian_needed(void *opaque)
@@ -2943,6 +2978,30 @@ static const VMStateDescription vmstate_virtio_disabled = {
}
};
+static bool virtio_128bit_features_needed(void *opaque)
+{
+ VirtIODevice *vdev = opaque;
+
+ return virtio_features_use_ex(vdev->host_features_ex);
+}
+
+static const VMStateDescription vmstate_virtio_128bit_features = {
+ .name = "virtio/128bit_features",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = &virtio_128bit_features_needed,
+ .fields = (const VMStateField[]) {
+ VMSTATE_UINT64(guest_features_ex[1], VirtIODevice),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+/*
+ * Avoid silently breaking migration should the feature space increase
+ * even more in the (far away) future
+ */
+QEMU_BUILD_BUG_ON(VIRTIO_FEATURES_NU64S != 2);
+
static const VMStateDescription vmstate_virtio = {
.name = "virtio",
.version_id = 1,
@@ -2952,6 +3011,7 @@ static const VMStateDescription vmstate_virtio = {
},
.subsections = (const VMStateDescription * const []) {
&vmstate_virtio_device_endian,
+ &vmstate_virtio_128bit_features,
&vmstate_virtio_64bit_features,
&vmstate_virtio_virtqueues,
&vmstate_virtio_ringsize,
@@ -2971,6 +3031,7 @@ int virtio_save(VirtIODevice *vdev, QEMUFile *f)
VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
int i;
+ Error *local_err = NULL;
if (k->save_config) {
k->save_config(qbus->parent, f);
@@ -3014,14 +3075,15 @@ int virtio_save(VirtIODevice *vdev, QEMUFile *f)
}
if (vdc->vmsd) {
- int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
+ int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL, &local_err);
if (ret) {
+ error_report_err(local_err);
return ret;
}
}
/* Subsections */
- return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
+ return vmstate_save_state(f, &vmstate_virtio, vdev, NULL, &error_fatal);
}
/* A wrapper for use as a VMState .put function */
@@ -3048,23 +3110,30 @@ const VMStateInfo virtio_vmstate_info = {
.put = virtio_device_put,
};
-static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
+static int virtio_set_features_nocheck(VirtIODevice *vdev, const uint64_t *val)
{
VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
- bool bad = (val & ~(vdev->host_features)) != 0;
+ uint64_t tmp[VIRTIO_FEATURES_NU64S];
+ bool bad;
+
+ bad = virtio_features_andnot(tmp, val, vdev->host_features_ex);
+ virtio_features_and(tmp, val, vdev->host_features_ex);
- val &= vdev->host_features;
- if (k->set_features) {
- k->set_features(vdev, val);
+ if (k->set_features_ex) {
+ k->set_features_ex(vdev, val);
+ } else if (k->set_features) {
+ bad = bad || virtio_features_use_ex(tmp);
+ k->set_features(vdev, tmp[0]);
}
- vdev->guest_features = val;
+
+ virtio_features_copy(vdev->guest_features_ex, tmp);
return bad ? -1 : 0;
}
typedef struct VirtioSetFeaturesNocheckData {
Coroutine *co;
VirtIODevice *vdev;
- uint64_t val;
+ uint64_t val[VIRTIO_FEATURES_NU64S];
int ret;
} VirtioSetFeaturesNocheckData;
@@ -3077,14 +3146,15 @@ static void virtio_set_features_nocheck_bh(void *opaque)
}
static int coroutine_mixed_fn
-virtio_set_features_nocheck_maybe_co(VirtIODevice *vdev, uint64_t val)
+virtio_set_features_nocheck_maybe_co(VirtIODevice *vdev,
+ const uint64_t *val)
{
if (qemu_in_coroutine()) {
VirtioSetFeaturesNocheckData data = {
.co = qemu_coroutine_self(),
.vdev = vdev,
- .val = val,
};
+ virtio_features_copy(data.val, val);
aio_bh_schedule_oneshot(qemu_get_current_aio_context(),
virtio_set_features_nocheck_bh, &data);
qemu_coroutine_yield();
@@ -3096,6 +3166,14 @@ virtio_set_features_nocheck_maybe_co(VirtIODevice *vdev, uint64_t val)
int virtio_set_features(VirtIODevice *vdev, uint64_t val)
{
+ uint64_t features[VIRTIO_FEATURES_NU64S];
+
+ virtio_features_from_u64(features, val);
+ return virtio_set_features_ex(vdev, features);
+}
+
+int virtio_set_features_ex(VirtIODevice *vdev, const uint64_t *features)
+{
int ret;
/*
* The driver must not attempt to set features after feature negotiation
@@ -3105,13 +3183,13 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t val)
return -EINVAL;
}
- if (val & (1ull << VIRTIO_F_BAD_FEATURE)) {
+ if (features[0] & (1ull << VIRTIO_F_BAD_FEATURE)) {
qemu_log_mask(LOG_GUEST_ERROR,
"%s: guest driver for %s has enabled UNUSED(30) feature bit!\n",
__func__, vdev->name);
}
- ret = virtio_set_features_nocheck(vdev, val);
+ ret = virtio_set_features_nocheck(vdev, features);
if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
/* VIRTIO_RING_F_EVENT_IDX changes the size of the caches. */
int i;
@@ -3134,6 +3212,7 @@ void virtio_reset(void *opaque)
{
VirtIODevice *vdev = opaque;
VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+ uint64_t features[VIRTIO_FEATURES_NU64S];
int i;
virtio_set_status(vdev, 0);
@@ -3160,7 +3239,8 @@ void virtio_reset(void *opaque)
vdev->start_on_kick = false;
vdev->started = false;
vdev->broken = false;
- virtio_set_features_nocheck(vdev, 0);
+ virtio_features_clear(features);
+ virtio_set_features_nocheck(vdev, features);
vdev->queue_sel = 0;
vdev->status = 0;
vdev->disabled = false;
@@ -3214,6 +3294,7 @@ virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+ Error *local_err = NULL;
/*
* We poison the endianness to ensure it does not get used before
@@ -3243,7 +3324,7 @@ virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
* Note: devices should always test host features in future - don't create
* new dependencies like this.
*/
- vdev->guest_features = features;
+ virtio_features_from_u64(vdev->guest_features_ex, features);
config_len = qemu_get_be32(f);
@@ -3259,13 +3340,6 @@ virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
config_len--;
}
- if (vdc->pre_load_queues) {
- ret = vdc->pre_load_queues(vdev);
- if (ret) {
- return ret;
- }
- }
-
num = qemu_get_be32(f);
if (num > VIRTIO_QUEUE_MAX) {
@@ -3273,6 +3347,13 @@ virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
return -1;
}
+ if (vdc->pre_load_queues) {
+ ret = vdc->pre_load_queues(vdev, num);
+ if (ret) {
+ return ret;
+ }
+ }
+
for (i = 0; i < num; i++) {
vdev->vq[i].vring.num = qemu_get_be32(f);
if (k->has_variable_vring_alignment) {
@@ -3306,15 +3387,17 @@ virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
}
if (vdc->vmsd) {
- ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
+ ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id, &local_err);
if (ret) {
+ error_report_err(local_err);
return ret;
}
}
/* Subsections */
- ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
+ ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1, &local_err);
if (ret) {
+ error_report_err(local_err);
return ret;
}
@@ -3322,26 +3405,17 @@ virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
vdev->device_endian = virtio_default_endian();
}
- if (virtio_64bit_features_needed(vdev)) {
- /*
- * Subsection load filled vdev->guest_features. Run them
- * through virtio_set_features to sanity-check them against
- * host_features.
- */
- uint64_t features64 = vdev->guest_features;
- if (virtio_set_features_nocheck_maybe_co(vdev, features64) < 0) {
- error_report("Features 0x%" PRIx64 " unsupported. "
- "Allowed features: 0x%" PRIx64,
- features64, vdev->host_features);
- return -1;
- }
- } else {
- if (virtio_set_features_nocheck_maybe_co(vdev, features) < 0) {
- error_report("Features 0x%x unsupported. "
- "Allowed features: 0x%" PRIx64,
- features, vdev->host_features);
- return -1;
- }
+ /*
+ * guest_features_ex is fully initialized with u32 features and upper
+ * bits have been filled as needed by the later load.
+ */
+ if (virtio_set_features_nocheck_maybe_co(vdev,
+ vdev->guest_features_ex) < 0) {
+ error_report("Features 0x" VIRTIO_FEATURES_FMT " unsupported. "
+ "Allowed features: 0x" VIRTIO_FEATURES_FMT,
+ VIRTIO_FEATURES_PR(vdev->guest_features_ex),
+ VIRTIO_FEATURES_PR(vdev->host_features_ex));
+ return -1;
}
if (!virtio_device_started(vdev, vdev->status) &&
diff --git a/hw/vmapple/virtio-blk.c b/hw/vmapple/virtio-blk.c
index 532b564..9de9aaa 100644
--- a/hw/vmapple/virtio-blk.c
+++ b/hw/vmapple/virtio-blk.c
@@ -19,7 +19,6 @@
#include "hw/vmapple/vmapple.h"
#include "hw/virtio/virtio-blk.h"
#include "hw/virtio/virtio-pci.h"
-#include "qemu/bswap.h"
#include "qemu/log.h"
#include "qemu/module.h"
#include "qapi/error.h"
diff --git a/hw/vmapple/vmapple.c b/hw/vmapple/vmapple.c
index 16e6110..1e4365f 100644
--- a/hw/vmapple/vmapple.c
+++ b/hw/vmapple/vmapple.c
@@ -51,6 +51,8 @@
#include "system/reset.h"
#include "system/runstate.h"
#include "system/system.h"
+#include "target/arm/gtimer.h"
+#include "target/arm/cpu.h"
struct VMAppleMachineState {
MachineState parent;
diff --git a/hw/watchdog/wdt_i6300esb.c b/hw/watchdog/wdt_i6300esb.c
index bb8a276..3aa01b8 100644
--- a/hw/watchdog/wdt_i6300esb.c
+++ b/hw/watchdog/wdt_i6300esb.c
@@ -55,7 +55,7 @@
/* Config register bits */
#define ESB_WDT_REBOOT (0x01 << 5) /* Enable reboot on timeout */
#define ESB_WDT_FREQ (0x01 << 2) /* Decrement frequency */
-#define ESB_WDT_INTTYPE (0x11 << 0) /* Interrupt type on timer1 timeout */
+#define ESB_WDT_INTTYPE (0x03 << 0) /* Interrupt type on timer1 timeout */
/* Reload register bits */
#define ESB_WDT_RELOAD (0x01 << 8) /* prevent timeout */
diff --git a/hw/xen/xen-hvm-common.c b/hw/xen/xen-hvm-common.c
index 78e0bc8..52e2cce 100644
--- a/hw/xen/xen-hvm-common.c
+++ b/hw/xen/xen-hvm-common.c
@@ -12,6 +12,7 @@
#include "hw/xen/xen-bus.h"
#include "hw/boards.h"
#include "hw/xen/arch_hvm.h"
+#include "system/memory.h"
#include "system/runstate.h"
#include "system/system.h"
#include "system/xen.h"
@@ -279,8 +280,8 @@ static void do_outp(uint32_t addr,
* memory, as part of the implementation of an ioreq.
*
* Equivalent to
- * cpu_physical_memory_rw(addr + (req->df ? -1 : +1) * req->size * i,
- * val, req->size, 0/1)
+ * address_space_rw(as, addr + (req->df ? -1 : +1) * req->size * i,
+ * attrs, val, req->size, 0/1)
* except without the integer overflow problems.
*/
static void rw_phys_req_item(hwaddr addr,
@@ -295,7 +296,8 @@ static void rw_phys_req_item(hwaddr addr,
} else {
addr += offset;
}
- cpu_physical_memory_rw(addr, val, req->size, rw);
+ address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
+ val, req->size, rw);
}
static inline void read_phys_req_item(hwaddr addr,
diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
index 9d16644..006b5b5 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c
@@ -54,6 +54,7 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
+#include "qemu/error-report.h"
#include <sys/ioctl.h>
#include "hw/pci/pci.h"
diff --git a/hw/xen/xen_pt_msi.c b/hw/xen/xen_pt_msi.c
index 09cca4e..e9ba173 100644
--- a/hw/xen/xen_pt_msi.c
+++ b/hw/xen/xen_pt_msi.c
@@ -637,14 +637,5 @@ void xen_pt_msix_unmap(XenPCIPassthroughState *s)
void xen_pt_msix_delete(XenPCIPassthroughState *s)
{
- XenPTMSIX *msix = s->msix;
-
- if (!msix) {
- return;
- }
-
- object_unparent(OBJECT(&msix->mmio));
-
- g_free(s->msix);
- s->msix = NULL;
+ g_clear_pointer(&s->msix, g_free);
}
diff --git a/hw/xtensa/xtfpga.c b/hw/xtensa/xtfpga.c
index 6efffae..55de1a7 100644
--- a/hw/xtensa/xtfpga.c
+++ b/hw/xtensa/xtfpga.c
@@ -268,7 +268,7 @@ static void xtfpga_init(const XtfpgaBoardDesc *board, MachineState *machine)
/* Need MMU initialized prior to ELF loading,
* so that ELF gets loaded into virtual addresses
*/
- cpu_reset(CPU(cpu));
+ reset_mmu(cenv);
}
if (smp_cpus > 1) {
extints = xtensa_mx_pic_get_extints(mx_pic);