aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
Diffstat (limited to 'hw')
-rw-r--r--hw/Kconfig1
-rw-r--r--hw/acpi/nvdimm.c2
-rw-r--r--hw/acpi/pcihp.c2
-rw-r--r--hw/acpi/vmgenid.c6
-rw-r--r--hw/arm/Kconfig2
-rw-r--r--hw/arm/aspeed.c285
-rw-r--r--hw/arm/aspeed_ast27x0-fc.c10
-rw-r--r--hw/arm/aspeed_ast27x0.c14
-rw-r--r--hw/arm/aspeed_eeprom.c21
-rw-r--r--hw/arm/aspeed_eeprom.h3
-rw-r--r--hw/arm/boot.c18
-rw-r--r--hw/arm/fby35.c1
-rw-r--r--hw/arm/meson.build4
-rw-r--r--hw/arm/mps2.c4
-rw-r--r--hw/arm/npcm8xx.c55
-rw-r--r--hw/arm/sbsa-ref.c8
-rw-r--r--hw/arm/virt-acpi-build.c186
-rw-r--r--hw/arm/virt.c53
-rw-r--r--hw/audio/ac97.c4
-rw-r--r--hw/audio/asc.c9
-rw-r--r--hw/audio/cs4231a.c4
-rw-r--r--hw/audio/es1370.c2
-rw-r--r--hw/audio/gus.c2
-rw-r--r--hw/audio/marvell_88w8618.c2
-rw-r--r--hw/audio/sb16.c2
-rw-r--r--hw/audio/via-ac97.c2
-rw-r--r--hw/block/Kconfig3
-rw-r--r--hw/block/meson.build1
-rw-r--r--hw/block/nand.c835
-rw-r--r--hw/char/sclpconsole-lm.c2
-rw-r--r--hw/char/sh_serial.c24
-rw-r--r--hw/char/sifive_uart.c6
-rw-r--r--hw/core/loader.c14
-rw-r--r--hw/core/machine.c22
-rw-r--r--hw/core/meson.build4
-rw-r--r--hw/core/qdev-properties-system.c1
-rw-r--r--hw/display/apple-gfx.m10
-rw-r--r--hw/display/virtio-gpu-virgl.c44
-rw-r--r--hw/display/vmware_vga.c2
-rw-r--r--hw/dma/xlnx_csu_dma.c2
-rw-r--r--hw/gpio/pca9552.c2
-rw-r--r--hw/hyperv/hv-balloon.c9
-rw-r--r--hw/hyperv/syndbg.c2
-rw-r--r--hw/i386/Kconfig6
-rw-r--r--hw/i386/amd_iommu.c20
-rw-r--r--hw/i386/kvm/apic.c5
-rw-r--r--hw/i386/kvm/xen-stubs.c13
-rw-r--r--hw/i386/kvm/xen_evtchn.c2
-rw-r--r--hw/i386/meson.build1
-rw-r--r--hw/i386/monitor.c2
-rw-r--r--hw/i386/pc.c71
-rw-r--r--hw/i386/pc_piix.c31
-rw-r--r--hw/i386/pc_q35.c26
-rw-r--r--hw/i386/pc_sysfw.c7
-rw-r--r--hw/i386/sgx-stub.c6
-rw-r--r--hw/i386/sgx.c34
-rw-r--r--hw/i386/tdvf-hob.c130
-rw-r--r--hw/i386/tdvf-hob.h26
-rw-r--r--hw/i386/tdvf.c189
-rw-r--r--hw/i386/x86-common.c6
-rw-r--r--hw/i386/x86.c1
-rw-r--r--hw/intc/arm_gic.c2
-rw-r--r--hw/intc/arm_gic_common.c1
-rw-r--r--hw/intc/armv7m_nvic.c27
-rw-r--r--hw/intc/aspeed_intc.c12
-rw-r--r--hw/intc/loongarch_extioi.c49
-rw-r--r--hw/intc/loongarch_extioi_kvm.c140
-rw-r--r--hw/intc/loongarch_ipi.c29
-rw-r--r--hw/intc/loongarch_ipi_kvm.c85
-rw-r--r--hw/intc/loongarch_pch_msi.c10
-rw-r--r--hw/intc/loongarch_pch_pic.c47
-rw-r--r--hw/intc/loongarch_pic_kvm.c89
-rw-r--r--hw/intc/loongson_ipi_common.c33
-rw-r--r--hw/intc/meson.build6
-rw-r--r--hw/intc/riscv_aclint.c5
-rw-r--r--hw/intc/riscv_aplic.c12
-rw-r--r--hw/intc/riscv_imsic.c10
-rw-r--r--hw/loongarch/boot.c52
-rw-r--r--hw/loongarch/virt-acpi-build.c4
-rw-r--r--hw/loongarch/virt.c92
-rw-r--r--hw/meson.build1
-rw-r--r--hw/microblaze/petalogix_ml605_mmu.c15
-rw-r--r--hw/microblaze/petalogix_s3adsp1800_mmu.c42
-rw-r--r--hw/microblaze/xlnx-zynqmp-pmu.c7
-rw-r--r--hw/misc/aspeed_hace.c479
-rw-r--r--hw/misc/aspeed_scu.c22
-rw-r--r--hw/misc/aspeed_sdmc.c3
-rw-r--r--hw/misc/ivshmem-flat.c2
-rw-r--r--hw/misc/stm32_rcc.c2
-rw-r--r--hw/misc/trace-events8
-rw-r--r--hw/net/e1000.c95
-rw-r--r--hw/net/fsl_etsec/etsec.c1
-rw-r--r--hw/net/i82596.c38
-rw-r--r--hw/net/rocker/rocker.h14
-rw-r--r--hw/net/rocker/rocker_hw.h20
-rw-r--r--hw/net/rocker/rocker_of_dpa.c40
-rw-r--r--hw/net/rtl8139.c2
-rw-r--r--hw/net/tulip.c2
-rw-r--r--hw/net/virtio-net.c6
-rw-r--r--hw/net/vmxnet3.c44
-rw-r--r--hw/net/vmxnet3.h4
-rw-r--r--hw/net/xgmac.c2
-rw-r--r--hw/nvme/ctrl.c8
-rw-r--r--hw/nvram/fw_cfg.c110
-rw-r--r--hw/pci-host/gt64120.c82
-rw-r--r--hw/pci-host/ppce500.c8
-rw-r--r--hw/pci-host/raven.c87
-rw-r--r--hw/pci/msix.c2
-rw-r--r--hw/pci/pci.c211
-rw-r--r--hw/pci/pci_host.c6
-rw-r--r--hw/pci/pcie.c78
-rw-r--r--hw/ppc/e500.c28
-rw-r--r--hw/ppc/e500.h4
-rw-r--r--hw/ppc/e500plat.c2
-rw-r--r--hw/ppc/mpc8544ds.c2
-rw-r--r--hw/ppc/pnv_occ.c2
-rw-r--r--hw/ppc/prep.c27
-rw-r--r--hw/ppc/spapr.c2
-rw-r--r--hw/ppc/spapr_tpm_proxy.c4
-rw-r--r--hw/riscv/Kconfig9
-rw-r--r--hw/riscv/boot.c2
-rw-r--r--hw/riscv/meson.build1
-rw-r--r--hw/riscv/riscv-iommu-bits.h1
-rw-r--r--hw/riscv/riscv-iommu-pci.c6
-rw-r--r--hw/riscv/riscv-iommu-sys.c6
-rw-r--r--hw/riscv/riscv-iommu.c9
-rw-r--r--hw/riscv/virt-acpi-build.c15
-rw-r--r--hw/riscv/virt.c71
-rw-r--r--hw/riscv/xiangshan_kmh.c220
-rw-r--r--hw/s390x/ap-stub.c21
-rw-r--r--hw/s390x/ccw-device.c2
-rw-r--r--hw/s390x/cpu-topology.c4
-rw-r--r--hw/s390x/event-facility.c2
-rw-r--r--hw/s390x/meson.build2
-rw-r--r--hw/s390x/s390-skeys.c1
-rw-r--r--hw/s390x/s390-virtio-ccw.c36
-rw-r--r--hw/s390x/sclpcpi.c212
-rw-r--r--hw/scsi/lsi53c895a.c2
-rw-r--r--hw/scsi/megasas.c7
-rw-r--r--hw/scsi/scsi-disk.c55
-rw-r--r--hw/scsi/vmw_pvscsi.c67
-rw-r--r--hw/scsi/vmw_pvscsi.h4
-rw-r--r--hw/timer/hpet.c187
-rw-r--r--hw/ufs/lu.c2
-rw-r--r--hw/usb/hcd-ohci.c2
-rw-r--r--hw/vfio-user/Kconfig7
-rw-r--r--hw/vfio-user/container.c361
-rw-r--r--hw/vfio-user/container.h23
-rw-r--r--hw/vfio-user/device.c441
-rw-r--r--hw/vfio-user/device.h24
-rw-r--r--hw/vfio-user/meson.build11
-rw-r--r--hw/vfio-user/pci.c475
-rw-r--r--hw/vfio-user/protocol.h242
-rw-r--r--hw/vfio-user/proxy.c1356
-rw-r--r--hw/vfio-user/proxy.h135
-rw-r--r--hw/vfio-user/trace-events20
-rw-r--r--hw/vfio-user/trace.h4
-rw-r--r--hw/vfio/Kconfig2
-rw-r--r--hw/vfio/ap.c90
-rw-r--r--hw/vfio/ccw.c4
-rw-r--r--hw/vfio/container-base.c13
-rw-r--r--hw/vfio/container.c108
-rw-r--r--hw/vfio/cpr-iommufd.c225
-rw-r--r--hw/vfio/cpr-legacy.c284
-rw-r--r--hw/vfio/cpr.c182
-rw-r--r--hw/vfio/device.c79
-rw-r--r--hw/vfio/helpers.c11
-rw-r--r--hw/vfio/igd.c22
-rw-r--r--hw/vfio/iommufd-stubs.c18
-rw-r--r--hw/vfio/iommufd.c126
-rw-r--r--hw/vfio/listener.c95
-rw-r--r--hw/vfio/meson.build5
-rw-r--r--hw/vfio/pci.c399
-rw-r--r--hw/vfio/pci.h22
-rw-r--r--hw/vfio/platform.c2
-rw-r--r--hw/vfio/region.c9
-rw-r--r--hw/vfio/trace-events11
-rw-r--r--hw/vfio/trace.h3
-rw-r--r--hw/vfio/vfio-cpr.h15
-rw-r--r--hw/virtio/vhost-vdpa.c116
-rw-r--r--hw/virtio/virtio-mem.c83
-rw-r--r--hw/virtio/virtio-pci.c18
-rw-r--r--hw/virtio/virtio.c19
183 files changed, 7971 insertions, 2513 deletions
diff --git a/hw/Kconfig b/hw/Kconfig
index 9a86a6a..9e6c789 100644
--- a/hw/Kconfig
+++ b/hw/Kconfig
@@ -42,6 +42,7 @@ source ufs/Kconfig
source usb/Kconfig
source virtio/Kconfig
source vfio/Kconfig
+source vfio-user/Kconfig
source vmapple/Kconfig
source xen/Kconfig
source watchdog/Kconfig
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 9ba9080..732d613 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -535,7 +535,7 @@ nvdimm_dsm_no_payload(uint32_t func_ret_status, hwaddr dsm_mem_addr)
#define NVDIMM_QEMU_RSVD_HANDLE_ROOT 0x10000
-/* Read FIT data, defined in docs/specs/acpi_nvdimm.txt. */
+/* Read FIT data, defined in docs/specs/acpi_nvdimm.rst. */
static void nvdimm_dsm_func_read_fit(NVDIMMState *state, NvdimmDsmIn *in,
hwaddr dsm_mem_addr)
{
diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c
index aac9001..497281a 100644
--- a/hw/acpi/pcihp.c
+++ b/hw/acpi/pcihp.c
@@ -3,7 +3,7 @@
*
* QEMU supports PCI hotplug via ACPI. This module
* implements the interface between QEMU and the ACPI BIOS.
- * Interface specification - see docs/specs/acpi_pci_hotplug.txt
+ * Interface specification - see docs/specs/acpi_pci_hotplug.rst
*
* Copyright (c) 2013, Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)
* Copyright (c) 2006 Fabrice Bellard
diff --git a/hw/acpi/vmgenid.c b/hw/acpi/vmgenid.c
index fac3d6d..33c35c8 100644
--- a/hw/acpi/vmgenid.c
+++ b/hw/acpi/vmgenid.c
@@ -38,7 +38,7 @@ void vmgenid_build_acpi(VmGenIdState *vms, GArray *table_data, GArray *guid,
guid_le = qemu_uuid_bswap(vms->guid);
/* The GUID is written at a fixed offset into the fw_cfg file
* in order to implement the "OVMF SDT Header probe suppressor"
- * see docs/specs/vmgenid.txt for more details
+ * see docs/specs/vmgenid.rst for more details
*/
g_array_insert_vals(guid, VMGENID_GUID_OFFSET, guid_le.data,
ARRAY_SIZE(guid_le.data));
@@ -101,7 +101,7 @@ void vmgenid_build_acpi(VmGenIdState *vms, GArray *table_data, GArray *guid,
* < 4GB, but write 64 bits anyway.
* The address that is patched in is offset in order to implement
* the "OVMF SDT Header probe suppressor"
- * see docs/specs/vmgenid.txt for more details.
+ * see docs/specs/vmgenid.rst for more details.
*/
bios_linker_loader_write_pointer(linker,
VMGENID_ADDR_FW_CFG_FILE, 0, sizeof(uint64_t),
@@ -153,7 +153,7 @@ static void vmgenid_update_guest(VmGenIdState *vms)
guid_le = qemu_uuid_bswap(vms->guid);
/* The GUID is written at a fixed offset into the fw_cfg file
* in order to implement the "OVMF SDT Header probe suppressor"
- * see docs/specs/vmgenid.txt for more details.
+ * see docs/specs/vmgenid.rst for more details.
*/
cpu_physical_memory_write(vmgenid_addr, guid_le.data,
sizeof(guid_le.data));
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index a55b44d..6ea8653 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -147,7 +147,6 @@ config OMAP
bool
select FRAMEBUFFER
select I2C
- select NAND
select PFLASH_CFI01
select SD
select SERIAL_MM
@@ -533,6 +532,7 @@ config ASPEED_SOC
select I2C
select DPS310
select PCA9552
+ select PCA9554
select SERIAL_MM
select SMBUS_EEPROM
select PCA954X
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index d0b3336..c31bbe7 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -19,6 +19,7 @@
#include "hw/i2c/i2c_mux_pca954x.h"
#include "hw/i2c/smbus_eeprom.h"
#include "hw/gpio/pca9552.h"
+#include "hw/gpio/pca9554.h"
#include "hw/nvram/eeprom_at24c.h"
#include "hw/sensor/tmp105.h"
#include "hw/misc/led.h"
@@ -197,9 +198,12 @@ struct AspeedMachineState {
#define FUJI_BMC_HW_STRAP2 0x00000000
/* Bletchley hardware value */
-/* TODO: Leave same as EVB for now. */
-#define BLETCHLEY_BMC_HW_STRAP1 AST2600_EVB_HW_STRAP1
-#define BLETCHLEY_BMC_HW_STRAP2 AST2600_EVB_HW_STRAP2
+#define BLETCHLEY_BMC_HW_STRAP1 0x00002000
+#define BLETCHLEY_BMC_HW_STRAP2 0x00000801
+
+/* GB200NVL hardware value */
+#define GB200NVL_BMC_HW_STRAP1 AST2600_EVB_HW_STRAP1
+#define GB200NVL_BMC_HW_STRAP2 AST2600_EVB_HW_STRAP2
/* Qualcomm DC-SCM hardware value */
#define QCOM_DC_SCM_V1_BMC_HW_STRAP1 0x00000000
@@ -465,6 +469,8 @@ static void aspeed_machine_init(MachineState *machine)
aspeed_board_init_flashes(&bmc->soc->spi[0],
bmc->spi_model ? bmc->spi_model : amc->spi_model,
1, amc->num_cs);
+ aspeed_board_init_flashes(&bmc->soc->spi[1],
+ amc->spi2_model, 1, amc->num_cs2);
}
if (machine->kernel_filename && sc->num_cpus > 1) {
@@ -645,6 +651,12 @@ static void create_pca9552(AspeedSoCState *soc, int bus_id, int addr)
TYPE_PCA9552, addr);
}
+static I2CSlave *create_pca9554(AspeedSoCState *soc, int bus_id, int addr)
+{
+ return i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, bus_id),
+ TYPE_PCA9554, addr);
+}
+
static void sonorapass_bmc_i2c_init(AspeedMachineState *bmc)
{
AspeedSoCState *soc = bmc->soc;
@@ -1003,6 +1015,180 @@ static void fuji_bmc_i2c_init(AspeedMachineState *bmc)
}
#define TYPE_TMP421 "tmp421"
+#define TYPE_DS1338 "ds1338"
+
+/* Catalina hardware value */
+#define CATALINA_BMC_HW_STRAP1 0x00002002
+#define CATALINA_BMC_HW_STRAP2 0x00000800
+
+#define CATALINA_BMC_RAM_SIZE ASPEED_RAM_SIZE(2 * GiB)
+
+static void catalina_bmc_i2c_init(AspeedMachineState *bmc)
+{
+ /* Reference from v6.16-rc2 aspeed-bmc-facebook-catalina.dts */
+
+ AspeedSoCState *soc = bmc->soc;
+ I2CBus *i2c[16] = {};
+ I2CSlave *i2c_mux;
+
+ /* busses 0-15 are all used. */
+ for (int i = 0; i < ARRAY_SIZE(i2c); i++) {
+ i2c[i] = aspeed_i2c_get_bus(&soc->i2c, i);
+ }
+
+ /* &i2c0 */
+ /* i2c-mux@71 (PCA9546) on i2c0 */
+ i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x71);
+
+ /* i2c-mux@72 (PCA9546) on i2c0 */
+ i2c_mux = i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x72);
+
+ /* i2c0mux1ch1 */
+ /* io_expander7 - pca9535@20 */
+ i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 1),
+ TYPE_PCA9552, 0x20);
+ /* eeprom@50 */
+ at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 1), 0x50, 8 * KiB);
+
+ /* i2c-mux@73 (PCA9546) on i2c0 */
+ i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x73);
+
+ /* i2c-mux@75 (PCA9546) on i2c0 */
+ i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x75);
+
+ /* i2c-mux@76 (PCA9546) on i2c0 */
+ i2c_mux = i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x76);
+
+ /* i2c0mux4ch1 */
+ /* io_expander8 - pca9535@21 */
+ i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 1),
+ TYPE_PCA9552, 0x21);
+ /* eeprom@50 */
+ at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 1), 0x50, 8 * KiB);
+
+ /* i2c-mux@77 (PCA9546) on i2c0 */
+ i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x77);
+
+
+ /* &i2c1 */
+ /* i2c-mux@70 (PCA9548) on i2c1 */
+ i2c_mux = i2c_slave_create_simple(i2c[1], TYPE_PCA9548, 0x70);
+ /* i2c1mux0ch0 */
+ /* ina238@41 - no model */
+ /* ina238@42 - no model */
+ /* ina238@44 - no model */
+ /* i2c1mux0ch1 */
+ /* ina238@41 - no model */
+ /* ina238@43 - no model */
+ /* i2c1mux0ch4 */
+ /* ltc4287@42 - no model */
+ /* ltc4287@43 - no model */
+
+ /* i2c1mux0ch5 */
+ /* eeprom@54 */
+ at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 5), 0x54, 8 * KiB);
+ /* tpm75@4f */
+ i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 5), TYPE_TMP75, 0x4f);
+
+ /* i2c1mux0ch6 */
+ /* io_expander5 - pca9554@27 */
+ i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 6),
+ TYPE_PCA9554, 0x27);
+ /* io_expander6 - pca9555@25 */
+ i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 6),
+ TYPE_PCA9552, 0x25);
+ /* eeprom@51 */
+ at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 6), 0x51, 8 * KiB);
+
+ /* i2c1mux0ch7 */
+ /* eeprom@53 */
+ at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 7), 0x53, 8 * KiB);
+ /* temperature-sensor@4b - tmp75 */
+ i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 7), TYPE_TMP75, 0x4b);
+
+ /* &i2c2 */
+ /* io_expander0 - pca9555@20 */
+ i2c_slave_create_simple(i2c[2], TYPE_PCA9552, 0x20);
+ /* io_expander0 - pca9555@21 */
+ i2c_slave_create_simple(i2c[2], TYPE_PCA9552, 0x21);
+ /* io_expander0 - pca9555@27 */
+ i2c_slave_create_simple(i2c[2], TYPE_PCA9552, 0x27);
+ /* eeprom@50 */
+ at24c_eeprom_init(i2c[2], 0x50, 8 * KiB);
+ /* eeprom@51 */
+ at24c_eeprom_init(i2c[2], 0x51, 8 * KiB);
+
+ /* &i2c5 */
+ /* i2c-mux@70 (PCA9548) on i2c5 */
+ i2c_mux = i2c_slave_create_simple(i2c[5], TYPE_PCA9548, 0x70);
+ /* i2c5mux0ch6 */
+ /* eeprom@52 */
+ at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 6), 0x52, 8 * KiB);
+ /* i2c5mux0ch7 */
+ /* ina230@40 - no model */
+ /* ina230@41 - no model */
+ /* ina230@44 - no model */
+ /* ina230@45 - no model */
+
+ /* &i2c6 */
+ /* io_expander3 - pca9555@21 */
+ i2c_slave_create_simple(i2c[6], TYPE_PCA9552, 0x21);
+ /* rtc@6f - nct3018y */
+ i2c_slave_create_simple(i2c[6], TYPE_DS1338, 0x6f);
+
+ /* &i2c9 */
+ /* io_expander4 - pca9555@4f */
+ i2c_slave_create_simple(i2c[9], TYPE_PCA9552, 0x4f);
+ /* temperature-sensor@4b - tpm75 */
+ i2c_slave_create_simple(i2c[9], TYPE_TMP75, 0x4b);
+ /* eeprom@50 */
+ at24c_eeprom_init(i2c[9], 0x50, 8 * KiB);
+ /* eeprom@56 */
+ at24c_eeprom_init(i2c[9], 0x56, 8 * KiB);
+
+ /* &i2c10 */
+ /* temperature-sensor@1f - tpm421 */
+ i2c_slave_create_simple(i2c[10], TYPE_TMP421, 0x1f);
+ /* eeprom@50 */
+ at24c_eeprom_init(i2c[10], 0x50, 8 * KiB);
+
+ /* &i2c11 */
+ /* ssif-bmc@10 - no model */
+
+ /* &i2c12 */
+ /* eeprom@50 */
+ at24c_eeprom_init(i2c[12], 0x50, 8 * KiB);
+
+ /* &i2c13 */
+ /* eeprom@50 */
+ at24c_eeprom_init(i2c[13], 0x50, 8 * KiB);
+ /* eeprom@54 */
+ at24c_eeprom_init(i2c[13], 0x54, 256);
+ /* eeprom@55 */
+ at24c_eeprom_init(i2c[13], 0x55, 256);
+ /* eeprom@57 */
+ at24c_eeprom_init(i2c[13], 0x57, 256);
+
+ /* &i2c14 */
+ /* io_expander9 - pca9555@10 */
+ i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x10);
+ /* io_expander10 - pca9555@11 */
+ i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x11);
+ /* io_expander11 - pca9555@12 */
+ i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x12);
+ /* io_expander12 - pca9555@13 */
+ i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x13);
+ /* io_expander13 - pca9555@14 */
+ i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x14);
+ /* io_expander14 - pca9555@15 */
+ i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x15);
+
+ /* &i2c15 */
+ /* temperature-sensor@1f - tmp421 */
+ i2c_slave_create_simple(i2c[15], TYPE_TMP421, 0x1f);
+ /* eeprom@52 */
+ at24c_eeprom_init(i2c[15], 0x52, 8 * KiB);
+}
static void bletchley_bmc_i2c_init(AspeedMachineState *bmc)
{
@@ -1050,6 +1236,45 @@ static void bletchley_bmc_i2c_init(AspeedMachineState *bmc)
i2c_slave_create_simple(i2c[12], TYPE_PCA9552, 0x67);
}
+
+static void gb200nvl_bmc_i2c_init(AspeedMachineState *bmc)
+{
+ AspeedSoCState *soc = bmc->soc;
+ I2CBus *i2c[15] = {};
+ DeviceState *dev;
+ for (int i = 0; i < sizeof(i2c) / sizeof(i2c[0]); i++) {
+ if ((i == 11) || (i == 12) || (i == 13)) {
+ continue;
+ }
+ i2c[i] = aspeed_i2c_get_bus(&soc->i2c, i);
+ }
+
+ /* Bus 5 Expander */
+ create_pca9554(soc, 4, 0x21);
+
+ /* Mux I2c Expanders */
+ i2c_slave_create_simple(i2c[5], "pca9546", 0x71);
+ i2c_slave_create_simple(i2c[5], "pca9546", 0x72);
+ i2c_slave_create_simple(i2c[5], "pca9546", 0x73);
+ i2c_slave_create_simple(i2c[5], "pca9546", 0x75);
+ i2c_slave_create_simple(i2c[5], "pca9546", 0x76);
+ i2c_slave_create_simple(i2c[5], "pca9546", 0x77);
+
+ /* Bus 10 */
+ dev = DEVICE(create_pca9554(soc, 9, 0x20));
+
+ /* Set FPGA_READY */
+ object_property_set_str(OBJECT(dev), "pin1", "high", &error_fatal);
+
+ create_pca9554(soc, 9, 0x21);
+ at24c_eeprom_init(i2c[9], 0x50, 64 * KiB);
+ at24c_eeprom_init(i2c[9], 0x51, 64 * KiB);
+
+ /* Bus 11 */
+ at24c_eeprom_init_rom(i2c[10], 0x50, 256, gb200nvl_bmc_fruid,
+ gb200nvl_bmc_fruid_len);
+}
+
static void fby35_i2c_init(AspeedMachineState *bmc)
{
AspeedSoCState *soc = bmc->soc;
@@ -1585,6 +1810,52 @@ static void aspeed_machine_bletchley_class_init(ObjectClass *oc,
aspeed_machine_class_init_cpus_defaults(mc);
}
+static void aspeed_machine_catalina_class_init(ObjectClass *oc,
+ const void *data)
+{
+ MachineClass *mc = MACHINE_CLASS(oc);
+ AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
+
+ mc->desc = "Facebook Catalina BMC (Cortex-A7)";
+ amc->soc_name = "ast2600-a3";
+ amc->hw_strap1 = CATALINA_BMC_HW_STRAP1;
+ amc->hw_strap2 = CATALINA_BMC_HW_STRAP2;
+ amc->fmc_model = "w25q01jvq";
+ amc->spi_model = NULL;
+ amc->num_cs = 2;
+ amc->macs_mask = ASPEED_MAC2_ON;
+ amc->i2c_init = catalina_bmc_i2c_init;
+ mc->auto_create_sdcard = true;
+ mc->default_ram_size = CATALINA_BMC_RAM_SIZE;
+ aspeed_machine_class_init_cpus_defaults(mc);
+ aspeed_machine_ast2600_class_emmc_init(oc);
+}
+
+#define GB200NVL_BMC_RAM_SIZE ASPEED_RAM_SIZE(1 * GiB)
+
+static void aspeed_machine_gb200nvl_class_init(ObjectClass *oc,
+ const void *data)
+{
+ MachineClass *mc = MACHINE_CLASS(oc);
+ AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
+
+ mc->desc = "Nvidia GB200NVL BMC (Cortex-A7)";
+ amc->soc_name = "ast2600-a3";
+ amc->hw_strap1 = GB200NVL_BMC_HW_STRAP1;
+ amc->hw_strap2 = GB200NVL_BMC_HW_STRAP2;
+ amc->fmc_model = "mx66u51235f";
+ amc->spi_model = "mx66u51235f";
+ amc->num_cs = 2;
+
+ amc->spi2_model = "mx66u51235f";
+ amc->num_cs2 = 1;
+ amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON;
+ amc->i2c_init = gb200nvl_bmc_i2c_init;
+ mc->default_ram_size = GB200NVL_BMC_RAM_SIZE;
+ aspeed_machine_class_init_cpus_defaults(mc);
+ aspeed_machine_ast2600_class_emmc_init(oc);
+}
+
static void fby35_reset(MachineState *state, ResetType type)
{
AspeedMachineState *bmc = ASPEED_MACHINE(state);
@@ -1878,6 +2149,14 @@ static const TypeInfo aspeed_machine_types[] = {
.parent = TYPE_ASPEED_MACHINE,
.class_init = aspeed_machine_bletchley_class_init,
}, {
+ .name = MACHINE_TYPE_NAME("gb200nvl-bmc"),
+ .parent = TYPE_ASPEED_MACHINE,
+ .class_init = aspeed_machine_gb200nvl_class_init,
+ }, {
+ .name = MACHINE_TYPE_NAME("catalina-bmc"),
+ .parent = TYPE_ASPEED_MACHINE,
+ .class_init = aspeed_machine_catalina_class_init,
+ }, {
.name = MACHINE_TYPE_NAME("fby35-bmc"),
.parent = MACHINE_TYPE_NAME("ast2600-evb"),
.class_init = aspeed_machine_fby35_class_init,
diff --git a/hw/arm/aspeed_ast27x0-fc.c b/hw/arm/aspeed_ast27x0-fc.c
index 125a3ad..7087be4 100644
--- a/hw/arm/aspeed_ast27x0-fc.c
+++ b/hw/arm/aspeed_ast27x0-fc.c
@@ -48,7 +48,7 @@ struct Ast2700FCState {
bool mmio_exec;
};
-#define AST2700FC_BMC_RAM_SIZE (2 * GiB)
+#define AST2700FC_BMC_RAM_SIZE (1 * GiB)
#define AST2700FC_CM4_DRAM_SIZE (32 * MiB)
#define AST2700FC_HW_STRAP1 0x000000C0
@@ -68,6 +68,7 @@ static void ast2700fc_ca35_init(MachineState *machine)
memory_region_init(&s->ca35_memory, OBJECT(&s->ca35), "ca35-memory",
UINT64_MAX);
+ memory_region_add_subregion(get_system_memory(), 0, &s->ca35_memory);
if (!memory_region_init_ram(&s->ca35_dram, OBJECT(&s->ca35), "ca35-dram",
AST2700FC_BMC_RAM_SIZE, &error_abort)) {
@@ -86,6 +87,13 @@ static void ast2700fc_ca35_init(MachineState *machine)
AST2700FC_BMC_RAM_SIZE, &error_abort)) {
return;
}
+
+ for (int i = 0; i < sc->macs_num; i++) {
+ if (!qemu_configure_nic_device(DEVICE(&soc->ftgmac100[i]),
+ true, NULL)) {
+ break;
+ }
+ }
if (!object_property_set_int(OBJECT(&s->ca35), "hw-strap1",
AST2700FC_HW_STRAP1, &error_abort)) {
return;
diff --git a/hw/arm/aspeed_ast27x0.c b/hw/arm/aspeed_ast27x0.c
index 1974a25..6aa3841 100644
--- a/hw/arm/aspeed_ast27x0.c
+++ b/hw/arm/aspeed_ast27x0.c
@@ -23,14 +23,14 @@
#include "qobject/qlist.h"
#include "qemu/log.h"
-#define AST2700_SOC_IO_SIZE 0x01000000
+#define AST2700_SOC_IO_SIZE 0x00FE0000
#define AST2700_SOC_IOMEM_SIZE 0x01000000
#define AST2700_SOC_DPMCU_SIZE 0x00040000
#define AST2700_SOC_LTPI_SIZE 0x01000000
static const hwaddr aspeed_soc_ast2700_memmap[] = {
- [ASPEED_DEV_IOMEM] = 0x00000000,
[ASPEED_DEV_VBOOTROM] = 0x00000000,
+ [ASPEED_DEV_IOMEM] = 0x00020000,
[ASPEED_DEV_SRAM] = 0x10000000,
[ASPEED_DEV_DPMCU] = 0x11000000,
[ASPEED_DEV_IOMEM0] = 0x12000000,
@@ -346,8 +346,9 @@ static void aspeed_ram_capacity_write(void *opaque, hwaddr addr, uint64_t data,
* If writes the data to the address which is beyond the ram size,
* it would write the data to the "address % ram_size".
*/
- result = address_space_write(&s->dram_as, addr % ram_size,
- MEMTXATTRS_UNSPECIFIED, &data, 4);
+ address_space_stl_le(&s->dram_as, addr % ram_size, data,
+ MEMTXATTRS_UNSPECIFIED, &result);
+
if (result != MEMTX_OK) {
qemu_log_mask(LOG_GUEST_ERROR,
"%s: DRAM write failed, addr:0x%" HWADDR_PRIx
@@ -360,9 +361,10 @@ static const MemoryRegionOps aspeed_ram_capacity_ops = {
.read = aspeed_ram_capacity_read,
.write = aspeed_ram_capacity_write,
.endianness = DEVICE_LITTLE_ENDIAN,
+ .impl.min_access_size = 4,
.valid = {
- .min_access_size = 1,
- .max_access_size = 8,
+ .min_access_size = 4,
+ .max_access_size = 4,
},
};
diff --git a/hw/arm/aspeed_eeprom.c b/hw/arm/aspeed_eeprom.c
index daa3d32..8bbbdec 100644
--- a/hw/arm/aspeed_eeprom.c
+++ b/hw/arm/aspeed_eeprom.c
@@ -162,6 +162,25 @@ const uint8_t rainier_bmc_fruid[] = {
0x31, 0x50, 0x46, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
};
+const uint8_t gb200nvl_bmc_fruid[] = {
+ 0x01, 0x00, 0x00, 0x01, 0x0b, 0x00, 0x00, 0xf3, 0x01, 0x0a, 0x19, 0x1f,
+ 0x0f, 0xe6, 0xc6, 0x4e, 0x56, 0x49, 0x44, 0x49, 0x41, 0xc5, 0x50, 0x33,
+ 0x38, 0x30, 0x39, 0xcd, 0x31, 0x35, 0x38, 0x33, 0x33, 0x32, 0x34, 0x38,
+ 0x30, 0x30, 0x31, 0x35, 0x30, 0xd2, 0x36, 0x39, 0x39, 0x2d, 0x31, 0x33,
+ 0x38, 0x30, 0x39, 0x2d, 0x30, 0x34, 0x30, 0x34, 0x2d, 0x36, 0x30, 0x30,
+ 0xc0, 0x01, 0x01, 0xd6, 0x4d, 0x41, 0x43, 0x3a, 0x20, 0x33, 0x43, 0x3a,
+ 0x36, 0x44, 0x3a, 0x36, 0x36, 0x3a, 0x31, 0x34, 0x3a, 0x43, 0x38, 0x3a,
+ 0x37, 0x41, 0xc1, 0x3b, 0x01, 0x09, 0x19, 0xc6, 0x4e, 0x56, 0x49, 0x44,
+ 0x49, 0x41, 0xc9, 0x50, 0x33, 0x38, 0x30, 0x39, 0x2d, 0x42, 0x4d, 0x43,
+ 0xd2, 0x36, 0x39, 0x39, 0x2d, 0x31, 0x33, 0x38, 0x30, 0x39, 0x2d, 0x30,
+ 0x34, 0x30, 0x34, 0x2d, 0x36, 0x30, 0x30, 0xc4, 0x41, 0x45, 0x2e, 0x31,
+ 0xcd, 0x31, 0x35, 0x38, 0x33, 0x33, 0x32, 0x34, 0x38, 0x30, 0x30, 0x31,
+ 0x35, 0x30, 0xc0, 0xc4, 0x76, 0x30, 0x2e, 0x31, 0xc1, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0xb4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+
+};
+
const size_t tiogapass_bmc_fruid_len = sizeof(tiogapass_bmc_fruid);
const size_t fby35_nic_fruid_len = sizeof(fby35_nic_fruid);
const size_t fby35_bb_fruid_len = sizeof(fby35_bb_fruid);
@@ -169,3 +188,5 @@ const size_t fby35_bmc_fruid_len = sizeof(fby35_bmc_fruid);
const size_t yosemitev2_bmc_fruid_len = sizeof(yosemitev2_bmc_fruid);
const size_t rainier_bb_fruid_len = sizeof(rainier_bb_fruid);
const size_t rainier_bmc_fruid_len = sizeof(rainier_bmc_fruid);
+const size_t gb200nvl_bmc_fruid_len = sizeof(gb200nvl_bmc_fruid);
+
diff --git a/hw/arm/aspeed_eeprom.h b/hw/arm/aspeed_eeprom.h
index f08c16e..3ed9bc1 100644
--- a/hw/arm/aspeed_eeprom.h
+++ b/hw/arm/aspeed_eeprom.h
@@ -26,4 +26,7 @@ extern const size_t rainier_bb_fruid_len;
extern const uint8_t rainier_bmc_fruid[];
extern const size_t rainier_bmc_fruid_len;
+extern const uint8_t gb200nvl_bmc_fruid[];
+extern const size_t gb200nvl_bmc_fruid_len;
+
#endif
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index f94b940..becd827 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -19,6 +19,7 @@
#include "system/kvm.h"
#include "system/tcg.h"
#include "system/system.h"
+#include "system/memory.h"
#include "system/numa.h"
#include "hw/boards.h"
#include "system/reset.h"
@@ -526,7 +527,7 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
if (binfo->dtb_filename) {
char *filename;
- filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, binfo->dtb_filename);
+ filename = qemu_find_file(QEMU_FILE_TYPE_DTB, binfo->dtb_filename);
if (!filename) {
fprintf(stderr, "Couldn't open dtb file %s\n", binfo->dtb_filename);
goto fail;
@@ -743,7 +744,7 @@ static void do_cpu_reset(void *opaque)
} else {
if (arm_feature(env, ARM_FEATURE_EL3) &&
(info->secure_boot ||
- (info->secure_board_setup && cs == first_cpu))) {
+ (info->secure_board_setup && cpu == info->primary_cpu))) {
/* Start this CPU in Secure SVC */
target_el = 3;
}
@@ -751,7 +752,7 @@ static void do_cpu_reset(void *opaque)
arm_emulate_firmware_reset(cs, target_el);
- if (cs == first_cpu) {
+ if (cpu == info->primary_cpu) {
AddressSpace *as = arm_boot_address_space(cpu, info);
cpu_set_pc(cs, info->loader_start);
@@ -1238,6 +1239,9 @@ void arm_load_kernel(ARMCPU *cpu, MachineState *ms, struct arm_boot_info *info)
info->dtb_filename = ms->dtb;
info->dtb_limit = 0;
+ /* We assume the CPU passed as argument is the primary CPU. */
+ info->primary_cpu = cpu;
+
/* Load the kernel. */
if (!info->kernel_filename || info->firmware_loaded) {
arm_setup_firmware_boot(cpu, info);
@@ -1287,12 +1291,8 @@ void arm_load_kernel(ARMCPU *cpu, MachineState *ms, struct arm_boot_info *info)
object_property_set_int(cpuobj, "psci-conduit", info->psci_conduit,
&error_abort);
- /*
- * Secondary CPUs start in PSCI powered-down state. Like the
- * code in do_cpu_reset(), we assume first_cpu is the primary
- * CPU.
- */
- if (cs != first_cpu) {
+ /* Secondary CPUs start in PSCI powered-down state. */
+ if (ARM_CPU(cs) != info->primary_cpu) {
object_property_set_bool(cpuobj, "start-powered-off", true,
&error_abort);
}
diff --git a/hw/arm/fby35.c b/hw/arm/fby35.c
index e123fa6..c14fc2e 100644
--- a/hw/arm/fby35.c
+++ b/hw/arm/fby35.c
@@ -77,6 +77,7 @@ static void fby35_bmc_init(Fby35State *s)
memory_region_init(&s->bmc_memory, OBJECT(&s->bmc), "bmc-memory",
UINT64_MAX);
+ memory_region_add_subregion(get_system_memory(), 0, &s->bmc_memory);
memory_region_init_ram(&s->bmc_dram, OBJECT(&s->bmc), "bmc-dram",
FBY35_BMC_RAM_SIZE, &error_abort);
diff --git a/hw/arm/meson.build b/hw/arm/meson.build
index 5098795..d90be8f 100644
--- a/hw/arm/meson.build
+++ b/hw/arm/meson.build
@@ -8,7 +8,7 @@ arm_common_ss.add(when: 'CONFIG_HIGHBANK', if_true: files('highbank.c'))
arm_common_ss.add(when: 'CONFIG_INTEGRATOR', if_true: files('integratorcp.c'))
arm_common_ss.add(when: 'CONFIG_MICROBIT', if_true: files('microbit.c'))
arm_common_ss.add(when: 'CONFIG_MPS3R', if_true: files('mps3r.c'))
-arm_common_ss.add(when: 'CONFIG_MUSICPAL', if_true: [pixman, files('musicpal.c')])
+arm_common_ss.add(when: 'CONFIG_MUSICPAL', if_true: [files('musicpal.c')])
arm_common_ss.add(when: 'CONFIG_NETDUINOPLUS2', if_true: files('netduinoplus2.c'))
arm_common_ss.add(when: 'CONFIG_OLIMEX_STM32_H405', if_true: files('olimex-stm32-h405.c'))
arm_common_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx.c', 'npcm7xx_boards.c'))
@@ -79,7 +79,7 @@ arm_common_ss.add(when: 'CONFIG_SX1', if_true: files('omap_sx1.c'))
arm_common_ss.add(when: 'CONFIG_VERSATILE', if_true: files('versatilepb.c'))
arm_common_ss.add(when: 'CONFIG_VEXPRESS', if_true: files('vexpress.c'))
-arm_common_ss.add(fdt, files('boot.c'))
+arm_common_ss.add(files('boot.c'))
hw_arch += {'arm': arm_ss}
hw_common_arch += {'arm': arm_common_ss}
diff --git a/hw/arm/mps2.c b/hw/arm/mps2.c
index 58efb41..bd378e3 100644
--- a/hw/arm/mps2.c
+++ b/hw/arm/mps2.c
@@ -224,7 +224,11 @@ static void mps2_common_init(MachineState *machine)
switch (mmc->fpga_type) {
case FPGA_AN385:
case FPGA_AN386:
+ qdev_prop_set_uint32(armv7m, "num-irq", 32);
+ break;
case FPGA_AN500:
+ /* The AN500 configures its Cortex-M7 with 16 MPU regions */
+ qdev_prop_set_uint32(armv7m, "mpu-ns-regions", 16);
qdev_prop_set_uint32(armv7m, "num-irq", 32);
break;
case FPGA_AN511:
diff --git a/hw/arm/npcm8xx.c b/hw/arm/npcm8xx.c
index d7ee306..a276fea 100644
--- a/hw/arm/npcm8xx.c
+++ b/hw/arm/npcm8xx.c
@@ -67,6 +67,9 @@
/* SDHCI Modules */
#define NPCM8XX_MMC_BA 0xf0842000
+/* PCS Module */
+#define NPCM8XX_PCS_BA 0xf0780000
+
/* PSPI Modules */
#define NPCM8XX_PSPI_BA 0xf0201000
@@ -85,6 +88,10 @@ enum NPCM8xxInterrupt {
NPCM8XX_ADC_IRQ = 0,
NPCM8XX_PECI_IRQ = 6,
NPCM8XX_KCS_HIB_IRQ = 9,
+ NPCM8XX_GMAC1_IRQ = 14,
+ NPCM8XX_GMAC2_IRQ,
+ NPCM8XX_GMAC3_IRQ,
+ NPCM8XX_GMAC4_IRQ,
NPCM8XX_MMC_IRQ = 26,
NPCM8XX_PSPI_IRQ = 28,
NPCM8XX_TIMER0_IRQ = 32, /* Timer Module 0 */
@@ -260,6 +267,14 @@ static const hwaddr npcm8xx_smbus_addr[] = {
0xfff0a000,
};
+/* Register base address for each GMAC Module */
+static const hwaddr npcm8xx_gmac_addr[] = {
+ 0xf0802000,
+ 0xf0804000,
+ 0xf0806000,
+ 0xf0808000,
+};
+
/* Register base address for each USB host EHCI registers */
static const hwaddr npcm8xx_ehci_addr[] = {
0xf0828100,
@@ -350,6 +365,7 @@ static struct arm_boot_info npcm8xx_binfo = {
.secure_boot = false,
.board_id = -1,
.board_setup_addr = NPCM8XX_BOARD_SETUP_ADDR,
+ .psci_conduit = QEMU_PSCI_CONDUIT_SMC,
};
void npcm8xx_load_kernel(MachineState *machine, NPCM8xxState *soc)
@@ -444,6 +460,11 @@ static void npcm8xx_init(Object *obj)
object_initialize_child(obj, "mft[*]", &s->mft[i], TYPE_NPCM7XX_MFT);
}
+ for (i = 0; i < ARRAY_SIZE(s->gmac); i++) {
+ object_initialize_child(obj, "gmac[*]", &s->gmac[i], TYPE_NPCM_GMAC);
+ }
+ object_initialize_child(obj, "pcs", &s->pcs, TYPE_NPCM_PCS);
+
object_initialize_child(obj, "mmc", &s->mmc, TYPE_NPCM7XX_SDHCI);
object_initialize_child(obj, "pspi", &s->pspi, TYPE_NPCM_PSPI);
}
@@ -669,6 +690,35 @@ static void npcm8xx_realize(DeviceState *dev, Error **errp)
}
/*
+ * GMAC Modules. Cannot fail.
+ */
+ QEMU_BUILD_BUG_ON(ARRAY_SIZE(npcm8xx_gmac_addr) != ARRAY_SIZE(s->gmac));
+ for (i = 0; i < ARRAY_SIZE(s->gmac); i++) {
+ SysBusDevice *sbd = SYS_BUS_DEVICE(&s->gmac[i]);
+
+ /* This is used to make sure that the NIC can create the device */
+ qemu_configure_nic_device(DEVICE(sbd), false, NULL);
+
+ /*
+ * The device exists regardless of whether it's connected to a QEMU
+ * netdev backend. So always instantiate it even if there is no
+ * backend.
+ */
+ sysbus_realize(sbd, &error_abort);
+ sysbus_mmio_map(sbd, 0, npcm8xx_gmac_addr[i]);
+ /*
+ * N.B. The values for the second argument sysbus_connect_irq are
+ * chosen to match the registration order in npcm7xx_emc_realize.
+ */
+ sysbus_connect_irq(sbd, 0, npcm8xx_irq(s, NPCM8XX_GMAC1_IRQ + i));
+ }
+ /*
+ * GMAC Physical Coding Sublayer(PCS) Module. Cannot fail.
+ */
+ sysbus_realize(SYS_BUS_DEVICE(&s->pcs), &error_abort);
+ sysbus_mmio_map(SYS_BUS_DEVICE(&s->pcs), 0, NPCM8XX_PCS_BA);
+
+ /*
* Flash Interface Unit (FIU). Can fail if incorrect number of chip selects
* specified, but this is a programming error.
*/
@@ -741,12 +791,7 @@ static void npcm8xx_realize(DeviceState *dev, Error **errp)
create_unimplemented_device("npcm8xx.ahbpci", 0xf0400000, 1 * MiB);
create_unimplemented_device("npcm8xx.dap", 0xf0500000, 960 * KiB);
create_unimplemented_device("npcm8xx.mcphy", 0xf05f0000, 64 * KiB);
- create_unimplemented_device("npcm8xx.pcs", 0xf0780000, 256 * KiB);
create_unimplemented_device("npcm8xx.tsgen", 0xf07fc000, 8 * KiB);
- create_unimplemented_device("npcm8xx.gmac1", 0xf0802000, 8 * KiB);
- create_unimplemented_device("npcm8xx.gmac2", 0xf0804000, 8 * KiB);
- create_unimplemented_device("npcm8xx.gmac3", 0xf0806000, 8 * KiB);
- create_unimplemented_device("npcm8xx.gmac4", 0xf0808000, 8 * KiB);
create_unimplemented_device("npcm8xx.copctl", 0xf080c000, 4 * KiB);
create_unimplemented_device("npcm8xx.tipctl", 0xf080d000, 4 * KiB);
create_unimplemented_device("npcm8xx.rst", 0xf080e000, 4 * KiB);
diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c
index deae5cf..15c1ff4 100644
--- a/hw/arm/sbsa-ref.c
+++ b/hw/arm/sbsa-ref.c
@@ -19,6 +19,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/cutils.h"
#include "qemu/datadir.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
@@ -53,8 +54,7 @@
#include "target/arm/cpu-qom.h"
#include "target/arm/gtimer.h"
-#define RAMLIMIT_GB 8192
-#define RAMLIMIT_BYTES (RAMLIMIT_GB * GiB)
+#define RAMLIMIT_BYTES (8 * TiB)
#define NUM_IRQS 256
#define NUM_SMMU_IRQS 4
@@ -756,7 +756,9 @@ static void sbsa_ref_init(MachineState *machine)
sms->smp_cpus = smp_cpus;
if (machine->ram_size > sbsa_ref_memmap[SBSA_MEM].size) {
- error_report("sbsa-ref: cannot model more than %dGB RAM", RAMLIMIT_GB);
+ char *size_str = size_to_str(RAMLIMIT_BYTES);
+
+ error_report("sbsa-ref: cannot model more than %s of RAM", size_str);
exit(1);
}
diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 7e8e0f0..cd90c47 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -266,6 +266,43 @@ static int iort_idmap_compare(gconstpointer a, gconstpointer b)
return idmap_a->input_base - idmap_b->input_base;
}
+/* Compute ID ranges (RIDs) from RC that are directed to the ITS Group node */
+static void create_rc_its_idmaps(GArray *its_idmaps, GArray *smmu_idmaps)
+{
+ AcpiIortIdMapping *idmap;
+ AcpiIortIdMapping next_range = {0};
+
+ /*
+ * Based on the RID ranges that are directed to the SMMU, determine the
+ * bypassed RID ranges, i.e., the ones that are directed to the ITS Group
+ * node and do not pass through the SMMU, by subtracting the SMMU-bound
+ * ranges from the full RID range (0x0000–0xFFFF).
+ */
+ for (int i = 0; i < smmu_idmaps->len; i++) {
+ idmap = &g_array_index(smmu_idmaps, AcpiIortIdMapping, i);
+
+ if (next_range.input_base < idmap->input_base) {
+ next_range.id_count = idmap->input_base - next_range.input_base;
+ g_array_append_val(its_idmaps, next_range);
+ }
+
+ next_range.input_base = idmap->input_base + idmap->id_count;
+ }
+
+ /*
+ * Append the last RC -> ITS ID mapping.
+ *
+ * RIDs are 16-bit, according to the PCI Express 2.0 Base Specification, rev
+ * 0.9, section 2.2.6.2, "Transaction Descriptor - Transaction ID Field",
+ * hence the end of the range is 0x10000.
+ */
+ if (next_range.input_base < 0x10000) {
+ next_range.id_count = 0x10000 - next_range.input_base;
+ g_array_append_val(its_idmaps, next_range);
+ }
+}
+
+
/*
* Input Output Remapping Table (IORT)
* Conforms to "IO Remapping Table System Software on ARM Platforms",
@@ -276,10 +313,9 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
{
int i, nb_nodes, rc_mapping_count;
size_t node_size, smmu_offset = 0;
- AcpiIortIdMapping *idmap;
uint32_t id = 0;
- GArray *smmu_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping));
- GArray *its_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping));
+ GArray *rc_smmu_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping));
+ GArray *rc_its_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping));
AcpiTable table = { .sig = "IORT", .rev = 3, .oem_id = vms->oem_id,
.oem_table_id = vms->oem_table_id };
@@ -287,40 +323,39 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
acpi_table_begin(&table, table_data);
if (vms->iommu == VIRT_IOMMU_SMMUV3) {
- AcpiIortIdMapping next_range = {0};
-
object_child_foreach_recursive(object_get_root(),
- iort_host_bridges, smmu_idmaps);
+ iort_host_bridges, rc_smmu_idmaps);
/* Sort the smmu idmap by input_base */
- g_array_sort(smmu_idmaps, iort_idmap_compare);
+ g_array_sort(rc_smmu_idmaps, iort_idmap_compare);
/*
- * Split the whole RIDs by mapping from RC to SMMU,
- * build the ID mapping from RC to ITS directly.
+ * Knowing the ID ranges from the RC to the SMMU, it's possible to
+ * determine the ID ranges from RC that are directed to the ITS.
*/
- for (i = 0; i < smmu_idmaps->len; i++) {
- idmap = &g_array_index(smmu_idmaps, AcpiIortIdMapping, i);
+ create_rc_its_idmaps(rc_its_idmaps, rc_smmu_idmaps);
- if (next_range.input_base < idmap->input_base) {
- next_range.id_count = idmap->input_base - next_range.input_base;
- g_array_append_val(its_idmaps, next_range);
- }
+ nb_nodes = 2; /* RC and SMMUv3 */
+ rc_mapping_count = rc_smmu_idmaps->len;
- next_range.input_base = idmap->input_base + idmap->id_count;
- }
+ if (vms->its) {
+ /*
+ * Knowing the ID ranges from the RC to the SMMU, it's possible to
+ * determine the ID ranges from RC that go directly to ITS.
+ */
+ create_rc_its_idmaps(rc_its_idmaps, rc_smmu_idmaps);
- /* Append the last RC -> ITS ID mapping */
- if (next_range.input_base < 0x10000) {
- next_range.id_count = 0x10000 - next_range.input_base;
- g_array_append_val(its_idmaps, next_range);
+ nb_nodes++; /* ITS */
+ rc_mapping_count += rc_its_idmaps->len;
}
-
- nb_nodes = 3; /* RC, ITS, SMMUv3 */
- rc_mapping_count = smmu_idmaps->len + its_idmaps->len;
} else {
- nb_nodes = 2; /* RC, ITS */
- rc_mapping_count = 1;
+ if (vms->its) {
+ nb_nodes = 2; /* RC and ITS */
+ rc_mapping_count = 1; /* Direct map to ITS */
+ } else {
+ nb_nodes = 1; /* RC only */
+ rc_mapping_count = 0; /* No output mapping */
+ }
}
/* Number of IORT Nodes */
build_append_int_noprefix(table_data, nb_nodes, 4);
@@ -329,31 +364,43 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
build_append_int_noprefix(table_data, IORT_NODE_OFFSET, 4);
build_append_int_noprefix(table_data, 0, 4); /* Reserved */
- /* Table 12 ITS Group Format */
- build_append_int_noprefix(table_data, 0 /* ITS Group */, 1); /* Type */
- node_size = 20 /* fixed header size */ + 4 /* 1 GIC ITS Identifier */;
- build_append_int_noprefix(table_data, node_size, 2); /* Length */
- build_append_int_noprefix(table_data, 1, 1); /* Revision */
- build_append_int_noprefix(table_data, id++, 4); /* Identifier */
- build_append_int_noprefix(table_data, 0, 4); /* Number of ID mappings */
- build_append_int_noprefix(table_data, 0, 4); /* Reference to ID Array */
- build_append_int_noprefix(table_data, 1, 4); /* Number of ITSs */
- /* GIC ITS Identifier Array */
- build_append_int_noprefix(table_data, 0 /* MADT translation_id */, 4);
+ if (vms->its) {
+ /* Table 12 ITS Group Format */
+ build_append_int_noprefix(table_data, 0 /* ITS Group */, 1); /* Type */
+ node_size = 20 /* fixed header size */ + 4 /* 1 GIC ITS Identifier */;
+ build_append_int_noprefix(table_data, node_size, 2); /* Length */
+ build_append_int_noprefix(table_data, 1, 1); /* Revision */
+ build_append_int_noprefix(table_data, id++, 4); /* Identifier */
+ build_append_int_noprefix(table_data, 0, 4); /* Number of ID mappings */
+ build_append_int_noprefix(table_data, 0, 4); /* Reference to ID Array */
+ build_append_int_noprefix(table_data, 1, 4); /* Number of ITSs */
+ /* GIC ITS Identifier Array */
+ build_append_int_noprefix(table_data, 0 /* MADT translation_id */, 4);
+ }
if (vms->iommu == VIRT_IOMMU_SMMUV3) {
int irq = vms->irqmap[VIRT_SMMU] + ARM_SPI_BASE;
-
+ int smmu_mapping_count, offset_to_id_array;
+
+ if (vms->its) {
+ smmu_mapping_count = 1; /* ITS Group node */
+ offset_to_id_array = SMMU_V3_ENTRY_SIZE; /* Just after the header */
+ } else {
+ smmu_mapping_count = 0; /* No ID mappings */
+ offset_to_id_array = 0; /* No ID mappings array */
+ }
smmu_offset = table_data->len - table.table_offset;
/* Table 9 SMMUv3 Format */
build_append_int_noprefix(table_data, 4 /* SMMUv3 */, 1); /* Type */
- node_size = SMMU_V3_ENTRY_SIZE + ID_MAPPING_ENTRY_SIZE;
+ node_size = SMMU_V3_ENTRY_SIZE +
+ (ID_MAPPING_ENTRY_SIZE * smmu_mapping_count);
build_append_int_noprefix(table_data, node_size, 2); /* Length */
build_append_int_noprefix(table_data, 4, 1); /* Revision */
build_append_int_noprefix(table_data, id++, 4); /* Identifier */
- build_append_int_noprefix(table_data, 1, 4); /* Number of ID mappings */
+ /* Number of ID mappings */
+ build_append_int_noprefix(table_data, smmu_mapping_count, 4);
/* Reference to ID Array */
- build_append_int_noprefix(table_data, SMMU_V3_ENTRY_SIZE, 4);
+ build_append_int_noprefix(table_data, offset_to_id_array, 4);
/* Base address */
build_append_int_noprefix(table_data, vms->memmap[VIRT_SMMU].base, 8);
/* Flags */
@@ -369,9 +416,11 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
build_append_int_noprefix(table_data, 0, 4); /* Proximity domain */
/* DeviceID mapping index (ignored since interrupts are GSIV based) */
build_append_int_noprefix(table_data, 0, 4);
-
- /* output IORT node is the ITS group node (the first node) */
- build_iort_id_mapping(table_data, 0, 0x10000, IORT_NODE_OFFSET);
+ /* Array of ID mappings */
+ if (smmu_mapping_count) {
+ /* Output IORT node is the ITS Group node (the first node). */
+ build_iort_id_mapping(table_data, 0, 0x10000, IORT_NODE_OFFSET);
+ }
}
/* Table 17 Root Complex Node */
@@ -407,29 +456,44 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
if (vms->iommu == VIRT_IOMMU_SMMUV3) {
AcpiIortIdMapping *range;
- /* translated RIDs connect to SMMUv3 node: RC -> SMMUv3 -> ITS */
- for (i = 0; i < smmu_idmaps->len; i++) {
- range = &g_array_index(smmu_idmaps, AcpiIortIdMapping, i);
- /* output IORT node is the smmuv3 node */
+ /*
+ * Map RIDs (input) from RC to SMMUv3 nodes: RC -> SMMUv3.
+ *
+ * N.B.: The mapping from SMMUv3 to ITS Group node (SMMUv3 -> ITS) is
+ * defined in the SMMUv3 table, where all SMMUv3 IDs are mapped to the
+ * ITS Group node, if ITS is available.
+ */
+ for (i = 0; i < rc_smmu_idmaps->len; i++) {
+ range = &g_array_index(rc_smmu_idmaps, AcpiIortIdMapping, i);
+ /* Output IORT node is the SMMUv3 node. */
build_iort_id_mapping(table_data, range->input_base,
range->id_count, smmu_offset);
}
- /* bypassed RIDs connect to ITS group node directly: RC -> ITS */
- for (i = 0; i < its_idmaps->len; i++) {
- range = &g_array_index(its_idmaps, AcpiIortIdMapping, i);
- /* output IORT node is the ITS group node (the first node) */
- build_iort_id_mapping(table_data, range->input_base,
- range->id_count, IORT_NODE_OFFSET);
+ if (vms->its) {
+ /*
+ * Map bypassed (don't go through the SMMU) RIDs (input) to
+ * ITS Group node directly: RC -> ITS.
+ */
+ for (i = 0; i < rc_its_idmaps->len; i++) {
+ range = &g_array_index(rc_its_idmaps, AcpiIortIdMapping, i);
+ /* Output IORT node is the ITS Group node (the first node). */
+ build_iort_id_mapping(table_data, range->input_base,
+ range->id_count, IORT_NODE_OFFSET);
+ }
}
} else {
- /* output IORT node is the ITS group node (the first node) */
+ /*
+ * Map all RIDs (input) to ITS Group node directly, since there is no
+ * SMMU: RC -> ITS.
+ * Output IORT node is the ITS Group node (the first node).
+ */
build_iort_id_mapping(table_data, 0, 0x10000, IORT_NODE_OFFSET);
}
acpi_table_end(linker, &table);
- g_array_free(smmu_idmaps, true);
- g_array_free(its_idmaps, true);
+ g_array_free(rc_smmu_idmaps, true);
+ g_array_free(rc_its_idmaps, true);
}
/*
@@ -737,7 +801,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
memmap[VIRT_HIGH_GIC_REDIST2].size);
}
- if (its_class_name()) {
+ if (vms->its) {
/*
* ACPI spec, Revision 6.0 Errata A
* (original 6.0 definition has invalid Length)
@@ -969,10 +1033,8 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
vms->oem_table_id);
}
- if (its_class_name()) {
- acpi_add_table(table_offsets, tables_blob);
- build_iort(tables_blob, tables->linker, vms);
- }
+ acpi_add_table(table_offsets, tables_blob);
+ build_iort(tables_blob, tables->linker, vms);
#ifdef CONFIG_TPM
if (tpm_get_version(tpm_find()) == TPM_VERSION_2_0) {
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 9a6cd08..3bcdf92 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -705,21 +705,18 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms)
static void create_its(VirtMachineState *vms)
{
- const char *itsclass = its_class_name();
DeviceState *dev;
- if (!strcmp(itsclass, "arm-gicv3-its")) {
- if (!vms->tcg_its) {
- itsclass = NULL;
- }
- }
-
- if (!itsclass) {
- /* Do nothing if not supported */
+ assert(vms->its);
+ if (!kvm_irqchip_in_kernel() && !vms->tcg_its) {
+ /*
+ * Do nothing if ITS is neither supported by the host nor emulated by
+ * the machine.
+ */
return;
}
- dev = qdev_new(itsclass);
+ dev = qdev_new(its_class_name());
object_property_set_link(OBJECT(dev), "parent-gicv3", OBJECT(vms->gic),
&error_abort);
@@ -1487,9 +1484,12 @@ static void create_virtio_iommu_dt_bindings(VirtMachineState *vms)
qemu_fdt_setprop_cell(ms->fdt, node, "phandle", vms->iommu_phandle);
g_free(node);
- qemu_fdt_setprop_cells(ms->fdt, vms->pciehb_nodename, "iommu-map",
- 0x0, vms->iommu_phandle, 0x0, bdf,
- bdf + 1, vms->iommu_phandle, bdf + 1, 0xffff - bdf);
+ if (!vms->default_bus_bypass_iommu) {
+ qemu_fdt_setprop_cells(ms->fdt, vms->pciehb_nodename, "iommu-map",
+ 0x0, vms->iommu_phandle, 0x0, bdf,
+ bdf + 1, vms->iommu_phandle, bdf + 1,
+ 0xffff - bdf);
+ }
}
static void create_pcie(VirtMachineState *vms)
@@ -1612,8 +1612,10 @@ static void create_pcie(VirtMachineState *vms)
switch (vms->iommu) {
case VIRT_IOMMU_SMMUV3:
create_smmu(vms, vms->bus);
- qemu_fdt_setprop_cells(ms->fdt, nodename, "iommu-map",
- 0x0, vms->iommu_phandle, 0x0, 0x10000);
+ if (!vms->default_bus_bypass_iommu) {
+ qemu_fdt_setprop_cells(ms->fdt, nodename, "iommu-map",
+ 0x0, vms->iommu_phandle, 0x0, 0x10000);
+ }
break;
default:
g_assert_not_reached();
@@ -2024,10 +2026,11 @@ static void finalize_gic_version(VirtMachineState *vms)
}
/*
- * virt_cpu_post_init() must be called after the CPUs have
- * been realized and the GIC has been created.
+ * virt_post_cpus_gic_realized() must be called after the CPUs and
+ * the GIC have both been realized.
*/
-static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem)
+static void virt_post_cpus_gic_realized(VirtMachineState *vms,
+ MemoryRegion *sysmem)
{
int max_cpus = MACHINE(vms)->smp.max_cpus;
bool aarch64, pmu, steal_time;
@@ -2198,14 +2201,14 @@ static void machvirt_init(MachineState *machine)
exit(1);
}
- if (vms->secure && (kvm_enabled() || hvf_enabled())) {
+ if (vms->secure && !tcg_enabled() && !qtest_enabled()) {
error_report("mach-virt: %s does not support providing "
"Security extensions (TrustZone) to the guest CPU",
current_accel_name());
exit(1);
}
- if (vms->virt && (kvm_enabled() || hvf_enabled())) {
+ if (vms->virt && !tcg_enabled() && !qtest_enabled()) {
error_report("mach-virt: %s does not support providing "
"Virtualization extensions to the guest CPU",
current_accel_name());
@@ -2344,7 +2347,7 @@ static void machvirt_init(MachineState *machine)
create_gic(vms, sysmem);
- virt_cpu_post_init(vms, sysmem);
+ virt_post_cpus_gic_realized(vms, sysmem);
fdt_add_pmu_nodes(vms);
@@ -3337,12 +3340,8 @@ static void virt_instance_init(Object *obj)
/* Default allows ITS instantiation */
vms->its = true;
-
- if (vmc->no_tcg_its) {
- vms->tcg_its = false;
- } else {
- vms->tcg_its = true;
- }
+ /* Allow ITS emulation if the machine version supports it */
+ vms->tcg_its = !vmc->no_tcg_its;
/* Default disallows iommu instantiation */
vms->iommu = VIRT_IOMMU_NONE;
diff --git a/hw/audio/ac97.c b/hw/audio/ac97.c
index 669a046..eb7a847 100644
--- a/hw/audio/ac97.c
+++ b/hw/audio/ac97.c
@@ -886,7 +886,7 @@ static void nabm_writel(void *opaque, uint32_t addr, uint32_t val)
static int write_audio(AC97LinkState *s, AC97BusMasterRegs *r,
int max, int *stop)
{
- uint8_t tmpbuf[4096];
+ QEMU_UNINITIALIZED uint8_t tmpbuf[4096];
uint32_t addr = r->bd.addr;
uint32_t temp = r->picb << 1;
uint32_t written = 0;
@@ -959,7 +959,7 @@ static void write_bup(AC97LinkState *s, int elapsed)
static int read_audio(AC97LinkState *s, AC97BusMasterRegs *r,
int max, int *stop)
{
- uint8_t tmpbuf[4096];
+ QEMU_UNINITIALIZED uint8_t tmpbuf[4096];
uint32_t addr = r->bd.addr;
uint32_t temp = r->picb << 1;
uint32_t nread = 0;
diff --git a/hw/audio/asc.c b/hw/audio/asc.c
index 18382cc..edd42d6 100644
--- a/hw/audio/asc.c
+++ b/hw/audio/asc.c
@@ -12,6 +12,7 @@
#include "qemu/osdep.h"
#include "qemu/timer.h"
+#include "qapi/error.h"
#include "hw/sysbus.h"
#include "hw/irq.h"
#include "audio/audio.h"
@@ -653,11 +654,17 @@ static void asc_realize(DeviceState *dev, Error **errp)
s->voice = AUD_open_out(&s->card, s->voice, "asc.out", s, asc_out_cb,
&as);
+ if (!s->voice) {
+ AUD_remove_card(&s->card);
+ error_setg(errp, "Initializing audio stream failed");
+ return;
+ }
+
s->shift = 1;
s->samples = AUD_get_buffer_size_out(s->voice) >> s->shift;
s->mixbuf = g_malloc0(s->samples << s->shift);
- s->silentbuf = g_malloc0(s->samples << s->shift);
+ s->silentbuf = g_malloc(s->samples << s->shift);
memset(s->silentbuf, 0x80, s->samples << s->shift);
/* Add easc registers if required */
diff --git a/hw/audio/cs4231a.c b/hw/audio/cs4231a.c
index eb9a458..6dfff20 100644
--- a/hw/audio/cs4231a.c
+++ b/hw/audio/cs4231a.c
@@ -528,7 +528,7 @@ static int cs_write_audio (CSState *s, int nchan, int dma_pos,
int dma_len, int len)
{
int temp, net;
- uint8_t tmpbuf[4096];
+ QEMU_UNINITIALIZED uint8_t tmpbuf[4096];
IsaDmaClass *k = ISADMA_GET_CLASS(s->isa_dma);
temp = len;
@@ -547,7 +547,7 @@ static int cs_write_audio (CSState *s, int nchan, int dma_pos,
copied = k->read_memory(s->isa_dma, nchan, tmpbuf, dma_pos, to_copy);
if (s->tab) {
int i;
- int16_t linbuf[4096];
+ QEMU_UNINITIALIZED int16_t linbuf[4096];
for (i = 0; i < copied; ++i)
linbuf[i] = s->tab[tmpbuf[i]];
diff --git a/hw/audio/es1370.c b/hw/audio/es1370.c
index 8efb969..a6a32a6 100644
--- a/hw/audio/es1370.c
+++ b/hw/audio/es1370.c
@@ -604,7 +604,7 @@ static uint64_t es1370_read(void *opaque, hwaddr addr, unsigned size)
static void es1370_transfer_audio (ES1370State *s, struct chan *d, int loop_sel,
int max, bool *irq)
{
- uint8_t tmpbuf[4096];
+ QEMU_UNINITIALIZED uint8_t tmpbuf[4096];
size_t to_transfer;
uint32_t addr = d->frame_addr;
int sc = d->scount & 0xffff;
diff --git a/hw/audio/gus.c b/hw/audio/gus.c
index 87e8634..c36df02 100644
--- a/hw/audio/gus.c
+++ b/hw/audio/gus.c
@@ -183,7 +183,7 @@ static int GUS_read_DMA (void *opaque, int nchan, int dma_pos, int dma_len)
{
GUSState *s = opaque;
IsaDmaClass *k = ISADMA_GET_CLASS(s->isa_dma);
- char tmpbuf[4096];
+ QEMU_UNINITIALIZED char tmpbuf[4096];
int pos = dma_pos, mode, left = dma_len - dma_pos;
ldebug ("read DMA %#x %d\n", dma_pos, dma_len);
diff --git a/hw/audio/marvell_88w8618.c b/hw/audio/marvell_88w8618.c
index 6d3ebbb..c5c79d0 100644
--- a/hw/audio/marvell_88w8618.c
+++ b/hw/audio/marvell_88w8618.c
@@ -66,7 +66,7 @@ static void mv88w8618_audio_callback(void *opaque, int free_out, int free_in)
{
mv88w8618_audio_state *s = opaque;
int16_t *codec_buffer;
- int8_t buf[4096];
+ QEMU_UNINITIALIZED int8_t buf[4096];
int8_t *mem_buffer;
int pos, block_size;
diff --git a/hw/audio/sb16.c b/hw/audio/sb16.c
index 19fd3b9..bac6411 100644
--- a/hw/audio/sb16.c
+++ b/hw/audio/sb16.c
@@ -1181,7 +1181,7 @@ static int write_audio (SB16State *s, int nchan, int dma_pos,
IsaDma *isa_dma = nchan == s->dma ? s->isa_dma : s->isa_hdma;
IsaDmaClass *k = ISADMA_GET_CLASS(isa_dma);
int temp, net;
- uint8_t tmpbuf[4096];
+ QEMU_UNINITIALIZED uint8_t tmpbuf[4096];
temp = len;
net = 0;
diff --git a/hw/audio/via-ac97.c b/hw/audio/via-ac97.c
index 1e0a5c7..d5231e1 100644
--- a/hw/audio/via-ac97.c
+++ b/hw/audio/via-ac97.c
@@ -175,7 +175,7 @@ static void out_cb(void *opaque, int avail)
ViaAC97SGDChannel *c = &s->aur;
int temp, to_copy, copied;
bool stop = false;
- uint8_t tmpbuf[4096];
+ QEMU_UNINITIALIZED uint8_t tmpbuf[4096];
if (c->stat & STAT_PAUSED) {
return;
diff --git a/hw/block/Kconfig b/hw/block/Kconfig
index a898e04..737dbcd 100644
--- a/hw/block/Kconfig
+++ b/hw/block/Kconfig
@@ -13,9 +13,6 @@ config FDC_SYSBUS
config SSI_M25P80
bool
-config NAND
- bool
-
config PFLASH_CFI01
bool
diff --git a/hw/block/meson.build b/hw/block/meson.build
index 16a51bf..6557044 100644
--- a/hw/block/meson.build
+++ b/hw/block/meson.build
@@ -6,7 +6,6 @@ system_ss.add(files(
system_ss.add(when: 'CONFIG_FDC', if_true: files('fdc.c'))
system_ss.add(when: 'CONFIG_FDC_ISA', if_true: files('fdc-isa.c'))
system_ss.add(when: 'CONFIG_FDC_SYSBUS', if_true: files('fdc-sysbus.c'))
-system_ss.add(when: 'CONFIG_NAND', if_true: files('nand.c'))
system_ss.add(when: 'CONFIG_PFLASH_CFI01', if_true: files('pflash_cfi01.c'))
system_ss.add(when: 'CONFIG_PFLASH_CFI02', if_true: files('pflash_cfi02.c'))
system_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80.c'))
diff --git a/hw/block/nand.c b/hw/block/nand.c
deleted file mode 100644
index c80bf78..0000000
--- a/hw/block/nand.c
+++ /dev/null
@@ -1,835 +0,0 @@
-/*
- * Flash NAND memory emulation. Based on "16M x 8 Bit NAND Flash
- * Memory" datasheet for the KM29U128AT / K9F2808U0A chips from
- * Samsung Electronic.
- *
- * Copyright (c) 2006 Openedhand Ltd.
- * Written by Andrzej Zaborowski <balrog@zabor.org>
- *
- * Support for additional features based on "MT29F2G16ABCWP 2Gx16"
- * datasheet from Micron Technology and "NAND02G-B2C" datasheet
- * from ST Microelectronics.
- *
- * This code is licensed under the GNU GPL v2.
- *
- * Contributions after 2012-01-13 are licensed under the terms of the
- * GNU GPL, version 2 or (at your option) any later version.
- */
-
-#ifndef NAND_IO
-
-#include "qemu/osdep.h"
-#include "hw/hw.h"
-#include "hw/qdev-properties.h"
-#include "hw/qdev-properties-system.h"
-#include "hw/block/flash.h"
-#include "system/block-backend.h"
-#include "migration/vmstate.h"
-#include "qapi/error.h"
-#include "qemu/error-report.h"
-#include "qemu/module.h"
-#include "qom/object.h"
-
-# define NAND_CMD_READ0 0x00
-# define NAND_CMD_READ1 0x01
-# define NAND_CMD_READ2 0x50
-# define NAND_CMD_LPREAD2 0x30
-# define NAND_CMD_NOSERIALREAD2 0x35
-# define NAND_CMD_RANDOMREAD1 0x05
-# define NAND_CMD_RANDOMREAD2 0xe0
-# define NAND_CMD_READID 0x90
-# define NAND_CMD_RESET 0xff
-# define NAND_CMD_PAGEPROGRAM1 0x80
-# define NAND_CMD_PAGEPROGRAM2 0x10
-# define NAND_CMD_CACHEPROGRAM2 0x15
-# define NAND_CMD_BLOCKERASE1 0x60
-# define NAND_CMD_BLOCKERASE2 0xd0
-# define NAND_CMD_READSTATUS 0x70
-# define NAND_CMD_COPYBACKPRG1 0x85
-
-# define NAND_IOSTATUS_ERROR (1 << 0)
-# define NAND_IOSTATUS_PLANE0 (1 << 1)
-# define NAND_IOSTATUS_PLANE1 (1 << 2)
-# define NAND_IOSTATUS_PLANE2 (1 << 3)
-# define NAND_IOSTATUS_PLANE3 (1 << 4)
-# define NAND_IOSTATUS_READY (1 << 6)
-# define NAND_IOSTATUS_UNPROTCT (1 << 7)
-
-# define MAX_PAGE 0x800
-# define MAX_OOB 0x40
-
-typedef struct NANDFlashState NANDFlashState;
-struct NANDFlashState {
- DeviceState parent_obj;
-
- uint8_t manf_id, chip_id;
- uint8_t buswidth; /* in BYTES */
- int size, pages;
- int page_shift, oob_shift, erase_shift, addr_shift;
- uint8_t *storage;
- BlockBackend *blk;
- int mem_oob;
-
- uint8_t cle, ale, ce, wp, gnd;
-
- uint8_t io[MAX_PAGE + MAX_OOB + 0x400];
- uint8_t *ioaddr;
- int iolen;
-
- uint32_t cmd;
- uint64_t addr;
- int addrlen;
- int status;
- int offset;
-
- void (*blk_write)(NANDFlashState *s);
- void (*blk_erase)(NANDFlashState *s);
- /*
- * Returns %true when block containing (@addr + @offset) is
- * successfully loaded, otherwise %false.
- */
- bool (*blk_load)(NANDFlashState *s, uint64_t addr, unsigned offset);
-
- uint32_t ioaddr_vmstate;
-};
-
-#define TYPE_NAND "nand"
-
-OBJECT_DECLARE_SIMPLE_TYPE(NANDFlashState, NAND)
-
-static void mem_and(uint8_t *dest, const uint8_t *src, size_t n)
-{
- /* Like memcpy() but we logical-AND the data into the destination */
- int i;
- for (i = 0; i < n; i++) {
- dest[i] &= src[i];
- }
-}
-
-# define NAND_NO_AUTOINCR 0x00000001
-# define NAND_BUSWIDTH_16 0x00000002
-# define NAND_NO_PADDING 0x00000004
-# define NAND_CACHEPRG 0x00000008
-# define NAND_COPYBACK 0x00000010
-# define NAND_IS_AND 0x00000020
-# define NAND_4PAGE_ARRAY 0x00000040
-# define NAND_NO_READRDY 0x00000100
-# define NAND_SAMSUNG_LP (NAND_NO_PADDING | NAND_COPYBACK)
-
-# define NAND_IO
-
-# define PAGE(addr) ((addr) >> ADDR_SHIFT)
-# define PAGE_START(page) (PAGE(page) * (NAND_PAGE_SIZE + OOB_SIZE))
-# define PAGE_MASK ((1 << ADDR_SHIFT) - 1)
-# define OOB_SHIFT (PAGE_SHIFT - 5)
-# define OOB_SIZE (1 << OOB_SHIFT)
-# define SECTOR(addr) ((addr) >> (9 + ADDR_SHIFT - PAGE_SHIFT))
-# define SECTOR_OFFSET(addr) ((addr) & ((511 >> PAGE_SHIFT) << 8))
-
-# define NAND_PAGE_SIZE 256
-# define PAGE_SHIFT 8
-# define PAGE_SECTORS 1
-# define ADDR_SHIFT 8
-# include "nand.c"
-# define NAND_PAGE_SIZE 512
-# define PAGE_SHIFT 9
-# define PAGE_SECTORS 1
-# define ADDR_SHIFT 8
-# include "nand.c"
-# define NAND_PAGE_SIZE 2048
-# define PAGE_SHIFT 11
-# define PAGE_SECTORS 4
-# define ADDR_SHIFT 16
-# include "nand.c"
-
-/* Information based on Linux drivers/mtd/nand/raw/nand_ids.c */
-static const struct {
- int size;
- int width;
- int page_shift;
- int erase_shift;
- uint32_t options;
-} nand_flash_ids[0x100] = {
- [0 ... 0xff] = { 0 },
-
- [0x6b] = { 4, 8, 9, 4, 0 },
- [0xe3] = { 4, 8, 9, 4, 0 },
- [0xe5] = { 4, 8, 9, 4, 0 },
- [0xd6] = { 8, 8, 9, 4, 0 },
- [0xe6] = { 8, 8, 9, 4, 0 },
-
- [0x33] = { 16, 8, 9, 5, 0 },
- [0x73] = { 16, 8, 9, 5, 0 },
- [0x43] = { 16, 16, 9, 5, NAND_BUSWIDTH_16 },
- [0x53] = { 16, 16, 9, 5, NAND_BUSWIDTH_16 },
-
- [0x35] = { 32, 8, 9, 5, 0 },
- [0x75] = { 32, 8, 9, 5, 0 },
- [0x45] = { 32, 16, 9, 5, NAND_BUSWIDTH_16 },
- [0x55] = { 32, 16, 9, 5, NAND_BUSWIDTH_16 },
-
- [0x36] = { 64, 8, 9, 5, 0 },
- [0x76] = { 64, 8, 9, 5, 0 },
- [0x46] = { 64, 16, 9, 5, NAND_BUSWIDTH_16 },
- [0x56] = { 64, 16, 9, 5, NAND_BUSWIDTH_16 },
-
- [0x78] = { 128, 8, 9, 5, 0 },
- [0x39] = { 128, 8, 9, 5, 0 },
- [0x79] = { 128, 8, 9, 5, 0 },
- [0x72] = { 128, 16, 9, 5, NAND_BUSWIDTH_16 },
- [0x49] = { 128, 16, 9, 5, NAND_BUSWIDTH_16 },
- [0x74] = { 128, 16, 9, 5, NAND_BUSWIDTH_16 },
- [0x59] = { 128, 16, 9, 5, NAND_BUSWIDTH_16 },
-
- [0x71] = { 256, 8, 9, 5, 0 },
-
- /*
- * These are the new chips with large page size. The pagesize and the
- * erasesize is determined from the extended id bytes
- */
-# define LP_OPTIONS (NAND_SAMSUNG_LP | NAND_NO_READRDY | NAND_NO_AUTOINCR)
-# define LP_OPTIONS16 (LP_OPTIONS | NAND_BUSWIDTH_16)
-
- /* 512 Megabit */
- [0xa2] = { 64, 8, 0, 0, LP_OPTIONS },
- [0xf2] = { 64, 8, 0, 0, LP_OPTIONS },
- [0xb2] = { 64, 16, 0, 0, LP_OPTIONS16 },
- [0xc2] = { 64, 16, 0, 0, LP_OPTIONS16 },
-
- /* 1 Gigabit */
- [0xa1] = { 128, 8, 0, 0, LP_OPTIONS },
- [0xf1] = { 128, 8, 0, 0, LP_OPTIONS },
- [0xb1] = { 128, 16, 0, 0, LP_OPTIONS16 },
- [0xc1] = { 128, 16, 0, 0, LP_OPTIONS16 },
-
- /* 2 Gigabit */
- [0xaa] = { 256, 8, 0, 0, LP_OPTIONS },
- [0xda] = { 256, 8, 0, 0, LP_OPTIONS },
- [0xba] = { 256, 16, 0, 0, LP_OPTIONS16 },
- [0xca] = { 256, 16, 0, 0, LP_OPTIONS16 },
-
- /* 4 Gigabit */
- [0xac] = { 512, 8, 0, 0, LP_OPTIONS },
- [0xdc] = { 512, 8, 0, 0, LP_OPTIONS },
- [0xbc] = { 512, 16, 0, 0, LP_OPTIONS16 },
- [0xcc] = { 512, 16, 0, 0, LP_OPTIONS16 },
-
- /* 8 Gigabit */
- [0xa3] = { 1024, 8, 0, 0, LP_OPTIONS },
- [0xd3] = { 1024, 8, 0, 0, LP_OPTIONS },
- [0xb3] = { 1024, 16, 0, 0, LP_OPTIONS16 },
- [0xc3] = { 1024, 16, 0, 0, LP_OPTIONS16 },
-
- /* 16 Gigabit */
- [0xa5] = { 2048, 8, 0, 0, LP_OPTIONS },
- [0xd5] = { 2048, 8, 0, 0, LP_OPTIONS },
- [0xb5] = { 2048, 16, 0, 0, LP_OPTIONS16 },
- [0xc5] = { 2048, 16, 0, 0, LP_OPTIONS16 },
-};
-
-static void nand_reset(DeviceState *dev)
-{
- NANDFlashState *s = NAND(dev);
- s->cmd = NAND_CMD_READ0;
- s->addr = 0;
- s->addrlen = 0;
- s->iolen = 0;
- s->offset = 0;
- s->status &= NAND_IOSTATUS_UNPROTCT;
- s->status |= NAND_IOSTATUS_READY;
-}
-
-static inline void nand_pushio_byte(NANDFlashState *s, uint8_t value)
-{
- s->ioaddr[s->iolen++] = value;
- for (value = s->buswidth; --value;) {
- s->ioaddr[s->iolen++] = 0;
- }
-}
-
-/*
- * nand_load_block: Load block containing (s->addr + @offset).
- * Returns length of data available at @offset in this block.
- */
-static unsigned nand_load_block(NANDFlashState *s, unsigned offset)
-{
- unsigned iolen;
-
- if (!s->blk_load(s, s->addr, offset)) {
- return 0;
- }
-
- iolen = (1 << s->page_shift);
- if (s->gnd) {
- iolen += 1 << s->oob_shift;
- }
- assert(offset <= iolen);
- iolen -= offset;
-
- return iolen;
-}
-
-static void nand_command(NANDFlashState *s)
-{
- switch (s->cmd) {
- case NAND_CMD_READ0:
- s->iolen = 0;
- break;
-
- case NAND_CMD_READID:
- s->ioaddr = s->io;
- s->iolen = 0;
- nand_pushio_byte(s, s->manf_id);
- nand_pushio_byte(s, s->chip_id);
- nand_pushio_byte(s, 'Q'); /* Don't-care byte (often 0xa5) */
- if (nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) {
- /* Page Size, Block Size, Spare Size; bit 6 indicates
- * 8 vs 16 bit width NAND.
- */
- nand_pushio_byte(s, (s->buswidth == 2) ? 0x55 : 0x15);
- } else {
- nand_pushio_byte(s, 0xc0); /* Multi-plane */
- }
- break;
-
- case NAND_CMD_RANDOMREAD2:
- case NAND_CMD_NOSERIALREAD2:
- if (!(nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP))
- break;
- s->iolen = nand_load_block(s, s->addr & ((1 << s->addr_shift) - 1));
- break;
-
- case NAND_CMD_RESET:
- nand_reset(DEVICE(s));
- break;
-
- case NAND_CMD_PAGEPROGRAM1:
- s->ioaddr = s->io;
- s->iolen = 0;
- break;
-
- case NAND_CMD_PAGEPROGRAM2:
- if (s->wp) {
- s->blk_write(s);
- }
- break;
-
- case NAND_CMD_BLOCKERASE1:
- break;
-
- case NAND_CMD_BLOCKERASE2:
- s->addr &= (1ull << s->addrlen * 8) - 1;
- s->addr <<= nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP ?
- 16 : 8;
-
- if (s->wp) {
- s->blk_erase(s);
- }
- break;
-
- case NAND_CMD_READSTATUS:
- s->ioaddr = s->io;
- s->iolen = 0;
- nand_pushio_byte(s, s->status);
- break;
-
- default:
- printf("%s: Unknown NAND command 0x%02x\n", __func__, s->cmd);
- }
-}
-
-static int nand_pre_save(void *opaque)
-{
- NANDFlashState *s = NAND(opaque);
-
- s->ioaddr_vmstate = s->ioaddr - s->io;
-
- return 0;
-}
-
-static int nand_post_load(void *opaque, int version_id)
-{
- NANDFlashState *s = NAND(opaque);
-
- if (s->ioaddr_vmstate > sizeof(s->io)) {
- return -EINVAL;
- }
- s->ioaddr = s->io + s->ioaddr_vmstate;
-
- return 0;
-}
-
-static const VMStateDescription vmstate_nand = {
- .name = "nand",
- .version_id = 1,
- .minimum_version_id = 1,
- .pre_save = nand_pre_save,
- .post_load = nand_post_load,
- .fields = (const VMStateField[]) {
- VMSTATE_UINT8(cle, NANDFlashState),
- VMSTATE_UINT8(ale, NANDFlashState),
- VMSTATE_UINT8(ce, NANDFlashState),
- VMSTATE_UINT8(wp, NANDFlashState),
- VMSTATE_UINT8(gnd, NANDFlashState),
- VMSTATE_BUFFER(io, NANDFlashState),
- VMSTATE_UINT32(ioaddr_vmstate, NANDFlashState),
- VMSTATE_INT32(iolen, NANDFlashState),
- VMSTATE_UINT32(cmd, NANDFlashState),
- VMSTATE_UINT64(addr, NANDFlashState),
- VMSTATE_INT32(addrlen, NANDFlashState),
- VMSTATE_INT32(status, NANDFlashState),
- VMSTATE_INT32(offset, NANDFlashState),
- /* XXX: do we want to save s->storage too? */
- VMSTATE_END_OF_LIST()
- }
-};
-
-static void nand_realize(DeviceState *dev, Error **errp)
-{
- int pagesize;
- NANDFlashState *s = NAND(dev);
- int ret;
-
-
- s->buswidth = nand_flash_ids[s->chip_id].width >> 3;
- s->size = nand_flash_ids[s->chip_id].size << 20;
- if (nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) {
- s->page_shift = 11;
- s->erase_shift = 6;
- } else {
- s->page_shift = nand_flash_ids[s->chip_id].page_shift;
- s->erase_shift = nand_flash_ids[s->chip_id].erase_shift;
- }
-
- switch (1 << s->page_shift) {
- case 256:
- nand_init_256(s);
- break;
- case 512:
- nand_init_512(s);
- break;
- case 2048:
- nand_init_2048(s);
- break;
- default:
- error_setg(errp, "Unsupported NAND block size %#x",
- 1 << s->page_shift);
- return;
- }
-
- pagesize = 1 << s->oob_shift;
- s->mem_oob = 1;
- if (s->blk) {
- if (!blk_supports_write_perm(s->blk)) {
- error_setg(errp, "Can't use a read-only drive");
- return;
- }
- ret = blk_set_perm(s->blk, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
- BLK_PERM_ALL, errp);
- if (ret < 0) {
- return;
- }
- if (blk_getlength(s->blk) >=
- (s->pages << s->page_shift) + (s->pages << s->oob_shift)) {
- pagesize = 0;
- s->mem_oob = 0;
- }
- } else {
- pagesize += 1 << s->page_shift;
- }
- if (pagesize) {
- s->storage = (uint8_t *) memset(g_malloc(s->pages * pagesize),
- 0xff, s->pages * pagesize);
- }
- /* Give s->ioaddr a sane value in case we save state before it is used. */
- s->ioaddr = s->io;
-}
-
-static const Property nand_properties[] = {
- DEFINE_PROP_UINT8("manufacturer_id", NANDFlashState, manf_id, 0),
- DEFINE_PROP_UINT8("chip_id", NANDFlashState, chip_id, 0),
- DEFINE_PROP_DRIVE("drive", NANDFlashState, blk),
-};
-
-static void nand_class_init(ObjectClass *klass, const void *data)
-{
- DeviceClass *dc = DEVICE_CLASS(klass);
-
- dc->realize = nand_realize;
- device_class_set_legacy_reset(dc, nand_reset);
- dc->vmsd = &vmstate_nand;
- device_class_set_props(dc, nand_properties);
- set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
-}
-
-static const TypeInfo nand_info = {
- .name = TYPE_NAND,
- .parent = TYPE_DEVICE,
- .instance_size = sizeof(NANDFlashState),
- .class_init = nand_class_init,
-};
-
-static void nand_register_types(void)
-{
- type_register_static(&nand_info);
-}
-
-/*
- * Chip inputs are CLE, ALE, CE, WP, GND and eight I/O pins. Chip
- * outputs are R/B and eight I/O pins.
- *
- * CE, WP and R/B are active low.
- */
-void nand_setpins(DeviceState *dev, uint8_t cle, uint8_t ale,
- uint8_t ce, uint8_t wp, uint8_t gnd)
-{
- NANDFlashState *s = NAND(dev);
-
- s->cle = cle;
- s->ale = ale;
- s->ce = ce;
- s->wp = wp;
- s->gnd = gnd;
- if (wp) {
- s->status |= NAND_IOSTATUS_UNPROTCT;
- } else {
- s->status &= ~NAND_IOSTATUS_UNPROTCT;
- }
-}
-
-void nand_getpins(DeviceState *dev, int *rb)
-{
- *rb = 1;
-}
-
-void nand_setio(DeviceState *dev, uint32_t value)
-{
- int i;
- NANDFlashState *s = NAND(dev);
-
- if (!s->ce && s->cle) {
- if (nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) {
- if (s->cmd == NAND_CMD_READ0 && value == NAND_CMD_LPREAD2)
- return;
- if (value == NAND_CMD_RANDOMREAD1) {
- s->addr &= ~((1 << s->addr_shift) - 1);
- s->addrlen = 0;
- return;
- }
- }
- if (value == NAND_CMD_READ0) {
- s->offset = 0;
- } else if (value == NAND_CMD_READ1) {
- s->offset = 0x100;
- value = NAND_CMD_READ0;
- } else if (value == NAND_CMD_READ2) {
- s->offset = 1 << s->page_shift;
- value = NAND_CMD_READ0;
- }
-
- s->cmd = value;
-
- if (s->cmd == NAND_CMD_READSTATUS ||
- s->cmd == NAND_CMD_PAGEPROGRAM2 ||
- s->cmd == NAND_CMD_BLOCKERASE1 ||
- s->cmd == NAND_CMD_BLOCKERASE2 ||
- s->cmd == NAND_CMD_NOSERIALREAD2 ||
- s->cmd == NAND_CMD_RANDOMREAD2 ||
- s->cmd == NAND_CMD_RESET) {
- nand_command(s);
- }
-
- if (s->cmd != NAND_CMD_RANDOMREAD2) {
- s->addrlen = 0;
- }
- }
-
- if (s->ale) {
- unsigned int shift = s->addrlen * 8;
- uint64_t mask = ~(0xffull << shift);
- uint64_t v = (uint64_t)value << shift;
-
- s->addr = (s->addr & mask) | v;
- s->addrlen ++;
-
- switch (s->addrlen) {
- case 1:
- if (s->cmd == NAND_CMD_READID) {
- nand_command(s);
- }
- break;
- case 2: /* fix cache address as a byte address */
- s->addr <<= (s->buswidth - 1);
- break;
- case 3:
- if (!(nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) &&
- (s->cmd == NAND_CMD_READ0 ||
- s->cmd == NAND_CMD_PAGEPROGRAM1)) {
- nand_command(s);
- }
- break;
- case 4:
- if ((nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) &&
- nand_flash_ids[s->chip_id].size < 256 && /* 1Gb or less */
- (s->cmd == NAND_CMD_READ0 ||
- s->cmd == NAND_CMD_PAGEPROGRAM1)) {
- nand_command(s);
- }
- break;
- case 5:
- if ((nand_flash_ids[s->chip_id].options & NAND_SAMSUNG_LP) &&
- nand_flash_ids[s->chip_id].size >= 256 && /* 2Gb or more */
- (s->cmd == NAND_CMD_READ0 ||
- s->cmd == NAND_CMD_PAGEPROGRAM1)) {
- nand_command(s);
- }
- break;
- default:
- break;
- }
- }
-
- if (!s->cle && !s->ale && s->cmd == NAND_CMD_PAGEPROGRAM1) {
- if (s->iolen < (1 << s->page_shift) + (1 << s->oob_shift)) {
- for (i = s->buswidth; i--; value >>= 8) {
- s->io[s->iolen ++] = (uint8_t) (value & 0xff);
- }
- }
- } else if (!s->cle && !s->ale && s->cmd == NAND_CMD_COPYBACKPRG1) {
- if ((s->addr & ((1 << s->addr_shift) - 1)) <
- (1 << s->page_shift) + (1 << s->oob_shift)) {
- for (i = s->buswidth; i--; s->addr++, value >>= 8) {
- s->io[s->iolen + (s->addr & ((1 << s->addr_shift) - 1))] =
- (uint8_t) (value & 0xff);
- }
- }
- }
-}
-
-uint32_t nand_getio(DeviceState *dev)
-{
- int offset;
- uint32_t x = 0;
- NANDFlashState *s = NAND(dev);
-
- /* Allow sequential reading */
- if (!s->iolen && s->cmd == NAND_CMD_READ0) {
- offset = (int) (s->addr & ((1 << s->addr_shift) - 1)) + s->offset;
- s->offset = 0;
- s->iolen = nand_load_block(s, offset);
- }
-
- if (s->ce || s->iolen <= 0) {
- return 0;
- }
-
- for (offset = s->buswidth; offset--;) {
- x |= s->ioaddr[offset] << (offset << 3);
- }
- /* after receiving READ STATUS command all subsequent reads will
- * return the status register value until another command is issued
- */
- if (s->cmd != NAND_CMD_READSTATUS) {
- s->addr += s->buswidth;
- s->ioaddr += s->buswidth;
- s->iolen -= s->buswidth;
- }
- return x;
-}
-
-uint32_t nand_getbuswidth(DeviceState *dev)
-{
- NANDFlashState *s = (NANDFlashState *) dev;
- return s->buswidth << 3;
-}
-
-DeviceState *nand_init(BlockBackend *blk, int manf_id, int chip_id)
-{
- DeviceState *dev;
-
- if (nand_flash_ids[chip_id].size == 0) {
- hw_error("%s: Unsupported NAND chip ID.\n", __func__);
- }
- dev = qdev_new(TYPE_NAND);
- qdev_prop_set_uint8(dev, "manufacturer_id", manf_id);
- qdev_prop_set_uint8(dev, "chip_id", chip_id);
- if (blk) {
- qdev_prop_set_drive_err(dev, "drive", blk, &error_fatal);
- }
-
- qdev_realize(dev, NULL, &error_fatal);
- return dev;
-}
-
-type_init(nand_register_types)
-
-#else
-
-/* Program a single page */
-static void glue(nand_blk_write_, NAND_PAGE_SIZE)(NANDFlashState *s)
-{
- uint64_t off, page, sector, soff;
- uint8_t iobuf[(PAGE_SECTORS + 2) * 0x200];
- if (PAGE(s->addr) >= s->pages)
- return;
-
- if (!s->blk) {
- mem_and(s->storage + PAGE_START(s->addr) + (s->addr & PAGE_MASK) +
- s->offset, s->io, s->iolen);
- } else if (s->mem_oob) {
- sector = SECTOR(s->addr);
- off = (s->addr & PAGE_MASK) + s->offset;
- soff = SECTOR_OFFSET(s->addr);
- if (blk_pread(s->blk, sector << BDRV_SECTOR_BITS,
- PAGE_SECTORS << BDRV_SECTOR_BITS, iobuf, 0) < 0) {
- printf("%s: read error in sector %" PRIu64 "\n", __func__, sector);
- return;
- }
-
- mem_and(iobuf + (soff | off), s->io, MIN(s->iolen, NAND_PAGE_SIZE - off));
- if (off + s->iolen > NAND_PAGE_SIZE) {
- page = PAGE(s->addr);
- mem_and(s->storage + (page << OOB_SHIFT), s->io + NAND_PAGE_SIZE - off,
- MIN(OOB_SIZE, off + s->iolen - NAND_PAGE_SIZE));
- }
-
- if (blk_pwrite(s->blk, sector << BDRV_SECTOR_BITS,
- PAGE_SECTORS << BDRV_SECTOR_BITS, iobuf, 0) < 0) {
- printf("%s: write error in sector %" PRIu64 "\n", __func__, sector);
- }
- } else {
- off = PAGE_START(s->addr) + (s->addr & PAGE_MASK) + s->offset;
- sector = off >> 9;
- soff = off & 0x1ff;
- if (blk_pread(s->blk, sector << BDRV_SECTOR_BITS,
- (PAGE_SECTORS + 2) << BDRV_SECTOR_BITS, iobuf, 0) < 0) {
- printf("%s: read error in sector %" PRIu64 "\n", __func__, sector);
- return;
- }
-
- mem_and(iobuf + soff, s->io, s->iolen);
-
- if (blk_pwrite(s->blk, sector << BDRV_SECTOR_BITS,
- (PAGE_SECTORS + 2) << BDRV_SECTOR_BITS, iobuf, 0) < 0) {
- printf("%s: write error in sector %" PRIu64 "\n", __func__, sector);
- }
- }
- s->offset = 0;
-}
-
-/* Erase a single block */
-static void glue(nand_blk_erase_, NAND_PAGE_SIZE)(NANDFlashState *s)
-{
- uint64_t i, page, addr;
- uint8_t iobuf[0x200] = { [0 ... 0x1ff] = 0xff, };
- addr = s->addr & ~((1 << (ADDR_SHIFT + s->erase_shift)) - 1);
-
- if (PAGE(addr) >= s->pages) {
- return;
- }
-
- if (!s->blk) {
- memset(s->storage + PAGE_START(addr),
- 0xff, (NAND_PAGE_SIZE + OOB_SIZE) << s->erase_shift);
- } else if (s->mem_oob) {
- memset(s->storage + (PAGE(addr) << OOB_SHIFT),
- 0xff, OOB_SIZE << s->erase_shift);
- i = SECTOR(addr);
- page = SECTOR(addr + (1 << (ADDR_SHIFT + s->erase_shift)));
- for (; i < page; i ++)
- if (blk_pwrite(s->blk, i << BDRV_SECTOR_BITS,
- BDRV_SECTOR_SIZE, iobuf, 0) < 0) {
- printf("%s: write error in sector %" PRIu64 "\n", __func__, i);
- }
- } else {
- addr = PAGE_START(addr);
- page = addr >> 9;
- if (blk_pread(s->blk, page << BDRV_SECTOR_BITS,
- BDRV_SECTOR_SIZE, iobuf, 0) < 0) {
- printf("%s: read error in sector %" PRIu64 "\n", __func__, page);
- }
- memset(iobuf + (addr & 0x1ff), 0xff, (~addr & 0x1ff) + 1);
- if (blk_pwrite(s->blk, page << BDRV_SECTOR_BITS,
- BDRV_SECTOR_SIZE, iobuf, 0) < 0) {
- printf("%s: write error in sector %" PRIu64 "\n", __func__, page);
- }
-
- memset(iobuf, 0xff, 0x200);
- i = (addr & ~0x1ff) + 0x200;
- for (addr += ((NAND_PAGE_SIZE + OOB_SIZE) << s->erase_shift) - 0x200;
- i < addr; i += 0x200) {
- if (blk_pwrite(s->blk, i, BDRV_SECTOR_SIZE, iobuf, 0) < 0) {
- printf("%s: write error in sector %" PRIu64 "\n",
- __func__, i >> 9);
- }
- }
-
- page = i >> 9;
- if (blk_pread(s->blk, page << BDRV_SECTOR_BITS,
- BDRV_SECTOR_SIZE, iobuf, 0) < 0) {
- printf("%s: read error in sector %" PRIu64 "\n", __func__, page);
- }
- memset(iobuf, 0xff, ((addr - 1) & 0x1ff) + 1);
- if (blk_pwrite(s->blk, page << BDRV_SECTOR_BITS,
- BDRV_SECTOR_SIZE, iobuf, 0) < 0) {
- printf("%s: write error in sector %" PRIu64 "\n", __func__, page);
- }
- }
-}
-
-static bool glue(nand_blk_load_, NAND_PAGE_SIZE)(NANDFlashState *s,
- uint64_t addr, unsigned offset)
-{
- if (PAGE(addr) >= s->pages) {
- return false;
- }
-
- if (offset > NAND_PAGE_SIZE + OOB_SIZE) {
- return false;
- }
-
- if (s->blk) {
- if (s->mem_oob) {
- if (blk_pread(s->blk, SECTOR(addr) << BDRV_SECTOR_BITS,
- PAGE_SECTORS << BDRV_SECTOR_BITS, s->io, 0) < 0) {
- printf("%s: read error in sector %" PRIu64 "\n",
- __func__, SECTOR(addr));
- }
- memcpy(s->io + SECTOR_OFFSET(s->addr) + NAND_PAGE_SIZE,
- s->storage + (PAGE(s->addr) << OOB_SHIFT),
- OOB_SIZE);
- s->ioaddr = s->io + SECTOR_OFFSET(s->addr) + offset;
- } else {
- if (blk_pread(s->blk, PAGE_START(addr),
- (PAGE_SECTORS + 2) << BDRV_SECTOR_BITS, s->io, 0)
- < 0) {
- printf("%s: read error in sector %" PRIu64 "\n",
- __func__, PAGE_START(addr) >> 9);
- }
- s->ioaddr = s->io + (PAGE_START(addr) & 0x1ff) + offset;
- }
- } else {
- memcpy(s->io, s->storage + PAGE_START(s->addr) +
- offset, NAND_PAGE_SIZE + OOB_SIZE - offset);
- s->ioaddr = s->io;
- }
-
- return true;
-}
-
-static void glue(nand_init_, NAND_PAGE_SIZE)(NANDFlashState *s)
-{
- s->oob_shift = PAGE_SHIFT - 5;
- s->pages = s->size >> PAGE_SHIFT;
- s->addr_shift = ADDR_SHIFT;
-
- s->blk_erase = glue(nand_blk_erase_, NAND_PAGE_SIZE);
- s->blk_write = glue(nand_blk_write_, NAND_PAGE_SIZE);
- s->blk_load = glue(nand_blk_load_, NAND_PAGE_SIZE);
-}
-
-# undef NAND_PAGE_SIZE
-# undef PAGE_SHIFT
-# undef PAGE_SECTORS
-# undef ADDR_SHIFT
-#endif /* NAND_IO */
diff --git a/hw/char/sclpconsole-lm.c b/hw/char/sclpconsole-lm.c
index e9580aa..3e40d5e 100644
--- a/hw/char/sclpconsole-lm.c
+++ b/hw/char/sclpconsole-lm.c
@@ -214,7 +214,7 @@ static int process_mdb(SCLPEvent *event, MDBO *mdbo)
{
int rc;
int len;
- uint8_t buffer[SIZE_BUFFER];
+ QEMU_UNINITIALIZED uint8_t buffer[SIZE_BUFFER];
len = be16_to_cpu(mdbo->length);
len -= sizeof(mdbo->length) + sizeof(mdbo->type)
diff --git a/hw/char/sh_serial.c b/hw/char/sh_serial.c
index 6abd803..30447fa 100644
--- a/hw/char/sh_serial.c
+++ b/hw/char/sh_serial.c
@@ -78,10 +78,6 @@ struct SHSerialState {
qemu_irq bri;
};
-typedef struct {} SHSerialStateClass;
-
-OBJECT_DEFINE_TYPE(SHSerialState, sh_serial, SH_SERIAL, SYS_BUS_DEVICE)
-
static void sh_serial_clear_fifo(SHSerialState *s)
{
memset(s->rx_fifo, 0, SH_RX_FIFO_LENGTH);
@@ -434,17 +430,13 @@ static void sh_serial_realize(DeviceState *d, Error **errp)
s->etu = NANOSECONDS_PER_SECOND / 9600;
}
-static void sh_serial_finalize(Object *obj)
+static void sh_serial_unrealize(DeviceState *dev)
{
- SHSerialState *s = SH_SERIAL(obj);
+ SHSerialState *s = SH_SERIAL(dev);
timer_del(&s->fifo_timeout_timer);
}
-static void sh_serial_init(Object *obj)
-{
-}
-
static const Property sh_serial_properties[] = {
DEFINE_PROP_CHR("chardev", SHSerialState, chr),
DEFINE_PROP_UINT8("features", SHSerialState, feat, 0),
@@ -456,7 +448,19 @@ static void sh_serial_class_init(ObjectClass *oc, const void *data)
device_class_set_props(dc, sh_serial_properties);
dc->realize = sh_serial_realize;
+ dc->unrealize = sh_serial_unrealize;
device_class_set_legacy_reset(dc, sh_serial_reset);
/* Reason: part of SuperH CPU/SoC, needs to be wired up */
dc->user_creatable = false;
}
+
+static const TypeInfo sh_serial_types[] = {
+ {
+ .name = TYPE_SH_SERIAL,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(SHSerialState),
+ .class_init = sh_serial_class_init,
+ },
+};
+
+DEFINE_TYPES(sh_serial_types)
diff --git a/hw/char/sifive_uart.c b/hw/char/sifive_uart.c
index 0fc89e7..9bc697a 100644
--- a/hw/char/sifive_uart.c
+++ b/hw/char/sifive_uart.c
@@ -128,8 +128,10 @@ static void sifive_uart_write_tx_fifo(SiFiveUARTState *s, const uint8_t *buf,
s->txfifo |= SIFIVE_UART_TXFIFO_FULL;
}
- timer_mod(s->fifo_trigger_handle, current_time +
- TX_INTERRUPT_TRIGGER_DELAY_NS);
+ if (!timer_pending(s->fifo_trigger_handle)) {
+ timer_mod(s->fifo_trigger_handle, current_time +
+ TX_INTERRUPT_TRIGGER_DELAY_NS);
+ }
}
static uint64_t
diff --git a/hw/core/loader.c b/hw/core/loader.c
index b792a54..e7056ba 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -1333,20 +1333,6 @@ void rom_set_fw(FWCfgState *f)
fw_cfg = f;
}
-void rom_set_order_override(int order)
-{
- if (!fw_cfg)
- return;
- fw_cfg_set_order_override(fw_cfg, order);
-}
-
-void rom_reset_order_override(void)
-{
- if (!fw_cfg)
- return;
- fw_cfg_reset_order_override(fw_cfg);
-}
-
void rom_transaction_begin(void)
{
Rom *rom;
diff --git a/hw/core/machine.c b/hw/core/machine.c
index b8ae155..e869821 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -37,7 +37,9 @@
#include "hw/virtio/virtio-iommu.h"
#include "audio/audio.h"
-GlobalProperty hw_compat_10_0[] = {};
+GlobalProperty hw_compat_10_0[] = {
+ { "scsi-hd", "dpofua", "off" },
+};
const size_t hw_compat_10_0_len = G_N_ELEMENTS(hw_compat_10_0);
GlobalProperty hw_compat_9_2[] = {
@@ -283,24 +285,6 @@ GlobalProperty hw_compat_2_6[] = {
};
const size_t hw_compat_2_6_len = G_N_ELEMENTS(hw_compat_2_6);
-GlobalProperty hw_compat_2_5[] = {
- { "isa-fdc", "fallback", "144" },
- { "pvscsi", "x-old-pci-configuration", "on" },
- { "pvscsi", "x-disable-pcie", "on" },
- { "vmxnet3", "x-old-msi-offsets", "on" },
- { "vmxnet3", "x-disable-pcie", "on" },
-};
-const size_t hw_compat_2_5_len = G_N_ELEMENTS(hw_compat_2_5);
-
-GlobalProperty hw_compat_2_4[] = {
- { "e1000", "extra_mac_registers", "off" },
- { "virtio-pci", "x-disable-pcie", "on" },
- { "virtio-pci", "migrate-extra", "off" },
- { "fw_cfg_mem", "dma_enabled", "off" },
- { "fw_cfg_io", "dma_enabled", "off" }
-};
-const size_t hw_compat_2_4_len = G_N_ELEMENTS(hw_compat_2_4);
-
MachineState *current_machine;
static char *machine_get_kernel(Object *obj, Error **errp)
diff --git a/hw/core/meson.build b/hw/core/meson.build
index 547de65..b5a545a 100644
--- a/hw/core/meson.build
+++ b/hw/core/meson.build
@@ -26,7 +26,7 @@ system_ss.add(when: 'CONFIG_XILINX_AXI', if_true: files('stream.c'))
system_ss.add(when: 'CONFIG_PLATFORM_BUS', if_true: files('sysbus-fdt.c'))
system_ss.add(when: 'CONFIG_EIF', if_true: [files('eif.c'), zlib, libcbor, gnutls])
-libsystem_ss.add(files(
+system_ss.add(files(
'cpu-system.c',
'fw-path-provider.c',
'gpio.c',
@@ -46,7 +46,7 @@ libsystem_ss.add(files(
'vm-change-state-handler.c',
'clock-vmstate.c',
))
-libuser_ss.add(files(
+user_ss.add(files(
'cpu-user.c',
'qdev-user.c',
))
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 8e11e63..24e145d 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -145,6 +145,7 @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name,
if (ctx != bdrv_get_aio_context(bs)) {
error_setg(errp, "Different aio context is not supported for new "
"node");
+ return;
}
blk_replace_bs(blk, bs, errp);
diff --git a/hw/display/apple-gfx.m b/hw/display/apple-gfx.m
index 8dde1f1..174d56a 100644
--- a/hw/display/apple-gfx.m
+++ b/hw/display/apple-gfx.m
@@ -454,7 +454,7 @@ static void set_cursor_glyph(void *opaque)
/* ------ DMA (device reading system memory) ------ */
typedef struct AppleGFXReadMemoryJob {
- QemuSemaphore sem;
+ QemuEvent event;
hwaddr physical_address;
uint64_t length;
void *dst;
@@ -470,7 +470,7 @@ static void apple_gfx_do_read_memory(void *opaque)
job->dst, job->length, MEMTXATTRS_UNSPECIFIED);
job->success = (r == MEMTX_OK);
- qemu_sem_post(&job->sem);
+ qemu_event_set(&job->event);
}
static bool apple_gfx_read_memory(AppleGFXState *s, hwaddr physical_address,
@@ -483,11 +483,11 @@ static bool apple_gfx_read_memory(AppleGFXState *s, hwaddr physical_address,
trace_apple_gfx_read_memory(physical_address, length, dst);
/* Performing DMA requires BQL, so do it in a BH. */
- qemu_sem_init(&job.sem, 0);
+ qemu_event_init(&job.event, 0);
aio_bh_schedule_oneshot(qemu_get_aio_context(),
apple_gfx_do_read_memory, &job);
- qemu_sem_wait(&job.sem);
- qemu_sem_destroy(&job.sem);
+ qemu_event_wait(&job.event);
+ qemu_event_destroy(&job.event);
return job.success;
}
diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c
index 145a0b3..94ddc01 100644
--- a/hw/display/virtio-gpu-virgl.c
+++ b/hw/display/virtio-gpu-virgl.c
@@ -970,6 +970,15 @@ void virtio_gpu_virgl_process_cmd(VirtIOGPU *g,
}
trace_virtio_gpu_fence_ctrl(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type);
+#if VIRGL_VERSION_MAJOR >= 1
+ if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_INFO_RING_IDX) {
+ virgl_renderer_context_create_fence(cmd->cmd_hdr.ctx_id,
+ VIRGL_RENDERER_FENCE_FLAG_MERGEABLE,
+ cmd->cmd_hdr.ring_idx,
+ cmd->cmd_hdr.fence_id);
+ return;
+ }
+#endif
virgl_renderer_create_fence(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type);
}
@@ -983,6 +992,11 @@ static void virgl_write_fence(void *opaque, uint32_t fence)
* the guest can end up emitting fences out of order
* so we should check all fenced cmds not just the first one.
*/
+#if VIRGL_VERSION_MAJOR >= 1
+ if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_INFO_RING_IDX) {
+ continue;
+ }
+#endif
if (cmd->cmd_hdr.fence_id > fence) {
continue;
}
@@ -997,6 +1011,29 @@ static void virgl_write_fence(void *opaque, uint32_t fence)
}
}
+#if VIRGL_VERSION_MAJOR >= 1
+static void virgl_write_context_fence(void *opaque, uint32_t ctx_id,
+ uint32_t ring_idx, uint64_t fence_id) {
+ VirtIOGPU *g = opaque;
+ struct virtio_gpu_ctrl_command *cmd, *tmp;
+
+ QTAILQ_FOREACH_SAFE(cmd, &g->fenceq, next, tmp) {
+ if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_INFO_RING_IDX &&
+ cmd->cmd_hdr.ctx_id == ctx_id && cmd->cmd_hdr.ring_idx == ring_idx &&
+ cmd->cmd_hdr.fence_id <= fence_id) {
+ trace_virtio_gpu_fence_resp(cmd->cmd_hdr.fence_id);
+ virtio_gpu_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA);
+ QTAILQ_REMOVE(&g->fenceq, cmd, next);
+ g_free(cmd);
+ g->inflight--;
+ if (virtio_gpu_stats_enabled(g->parent_obj.conf)) {
+ trace_virtio_gpu_dec_inflight_fences(g->inflight);
+ }
+ }
+ }
+}
+#endif
+
static virgl_renderer_gl_context
virgl_create_context(void *opaque, int scanout_idx,
struct virgl_renderer_gl_ctx_param *params)
@@ -1031,11 +1068,18 @@ static int virgl_make_context_current(void *opaque, int scanout_idx,
}
static struct virgl_renderer_callbacks virtio_gpu_3d_cbs = {
+#if VIRGL_VERSION_MAJOR >= 1
+ .version = 3,
+#else
.version = 1,
+#endif
.write_fence = virgl_write_fence,
.create_gl_context = virgl_create_context,
.destroy_gl_context = virgl_destroy_context,
.make_current = virgl_make_context_current,
+#if VIRGL_VERSION_MAJOR >= 1
+ .write_context_fence = virgl_write_context_fence,
+#endif
};
static void virtio_gpu_print_stats(void *opaque)
diff --git a/hw/display/vmware_vga.c b/hw/display/vmware_vga.c
index 544bb65..bc1a8ed 100644
--- a/hw/display/vmware_vga.c
+++ b/hw/display/vmware_vga.c
@@ -618,7 +618,7 @@ static void vmsvga_fifo_run(struct vmsvga_state_s *s)
uint32_t cmd, colour;
int args, len, maxloop = 1024;
int x, y, dx, dy, width, height;
- struct vmsvga_cursor_definition_s cursor;
+ QEMU_UNINITIALIZED struct vmsvga_cursor_definition_s cursor;
uint32_t cmd_start;
len = vmsvga_fifo_length(s);
diff --git a/hw/dma/xlnx_csu_dma.c b/hw/dma/xlnx_csu_dma.c
index 3db3904..d8c7da1 100644
--- a/hw/dma/xlnx_csu_dma.c
+++ b/hw/dma/xlnx_csu_dma.c
@@ -287,7 +287,7 @@ static uint32_t xlnx_csu_dma_advance(XlnxCSUDMA *s, uint32_t len)
static void xlnx_csu_dma_src_notify(void *opaque)
{
XlnxCSUDMA *s = XLNX_CSU_DMA(opaque);
- unsigned char buf[4 * 1024];
+ QEMU_UNINITIALIZED unsigned char buf[4 * 1024];
size_t rlen = 0;
ptimer_transaction_begin(s->src_timer);
diff --git a/hw/gpio/pca9552.c b/hw/gpio/pca9552.c
index d65c0a2..1e10238 100644
--- a/hw/gpio/pca9552.c
+++ b/hw/gpio/pca9552.c
@@ -76,7 +76,7 @@ static void pca955x_display_pins_status(PCA955xState *s,
return;
}
if (trace_event_get_state_backends(TRACE_PCA955X_GPIO_STATUS)) {
- char *buf = g_newa(char, k->pin_count + 1);
+ char buf[PCA955X_PIN_COUNT_MAX + 1];
for (i = 0; i < k->pin_count; i++) {
if (extract32(pins_status, i, 1)) {
diff --git a/hw/hyperv/hv-balloon.c b/hw/hyperv/hv-balloon.c
index 94b0abb..6dbcb2d 100644
--- a/hw/hyperv/hv-balloon.c
+++ b/hw/hyperv/hv-balloon.c
@@ -67,10 +67,6 @@
* these requests
*/
-struct HvBalloonClass {
- VMBusDeviceClass parent_class;
-} HvBalloonClass;
-
typedef enum State {
/* not a real state */
S_NO_CHANGE = 0,
@@ -162,8 +158,9 @@ typedef struct HvBalloon {
MemoryRegion *mr;
} HvBalloon;
-OBJECT_DEFINE_TYPE_WITH_INTERFACES(HvBalloon, hv_balloon, HV_BALLOON, VMBUS_DEVICE, \
- { TYPE_MEMORY_DEVICE }, { })
+OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(HvBalloon, hv_balloon, \
+ HV_BALLOON, VMBUS_DEVICE, \
+ { TYPE_MEMORY_DEVICE }, { })
#define HV_BALLOON_SET_STATE(hvb, news) \
do { \
diff --git a/hw/hyperv/syndbg.c b/hw/hyperv/syndbg.c
index 8b8a147..ac7e15f 100644
--- a/hw/hyperv/syndbg.c
+++ b/hw/hyperv/syndbg.c
@@ -192,7 +192,7 @@ static uint16_t handle_recv_msg(HvSynDbg *syndbg, uint64_t outgpa,
{
uint16_t ret;
g_assert(MSG_BUFSZ >= qemu_target_page_size());
- uint8_t data_buf[MSG_BUFSZ];
+ QEMU_UNINITIALIZED uint8_t data_buf[MSG_BUFSZ];
hwaddr out_len;
void *out_data;
ssize_t recv_byte_count;
diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
index d34ce07..14d23e2 100644
--- a/hw/i386/Kconfig
+++ b/hw/i386/Kconfig
@@ -10,6 +10,11 @@ config SGX
bool
depends on KVM
+config TDX
+ bool
+ select X86_FW_OVMF
+ depends on KVM && X86_64
+
config PC
bool
imply APPLESMC
@@ -26,6 +31,7 @@ config PC
imply QXL
imply SEV
imply SGX
+ imply TDX
imply TEST_DEVICES
imply TPM_CRB
imply TPM_TIS_ISA
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 0775c8f..963aa24 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -1426,7 +1426,6 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
AMDVIState *s = opaque;
AMDVIAddressSpace **iommu_as, *amdvi_dev_as;
int bus_num = pci_bus_num(bus);
- X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
iommu_as = s->address_spaces[bus_num];
@@ -1486,15 +1485,8 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
AMDVI_INT_ADDR_FIRST,
&amdvi_dev_as->iommu_ir, 1);
- if (!x86_iommu->pt_supported) {
- memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, false);
- memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu),
- true);
- } else {
- memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu),
- false);
- memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, true);
- }
+ memory_region_set_enabled(&amdvi_dev_as->iommu_nodma, false);
+ memory_region_set_enabled(MEMORY_REGION(&amdvi_dev_as->iommu), true);
}
return &iommu_as[devfn]->as;
}
@@ -1723,6 +1715,14 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp)
exit(EXIT_FAILURE);
}
+ if (s->xtsup) {
+ if (kvm_irqchip_is_split() && !kvm_enable_x2apic()) {
+ error_report("AMD IOMMU xtsup=on requires x2APIC support on "
+ "the KVM side");
+ exit(EXIT_FAILURE);
+ }
+ }
+
pci_setup_iommu(bus, &amdvi_iommu_ops, s);
amdvi_init(s);
}
diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c
index 39035db..1be9bfe 100644
--- a/hw/i386/kvm/apic.c
+++ b/hw/i386/kvm/apic.c
@@ -17,6 +17,7 @@
#include "system/hw_accel.h"
#include "system/kvm.h"
#include "kvm/kvm_i386.h"
+#include "kvm/tdx.h"
static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic,
int reg_id, uint32_t val)
@@ -141,6 +142,10 @@ static void kvm_apic_put(CPUState *cs, run_on_cpu_data data)
struct kvm_lapic_state kapic;
int ret;
+ if (is_tdx_vm()) {
+ return;
+ }
+
kvm_put_apicbase(s->cpu, s->apicbase);
kvm_put_apic_state(s, &kapic);
diff --git a/hw/i386/kvm/xen-stubs.c b/hw/i386/kvm/xen-stubs.c
index d03131e..ce73119 100644
--- a/hw/i386/kvm/xen-stubs.c
+++ b/hw/i386/kvm/xen-stubs.c
@@ -12,7 +12,6 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
-#include "qapi/qapi-commands-misc-target.h"
#include "xen_evtchn.h"
#include "xen_primary_console.h"
@@ -38,15 +37,3 @@ void xen_primary_console_create(void)
void xen_primary_console_set_be_port(uint16_t port)
{
}
-#ifdef TARGET_I386
-EvtchnInfoList *qmp_xen_event_list(Error **errp)
-{
- error_setg(errp, "Xen event channel emulation not enabled");
- return NULL;
-}
-
-void qmp_xen_event_inject(uint32_t port, Error **errp)
-{
- error_setg(errp, "Xen event channel emulation not enabled");
-}
-#endif
diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c
index b519054..dd566c4 100644
--- a/hw/i386/kvm/xen_evtchn.c
+++ b/hw/i386/kvm/xen_evtchn.c
@@ -19,7 +19,7 @@
#include "monitor/monitor.h"
#include "monitor/hmp.h"
#include "qapi/error.h"
-#include "qapi/qapi-commands-misc-target.h"
+#include "qapi/qapi-commands-misc-i386.h"
#include "qobject/qdict.h"
#include "qom/object.h"
#include "exec/target_page.h"
diff --git a/hw/i386/meson.build b/hw/i386/meson.build
index 10bdfde..7896f34 100644
--- a/hw/i386/meson.build
+++ b/hw/i386/meson.build
@@ -32,6 +32,7 @@ i386_ss.add(when: 'CONFIG_PC', if_true: files(
'port92.c'))
i386_ss.add(when: 'CONFIG_X86_FW_OVMF', if_true: files('pc_sysfw_ovmf.c'),
if_false: files('pc_sysfw_ovmf-stubs.c'))
+i386_ss.add(when: 'CONFIG_TDX', if_true: files('tdvf.c', 'tdvf-hob.c'))
subdir('kvm')
subdir('xen')
diff --git a/hw/i386/monitor.c b/hw/i386/monitor.c
index 1921e4d..79df965 100644
--- a/hw/i386/monitor.c
+++ b/hw/i386/monitor.c
@@ -26,7 +26,7 @@
#include "monitor/monitor.h"
#include "qobject/qdict.h"
#include "qapi/error.h"
-#include "qapi/qapi-commands-misc-target.h"
+#include "qapi/qapi-commands-misc-i386.h"
#include "hw/i386/x86.h"
#include "hw/rtc/mc146818rtc.h"
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 7065615..b211633 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -44,6 +44,7 @@
#include "system/xen.h"
#include "system/reset.h"
#include "kvm/kvm_i386.h"
+#include "kvm/tdx.h"
#include "hw/xen/xen.h"
#include "qobject/qlist.h"
#include "qemu/error-report.h"
@@ -259,28 +260,6 @@ GlobalProperty pc_compat_2_6[] = {
};
const size_t pc_compat_2_6_len = G_N_ELEMENTS(pc_compat_2_6);
-GlobalProperty pc_compat_2_5[] = {};
-const size_t pc_compat_2_5_len = G_N_ELEMENTS(pc_compat_2_5);
-
-GlobalProperty pc_compat_2_4[] = {
- PC_CPU_MODEL_IDS("2.4.0")
- { "Haswell-" TYPE_X86_CPU, "abm", "off" },
- { "Haswell-noTSX-" TYPE_X86_CPU, "abm", "off" },
- { "Broadwell-" TYPE_X86_CPU, "abm", "off" },
- { "Broadwell-noTSX-" TYPE_X86_CPU, "abm", "off" },
- { "host" "-" TYPE_X86_CPU, "host-cache-info", "on" },
- { TYPE_X86_CPU, "check", "off" },
- { "qemu64" "-" TYPE_X86_CPU, "sse4a", "on" },
- { "qemu64" "-" TYPE_X86_CPU, "abm", "on" },
- { "qemu64" "-" TYPE_X86_CPU, "popcnt", "on" },
- { "qemu32" "-" TYPE_X86_CPU, "popcnt", "on" },
- { "Opteron_G2" "-" TYPE_X86_CPU, "rdtscp", "on" },
- { "Opteron_G3" "-" TYPE_X86_CPU, "rdtscp", "on" },
- { "Opteron_G4" "-" TYPE_X86_CPU, "rdtscp", "on" },
- { "Opteron_G5" "-" TYPE_X86_CPU, "rdtscp", "on", }
-};
-const size_t pc_compat_2_4_len = G_N_ELEMENTS(pc_compat_2_4);
-
/*
* @PC_FW_DATA:
* Size of the chunk of memory at the top of RAM for the BIOS ACPI tables
@@ -976,21 +955,23 @@ void pc_memory_init(PCMachineState *pcms,
/* Initialize PC system firmware */
pc_system_firmware_init(pcms, rom_memory);
- option_rom_mr = g_malloc(sizeof(*option_rom_mr));
- if (machine_require_guest_memfd(machine)) {
- memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom",
- PC_ROM_SIZE, &error_fatal);
- } else {
- memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
- &error_fatal);
- if (pcmc->pci_enabled) {
- memory_region_set_readonly(option_rom_mr, true);
+ if (!is_tdx_vm()) {
+ option_rom_mr = g_malloc(sizeof(*option_rom_mr));
+ if (machine_require_guest_memfd(machine)) {
+ memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom",
+ PC_ROM_SIZE, &error_fatal);
+ } else {
+ memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE,
+ &error_fatal);
+ if (pcmc->pci_enabled) {
+ memory_region_set_readonly(option_rom_mr, true);
+ }
}
+ memory_region_add_subregion_overlap(rom_memory,
+ PC_ROM_MIN_VGA,
+ option_rom_mr,
+ 1);
}
- memory_region_add_subregion_overlap(rom_memory,
- PC_ROM_MIN_VGA,
- option_rom_mr,
- 1);
fw_cfg = fw_cfg_arch_create(machine,
x86ms->boot_cpus, x86ms->apic_id_limit);
@@ -999,14 +980,13 @@ void pc_memory_init(PCMachineState *pcms,
if (machine->device_memory) {
uint64_t *val = g_malloc(sizeof(*val));
- uint64_t res_mem_end = machine->device_memory->base;
-
- if (!pcmc->broken_reserved_end) {
- res_mem_end += memory_region_size(&machine->device_memory->mr);
- }
+ uint64_t res_mem_end;
if (pcms->cxl_devices_state.is_enabled) {
res_mem_end = cxl_resv_end;
+ } else {
+ res_mem_end = machine->device_memory->base
+ + memory_region_size(&machine->device_memory->mr);
}
*val = cpu_to_le64(ROUND_UP(res_mem_end, 1 * GiB));
fw_cfg_add_file(fw_cfg, "etc/reserved-memory-end", val, sizeof(*val));
@@ -1044,9 +1024,7 @@ uint64_t pc_pci_hole64_start(void)
hole64_start = pc_get_cxl_range_end(pcms);
} else if (pcmc->has_reserved_memory && (ms->ram_size < ms->maxram_size)) {
pc_get_device_memory_range(pcms, &hole64_start, &size);
- if (!pcmc->broken_reserved_end) {
- hole64_start += size;
- }
+ hole64_start += size;
} else {
hole64_start = pc_above_4g_end(pcms);
}
@@ -1058,7 +1036,6 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
{
DeviceState *dev = NULL;
- rom_set_order_override(FW_CFG_ORDER_OVERRIDE_VGA);
if (pci_bus) {
PCIDevice *pcidev = pci_vga_init(pci_bus);
dev = pcidev ? &pcidev->qdev : NULL;
@@ -1066,7 +1043,7 @@ DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)
ISADevice *isadev = isa_vga_init(isa_bus);
dev = isadev ? DEVICE(isadev) : NULL;
}
- rom_reset_order_override();
+
return dev;
}
@@ -1256,8 +1233,6 @@ void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus)
bool default_is_ne2k = g_str_equal(mc->default_nic, TYPE_ISA_NE2000);
NICInfo *nd;
- rom_set_order_override(FW_CFG_ORDER_OVERRIDE_NIC);
-
while ((nd = qemu_find_nic_info(TYPE_ISA_NE2000, default_is_ne2k, NULL))) {
pc_init_ne2k_isa(isa_bus, nd, &error_fatal);
}
@@ -1266,8 +1241,6 @@ void pc_nic_init(PCMachineClass *pcmc, ISABus *isa_bus, PCIBus *pci_bus)
if (pci_bus) {
pci_init_nic_devices(pci_bus, mc->default_nic);
}
-
- rom_reset_order_override();
}
void pc_i8259_create(ISABus *isa_bus, qemu_irq *i8259_irqs)
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 0dce512..ea7572e 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -285,6 +285,8 @@ static void pc_init1(MachineState *machine, const char *pci_type)
pcms->idebus[0] = qdev_get_child_bus(dev, "ide.0");
pcms->idebus[1] = qdev_get_child_bus(dev, "ide.1");
} else {
+ uint32_t irq;
+
isa_bus = isa_bus_new(NULL, system_memory, system_io,
&error_abort);
isa_bus_register_input_irqs(isa_bus, x86ms->gsi);
@@ -292,6 +294,9 @@ static void pc_init1(MachineState *machine, const char *pci_type)
x86ms->rtc = isa_new(TYPE_MC146818_RTC);
qdev_prop_set_int32(DEVICE(x86ms->rtc), "base_year", 2000);
isa_realize_and_unref(x86ms->rtc, isa_bus, &error_fatal);
+ irq = object_property_get_uint(OBJECT(x86ms->rtc), "irq",
+ &error_fatal);
+ isa_connect_gpio_out(ISA_DEVICE(x86ms->rtc), 0, irq);
i8257_dma_init(OBJECT(machine), isa_bus, 0);
pcms->hpet_enabled = false;
@@ -778,32 +783,6 @@ static void pc_i440fx_machine_2_6_options(MachineClass *m)
DEFINE_I440FX_MACHINE(2, 6);
-static void pc_i440fx_machine_2_5_options(MachineClass *m)
-{
- X86MachineClass *x86mc = X86_MACHINE_CLASS(m);
-
- pc_i440fx_machine_2_6_options(m);
- x86mc->save_tsc_khz = false;
- m->legacy_fw_cfg_order = 1;
- compat_props_add(m->compat_props, hw_compat_2_5, hw_compat_2_5_len);
- compat_props_add(m->compat_props, pc_compat_2_5, pc_compat_2_5_len);
-}
-
-DEFINE_I440FX_MACHINE(2, 5);
-
-static void pc_i440fx_machine_2_4_options(MachineClass *m)
-{
- PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
-
- pc_i440fx_machine_2_5_options(m);
- m->hw_version = "2.4.0";
- pcmc->broken_reserved_end = true;
- compat_props_add(m->compat_props, hw_compat_2_4, hw_compat_2_4_len);
- compat_props_add(m->compat_props, pc_compat_2_4, pc_compat_2_4_len);
-}
-
-DEFINE_I440FX_MACHINE(2, 4);
-
#ifdef CONFIG_ISAPC
static void isapc_machine_options(MachineClass *m)
{
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index c538b3d..33211b1 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -672,29 +672,3 @@ static void pc_q35_machine_2_6_options(MachineClass *m)
}
DEFINE_Q35_MACHINE(2, 6);
-
-static void pc_q35_machine_2_5_options(MachineClass *m)
-{
- X86MachineClass *x86mc = X86_MACHINE_CLASS(m);
-
- pc_q35_machine_2_6_options(m);
- x86mc->save_tsc_khz = false;
- m->legacy_fw_cfg_order = 1;
- compat_props_add(m->compat_props, hw_compat_2_5, hw_compat_2_5_len);
- compat_props_add(m->compat_props, pc_compat_2_5, pc_compat_2_5_len);
-}
-
-DEFINE_Q35_MACHINE(2, 5);
-
-static void pc_q35_machine_2_4_options(MachineClass *m)
-{
- PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
-
- pc_q35_machine_2_5_options(m);
- m->hw_version = "2.4.0";
- pcmc->broken_reserved_end = true;
- compat_props_add(m->compat_props, hw_compat_2_4, hw_compat_2_4_len);
- compat_props_add(m->compat_props, pc_compat_2_4, pc_compat_2_4_len);
-}
-
-DEFINE_Q35_MACHINE(2, 4);
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index 1eeb58a..821396c 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -37,6 +37,7 @@
#include "hw/block/flash.h"
#include "system/kvm.h"
#include "target/i386/sev.h"
+#include "kvm/tdx.h"
#define FLASH_SECTOR_SIZE 4096
@@ -280,5 +281,11 @@ void x86_firmware_configure(hwaddr gpa, void *ptr, int size)
}
sev_encrypt_flash(gpa, ptr, size, &error_fatal);
+ } else if (is_tdx_vm()) {
+ ret = tdx_parse_tdvf(ptr, size);
+ if (ret) {
+ error_report("failed to parse TDVF for TDX VM");
+ exit(1);
+ }
}
}
diff --git a/hw/i386/sgx-stub.c b/hw/i386/sgx-stub.c
index 38ff75e..d295e54 100644
--- a/hw/i386/sgx-stub.c
+++ b/hw/i386/sgx-stub.c
@@ -3,20 +3,20 @@
#include "monitor/hmp-target.h"
#include "hw/i386/pc.h"
#include "hw/i386/sgx-epc.h"
+#include "qapi/qapi-commands-misc-i386.h"
#include "qapi/error.h"
-#include "qapi/qapi-commands-misc-target.h"
void sgx_epc_build_srat(GArray *table_data)
{
}
-SGXInfo *qmp_query_sgx(Error **errp)
+SgxInfo *qmp_query_sgx(Error **errp)
{
error_setg(errp, "SGX support is not compiled in");
return NULL;
}
-SGXInfo *qmp_query_sgx_capabilities(Error **errp)
+SgxInfo *qmp_query_sgx_capabilities(Error **errp)
{
error_setg(errp, "SGX support is not compiled in");
return NULL;
diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c
index 5685c4f..e280154 100644
--- a/hw/i386/sgx.c
+++ b/hw/i386/sgx.c
@@ -19,7 +19,7 @@
#include "monitor/hmp-target.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
-#include "qapi/qapi-commands-misc-target.h"
+#include "qapi/qapi-commands-misc-i386.h"
#include "system/address-spaces.h"
#include "system/hw_accel.h"
#include "system/reset.h"
@@ -84,10 +84,10 @@ static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high)
((high & MAKE_64BIT_MASK(0, 20)) << 32);
}
-static SGXEPCSectionList *sgx_calc_host_epc_sections(void)
+static SgxEpcSectionList *sgx_calc_host_epc_sections(void)
{
- SGXEPCSectionList *head = NULL, **tail = &head;
- SGXEPCSection *section;
+ SgxEpcSectionList *head = NULL, **tail = &head;
+ SgxEpcSection *section;
uint32_t i, type;
uint32_t eax, ebx, ecx, edx;
uint32_t j = 0;
@@ -104,7 +104,7 @@ static SGXEPCSectionList *sgx_calc_host_epc_sections(void)
break;
}
- section = g_new0(SGXEPCSection, 1);
+ section = g_new0(SgxEpcSection, 1);
section->node = j++;
section->size = sgx_calc_section_metric(ecx, edx);
QAPI_LIST_APPEND(tail, section);
@@ -153,9 +153,9 @@ static void sgx_epc_reset(void *opaque)
}
}
-SGXInfo *qmp_query_sgx_capabilities(Error **errp)
+SgxInfo *qmp_query_sgx_capabilities(Error **errp)
{
- SGXInfo *info = NULL;
+ SgxInfo *info = NULL;
uint32_t eax, ebx, ecx, edx;
Error *local_err = NULL;
@@ -166,7 +166,7 @@ SGXInfo *qmp_query_sgx_capabilities(Error **errp)
return NULL;
}
- info = g_new0(SGXInfo, 1);
+ info = g_new0(SgxInfo, 1);
host_cpuid(0x7, 0, &eax, &ebx, &ecx, &edx);
info->sgx = ebx & (1U << 2) ? true : false;
@@ -183,17 +183,17 @@ SGXInfo *qmp_query_sgx_capabilities(Error **errp)
return info;
}
-static SGXEPCSectionList *sgx_get_epc_sections_list(void)
+static SgxEpcSectionList *sgx_get_epc_sections_list(void)
{
GSList *device_list = sgx_epc_get_device_list();
- SGXEPCSectionList *head = NULL, **tail = &head;
- SGXEPCSection *section;
+ SgxEpcSectionList *head = NULL, **tail = &head;
+ SgxEpcSection *section;
for (; device_list; device_list = device_list->next) {
DeviceState *dev = device_list->data;
Object *obj = OBJECT(dev);
- section = g_new0(SGXEPCSection, 1);
+ section = g_new0(SgxEpcSection, 1);
section->node = object_property_get_uint(obj, SGX_EPC_NUMA_NODE_PROP,
&error_abort);
section->size = object_property_get_uint(obj, SGX_EPC_SIZE_PROP,
@@ -205,9 +205,9 @@ static SGXEPCSectionList *sgx_get_epc_sections_list(void)
return head;
}
-SGXInfo *qmp_query_sgx(Error **errp)
+SgxInfo *qmp_query_sgx(Error **errp)
{
- SGXInfo *info = NULL;
+ SgxInfo *info = NULL;
X86MachineState *x86ms;
PCMachineState *pcms =
(PCMachineState *)object_dynamic_cast(qdev_get_machine(),
@@ -223,7 +223,7 @@ SGXInfo *qmp_query_sgx(Error **errp)
return NULL;
}
- info = g_new0(SGXInfo, 1);
+ info = g_new0(SgxInfo, 1);
info->sgx = true;
info->sgx1 = true;
@@ -237,8 +237,8 @@ SGXInfo *qmp_query_sgx(Error **errp)
void hmp_info_sgx(Monitor *mon, const QDict *qdict)
{
Error *err = NULL;
- SGXEPCSectionList *section_list, *section;
- g_autoptr(SGXInfo) info = qmp_query_sgx(&err);
+ SgxEpcSectionList *section_list, *section;
+ g_autoptr(SgxInfo) info = qmp_query_sgx(&err);
uint64_t size = 0;
if (err) {
diff --git a/hw/i386/tdvf-hob.c b/hw/i386/tdvf-hob.c
new file mode 100644
index 0000000..782b3d1
--- /dev/null
+++ b/hw/i386/tdvf-hob.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2025 Intel Corporation
+ * Author: Isaku Yamahata <isaku.yamahata at gmail.com>
+ * <isaku.yamahata at intel.com>
+ * Xiaoyao Li <xiaoyao.li@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "standard-headers/uefi/uefi.h"
+#include "hw/pci/pcie_host.h"
+#include "tdvf-hob.h"
+
+typedef struct TdvfHob {
+ hwaddr hob_addr;
+ void *ptr;
+ int size;
+
+ /* working area */
+ void *current;
+ void *end;
+} TdvfHob;
+
+static uint64_t tdvf_current_guest_addr(const TdvfHob *hob)
+{
+ return hob->hob_addr + (hob->current - hob->ptr);
+}
+
+static void tdvf_align(TdvfHob *hob, size_t align)
+{
+ hob->current = QEMU_ALIGN_PTR_UP(hob->current, align);
+}
+
+static void *tdvf_get_area(TdvfHob *hob, uint64_t size)
+{
+ void *ret;
+
+ if (hob->current + size > hob->end) {
+ error_report("TD_HOB overrun, size = 0x%" PRIx64, size);
+ exit(1);
+ }
+
+ ret = hob->current;
+ hob->current += size;
+ tdvf_align(hob, 8);
+ return ret;
+}
+
+static void tdvf_hob_add_memory_resources(TdxGuest *tdx, TdvfHob *hob)
+{
+ EFI_HOB_RESOURCE_DESCRIPTOR *region;
+ EFI_RESOURCE_ATTRIBUTE_TYPE attr;
+ EFI_RESOURCE_TYPE resource_type;
+
+ TdxRamEntry *e;
+ int i;
+
+ for (i = 0; i < tdx->nr_ram_entries; i++) {
+ e = &tdx->ram_entries[i];
+
+ if (e->type == TDX_RAM_UNACCEPTED) {
+ resource_type = EFI_RESOURCE_MEMORY_UNACCEPTED;
+ attr = EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED;
+ } else if (e->type == TDX_RAM_ADDED) {
+ resource_type = EFI_RESOURCE_SYSTEM_MEMORY;
+ attr = EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE;
+ } else {
+ error_report("unknown TDX_RAM_ENTRY type %d", e->type);
+ exit(1);
+ }
+
+ region = tdvf_get_area(hob, sizeof(*region));
+ *region = (EFI_HOB_RESOURCE_DESCRIPTOR) {
+ .Header = {
+ .HobType = EFI_HOB_TYPE_RESOURCE_DESCRIPTOR,
+ .HobLength = cpu_to_le16(sizeof(*region)),
+ .Reserved = cpu_to_le32(0),
+ },
+ .Owner = EFI_HOB_OWNER_ZERO,
+ .ResourceType = cpu_to_le32(resource_type),
+ .ResourceAttribute = cpu_to_le32(attr),
+ .PhysicalStart = cpu_to_le64(e->address),
+ .ResourceLength = cpu_to_le64(e->length),
+ };
+ }
+}
+
+void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob)
+{
+ TdvfHob hob = {
+ .hob_addr = td_hob->address,
+ .size = td_hob->size,
+ .ptr = td_hob->mem_ptr,
+
+ .current = td_hob->mem_ptr,
+ .end = td_hob->mem_ptr + td_hob->size,
+ };
+
+ EFI_HOB_GENERIC_HEADER *last_hob;
+ EFI_HOB_HANDOFF_INFO_TABLE *hit;
+
+ /* Note, Efi{Free}Memory{Bottom,Top} are ignored, leave 'em zeroed. */
+ hit = tdvf_get_area(&hob, sizeof(*hit));
+ *hit = (EFI_HOB_HANDOFF_INFO_TABLE) {
+ .Header = {
+ .HobType = EFI_HOB_TYPE_HANDOFF,
+ .HobLength = cpu_to_le16(sizeof(*hit)),
+ .Reserved = cpu_to_le32(0),
+ },
+ .Version = cpu_to_le32(EFI_HOB_HANDOFF_TABLE_VERSION),
+ .BootMode = cpu_to_le32(0),
+ .EfiMemoryTop = cpu_to_le64(0),
+ .EfiMemoryBottom = cpu_to_le64(0),
+ .EfiFreeMemoryTop = cpu_to_le64(0),
+ .EfiFreeMemoryBottom = cpu_to_le64(0),
+ .EfiEndOfHobList = cpu_to_le64(0), /* initialized later */
+ };
+
+ tdvf_hob_add_memory_resources(tdx, &hob);
+
+ last_hob = tdvf_get_area(&hob, sizeof(*last_hob));
+ *last_hob = (EFI_HOB_GENERIC_HEADER) {
+ .HobType = EFI_HOB_TYPE_END_OF_HOB_LIST,
+ .HobLength = cpu_to_le16(sizeof(*last_hob)),
+ .Reserved = cpu_to_le32(0),
+ };
+ hit->EfiEndOfHobList = tdvf_current_guest_addr(&hob);
+}
diff --git a/hw/i386/tdvf-hob.h b/hw/i386/tdvf-hob.h
new file mode 100644
index 0000000..4fc6a37
--- /dev/null
+++ b/hw/i386/tdvf-hob.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef HW_I386_TD_HOB_H
+#define HW_I386_TD_HOB_H
+
+#include "hw/i386/tdvf.h"
+#include "target/i386/kvm/tdx.h"
+
+void tdvf_hob_create(TdxGuest *tdx, TdxFirmwareEntry *td_hob);
+
+#define EFI_RESOURCE_ATTRIBUTE_TDVF_PRIVATE \
+ (EFI_RESOURCE_ATTRIBUTE_PRESENT | \
+ EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
+ EFI_RESOURCE_ATTRIBUTE_TESTED)
+
+#define EFI_RESOURCE_ATTRIBUTE_TDVF_UNACCEPTED \
+ (EFI_RESOURCE_ATTRIBUTE_PRESENT | \
+ EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
+ EFI_RESOURCE_ATTRIBUTE_TESTED)
+
+#define EFI_RESOURCE_ATTRIBUTE_TDVF_MMIO \
+ (EFI_RESOURCE_ATTRIBUTE_PRESENT | \
+ EFI_RESOURCE_ATTRIBUTE_INITIALIZED | \
+ EFI_RESOURCE_ATTRIBUTE_UNCACHEABLE)
+
+#endif
diff --git a/hw/i386/tdvf.c b/hw/i386/tdvf.c
new file mode 100644
index 0000000..645d9d1
--- /dev/null
+++ b/hw/i386/tdvf.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2025 Intel Corporation
+ * Author: Isaku Yamahata <isaku.yamahata at gmail.com>
+ * <isaku.yamahata at intel.com>
+ * Xiaoyao Li <xiaoyao.li@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+
+#include "hw/i386/pc.h"
+#include "hw/i386/tdvf.h"
+#include "system/kvm.h"
+
+#define TDX_METADATA_OFFSET_GUID "e47a6535-984a-4798-865e-4685a7bf8ec2"
+#define TDX_METADATA_VERSION 1
+#define TDVF_SIGNATURE 0x46564454 /* TDVF as little endian */
+#define TDVF_ALIGNMENT 4096
+
+/*
+ * the raw structs read from TDVF keeps the name convention in
+ * TDVF Design Guide spec.
+ */
+typedef struct {
+ uint32_t DataOffset;
+ uint32_t RawDataSize;
+ uint64_t MemoryAddress;
+ uint64_t MemoryDataSize;
+ uint32_t Type;
+ uint32_t Attributes;
+} TdvfSectionEntry;
+
+typedef struct {
+ uint32_t Signature;
+ uint32_t Length;
+ uint32_t Version;
+ uint32_t NumberOfSectionEntries;
+ TdvfSectionEntry SectionEntries[];
+} TdvfMetadata;
+
+struct tdx_metadata_offset {
+ uint32_t offset;
+};
+
+static TdvfMetadata *tdvf_get_metadata(void *flash_ptr, int size)
+{
+ TdvfMetadata *metadata;
+ uint32_t offset = 0;
+ uint8_t *data;
+
+ if ((uint32_t) size != size) {
+ return NULL;
+ }
+
+ if (pc_system_ovmf_table_find(TDX_METADATA_OFFSET_GUID, &data, NULL)) {
+ offset = size - le32_to_cpu(((struct tdx_metadata_offset *)data)->offset);
+
+ if (offset + sizeof(*metadata) > size) {
+ return NULL;
+ }
+ } else {
+ error_report("Cannot find TDX_METADATA_OFFSET_GUID");
+ return NULL;
+ }
+
+ metadata = flash_ptr + offset;
+
+ /* Finally, verify the signature to determine if this is a TDVF image. */
+ metadata->Signature = le32_to_cpu(metadata->Signature);
+ if (metadata->Signature != TDVF_SIGNATURE) {
+ error_report("Invalid TDVF signature in metadata!");
+ return NULL;
+ }
+
+ /* Sanity check that the TDVF doesn't overlap its own metadata. */
+ metadata->Length = le32_to_cpu(metadata->Length);
+ if (offset + metadata->Length > size) {
+ return NULL;
+ }
+
+ /* Only version 1 is supported/defined. */
+ metadata->Version = le32_to_cpu(metadata->Version);
+ if (metadata->Version != TDX_METADATA_VERSION) {
+ return NULL;
+ }
+
+ return metadata;
+}
+
+static int tdvf_parse_and_check_section_entry(const TdvfSectionEntry *src,
+ TdxFirmwareEntry *entry)
+{
+ entry->data_offset = le32_to_cpu(src->DataOffset);
+ entry->data_len = le32_to_cpu(src->RawDataSize);
+ entry->address = le64_to_cpu(src->MemoryAddress);
+ entry->size = le64_to_cpu(src->MemoryDataSize);
+ entry->type = le32_to_cpu(src->Type);
+ entry->attributes = le32_to_cpu(src->Attributes);
+
+ /* sanity check */
+ if (entry->size < entry->data_len) {
+ error_report("Broken metadata RawDataSize 0x%x MemoryDataSize 0x%"PRIx64,
+ entry->data_len, entry->size);
+ return -1;
+ }
+ if (!QEMU_IS_ALIGNED(entry->address, TDVF_ALIGNMENT)) {
+ error_report("MemoryAddress 0x%"PRIx64" not page aligned", entry->address);
+ return -1;
+ }
+ if (!QEMU_IS_ALIGNED(entry->size, TDVF_ALIGNMENT)) {
+ error_report("MemoryDataSize 0x%"PRIx64" not page aligned", entry->size);
+ return -1;
+ }
+
+ switch (entry->type) {
+ case TDVF_SECTION_TYPE_BFV:
+ case TDVF_SECTION_TYPE_CFV:
+ /* The sections that must be copied from firmware image to TD memory */
+ if (entry->data_len == 0) {
+ error_report("%d section with RawDataSize == 0", entry->type);
+ return -1;
+ }
+ break;
+ case TDVF_SECTION_TYPE_TD_HOB:
+ case TDVF_SECTION_TYPE_TEMP_MEM:
+ /* The sections that no need to be copied from firmware image */
+ if (entry->data_len != 0) {
+ error_report("%d section with RawDataSize 0x%x != 0",
+ entry->type, entry->data_len);
+ return -1;
+ }
+ break;
+ default:
+ error_report("TDVF contains unsupported section type %d", entry->type);
+ return -1;
+ }
+
+ return 0;
+}
+
+int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size)
+{
+ g_autofree TdvfSectionEntry *sections = NULL;
+ TdvfMetadata *metadata;
+ ssize_t entries_size;
+ int i;
+
+ metadata = tdvf_get_metadata(flash_ptr, size);
+ if (!metadata) {
+ return -EINVAL;
+ }
+
+ /* load and parse metadata entries */
+ fw->nr_entries = le32_to_cpu(metadata->NumberOfSectionEntries);
+ if (fw->nr_entries < 2) {
+ error_report("Invalid number of fw entries (%u) in TDVF Metadata",
+ fw->nr_entries);
+ return -EINVAL;
+ }
+
+ entries_size = fw->nr_entries * sizeof(TdvfSectionEntry);
+ if (metadata->Length != sizeof(*metadata) + entries_size) {
+ error_report("TDVF metadata len (0x%x) mismatch, expected (0x%x)",
+ metadata->Length,
+ (uint32_t)(sizeof(*metadata) + entries_size));
+ return -EINVAL;
+ }
+
+ fw->entries = g_new(TdxFirmwareEntry, fw->nr_entries);
+ sections = g_new(TdvfSectionEntry, fw->nr_entries);
+
+ memcpy(sections, (void *)metadata + sizeof(*metadata), entries_size);
+
+ for (i = 0; i < fw->nr_entries; i++) {
+ if (tdvf_parse_and_check_section_entry(&sections[i], &fw->entries[i])) {
+ goto err;
+ }
+ }
+
+ fw->mem_ptr = flash_ptr;
+ return 0;
+
+err:
+ fw->entries = 0;
+ g_free(fw->entries);
+ return -EINVAL;
+}
diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c
index 1b0671c..b1b5f11 100644
--- a/hw/i386/x86-common.c
+++ b/hw/i386/x86-common.c
@@ -44,6 +44,7 @@
#include "standard-headers/asm-x86/bootparam.h"
#include CONFIG_DEVICES
#include "kvm/kvm_i386.h"
+#include "kvm/tdx.h"
#ifdef CONFIG_XEN_EMU
#include "hw/xen/xen.h"
@@ -1035,11 +1036,14 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware,
if (machine_require_guest_memfd(MACHINE(x86ms))) {
memory_region_init_ram_guest_memfd(&x86ms->bios, NULL, "pc.bios",
bios_size, &error_fatal);
+ if (is_tdx_vm()) {
+ tdx_set_tdvf_region(&x86ms->bios);
+ }
} else {
memory_region_init_ram(&x86ms->bios, NULL, "pc.bios",
bios_size, &error_fatal);
}
- if (sev_enabled()) {
+ if (sev_enabled() || is_tdx_vm()) {
/*
* The concept of a "reset" simply doesn't exist for
* confidential computing guests, we have to destroy and
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index e2d0409..f80533d 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -382,7 +382,6 @@ static void x86_machine_class_init(ObjectClass *oc, const void *data)
mc->get_default_cpu_node_id = x86_get_default_cpu_node_id;
mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids;
mc->kvm_type = x86_kvm_type;
- x86mc->save_tsc_khz = true;
x86mc->fwcfg_dma_enabled = true;
nc->nmi_monitor_handler = x86_nmi;
diff --git a/hw/intc/arm_gic.c b/hw/intc/arm_gic.c
index d18bef4..899f133 100644
--- a/hw/intc/arm_gic.c
+++ b/hw/intc/arm_gic.c
@@ -59,7 +59,7 @@ static const uint8_t gic_id_gicv2[] = {
static inline int gic_get_current_cpu(GICState *s)
{
if (!qtest_enabled() && s->num_cpu > 1) {
- return current_cpu->cpu_index;
+ return current_cpu->cpu_index - s->first_cpu_index;
}
return 0;
}
diff --git a/hw/intc/arm_gic_common.c b/hw/intc/arm_gic_common.c
index 0f0c48d..ed5be05 100644
--- a/hw/intc/arm_gic_common.c
+++ b/hw/intc/arm_gic_common.c
@@ -350,6 +350,7 @@ static void arm_gic_common_linux_init(ARMLinuxBootIf *obj,
static const Property arm_gic_common_properties[] = {
DEFINE_PROP_UINT32("num-cpu", GICState, num_cpu, 1),
+ DEFINE_PROP_UINT32("first-cpu-index", GICState, first_cpu_index, 0),
DEFINE_PROP_UINT32("num-irq", GICState, num_irq, 32),
/* Revision can be 1 or 2 for GIC architecture specification
* versions 1 or 2, or 0 to indicate the legacy 11MPCore GIC.
diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
index 83ff74f..6d85720 100644
--- a/hw/intc/armv7m_nvic.c
+++ b/hw/intc/armv7m_nvic.c
@@ -988,6 +988,7 @@ static void nvic_nmi_trigger(void *opaque, int n, int level)
static uint32_t nvic_readl(NVICState *s, uint32_t offset, MemTxAttrs attrs)
{
ARMCPU *cpu = s->cpu;
+ ARMISARegisters *isar = &cpu->isar;
uint32_t val;
switch (offset) {
@@ -1263,17 +1264,17 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset, MemTxAttrs attrs)
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_pfr0;
+ return GET_IDREG(isar, ID_PFR0);
case 0xd44: /* PFR1. */
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_pfr1;
+ return GET_IDREG(isar, ID_PFR1);
case 0xd48: /* DFR0. */
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_dfr0;
+ return GET_IDREG(isar, ID_DFR0);
case 0xd4c: /* AFR0. */
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
@@ -1283,52 +1284,52 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset, MemTxAttrs attrs)
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_mmfr0;
+ return GET_IDREG(isar, ID_MMFR0);
case 0xd54: /* MMFR1. */
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_mmfr1;
+ return GET_IDREG(isar, ID_MMFR1);
case 0xd58: /* MMFR2. */
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_mmfr2;
+ return GET_IDREG(isar, ID_MMFR2);
case 0xd5c: /* MMFR3. */
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_mmfr3;
+ return GET_IDREG(isar, ID_MMFR3);
case 0xd60: /* ISAR0. */
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_isar0;
+ return GET_IDREG(&cpu->isar, ID_ISAR0);
case 0xd64: /* ISAR1. */
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_isar1;
+ return GET_IDREG(&cpu->isar, ID_ISAR1);
case 0xd68: /* ISAR2. */
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_isar2;
+ return GET_IDREG(&cpu->isar, ID_ISAR2);
case 0xd6c: /* ISAR3. */
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_isar3;
+ return GET_IDREG(&cpu->isar, ID_ISAR3);
case 0xd70: /* ISAR4. */
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_isar4;
+ return GET_IDREG(&cpu->isar, ID_ISAR4);
case 0xd74: /* ISAR5. */
if (!arm_feature(&cpu->env, ARM_FEATURE_M_MAIN)) {
goto bad_offset;
}
- return cpu->isar.id_isar5;
+ return GET_IDREG(&cpu->isar, ID_ISAR5);
case 0xd78: /* CLIDR */
return cpu->clidr;
case 0xd7c: /* CTR */
diff --git a/hw/intc/aspeed_intc.c b/hw/intc/aspeed_intc.c
index 33fcbe7..5cd786d 100644
--- a/hw/intc/aspeed_intc.c
+++ b/hw/intc/aspeed_intc.c
@@ -737,6 +737,7 @@ static const MemoryRegionOps aspeed_intc_ops = {
.read = aspeed_intc_read,
.write = aspeed_intc_write,
.endianness = DEVICE_LITTLE_ENDIAN,
+ .impl.min_access_size = 4,
.valid = {
.min_access_size = 4,
.max_access_size = 4,
@@ -747,6 +748,7 @@ static const MemoryRegionOps aspeed_intcio_ops = {
.read = aspeed_intcio_read,
.write = aspeed_intcio_write,
.endianness = DEVICE_LITTLE_ENDIAN,
+ .impl.min_access_size = 4,
.valid = {
.min_access_size = 4,
.max_access_size = 4,
@@ -757,6 +759,7 @@ static const MemoryRegionOps aspeed_ssp_intc_ops = {
.read = aspeed_intc_read,
.write = aspeed_ssp_intc_write,
.endianness = DEVICE_LITTLE_ENDIAN,
+ .impl.min_access_size = 4,
.valid = {
.min_access_size = 4,
.max_access_size = 4,
@@ -767,6 +770,7 @@ static const MemoryRegionOps aspeed_ssp_intcio_ops = {
.read = aspeed_intcio_read,
.write = aspeed_ssp_intcio_write,
.endianness = DEVICE_LITTLE_ENDIAN,
+ .impl.min_access_size = 4,
.valid = {
.min_access_size = 4,
.max_access_size = 4,
@@ -777,6 +781,7 @@ static const MemoryRegionOps aspeed_tsp_intc_ops = {
.read = aspeed_intc_read,
.write = aspeed_tsp_intc_write,
.endianness = DEVICE_LITTLE_ENDIAN,
+ .impl.min_access_size = 4,
.valid = {
.min_access_size = 4,
.max_access_size = 4,
@@ -787,6 +792,7 @@ static const MemoryRegionOps aspeed_tsp_intcio_ops = {
.read = aspeed_intcio_read,
.write = aspeed_tsp_intcio_write,
.endianness = DEVICE_LITTLE_ENDIAN,
+ .impl.min_access_size = 4,
.valid = {
.min_access_size = 4,
.max_access_size = 4,
@@ -995,7 +1001,8 @@ static AspeedINTCIRQ aspeed_2700ssp_intcio_irqs[ASPEED_INTC_MAX_INPINS] = {
{5, 5, 1, R_SSPINT165_EN, R_SSPINT165_STATUS},
};
-static void aspeed_2700ssp_intcio_class_init(ObjectClass *klass, const void *data)
+static void aspeed_2700ssp_intcio_class_init(ObjectClass *klass,
+ const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
AspeedINTCClass *aic = ASPEED_INTC_CLASS(klass);
@@ -1063,7 +1070,8 @@ static AspeedINTCIRQ aspeed_2700tsp_intcio_irqs[ASPEED_INTC_MAX_INPINS] = {
{5, 5, 1, R_TSPINT165_EN, R_TSPINT165_STATUS},
};
-static void aspeed_2700tsp_intcio_class_init(ObjectClass *klass, const void *data)
+static void aspeed_2700tsp_intcio_class_init(ObjectClass *klass,
+ const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
AspeedINTCClass *aic = ASPEED_INTC_CLASS(klass);
diff --git a/hw/intc/loongarch_extioi.c b/hw/intc/loongarch_extioi.c
index 7c38c4c..8b8ac6b 100644
--- a/hw/intc/loongarch_extioi.c
+++ b/hw/intc/loongarch_extioi.c
@@ -12,6 +12,7 @@
#include "hw/irq.h"
#include "hw/loongarch/virt.h"
#include "system/address-spaces.h"
+#include "system/kvm.h"
#include "hw/intc/loongarch_extioi.h"
#include "trace.h"
@@ -351,23 +352,29 @@ static void loongarch_extioi_realize(DeviceState *dev, Error **errp)
return;
}
- for (i = 0; i < EXTIOI_IRQS; i++) {
- sysbus_init_irq(sbd, &s->irq[i]);
- }
-
- qdev_init_gpio_in(dev, extioi_setirq, EXTIOI_IRQS);
- memory_region_init_io(&s->extioi_system_mem, OBJECT(s), &extioi_ops,
- s, "extioi_system_mem", 0x900);
- sysbus_init_mmio(sbd, &s->extioi_system_mem);
-
if (s->features & BIT(EXTIOI_HAS_VIRT_EXTENSION)) {
- memory_region_init_io(&s->virt_extend, OBJECT(s), &extioi_virt_ops,
- s, "extioi_virt", EXTIOI_VIRT_SIZE);
- sysbus_init_mmio(sbd, &s->virt_extend);
s->features |= EXTIOI_VIRT_HAS_FEATURES;
} else {
s->status |= BIT(EXTIOI_ENABLE);
}
+
+ if (kvm_irqchip_in_kernel()) {
+ kvm_extioi_realize(dev, errp);
+ } else {
+ for (i = 0; i < EXTIOI_IRQS; i++) {
+ sysbus_init_irq(sbd, &s->irq[i]);
+ }
+
+ qdev_init_gpio_in(dev, extioi_setirq, EXTIOI_IRQS);
+ memory_region_init_io(&s->extioi_system_mem, OBJECT(s), &extioi_ops,
+ s, "extioi_system_mem", 0x900);
+ sysbus_init_mmio(sbd, &s->extioi_system_mem);
+ if (s->features & BIT(EXTIOI_HAS_VIRT_EXTENSION)) {
+ memory_region_init_io(&s->virt_extend, OBJECT(s), &extioi_virt_ops,
+ s, "extioi_virt", EXTIOI_VIRT_SIZE);
+ sysbus_init_mmio(sbd, &s->virt_extend);
+ }
+ }
}
static void loongarch_extioi_unrealize(DeviceState *dev)
@@ -384,6 +391,19 @@ static void loongarch_extioi_reset_hold(Object *obj, ResetType type)
if (lec->parent_phases.hold) {
lec->parent_phases.hold(obj, type);
}
+
+ if (kvm_irqchip_in_kernel()) {
+ kvm_extioi_put(obj, 0);
+ }
+}
+
+static int vmstate_extioi_pre_save(void *opaque)
+{
+ if (kvm_irqchip_in_kernel()) {
+ return kvm_extioi_get(opaque);
+ }
+
+ return 0;
}
static int vmstate_extioi_post_load(void *opaque, int version_id)
@@ -391,6 +411,10 @@ static int vmstate_extioi_post_load(void *opaque, int version_id)
LoongArchExtIOICommonState *s = LOONGARCH_EXTIOI_COMMON(opaque);
int i, start_irq;
+ if (kvm_irqchip_in_kernel()) {
+ return kvm_extioi_put(opaque, version_id);
+ }
+
for (i = 0; i < (EXTIOI_IRQS / 4); i++) {
start_irq = i * 4;
extioi_update_sw_coremap(s, start_irq, s->coremap[i], false);
@@ -416,6 +440,7 @@ static void loongarch_extioi_class_init(ObjectClass *klass, const void *data)
&lec->parent_unrealize);
resettable_class_set_parent_phases(rc, NULL, loongarch_extioi_reset_hold,
NULL, &lec->parent_phases);
+ lecc->pre_save = vmstate_extioi_pre_save;
lecc->post_load = vmstate_extioi_post_load;
}
diff --git a/hw/intc/loongarch_extioi_kvm.c b/hw/intc/loongarch_extioi_kvm.c
new file mode 100644
index 0000000..0133540
--- /dev/null
+++ b/hw/intc/loongarch_extioi_kvm.c
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * LoongArch EXTIOI interrupt kvm support
+ *
+ * Copyright (C) 2025 Loongson Technology Corporation Limited
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/typedefs.h"
+#include "hw/intc/loongarch_extioi.h"
+#include "linux/kvm.h"
+#include "qapi/error.h"
+#include "system/kvm.h"
+
+static void kvm_extioi_access_reg(int fd, uint64_t addr, void *val, bool write)
+{
+ kvm_device_access(fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS,
+ addr, val, write, &error_abort);
+}
+
+static void kvm_extioi_access_sw_state(int fd, uint64_t addr,
+ void *val, bool write)
+{
+ kvm_device_access(fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_SW_STATUS,
+ addr, val, write, &error_abort);
+}
+
+static void kvm_extioi_access_sw_status(void *opaque, bool write)
+{
+ LoongArchExtIOICommonState *lecs = LOONGARCH_EXTIOI_COMMON(opaque);
+ LoongArchExtIOIState *les = LOONGARCH_EXTIOI(opaque);
+ int addr;
+
+ addr = KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_STATE;
+ kvm_extioi_access_sw_state(les->dev_fd, addr, &lecs->status, write);
+}
+
+static void kvm_extioi_access_regs(void *opaque, bool write)
+{
+ LoongArchExtIOICommonState *lecs = LOONGARCH_EXTIOI_COMMON(opaque);
+ LoongArchExtIOIState *les = LOONGARCH_EXTIOI(opaque);
+ int fd = les->dev_fd;
+ int addr, offset, cpu;
+
+ for (addr = EXTIOI_NODETYPE_START; addr < EXTIOI_NODETYPE_END; addr += 4) {
+ offset = (addr - EXTIOI_NODETYPE_START) / 4;
+ kvm_extioi_access_reg(fd, addr, &lecs->nodetype[offset], write);
+ }
+
+ for (addr = EXTIOI_IPMAP_START; addr < EXTIOI_IPMAP_END; addr += 4) {
+ offset = (addr - EXTIOI_IPMAP_START) / 4;
+ kvm_extioi_access_reg(fd, addr, &lecs->ipmap[offset], write);
+ }
+
+ for (addr = EXTIOI_ENABLE_START; addr < EXTIOI_ENABLE_END; addr += 4) {
+ offset = (addr - EXTIOI_ENABLE_START) / 4;
+ kvm_extioi_access_reg(fd, addr, &lecs->enable[offset], write);
+ }
+
+ for (addr = EXTIOI_BOUNCE_START; addr < EXTIOI_BOUNCE_END; addr += 4) {
+ offset = (addr - EXTIOI_BOUNCE_START) / 4;
+ kvm_extioi_access_reg(fd, addr, &lecs->bounce[offset], write);
+ }
+
+ for (addr = EXTIOI_ISR_START; addr < EXTIOI_ISR_END; addr += 4) {
+ offset = (addr - EXTIOI_ISR_START) / 4;
+ kvm_extioi_access_reg(fd, addr, &lecs->isr[offset], write);
+ }
+
+ for (addr = EXTIOI_COREMAP_START; addr < EXTIOI_COREMAP_END; addr += 4) {
+ offset = (addr - EXTIOI_COREMAP_START) / 4;
+ kvm_extioi_access_reg(fd, addr, &lecs->coremap[offset], write);
+ }
+
+ for (cpu = 0; cpu < lecs->num_cpu; cpu++) {
+ for (addr = EXTIOI_COREISR_START;
+ addr < EXTIOI_COREISR_END; addr += 4) {
+ offset = (addr - EXTIOI_COREISR_START) / 4;
+ kvm_extioi_access_reg(fd, (cpu << 16) | addr,
+ &lecs->cpu[cpu].coreisr[offset], write);
+ }
+ }
+}
+
+int kvm_extioi_get(void *opaque)
+{
+ kvm_extioi_access_regs(opaque, false);
+ kvm_extioi_access_sw_status(opaque, false);
+ return 0;
+}
+
+int kvm_extioi_put(void *opaque, int version_id)
+{
+ LoongArchExtIOIState *les = LOONGARCH_EXTIOI(opaque);
+ int fd = les->dev_fd;
+
+ if (fd == 0) {
+ return 0;
+ }
+
+ kvm_extioi_access_regs(opaque, true);
+ kvm_extioi_access_sw_status(opaque, true);
+ kvm_device_access(fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL,
+ KVM_DEV_LOONGARCH_EXTIOI_CTRL_LOAD_FINISHED,
+ NULL, true, &error_abort);
+ return 0;
+}
+
+void kvm_extioi_realize(DeviceState *dev, Error **errp)
+{
+ LoongArchExtIOICommonState *lecs = LOONGARCH_EXTIOI_COMMON(dev);
+ LoongArchExtIOIState *les = LOONGARCH_EXTIOI(dev);
+ int ret;
+
+ ret = kvm_create_device(kvm_state, KVM_DEV_TYPE_LOONGARCH_EIOINTC, false);
+ if (ret < 0) {
+ fprintf(stderr, "create KVM_LOONGARCH_EIOINTC failed: %s\n",
+ strerror(-ret));
+ abort();
+ }
+
+ les->dev_fd = ret;
+ ret = kvm_device_access(les->dev_fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL,
+ KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU,
+ &lecs->num_cpu, true, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "KVM_LOONGARCH_EXTIOI_INIT_NUM_CPU failed: %s\n",
+ strerror(-ret));
+ abort();
+ }
+
+ ret = kvm_device_access(les->dev_fd, KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL,
+ KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_FEATURE,
+ &lecs->features, true, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "KVM_LOONGARCH_EXTIOI_INIT_FEATURE failed: %s\n",
+ strerror(-ret));
+ abort();
+ }
+}
diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c
index 74372a2..fc8005c 100644
--- a/hw/intc/loongarch_ipi.c
+++ b/hw/intc/loongarch_ipi.c
@@ -11,6 +11,7 @@
#include "qapi/error.h"
#include "hw/intc/loongarch_ipi.h"
#include "hw/qdev-properties.h"
+#include "system/kvm.h"
#include "target/loongarch/cpu.h"
static AddressSpace *get_iocsr_as(CPUState *cpu)
@@ -91,6 +92,10 @@ static void loongarch_ipi_realize(DeviceState *dev, Error **errp)
lics->cpu[i].ipi = lics;
qdev_init_gpio_out(dev, &lics->cpu[i].irq, 1);
}
+
+ if (kvm_irqchip_in_kernel()) {
+ kvm_ipi_realize(dev, errp);
+ }
}
static void loongarch_ipi_reset_hold(Object *obj, ResetType type)
@@ -117,6 +122,10 @@ static void loongarch_ipi_reset_hold(Object *obj, ResetType type)
core->clear = 0;
memset(core->buf, 0, sizeof(core->buf));
}
+
+ if (kvm_irqchip_in_kernel()) {
+ kvm_ipi_put(obj, 0);
+ }
}
static void loongarch_ipi_cpu_plug(HotplugHandler *hotplug_dev,
@@ -166,6 +175,24 @@ static void loongarch_ipi_cpu_unplug(HotplugHandler *hotplug_dev,
core->cpu = NULL;
}
+static int loongarch_ipi_pre_save(void *opaque)
+{
+ if (kvm_irqchip_in_kernel()) {
+ return kvm_ipi_get(opaque);
+ }
+
+ return 0;
+}
+
+static int loongarch_ipi_post_load(void *opaque, int version_id)
+{
+ if (kvm_irqchip_in_kernel()) {
+ return kvm_ipi_put(opaque, version_id);
+ }
+
+ return 0;
+}
+
static void loongarch_ipi_class_init(ObjectClass *klass, const void *data)
{
LoongsonIPICommonClass *licc = LOONGSON_IPI_COMMON_CLASS(klass);
@@ -182,6 +209,8 @@ static void loongarch_ipi_class_init(ObjectClass *klass, const void *data)
licc->cpu_by_arch_id = loongarch_cpu_by_arch_id;
hc->plug = loongarch_ipi_cpu_plug;
hc->unplug = loongarch_ipi_cpu_unplug;
+ licc->pre_save = loongarch_ipi_pre_save;
+ licc->post_load = loongarch_ipi_post_load;
}
static const TypeInfo loongarch_ipi_types[] = {
diff --git a/hw/intc/loongarch_ipi_kvm.c b/hw/intc/loongarch_ipi_kvm.c
new file mode 100644
index 0000000..4cb3acc
--- /dev/null
+++ b/hw/intc/loongarch_ipi_kvm.c
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * LoongArch IPI interrupt KVM support
+ *
+ * Copyright (C) 2025 Loongson Technology Corporation Limited
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/intc/loongarch_ipi.h"
+#include "system/kvm.h"
+#include "target/loongarch/cpu.h"
+
+static void kvm_ipi_access_reg(int fd, uint64_t addr, uint32_t *val, bool write)
+{
+ kvm_device_access(fd, KVM_DEV_LOONGARCH_IPI_GRP_REGS,
+ addr, val, write, &error_abort);
+}
+
+static void kvm_ipi_access_regs(void *opaque, bool write)
+{
+ LoongsonIPICommonState *ipi = (LoongsonIPICommonState *)opaque;
+ LoongarchIPIState *lis = LOONGARCH_IPI(opaque);
+ IPICore *core;
+ uint64_t attr;
+ int cpu, fd = lis->dev_fd;
+
+ if (fd == 0) {
+ return;
+ }
+
+ for (cpu = 0; cpu < ipi->num_cpu; cpu++) {
+ core = &ipi->cpu[cpu];
+ attr = (cpu << 16) | CORE_STATUS_OFF;
+ kvm_ipi_access_reg(fd, attr, &core->status, write);
+
+ attr = (cpu << 16) | CORE_EN_OFF;
+ kvm_ipi_access_reg(fd, attr, &core->en, write);
+
+ attr = (cpu << 16) | CORE_SET_OFF;
+ kvm_ipi_access_reg(fd, attr, &core->set, write);
+
+ attr = (cpu << 16) | CORE_CLEAR_OFF;
+ kvm_ipi_access_reg(fd, attr, &core->clear, write);
+
+ attr = (cpu << 16) | CORE_BUF_20;
+ kvm_ipi_access_reg(fd, attr, &core->buf[0], write);
+
+ attr = (cpu << 16) | CORE_BUF_28;
+ kvm_ipi_access_reg(fd, attr, &core->buf[2], write);
+
+ attr = (cpu << 16) | CORE_BUF_30;
+ kvm_ipi_access_reg(fd, attr, &core->buf[4], write);
+
+ attr = (cpu << 16) | CORE_BUF_38;
+ kvm_ipi_access_reg(fd, attr, &core->buf[6], write);
+ }
+}
+
+int kvm_ipi_get(void *opaque)
+{
+ kvm_ipi_access_regs(opaque, false);
+ return 0;
+}
+
+int kvm_ipi_put(void *opaque, int version_id)
+{
+ kvm_ipi_access_regs(opaque, true);
+ return 0;
+}
+
+void kvm_ipi_realize(DeviceState *dev, Error **errp)
+{
+ LoongarchIPIState *lis = LOONGARCH_IPI(dev);
+ int ret;
+
+ ret = kvm_create_device(kvm_state, KVM_DEV_TYPE_LOONGARCH_IPI, false);
+ if (ret < 0) {
+ fprintf(stderr, "IPI KVM_CREATE_DEVICE failed: %s\n",
+ strerror(-ret));
+ abort();
+ }
+
+ lis->dev_fd = ret;
+}
diff --git a/hw/intc/loongarch_pch_msi.c b/hw/intc/loongarch_pch_msi.c
index 06eb944..f6d1631 100644
--- a/hw/intc/loongarch_pch_msi.c
+++ b/hw/intc/loongarch_pch_msi.c
@@ -13,6 +13,7 @@
#include "hw/pci/msi.h"
#include "hw/misc/unimp.h"
#include "migration/vmstate.h"
+#include "system/kvm.h"
#include "trace.h"
static uint64_t loongarch_msi_mem_read(void *opaque, hwaddr addr, unsigned size)
@@ -26,6 +27,15 @@ static void loongarch_msi_mem_write(void *opaque, hwaddr addr,
LoongArchPCHMSI *s = (LoongArchPCHMSI *)opaque;
int irq_num;
+ if (kvm_irqchip_in_kernel()) {
+ MSIMessage msg;
+
+ msg.address = addr;
+ msg.data = val;
+ kvm_irqchip_send_msi(kvm_state, msg);
+ return;
+ }
+
/*
* vector number is irq number from upper extioi intc
* need subtract irq base to get msi vector offset
diff --git a/hw/intc/loongarch_pch_pic.c b/hw/intc/loongarch_pch_pic.c
index cbba2fc..c4b242d 100644
--- a/hw/intc/loongarch_pch_pic.c
+++ b/hw/intc/loongarch_pch_pic.c
@@ -10,6 +10,7 @@
#include "qemu/log.h"
#include "hw/irq.h"
#include "hw/intc/loongarch_pch_pic.h"
+#include "system/kvm.h"
#include "trace.h"
#include "qapi/error.h"
@@ -48,6 +49,11 @@ static void pch_pic_irq_handler(void *opaque, int irq, int level)
assert(irq < s->irq_num);
trace_loongarch_pch_pic_irq_handler(irq, level);
+ if (kvm_irqchip_in_kernel()) {
+ kvm_set_irq(kvm_state, irq, !!level);
+ return;
+ }
+
if (s->intedge & mask) {
/* Edge triggered */
if (level) {
@@ -82,7 +88,7 @@ static uint64_t pch_pic_read(void *opaque, hwaddr addr, uint64_t field_mask)
addr -= offset;
switch (addr) {
case PCH_PIC_INT_ID:
- val = s->id.data;
+ val = cpu_to_le64(s->id.data);
break;
case PCH_PIC_INT_MASK:
val = s->int_mask;
@@ -258,6 +264,10 @@ static void loongarch_pic_reset_hold(Object *obj, ResetType type)
if (lpc->parent_phases.hold) {
lpc->parent_phases.hold(obj, type);
}
+
+ if (kvm_irqchip_in_kernel()) {
+ kvm_pic_put(obj, 0);
+ }
}
static void loongarch_pic_realize(DeviceState *dev, Error **errp)
@@ -275,22 +285,49 @@ static void loongarch_pic_realize(DeviceState *dev, Error **errp)
qdev_init_gpio_out(dev, s->parent_irq, s->irq_num);
qdev_init_gpio_in(dev, pch_pic_irq_handler, s->irq_num);
- memory_region_init_io(&s->iomem, OBJECT(dev),
- &loongarch_pch_pic_ops,
- s, TYPE_LOONGARCH_PIC, VIRT_PCH_REG_SIZE);
- sysbus_init_mmio(sbd, &s->iomem);
+
+ if (kvm_irqchip_in_kernel()) {
+ kvm_pic_realize(dev, errp);
+ } else {
+ memory_region_init_io(&s->iomem, OBJECT(dev),
+ &loongarch_pch_pic_ops,
+ s, TYPE_LOONGARCH_PIC, VIRT_PCH_REG_SIZE);
+ sysbus_init_mmio(sbd, &s->iomem);
+ }
+}
+
+static int loongarch_pic_pre_save(LoongArchPICCommonState *opaque)
+{
+ if (kvm_irqchip_in_kernel()) {
+ return kvm_pic_get(opaque);
+ }
+
+ return 0;
+}
+
+static int loongarch_pic_post_load(LoongArchPICCommonState *opaque,
+ int version_id)
+{
+ if (kvm_irqchip_in_kernel()) {
+ return kvm_pic_put(opaque, version_id);
+ }
+
+ return 0;
}
static void loongarch_pic_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
LoongarchPICClass *lpc = LOONGARCH_PIC_CLASS(klass);
+ LoongArchPICCommonClass *lpcc = LOONGARCH_PIC_COMMON_CLASS(klass);
ResettableClass *rc = RESETTABLE_CLASS(klass);
resettable_class_set_parent_phases(rc, NULL, loongarch_pic_reset_hold,
NULL, &lpc->parent_phases);
device_class_set_parent_realize(dc, loongarch_pic_realize,
&lpc->parent_realize);
+ lpcc->pre_save = loongarch_pic_pre_save;
+ lpcc->post_load = loongarch_pic_post_load;
}
static const TypeInfo loongarch_pic_types[] = {
diff --git a/hw/intc/loongarch_pic_kvm.c b/hw/intc/loongarch_pic_kvm.c
new file mode 100644
index 0000000..dd504ec
--- /dev/null
+++ b/hw/intc/loongarch_pic_kvm.c
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * LoongArch kvm pch pic interrupt support
+ *
+ * Copyright (C) 2025 Loongson Technology Corporation Limited
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/boards.h"
+#include "hw/intc/loongarch_pch_pic.h"
+#include "hw/loongarch/virt.h"
+#include "hw/pci-host/ls7a.h"
+#include "system/kvm.h"
+
+static void kvm_pch_pic_access_reg(int fd, uint64_t addr, void *val, bool write)
+{
+ kvm_device_access(fd, KVM_DEV_LOONGARCH_PCH_PIC_GRP_REGS,
+ addr, val, write, &error_abort);
+}
+
+static void kvm_pch_pic_access(void *opaque, bool write)
+{
+ LoongArchPICCommonState *s = LOONGARCH_PIC_COMMON(opaque);
+ LoongarchPICState *lps = LOONGARCH_PIC(opaque);
+ int fd = lps->dev_fd;
+ int addr, offset;
+
+ if (fd == 0) {
+ return;
+ }
+
+ kvm_pch_pic_access_reg(fd, PCH_PIC_INT_MASK, &s->int_mask, write);
+ kvm_pch_pic_access_reg(fd, PCH_PIC_HTMSI_EN, &s->htmsi_en, write);
+ kvm_pch_pic_access_reg(fd, PCH_PIC_INT_EDGE, &s->intedge, write);
+ kvm_pch_pic_access_reg(fd, PCH_PIC_AUTO_CTRL0, &s->auto_crtl0, write);
+ kvm_pch_pic_access_reg(fd, PCH_PIC_AUTO_CTRL1, &s->auto_crtl1, write);
+
+ for (addr = PCH_PIC_ROUTE_ENTRY;
+ addr < PCH_PIC_ROUTE_ENTRY_END; addr++) {
+ offset = addr - PCH_PIC_ROUTE_ENTRY;
+ kvm_pch_pic_access_reg(fd, addr, &s->route_entry[offset], write);
+ }
+
+ for (addr = PCH_PIC_HTMSI_VEC; addr < PCH_PIC_HTMSI_VEC_END; addr++) {
+ offset = addr - PCH_PIC_HTMSI_VEC;
+ kvm_pch_pic_access_reg(fd, addr, &s->htmsi_vector[offset], write);
+ }
+
+ kvm_pch_pic_access_reg(fd, PCH_PIC_INT_REQUEST, &s->intirr, write);
+ kvm_pch_pic_access_reg(fd, PCH_PIC_INT_STATUS, &s->intisr, write);
+ kvm_pch_pic_access_reg(fd, PCH_PIC_INT_POL, &s->int_polarity, write);
+}
+
+int kvm_pic_get(void *opaque)
+{
+ kvm_pch_pic_access(opaque, false);
+ return 0;
+}
+
+int kvm_pic_put(void *opaque, int version_id)
+{
+ kvm_pch_pic_access(opaque, true);
+ return 0;
+}
+
+void kvm_pic_realize(DeviceState *dev, Error **errp)
+{
+ LoongarchPICState *lps = LOONGARCH_PIC(dev);
+ uint64_t pch_pic_base = VIRT_PCH_REG_BASE;
+ int ret;
+
+ ret = kvm_create_device(kvm_state, KVM_DEV_TYPE_LOONGARCH_PCHPIC, false);
+ if (ret < 0) {
+ fprintf(stderr, "Create KVM_LOONGARCH_PCHPIC failed: %s\n",
+ strerror(-ret));
+ abort();
+ }
+
+ lps->dev_fd = ret;
+ ret = kvm_device_access(lps->dev_fd, KVM_DEV_LOONGARCH_PCH_PIC_GRP_CTRL,
+ KVM_DEV_LOONGARCH_PCH_PIC_CTRL_INIT,
+ &pch_pic_base, true, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "KVM_LOONGARCH_PCH_PIC_INIT failed: %s\n",
+ strerror(-ret));
+ abort();
+ }
+}
diff --git a/hw/intc/loongson_ipi_common.c b/hw/intc/loongson_ipi_common.c
index f32661c..8cd78d4 100644
--- a/hw/intc/loongson_ipi_common.c
+++ b/hw/intc/loongson_ipi_common.c
@@ -11,6 +11,7 @@
#include "hw/irq.h"
#include "qemu/log.h"
#include "migration/vmstate.h"
+#include "system/kvm.h"
#include "trace.h"
MemTxResult loongson_ipi_core_readl(void *opaque, hwaddr addr, uint64_t *data,
@@ -255,6 +256,10 @@ static void loongson_ipi_common_realize(DeviceState *dev, Error **errp)
LoongsonIPICommonState *s = LOONGSON_IPI_COMMON(dev);
SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+ if (kvm_irqchip_in_kernel()) {
+ return;
+ }
+
memory_region_init_io(&s->ipi_iocsr_mem, OBJECT(dev),
&loongson_ipi_iocsr_ops,
s, "loongson_ipi_iocsr", 0x48);
@@ -277,10 +282,38 @@ static void loongson_ipi_common_unrealize(DeviceState *dev)
g_free(s->cpu);
}
+static int loongson_ipi_common_pre_save(void *opaque)
+{
+ IPICore *ipicore = (IPICore *)opaque;
+ LoongsonIPICommonState *s = ipicore->ipi;
+ LoongsonIPICommonClass *licc = LOONGSON_IPI_COMMON_GET_CLASS(s);
+
+ if (licc->pre_save) {
+ return licc->pre_save(s);
+ }
+
+ return 0;
+}
+
+static int loongson_ipi_common_post_load(void *opaque, int version_id)
+{
+ IPICore *ipicore = (IPICore *)opaque;
+ LoongsonIPICommonState *s = ipicore->ipi;
+ LoongsonIPICommonClass *licc = LOONGSON_IPI_COMMON_GET_CLASS(s);
+
+ if (licc->post_load) {
+ return licc->post_load(s, version_id);
+ }
+
+ return 0;
+}
+
static const VMStateDescription vmstate_ipi_core = {
.name = "ipi-single",
.version_id = 2,
.minimum_version_id = 2,
+ .pre_save = loongson_ipi_common_pre_save,
+ .post_load = loongson_ipi_common_post_load,
.fields = (const VMStateField[]) {
VMSTATE_UINT32(status, IPICore),
VMSTATE_UINT32(en, IPICore),
diff --git a/hw/intc/meson.build b/hw/intc/meson.build
index 602da30..3137521 100644
--- a/hw/intc/meson.build
+++ b/hw/intc/meson.build
@@ -71,6 +71,12 @@ specific_ss.add(when: 'CONFIG_M68K_IRQC', if_true: files('m68k_irqc.c'))
specific_ss.add(when: 'CONFIG_LOONGSON_IPI_COMMON', if_true: files('loongson_ipi_common.c'))
specific_ss.add(when: 'CONFIG_LOONGSON_IPI', if_true: files('loongson_ipi.c'))
specific_ss.add(when: 'CONFIG_LOONGARCH_IPI', if_true: files('loongarch_ipi.c'))
+specific_ss.add(when: ['CONFIG_KVM', 'CONFIG_LOONGARCH_IPI'],
+ if_true: files('loongarch_ipi_kvm.c'))
specific_ss.add(when: 'CONFIG_LOONGARCH_PCH_PIC', if_true: files('loongarch_pch_pic.c', 'loongarch_pic_common.c'))
+specific_ss.add(when: ['CONFIG_KVM', 'CONFIG_LOONGARCH_PCH_PIC'],
+ if_true: files('loongarch_pic_kvm.c'))
specific_ss.add(when: 'CONFIG_LOONGARCH_PCH_MSI', if_true: files('loongarch_pch_msi.c'))
specific_ss.add(when: 'CONFIG_LOONGARCH_EXTIOI', if_true: files('loongarch_extioi.c', 'loongarch_extioi_common.c'))
+specific_ss.add(when: ['CONFIG_KVM', 'CONFIG_LOONGARCH_EXTIOI'],
+ if_true: files('loongarch_extioi_kvm.c'))
diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c
index b0139f0..4623cfa0 100644
--- a/hw/intc/riscv_aclint.c
+++ b/hw/intc/riscv_aclint.c
@@ -28,6 +28,7 @@
#include "qemu/module.h"
#include "hw/sysbus.h"
#include "target/riscv/cpu.h"
+#include "target/riscv/time_helper.h"
#include "hw/qdev-properties.h"
#include "hw/intc/riscv_aclint.h"
#include "qemu/timer.h"
@@ -240,6 +241,10 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr,
riscv_aclint_mtimer_write_timecmp(mtimer, RISCV_CPU(cpu),
mtimer->hartid_base + i,
mtimer->timecmp[i]);
+ riscv_timer_write_timecmp(env, env->stimer, env->stimecmp, 0, MIP_STIP);
+ riscv_timer_write_timecmp(env, env->vstimer, env->vstimecmp,
+ env->htimedelta, MIP_VSTIP);
+
}
return;
}
diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c
index 8bcd9f4..4fa5f75 100644
--- a/hw/intc/riscv_aplic.c
+++ b/hw/intc/riscv_aplic.c
@@ -962,10 +962,18 @@ static const Property riscv_aplic_properties[] = {
DEFINE_PROP_BOOL("mmode", RISCVAPLICState, mmode, 0),
};
+static bool riscv_aplic_state_needed(void *opaque)
+{
+ RISCVAPLICState *aplic = opaque;
+
+ return riscv_use_emulated_aplic(aplic->msimode);
+}
+
static const VMStateDescription vmstate_riscv_aplic = {
.name = "riscv_aplic",
- .version_id = 2,
- .minimum_version_id = 2,
+ .version_id = 3,
+ .minimum_version_id = 3,
+ .needed = riscv_aplic_state_needed,
.fields = (const VMStateField[]) {
VMSTATE_UINT32(domaincfg, RISCVAPLICState),
VMSTATE_UINT32(mmsicfgaddr, RISCVAPLICState),
diff --git a/hw/intc/riscv_imsic.c b/hw/intc/riscv_imsic.c
index 2169988..6174e1a 100644
--- a/hw/intc/riscv_imsic.c
+++ b/hw/intc/riscv_imsic.c
@@ -398,10 +398,16 @@ static const Property riscv_imsic_properties[] = {
DEFINE_PROP_UINT32("num-irqs", RISCVIMSICState, num_irqs, 0),
};
+static bool riscv_imsic_state_needed(void *opaque)
+{
+ return !kvm_irqchip_in_kernel();
+}
+
static const VMStateDescription vmstate_riscv_imsic = {
.name = "riscv_imsic",
- .version_id = 1,
- .minimum_version_id = 1,
+ .version_id = 2,
+ .minimum_version_id = 2,
+ .needed = riscv_imsic_state_needed,
.fields = (const VMStateField[]) {
VMSTATE_VARRAY_UINT32(eidelivery, RISCVIMSICState,
num_pages, 0,
diff --git a/hw/loongarch/boot.c b/hw/loongarch/boot.c
index 9b6292e..14d6c52 100644
--- a/hw/loongarch/boot.c
+++ b/hw/loongarch/boot.c
@@ -35,12 +35,6 @@ struct loongarch_linux_hdr {
uint32_t pe_header_offset;
} QEMU_PACKED;
-struct memmap_entry *memmap_table;
-unsigned memmap_entries;
-
-ram_addr_t initrd_offset;
-uint64_t initrd_size;
-
static const unsigned int slave_boot_code[] = {
/* Configure reset ebase. */
0x0400302c, /* csrwr $t0, LOONGARCH_CSR_EENTRY */
@@ -94,12 +88,16 @@ static inline void *guidcpy(void *dst, const void *src)
return memcpy(dst, src, sizeof(efi_guid_t));
}
-static void init_efi_boot_memmap(struct efi_system_table *systab,
+static void init_efi_boot_memmap(MachineState *ms,
+ struct efi_system_table *systab,
void *p, void *start)
{
unsigned i;
struct efi_boot_memmap *boot_memmap = p;
efi_guid_t tbl_guid = LINUX_EFI_BOOT_MEMMAP_GUID;
+ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(ms);
+ struct memmap_entry *memmap_table;
+ unsigned int memmap_entries;
/* efi_configuration_table 1 */
guidcpy(&systab->tables[0].guid, &tbl_guid);
@@ -111,6 +109,8 @@ static void init_efi_boot_memmap(struct efi_system_table *systab,
boot_memmap->map_size = 0;
efi_memory_desc_t *map = p + sizeof(struct efi_boot_memmap);
+ memmap_table = lvms->memmap_table;
+ memmap_entries = lvms->memmap_entries;
for (i = 0; i < memmap_entries; i++) {
map = (void *)boot_memmap + sizeof(*map);
map[i].type = memmap_table[i].type;
@@ -121,7 +121,8 @@ static void init_efi_boot_memmap(struct efi_system_table *systab,
}
}
-static void init_efi_initrd_table(struct efi_system_table *systab,
+static void init_efi_initrd_table(struct loongarch_boot_info *info,
+ struct efi_system_table *systab,
void *p, void *start)
{
efi_guid_t tbl_guid = LINUX_EFI_INITRD_MEDIA_GUID;
@@ -132,8 +133,8 @@ static void init_efi_initrd_table(struct efi_system_table *systab,
systab->tables[1].table = (struct efi_configuration_table *)(p - start);
systab->nr_tables = 2;
- initrd_table->base = initrd_offset;
- initrd_table->size = initrd_size;
+ initrd_table->base = info->initrd_addr;
+ initrd_table->size = info->initrd_size;
}
static void init_efi_fdt_table(struct efi_system_table *systab)
@@ -146,10 +147,12 @@ static void init_efi_fdt_table(struct efi_system_table *systab)
systab->nr_tables = 3;
}
-static void init_systab(struct loongarch_boot_info *info, void *p, void *start)
+static void init_systab(MachineState *ms,
+ struct loongarch_boot_info *info, void *p, void *start)
{
void *bp_tables_start;
struct efi_system_table *systab = p;
+ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(ms);
info->a2 = p - start;
@@ -166,10 +169,10 @@ static void init_systab(struct loongarch_boot_info *info, void *p, void *start)
systab->tables = p;
bp_tables_start = p;
- init_efi_boot_memmap(systab, p, start);
+ init_efi_boot_memmap(ms, systab, p, start);
p += ROUND_UP(sizeof(struct efi_boot_memmap) +
- sizeof(efi_memory_desc_t) * memmap_entries, 64 * KiB);
- init_efi_initrd_table(systab, p, start);
+ sizeof(efi_memory_desc_t) * lvms->memmap_entries, 64 * KiB);
+ init_efi_initrd_table(info, systab, p, start);
p += ROUND_UP(sizeof(struct efi_initrd), 64 * KiB);
init_efi_fdt_table(systab);
@@ -276,8 +279,8 @@ static ram_addr_t alloc_initrd_memory(struct loongarch_boot_info *info,
static int64_t load_kernel_info(struct loongarch_boot_info *info)
{
- uint64_t kernel_entry, kernel_low, kernel_high;
- ssize_t kernel_size;
+ uint64_t kernel_entry, kernel_low, kernel_high, initrd_offset = 0;
+ ssize_t kernel_size, initrd_size;
kernel_size = load_elf(info->kernel_filename, NULL,
cpu_loongarch_virt_to_phys, NULL,
@@ -313,8 +316,9 @@ static int64_t load_kernel_info(struct loongarch_boot_info *info)
info->initrd_filename);
exit(1);
}
- } else {
- initrd_size = 0;
+
+ info->initrd_addr = initrd_offset;
+ info->initrd_size = initrd_size;
}
return kernel_entry;
@@ -369,17 +373,19 @@ static void loongarch_firmware_boot(LoongArchVirtMachineState *lvms,
fw_cfg_add_kernel_info(info, lvms->fw_cfg);
}
-static void init_boot_rom(struct loongarch_boot_info *info, void *p)
+static void init_boot_rom(MachineState *ms,
+ struct loongarch_boot_info *info, void *p)
{
void *start = p;
init_cmdline(info, p, start);
p += COMMAND_LINE_SIZE;
- init_systab(info, p, start);
+ init_systab(ms, info, p, start);
}
-static void loongarch_direct_kernel_boot(struct loongarch_boot_info *info)
+static void loongarch_direct_kernel_boot(MachineState *ms,
+ struct loongarch_boot_info *info)
{
void *p, *bp;
int64_t kernel_addr = VIRT_FLASH0_BASE;
@@ -397,7 +403,7 @@ static void loongarch_direct_kernel_boot(struct loongarch_boot_info *info)
/* Load cmdline and system tables at [0 - 1 MiB] */
p = g_malloc0(1 * MiB);
bp = p;
- init_boot_rom(info, p);
+ init_boot_rom(ms, info, p);
rom_add_blob_fixed_as("boot_info", bp, 1 * MiB, 0, &address_space_memory);
/* Load slave boot code at pflash0 . */
@@ -437,6 +443,6 @@ void loongarch_load_kernel(MachineState *ms, struct loongarch_boot_info *info)
if (lvms->bios_loaded) {
loongarch_firmware_boot(lvms, info);
} else {
- loongarch_direct_kernel_boot(info);
+ loongarch_direct_kernel_boot(ms, info);
}
}
diff --git a/hw/loongarch/virt-acpi-build.c b/hw/loongarch/virt-acpi-build.c
index 073b6de..2cd2d9d 100644
--- a/hw/loongarch/virt-acpi-build.c
+++ b/hw/loongarch/virt-acpi-build.c
@@ -575,8 +575,8 @@ static void acpi_build(AcpiBuildTables *tables, MachineState *machine)
acpi_add_table(table_offsets, tables_blob);
{
AcpiMcfgInfo mcfg = {
- .base = cpu_to_le64(VIRT_PCI_CFG_BASE),
- .size = cpu_to_le64(VIRT_PCI_CFG_SIZE),
+ .base = VIRT_PCI_CFG_BASE,
+ .size = VIRT_PCI_CFG_SIZE,
};
build_mcfg(tables_blob, tables->linker, &mcfg, lvms->oem_id,
lvms->oem_table_id);
diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
index 1b50404..b15ada2 100644
--- a/hw/loongarch/virt.c
+++ b/hw/loongarch/virt.c
@@ -136,6 +136,10 @@ static void virt_build_smbios(LoongArchVirtMachineState *lvms)
return;
}
+ if (kvm_enabled()) {
+ product = "KVM Virtual Machine";
+ }
+
smbios_set_defaults("QEMU", product, mc->name);
smbios_get_tables(ms, SMBIOS_ENTRY_POINT_TYPE_64,
@@ -168,8 +172,15 @@ static void virt_powerdown_req(Notifier *notifier, void *opaque)
acpi_send_event(s->acpi_ged, ACPI_POWER_DOWN_STATUS);
}
-static void memmap_add_entry(uint64_t address, uint64_t length, uint32_t type)
+static void memmap_add_entry(MachineState *ms, uint64_t address,
+ uint64_t length, uint32_t type)
{
+ LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(ms);
+ struct memmap_entry *memmap_table;
+ unsigned int memmap_entries;
+
+ memmap_table = lvms->memmap_table;
+ memmap_entries = lvms->memmap_entries;
/* Ensure there are no duplicate entries. */
for (unsigned i = 0; i < memmap_entries; i++) {
assert(memmap_table[i].address != address);
@@ -182,6 +193,8 @@ static void memmap_add_entry(uint64_t address, uint64_t length, uint32_t type)
memmap_table[memmap_entries].type = cpu_to_le32(type);
memmap_table[memmap_entries].reserved = 0;
memmap_entries++;
+ lvms->memmap_table = memmap_table;
+ lvms->memmap_entries = memmap_entries;
}
static DeviceState *create_acpi_ged(DeviceState *pch_pic,
@@ -401,12 +414,6 @@ static void virt_irq_init(LoongArchVirtMachineState *lvms)
lvms->ipi = ipi;
sysbus_realize_and_unref(SYS_BUS_DEVICE(ipi), &error_fatal);
- /* IPI iocsr memory region */
- memory_region_add_subregion(&lvms->system_iocsr, SMP_IPI_MAILBOX,
- sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), 0));
- memory_region_add_subregion(&lvms->system_iocsr, MAIL_SEND_ADDR,
- sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), 1));
-
/* Create EXTIOI device */
extioi = qdev_new(TYPE_LOONGARCH_EXTIOI);
lvms->extioi = extioi;
@@ -414,12 +421,6 @@ static void virt_irq_init(LoongArchVirtMachineState *lvms)
qdev_prop_set_bit(extioi, "has-virtualization-extension", true);
}
sysbus_realize_and_unref(SYS_BUS_DEVICE(extioi), &error_fatal);
- memory_region_add_subregion(&lvms->system_iocsr, APIC_BASE,
- sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 0));
- if (virt_is_veiointc_enabled(lvms)) {
- memory_region_add_subregion(&lvms->system_iocsr, EXTIOI_VIRT_BASE,
- sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 1));
- }
virt_cpu_irq_init(lvms);
pch_pic = qdev_new(TYPE_LOONGARCH_PIC);
@@ -427,13 +428,6 @@ static void virt_irq_init(LoongArchVirtMachineState *lvms)
qdev_prop_set_uint32(pch_pic, "pch_pic_irq_num", num);
d = SYS_BUS_DEVICE(pch_pic);
sysbus_realize_and_unref(d, &error_fatal);
- memory_region_add_subregion(get_system_memory(), VIRT_IOAPIC_REG_BASE,
- sysbus_mmio_get_region(d, 0));
-
- /* Connect pch_pic irqs to extioi */
- for (i = 0; i < num; i++) {
- qdev_connect_gpio_out(DEVICE(d), i, qdev_get_gpio_in(extioi, i));
- }
pch_msi = qdev_new(TYPE_LOONGARCH_PCH_MSI);
start = num;
@@ -443,12 +437,40 @@ static void virt_irq_init(LoongArchVirtMachineState *lvms)
d = SYS_BUS_DEVICE(pch_msi);
sysbus_realize_and_unref(d, &error_fatal);
sysbus_mmio_map(d, 0, VIRT_PCH_MSI_ADDR_LOW);
- for (i = 0; i < num; i++) {
- /* Connect pch_msi irqs to extioi */
- qdev_connect_gpio_out(DEVICE(d), i,
- qdev_get_gpio_in(extioi, i + start));
- }
+ if (kvm_irqchip_in_kernel()) {
+ kvm_loongarch_init_irq_routing();
+ } else {
+ /* IPI iocsr memory region */
+ memory_region_add_subregion(&lvms->system_iocsr, SMP_IPI_MAILBOX,
+ sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), 0));
+ memory_region_add_subregion(&lvms->system_iocsr, MAIL_SEND_ADDR,
+ sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), 1));
+
+ /* EXTIOI iocsr memory region */
+ memory_region_add_subregion(&lvms->system_iocsr, APIC_BASE,
+ sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 0));
+ if (virt_is_veiointc_enabled(lvms)) {
+ memory_region_add_subregion(&lvms->system_iocsr, EXTIOI_VIRT_BASE,
+ sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 1));
+ }
+
+ /* PCH_PIC memory region */
+ memory_region_add_subregion(get_system_memory(), VIRT_IOAPIC_REG_BASE,
+ sysbus_mmio_get_region(SYS_BUS_DEVICE(pch_pic), 0));
+
+ /* Connect pch_pic irqs to extioi */
+ for (i = 0; i < VIRT_PCH_PIC_IRQ_NUM; i++) {
+ qdev_connect_gpio_out(DEVICE(pch_pic), i,
+ qdev_get_gpio_in(extioi, i));
+ }
+
+ for (i = VIRT_PCH_PIC_IRQ_NUM; i < EXTIOI_IRQS; i++) {
+ /* Connect pch_msi irqs to extioi */
+ qdev_connect_gpio_out(DEVICE(pch_msi), i - VIRT_PCH_PIC_IRQ_NUM,
+ qdev_get_gpio_in(extioi, i));
+ }
+ }
virt_devices_init(pch_pic, lvms);
}
@@ -509,6 +531,10 @@ static MemTxResult virt_iocsr_misc_write(void *opaque, hwaddr addr,
switch (addr) {
case MISC_FUNC_REG:
+ if (kvm_irqchip_in_kernel()) {
+ return MEMTX_OK;
+ }
+
if (!virt_is_veiointc_enabled(lvms)) {
return MEMTX_OK;
}
@@ -559,6 +585,10 @@ static MemTxResult virt_iocsr_misc_read(void *opaque, hwaddr addr,
ret = 0x303030354133ULL; /* "3A5000" */
break;
case MISC_FUNC_REG:
+ if (kvm_irqchip_in_kernel()) {
+ return MEMTX_OK;
+ }
+
if (!virt_is_veiointc_enabled(lvms)) {
ret |= BIT_ULL(IOCSRM_EXTIOI_EN);
break;
@@ -619,13 +649,13 @@ static void fw_cfg_add_memory(MachineState *ms)
}
if (size >= gap) {
- memmap_add_entry(base, gap, 1);
+ memmap_add_entry(ms, base, gap, 1);
size -= gap;
base = VIRT_HIGHMEM_BASE;
}
if (size) {
- memmap_add_entry(base, size, 1);
+ memmap_add_entry(ms, base, size, 1);
base += size;
}
@@ -640,7 +670,7 @@ static void fw_cfg_add_memory(MachineState *ms)
* lowram: [base, +(gap - numa_info[0].node_mem))
* highram: [VIRT_HIGHMEM_BASE, +(ram_size - gap))
*/
- memmap_add_entry(base, gap - numa_info[0].node_mem, 1);
+ memmap_add_entry(ms, base, gap - numa_info[0].node_mem, 1);
size = ram_size - gap;
base = VIRT_HIGHMEM_BASE;
} else {
@@ -648,7 +678,7 @@ static void fw_cfg_add_memory(MachineState *ms)
}
if (size) {
- memmap_add_entry(base, size, 1);
+ memmap_add_entry(ms, base, size, 1);
}
}
@@ -734,8 +764,8 @@ static void virt_init(MachineState *machine)
rom_set_fw(lvms->fw_cfg);
if (lvms->fw_cfg != NULL) {
fw_cfg_add_file(lvms->fw_cfg, "etc/memmap",
- memmap_table,
- sizeof(struct memmap_entry) * (memmap_entries));
+ lvms->memmap_table,
+ sizeof(struct memmap_entry) * lvms->memmap_entries);
}
/* Initialize the IO interrupt subsystem */
diff --git a/hw/meson.build b/hw/meson.build
index b91f761..791ce21 100644
--- a/hw/meson.build
+++ b/hw/meson.build
@@ -39,6 +39,7 @@ subdir('uefi')
subdir('ufs')
subdir('usb')
subdir('vfio')
+subdir('vfio-user')
subdir('virtio')
subdir('vmapple')
subdir('watchdog')
diff --git a/hw/microblaze/petalogix_ml605_mmu.c b/hw/microblaze/petalogix_ml605_mmu.c
index bea6b68..6e923c4 100644
--- a/hw/microblaze/petalogix_ml605_mmu.c
+++ b/hw/microblaze/petalogix_ml605_mmu.c
@@ -80,8 +80,6 @@ petalogix_ml605_init(MachineState *machine)
MemoryRegion *phys_lmb_bram = g_new(MemoryRegion, 1);
MemoryRegion *phys_ram = g_new(MemoryRegion, 1);
qemu_irq irq[32];
- EndianMode endianness = TARGET_BIG_ENDIAN ? ENDIAN_MODE_BIG
- : ENDIAN_MODE_LITTLE;
/* init CPUs */
cpu = MICROBLAZE_CPU(object_new(TYPE_MICROBLAZE_CPU));
@@ -113,7 +111,7 @@ petalogix_ml605_init(MachineState *machine)
dev = qdev_new("xlnx.xps-intc");
- qdev_prop_set_enum(dev, "endianness", endianness);
+ qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE);
qdev_prop_set_uint32(dev, "kind-of-intr", 1 << TIMER_IRQ);
sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, INTC_BASEADDR);
@@ -129,7 +127,7 @@ petalogix_ml605_init(MachineState *machine)
/* 2 timers at irq 2 @ 100 Mhz. */
dev = qdev_new("xlnx.xps-timer");
- qdev_prop_set_enum(dev, "endianness", endianness);
+ qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE);
qdev_prop_set_uint32(dev, "one-timer-only", 0);
qdev_prop_set_uint32(dev, "clock-frequency", 100 * 1000000);
sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
@@ -177,7 +175,7 @@ petalogix_ml605_init(MachineState *machine)
SSIBus *spi;
dev = qdev_new("xlnx.xps-spi");
- qdev_prop_set_enum(dev, "endianness", endianness);
+ qdev_prop_set_enum(dev, "endianness", ENDIAN_MODE_LITTLE);
qdev_prop_set_uint8(dev, "num-ss-bits", NUM_SPI_FLASHES);
busdev = SYS_BUS_DEVICE(dev);
sysbus_realize_and_unref(busdev, &error_fatal);
@@ -218,12 +216,7 @@ petalogix_ml605_init(MachineState *machine)
static void petalogix_ml605_machine_init(MachineClass *mc)
{
- if (TARGET_BIG_ENDIAN) {
- mc->desc = "PetaLogix linux refdesign for xilinx ml605 (big endian)";
- mc->deprecation_reason = "big endian support is not tested";
- } else {
- mc->desc = "PetaLogix linux refdesign for xilinx ml605 (little endian)";
- }
+ mc->desc = "PetaLogix linux refdesign for xilinx ml605 (little endian)";
mc->init = petalogix_ml605_init;
}
diff --git a/hw/microblaze/petalogix_s3adsp1800_mmu.c b/hw/microblaze/petalogix_s3adsp1800_mmu.c
index 032f6f7..e8d0ddf 100644
--- a/hw/microblaze/petalogix_s3adsp1800_mmu.c
+++ b/hw/microblaze/petalogix_s3adsp1800_mmu.c
@@ -58,9 +58,20 @@
#define TYPE_PETALOGIX_S3ADSP1800_MACHINE \
MACHINE_TYPE_NAME("petalogix-s3adsp1800")
+struct S3Adsp1800MachineState {
+ MachineState parent_class;
+
+ EndianMode endianness;
+};
+
+OBJECT_DECLARE_TYPE(S3Adsp1800MachineState, MachineClass,
+ PETALOGIX_S3ADSP1800_MACHINE)
+
+
static void
petalogix_s3adsp1800_init(MachineState *machine)
{
+ S3Adsp1800MachineState *psms = PETALOGIX_S3ADSP1800_MACHINE(machine);
ram_addr_t ram_size = machine->ram_size;
DeviceState *dev;
MicroBlazeCPU *cpu;
@@ -71,13 +82,12 @@ petalogix_s3adsp1800_init(MachineState *machine)
MemoryRegion *phys_ram = g_new(MemoryRegion, 1);
qemu_irq irq[32];
MemoryRegion *sysmem = get_system_memory();
- EndianMode endianness = TARGET_BIG_ENDIAN ? ENDIAN_MODE_BIG
- : ENDIAN_MODE_LITTLE;
+ EndianMode endianness = psms->endianness;
cpu = MICROBLAZE_CPU(object_new(TYPE_MICROBLAZE_CPU));
object_property_set_str(OBJECT(cpu), "version", "7.10.d", &error_abort);
object_property_set_bool(OBJECT(cpu), "little-endian",
- !TARGET_BIG_ENDIAN, &error_abort);
+ endianness == ENDIAN_MODE_LITTLE, &error_abort);
qdev_realize(DEVICE(cpu), NULL, &error_abort);
/* Attach emulated BRAM through the LMB. */
@@ -135,20 +145,41 @@ petalogix_s3adsp1800_init(MachineState *machine)
create_unimplemented_device("xps_gpio", GPIO_BASEADDR, 0x10000);
- microblaze_load_kernel(cpu, !TARGET_BIG_ENDIAN, ddr_base, ram_size,
- machine->initrd_filename,
+ microblaze_load_kernel(cpu, endianness == ENDIAN_MODE_LITTLE, ddr_base,
+ ram_size, machine->initrd_filename,
BINARY_DEVICE_TREE_FILE,
NULL);
}
+static int machine_get_endianness(Object *obj, Error **errp G_GNUC_UNUSED)
+{
+ S3Adsp1800MachineState *ms = PETALOGIX_S3ADSP1800_MACHINE(obj);
+ return ms->endianness;
+}
+
+static void machine_set_endianness(Object *obj, int endianness, Error **errp)
+{
+ S3Adsp1800MachineState *ms = PETALOGIX_S3ADSP1800_MACHINE(obj);
+ ms->endianness = endianness;
+}
+
static void petalogix_s3adsp1800_machine_class_init(ObjectClass *oc,
const void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
+ ObjectProperty *prop;
mc->desc = "PetaLogix linux refdesign for xilinx Spartan 3ADSP1800";
mc->init = petalogix_s3adsp1800_init;
mc->is_default = true;
+
+ prop = object_class_property_add_enum(oc, "endianness", "EndianMode",
+ &EndianMode_lookup,
+ machine_get_endianness,
+ machine_set_endianness);
+ object_property_set_default_str(prop, TARGET_BIG_ENDIAN ? "big" : "little");
+ object_class_property_set_description(oc, "endianness",
+ "Defines whether the machine runs in big or little endian mode");
}
static const TypeInfo petalogix_s3adsp1800_machine_types[] = {
@@ -156,6 +187,7 @@ static const TypeInfo petalogix_s3adsp1800_machine_types[] = {
.name = TYPE_PETALOGIX_S3ADSP1800_MACHINE,
.parent = TYPE_MACHINE,
.class_init = petalogix_s3adsp1800_machine_class_init,
+ .instance_size = sizeof(S3Adsp1800MachineState),
},
};
diff --git a/hw/microblaze/xlnx-zynqmp-pmu.c b/hw/microblaze/xlnx-zynqmp-pmu.c
index ed40b5f..e909802 100644
--- a/hw/microblaze/xlnx-zynqmp-pmu.c
+++ b/hw/microblaze/xlnx-zynqmp-pmu.c
@@ -181,12 +181,7 @@ static void xlnx_zynqmp_pmu_init(MachineState *machine)
static void xlnx_zynqmp_pmu_machine_init(MachineClass *mc)
{
- if (TARGET_BIG_ENDIAN) {
- mc->desc = "Xilinx ZynqMP PMU machine (big endian)";
- mc->deprecation_reason = "big endian support is not tested";
- } else {
- mc->desc = "Xilinx ZynqMP PMU machine (little endian)";
- }
+ mc->desc = "Xilinx ZynqMP PMU machine (little endian)";
mc->init = xlnx_zynqmp_pmu_init;
}
diff --git a/hw/misc/aspeed_hace.c b/hw/misc/aspeed_hace.c
index f4bff32..726368f 100644
--- a/hw/misc/aspeed_hace.c
+++ b/hw/misc/aspeed_hace.c
@@ -10,14 +10,17 @@
*/
#include "qemu/osdep.h"
+#include "qemu/cutils.h"
#include "qemu/log.h"
#include "qemu/error-report.h"
+#include "qemu/iov.h"
#include "hw/misc/aspeed_hace.h"
#include "qapi/error.h"
#include "migration/vmstate.h"
#include "crypto/hash.h"
#include "hw/qdev-properties.h"
#include "hw/irq.h"
+#include "trace.h"
#define R_CRYPT_CMD (0x10 / 4)
@@ -27,9 +30,12 @@
#define TAG_IRQ BIT(15)
#define R_HASH_SRC (0x20 / 4)
-#define R_HASH_DEST (0x24 / 4)
+#define R_HASH_DIGEST (0x24 / 4)
#define R_HASH_KEY_BUFF (0x28 / 4)
#define R_HASH_SRC_LEN (0x2c / 4)
+#define R_HASH_SRC_HI (0x90 / 4)
+#define R_HASH_DIGEST_HI (0x94 / 4)
+#define R_HASH_KEY_BUFF_HI (0x98 / 4)
#define R_HASH_CMD (0x30 / 4)
/* Hash algorithm selection */
@@ -84,6 +90,42 @@ static const struct {
QCRYPTO_HASH_ALGO_SHA256 },
};
+static void hace_hexdump(const char *desc, const char *buf, size_t size)
+{
+ g_autoptr(GString) str = g_string_sized_new(64);
+ size_t len;
+ size_t i;
+
+ for (i = 0; i < size; i += len) {
+ len = MIN(16, size - i);
+ g_string_truncate(str, 0);
+ qemu_hexdump_line(str, buf + i, len, 1, 4);
+ trace_aspeed_hace_hexdump(desc, i, str->str);
+ }
+}
+
+static void hace_iov_hexdump(const char *desc, const struct iovec *iov,
+ const unsigned int iov_cnt)
+{
+ size_t size = 0;
+ char *buf;
+ int i;
+
+ for (i = 0; i < iov_cnt; i++) {
+ size += iov[i].iov_len;
+ }
+
+ buf = g_malloc(size);
+
+ if (!buf) {
+ return;
+ }
+
+ iov_to_buf(iov, iov_cnt, 0, buf, size);
+ hace_hexdump(desc, buf, size);
+ g_free(buf);
+}
+
static int hash_algo_lookup(uint32_t reg)
{
int i;
@@ -142,171 +184,269 @@ static bool has_padding(AspeedHACEState *s, struct iovec *iov,
return false;
}
-static int reconstruct_iov(AspeedHACEState *s, struct iovec *iov, int id,
- uint32_t *pad_offset)
+static uint64_t hash_get_source_addr(AspeedHACEState *s)
{
- int i, iov_count;
- if (*pad_offset != 0) {
- s->iov_cache[s->iov_count].iov_base = iov[id].iov_base;
- s->iov_cache[s->iov_count].iov_len = *pad_offset;
- ++s->iov_count;
- }
- for (i = 0; i < s->iov_count; i++) {
- iov[i].iov_base = s->iov_cache[i].iov_base;
- iov[i].iov_len = s->iov_cache[i].iov_len;
+ AspeedHACEClass *ahc = ASPEED_HACE_GET_CLASS(s);
+ uint64_t src_addr = 0;
+
+ src_addr = deposit64(src_addr, 0, 32, s->regs[R_HASH_SRC]);
+ if (ahc->has_dma64) {
+ src_addr = deposit64(src_addr, 32, 32, s->regs[R_HASH_SRC_HI]);
}
- iov_count = s->iov_count;
- s->iov_count = 0;
- s->total_req_len = 0;
- return iov_count;
+
+ return src_addr;
}
-static void do_hash_operation(AspeedHACEState *s, int algo, bool sg_mode,
- bool acc_mode)
+static int hash_prepare_direct_iov(AspeedHACEState *s, struct iovec *iov,
+ bool acc_mode, bool *acc_final_request)
{
- struct iovec iov[ASPEED_HACE_MAX_SG];
uint32_t total_msg_len;
uint32_t pad_offset;
- g_autofree uint8_t *digest_buf = NULL;
- size_t digest_len = 0;
- bool sg_acc_mode_final_request = false;
- int i;
+ uint64_t src;
void *haddr;
- Error *local_err = NULL;
+ hwaddr plen;
+ int iov_idx;
+
+ plen = s->regs[R_HASH_SRC_LEN];
+ src = hash_get_source_addr(s);
+ trace_aspeed_hace_hash_addr("src", src);
+ haddr = address_space_map(&s->dram_as, src, &plen, false,
+ MEMTXATTRS_UNSPECIFIED);
+ if (haddr == NULL) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: Unable to map address, addr=0x%" HWADDR_PRIx
+ " ,plen=0x%" HWADDR_PRIx "\n",
+ __func__, src, plen);
+ return -1;
+ }
- if (acc_mode && s->hash_ctx == NULL) {
- s->hash_ctx = qcrypto_hash_new(algo, &local_err);
- if (s->hash_ctx == NULL) {
- qemu_log_mask(LOG_GUEST_ERROR, "qcrypto hash failed : %s",
- error_get_pretty(local_err));
- error_free(local_err);
- return;
+ iov[0].iov_base = haddr;
+ iov_idx = 1;
+
+ if (acc_mode) {
+ s->total_req_len += plen;
+
+ if (has_padding(s, &iov[0], plen, &total_msg_len,
+ &pad_offset)) {
+ /* Padding being present indicates the final request */
+ *acc_final_request = true;
+ iov[0].iov_len = pad_offset;
+ } else {
+ iov[0].iov_len = plen;
}
+ } else {
+ iov[0].iov_len = plen;
}
- if (sg_mode) {
- uint32_t len = 0;
-
- for (i = 0; !(len & SG_LIST_LEN_LAST); i++) {
- uint32_t addr, src;
- hwaddr plen;
+ return iov_idx;
+}
- if (i == ASPEED_HACE_MAX_SG) {
- qemu_log_mask(LOG_GUEST_ERROR,
- "aspeed_hace: guest failed to set end of sg list marker\n");
- break;
- }
+static int hash_prepare_sg_iov(AspeedHACEState *s, struct iovec *iov,
+ bool acc_mode, bool *acc_final_request)
+{
+ uint32_t total_msg_len;
+ uint32_t pad_offset;
+ uint32_t len = 0;
+ uint32_t sg_addr;
+ uint64_t src;
+ int iov_idx;
+ hwaddr plen;
+ void *haddr;
- src = s->regs[R_HASH_SRC] + (i * SG_LIST_ENTRY_SIZE);
+ src = hash_get_source_addr(s);
+ for (iov_idx = 0; !(len & SG_LIST_LEN_LAST); iov_idx++) {
+ if (iov_idx == ASPEED_HACE_MAX_SG) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: Failed to set end of sg list marker\n",
+ __func__);
+ return -1;
+ }
- len = address_space_ldl_le(&s->dram_as, src,
+ len = address_space_ldl_le(&s->dram_as, src,
+ MEMTXATTRS_UNSPECIFIED, NULL);
+ sg_addr = address_space_ldl_le(&s->dram_as, src + SG_LIST_LEN_SIZE,
MEMTXATTRS_UNSPECIFIED, NULL);
+ sg_addr &= SG_LIST_ADDR_MASK;
+ trace_aspeed_hace_hash_sg(iov_idx, src, sg_addr, len);
+ /*
+ * To maintain compatibility with older SoCs such as the AST2600,
+ * the AST2700 HW automatically set bit 34 of the 64-bit sg_addr.
+ * As a result, the firmware only needs to provide a 32-bit sg_addr
+ * containing bits [31:0]. This is sufficient for the AST2700, as
+ * it uses a DRAM offset rather than a DRAM address.
+ */
+ plen = len & SG_LIST_LEN_MASK;
+ haddr = address_space_map(&s->dram_as, sg_addr, &plen, false,
+ MEMTXATTRS_UNSPECIFIED);
- addr = address_space_ldl_le(&s->dram_as, src + SG_LIST_LEN_SIZE,
- MEMTXATTRS_UNSPECIFIED, NULL);
- addr &= SG_LIST_ADDR_MASK;
+ if (haddr == NULL) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: Unable to map address, sg_addr=0x%x, "
+ "plen=0x%" HWADDR_PRIx "\n",
+ __func__, sg_addr, plen);
+ return -1;
+ }
- plen = len & SG_LIST_LEN_MASK;
- haddr = address_space_map(&s->dram_as, addr, &plen, false,
- MEMTXATTRS_UNSPECIFIED);
- if (haddr == NULL) {
- qemu_log_mask(LOG_GUEST_ERROR,
- "%s: qcrypto failed\n", __func__);
- return;
- }
- iov[i].iov_base = haddr;
- if (acc_mode) {
- s->total_req_len += plen;
-
- if (has_padding(s, &iov[i], plen, &total_msg_len,
- &pad_offset)) {
- /* Padding being present indicates the final request */
- sg_acc_mode_final_request = true;
- iov[i].iov_len = pad_offset;
- } else {
- iov[i].iov_len = plen;
- }
+ src += SG_LIST_ENTRY_SIZE;
+
+ iov[iov_idx].iov_base = haddr;
+ if (acc_mode) {
+ s->total_req_len += plen;
+
+ if (has_padding(s, &iov[iov_idx], plen, &total_msg_len,
+ &pad_offset)) {
+ /* Padding being present indicates the final request */
+ *acc_final_request = true;
+ iov[iov_idx].iov_len = pad_offset;
} else {
- iov[i].iov_len = plen;
+ iov[iov_idx].iov_len = plen;
}
+ } else {
+ iov[iov_idx].iov_len = plen;
}
- } else {
- hwaddr len = s->regs[R_HASH_SRC_LEN];
+ }
- haddr = address_space_map(&s->dram_as, s->regs[R_HASH_SRC],
- &len, false, MEMTXATTRS_UNSPECIFIED);
- if (haddr == NULL) {
- qemu_log_mask(LOG_GUEST_ERROR, "%s: qcrypto failed\n", __func__);
+ return iov_idx;
+}
+
+static uint64_t hash_get_digest_addr(AspeedHACEState *s)
+{
+ AspeedHACEClass *ahc = ASPEED_HACE_GET_CLASS(s);
+ uint64_t digest_addr = 0;
+
+ digest_addr = deposit64(digest_addr, 0, 32, s->regs[R_HASH_DIGEST]);
+ if (ahc->has_dma64) {
+ digest_addr = deposit64(digest_addr, 32, 32, s->regs[R_HASH_DIGEST_HI]);
+ }
+
+ return digest_addr;
+}
+
+static void hash_write_digest_and_unmap_iov(AspeedHACEState *s,
+ struct iovec *iov,
+ int iov_idx,
+ uint8_t *digest_buf,
+ size_t digest_len)
+{
+ uint64_t digest_addr = 0;
+
+ digest_addr = hash_get_digest_addr(s);
+ trace_aspeed_hace_hash_addr("digest", digest_addr);
+ if (address_space_write(&s->dram_as, digest_addr,
+ MEMTXATTRS_UNSPECIFIED,
+ digest_buf, digest_len)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: Failed to write digest to 0x%" HWADDR_PRIx "\n",
+ __func__, digest_addr);
+ }
+
+ if (trace_event_get_state_backends(TRACE_ASPEED_HACE_HEXDUMP)) {
+ hace_hexdump("digest", (char *)digest_buf, digest_len);
+ }
+
+ for (; iov_idx > 0; iov_idx--) {
+ address_space_unmap(&s->dram_as, iov[iov_idx - 1].iov_base,
+ iov[iov_idx - 1].iov_len, false,
+ iov[iov_idx - 1].iov_len);
+ }
+}
+
+static void hash_execute_non_acc_mode(AspeedHACEState *s, int algo,
+ struct iovec *iov, int iov_idx)
+{
+ g_autofree uint8_t *digest_buf = NULL;
+ Error *local_err = NULL;
+ size_t digest_len = 0;
+
+ if (qcrypto_hash_bytesv(algo, iov, iov_idx, &digest_buf,
+ &digest_len, &local_err) < 0) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: qcrypto hash bytesv failed : %s",
+ __func__, error_get_pretty(local_err));
+ error_free(local_err);
+ return;
+ }
+
+ hash_write_digest_and_unmap_iov(s, iov, iov_idx, digest_buf, digest_len);
+}
+
+static void hash_execute_acc_mode(AspeedHACEState *s, int algo,
+ struct iovec *iov, int iov_idx,
+ bool final_request)
+{
+ g_autofree uint8_t *digest_buf = NULL;
+ Error *local_err = NULL;
+ size_t digest_len = 0;
+
+ trace_aspeed_hace_hash_execute_acc_mode(final_request);
+
+ if (s->hash_ctx == NULL) {
+ s->hash_ctx = qcrypto_hash_new(algo, &local_err);
+ if (s->hash_ctx == NULL) {
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: qcrypto hash new failed : %s",
+ __func__, error_get_pretty(local_err));
+ error_free(local_err);
return;
}
- iov[0].iov_base = haddr;
- iov[0].iov_len = len;
- i = 1;
-
- if (s->iov_count) {
- /*
- * In aspeed sdk kernel driver, sg_mode is disabled in hash_final().
- * Thus if we received a request with sg_mode disabled, it is
- * required to check whether cache is empty. If no, we should
- * combine cached iov and the current iov.
- */
- s->total_req_len += len;
- if (has_padding(s, iov, len, &total_msg_len, &pad_offset)) {
- i = reconstruct_iov(s, iov, 0, &pad_offset);
- }
- }
}
- if (acc_mode) {
- if (qcrypto_hash_updatev(s->hash_ctx, iov, i, &local_err) < 0) {
- qemu_log_mask(LOG_GUEST_ERROR, "qcrypto hash update failed : %s",
- error_get_pretty(local_err));
+ if (qcrypto_hash_updatev(s->hash_ctx, iov, iov_idx, &local_err) < 0) {
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: qcrypto hash updatev failed : %s",
+ __func__, error_get_pretty(local_err));
+ error_free(local_err);
+ return;
+ }
+
+ if (final_request) {
+ if (qcrypto_hash_finalize_bytes(s->hash_ctx, &digest_buf,
+ &digest_len, &local_err)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: qcrypto hash finalize bytes failed : %s",
+ __func__, error_get_pretty(local_err));
error_free(local_err);
- return;
+ local_err = NULL;
}
- if (sg_acc_mode_final_request) {
- if (qcrypto_hash_finalize_bytes(s->hash_ctx, &digest_buf,
- &digest_len, &local_err)) {
- qemu_log_mask(LOG_GUEST_ERROR,
- "qcrypto hash finalize failed : %s",
- error_get_pretty(local_err));
- error_free(local_err);
- local_err = NULL;
- }
+ qcrypto_hash_free(s->hash_ctx);
+
+ s->hash_ctx = NULL;
+ s->total_req_len = 0;
+ }
- qcrypto_hash_free(s->hash_ctx);
+ hash_write_digest_and_unmap_iov(s, iov, iov_idx, digest_buf, digest_len);
+}
- s->hash_ctx = NULL;
- s->iov_count = 0;
- s->total_req_len = 0;
- }
- } else if (qcrypto_hash_bytesv(algo, iov, i, &digest_buf,
- &digest_len, &local_err) < 0) {
- qemu_log_mask(LOG_GUEST_ERROR, "qcrypto hash bytesv failed : %s",
- error_get_pretty(local_err));
- error_free(local_err);
- return;
+static void do_hash_operation(AspeedHACEState *s, int algo, bool sg_mode,
+ bool acc_mode)
+{
+ QEMU_UNINITIALIZED struct iovec iov[ASPEED_HACE_MAX_SG];
+ bool acc_final_request = false;
+ int iov_idx = -1;
+
+ /* Prepares the iov for hashing operations based on the selected mode */
+ if (sg_mode) {
+ iov_idx = hash_prepare_sg_iov(s, iov, acc_mode, &acc_final_request);
+ } else {
+ iov_idx = hash_prepare_direct_iov(s, iov, acc_mode,
+ &acc_final_request);
}
- if (address_space_write(&s->dram_as, s->regs[R_HASH_DEST],
- MEMTXATTRS_UNSPECIFIED,
- digest_buf, digest_len)) {
+ if (iov_idx <= 0) {
qemu_log_mask(LOG_GUEST_ERROR,
- "aspeed_hace: address space write failed\n");
+ "%s: Failed to prepare iov\n", __func__);
+ return;
}
- for (; i > 0; i--) {
- address_space_unmap(&s->dram_as, iov[i - 1].iov_base,
- iov[i - 1].iov_len, false,
- iov[i - 1].iov_len);
+ if (trace_event_get_state_backends(TRACE_ASPEED_HACE_HEXDUMP)) {
+ hace_iov_hexdump("plaintext", iov, iov_idx);
}
- /*
- * Set status bits to indicate completion. Testing shows hardware sets
- * these irrespective of HASH_IRQ_EN.
- */
- s->regs[R_STATUS] |= HASH_IRQ;
+ /* Executes the hash operation */
+ if (acc_mode) {
+ hash_execute_acc_mode(s, algo, iov, iov_idx, acc_final_request);
+ } else {
+ hash_execute_non_acc_mode(s, algo, iov, iov_idx);
+ }
}
static uint64_t aspeed_hace_read(void *opaque, hwaddr addr, unsigned int size)
@@ -315,12 +455,7 @@ static uint64_t aspeed_hace_read(void *opaque, hwaddr addr, unsigned int size)
addr >>= 2;
- if (addr >= ASPEED_HACE_NR_REGS) {
- qemu_log_mask(LOG_GUEST_ERROR,
- "%s: Out-of-bounds read at offset 0x%" HWADDR_PRIx "\n",
- __func__, addr << 2);
- return 0;
- }
+ trace_aspeed_hace_read(addr << 2, s->regs[addr]);
return s->regs[addr];
}
@@ -333,12 +468,7 @@ static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data,
addr >>= 2;
- if (addr >= ASPEED_HACE_NR_REGS) {
- qemu_log_mask(LOG_GUEST_ERROR,
- "%s: Out-of-bounds write at offset 0x%" HWADDR_PRIx "\n",
- __func__, addr << 2);
- return;
- }
+ trace_aspeed_hace_write(addr << 2, data);
switch (addr) {
case R_STATUS:
@@ -362,7 +492,7 @@ static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data,
case R_HASH_SRC:
data &= ahc->src_mask;
break;
- case R_HASH_DEST:
+ case R_HASH_DIGEST:
data &= ahc->dest_mask;
break;
case R_HASH_KEY_BUFF:
@@ -390,10 +520,16 @@ static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data,
qemu_log_mask(LOG_GUEST_ERROR,
"%s: Invalid hash algorithm selection 0x%"PRIx64"\n",
__func__, data & ahc->hash_mask);
- break;
+ } else {
+ do_hash_operation(s, algo, data & HASH_SG_EN,
+ ((data & HASH_HMAC_MASK) == HASH_DIGEST_ACCUM));
}
- do_hash_operation(s, algo, data & HASH_SG_EN,
- ((data & HASH_HMAC_MASK) == HASH_DIGEST_ACCUM));
+
+ /*
+ * Set status bits to indicate completion. Testing shows hardware sets
+ * these irrespective of HASH_IRQ_EN.
+ */
+ s->regs[R_STATUS] |= HASH_IRQ;
if (data & HASH_IRQ_EN) {
qemu_irq_raise(s->irq);
@@ -410,6 +546,15 @@ static void aspeed_hace_write(void *opaque, hwaddr addr, uint64_t data,
}
}
break;
+ case R_HASH_SRC_HI:
+ data &= ahc->src_hi_mask;
+ break;
+ case R_HASH_DIGEST_HI:
+ data &= ahc->dest_hi_mask;
+ break;
+ case R_HASH_KEY_BUFF_HI:
+ data &= ahc->key_hi_mask;
+ break;
default:
break;
}
@@ -430,14 +575,14 @@ static const MemoryRegionOps aspeed_hace_ops = {
static void aspeed_hace_reset(DeviceState *dev)
{
struct AspeedHACEState *s = ASPEED_HACE(dev);
+ AspeedHACEClass *ahc = ASPEED_HACE_GET_CLASS(s);
if (s->hash_ctx != NULL) {
qcrypto_hash_free(s->hash_ctx);
s->hash_ctx = NULL;
}
- memset(s->regs, 0, sizeof(s->regs));
- s->iov_count = 0;
+ memset(s->regs, 0, ahc->nr_regs << 2);
s->total_req_len = 0;
}
@@ -445,11 +590,13 @@ static void aspeed_hace_realize(DeviceState *dev, Error **errp)
{
AspeedHACEState *s = ASPEED_HACE(dev);
SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+ AspeedHACEClass *ahc = ASPEED_HACE_GET_CLASS(s);
sysbus_init_irq(sbd, &s->irq);
+ s->regs = g_new(uint32_t, ahc->nr_regs);
memory_region_init_io(&s->iomem, OBJECT(s), &aspeed_hace_ops, s,
- TYPE_ASPEED_HACE, 0x1000);
+ TYPE_ASPEED_HACE, ahc->nr_regs << 2);
if (!s->dram_mr) {
error_setg(errp, TYPE_ASPEED_HACE ": 'dram' link not set");
@@ -469,21 +616,28 @@ static const Property aspeed_hace_properties[] = {
static const VMStateDescription vmstate_aspeed_hace = {
.name = TYPE_ASPEED_HACE,
- .version_id = 1,
- .minimum_version_id = 1,
+ .version_id = 2,
+ .minimum_version_id = 2,
.fields = (const VMStateField[]) {
- VMSTATE_UINT32_ARRAY(regs, AspeedHACEState, ASPEED_HACE_NR_REGS),
VMSTATE_UINT32(total_req_len, AspeedHACEState),
- VMSTATE_UINT32(iov_count, AspeedHACEState),
VMSTATE_END_OF_LIST(),
}
};
+static void aspeed_hace_unrealize(DeviceState *dev)
+{
+ AspeedHACEState *s = ASPEED_HACE(dev);
+
+ g_free(s->regs);
+ s->regs = NULL;
+}
+
static void aspeed_hace_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
dc->realize = aspeed_hace_realize;
+ dc->unrealize = aspeed_hace_unrealize;
device_class_set_legacy_reset(dc, aspeed_hace_reset);
device_class_set_props(dc, aspeed_hace_properties);
dc->vmsd = &vmstate_aspeed_hace;
@@ -504,6 +658,7 @@ static void aspeed_ast2400_hace_class_init(ObjectClass *klass, const void *data)
dc->desc = "AST2400 Hash and Crypto Engine";
+ ahc->nr_regs = 0x64 >> 2;
ahc->src_mask = 0x0FFFFFFF;
ahc->dest_mask = 0x0FFFFFF8;
ahc->key_mask = 0x0FFFFFC0;
@@ -523,6 +678,7 @@ static void aspeed_ast2500_hace_class_init(ObjectClass *klass, const void *data)
dc->desc = "AST2500 Hash and Crypto Engine";
+ ahc->nr_regs = 0x64 >> 2;
ahc->src_mask = 0x3fffffff;
ahc->dest_mask = 0x3ffffff8;
ahc->key_mask = 0x3FFFFFC0;
@@ -542,6 +698,7 @@ static void aspeed_ast2600_hace_class_init(ObjectClass *klass, const void *data)
dc->desc = "AST2600 Hash and Crypto Engine";
+ ahc->nr_regs = 0x64 >> 2;
ahc->src_mask = 0x7FFFFFFF;
ahc->dest_mask = 0x7FFFFFF8;
ahc->key_mask = 0x7FFFFFF8;
@@ -561,6 +718,7 @@ static void aspeed_ast1030_hace_class_init(ObjectClass *klass, const void *data)
dc->desc = "AST1030 Hash and Crypto Engine";
+ ahc->nr_regs = 0x64 >> 2;
ahc->src_mask = 0x7FFFFFFF;
ahc->dest_mask = 0x7FFFFFF8;
ahc->key_mask = 0x7FFFFFF8;
@@ -580,17 +738,36 @@ static void aspeed_ast2700_hace_class_init(ObjectClass *klass, const void *data)
dc->desc = "AST2700 Hash and Crypto Engine";
+ ahc->nr_regs = 0x9C >> 2;
ahc->src_mask = 0x7FFFFFFF;
ahc->dest_mask = 0x7FFFFFF8;
ahc->key_mask = 0x7FFFFFF8;
ahc->hash_mask = 0x00147FFF;
/*
+ * The AST2700 supports a maximum DRAM size of 8 GB, with a DRAM
+ * addressable range from 0x0_0000_0000 to 0x1_FFFF_FFFF. Since this range
+ * fits within 34 bits, only bits [33:0] are needed to store the DRAM
+ * offset. To optimize address storage, the high physical address bits
+ * [1:0] of the source, digest and key buffer addresses are stored as
+ * dram_offset bits [33:32].
+ *
+ * This approach eliminates the need to reduce the high part of the DRAM
+ * physical address for DMA operations. Previously, this was calculated as
+ * (high physical address bits [7:0] - 4), since the DRAM start address is
+ * 0x4_00000000, making the high part address [7:0] - 4.
+ */
+ ahc->src_hi_mask = 0x00000003;
+ ahc->dest_hi_mask = 0x00000003;
+ ahc->key_hi_mask = 0x00000003;
+
+ /*
* Currently, it does not support the CRYPT command. Instead, it only
* sends an interrupt to notify the firmware that the crypt command
* has completed. It is a temporary workaround.
*/
ahc->raise_crypt_interrupt_workaround = true;
+ ahc->has_dma64 = true;
}
static const TypeInfo aspeed_ast2700_hace_info = {
diff --git a/hw/misc/aspeed_scu.c b/hw/misc/aspeed_scu.c
index 4930e00..a0ab5ee 100644
--- a/hw/misc/aspeed_scu.c
+++ b/hw/misc/aspeed_scu.c
@@ -91,6 +91,7 @@
#define BMC_DEV_ID TO_REG(0x1A4)
#define AST2600_PROT_KEY TO_REG(0x00)
+#define AST2600_PROT_KEY2 TO_REG(0x10)
#define AST2600_SILICON_REV TO_REG(0x04)
#define AST2600_SILICON_REV2 TO_REG(0x14)
#define AST2600_SYS_RST_CTRL TO_REG(0x40)
@@ -176,6 +177,7 @@
#define AST2700_SCUIO_UARTCLK_GEN TO_REG(0x330)
#define AST2700_SCUIO_HUARTCLK_GEN TO_REG(0x334)
#define AST2700_SCUIO_CLK_DUTY_MEAS_RST TO_REG(0x388)
+#define AST2700_SCUIO_FREQ_CNT_CTL TO_REG(0x3A0)
#define SCU_IO_REGION_SIZE 0x1000
@@ -722,6 +724,8 @@ static void aspeed_ast2600_scu_write(void *opaque, hwaddr offset,
int reg = TO_REG(offset);
/* Truncate here so bitwise operations below behave as expected */
uint32_t data = data64;
+ bool prot_data_state = data == ASPEED_SCU_PROT_KEY;
+ bool unlocked = s->regs[AST2600_PROT_KEY] && s->regs[AST2600_PROT_KEY2];
if (reg >= ASPEED_AST2600_SCU_NR_REGS) {
qemu_log_mask(LOG_GUEST_ERROR,
@@ -730,15 +734,24 @@ static void aspeed_ast2600_scu_write(void *opaque, hwaddr offset,
return;
}
- if (reg > PROT_KEY && !s->regs[PROT_KEY]) {
+ if ((reg != AST2600_PROT_KEY && reg != AST2600_PROT_KEY2) && !unlocked) {
qemu_log_mask(LOG_GUEST_ERROR, "%s: SCU is locked!\n", __func__);
+ return;
}
trace_aspeed_scu_write(offset, size, data);
switch (reg) {
case AST2600_PROT_KEY:
- s->regs[reg] = (data == ASPEED_SCU_PROT_KEY) ? 1 : 0;
+ /*
+ * Writing a value to SCU000 will modify both protection
+ * registers to each protection register individually.
+ */
+ s->regs[AST2600_PROT_KEY] = prot_data_state;
+ s->regs[AST2600_PROT_KEY2] = prot_data_state;
+ return;
+ case AST2600_PROT_KEY2:
+ s->regs[AST2600_PROT_KEY2] = prot_data_state;
return;
case AST2600_HW_STRAP1:
case AST2600_HW_STRAP2:
@@ -1022,6 +1035,10 @@ static void aspeed_ast2700_scuio_write(void *opaque, hwaddr offset,
s->regs[reg - 1] ^= data;
updated = true;
break;
+ case AST2700_SCUIO_FREQ_CNT_CTL:
+ s->regs[reg] = deposit32(s->regs[reg], 6, 1, !!(data & BIT(1)));
+ updated = true;
+ break;
default:
qemu_log_mask(LOG_GUEST_ERROR,
"%s: Unhandled write at offset 0x%" HWADDR_PRIx "\n",
@@ -1066,6 +1083,7 @@ static const uint32_t ast2700_a0_resets_io[ASPEED_AST2700_SCU_NR_REGS] = {
[AST2700_SCUIO_UARTCLK_GEN] = 0x00014506,
[AST2700_SCUIO_HUARTCLK_GEN] = 0x000145c0,
[AST2700_SCUIO_CLK_DUTY_MEAS_RST] = 0x0c9100d2,
+ [AST2700_SCUIO_FREQ_CNT_CTL] = 0x00000080,
};
static void aspeed_2700_scuio_class_init(ObjectClass *klass, const void *data)
diff --git a/hw/misc/aspeed_sdmc.c b/hw/misc/aspeed_sdmc.c
index f04d993..dff7cc3 100644
--- a/hw/misc/aspeed_sdmc.c
+++ b/hw/misc/aspeed_sdmc.c
@@ -570,6 +570,9 @@ static void aspeed_2700_sdmc_reset(DeviceState *dev)
/* Set ram size bit and defaults values */
s->regs[R_MAIN_CONF] = asc->compute_conf(s, 0);
+ /* Skipping dram init */
+ s->regs[R_MAIN_CONTROL] = BIT(16);
+
if (s->unlocked) {
s->regs[R_2700_PROT] = PROT_UNLOCKED;
}
diff --git a/hw/misc/ivshmem-flat.c b/hw/misc/ivshmem-flat.c
index be28c24..fe4be6b 100644
--- a/hw/misc/ivshmem-flat.c
+++ b/hw/misc/ivshmem-flat.c
@@ -362,7 +362,7 @@ static bool ivshmem_flat_connect_server(DeviceState *dev, Error **errp)
*
* ivshmem_flat_recv_msg() calls return 'msg' and 'fd'.
*
- * See ./docs/specs/ivshmem-spec.txt for details on the protocol.
+ * See docs/specs/ivshmem-spec.rst for details on the protocol.
*/
/* Step 0 */
diff --git a/hw/misc/stm32_rcc.c b/hw/misc/stm32_rcc.c
index 94e8dae..5815b3e 100644
--- a/hw/misc/stm32_rcc.c
+++ b/hw/misc/stm32_rcc.c
@@ -60,7 +60,7 @@ static void stm32_rcc_write(void *opaque, hwaddr addr,
uint32_t value = val64;
uint32_t prev_value, new_value, irq_offset;
- trace_stm32_rcc_write(value, addr);
+ trace_stm32_rcc_write(addr, value);
if (addr > STM32_RCC_DCKCFGR2) {
qemu_log_mask(LOG_GUEST_ERROR, "%s: Bad offset 0x%"HWADDR_PRIx"\n",
diff --git a/hw/misc/trace-events b/hw/misc/trace-events
index 4383808..e3f64c0 100644
--- a/hw/misc/trace-events
+++ b/hw/misc/trace-events
@@ -302,6 +302,14 @@ aspeed_peci_read(uint64_t offset, uint64_t data) "offset 0x%" PRIx64 " data 0x%"
aspeed_peci_write(uint64_t offset, uint64_t data) "offset 0x%" PRIx64 " data 0x%" PRIx64
aspeed_peci_raise_interrupt(uint32_t ctrl, uint32_t status) "ctrl 0x%" PRIx32 " status 0x%" PRIx32
+# aspeed_hace.c
+aspeed_hace_read(uint64_t offset, uint64_t data) "offset 0x%" PRIx64 " data 0x%" PRIx64
+aspeed_hace_write(uint64_t offset, uint64_t data) "offset 0x%" PRIx64 " data 0x%" PRIx64
+aspeed_hace_hash_sg(int index, uint64_t list_addr, uint64_t buf_addr, uint32_t len) "%d: list_addr 0x%" PRIx64 " buf_addr 0x%" PRIx64 " len 0x%" PRIx32
+aspeed_hace_hash_addr(const char *s, uint64_t addr) "%s: 0x%" PRIx64
+aspeed_hace_hash_execute_acc_mode(bool final_request) "final request: %d"
+aspeed_hace_hexdump(const char *desc, uint32_t offset, char *s) "%s: 0x%08x: %s"
+
# bcm2835_property.c
bcm2835_mbox_property(uint32_t tag, uint32_t bufsize, size_t resplen) "mbox property tag:0x%08x in_sz:%u out_sz:%zu"
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index cba4999..a80a7b0 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -127,10 +127,8 @@ struct E1000State_st {
QEMUTimer *flush_queue_timer;
/* Compatibility flags for migration to/from qemu 1.3.0 and older */
-#define E1000_FLAG_MAC_BIT 2
#define E1000_FLAG_TSO_BIT 3
#define E1000_FLAG_VET_BIT 4
-#define E1000_FLAG_MAC (1 << E1000_FLAG_MAC_BIT)
#define E1000_FLAG_TSO (1 << E1000_FLAG_TSO_BIT)
#define E1000_FLAG_VET (1 << E1000_FLAG_VET_BIT)
@@ -1212,52 +1210,51 @@ enum { NWRITEOPS = ARRAY_SIZE(macreg_writeops) };
enum { MAC_ACCESS_PARTIAL = 1, MAC_ACCESS_FLAG_NEEDED = 2 };
-#define markflag(x) ((E1000_FLAG_##x << 2) | MAC_ACCESS_FLAG_NEEDED)
/* In the array below the meaning of the bits is: [f|f|f|f|f|f|n|p]
* f - flag bits (up to 6 possible flags)
* n - flag needed
- * p - partially implenented */
+ * p - partially implemented */
static const uint8_t mac_reg_access[0x8000] = {
- [IPAV] = markflag(MAC), [WUC] = markflag(MAC),
- [IP6AT] = markflag(MAC), [IP4AT] = markflag(MAC),
- [FFVT] = markflag(MAC), [WUPM] = markflag(MAC),
- [ECOL] = markflag(MAC), [MCC] = markflag(MAC),
- [DC] = markflag(MAC), [TNCRS] = markflag(MAC),
- [RLEC] = markflag(MAC), [XONRXC] = markflag(MAC),
- [XOFFTXC] = markflag(MAC), [RFC] = markflag(MAC),
- [TSCTFC] = markflag(MAC), [MGTPRC] = markflag(MAC),
- [WUS] = markflag(MAC), [AIT] = markflag(MAC),
- [FFLT] = markflag(MAC), [FFMT] = markflag(MAC),
- [SCC] = markflag(MAC), [FCRUC] = markflag(MAC),
- [LATECOL] = markflag(MAC), [COLC] = markflag(MAC),
- [SEQEC] = markflag(MAC), [CEXTERR] = markflag(MAC),
- [XONTXC] = markflag(MAC), [XOFFRXC] = markflag(MAC),
- [RJC] = markflag(MAC), [RNBC] = markflag(MAC),
- [MGTPDC] = markflag(MAC), [MGTPTC] = markflag(MAC),
- [RUC] = markflag(MAC), [ROC] = markflag(MAC),
- [GORCL] = markflag(MAC), [GORCH] = markflag(MAC),
- [GOTCL] = markflag(MAC), [GOTCH] = markflag(MAC),
- [BPRC] = markflag(MAC), [MPRC] = markflag(MAC),
- [TSCTC] = markflag(MAC), [PRC64] = markflag(MAC),
- [PRC127] = markflag(MAC), [PRC255] = markflag(MAC),
- [PRC511] = markflag(MAC), [PRC1023] = markflag(MAC),
- [PRC1522] = markflag(MAC), [PTC64] = markflag(MAC),
- [PTC127] = markflag(MAC), [PTC255] = markflag(MAC),
- [PTC511] = markflag(MAC), [PTC1023] = markflag(MAC),
- [PTC1522] = markflag(MAC), [MPTC] = markflag(MAC),
- [BPTC] = markflag(MAC),
-
- [TDFH] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [TDFT] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [TDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [TDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [TDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [RDFH] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [RDFT] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [RDFHS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [RDFTS] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [RDFPC] = markflag(MAC) | MAC_ACCESS_PARTIAL,
- [PBM] = markflag(MAC) | MAC_ACCESS_PARTIAL,
+ [IPAV] = MAC_ACCESS_FLAG_NEEDED, [WUC] = MAC_ACCESS_FLAG_NEEDED,
+ [IP6AT] = MAC_ACCESS_FLAG_NEEDED, [IP4AT] = MAC_ACCESS_FLAG_NEEDED,
+ [FFVT] = MAC_ACCESS_FLAG_NEEDED, [WUPM] = MAC_ACCESS_FLAG_NEEDED,
+ [ECOL] = MAC_ACCESS_FLAG_NEEDED, [MCC] = MAC_ACCESS_FLAG_NEEDED,
+ [DC] = MAC_ACCESS_FLAG_NEEDED, [TNCRS] = MAC_ACCESS_FLAG_NEEDED,
+ [RLEC] = MAC_ACCESS_FLAG_NEEDED, [XONRXC] = MAC_ACCESS_FLAG_NEEDED,
+ [XOFFTXC] = MAC_ACCESS_FLAG_NEEDED, [RFC] = MAC_ACCESS_FLAG_NEEDED,
+ [TSCTFC] = MAC_ACCESS_FLAG_NEEDED, [MGTPRC] = MAC_ACCESS_FLAG_NEEDED,
+ [WUS] = MAC_ACCESS_FLAG_NEEDED, [AIT] = MAC_ACCESS_FLAG_NEEDED,
+ [FFLT] = MAC_ACCESS_FLAG_NEEDED, [FFMT] = MAC_ACCESS_FLAG_NEEDED,
+ [SCC] = MAC_ACCESS_FLAG_NEEDED, [FCRUC] = MAC_ACCESS_FLAG_NEEDED,
+ [LATECOL] = MAC_ACCESS_FLAG_NEEDED, [COLC] = MAC_ACCESS_FLAG_NEEDED,
+ [SEQEC] = MAC_ACCESS_FLAG_NEEDED, [CEXTERR] = MAC_ACCESS_FLAG_NEEDED,
+ [XONTXC] = MAC_ACCESS_FLAG_NEEDED, [XOFFRXC] = MAC_ACCESS_FLAG_NEEDED,
+ [RJC] = MAC_ACCESS_FLAG_NEEDED, [RNBC] = MAC_ACCESS_FLAG_NEEDED,
+ [MGTPDC] = MAC_ACCESS_FLAG_NEEDED, [MGTPTC] = MAC_ACCESS_FLAG_NEEDED,
+ [RUC] = MAC_ACCESS_FLAG_NEEDED, [ROC] = MAC_ACCESS_FLAG_NEEDED,
+ [GORCL] = MAC_ACCESS_FLAG_NEEDED, [GORCH] = MAC_ACCESS_FLAG_NEEDED,
+ [GOTCL] = MAC_ACCESS_FLAG_NEEDED, [GOTCH] = MAC_ACCESS_FLAG_NEEDED,
+ [BPRC] = MAC_ACCESS_FLAG_NEEDED, [MPRC] = MAC_ACCESS_FLAG_NEEDED,
+ [TSCTC] = MAC_ACCESS_FLAG_NEEDED, [PRC64] = MAC_ACCESS_FLAG_NEEDED,
+ [PRC127] = MAC_ACCESS_FLAG_NEEDED, [PRC255] = MAC_ACCESS_FLAG_NEEDED,
+ [PRC511] = MAC_ACCESS_FLAG_NEEDED, [PRC1023] = MAC_ACCESS_FLAG_NEEDED,
+ [PRC1522] = MAC_ACCESS_FLAG_NEEDED, [PTC64] = MAC_ACCESS_FLAG_NEEDED,
+ [PTC127] = MAC_ACCESS_FLAG_NEEDED, [PTC255] = MAC_ACCESS_FLAG_NEEDED,
+ [PTC511] = MAC_ACCESS_FLAG_NEEDED, [PTC1023] = MAC_ACCESS_FLAG_NEEDED,
+ [PTC1522] = MAC_ACCESS_FLAG_NEEDED, [MPTC] = MAC_ACCESS_FLAG_NEEDED,
+ [BPTC] = MAC_ACCESS_FLAG_NEEDED,
+
+ [TDFH] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [TDFT] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [TDFHS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [TDFTS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [TDFPC] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [RDFH] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [RDFT] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [RDFHS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [RDFTS] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [RDFPC] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
+ [PBM] = MAC_ACCESS_FLAG_NEEDED | MAC_ACCESS_PARTIAL,
};
static void
@@ -1419,13 +1416,6 @@ static int e1000_tx_tso_post_load(void *opaque, int version_id)
return 0;
}
-static bool e1000_full_mac_needed(void *opaque)
-{
- E1000State *s = opaque;
-
- return chkflag(MAC);
-}
-
static bool e1000_tso_state_needed(void *opaque)
{
E1000State *s = opaque;
@@ -1451,7 +1441,6 @@ static const VMStateDescription vmstate_e1000_full_mac_state = {
.name = "e1000/full_mac_state",
.version_id = 1,
.minimum_version_id = 1,
- .needed = e1000_full_mac_needed,
.fields = (const VMStateField[]) {
VMSTATE_UINT32_ARRAY(mac_reg, E1000State, 0x8000),
VMSTATE_END_OF_LIST()
@@ -1679,8 +1668,6 @@ static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp)
static const Property e1000_properties[] = {
DEFINE_NIC_PROPERTIES(E1000State, conf),
- DEFINE_PROP_BIT("extra_mac_registers", E1000State,
- compat_flags, E1000_FLAG_MAC_BIT, true),
DEFINE_PROP_BIT("migrate_tso_props", E1000State,
compat_flags, E1000_FLAG_TSO_BIT, true),
DEFINE_PROP_BIT("init-vet", E1000State,
diff --git a/hw/net/fsl_etsec/etsec.c b/hw/net/fsl_etsec/etsec.c
index d14cb2a..846f6cb 100644
--- a/hw/net/fsl_etsec/etsec.c
+++ b/hw/net/fsl_etsec/etsec.c
@@ -389,6 +389,7 @@ static void etsec_realize(DeviceState *dev, Error **errp)
{
eTSEC *etsec = ETSEC_COMMON(dev);
+ qemu_macaddr_default_if_unset(&etsec->conf.macaddr);
etsec->nic = qemu_new_nic(&net_etsec_info, &etsec->conf,
object_get_typename(OBJECT(dev)), dev->id,
&dev->mem_reentrancy_guard, etsec);
diff --git a/hw/net/i82596.c b/hw/net/i82596.c
index 64ed3c8..c1ff3e6 100644
--- a/hw/net/i82596.c
+++ b/hw/net/i82596.c
@@ -5,7 +5,7 @@
* This work is licensed under the GNU GPL license version 2 or later.
*
* This software was written to be compatible with the specification:
- * https://www.intel.com/assets/pdf/general/82596ca.pdf
+ * https://parisc.docs.kernel.org/en/latest/_downloads/96672be0650d9fc046bbcea40b92482f/82596CA.pdf
*/
#include "qemu/osdep.h"
@@ -177,6 +177,26 @@ static void set_individual_address(I82596State *s, uint32_t addr)
trace_i82596_new_mac(nc->info_str);
}
+static void i82596_configure(I82596State *s, uint32_t addr)
+{
+ uint8_t byte_cnt;
+ byte_cnt = get_byte(addr + 8) & 0x0f;
+
+ byte_cnt = MAX(byte_cnt, 4);
+ byte_cnt = MIN(byte_cnt, sizeof(s->config));
+ /* copy byte_cnt max. */
+ address_space_read(&address_space_memory, addr + 8,
+ MEMTXATTRS_UNSPECIFIED, s->config, byte_cnt);
+ /* config byte according to page 35ff */
+ s->config[2] &= 0x82; /* mask valid bits */
+ s->config[2] |= 0x40;
+ s->config[7] &= 0xf7; /* clear zero bit */
+ assert(I596_NOCRC_INS == 0); /* do CRC insertion */
+ s->config[10] = MAX(s->config[10], 5); /* min frame length */
+ s->config[12] &= 0x40; /* only full duplex field valid */
+ s->config[13] |= 0x3f; /* set ones in byte 13 */
+}
+
static void set_multicast_list(I82596State *s, uint32_t addr)
{
uint16_t mc_count, i;
@@ -234,7 +254,6 @@ static void command_loop(I82596State *s)
{
uint16_t cmd;
uint16_t status;
- uint8_t byte_cnt;
DBG(printf("STARTING COMMAND LOOP cmd_p=%08x\n", s->cmd_p));
@@ -254,20 +273,7 @@ static void command_loop(I82596State *s)
set_individual_address(s, s->cmd_p);
break;
case CmdConfigure:
- byte_cnt = get_byte(s->cmd_p + 8) & 0x0f;
- byte_cnt = MAX(byte_cnt, 4);
- byte_cnt = MIN(byte_cnt, sizeof(s->config));
- /* copy byte_cnt max. */
- address_space_read(&address_space_memory, s->cmd_p + 8,
- MEMTXATTRS_UNSPECIFIED, s->config, byte_cnt);
- /* config byte according to page 35ff */
- s->config[2] &= 0x82; /* mask valid bits */
- s->config[2] |= 0x40;
- s->config[7] &= 0xf7; /* clear zero bit */
- assert(I596_NOCRC_INS == 0); /* do CRC insertion */
- s->config[10] = MAX(s->config[10], 5); /* min frame length */
- s->config[12] &= 0x40; /* only full duplex field valid */
- s->config[13] |= 0x3f; /* set ones in byte 13 */
+ i82596_configure(s, s->cmd_p);
break;
case CmdTDR:
/* get signal LINK */
diff --git a/hw/net/rocker/rocker.h b/hw/net/rocker/rocker.h
index 6e0962f..ae06c1c 100644
--- a/hw/net/rocker/rocker.h
+++ b/hw/net/rocker/rocker.h
@@ -36,15 +36,7 @@ static inline G_GNUC_PRINTF(1, 2) int DPRINTF(const char *fmt, ...)
}
#endif
-#define __le16 uint16_t
-#define __le32 uint32_t
-#define __le64 uint64_t
-
-#define __be16 uint16_t
-#define __be32 uint32_t
-#define __be64 uint64_t
-
-static inline bool ipv4_addr_is_multicast(__be32 addr)
+static inline bool ipv4_addr_is_multicast(uint32_t addr)
{
return (addr & htonl(0xf0000000)) == htonl(0xe0000000);
}
@@ -52,8 +44,8 @@ static inline bool ipv4_addr_is_multicast(__be32 addr)
typedef struct ipv6_addr {
union {
uint8_t addr8[16];
- __be16 addr16[8];
- __be32 addr32[4];
+ uint16_t addr16[8];
+ uint32_t addr32[4];
};
} Ipv6Addr;
diff --git a/hw/net/rocker/rocker_hw.h b/hw/net/rocker/rocker_hw.h
index 1786323..7ec6bfb 100644
--- a/hw/net/rocker/rocker_hw.h
+++ b/hw/net/rocker/rocker_hw.h
@@ -9,10 +9,6 @@
#ifndef ROCKER_HW_H
#define ROCKER_HW_H
-#define __le16 uint16_t
-#define __le32 uint32_t
-#define __le64 uint64_t
-
/*
* Return codes
*/
@@ -124,12 +120,12 @@ enum {
*/
typedef struct rocker_desc {
- __le64 buf_addr;
+ uint64_t buf_addr;
uint64_t cookie;
- __le16 buf_size;
- __le16 tlv_size;
- __le16 rsvd[5]; /* pad to 32 bytes */
- __le16 comp_err;
+ uint16_t buf_size;
+ uint16_t tlv_size;
+ uint16_t rsvd[5]; /* pad to 32 bytes */
+ uint16_t comp_err;
} __attribute__((packed, aligned(8))) RockerDesc;
/*
@@ -137,9 +133,9 @@ typedef struct rocker_desc {
*/
typedef struct rocker_tlv {
- __le32 type;
- __le16 len;
- __le16 rsvd;
+ uint32_t type;
+ uint16_t len;
+ uint16_t rsvd;
} __attribute__((packed, aligned(8))) RockerTlv;
/* cmd msg */
diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c
index 3378f63..4aed178 100644
--- a/hw/net/rocker/rocker_of_dpa.c
+++ b/hw/net/rocker/rocker_of_dpa.c
@@ -52,10 +52,10 @@ typedef struct of_dpa_flow_key {
uint32_t tunnel_id; /* overlay tunnel id */
uint32_t tbl_id; /* table id */
struct {
- __be16 vlan_id; /* 0 if no VLAN */
+ uint16_t vlan_id; /* 0 if no VLAN */
MACAddr src; /* ethernet source address */
MACAddr dst; /* ethernet destination address */
- __be16 type; /* ethernet frame type */
+ uint16_t type; /* ethernet frame type */
} eth;
struct {
uint8_t proto; /* IP protocol or ARP opcode */
@@ -66,14 +66,14 @@ typedef struct of_dpa_flow_key {
union {
struct {
struct {
- __be32 src; /* IP source address */
- __be32 dst; /* IP destination address */
+ uint32_t src; /* IP source address */
+ uint32_t dst; /* IP destination address */
} addr;
union {
struct {
- __be16 src; /* TCP/UDP/SCTP source port */
- __be16 dst; /* TCP/UDP/SCTP destination port */
- __be16 flags; /* TCP flags */
+ uint16_t src; /* TCP/UDP/SCTP source port */
+ uint16_t dst; /* TCP/UDP/SCTP destination port */
+ uint16_t flags; /* TCP flags */
} tp;
struct {
MACAddr sha; /* ARP source hardware address */
@@ -86,11 +86,11 @@ typedef struct of_dpa_flow_key {
Ipv6Addr src; /* IPv6 source address */
Ipv6Addr dst; /* IPv6 destination address */
} addr;
- __be32 label; /* IPv6 flow label */
+ uint32_t label; /* IPv6 flow label */
struct {
- __be16 src; /* TCP/UDP/SCTP source port */
- __be16 dst; /* TCP/UDP/SCTP destination port */
- __be16 flags; /* TCP flags */
+ uint16_t src; /* TCP/UDP/SCTP source port */
+ uint16_t dst; /* TCP/UDP/SCTP destination port */
+ uint16_t flags; /* TCP flags */
} tp;
struct {
Ipv6Addr target; /* ND target address */
@@ -112,13 +112,13 @@ typedef struct of_dpa_flow_action {
struct {
uint32_t group_id;
uint32_t tun_log_lport;
- __be16 vlan_id;
+ uint16_t vlan_id;
} write;
struct {
- __be16 new_vlan_id;
+ uint16_t new_vlan_id;
uint32_t out_pport;
uint8_t copy_to_cpu;
- __be16 vlan_id;
+ uint16_t vlan_id;
} apply;
} OfDpaFlowAction;
@@ -143,7 +143,7 @@ typedef struct of_dpa_flow {
typedef struct of_dpa_flow_pkt_fields {
uint32_t tunnel_id;
struct eth_header *ethhdr;
- __be16 *h_proto;
+ uint16_t *h_proto;
struct vlan_header *vlanhdr;
struct ip_header *ipv4hdr;
struct ip6_header *ipv6hdr;
@@ -180,7 +180,7 @@ typedef struct of_dpa_group {
uint32_t group_id;
MACAddr src_mac;
MACAddr dst_mac;
- __be16 vlan_id;
+ uint16_t vlan_id;
} l2_rewrite;
struct {
uint16_t group_count;
@@ -190,13 +190,13 @@ typedef struct of_dpa_group {
uint32_t group_id;
MACAddr src_mac;
MACAddr dst_mac;
- __be16 vlan_id;
+ uint16_t vlan_id;
uint8_t ttl_check;
} l3_unicast;
};
} OfDpaGroup;
-static int of_dpa_mask2prefix(__be32 mask)
+static int of_dpa_mask2prefix(uint32_t mask)
{
int i;
int count = 32;
@@ -451,7 +451,7 @@ static void of_dpa_flow_pkt_parse(OfDpaFlowContext *fc,
fc->iovcnt = iovcnt + 2;
}
-static void of_dpa_flow_pkt_insert_vlan(OfDpaFlowContext *fc, __be16 vlan_id)
+static void of_dpa_flow_pkt_insert_vlan(OfDpaFlowContext *fc, uint16_t vlan_id)
{
OfDpaFlowPktFields *fields = &fc->fields;
uint16_t h_proto = fields->ethhdr->h_proto;
@@ -486,7 +486,7 @@ static void of_dpa_flow_pkt_strip_vlan(OfDpaFlowContext *fc)
static void of_dpa_flow_pkt_hdr_rewrite(OfDpaFlowContext *fc,
uint8_t *src_mac, uint8_t *dst_mac,
- __be16 vlan_id)
+ uint16_t vlan_id)
{
OfDpaFlowPktFields *fields = &fc->fields;
diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
index 15b8f75..654a087 100644
--- a/hw/net/rtl8139.c
+++ b/hw/net/rtl8139.c
@@ -1816,7 +1816,7 @@ static int rtl8139_transmit_one(RTL8139State *s, int descriptor)
PCIDevice *d = PCI_DEVICE(s);
int txsize = s->TxStatus[descriptor] & 0x1fff;
- uint8_t txbuffer[0x2000];
+ QEMU_UNINITIALIZED uint8_t txbuffer[0x2000];
DPRINTF("+++ transmit reading %d bytes from host memory at 0x%08x\n",
txsize, s->TxAddr[descriptor]);
diff --git a/hw/net/tulip.c b/hw/net/tulip.c
index 63fe513..319af90 100644
--- a/hw/net/tulip.c
+++ b/hw/net/tulip.c
@@ -629,7 +629,7 @@ static void tulip_setup_filter_addr(TULIPState *s, uint8_t *buf, int n)
static void tulip_setup_frame(TULIPState *s,
struct tulip_descriptor *desc)
{
- uint8_t buf[4096];
+ QEMU_UNINITIALIZED uint8_t buf[4096];
int len = (desc->control >> TDES1_BUF1_SIZE_SHIFT) & TDES1_BUF1_SIZE_MASK;
int i;
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 221252e..eb93607 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1911,9 +1911,9 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
VirtIONet *n = qemu_get_nic_opaque(nc);
VirtIONetQueue *q;
VirtIODevice *vdev = VIRTIO_DEVICE(n);
- VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
- size_t lens[VIRTQUEUE_MAX_SIZE];
- struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
+ QEMU_UNINITIALIZED VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE];
+ QEMU_UNINITIALIZED size_t lens[VIRTQUEUE_MAX_SIZE];
+ QEMU_UNINITIALIZED struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
struct virtio_net_hdr_v1_hash extra_hdr;
unsigned mhdr_cnt = 0;
size_t offset, i, guest_offset, j;
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 83d942a..7c0ca56 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -41,19 +41,9 @@
#define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1
#define VMXNET3_MSIX_BAR_SIZE 0x2000
-/* Compatibility flags for migration */
-#define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT 0
-#define VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS \
- (1 << VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT)
-#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT 1
-#define VMXNET3_COMPAT_FLAG_DISABLE_PCIE \
- (1 << VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT)
-
#define VMXNET3_EXP_EP_OFFSET (0x48)
-#define VMXNET3_MSI_OFFSET(s) \
- ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x50 : 0x84)
-#define VMXNET3_MSIX_OFFSET(s) \
- ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0 : 0x9c)
+#define VMXNET3_MSI_OFFSET (0x84)
+#define VMXNET3_MSIX_OFFSET (0x9c)
#define VMXNET3_DSN_OFFSET (0x100)
#define VMXNET3_BAR0_IDX (0)
@@ -61,8 +51,7 @@
#define VMXNET3_MSIX_BAR_IDX (2)
#define VMXNET3_OFF_MSIX_TABLE (0x000)
-#define VMXNET3_OFF_MSIX_PBA(s) \
- ((s)->compat_flags & VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS ? 0x800 : 0x1000)
+#define VMXNET3_OFF_MSIX_PBA (0x1000)
/* Link speed in Mbps should be shifted by 16 */
#define VMXNET3_LINK_SPEED (1000 << 16)
@@ -2122,8 +2111,8 @@ vmxnet3_init_msix(VMXNET3State *s)
&s->msix_bar,
VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_TABLE,
&s->msix_bar,
- VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA(s),
- VMXNET3_MSIX_OFFSET(s), NULL);
+ VMXNET3_MSIX_BAR_IDX, VMXNET3_OFF_MSIX_PBA,
+ VMXNET3_MSIX_OFFSET, NULL);
if (0 > res) {
VMW_WRPRN("Failed to initialize MSI-X, error %d", res);
@@ -2221,7 +2210,7 @@ static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp)
/* Interrupt pin A */
pci_dev->config[PCI_INTERRUPT_PIN] = 0x01;
- ret = msi_init(pci_dev, VMXNET3_MSI_OFFSET(s), VMXNET3_MAX_NMSIX_INTRS,
+ ret = msi_init(pci_dev, VMXNET3_MSI_OFFSET, VMXNET3_MAX_NMSIX_INTRS,
VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK, NULL);
/* Any error other than -ENOTSUP(board's MSI support is broken)
* is a programming error. Fall back to INTx silently on -ENOTSUP */
@@ -2249,6 +2238,7 @@ static void vmxnet3_instance_init(Object *obj)
device_add_bootindex_property(obj, &s->conf.bootindex,
"bootindex", "/ethernet-phy@0",
DEVICE(obj));
+ PCI_DEVICE(obj)->cap_present |= QEMU_PCI_CAP_EXPRESS;
}
static void vmxnet3_pci_uninit(PCIDevice *pci_dev)
@@ -2472,30 +2462,12 @@ static const VMStateDescription vmstate_vmxnet3 = {
static const Property vmxnet3_properties[] = {
DEFINE_NIC_PROPERTIES(VMXNET3State, conf),
- DEFINE_PROP_BIT("x-old-msi-offsets", VMXNET3State, compat_flags,
- VMXNET3_COMPAT_FLAG_OLD_MSI_OFFSETS_BIT, false),
- DEFINE_PROP_BIT("x-disable-pcie", VMXNET3State, compat_flags,
- VMXNET3_COMPAT_FLAG_DISABLE_PCIE_BIT, false),
};
-static void vmxnet3_realize(DeviceState *qdev, Error **errp)
-{
- VMXNET3Class *vc = VMXNET3_DEVICE_GET_CLASS(qdev);
- PCIDevice *pci_dev = PCI_DEVICE(qdev);
- VMXNET3State *s = VMXNET3(qdev);
-
- if (!(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE)) {
- pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
- }
-
- vc->parent_dc_realize(qdev, errp);
-}
-
static void vmxnet3_class_init(ObjectClass *class, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(class);
PCIDeviceClass *c = PCI_DEVICE_CLASS(class);
- VMXNET3Class *vc = VMXNET3_DEVICE_CLASS(class);
c->realize = vmxnet3_pci_realize;
c->exit = vmxnet3_pci_uninit;
@@ -2506,8 +2478,6 @@ static void vmxnet3_class_init(ObjectClass *class, const void *data)
c->class_id = PCI_CLASS_NETWORK_ETHERNET;
c->subsystem_vendor_id = PCI_VENDOR_ID_VMWARE;
c->subsystem_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
- device_class_set_parent_realize(dc, vmxnet3_realize,
- &vc->parent_dc_realize);
dc->desc = "VMWare Paravirtualized Ethernet v3";
device_class_set_legacy_reset(dc, vmxnet3_qdev_reset);
dc->vmsd = &vmstate_vmxnet3;
diff --git a/hw/net/vmxnet3.h b/hw/net/vmxnet3.h
index f9283f9..dbc69d5 100644
--- a/hw/net/vmxnet3.h
+++ b/hw/net/vmxnet3.h
@@ -63,8 +63,8 @@
* details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ * along with this program; if not, see
+ * <https://www.gnu.org/licenses/>.
*
* The full GNU General Public License is included in this distribution in
* the file called "COPYING".
diff --git a/hw/net/xgmac.c b/hw/net/xgmac.c
index 9c87c4e..d45f872 100644
--- a/hw/net/xgmac.c
+++ b/hw/net/xgmac.c
@@ -207,7 +207,7 @@ static void xgmac_enet_send(XgmacState *s)
struct desc bd;
int frame_size;
int len;
- uint8_t frame[8192];
+ QEMU_UNINITIALIZED uint8_t frame[8192];
uint8_t *ptr;
ptr = frame;
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index fd93550..e764ec7 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -22,7 +22,7 @@
*
* Usage
* -----
- * See docs/system/nvme.rst for extensive documentation.
+ * See docs/system/devices/nvme.rst for extensive documentation.
*
* Add options:
* -drive file=<file>,if=none,id=<drive_id>
@@ -1057,7 +1057,8 @@ static uint16_t nvme_map_sgl(NvmeCtrl *n, NvmeSg *sg, NvmeSglDescriptor sgl,
*/
#define SEG_CHUNK_SIZE 256
- NvmeSglDescriptor segment[SEG_CHUNK_SIZE], *sgld, *last_sgld;
+ QEMU_UNINITIALIZED NvmeSglDescriptor segment[SEG_CHUNK_SIZE];
+ NvmeSglDescriptor *sgld, *last_sgld;
uint64_t nsgld;
uint32_t seg_len;
uint16_t status;
@@ -5128,7 +5129,7 @@ static uint16_t nvme_error_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
uint64_t off, NvmeRequest *req)
{
- uint32_t nslist[1024];
+ uint32_t nslist[1024] = {};
uint32_t trans_len;
int i = 0;
uint32_t nsid;
@@ -5138,7 +5139,6 @@ static uint16_t nvme_changed_nslist(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
return NVME_INVALID_FIELD | NVME_DNR;
}
- memset(nslist, 0x0, sizeof(nslist));
trans_len = MIN(sizeof(nslist) - off, buf_len);
while ((nsid = find_first_bit(n->changed_nsids, NVME_CHANGED_NSID_SIZE)) !=
diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
index 237b9f7..aa24050 100644
--- a/hw/nvram/fw_cfg.c
+++ b/hw/nvram/fw_cfg.c
@@ -817,62 +817,6 @@ void fw_cfg_modify_i64(FWCfgState *s, uint16_t key, uint64_t value)
g_free(old);
}
-void fw_cfg_set_order_override(FWCfgState *s, int order)
-{
- assert(s->fw_cfg_order_override == 0);
- s->fw_cfg_order_override = order;
-}
-
-void fw_cfg_reset_order_override(FWCfgState *s)
-{
- assert(s->fw_cfg_order_override != 0);
- s->fw_cfg_order_override = 0;
-}
-
-/*
- * This is the legacy order list. For legacy systems, files are in
- * the fw_cfg in the order defined below, by the "order" value. Note
- * that some entries (VGA ROMs, NIC option ROMS, etc.) go into a
- * specific area, but there may be more than one and they occur in the
- * order that the user specifies them on the command line. Those are
- * handled in a special manner, using the order override above.
- *
- * For non-legacy, the files are sorted by filename to avoid this kind
- * of complexity in the future.
- *
- * This is only for x86, other arches don't implement versioning so
- * they won't set legacy mode.
- */
-static struct {
- const char *name;
- int order;
-} fw_cfg_order[] = {
- { "etc/boot-menu-wait", 10 },
- { "bootsplash.jpg", 11 },
- { "bootsplash.bmp", 12 },
- { "etc/boot-fail-wait", 15 },
- { "etc/smbios/smbios-tables", 20 },
- { "etc/smbios/smbios-anchor", 30 },
- { "etc/e820", 40 },
- { "etc/reserved-memory-end", 50 },
- { "genroms/kvmvapic.bin", 55 },
- { "genroms/linuxboot.bin", 60 },
- { }, /* VGA ROMs from pc_vga_init come here, 70. */
- { }, /* NIC option ROMs from pc_nic_init come here, 80. */
- { "etc/system-states", 90 },
- { }, /* User ROMs come here, 100. */
- { }, /* Device FW comes here, 110. */
- { "etc/extra-pci-roots", 120 },
- { "etc/acpi/tables", 130 },
- { "etc/table-loader", 140 },
- { "etc/tpm/log", 150 },
- { "etc/acpi/rsdp", 160 },
- { "bootorder", 170 },
- { "etc/msr_feature_control", 180 },
-
-#define FW_CFG_ORDER_OVERRIDE_LAST 200
-};
-
/*
* Any sub-page size update to these table MRs will be lost during migration,
* as we use aligned size in ram_load_precopy() -> qemu_ram_resize() path.
@@ -890,29 +834,6 @@ static void fw_cfg_acpi_mr_save(FWCfgState *s, const char *filename, size_t len)
}
}
-static int get_fw_cfg_order(FWCfgState *s, const char *name)
-{
- int i;
-
- if (s->fw_cfg_order_override > 0) {
- return s->fw_cfg_order_override;
- }
-
- for (i = 0; i < ARRAY_SIZE(fw_cfg_order); i++) {
- if (fw_cfg_order[i].name == NULL) {
- continue;
- }
-
- if (strcmp(name, fw_cfg_order[i].name) == 0) {
- return fw_cfg_order[i].order;
- }
- }
-
- /* Stick unknown stuff at the end. */
- warn_report("Unknown firmware file in legacy mode: %s", name);
- return FW_CFG_ORDER_OVERRIDE_LAST;
-}
-
void fw_cfg_add_file_callback(FWCfgState *s, const char *filename,
FWCfgCallback select_cb,
FWCfgWriteCallback write_cb,
@@ -921,7 +842,6 @@ void fw_cfg_add_file_callback(FWCfgState *s, const char *filename,
{
int i, index, count;
size_t dsize;
- MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
int order = 0;
if (!s->files) {
@@ -933,22 +853,11 @@ void fw_cfg_add_file_callback(FWCfgState *s, const char *filename,
count = be32_to_cpu(s->files->count);
assert(count < fw_cfg_file_slots(s));
- /* Find the insertion point. */
- if (mc->legacy_fw_cfg_order) {
- /*
- * Sort by order. For files with the same order, we keep them
- * in the sequence in which they were added.
- */
- order = get_fw_cfg_order(s, filename);
- for (index = count;
- index > 0 && order < s->entry_order[index - 1];
- index--);
- } else {
- /* Sort by file name. */
- for (index = count;
- index > 0 && strcmp(filename, s->files->f[index - 1].name) < 0;
- index--);
- }
+ /* Find the insertion point, sorting by file name. */
+ for (index = count;
+ index > 0 && strcmp(filename, s->files->f[index - 1].name) < 0;
+ index--)
+ ;
/*
* Move all the entries from the index point and after down one
@@ -1058,7 +967,6 @@ bool fw_cfg_add_file_from_generator(FWCfgState *s,
static void fw_cfg_machine_reset(void *opaque)
{
- MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
FWCfgState *s = opaque;
void *ptr;
size_t len;
@@ -1068,11 +976,9 @@ static void fw_cfg_machine_reset(void *opaque)
ptr = fw_cfg_modify_file(s, "bootorder", (uint8_t *)buf, len);
g_free(ptr);
- if (!mc->legacy_fw_cfg_order) {
- buf = get_boot_devices_lchs_list(&len);
- ptr = fw_cfg_modify_file(s, "bios-geometry", (uint8_t *)buf, len);
- g_free(ptr);
- }
+ buf = get_boot_devices_lchs_list(&len);
+ ptr = fw_cfg_modify_file(s, "bios-geometry", (uint8_t *)buf, len);
+ g_free(ptr);
}
static void fw_cfg_machine_ready(struct Notifier *n, void *data)
diff --git a/hw/pci-host/gt64120.c b/hw/pci-host/gt64120.c
index 56a6ef9..b12a256 100644
--- a/hw/pci-host/gt64120.c
+++ b/hw/pci-host/gt64120.c
@@ -320,38 +320,6 @@ static void gt64120_isd_mapping(GT64120State *s)
memory_region_transaction_commit();
}
-static void gt64120_update_pci_cfgdata_mapping(GT64120State *s)
-{
- /* Indexed on MByteSwap bit, see Table 158: PCI_0 Command, Offset: 0xc00 */
- static const MemoryRegionOps *pci_host_data_ops[] = {
- &pci_host_data_be_ops, &pci_host_data_le_ops
- };
- PCIHostState *phb = PCI_HOST_BRIDGE(s);
-
- memory_region_transaction_begin();
-
- /*
- * The setting of the MByteSwap bit and MWordSwap bit in the PCI Internal
- * Command Register determines how data transactions from the CPU to/from
- * PCI are handled along with the setting of the Endianness bit in the CPU
- * Configuration Register. See:
- * - Table 16: 32-bit PCI Transaction Endianness
- * - Table 158: PCI_0 Command, Offset: 0xc00
- */
-
- if (memory_region_is_mapped(&phb->data_mem)) {
- memory_region_del_subregion(&s->ISD_mem, &phb->data_mem);
- object_unparent(OBJECT(&phb->data_mem));
- }
- memory_region_init_io(&phb->data_mem, OBJECT(phb),
- pci_host_data_ops[s->regs[GT_PCI0_CMD] & 1],
- s, "pci-conf-data", 4);
- memory_region_add_subregion_overlap(&s->ISD_mem, GT_PCI0_CFGDATA << 2,
- &phb->data_mem, 1);
-
- memory_region_transaction_commit();
-}
-
static void gt64120_pci_mapping(GT64120State *s)
{
memory_region_transaction_begin();
@@ -645,7 +613,6 @@ static void gt64120_writel(void *opaque, hwaddr addr,
case GT_PCI0_CMD:
case GT_PCI1_CMD:
s->regs[saddr] = val & 0x0401fc0f;
- gt64120_update_pci_cfgdata_mapping(s);
break;
case GT_PCI0_TOR:
case GT_PCI0_BS_SCS10:
@@ -1024,6 +991,48 @@ static const MemoryRegionOps isd_mem_ops = {
},
};
+static bool bswap(const GT64120State *s)
+{
+ PCIHostState *phb = PCI_HOST_BRIDGE(s);
+ /*check for bus == 0 && device == 0, Bits 11:15 = Device , Bits 16:23 = Bus*/
+ bool is_phb_dev0 = extract32(phb->config_reg, 11, 13) == 0;
+ bool le_mode = FIELD_EX32(s->regs[GT_PCI0_CMD], GT_PCI0_CMD, MByteSwap);
+ /* Only swap for non-bridge devices in big-endian mode */
+ return !le_mode && !is_phb_dev0;
+}
+
+static uint64_t gt64120_pci_data_read(void *opaque, hwaddr addr, unsigned size)
+{
+ GT64120State *s = opaque;
+ uint32_t val = pci_host_data_le_ops.read(opaque, addr, size);
+
+ if (bswap(s)) {
+ val = bswap32(val);
+ }
+ return val;
+}
+
+static void gt64120_pci_data_write(void *opaque, hwaddr addr,
+ uint64_t val, unsigned size)
+{
+ GT64120State *s = opaque;
+
+ if (bswap(s)) {
+ val = bswap32(val);
+ }
+ pci_host_data_le_ops.write(opaque, addr, val, size);
+}
+
+static const MemoryRegionOps gt64120_pci_data_ops = {
+ .read = gt64120_pci_data_read,
+ .write = gt64120_pci_data_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid = {
+ .min_access_size = 4,
+ .max_access_size = 4,
+ },
+};
+
static void gt64120_reset(DeviceState *dev)
{
GT64120State *s = GT64120_PCI_HOST_BRIDGE(dev);
@@ -1178,7 +1187,6 @@ static void gt64120_reset(DeviceState *dev)
gt64120_isd_mapping(s);
gt64120_pci_mapping(s);
- gt64120_update_pci_cfgdata_mapping(s);
}
static void gt64120_realize(DeviceState *dev, Error **errp)
@@ -1202,6 +1210,12 @@ static void gt64120_realize(DeviceState *dev, Error **errp)
memory_region_add_subregion_overlap(&s->ISD_mem, GT_PCI0_CFGADDR << 2,
&phb->conf_mem, 1);
+ memory_region_init_io(&phb->data_mem, OBJECT(phb),
+ &gt64120_pci_data_ops,
+ s, "pci-conf-data", 4);
+ memory_region_add_subregion_overlap(&s->ISD_mem, GT_PCI0_CFGDATA << 2,
+ &phb->data_mem, 1);
+
/*
* The whole address space decoded by the GT-64120A doesn't generate
diff --git a/hw/pci-host/ppce500.c b/hw/pci-host/ppce500.c
index e97a515..52269b0 100644
--- a/hw/pci-host/ppce500.c
+++ b/hw/pci-host/ppce500.c
@@ -16,7 +16,6 @@
#include "qemu/osdep.h"
#include "hw/irq.h"
-#include "hw/ppc/e500-ccsr.h"
#include "hw/qdev-properties.h"
#include "migration/vmstate.h"
#include "hw/pci/pci_device.h"
@@ -418,11 +417,12 @@ static const VMStateDescription vmstate_ppce500_pci = {
static void e500_pcihost_bridge_realize(PCIDevice *d, Error **errp)
{
PPCE500PCIBridgeState *b = PPC_E500_PCI_BRIDGE(d);
- PPCE500CCSRState *ccsr = CCSR(
+ SysBusDevice *ccsr = SYS_BUS_DEVICE(
object_resolve_path_component(qdev_get_machine(), "e500-ccsr"));
+ MemoryRegion *ccsr_space = sysbus_mmio_get_region(ccsr, 0);
- memory_region_init_alias(&b->bar0, OBJECT(ccsr), "e500-pci-bar0", &ccsr->ccsr_space,
- 0, int128_get64(ccsr->ccsr_space.size));
+ memory_region_init_alias(&b->bar0, OBJECT(ccsr), "e500-pci-bar0",
+ ccsr_space, 0, int128_get64(ccsr_space->size));
pci_register_bar(d, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &b->bar0);
}
diff --git a/hw/pci-host/raven.c b/hw/pci-host/raven.c
index 21f7ca6..f8c0be5 100644
--- a/hw/pci-host/raven.c
+++ b/hw/pci-host/raven.c
@@ -24,7 +24,6 @@
*/
#include "qemu/osdep.h"
-#include "qemu/datadir.h"
#include "qemu/units.h"
#include "qemu/log.h"
#include "qapi/error.h"
@@ -35,9 +34,7 @@
#include "migration/vmstate.h"
#include "hw/intc/i8259.h"
#include "hw/irq.h"
-#include "hw/loader.h"
#include "hw/or-irq.h"
-#include "elf.h"
#include "qom/object.h"
#define TYPE_RAVEN_PCI_DEVICE "raven"
@@ -47,10 +44,6 @@ OBJECT_DECLARE_SIMPLE_TYPE(RavenPCIState, RAVEN_PCI_DEVICE)
struct RavenPCIState {
PCIDevice dev;
-
- uint32_t elf_machine;
- char *bios_name;
- MemoryRegion bios;
};
typedef struct PRePPCIState PREPPCIState;
@@ -75,11 +68,8 @@ struct PRePPCIState {
RavenPCIState pci_dev;
int contiguous_map;
- bool is_legacy_prep;
};
-#define BIOS_SIZE (1 * MiB)
-
#define PCI_IO_BASE_ADDR 0x80000000 /* Physical address on main bus */
static inline uint32_t raven_pci_io_config(hwaddr addr)
@@ -243,22 +233,18 @@ static void raven_pcihost_realizefn(DeviceState *d, Error **errp)
MemoryRegion *address_space_mem = get_system_memory();
int i;
- if (s->is_legacy_prep) {
- for (i = 0; i < PCI_NUM_PINS; i++) {
- sysbus_init_irq(dev, &s->pci_irqs[i]);
- }
- } else {
- /* According to PReP specification section 6.1.6 "System Interrupt
- * Assignments", all PCI interrupts are routed via IRQ 15 */
- s->or_irq = OR_IRQ(object_new(TYPE_OR_IRQ));
- object_property_set_int(OBJECT(s->or_irq), "num-lines", PCI_NUM_PINS,
- &error_fatal);
- qdev_realize(DEVICE(s->or_irq), NULL, &error_fatal);
- sysbus_init_irq(dev, &s->or_irq->out_irq);
-
- for (i = 0; i < PCI_NUM_PINS; i++) {
- s->pci_irqs[i] = qdev_get_gpio_in(DEVICE(s->or_irq), i);
- }
+ /*
+ * According to PReP specification section 6.1.6 "System Interrupt
+ * Assignments", all PCI interrupts are routed via IRQ 15
+ */
+ s->or_irq = OR_IRQ(object_new(TYPE_OR_IRQ));
+ object_property_set_int(OBJECT(s->or_irq), "num-lines", PCI_NUM_PINS,
+ &error_fatal);
+ qdev_realize(DEVICE(s->or_irq), NULL, &error_fatal);
+ sysbus_init_irq(dev, &s->or_irq->out_irq);
+
+ for (i = 0; i < PCI_NUM_PINS; i++) {
+ s->pci_irqs[i] = qdev_get_gpio_in(DEVICE(s->or_irq), i);
}
qdev_init_gpio_in(d, raven_change_gpio, 1);
@@ -338,48 +324,9 @@ static void raven_pcihost_initfn(Object *obj)
static void raven_realize(PCIDevice *d, Error **errp)
{
- RavenPCIState *s = RAVEN_PCI_DEVICE(d);
- char *filename;
- int bios_size = -1;
-
d->config[PCI_CACHE_LINE_SIZE] = 0x08;
d->config[PCI_LATENCY_TIMER] = 0x10;
d->config[PCI_CAPABILITY_LIST] = 0x00;
-
- if (!memory_region_init_rom_nomigrate(&s->bios, OBJECT(s), "bios",
- BIOS_SIZE, errp)) {
- return;
- }
- memory_region_add_subregion(get_system_memory(), (uint32_t)(-BIOS_SIZE),
- &s->bios);
- if (s->bios_name) {
- filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, s->bios_name);
- if (filename) {
- if (s->elf_machine != EM_NONE) {
- bios_size = load_elf(filename, NULL, NULL, NULL, NULL,
- NULL, NULL, NULL,
- ELFDATA2MSB, s->elf_machine, 0, 0);
- }
- if (bios_size < 0) {
- bios_size = get_image_size(filename);
- if (bios_size > 0 && bios_size <= BIOS_SIZE) {
- hwaddr bios_addr;
- bios_size = (bios_size + 0xfff) & ~0xfff;
- bios_addr = (uint32_t)(-BIOS_SIZE);
- bios_size = load_image_targphys(filename, bios_addr,
- bios_size);
- }
- }
- }
- g_free(filename);
- if (bios_size < 0 || bios_size > BIOS_SIZE) {
- memory_region_del_subregion(get_system_memory(), &s->bios);
- error_setg(errp, "Could not load bios image '%s'", s->bios_name);
- return;
- }
- }
-
- vmstate_register_ram_global(&s->bios);
}
static const VMStateDescription vmstate_raven = {
@@ -422,22 +369,12 @@ static const TypeInfo raven_info = {
},
};
-static const Property raven_pcihost_properties[] = {
- DEFINE_PROP_UINT32("elf-machine", PREPPCIState, pci_dev.elf_machine,
- EM_NONE),
- DEFINE_PROP_STRING("bios-name", PREPPCIState, pci_dev.bios_name),
- /* Temporary workaround until legacy prep machine is removed */
- DEFINE_PROP_BOOL("is-legacy-prep", PREPPCIState, is_legacy_prep,
- false),
-};
-
static void raven_pcihost_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
dc->realize = raven_pcihost_realizefn;
- device_class_set_props(dc, raven_pcihost_properties);
dc->fw_name = "pci";
}
diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index 66f27b9..8c7f670 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -72,7 +72,7 @@ static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
return dev->msix_pba + vector / 8;
}
-static int msix_is_pending(PCIDevice *dev, int vector)
+int msix_is_pending(PCIDevice *dev, unsigned int vector)
{
return *msix_pending_byte(dev, vector) & msix_pending_mask(vector);
}
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index f5ab510..c70b5ce 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -32,6 +32,7 @@
#include "hw/pci/pci_host.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
+#include "migration/cpr.h"
#include "migration/qemu-file-types.h"
#include "migration/vmstate.h"
#include "net/net.h"
@@ -128,6 +129,12 @@ static GSequence *pci_acpi_index_list(void)
return used_acpi_index_list;
}
+static void pci_set_master(PCIDevice *d, bool enable)
+{
+ memory_region_set_enabled(&d->bus_master_enable_region, enable);
+ d->is_master = enable; /* cache the status */
+}
+
static void pci_init_bus_master(PCIDevice *pci_dev)
{
AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
@@ -135,7 +142,7 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
memory_region_init_alias(&pci_dev->bus_master_enable_region,
OBJECT(pci_dev), "bus master",
dma_as->root, 0, memory_region_size(dma_as->root));
- memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
+ pci_set_master(pci_dev, false);
memory_region_add_subregion(&pci_dev->bus_master_container_region, 0,
&pci_dev->bus_master_enable_region);
}
@@ -531,6 +538,10 @@ static void pci_reset_regions(PCIDevice *dev)
static void pci_do_device_reset(PCIDevice *dev)
{
+ if ((dev->cap_present & QEMU_PCI_SKIP_RESET_ON_CPR) && cpr_is_incoming()) {
+ return;
+ }
+
pci_device_deassert_intx(dev);
assert(dev->irq_state == 0);
@@ -804,9 +815,8 @@ static int get_pci_config_device(QEMUFile *f, void *pv, size_t size,
pci_bridge_update_mappings(PCI_BRIDGE(s));
}
- memory_region_set_enabled(&s->bus_master_enable_region,
- pci_get_word(s->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER);
+ pci_set_master(s, pci_get_word(s->config + PCI_COMMAND)
+ & PCI_COMMAND_MASTER);
g_free(config);
return 0;
@@ -1725,7 +1735,7 @@ static void pci_update_mappings(PCIDevice *d)
pci_update_vga(d);
}
-static inline int pci_irq_disabled(PCIDevice *d)
+int pci_irq_disabled(PCIDevice *d)
{
return pci_get_word(d->config + PCI_COMMAND) & PCI_COMMAND_INTX_DISABLE;
}
@@ -1787,9 +1797,8 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
if (ranges_overlap(addr, l, PCI_COMMAND, 2)) {
pci_update_irq_disabled(d, was_irq_disabled);
- memory_region_set_enabled(&d->bus_master_enable_region,
- (pci_get_word(d->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER) && d->enabled);
+ pci_set_master(d, (pci_get_word(d->config + PCI_COMMAND) &
+ PCI_COMMAND_MASTER) && d->enabled);
}
msi_write_config(d, addr, val_in, l);
@@ -2935,6 +2944,23 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
return &address_space_memory;
}
+int pci_iommu_init_iotlb_notifier(PCIDevice *dev, IOMMUNotifier *n,
+ IOMMUNotify fn, void *opaque)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->init_iotlb_notifier) {
+ iommu_bus->iommu_ops->init_iotlb_notifier(bus, iommu_bus->iommu_opaque,
+ devfn, n, fn, opaque);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod,
Error **errp)
{
@@ -2966,6 +2992,170 @@ void pci_device_unset_iommu_device(PCIDevice *dev)
}
}
+int pci_pri_request_page(PCIDevice *dev, uint32_t pasid, bool priv_req,
+ bool exec_req, hwaddr addr, bool lpig,
+ uint16_t prgi, bool is_read, bool is_write)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if (!dev->is_master ||
+ ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) {
+ return -EPERM;
+ }
+
+ if (!pcie_pri_enabled(dev)) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->pri_request_page) {
+ return iommu_bus->iommu_ops->pri_request_page(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid, priv_req,
+ exec_req, addr, lpig, prgi,
+ is_read, is_write);
+ }
+
+ return -ENODEV;
+}
+
+int pci_pri_register_notifier(PCIDevice *dev, uint32_t pasid,
+ IOMMUPRINotifier *notifier)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if (!dev->is_master ||
+ ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->pri_register_notifier) {
+ iommu_bus->iommu_ops->pri_register_notifier(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid, notifier);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
+void pci_pri_unregister_notifier(PCIDevice *dev, uint32_t pasid)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->pri_unregister_notifier) {
+ iommu_bus->iommu_ops->pri_unregister_notifier(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid);
+ }
+}
+
+ssize_t pci_ats_request_translation(PCIDevice *dev, uint32_t pasid,
+ bool priv_req, bool exec_req,
+ hwaddr addr, size_t length,
+ bool no_write, IOMMUTLBEntry *result,
+ size_t result_length,
+ uint32_t *err_count)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if (!dev->is_master ||
+ ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev))) {
+ return -EPERM;
+ }
+
+ if (result_length == 0) {
+ return -ENOSPC;
+ }
+
+ if (!pcie_ats_enabled(dev)) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->ats_request_translation) {
+ return iommu_bus->iommu_ops->ats_request_translation(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid, priv_req,
+ exec_req, addr, length,
+ no_write, result,
+ result_length, err_count);
+ }
+
+ return -ENODEV;
+}
+
+int pci_iommu_register_iotlb_notifier(PCIDevice *dev, uint32_t pasid,
+ IOMMUNotifier *n)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev)) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->register_iotlb_notifier) {
+ iommu_bus->iommu_ops->register_iotlb_notifier(bus,
+ iommu_bus->iommu_opaque, devfn,
+ pasid, n);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
+int pci_iommu_unregister_iotlb_notifier(PCIDevice *dev, uint32_t pasid,
+ IOMMUNotifier *n)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ if ((pasid != PCI_NO_PASID) && !pcie_pasid_enabled(dev)) {
+ return -EPERM;
+ }
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->unregister_iotlb_notifier) {
+ iommu_bus->iommu_ops->unregister_iotlb_notifier(bus,
+ iommu_bus->iommu_opaque,
+ devfn, pasid, n);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
+int pci_iommu_get_iotlb_info(PCIDevice *dev, uint8_t *addr_width,
+ uint32_t *min_page_size)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+ if (iommu_bus && iommu_bus->iommu_ops->get_iotlb_info) {
+ iommu_bus->iommu_ops->get_iotlb_info(iommu_bus->iommu_opaque,
+ addr_width, min_page_size);
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
{
/*
@@ -3100,9 +3290,8 @@ void pci_set_enabled(PCIDevice *d, bool state)
d->enabled = state;
pci_update_mappings(d);
- memory_region_set_enabled(&d->bus_master_enable_region,
- (pci_get_word(d->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER) && d->enabled);
+ pci_set_master(d, (pci_get_word(d->config + PCI_COMMAND)
+ & PCI_COMMAND_MASTER) && d->enabled);
if (qdev_is_realized(&d->qdev)) {
pci_device_reset(d);
}
diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c
index abe83bb..7179d99 100644
--- a/hw/pci/pci_host.c
+++ b/hw/pci/pci_host.c
@@ -217,12 +217,6 @@ const MemoryRegionOps pci_host_data_le_ops = {
.endianness = DEVICE_LITTLE_ENDIAN,
};
-const MemoryRegionOps pci_host_data_be_ops = {
- .read = pci_host_data_read,
- .write = pci_host_data_write,
- .endianness = DEVICE_BIG_ENDIAN,
-};
-
static bool pci_host_needed(void *opaque)
{
PCIHostState *s = opaque;
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 1b12db6..eaeb688 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -1214,3 +1214,81 @@ void pcie_acs_reset(PCIDevice *dev)
pci_set_word(dev->config + dev->exp.acs_cap + PCI_ACS_CTRL, 0);
}
}
+
+/* PASID */
+void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint8_t pasid_width,
+ bool exec_perm, bool priv_mod)
+{
+ static const uint16_t control_reg_rw_mask = 0x07;
+ uint16_t capability_reg;
+
+ assert(pasid_width <= PCI_EXT_CAP_PASID_MAX_WIDTH);
+
+ pcie_add_capability(dev, PCI_EXT_CAP_ID_PASID, PCI_PASID_VER, offset,
+ PCI_EXT_CAP_PASID_SIZEOF);
+
+ capability_reg = ((uint16_t)pasid_width) << PCI_PASID_CAP_WIDTH_SHIFT;
+ capability_reg |= exec_perm ? PCI_PASID_CAP_EXEC : 0;
+ capability_reg |= priv_mod ? PCI_PASID_CAP_PRIV : 0;
+ pci_set_word(dev->config + offset + PCI_PASID_CAP, capability_reg);
+
+ /* Everything is disabled by default */
+ pci_set_word(dev->config + offset + PCI_PASID_CTRL, 0);
+
+ pci_set_word(dev->wmask + offset + PCI_PASID_CTRL, control_reg_rw_mask);
+
+ dev->exp.pasid_cap = offset;
+}
+
+/* PRI */
+void pcie_pri_init(PCIDevice *dev, uint16_t offset, uint32_t outstanding_pr_cap,
+ bool prg_response_pasid_req)
+{
+ static const uint16_t control_reg_rw_mask = 0x3;
+ static const uint16_t status_reg_rw1_mask = 0x3;
+ static const uint32_t pr_alloc_reg_rw_mask = 0xffffffff;
+ uint16_t status_reg;
+
+ status_reg = prg_response_pasid_req ? PCI_PRI_STATUS_PASID : 0;
+ status_reg |= PCI_PRI_STATUS_STOPPED; /* Stopped by default */
+
+ pcie_add_capability(dev, PCI_EXT_CAP_ID_PRI, PCI_PRI_VER, offset,
+ PCI_EXT_CAP_PRI_SIZEOF);
+ /* Disabled by default */
+
+ pci_set_word(dev->config + offset + PCI_PRI_STATUS, status_reg);
+ pci_set_long(dev->config + offset + PCI_PRI_MAX_REQ, outstanding_pr_cap);
+
+ pci_set_word(dev->wmask + offset + PCI_PRI_CTRL, control_reg_rw_mask);
+ pci_set_word(dev->w1cmask + offset + PCI_PRI_STATUS, status_reg_rw1_mask);
+ pci_set_long(dev->wmask + offset + PCI_PRI_ALLOC_REQ, pr_alloc_reg_rw_mask);
+
+ dev->exp.pri_cap = offset;
+}
+
+bool pcie_pri_enabled(const PCIDevice *dev)
+{
+ if (!pci_is_express(dev) || !dev->exp.pri_cap) {
+ return false;
+ }
+ return (pci_get_word(dev->config + dev->exp.pri_cap + PCI_PRI_CTRL) &
+ PCI_PRI_CTRL_ENABLE) != 0;
+}
+
+bool pcie_pasid_enabled(const PCIDevice *dev)
+{
+ if (!pci_is_express(dev) || !dev->exp.pasid_cap) {
+ return false;
+ }
+ return (pci_get_word(dev->config + dev->exp.pasid_cap + PCI_PASID_CTRL) &
+ PCI_PASID_CTRL_ENABLE) != 0;
+}
+
+bool pcie_ats_enabled(const PCIDevice *dev)
+{
+ if (!pci_is_express(dev) || !dev->exp.ats_cap) {
+ return false;
+ }
+ return (pci_get_word(dev->config + dev->exp.ats_cap + PCI_ATS_CTRL) &
+ PCI_ATS_CTRL_ENABLE) != 0;
+}
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index 809078a..723c97f 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -79,8 +79,6 @@
#define MPC85XX_ESDHC_IRQ 72
#define RTC_REGS_OFFSET 0x68
-#define PLATFORM_CLK_FREQ_HZ (400 * 1000 * 1000)
-
struct boot_info
{
uint32_t dt_base;
@@ -120,7 +118,7 @@ static uint32_t *pci_map_create(void *fdt, uint32_t mpic, int first_slot,
}
static void dt_serial_create(void *fdt, unsigned long long offset,
- const char *soc, const char *mpic,
+ const char *soc, uint32_t freq, const char *mpic,
const char *alias, int idx, bool defcon)
{
char *ser;
@@ -131,7 +129,7 @@ static void dt_serial_create(void *fdt, unsigned long long offset,
qemu_fdt_setprop_string(fdt, ser, "compatible", "ns16550");
qemu_fdt_setprop_cells(fdt, ser, "reg", offset, 0x100);
qemu_fdt_setprop_cell(fdt, ser, "cell-index", idx);
- qemu_fdt_setprop_cell(fdt, ser, "clock-frequency", PLATFORM_CLK_FREQ_HZ);
+ qemu_fdt_setprop_cell(fdt, ser, "clock-frequency", freq);
qemu_fdt_setprop_cells(fdt, ser, "interrupts", 42, 2);
qemu_fdt_setprop_phandle(fdt, ser, "interrupt-parent", mpic);
qemu_fdt_setprop_string(fdt, "/aliases", alias, ser);
@@ -382,8 +380,7 @@ static int ppce500_load_device_tree(PPCE500MachineState *pms,
int fdt_size;
void *fdt;
uint8_t hypercall[16];
- uint32_t clock_freq = PLATFORM_CLK_FREQ_HZ;
- uint32_t tb_freq = PLATFORM_CLK_FREQ_HZ;
+ uint32_t clock_freq, tb_freq;
int i;
char compatible_sb[] = "fsl,mpc8544-immr\0simple-bus";
char *soc;
@@ -411,7 +408,7 @@ static int ppce500_load_device_tree(PPCE500MachineState *pms,
if (dtb_file) {
char *filename;
- filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, dtb_file);
+ filename = qemu_find_file(QEMU_FILE_TYPE_DTB, dtb_file);
if (!filename) {
goto out;
}
@@ -484,6 +481,9 @@ static int ppce500_load_device_tree(PPCE500MachineState *pms,
if (kvmppc_get_hasidle(env)) {
qemu_fdt_setprop(fdt, "/hypervisor", "has-idle", NULL, 0);
}
+ } else {
+ clock_freq = pmc->clock_freq;
+ tb_freq = pmc->tb_freq;
}
/* Create CPU nodes */
@@ -564,12 +564,12 @@ static int ppce500_load_device_tree(PPCE500MachineState *pms,
*/
if (serial_hd(1)) {
dt_serial_create(fdt, MPC8544_SERIAL1_REGS_OFFSET,
- soc, mpic, "serial1", 1, false);
+ soc, pmc->clock_freq, mpic, "serial1", 1, false);
}
if (serial_hd(0)) {
dt_serial_create(fdt, MPC8544_SERIAL0_REGS_OFFSET,
- soc, mpic, "serial0", 0, true);
+ soc, pmc->clock_freq, mpic, "serial0", 0, true);
}
/* i2c */
@@ -931,7 +931,6 @@ void ppce500_init(MachineState *machine)
CPUPPCState *firstenv = NULL;
MemoryRegion *ccsr_addr_space;
SysBusDevice *s;
- PPCE500CCSRState *ccsr;
I2CBus *i2c;
irqs = g_new0(IrqLines, smp_cpus);
@@ -968,7 +967,7 @@ void ppce500_init(MachineState *machine)
env->spr_cb[SPR_BOOKE_PIR].default_value = cs->cpu_index = i;
env->mpic_iack = pmc->ccsrbar_base + MPC8544_MPIC_REGS_OFFSET + 0xa0;
- ppc_booke_timers_init(cpu, PLATFORM_CLK_FREQ_HZ, PPC_TIMER_E500);
+ ppc_booke_timers_init(cpu, pmc->tb_freq, PPC_TIMER_E500);
/* Register reset handler */
if (!i) {
@@ -993,10 +992,10 @@ void ppce500_init(MachineState *machine)
memory_region_add_subregion(address_space_mem, 0, machine->ram);
dev = qdev_new("e500-ccsr");
+ s = SYS_BUS_DEVICE(dev);
object_property_add_child(OBJECT(machine), "e500-ccsr", OBJECT(dev));
- sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
- ccsr = CCSR(dev);
- ccsr_addr_space = &ccsr->ccsr_space;
+ sysbus_realize_and_unref(s, &error_fatal);
+ ccsr_addr_space = sysbus_mmio_get_region(s, 0);
memory_region_add_subregion(address_space_mem, pmc->ccsrbar_base,
ccsr_addr_space);
@@ -1284,6 +1283,7 @@ static void e500_ccsr_initfn(Object *obj)
PPCE500CCSRState *ccsr = CCSR(obj);
memory_region_init(&ccsr->ccsr_space, obj, "e500-ccsr",
MPC8544_CCSRBAR_SIZE);
+ sysbus_init_mmio(SYS_BUS_DEVICE(ccsr), &ccsr->ccsr_space);
}
static const TypeInfo e500_ccsr_info = {
diff --git a/hw/ppc/e500.h b/hw/ppc/e500.h
index 01db102..00f4905 100644
--- a/hw/ppc/e500.h
+++ b/hw/ppc/e500.h
@@ -5,6 +5,8 @@
#include "hw/platform-bus.h"
#include "qom/object.h"
+#define PLATFORM_CLK_FREQ_HZ (400 * 1000 * 1000)
+
struct PPCE500MachineState {
/*< private >*/
MachineState parent_obj;
@@ -37,6 +39,8 @@ struct PPCE500MachineClass {
hwaddr pci_mmio_base;
hwaddr pci_mmio_bus_base;
hwaddr spin_base;
+ uint32_t clock_freq;
+ uint32_t tb_freq;
};
void ppce500_init(MachineState *machine);
diff --git a/hw/ppc/e500plat.c b/hw/ppc/e500plat.c
index 775b9d8..4f1d659 100644
--- a/hw/ppc/e500plat.c
+++ b/hw/ppc/e500plat.c
@@ -93,6 +93,8 @@ static void e500plat_machine_class_init(ObjectClass *oc, const void *data)
pmc->pci_mmio_base = 0xC00000000ULL;
pmc->pci_mmio_bus_base = 0xE0000000ULL;
pmc->spin_base = 0xFEF000000ULL;
+ pmc->clock_freq = PLATFORM_CLK_FREQ_HZ;
+ pmc->tb_freq = PLATFORM_CLK_FREQ_HZ;
mc->desc = "generic paravirt e500 platform";
mc->init = e500plat_init;
diff --git a/hw/ppc/mpc8544ds.c b/hw/ppc/mpc8544ds.c
index 97fb0f3..5826985 100644
--- a/hw/ppc/mpc8544ds.c
+++ b/hw/ppc/mpc8544ds.c
@@ -55,6 +55,8 @@ static void mpc8544ds_machine_class_init(ObjectClass *oc, const void *data)
pmc->pci_mmio_bus_base = 0xC0000000ULL;
pmc->pci_pio_base = 0xE1000000ULL;
pmc->spin_base = 0xEF000000ULL;
+ pmc->clock_freq = PLATFORM_CLK_FREQ_HZ;
+ pmc->tb_freq = PLATFORM_CLK_FREQ_HZ;
mc->desc = "mpc8544ds";
mc->init = mpc8544ds_init;
diff --git a/hw/ppc/pnv_occ.c b/hw/ppc/pnv_occ.c
index fa6f31c..24b789c 100644
--- a/hw/ppc/pnv_occ.c
+++ b/hw/ppc/pnv_occ.c
@@ -789,7 +789,7 @@ static bool occ_opal_process_command(PnvOCC *occ,
static bool occ_model_tick(PnvOCC *occ)
{
- struct occ_dynamic_data dynamic_data;
+ QEMU_UNINITIALIZED struct occ_dynamic_data dynamic_data;
if (!occ_read_dynamic_data(occ, &dynamic_data, NULL)) {
/* Can't move OCC state field to safe because we can't map it! */
diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c
index 7395263..982e40e 100644
--- a/hw/ppc/prep.c
+++ b/hw/ppc/prep.c
@@ -35,6 +35,7 @@
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/log.h"
+#include "qemu/datadir.h"
#include "hw/loader.h"
#include "hw/rtc/mc146818rtc.h"
#include "hw/isa/pc87312.h"
@@ -55,6 +56,8 @@
#define KERNEL_LOAD_ADDR 0x01000000
#define INITRD_LOAD_ADDR 0x01800000
+#define BIOS_ADDR 0xfff00000
+#define BIOS_SIZE (1 * MiB)
#define NVRAM_SIZE 0x2000
static void fw_cfg_boot_set(void *opaque, const char *boot_device,
@@ -241,6 +244,9 @@ static void ibm_40p_init(MachineState *machine)
ISADevice *isa_dev;
ISABus *isa_bus;
void *fw_cfg;
+ MemoryRegion *bios = g_new(MemoryRegion, 1);
+ char *filename;
+ ssize_t bios_size = -1;
uint32_t kernel_base = 0, initrd_base = 0;
long kernel_size = 0, initrd_size = 0;
char boot_device;
@@ -263,10 +269,27 @@ static void ibm_40p_init(MachineState *machine)
cpu_ppc_tb_init(env, 100UL * 1000UL * 1000UL);
qemu_register_reset(ppc_prep_reset, cpu);
+ /* allocate and load firmware */
+ filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
+ if (!filename) {
+ error_report("Could not find bios image '%s'", bios_name);
+ exit(1);
+ }
+ memory_region_init_rom(bios, NULL, "bios", BIOS_SIZE, &error_fatal);
+ memory_region_add_subregion(get_system_memory(), BIOS_ADDR, bios);
+ bios_size = load_elf(filename, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ ELFDATA2MSB, PPC_ELF_MACHINE, 0, 0);
+ if (bios_size < 0) {
+ bios_size = load_image_targphys(filename, BIOS_ADDR, BIOS_SIZE);
+ }
+ if (bios_size < 0 || bios_size > BIOS_SIZE) {
+ error_report("Could not load bios image '%s'", filename);
+ return;
+ }
+ g_free(filename);
+
/* PCI host */
dev = qdev_new("raven-pcihost");
- qdev_prop_set_string(dev, "bios-name", bios_name);
- qdev_prop_set_uint32(dev, "elf-machine", PPC_ELF_MACHINE);
pcihost = SYS_BUS_DEVICE(dev);
object_property_add_child(qdev_get_machine(), "raven", OBJECT(dev));
sysbus_realize_and_unref(pcihost, &error_fatal);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 702f774..08615f6 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -577,7 +577,7 @@ static int spapr_dt_dynamic_memory(SpaprMachineState *spapr, void *fdt,
/*
* Adds ibm,dynamic-reconfiguration-memory node.
- * Refer to docs/specs/ppc-spapr-hotplug.txt for the documentation
+ * Refer to docs/specs/ppc-spapr-hotplug.rst for the documentation
* of this device tree node.
*/
static int spapr_dt_dynamic_reconfiguration_memory(SpaprMachineState *spapr,
diff --git a/hw/ppc/spapr_tpm_proxy.c b/hw/ppc/spapr_tpm_proxy.c
index 862eeaa..1297b3a 100644
--- a/hw/ppc/spapr_tpm_proxy.c
+++ b/hw/ppc/spapr_tpm_proxy.c
@@ -41,8 +41,8 @@ static ssize_t tpm_execute(SpaprTpmProxy *tpm_proxy, target_ulong *args)
target_ulong data_in_size = args[2];
uint64_t data_out = ppc64_phys_to_real(args[3]);
target_ulong data_out_size = args[4];
- uint8_t buf_in[TPM_SPAPR_BUFSIZE];
- uint8_t buf_out[TPM_SPAPR_BUFSIZE];
+ QEMU_UNINITIALIZED uint8_t buf_in[TPM_SPAPR_BUFSIZE];
+ QEMU_UNINITIALIZED uint8_t buf_out[TPM_SPAPR_BUFSIZE];
ssize_t ret;
trace_spapr_tpm_execute(data_in, data_in_size, data_out, data_out_size);
diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
index e6a0ac1..fc9c35b 100644
--- a/hw/riscv/Kconfig
+++ b/hw/riscv/Kconfig
@@ -119,3 +119,12 @@ config SPIKE
select HTIF
select RISCV_ACLINT
select SIFIVE_PLIC
+
+config XIANGSHAN_KUNMINGHU
+ bool
+ default y
+ depends on RISCV64
+ select RISCV_ACLINT
+ select RISCV_APLIC
+ select RISCV_IMSIC
+ select SERIAL_MM
diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
index 765b9e2..828a867 100644
--- a/hw/riscv/boot.c
+++ b/hw/riscv/boot.c
@@ -37,7 +37,7 @@
bool riscv_is_32bit(RISCVHartArrayState *harts)
{
RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(&harts->harts[0]);
- return mcc->misa_mxl_max == MXL_RV32;
+ return mcc->def->misa_mxl_max == MXL_RV32;
}
/*
diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build
index c22f3a7..2a8d5b1 100644
--- a/hw/riscv/meson.build
+++ b/hw/riscv/meson.build
@@ -13,5 +13,6 @@ riscv_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c'))
riscv_ss.add(when: 'CONFIG_RISCV_IOMMU', if_true: files(
'riscv-iommu.c', 'riscv-iommu-pci.c', 'riscv-iommu-sys.c', 'riscv-iommu-hpm.c'))
riscv_ss.add(when: 'CONFIG_MICROBLAZE_V', if_true: files('microblaze-v-generic.c'))
+riscv_ss.add(when: 'CONFIG_XIANGSHAN_KUNMINGHU', if_true: files('xiangshan_kmh.c'))
hw_arch += {'riscv': riscv_ss}
diff --git a/hw/riscv/riscv-iommu-bits.h b/hw/riscv/riscv-iommu-bits.h
index 1017d73..47fe01b 100644
--- a/hw/riscv/riscv-iommu-bits.h
+++ b/hw/riscv/riscv-iommu-bits.h
@@ -79,6 +79,7 @@ struct riscv_iommu_pq_record {
#define RISCV_IOMMU_CAP_SV39 BIT_ULL(9)
#define RISCV_IOMMU_CAP_SV48 BIT_ULL(10)
#define RISCV_IOMMU_CAP_SV57 BIT_ULL(11)
+#define RISCV_IOMMU_CAP_SVRSW60T59B BIT_ULL(14)
#define RISCV_IOMMU_CAP_SV32X4 BIT_ULL(16)
#define RISCV_IOMMU_CAP_SV39X4 BIT_ULL(17)
#define RISCV_IOMMU_CAP_SV48X4 BIT_ULL(18)
diff --git a/hw/riscv/riscv-iommu-pci.c b/hw/riscv/riscv-iommu-pci.c
index 1f44eef..cdb4a7a 100644
--- a/hw/riscv/riscv-iommu-pci.c
+++ b/hw/riscv/riscv-iommu-pci.c
@@ -68,12 +68,6 @@ typedef struct RISCVIOMMUStatePci {
RISCVIOMMUState iommu; /* common IOMMU state */
} RISCVIOMMUStatePci;
-struct RISCVIOMMUPciClass {
- /*< public >*/
- DeviceRealize parent_realize;
- ResettablePhases parent_phases;
-};
-
/* interrupt delivery callback */
static void riscv_iommu_pci_notify(RISCVIOMMUState *iommu, unsigned vector)
{
diff --git a/hw/riscv/riscv-iommu-sys.c b/hw/riscv/riscv-iommu-sys.c
index 74e76b9..e34d00a 100644
--- a/hw/riscv/riscv-iommu-sys.c
+++ b/hw/riscv/riscv-iommu-sys.c
@@ -53,12 +53,6 @@ struct RISCVIOMMUStateSys {
uint8_t *msix_pba;
};
-struct RISCVIOMMUSysClass {
- /*< public >*/
- DeviceRealize parent_realize;
- ResettablePhases parent_phases;
-};
-
static uint64_t msix_table_mmio_read(void *opaque, hwaddr addr,
unsigned size)
{
diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c
index a877e5d..96a7fbd 100644
--- a/hw/riscv/riscv-iommu.c
+++ b/hw/riscv/riscv-iommu.c
@@ -1935,11 +1935,7 @@ static void riscv_iommu_process_dbg(RISCVIOMMUState *s)
iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10);
} else {
iova = iotlb.translated_addr & ~iotlb.addr_mask;
- iova >>= TARGET_PAGE_BITS;
- iova &= RISCV_IOMMU_TR_RESPONSE_PPN;
-
- /* We do not support superpages (> 4kbs) for now */
- iova &= ~RISCV_IOMMU_TR_RESPONSE_S;
+ iova = set_field(0, RISCV_IOMMU_TR_RESPONSE_PPN, PPN_DOWN(iova));
}
riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova);
}
@@ -2355,7 +2351,8 @@ static void riscv_iommu_realize(DeviceState *dev, Error **errp)
}
if (s->enable_g_stage) {
s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 |
- RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4;
+ RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4 |
+ RISCV_IOMMU_CAP_SVRSW60T59B;
}
if (s->hpm_cntrs > 0) {
diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c
index 1eef2fb..8b5683d 100644
--- a/hw/riscv/virt-acpi-build.c
+++ b/hw/riscv/virt-acpi-build.c
@@ -287,7 +287,7 @@ static void build_rhct(GArray *table_data,
uint32_t isa_offset, num_rhct_nodes, cmo_offset = 0;
RISCVCPU *cpu = &s->soc[0].harts[0];
uint32_t mmu_offset = 0;
- uint8_t satp_mode_max;
+ bool rv32 = riscv_cpu_is_32bit(cpu);
g_autofree char *isa = NULL;
AcpiTable table = { .sig = "RHCT", .rev = 1, .oem_id = s->oem_id,
@@ -307,7 +307,7 @@ static void build_rhct(GArray *table_data,
num_rhct_nodes++;
}
- if (cpu->cfg.satp_mode.supported != 0) {
+ if (!rv32 && cpu->cfg.max_satp_mode >= VM_1_10_SV39) {
num_rhct_nodes++;
}
@@ -367,22 +367,21 @@ static void build_rhct(GArray *table_data,
}
/* MMU node structure */
- if (cpu->cfg.satp_mode.supported != 0) {
- satp_mode_max = satp_mode_max_from_map(cpu->cfg.satp_mode.map);
+ if (!rv32 && cpu->cfg.max_satp_mode >= VM_1_10_SV39) {
mmu_offset = table_data->len - table.table_offset;
build_append_int_noprefix(table_data, 2, 2); /* Type */
build_append_int_noprefix(table_data, 8, 2); /* Length */
build_append_int_noprefix(table_data, 0x1, 2); /* Revision */
build_append_int_noprefix(table_data, 0, 1); /* Reserved */
/* MMU Type */
- if (satp_mode_max == VM_1_10_SV57) {
+ if (cpu->cfg.max_satp_mode == VM_1_10_SV57) {
build_append_int_noprefix(table_data, 2, 1); /* Sv57 */
- } else if (satp_mode_max == VM_1_10_SV48) {
+ } else if (cpu->cfg.max_satp_mode == VM_1_10_SV48) {
build_append_int_noprefix(table_data, 1, 1); /* Sv48 */
- } else if (satp_mode_max == VM_1_10_SV39) {
+ } else if (cpu->cfg.max_satp_mode == VM_1_10_SV39) {
build_append_int_noprefix(table_data, 0, 1); /* Sv39 */
} else {
- assert(1);
+ g_assert_not_reached();
}
}
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 0dcced1..47e573f 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -237,10 +237,10 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int socket,
uint32_t cpu_phandle;
MachineState *ms = MACHINE(s);
bool is_32_bit = riscv_is_32bit(&s->soc[0]);
- uint8_t satp_mode_max;
for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) {
RISCVCPU *cpu_ptr = &s->soc[socket].harts[cpu];
+ int8_t satp_mode_max = cpu_ptr->cfg.max_satp_mode;
g_autofree char *cpu_name = NULL;
g_autofree char *core_name = NULL;
g_autofree char *intc_name = NULL;
@@ -252,8 +252,7 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int socket,
s->soc[socket].hartid_base + cpu);
qemu_fdt_add_subnode(ms->fdt, cpu_name);
- if (cpu_ptr->cfg.satp_mode.supported != 0) {
- satp_mode_max = satp_mode_max_from_map(cpu_ptr->cfg.satp_mode.map);
+ if (satp_mode_max != -1) {
sv_name = g_strdup_printf("riscv,%s",
satp_mode_str(satp_mode_max, is_32_bit));
qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type", sv_name);
@@ -312,8 +311,7 @@ static void create_fdt_socket_memory(RISCVVirtState *s, int socket)
size = riscv_socket_mem_size(ms, socket);
mem_name = g_strdup_printf("/memory@%"HWADDR_PRIx, addr);
qemu_fdt_add_subnode(ms->fdt, mem_name);
- qemu_fdt_setprop_cells(ms->fdt, mem_name, "reg",
- addr >> 32, addr, size >> 32, size);
+ qemu_fdt_setprop_sized_cells(ms->fdt, mem_name, "reg", 2, addr, 2, size);
qemu_fdt_setprop_string(ms->fdt, mem_name, "device_type", "memory");
riscv_socket_fdt_write_id(ms, mem_name, socket);
}
@@ -325,7 +323,7 @@ static void create_fdt_socket_clint(RISCVVirtState *s,
int cpu;
g_autofree char *clint_name = NULL;
g_autofree uint32_t *clint_cells = NULL;
- unsigned long clint_addr;
+ hwaddr clint_addr;
MachineState *ms = MACHINE(s);
static const char * const clint_compat[2] = {
"sifive,clint0", "riscv,clint0"
@@ -341,14 +339,14 @@ static void create_fdt_socket_clint(RISCVVirtState *s,
}
clint_addr = s->memmap[VIRT_CLINT].base +
- (s->memmap[VIRT_CLINT].size * socket);
- clint_name = g_strdup_printf("/soc/clint@%lx", clint_addr);
+ s->memmap[VIRT_CLINT].size * socket;
+ clint_name = g_strdup_printf("/soc/clint@%"HWADDR_PRIx, clint_addr);
qemu_fdt_add_subnode(ms->fdt, clint_name);
qemu_fdt_setprop_string_array(ms->fdt, clint_name, "compatible",
(char **)&clint_compat,
ARRAY_SIZE(clint_compat));
- qemu_fdt_setprop_cells(ms->fdt, clint_name, "reg",
- 0x0, clint_addr, 0x0, s->memmap[VIRT_CLINT].size);
+ qemu_fdt_setprop_sized_cells(ms->fdt, clint_name, "reg",
+ 2, clint_addr, 2, s->memmap[VIRT_CLINT].size);
qemu_fdt_setprop(ms->fdt, clint_name, "interrupts-extended",
clint_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4);
riscv_socket_fdt_write_id(ms, clint_name, socket);
@@ -389,8 +387,8 @@ static void create_fdt_socket_aclint(RISCVVirtState *s,
qemu_fdt_add_subnode(ms->fdt, name);
qemu_fdt_setprop_string(ms->fdt, name, "compatible",
"riscv,aclint-mswi");
- qemu_fdt_setprop_cells(ms->fdt, name, "reg",
- 0x0, addr, 0x0, RISCV_ACLINT_SWI_SIZE);
+ qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg",
+ 2, addr, 2, RISCV_ACLINT_SWI_SIZE);
qemu_fdt_setprop(ms->fdt, name, "interrupts-extended",
aclint_mswi_cells, aclint_cells_size);
qemu_fdt_setprop(ms->fdt, name, "interrupt-controller", NULL, 0);
@@ -412,11 +410,11 @@ static void create_fdt_socket_aclint(RISCVVirtState *s,
qemu_fdt_add_subnode(ms->fdt, name);
qemu_fdt_setprop_string(ms->fdt, name, "compatible",
"riscv,aclint-mtimer");
- qemu_fdt_setprop_cells(ms->fdt, name, "reg",
- 0x0, addr + RISCV_ACLINT_DEFAULT_MTIME,
- 0x0, size - RISCV_ACLINT_DEFAULT_MTIME,
- 0x0, addr + RISCV_ACLINT_DEFAULT_MTIMECMP,
- 0x0, RISCV_ACLINT_DEFAULT_MTIME);
+ qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg",
+ 2, addr + RISCV_ACLINT_DEFAULT_MTIME,
+ 2, size - RISCV_ACLINT_DEFAULT_MTIME,
+ 2, addr + RISCV_ACLINT_DEFAULT_MTIMECMP,
+ 2, RISCV_ACLINT_DEFAULT_MTIME);
qemu_fdt_setprop(ms->fdt, name, "interrupts-extended",
aclint_mtimer_cells, aclint_cells_size);
riscv_socket_fdt_write_id(ms, name, socket);
@@ -430,8 +428,8 @@ static void create_fdt_socket_aclint(RISCVVirtState *s,
qemu_fdt_add_subnode(ms->fdt, name);
qemu_fdt_setprop_string(ms->fdt, name, "compatible",
"riscv,aclint-sswi");
- qemu_fdt_setprop_cells(ms->fdt, name, "reg",
- 0x0, addr, 0x0, s->memmap[VIRT_ACLINT_SSWI].size);
+ qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg",
+ 2, addr, 2, s->memmap[VIRT_ACLINT_SSWI].size);
qemu_fdt_setprop(ms->fdt, name, "interrupts-extended",
aclint_sswi_cells, aclint_cells_size);
qemu_fdt_setprop(ms->fdt, name, "interrupt-controller", NULL, 0);
@@ -495,8 +493,8 @@ static void create_fdt_socket_plic(RISCVVirtState *s,
s->soc[socket].num_harts * sizeof(uint32_t) * 4);
}
- qemu_fdt_setprop_cells(ms->fdt, plic_name, "reg",
- 0x0, plic_addr, 0x0, s->memmap[VIRT_PLIC].size);
+ qemu_fdt_setprop_sized_cells(ms->fdt, plic_name, "reg",
+ 2, plic_addr, 2, s->memmap[VIRT_PLIC].size);
qemu_fdt_setprop_cell(ms->fdt, plic_name, "riscv,ndev",
VIRT_IRQCHIP_NUM_SOURCES - 1);
riscv_socket_fdt_write_id(ms, plic_name, socket);
@@ -657,8 +655,8 @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket,
qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", msi_phandle);
}
- qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg",
- 0x0, aplic_addr, 0x0, aplic_size);
+ qemu_fdt_setprop_sized_cells(ms->fdt, aplic_name, "reg",
+ 2, aplic_addr, 2, aplic_size);
qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources",
VIRT_IRQCHIP_NUM_SOURCES);
@@ -858,9 +856,7 @@ static void create_fdt_virtio(RISCVVirtState *s, uint32_t irq_virtio_phandle)
qemu_fdt_add_subnode(ms->fdt, name);
qemu_fdt_setprop_string(ms->fdt, name, "compatible", "virtio,mmio");
- qemu_fdt_setprop_cells(ms->fdt, name, "reg",
- 0x0, addr,
- 0x0, size);
+ qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", 2, addr, 2, size);
qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent",
irq_virtio_phandle);
if (s->aia_type == VIRT_AIA_TYPE_NONE) {
@@ -898,8 +894,8 @@ static void create_fdt_pcie(RISCVVirtState *s,
if (s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) {
qemu_fdt_setprop_cell(ms->fdt, name, "msi-parent", msi_pcie_phandle);
}
- qemu_fdt_setprop_cells(ms->fdt, name, "reg", 0,
- s->memmap[VIRT_PCIE_ECAM].base, 0, s->memmap[VIRT_PCIE_ECAM].size);
+ qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg", 2,
+ s->memmap[VIRT_PCIE_ECAM].base, 2, s->memmap[VIRT_PCIE_ECAM].size);
qemu_fdt_setprop_sized_cells(ms->fdt, name, "ranges",
1, FDT_PCI_RANGE_IOPORT, 2, 0,
2, s->memmap[VIRT_PCIE_PIO].base, 2, s->memmap[VIRT_PCIE_PIO].size,
@@ -936,8 +932,9 @@ static void create_fdt_reset(RISCVVirtState *s, uint32_t *phandle)
qemu_fdt_setprop_string_array(ms->fdt, name, "compatible",
(char **)&compat, ARRAY_SIZE(compat));
}
- qemu_fdt_setprop_cells(ms->fdt, name, "reg",
- 0x0, s->memmap[VIRT_TEST].base, 0x0, s->memmap[VIRT_TEST].size);
+ qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg",
+ 2, s->memmap[VIRT_TEST].base,
+ 2, s->memmap[VIRT_TEST].size);
qemu_fdt_setprop_cell(ms->fdt, name, "phandle", test_phandle);
test_phandle = qemu_fdt_get_phandle(ms->fdt, name);
g_free(name);
@@ -969,9 +966,9 @@ static void create_fdt_uart(RISCVVirtState *s,
s->memmap[VIRT_UART0].base);
qemu_fdt_add_subnode(ms->fdt, name);
qemu_fdt_setprop_string(ms->fdt, name, "compatible", "ns16550a");
- qemu_fdt_setprop_cells(ms->fdt, name, "reg",
- 0x0, s->memmap[VIRT_UART0].base,
- 0x0, s->memmap[VIRT_UART0].size);
+ qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg",
+ 2, s->memmap[VIRT_UART0].base,
+ 2, s->memmap[VIRT_UART0].size);
qemu_fdt_setprop_cell(ms->fdt, name, "clock-frequency", 3686400);
qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent", irq_mmio_phandle);
if (s->aia_type == VIRT_AIA_TYPE_NONE) {
@@ -995,8 +992,9 @@ static void create_fdt_rtc(RISCVVirtState *s,
qemu_fdt_add_subnode(ms->fdt, name);
qemu_fdt_setprop_string(ms->fdt, name, "compatible",
"google,goldfish-rtc");
- qemu_fdt_setprop_cells(ms->fdt, name, "reg",
- 0x0, s->memmap[VIRT_RTC].base, 0x0, s->memmap[VIRT_RTC].size);
+ qemu_fdt_setprop_sized_cells(ms->fdt, name, "reg",
+ 2, s->memmap[VIRT_RTC].base,
+ 2, s->memmap[VIRT_RTC].size);
qemu_fdt_setprop_cell(ms->fdt, name, "interrupt-parent",
irq_mmio_phandle);
if (s->aia_type == VIRT_AIA_TYPE_NONE) {
@@ -1090,8 +1088,7 @@ static void create_fdt_iommu_sys(RISCVVirtState *s, uint32_t irq_chip,
qemu_fdt_setprop_cell(fdt, iommu_node, "#iommu-cells", 1);
qemu_fdt_setprop_cell(fdt, iommu_node, "phandle", iommu_phandle);
- qemu_fdt_setprop_cells(fdt, iommu_node, "reg",
- addr >> 32, addr, size >> 32, size);
+ qemu_fdt_setprop_sized_cells(fdt, iommu_node, "reg", 2, addr, 2, size);
qemu_fdt_setprop_cell(fdt, iommu_node, "interrupt-parent", irq_chip);
qemu_fdt_setprop_cells(fdt, iommu_node, "interrupts",
diff --git a/hw/riscv/xiangshan_kmh.c b/hw/riscv/xiangshan_kmh.c
new file mode 100644
index 0000000..a95fd61
--- /dev/null
+++ b/hw/riscv/xiangshan_kmh.c
@@ -0,0 +1,220 @@
+/*
+ * QEMU RISC-V Board Compatible with the Xiangshan Kunminghu
+ * FPGA prototype platform
+ *
+ * Copyright (c) 2025 Beijing Institute of Open Source Chip (BOSC)
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Provides a board compatible with the Xiangshan Kunminghu
+ * FPGA prototype platform:
+ *
+ * 0) UART (16550A)
+ * 1) CLINT (Core-Local Interruptor)
+ * 2) IMSIC (Incoming MSI Controller)
+ * 3) APLIC (Advanced Platform-Level Interrupt Controller)
+ *
+ * More information can be found in our Github repository:
+ * https://github.com/OpenXiangShan/XiangShan
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "system/address-spaces.h"
+#include "hw/boards.h"
+#include "hw/char/serial-mm.h"
+#include "hw/intc/riscv_aclint.h"
+#include "hw/intc/riscv_aplic.h"
+#include "hw/intc/riscv_imsic.h"
+#include "hw/qdev-properties.h"
+#include "hw/riscv/boot.h"
+#include "hw/riscv/xiangshan_kmh.h"
+#include "hw/riscv/riscv_hart.h"
+#include "system/system.h"
+
+static const MemMapEntry xiangshan_kmh_memmap[] = {
+ [XIANGSHAN_KMH_ROM] = { 0x1000, 0xF000 },
+ [XIANGSHAN_KMH_UART0] = { 0x310B0000, 0x10000 },
+ [XIANGSHAN_KMH_CLINT] = { 0x38000000, 0x10000 },
+ [XIANGSHAN_KMH_APLIC_M] = { 0x31100000, 0x4000 },
+ [XIANGSHAN_KMH_APLIC_S] = { 0x31120000, 0x4000 },
+ [XIANGSHAN_KMH_IMSIC_M] = { 0x3A800000, 0x10000 },
+ [XIANGSHAN_KMH_IMSIC_S] = { 0x3B000000, 0x80000 },
+ [XIANGSHAN_KMH_DRAM] = { 0x80000000, 0x0 },
+};
+
+static DeviceState *xiangshan_kmh_create_aia(uint32_t num_harts)
+{
+ int i;
+ const MemMapEntry *memmap = xiangshan_kmh_memmap;
+ hwaddr addr = 0;
+ DeviceState *aplic_m = NULL;
+
+ /* M-level IMSICs */
+ addr = memmap[XIANGSHAN_KMH_IMSIC_M].base;
+ for (i = 0; i < num_harts; i++) {
+ riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0), i, true,
+ 1, XIANGSHAN_KMH_IMSIC_NUM_IDS);
+ }
+
+ /* S-level IMSICs */
+ addr = memmap[XIANGSHAN_KMH_IMSIC_S].base;
+ for (i = 0; i < num_harts; i++) {
+ riscv_imsic_create(addr +
+ i * IMSIC_HART_SIZE(XIANGSHAN_KMH_IMSIC_GUEST_BITS),
+ i, false, 1 + XIANGSHAN_KMH_IMSIC_GUEST_BITS,
+ XIANGSHAN_KMH_IMSIC_NUM_IDS);
+ }
+
+ /* M-level APLIC */
+ aplic_m = riscv_aplic_create(memmap[XIANGSHAN_KMH_APLIC_M].base,
+ memmap[XIANGSHAN_KMH_APLIC_M].size,
+ 0, 0, XIANGSHAN_KMH_APLIC_NUM_SOURCES,
+ 1, true, true, NULL);
+
+ /* S-level APLIC */
+ riscv_aplic_create(memmap[XIANGSHAN_KMH_APLIC_S].base,
+ memmap[XIANGSHAN_KMH_APLIC_S].size,
+ 0, 0, XIANGSHAN_KMH_APLIC_NUM_SOURCES,
+ 1, true, false, aplic_m);
+
+ return aplic_m;
+}
+
+static void xiangshan_kmh_soc_realize(DeviceState *dev, Error **errp)
+{
+ MachineState *ms = MACHINE(qdev_get_machine());
+ XiangshanKmhSoCState *s = XIANGSHAN_KMH_SOC(dev);
+ const MemMapEntry *memmap = xiangshan_kmh_memmap;
+ MemoryRegion *system_memory = get_system_memory();
+ uint32_t num_harts = ms->smp.cpus;
+
+ qdev_prop_set_uint32(DEVICE(&s->cpus), "num-harts", num_harts);
+ qdev_prop_set_uint32(DEVICE(&s->cpus), "hartid-base", 0);
+ qdev_prop_set_string(DEVICE(&s->cpus), "cpu-type",
+ TYPE_RISCV_CPU_XIANGSHAN_KMH);
+ sysbus_realize(SYS_BUS_DEVICE(&s->cpus), &error_fatal);
+
+ /* AIA */
+ s->irqchip = xiangshan_kmh_create_aia(num_harts);
+
+ /* UART */
+ serial_mm_init(system_memory, memmap[XIANGSHAN_KMH_UART0].base, 2,
+ qdev_get_gpio_in(s->irqchip, XIANGSHAN_KMH_UART0_IRQ),
+ 115200, serial_hd(0), DEVICE_LITTLE_ENDIAN);
+
+ /* CLINT */
+ riscv_aclint_swi_create(memmap[XIANGSHAN_KMH_CLINT].base,
+ 0, num_harts, false);
+ riscv_aclint_mtimer_create(memmap[XIANGSHAN_KMH_CLINT].base +
+ RISCV_ACLINT_SWI_SIZE,
+ RISCV_ACLINT_DEFAULT_MTIMER_SIZE,
+ 0, num_harts, RISCV_ACLINT_DEFAULT_MTIMECMP,
+ RISCV_ACLINT_DEFAULT_MTIME,
+ XIANGSHAN_KMH_CLINT_TIMEBASE_FREQ, true);
+
+ /* ROM */
+ memory_region_init_rom(&s->rom, OBJECT(dev), "xiangshan.kunminghu.rom",
+ memmap[XIANGSHAN_KMH_ROM].size, &error_fatal);
+ memory_region_add_subregion(system_memory,
+ memmap[XIANGSHAN_KMH_ROM].base, &s->rom);
+}
+
+static void xiangshan_kmh_soc_class_init(ObjectClass *klass, const void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->realize = xiangshan_kmh_soc_realize;
+ dc->user_creatable = false;
+}
+
+static void xiangshan_kmh_soc_instance_init(Object *obj)
+{
+ XiangshanKmhSoCState *s = XIANGSHAN_KMH_SOC(obj);
+
+ object_initialize_child(obj, "cpus", &s->cpus, TYPE_RISCV_HART_ARRAY);
+}
+
+static const TypeInfo xiangshan_kmh_soc_info = {
+ .name = TYPE_XIANGSHAN_KMH_SOC,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(XiangshanKmhSoCState),
+ .instance_init = xiangshan_kmh_soc_instance_init,
+ .class_init = xiangshan_kmh_soc_class_init,
+};
+
+static void xiangshan_kmh_soc_register_types(void)
+{
+ type_register_static(&xiangshan_kmh_soc_info);
+}
+type_init(xiangshan_kmh_soc_register_types)
+
+static void xiangshan_kmh_machine_init(MachineState *machine)
+{
+ XiangshanKmhState *s = XIANGSHAN_KMH_MACHINE(machine);
+ const MemMapEntry *memmap = xiangshan_kmh_memmap;
+ MemoryRegion *system_memory = get_system_memory();
+ hwaddr start_addr = memmap[XIANGSHAN_KMH_DRAM].base;
+
+ /* Initialize SoC */
+ object_initialize_child(OBJECT(machine), "soc", &s->soc,
+ TYPE_XIANGSHAN_KMH_SOC);
+ qdev_realize(DEVICE(&s->soc), NULL, &error_fatal);
+
+ /* Register RAM */
+ memory_region_add_subregion(system_memory,
+ memmap[XIANGSHAN_KMH_DRAM].base,
+ machine->ram);
+
+ /* ROM reset vector */
+ riscv_setup_rom_reset_vec(machine, &s->soc.cpus,
+ start_addr,
+ memmap[XIANGSHAN_KMH_ROM].base,
+ memmap[XIANGSHAN_KMH_ROM].size, 0, 0);
+ if (machine->firmware) {
+ riscv_load_firmware(machine->firmware, &start_addr, NULL);
+ }
+
+ /* Note: dtb has been integrated into firmware(OpenSBI) when compiling */
+}
+
+static void xiangshan_kmh_machine_class_init(ObjectClass *klass, const void *data)
+{
+ MachineClass *mc = MACHINE_CLASS(klass);
+ static const char *const valid_cpu_types[] = {
+ TYPE_RISCV_CPU_XIANGSHAN_KMH,
+ NULL
+ };
+
+ mc->desc = "RISC-V Board compatible with the Xiangshan " \
+ "Kunminghu FPGA prototype platform";
+ mc->init = xiangshan_kmh_machine_init;
+ mc->max_cpus = XIANGSHAN_KMH_MAX_CPUS;
+ mc->default_cpu_type = TYPE_RISCV_CPU_XIANGSHAN_KMH;
+ mc->valid_cpu_types = valid_cpu_types;
+ mc->default_ram_id = "xiangshan.kunminghu.ram";
+}
+
+static const TypeInfo xiangshan_kmh_machine_info = {
+ .name = TYPE_XIANGSHAN_KMH_MACHINE,
+ .parent = TYPE_MACHINE,
+ .instance_size = sizeof(XiangshanKmhState),
+ .class_init = xiangshan_kmh_machine_class_init,
+};
+
+static void xiangshan_kmh_machine_register_types(void)
+{
+ type_register_static(&xiangshan_kmh_machine_info);
+}
+type_init(xiangshan_kmh_machine_register_types)
diff --git a/hw/s390x/ap-stub.c b/hw/s390x/ap-stub.c
new file mode 100644
index 0000000..001fe5f
--- /dev/null
+++ b/hw/s390x/ap-stub.c
@@ -0,0 +1,21 @@
+/*
+ * VFIO based AP matrix device assignment
+ *
+ * Copyright 2025 IBM Corp.
+ * Author(s): Rorie Reyes <rreyes@linux.ibm.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/s390x/ap-bridge.h"
+
+int ap_chsc_sei_nt0_get_event(void *res)
+{
+ return EVENT_INFORMATION_NOT_STORED;
+}
+
+bool ap_chsc_sei_nt0_have_event(void)
+{
+ return false;
+}
diff --git a/hw/s390x/ccw-device.c b/hw/s390x/ccw-device.c
index 19c2238..8be1813 100644
--- a/hw/s390x/ccw-device.c
+++ b/hw/s390x/ccw-device.c
@@ -57,7 +57,7 @@ static void ccw_device_set_loadparm(Object *obj, Visitor *v,
Error **errp)
{
CcwDevice *dev = CCW_DEVICE(obj);
- char *val;
+ g_autofree char *val = NULL;
int index;
index = object_property_get_int(obj, "bootindex", NULL);
diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
index 7d4e1f5..b513f89 100644
--- a/hw/s390x/cpu-topology.c
+++ b/hw/s390x/cpu-topology.c
@@ -23,8 +23,8 @@
#include "target/s390x/cpu.h"
#include "hw/s390x/s390-virtio-ccw.h"
#include "hw/s390x/cpu-topology.h"
-#include "qapi/qapi-commands-machine-target.h"
-#include "qapi/qapi-events-machine-target.h"
+#include "qapi/qapi-commands-machine-s390x.h"
+#include "qapi/qapi-events-machine-s390x.h"
/*
* s390_topology is used to keep the topology information.
diff --git a/hw/s390x/event-facility.c b/hw/s390x/event-facility.c
index 7b7bf23..fee286e 100644
--- a/hw/s390x/event-facility.c
+++ b/hw/s390x/event-facility.c
@@ -4,6 +4,7 @@
* handles SCLP event types
* - Signal Quiesce - system power down
* - ASCII Console Data - VT220 read and write
+ * - Control-Program Identification - Send OS data from guest to host
*
* Copyright IBM, Corp. 2012
*
@@ -40,6 +41,7 @@ struct SCLPEventFacility {
SysBusDevice parent_obj;
SCLPEventsBus sbus;
SCLPEvent quiesce, cpu_hotplug;
+ SCLPEventCPI cpi;
/* guest's receive mask */
union {
uint32_t receive_mask_pieces[2];
diff --git a/hw/s390x/meson.build b/hw/s390x/meson.build
index 3bbebfd..1bc8583 100644
--- a/hw/s390x/meson.build
+++ b/hw/s390x/meson.build
@@ -13,6 +13,7 @@ s390x_ss.add(files(
's390-skeys.c',
's390-stattrib.c',
'sclp.c',
+ 'sclpcpi.c',
'sclpcpu.c',
'sclpquiesce.c',
'tod.c',
@@ -33,6 +34,7 @@ s390x_ss.add(when: 'CONFIG_S390_CCW_VIRTIO', if_true: files(
))
s390x_ss.add(when: 'CONFIG_TERMINAL3270', if_true: files('3270-ccw.c'))
s390x_ss.add(when: 'CONFIG_VFIO', if_true: files('s390-pci-vfio.c'))
+s390x_ss.add(when: 'CONFIG_VFIO_AP', if_false: files('ap-stub.c'))
virtio_ss = ss.source_set()
virtio_ss.add(files('virtio-ccw.c'))
diff --git a/hw/s390x/s390-skeys.c b/hw/s390x/s390-skeys.c
index aedb62b..8eeecfd 100644
--- a/hw/s390x/s390-skeys.c
+++ b/hw/s390x/s390-skeys.c
@@ -17,7 +17,6 @@
#include "hw/s390x/storage-keys.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-machine.h"
-#include "qapi/qapi-commands-misc-target.h"
#include "qobject/qdict.h"
#include "qemu/error-report.h"
#include "system/memory_mapping.h"
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index f69a4d8..a79bd13 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -260,9 +260,21 @@ static void s390_create_sclpconsole(SCLPDevice *sclp,
qdev_realize_and_unref(dev, ev_fac_bus, &error_fatal);
}
+static void s390_create_sclpcpi(SCLPDevice *sclp)
+{
+ SCLPEventFacility *ef = sclp->event_facility;
+ BusState *ev_fac_bus = sclp_get_event_facility_bus(ef);
+ DeviceState *dev;
+
+ dev = qdev_new(TYPE_SCLP_EVENT_CPI);
+ object_property_add_child(OBJECT(ef), "sclpcpi", OBJECT(dev));
+ qdev_realize_and_unref(dev, ev_fac_bus, &error_fatal);
+}
+
static void ccw_init(MachineState *machine)
{
MachineClass *mc = MACHINE_GET_CLASS(machine);
+ S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc);
S390CcwMachineState *ms = S390_CCW_MACHINE(machine);
int ret;
VirtualCssBus *css_bus;
@@ -323,6 +335,12 @@ static void ccw_init(MachineState *machine)
/* init the TOD clock */
s390_init_tod();
+
+ /* init SCLP event Control-Program Identification */
+ if (s390mc->use_cpi) {
+ s390_create_sclpcpi(ms->sclp);
+ }
+
}
static void s390_cpu_plug(HotplugHandler *hotplug_dev,
@@ -783,6 +801,7 @@ static void ccw_machine_class_init(ObjectClass *oc, const void *data)
DumpSKeysInterface *dsi = DUMP_SKEYS_INTERFACE_CLASS(oc);
s390mc->max_threads = 1;
+ s390mc->use_cpi = true;
mc->reset = s390_machine_reset;
mc->block_default_type = IF_VIRTIO;
mc->no_cdrom = 1;
@@ -908,6 +927,9 @@ static void ccw_machine_10_0_instance_options(MachineState *machine)
static void ccw_machine_10_0_class_options(MachineClass *mc)
{
+ S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc);
+ s390mc->use_cpi = false;
+
ccw_machine_10_1_class_options(mc);
compat_props_add(mc->compat_props, hw_compat_10_0, hw_compat_10_0_len);
}
@@ -1145,20 +1167,6 @@ static void ccw_machine_4_2_class_options(MachineClass *mc)
}
DEFINE_CCW_MACHINE(4, 2);
-static void ccw_machine_4_1_instance_options(MachineState *machine)
-{
- static const S390FeatInit qemu_cpu_feat = { S390_FEAT_LIST_QEMU_V4_1 };
- ccw_machine_4_2_instance_options(machine);
- s390_set_qemu_cpu_model(0x2964, 13, 2, qemu_cpu_feat);
-}
-
-static void ccw_machine_4_1_class_options(MachineClass *mc)
-{
- ccw_machine_4_2_class_options(mc);
- compat_props_add(mc->compat_props, hw_compat_4_1, hw_compat_4_1_len);
-}
-DEFINE_CCW_MACHINE(4, 1);
-
static void ccw_machine_register_types(void)
{
type_register_static(&ccw_machine_info);
diff --git a/hw/s390x/sclpcpi.c b/hw/s390x/sclpcpi.c
new file mode 100644
index 0000000..7aa039d
--- /dev/null
+++ b/hw/s390x/sclpcpi.c
@@ -0,0 +1,212 @@
+ /*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * SCLP event type 11 - Control-Program Identification (CPI):
+ * CPI is used to send program identifiers from the guest to the
+ * Service-Call Logical Processor (SCLP). It is not sent by the SCLP.
+ *
+ * Control-program identifiers provide data about the guest operating
+ * system. The control-program identifiers are: system type, system name,
+ * system level and sysplex name.
+ *
+ * In Linux, all the control-program identifiers are user configurable. The
+ * system type, system name, and sysplex name use EBCDIC characters from
+ * this set: capital A-Z, 0-9, $, @, #, and blank. In Linux, the system
+ * type, system name and sysplex name are arbitrary free-form texts.
+ *
+ * In Linux, the 8-byte hexadecimal system-level has the format
+ * 0x<a><b><cc><dd><eeee><ff><gg><hh>, where:
+ * <a>: is a 4-bit digit, its most significant bit indicates hypervisor use
+ * <b>: is one digit that represents Linux distributions as follows
+ * 0: generic Linux
+ * 1: Red Hat Enterprise Linux
+ * 2: SUSE Linux Enterprise Server
+ * 3: Canonical Ubuntu
+ * 4: Fedora
+ * 5: openSUSE Leap
+ * 6: Debian GNU/Linux
+ * 7: Red Hat Enterprise Linux CoreOS
+ * <cc>: are two digits for a distribution-specific encoding of the major
+ * version of the distribution
+ * <dd>: are two digits for a distribution-specific encoding of the minor
+ * version of the distribution
+ * <eeee>: are four digits for the patch level of the distribution
+ * <ff>: are two digits for the major version of the kernel
+ * <gg>: are two digits for the minor version of the kernel
+ * <hh>: are two digits for the stable version of the kernel
+ * (e.g. 74872343805430528, when converted to hex is 0x010a000000060b00). On
+ * machines prior to z16, some of the values are not available to display.
+ *
+ * Sysplex refers to a cluster of logical partitions that communicates and
+ * co-operates with each other.
+ *
+ * The CPI feature is supported since 10.1.
+ *
+ * Copyright IBM, Corp. 2024
+ *
+ * Authors:
+ * Shalini Chellathurai Saroja <shalini@linux.ibm.com>
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/timer.h"
+#include "hw/s390x/event-facility.h"
+#include "hw/s390x/ebcdic.h"
+#include "qapi/qapi-visit-machine.h"
+#include "migration/vmstate.h"
+
+typedef struct Data {
+ uint8_t id_format;
+ uint8_t reserved0;
+ uint8_t system_type[8];
+ uint64_t reserved1;
+ uint8_t system_name[8];
+ uint64_t reserved2;
+ uint64_t system_level;
+ uint64_t reserved3;
+ uint8_t sysplex_name[8];
+ uint8_t reserved4[16];
+} QEMU_PACKED Data;
+
+typedef struct ControlProgramIdMsg {
+ EventBufferHeader ebh;
+ Data data;
+} QEMU_PACKED ControlProgramIdMsg;
+
+static bool can_handle_event(uint8_t type)
+{
+ return type == SCLP_EVENT_CTRL_PGM_ID;
+}
+
+static sccb_mask_t send_mask(void)
+{
+ return 0;
+}
+
+/* Enable SCLP to accept buffers of event type CPI from the control-program. */
+static sccb_mask_t receive_mask(void)
+{
+ return SCLP_EVENT_MASK_CTRL_PGM_ID;
+}
+
+static int write_event_data(SCLPEvent *event, EventBufferHeader *evt_buf_hdr)
+{
+ ControlProgramIdMsg *cpim = container_of(evt_buf_hdr, ControlProgramIdMsg,
+ ebh);
+ SCLPEventCPI *e = SCLP_EVENT_CPI(event);
+
+ ascii_put(e->system_type, (char *)cpim->data.system_type,
+ sizeof(cpim->data.system_type));
+ ascii_put(e->system_name, (char *)cpim->data.system_name,
+ sizeof(cpim->data.system_name));
+ ascii_put(e->sysplex_name, (char *)cpim->data.sysplex_name,
+ sizeof(cpim->data.sysplex_name));
+ e->system_level = ldq_be_p(&cpim->data.system_level);
+ e->timestamp = qemu_clock_get_ns(QEMU_CLOCK_HOST);
+
+ cpim->ebh.flags = SCLP_EVENT_BUFFER_ACCEPTED;
+ return SCLP_RC_NORMAL_COMPLETION;
+}
+
+static char *get_system_type(Object *obj, Error **errp)
+{
+ SCLPEventCPI *e = SCLP_EVENT_CPI(obj);
+
+ return g_strndup((char *) e->system_type, sizeof(e->system_type));
+}
+
+static char *get_system_name(Object *obj, Error **errp)
+{
+ SCLPEventCPI *e = SCLP_EVENT_CPI(obj);
+
+ return g_strndup((char *) e->system_name, sizeof(e->system_name));
+}
+
+static char *get_sysplex_name(Object *obj, Error **errp)
+{
+ SCLPEventCPI *e = SCLP_EVENT_CPI(obj);
+
+ return g_strndup((char *) e->sysplex_name, sizeof(e->sysplex_name));
+}
+
+static void get_system_level(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ SCLPEventCPI *e = SCLP_EVENT_CPI(obj);
+
+ visit_type_uint64(v, name, &e->system_level, errp);
+}
+
+static void get_timestamp(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ SCLPEventCPI *e = SCLP_EVENT_CPI(obj);
+
+ visit_type_uint64(v, name, &e->timestamp, errp);
+}
+
+static const VMStateDescription vmstate_sclpcpi = {
+ .name = "s390_control_program_id",
+ .version_id = 0,
+ .fields = (const VMStateField[]) {
+ VMSTATE_UINT8_ARRAY(system_type, SCLPEventCPI, 8),
+ VMSTATE_UINT8_ARRAY(system_name, SCLPEventCPI, 8),
+ VMSTATE_UINT64(system_level, SCLPEventCPI),
+ VMSTATE_UINT8_ARRAY(sysplex_name, SCLPEventCPI, 8),
+ VMSTATE_UINT64(timestamp, SCLPEventCPI),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static void cpi_class_init(ObjectClass *klass, const void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ SCLPEventClass *k = SCLP_EVENT_CLASS(klass);
+
+ dc->user_creatable = false;
+ dc->vmsd = &vmstate_sclpcpi;
+
+ k->can_handle_event = can_handle_event;
+ k->get_send_mask = send_mask;
+ k->get_receive_mask = receive_mask;
+ k->write_event_data = write_event_data;
+
+ object_class_property_add_str(klass, "system_type", get_system_type, NULL);
+ object_class_property_set_description(klass, "system_type",
+ "operating system e.g. \"LINUX \"");
+
+ object_class_property_add_str(klass, "system_name", get_system_name, NULL);
+ object_class_property_set_description(klass, "system_name",
+ "user configurable name of the VM e.g. \"TESTVM \"");
+
+ object_class_property_add_str(klass, "sysplex_name", get_sysplex_name,
+ NULL);
+ object_class_property_set_description(klass, "sysplex_name",
+ "name of the cluster which the VM belongs to, if any"
+ " e.g. \"PLEX \"");
+
+ object_class_property_add(klass, "system_level", "uint64", get_system_level,
+ NULL, NULL, NULL);
+ object_class_property_set_description(klass, "system_level",
+ "distribution and kernel version in Linux e.g. 74872343805430528");
+
+ object_class_property_add(klass, "timestamp", "uint64", get_timestamp,
+ NULL, NULL, NULL);
+ object_class_property_set_description(klass, "timestamp",
+ "latest update of CPI data in nanoseconds since the UNIX EPOCH");
+}
+
+static const TypeInfo sclp_cpi_info = {
+ .name = TYPE_SCLP_EVENT_CPI,
+ .parent = TYPE_SCLP_EVENT,
+ .instance_size = sizeof(SCLPEventCPI),
+ .class_init = cpi_class_init,
+};
+
+static void sclp_cpi_register_types(void)
+{
+ type_register_static(&sclp_cpi_info);
+}
+
+type_init(sclp_cpi_register_types)
diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
index f4f2ef3..9ea4aa0 100644
--- a/hw/scsi/lsi53c895a.c
+++ b/hw/scsi/lsi53c895a.c
@@ -1112,7 +1112,7 @@ bad:
static void lsi_memcpy(LSIState *s, uint32_t dest, uint32_t src, int count)
{
int n;
- uint8_t buf[LSI_BUF_SIZE];
+ QEMU_UNINITIALIZED uint8_t buf[LSI_BUF_SIZE];
trace_lsi_memcpy(dest, src, count);
while (count) {
diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
index 55cd188..844643d 100644
--- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c
@@ -981,13 +981,11 @@ static int megasas_event_wait(MegasasState *s, MegasasCmd *cmd)
static int megasas_dcmd_pd_get_list(MegasasState *s, MegasasCmd *cmd)
{
- struct mfi_pd_list info;
- size_t dcmd_size = sizeof(info);
+ struct mfi_pd_list info = {};
BusChild *kid;
uint32_t offset, dcmd_limit, num_pd_disks = 0, max_pd_disks;
dma_addr_t residual;
- memset(&info, 0, dcmd_size);
offset = 8;
dcmd_limit = offset + sizeof(struct mfi_pd_address);
if (cmd->iov_size < dcmd_limit) {
@@ -1429,11 +1427,10 @@ static int megasas_dcmd_cfg_read(MegasasState *s, MegasasCmd *cmd)
static int megasas_dcmd_get_properties(MegasasState *s, MegasasCmd *cmd)
{
- struct mfi_ctrl_props info;
+ struct mfi_ctrl_props info = {};
size_t dcmd_size = sizeof(info);
dma_addr_t residual;
- memset(&info, 0x0, dcmd_size);
if (cmd->iov_size < dcmd_size) {
trace_megasas_dcmd_invalid_xfer_len(cmd->index, cmd->iov_size,
dcmd_size);
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index cb4af1b..b4782c6 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -74,7 +74,7 @@ struct SCSIDiskClass {
*/
DMAIOFunc *dma_readv;
DMAIOFunc *dma_writev;
- bool (*need_fua_emulation)(SCSICommand *cmd);
+ bool (*need_fua)(SCSICommand *cmd);
void (*update_sense)(SCSIRequest *r);
};
@@ -85,7 +85,7 @@ typedef struct SCSIDiskReq {
uint32_t sector_count;
uint32_t buflen;
bool started;
- bool need_fua_emulation;
+ bool need_fua;
struct iovec iov;
QEMUIOVector qiov;
BlockAcctCookie acct;
@@ -389,24 +389,6 @@ static bool scsi_is_cmd_fua(SCSICommand *cmd)
}
}
-static void scsi_write_do_fua(SCSIDiskReq *r)
-{
- SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
-
- assert(r->req.aiocb == NULL);
- assert(!r->req.io_canceled);
-
- if (r->need_fua_emulation) {
- block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
- BLOCK_ACCT_FLUSH);
- r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_aio_complete, r);
- return;
- }
-
- scsi_req_complete(&r->req, GOOD);
- scsi_req_unref(&r->req);
-}
-
static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret)
{
assert(r->req.aiocb == NULL);
@@ -416,12 +398,7 @@ static void scsi_dma_complete_noio(SCSIDiskReq *r, int ret)
r->sector += r->sector_count;
r->sector_count = 0;
- if (r->req.cmd.mode == SCSI_XFER_TO_DEV) {
- scsi_write_do_fua(r);
- return;
- } else {
- scsi_req_complete(&r->req, GOOD);
- }
+ scsi_req_complete(&r->req, GOOD);
done:
scsi_req_unref(&r->req);
@@ -564,7 +541,7 @@ static void scsi_read_data(SCSIRequest *req)
first = !r->started;
r->started = true;
- if (first && r->need_fua_emulation) {
+ if (first && r->need_fua) {
block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
BLOCK_ACCT_FLUSH);
r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, scsi_do_read_cb, r);
@@ -589,8 +566,7 @@ static void scsi_write_complete_noio(SCSIDiskReq *r, int ret)
r->sector += n;
r->sector_count -= n;
if (r->sector_count == 0) {
- scsi_write_do_fua(r);
- return;
+ scsi_req_complete(&r->req, GOOD);
} else {
scsi_init_iovec(r, SCSI_DMA_BUF_SIZE);
trace_scsi_disk_write_complete_noio(r->req.tag, r->qiov.size);
@@ -623,6 +599,7 @@ static void scsi_write_data(SCSIRequest *req)
SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s));
+ BlockCompletionFunc *cb;
/* No data transfer may already be in progress */
assert(r->req.aiocb == NULL);
@@ -648,11 +625,10 @@ static void scsi_write_data(SCSIRequest *req)
if (r->req.cmd.buf[0] == VERIFY_10 || r->req.cmd.buf[0] == VERIFY_12 ||
r->req.cmd.buf[0] == VERIFY_16) {
- if (r->req.sg) {
- scsi_dma_complete_noio(r, 0);
- } else {
- scsi_write_complete_noio(r, 0);
- }
+ block_acct_start(blk_get_stats(s->qdev.conf.blk), &r->acct, 0,
+ BLOCK_ACCT_FLUSH);
+ cb = r->req.sg ? scsi_dma_complete : scsi_write_complete;
+ r->req.aiocb = blk_aio_flush(s->qdev.conf.blk, cb, r);
return;
}
@@ -2391,7 +2367,7 @@ static int32_t scsi_disk_dma_command(SCSIRequest *req, uint8_t *buf)
scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
return 0;
}
- r->need_fua_emulation = sdc->need_fua_emulation(&r->req.cmd);
+ r->need_fua = sdc->need_fua(&r->req.cmd);
if (r->sector_count == 0) {
scsi_req_complete(&r->req, GOOD);
}
@@ -3137,7 +3113,8 @@ BlockAIOCB *scsi_dma_writev(int64_t offset, QEMUIOVector *iov,
{
SCSIDiskReq *r = opaque;
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
- return blk_aio_pwritev(s->qdev.conf.blk, offset, iov, 0, cb, cb_opaque);
+ int flags = r->need_fua ? BDRV_REQ_FUA : 0;
+ return blk_aio_pwritev(s->qdev.conf.blk, offset, iov, flags, cb, cb_opaque);
}
static char *scsi_property_get_loadparm(Object *obj, Error **errp)
@@ -3186,7 +3163,7 @@ static void scsi_disk_base_class_initfn(ObjectClass *klass, const void *data)
device_class_set_legacy_reset(dc, scsi_disk_reset);
sdc->dma_readv = scsi_dma_readv;
sdc->dma_writev = scsi_dma_writev;
- sdc->need_fua_emulation = scsi_is_cmd_fua;
+ sdc->need_fua = scsi_is_cmd_fua;
}
static const TypeInfo scsi_disk_base_info = {
@@ -3215,7 +3192,7 @@ static const Property scsi_hd_properties[] = {
DEFINE_PROP_BIT("removable", SCSIDiskState, features,
SCSI_DISK_F_REMOVABLE, false),
DEFINE_PROP_BIT("dpofua", SCSIDiskState, features,
- SCSI_DISK_F_DPOFUA, false),
+ SCSI_DISK_F_DPOFUA, true),
DEFINE_PROP_UINT64("wwn", SCSIDiskState, qdev.wwn, 0),
DEFINE_PROP_UINT64("port_wwn", SCSIDiskState, qdev.port_wwn, 0),
DEFINE_PROP_UINT16("port_index", SCSIDiskState, port_index, 0),
@@ -3338,7 +3315,7 @@ static void scsi_block_class_initfn(ObjectClass *klass, const void *data)
sdc->dma_readv = scsi_block_dma_readv;
sdc->dma_writev = scsi_block_dma_writev;
sdc->update_sense = scsi_block_update_sense;
- sdc->need_fua_emulation = scsi_block_no_fua;
+ sdc->need_fua = scsi_block_no_fua;
dc->desc = "SCSI block device passthrough";
device_class_set_props(dc, scsi_block_properties);
dc->vmsd = &vmstate_scsi_disk_state;
diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c
index d5825b6..7c98b1b 100644
--- a/hw/scsi/vmw_pvscsi.c
+++ b/hw/scsi/vmw_pvscsi.c
@@ -68,18 +68,7 @@ struct PVSCSIClass {
OBJECT_DECLARE_TYPE(PVSCSIState, PVSCSIClass, PVSCSI)
-/* Compatibility flags for migration */
-#define PVSCSI_COMPAT_OLD_PCI_CONFIGURATION_BIT 0
-#define PVSCSI_COMPAT_OLD_PCI_CONFIGURATION \
- (1 << PVSCSI_COMPAT_OLD_PCI_CONFIGURATION_BIT)
-#define PVSCSI_COMPAT_DISABLE_PCIE_BIT 1
-#define PVSCSI_COMPAT_DISABLE_PCIE \
- (1 << PVSCSI_COMPAT_DISABLE_PCIE_BIT)
-
-#define PVSCSI_USE_OLD_PCI_CONFIGURATION(s) \
- ((s)->compat_flags & PVSCSI_COMPAT_OLD_PCI_CONFIGURATION)
-#define PVSCSI_MSI_OFFSET(s) \
- (PVSCSI_USE_OLD_PCI_CONFIGURATION(s) ? 0x50 : 0x7c)
+#define PVSCSI_MSI_OFFSET (0x7c)
#define PVSCSI_EXP_EP_OFFSET (0x40)
typedef struct PVSCSIRingInfo {
@@ -129,8 +118,6 @@ struct PVSCSIState {
uint8_t msi_used; /* For migration compatibility */
PVSCSIRingInfo rings; /* Data transfer rings manager */
uint32_t resetting; /* Reset in progress */
-
- uint32_t compat_flags;
};
typedef struct PVSCSIRequest {
@@ -1110,7 +1097,7 @@ pvscsi_init_msi(PVSCSIState *s)
int res;
PCIDevice *d = PCI_DEVICE(s);
- res = msi_init(d, PVSCSI_MSI_OFFSET(s), PVSCSI_MSIX_NUM_VECTORS,
+ res = msi_init(d, PVSCSI_MSI_OFFSET, PVSCSI_MSIX_NUM_VECTORS,
PVSCSI_USE_64BIT, PVSCSI_PER_VECTOR_MASK, NULL);
if (res < 0) {
trace_pvscsi_init_msi_fail(res);
@@ -1158,15 +1145,11 @@ pvscsi_realizefn(PCIDevice *pci_dev, Error **errp)
trace_pvscsi_state("init");
/* PCI subsystem ID, subsystem vendor ID, revision */
- if (PVSCSI_USE_OLD_PCI_CONFIGURATION(s)) {
- pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID, 0x1000);
- } else {
- pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID,
- PCI_VENDOR_ID_VMWARE);
- pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID,
- PCI_DEVICE_ID_VMWARE_PVSCSI);
- pci_config_set_revision(pci_dev->config, 0x2);
- }
+ pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID,
+ PCI_VENDOR_ID_VMWARE);
+ pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID,
+ PCI_DEVICE_ID_VMWARE_PVSCSI);
+ pci_config_set_revision(pci_dev->config, 0x2);
/* PCI latency timer = 255 */
pci_dev->config[PCI_LATENCY_TIMER] = 0xff;
@@ -1234,21 +1217,8 @@ pvscsi_post_load(void *opaque, int version_id)
return 0;
}
-static bool pvscsi_vmstate_need_pcie_device(void *opaque)
-{
- PVSCSIState *s = PVSCSI(opaque);
-
- return !(s->compat_flags & PVSCSI_COMPAT_DISABLE_PCIE);
-}
-
-static bool pvscsi_vmstate_test_pci_device(void *opaque, int version_id)
-{
- return !pvscsi_vmstate_need_pcie_device(opaque);
-}
-
static const VMStateDescription vmstate_pvscsi_pcie_device = {
.name = "pvscsi/pcie",
- .needed = pvscsi_vmstate_need_pcie_device,
.fields = (const VMStateField[]) {
VMSTATE_PCI_DEVICE(parent_obj, PVSCSIState),
VMSTATE_END_OF_LIST()
@@ -1262,9 +1232,6 @@ static const VMStateDescription vmstate_pvscsi = {
.pre_save = pvscsi_pre_save,
.post_load = pvscsi_post_load,
.fields = (const VMStateField[]) {
- VMSTATE_STRUCT_TEST(parent_obj, PVSCSIState,
- pvscsi_vmstate_test_pci_device, 0,
- vmstate_pci_device, PCIDevice),
VMSTATE_UINT8(msi_used, PVSCSIState),
VMSTATE_UINT32(resetting, PVSCSIState),
VMSTATE_UINT64(reg_interrupt_status, PVSCSIState),
@@ -1298,30 +1265,17 @@ static const VMStateDescription vmstate_pvscsi = {
static const Property pvscsi_properties[] = {
DEFINE_PROP_UINT8("use_msg", PVSCSIState, use_msg, 1),
- DEFINE_PROP_BIT("x-old-pci-configuration", PVSCSIState, compat_flags,
- PVSCSI_COMPAT_OLD_PCI_CONFIGURATION_BIT, false),
- DEFINE_PROP_BIT("x-disable-pcie", PVSCSIState, compat_flags,
- PVSCSI_COMPAT_DISABLE_PCIE_BIT, false),
};
-static void pvscsi_realize(DeviceState *qdev, Error **errp)
+static void pvscsi_instance_init(Object *obj)
{
- PVSCSIClass *pvs_c = PVSCSI_GET_CLASS(qdev);
- PCIDevice *pci_dev = PCI_DEVICE(qdev);
- PVSCSIState *s = PVSCSI(qdev);
-
- if (!(s->compat_flags & PVSCSI_COMPAT_DISABLE_PCIE)) {
- pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
- }
-
- pvs_c->parent_dc_realize(qdev, errp);
+ PCI_DEVICE(obj)->cap_present |= QEMU_PCI_CAP_EXPRESS;
}
static void pvscsi_class_init(ObjectClass *klass, const void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
- PVSCSIClass *pvs_k = PVSCSI_CLASS(klass);
HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
k->realize = pvscsi_realizefn;
@@ -1330,8 +1284,6 @@ static void pvscsi_class_init(ObjectClass *klass, const void *data)
k->device_id = PCI_DEVICE_ID_VMWARE_PVSCSI;
k->class_id = PCI_CLASS_STORAGE_SCSI;
k->subsystem_id = 0x1000;
- device_class_set_parent_realize(dc, pvscsi_realize,
- &pvs_k->parent_dc_realize);
device_class_set_legacy_reset(dc, pvscsi_reset);
dc->vmsd = &vmstate_pvscsi;
device_class_set_props(dc, pvscsi_properties);
@@ -1346,6 +1298,7 @@ static const TypeInfo pvscsi_info = {
.class_size = sizeof(PVSCSIClass),
.instance_size = sizeof(PVSCSIState),
.class_init = pvscsi_class_init,
+ .instance_init = pvscsi_instance_init,
.interfaces = (const InterfaceInfo[]) {
{ TYPE_HOTPLUG_HANDLER },
{ INTERFACE_PCIE_DEVICE },
diff --git a/hw/scsi/vmw_pvscsi.h b/hw/scsi/vmw_pvscsi.h
index 17fcf66..a3ae517 100644
--- a/hw/scsi/vmw_pvscsi.h
+++ b/hw/scsi/vmw_pvscsi.h
@@ -14,8 +14,8 @@
* details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ * along with this program; if not, see
+ * <https://www.gnu.org/licenses/>.
*
* Maintained by: Arvind Kumar <arvindkumar@vmware.com>
*
diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index d1b7bc5..cb48cc1 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -328,16 +328,16 @@ static const VMStateDescription vmstate_hpet_timer = {
static const VMStateDescription vmstate_hpet = {
.name = "hpet",
.version_id = 2,
- .minimum_version_id = 1,
+ .minimum_version_id = 2,
.pre_save = hpet_pre_save,
.post_load = hpet_post_load,
.fields = (const VMStateField[]) {
VMSTATE_UINT64(config, HPETState),
VMSTATE_UINT64(isr, HPETState),
VMSTATE_UINT64(hpet_counter, HPETState),
- VMSTATE_UINT8_V(num_timers_save, HPETState, 2),
+ VMSTATE_UINT8(num_timers_save, HPETState),
VMSTATE_VALIDATE("num_timers must match", hpet_validate_num_timers),
- VMSTATE_STRUCT_VARRAY_UINT8(timer, HPETState, num_timers, 0,
+ VMSTATE_STRUCT_VARRAY_UINT8(timer, HPETState, num_timers_save, 0,
vmstate_hpet_timer, HPETTimer),
VMSTATE_END_OF_LIST()
},
@@ -426,30 +426,11 @@ static uint64_t hpet_ram_read(void *opaque, hwaddr addr,
uint64_t cur_tick;
trace_hpet_ram_read(addr);
+ addr &= ~4;
- /*address range of all TN regs*/
- if (addr >= 0x100 && addr <= 0x3ff) {
- uint8_t timer_id = (addr - 0x100) / 0x20;
- HPETTimer *timer = &s->timer[timer_id];
-
- if (timer_id > s->num_timers) {
- trace_hpet_timer_id_out_of_range(timer_id);
- return 0;
- }
-
- switch (addr & 0x18) {
- case HPET_TN_CFG: // including interrupt capabilities
- return timer->config >> shift;
- case HPET_TN_CMP: // comparator register
- return timer->cmp >> shift;
- case HPET_TN_ROUTE:
- return timer->fsb >> shift;
- default:
- trace_hpet_ram_read_invalid();
- break;
- }
- } else {
- switch (addr & ~4) {
+ /*address range of all global regs*/
+ if (addr <= 0xff) {
+ switch (addr) {
case HPET_ID: // including HPET_PERIOD
return s->capability >> shift;
case HPET_CFG:
@@ -468,6 +449,26 @@ static uint64_t hpet_ram_read(void *opaque, hwaddr addr,
trace_hpet_ram_read_invalid();
break;
}
+ } else {
+ uint8_t timer_id = (addr - 0x100) / 0x20;
+ HPETTimer *timer = &s->timer[timer_id];
+
+ if (timer_id > s->num_timers) {
+ trace_hpet_timer_id_out_of_range(timer_id);
+ return 0;
+ }
+
+ switch (addr & 0x1f) {
+ case HPET_TN_CFG: // including interrupt capabilities
+ return timer->config >> shift;
+ case HPET_TN_CMP: // comparator register
+ return timer->cmp >> shift;
+ case HPET_TN_ROUTE:
+ return timer->fsb >> shift;
+ default:
+ trace_hpet_ram_read_invalid();
+ break;
+ }
}
return 0;
}
@@ -482,9 +483,67 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
uint64_t old_val, new_val, cleared;
trace_hpet_ram_write(addr, value);
+ addr &= ~4;
- /*address range of all TN regs*/
- if (addr >= 0x100 && addr <= 0x3ff) {
+ /*address range of all global regs*/
+ if (addr <= 0xff) {
+ switch (addr) {
+ case HPET_ID:
+ return;
+ case HPET_CFG:
+ old_val = s->config;
+ new_val = deposit64(old_val, shift, len, value);
+ new_val = hpet_fixup_reg(new_val, old_val, HPET_CFG_WRITE_MASK);
+ s->config = new_val;
+ if (activating_bit(old_val, new_val, HPET_CFG_ENABLE)) {
+ /* Enable main counter and interrupt generation. */
+ s->hpet_offset =
+ ticks_to_ns(s->hpet_counter) - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+ for (i = 0; i < s->num_timers; i++) {
+ if (timer_enabled(&s->timer[i]) && (s->isr & (1 << i))) {
+ update_irq(&s->timer[i], 1);
+ }
+ hpet_set_timer(&s->timer[i]);
+ }
+ } else if (deactivating_bit(old_val, new_val, HPET_CFG_ENABLE)) {
+ /* Halt main counter and disable interrupt generation. */
+ s->hpet_counter = hpet_get_ticks(s);
+ for (i = 0; i < s->num_timers; i++) {
+ hpet_del_timer(&s->timer[i]);
+ }
+ }
+ /* i8254 and RTC output pins are disabled
+ * when HPET is in legacy mode */
+ if (activating_bit(old_val, new_val, HPET_CFG_LEGACY)) {
+ qemu_set_irq(s->pit_enabled, 0);
+ qemu_irq_lower(s->irqs[0]);
+ qemu_irq_lower(s->irqs[RTC_ISA_IRQ]);
+ } else if (deactivating_bit(old_val, new_val, HPET_CFG_LEGACY)) {
+ qemu_irq_lower(s->irqs[0]);
+ qemu_set_irq(s->pit_enabled, 1);
+ qemu_set_irq(s->irqs[RTC_ISA_IRQ], s->rtc_irq_level);
+ }
+ break;
+ case HPET_STATUS:
+ new_val = value << shift;
+ cleared = new_val & s->isr;
+ for (i = 0; i < s->num_timers; i++) {
+ if (cleared & (1 << i)) {
+ update_irq(&s->timer[i], 0);
+ }
+ }
+ break;
+ case HPET_COUNTER:
+ if (hpet_enabled(s)) {
+ trace_hpet_ram_write_counter_write_while_enabled();
+ }
+ s->hpet_counter = deposit64(s->hpet_counter, shift, len, value);
+ break;
+ default:
+ trace_hpet_ram_write_invalid();
+ break;
+ }
+ } else {
uint8_t timer_id = (addr - 0x100) / 0x20;
HPETTimer *timer = &s->timer[timer_id];
@@ -550,63 +609,6 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
break;
}
return;
- } else {
- switch (addr & ~4) {
- case HPET_ID:
- return;
- case HPET_CFG:
- old_val = s->config;
- new_val = deposit64(old_val, shift, len, value);
- new_val = hpet_fixup_reg(new_val, old_val, HPET_CFG_WRITE_MASK);
- s->config = new_val;
- if (activating_bit(old_val, new_val, HPET_CFG_ENABLE)) {
- /* Enable main counter and interrupt generation. */
- s->hpet_offset =
- ticks_to_ns(s->hpet_counter) - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
- for (i = 0; i < s->num_timers; i++) {
- if (timer_enabled(&s->timer[i]) && (s->isr & (1 << i))) {
- update_irq(&s->timer[i], 1);
- }
- hpet_set_timer(&s->timer[i]);
- }
- } else if (deactivating_bit(old_val, new_val, HPET_CFG_ENABLE)) {
- /* Halt main counter and disable interrupt generation. */
- s->hpet_counter = hpet_get_ticks(s);
- for (i = 0; i < s->num_timers; i++) {
- hpet_del_timer(&s->timer[i]);
- }
- }
- /* i8254 and RTC output pins are disabled
- * when HPET is in legacy mode */
- if (activating_bit(old_val, new_val, HPET_CFG_LEGACY)) {
- qemu_set_irq(s->pit_enabled, 0);
- qemu_irq_lower(s->irqs[0]);
- qemu_irq_lower(s->irqs[RTC_ISA_IRQ]);
- } else if (deactivating_bit(old_val, new_val, HPET_CFG_LEGACY)) {
- qemu_irq_lower(s->irqs[0]);
- qemu_set_irq(s->pit_enabled, 1);
- qemu_set_irq(s->irqs[RTC_ISA_IRQ], s->rtc_irq_level);
- }
- break;
- case HPET_STATUS:
- new_val = value << shift;
- cleared = new_val & s->isr;
- for (i = 0; i < s->num_timers; i++) {
- if (cleared & (1 << i)) {
- update_irq(&s->timer[i], 0);
- }
- }
- break;
- case HPET_COUNTER:
- if (hpet_enabled(s)) {
- trace_hpet_ram_write_counter_write_while_enabled();
- }
- s->hpet_counter = deposit64(s->hpet_counter, shift, len, value);
- break;
- default:
- trace_hpet_ram_write_invalid();
- break;
- }
}
}
@@ -689,8 +691,14 @@ static void hpet_realize(DeviceState *dev, Error **errp)
int i;
HPETTimer *timer;
+ if (s->num_timers < HPET_MIN_TIMERS || s->num_timers > HPET_MAX_TIMERS) {
+ error_setg(errp, "hpet.num_timers must be between %d and %d",
+ HPET_MIN_TIMERS, HPET_MAX_TIMERS);
+ return;
+ }
if (!s->intcap) {
- warn_report("Hpet's intcap not initialized");
+ error_setg(errp, "hpet.hpet-intcap not initialized");
+ return;
}
if (hpet_fw_cfg.count == UINT8_MAX) {
/* first instance */
@@ -698,7 +706,7 @@ static void hpet_realize(DeviceState *dev, Error **errp)
}
if (hpet_fw_cfg.count == 8) {
- error_setg(errp, "Only 8 instances of HPET is allowed");
+ error_setg(errp, "Only 8 instances of HPET are allowed");
return;
}
@@ -708,11 +716,6 @@ static void hpet_realize(DeviceState *dev, Error **errp)
sysbus_init_irq(sbd, &s->irqs[i]);
}
- if (s->num_timers < HPET_MIN_TIMERS) {
- s->num_timers = HPET_MIN_TIMERS;
- } else if (s->num_timers > HPET_MAX_TIMERS) {
- s->num_timers = HPET_MAX_TIMERS;
- }
for (i = 0; i < HPET_MAX_TIMERS; i++) {
timer = &s->timer[i];
timer->qemu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, hpet_timer, timer);
diff --git a/hw/ufs/lu.c b/hw/ufs/lu.c
index 57b307e..2d8ffd7 100644
--- a/hw/ufs/lu.c
+++ b/hw/ufs/lu.c
@@ -194,7 +194,7 @@ static int ufs_emulate_wlun_inquiry(UfsRequest *req, uint8_t *outbuf,
static UfsReqResult ufs_emulate_scsi_cmd(UfsLu *lu, UfsRequest *req)
{
uint8_t lun = lu->lun;
- uint8_t outbuf[4096];
+ QEMU_UNINITIALIZED uint8_t outbuf[4096];
uint8_t sense_buf[UFS_SENSE_SIZE];
uint8_t scsi_status;
int len = 0;
diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
index 71b5491..72a9f9f 100644
--- a/hw/usb/hcd-ohci.c
+++ b/hw/usb/hcd-ohci.c
@@ -577,7 +577,7 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed)
USBDevice *dev;
USBEndpoint *ep;
USBPacket *pkt;
- uint8_t buf[8192];
+ QEMU_UNINITIALIZED uint8_t buf[8192];
bool int_req;
struct ohci_iso_td iso_td;
uint32_t addr;
diff --git a/hw/vfio-user/Kconfig b/hw/vfio-user/Kconfig
new file mode 100644
index 0000000..24bdf7a
--- /dev/null
+++ b/hw/vfio-user/Kconfig
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+config VFIO_USER
+ bool
+ default y
+ depends on VFIO_PCI
+
diff --git a/hw/vfio-user/container.c b/hw/vfio-user/container.c
new file mode 100644
index 0000000..d318e6a
--- /dev/null
+++ b/hw/vfio-user/container.c
@@ -0,0 +1,361 @@
+/*
+ * Container for vfio-user IOMMU type: rather than communicating with the kernel
+ * vfio driver, we communicate over a socket to a server using the vfio-user
+ * protocol.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <sys/ioctl.h>
+#include <linux/vfio.h>
+#include "qemu/osdep.h"
+
+#include "hw/vfio-user/container.h"
+#include "hw/vfio-user/device.h"
+#include "hw/vfio-user/trace.h"
+#include "hw/vfio/vfio-device.h"
+#include "hw/vfio/vfio-listener.h"
+#include "qapi/error.h"
+
+/*
+ * When DMA space is the physical address space, the region add/del listeners
+ * will fire during memory update transactions. These depend on BQL being held,
+ * so do any resulting map/demap ops async while keeping BQL.
+ */
+static void vfio_user_listener_begin(VFIOContainerBase *bcontainer)
+{
+ VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
+ bcontainer);
+
+ container->proxy->async_ops = true;
+}
+
+static void vfio_user_listener_commit(VFIOContainerBase *bcontainer)
+{
+ VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
+ bcontainer);
+
+ /* wait here for any async requests sent during the transaction */
+ container->proxy->async_ops = false;
+ vfio_user_wait_reqs(container->proxy);
+}
+
+static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer,
+ hwaddr iova, ram_addr_t size,
+ IOMMUTLBEntry *iotlb, bool unmap_all)
+{
+ VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
+ bcontainer);
+ Error *local_err = NULL;
+ int ret = 0;
+
+ VFIOUserDMAUnmap *msgp = g_malloc(sizeof(*msgp));
+
+ vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_UNMAP, sizeof(*msgp), 0);
+ msgp->argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
+ msgp->flags = unmap_all ? VFIO_DMA_UNMAP_FLAG_ALL : 0;
+ msgp->iova = iova;
+ msgp->size = size;
+ trace_vfio_user_dma_unmap(msgp->iova, msgp->size, msgp->flags,
+ container->proxy->async_ops);
+
+ if (container->proxy->async_ops) {
+ if (!vfio_user_send_nowait(container->proxy, &msgp->hdr, NULL,
+ 0, &local_err)) {
+ error_report_err(local_err);
+ ret = -EFAULT;
+ } else {
+ ret = 0;
+ }
+ } else {
+ if (!vfio_user_send_wait(container->proxy, &msgp->hdr, NULL,
+ 0, &local_err)) {
+ error_report_err(local_err);
+ ret = -EFAULT;
+ }
+
+ if (msgp->hdr.flags & VFIO_USER_ERROR) {
+ ret = -msgp->hdr.error_reply;
+ }
+
+ g_free(msgp);
+ }
+
+ return ret;
+}
+
+static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
+ ram_addr_t size, void *vaddr, bool readonly,
+ MemoryRegion *mrp)
+{
+ VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
+ bcontainer);
+ int fd = memory_region_get_fd(mrp);
+ Error *local_err = NULL;
+ int ret;
+
+ VFIOUserFDs *fds = NULL;
+ VFIOUserDMAMap *msgp = g_malloc0(sizeof(*msgp));
+
+ vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_MAP, sizeof(*msgp), 0);
+ msgp->argsz = sizeof(struct vfio_iommu_type1_dma_map);
+ msgp->flags = VFIO_DMA_MAP_FLAG_READ;
+ msgp->offset = 0;
+ msgp->iova = iova;
+ msgp->size = size;
+
+ /*
+ * vaddr enters as a QEMU process address; make it either a file offset
+ * for mapped areas or leave as 0.
+ */
+ if (fd != -1) {
+ msgp->offset = qemu_ram_block_host_offset(mrp->ram_block, vaddr);
+ }
+
+ if (!readonly) {
+ msgp->flags |= VFIO_DMA_MAP_FLAG_WRITE;
+ }
+
+ trace_vfio_user_dma_map(msgp->iova, msgp->size, msgp->offset, msgp->flags,
+ container->proxy->async_ops);
+
+ /*
+ * The async_ops case sends without blocking. They're later waited for in
+ * vfio_send_wait_reqs.
+ */
+ if (container->proxy->async_ops) {
+ /* can't use auto variable since we don't block */
+ if (fd != -1) {
+ fds = vfio_user_getfds(1);
+ fds->send_fds = 1;
+ fds->fds[0] = fd;
+ }
+
+ if (!vfio_user_send_nowait(container->proxy, &msgp->hdr, fds,
+ 0, &local_err)) {
+ error_report_err(local_err);
+ ret = -EFAULT;
+ } else {
+ ret = 0;
+ }
+ } else {
+ VFIOUserFDs local_fds = { 1, 0, &fd };
+
+ fds = fd != -1 ? &local_fds : NULL;
+
+ if (!vfio_user_send_wait(container->proxy, &msgp->hdr, fds,
+ 0, &local_err)) {
+ error_report_err(local_err);
+ ret = -EFAULT;
+ }
+
+ if (msgp->hdr.flags & VFIO_USER_ERROR) {
+ ret = -msgp->hdr.error_reply;
+ }
+
+ g_free(msgp);
+ }
+
+ return ret;
+}
+
+static int
+vfio_user_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
+ bool start, Error **errp)
+{
+ error_setg_errno(errp, ENOTSUP, "Not supported");
+ return -ENOTSUP;
+}
+
+static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
+ VFIOBitmap *vbmap, hwaddr iova,
+ hwaddr size, Error **errp)
+{
+ error_setg_errno(errp, ENOTSUP, "Not supported");
+ return -ENOTSUP;
+}
+
+static bool vfio_user_setup(VFIOContainerBase *bcontainer, Error **errp)
+{
+ VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
+ bcontainer);
+
+ assert(container->proxy->dma_pgsizes != 0);
+ bcontainer->pgsizes = container->proxy->dma_pgsizes;
+ bcontainer->dma_max_mappings = container->proxy->max_dma;
+
+ /* No live migration support yet. */
+ bcontainer->dirty_pages_supported = false;
+ bcontainer->max_dirty_bitmap_size = container->proxy->max_bitmap;
+ bcontainer->dirty_pgsizes = container->proxy->migr_pgsize;
+
+ return true;
+}
+
+static VFIOUserContainer *vfio_user_create_container(VFIODevice *vbasedev,
+ Error **errp)
+{
+ VFIOUserContainer *container;
+
+ container = VFIO_IOMMU_USER(object_new(TYPE_VFIO_IOMMU_USER));
+ container->proxy = vbasedev->proxy;
+ return container;
+}
+
+/*
+ * Try to mirror vfio_container_connect() as much as possible.
+ */
+static VFIOUserContainer *
+vfio_user_container_connect(AddressSpace *as, VFIODevice *vbasedev,
+ Error **errp)
+{
+ VFIOContainerBase *bcontainer;
+ VFIOUserContainer *container;
+ VFIOAddressSpace *space;
+ VFIOIOMMUClass *vioc;
+ int ret;
+
+ space = vfio_address_space_get(as);
+
+ container = vfio_user_create_container(vbasedev, errp);
+ if (!container) {
+ goto put_space_exit;
+ }
+
+ bcontainer = &container->bcontainer;
+
+ ret = ram_block_uncoordinated_discard_disable(true);
+ if (ret) {
+ error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
+ goto free_container_exit;
+ }
+
+ vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+ assert(vioc->setup);
+
+ if (!vioc->setup(bcontainer, errp)) {
+ goto enable_discards_exit;
+ }
+
+ vfio_address_space_insert(space, bcontainer);
+
+ if (!vfio_listener_register(bcontainer, errp)) {
+ goto listener_release_exit;
+ }
+
+ bcontainer->initialized = true;
+
+ return container;
+
+listener_release_exit:
+ vfio_listener_unregister(bcontainer);
+ if (vioc->release) {
+ vioc->release(bcontainer);
+ }
+
+enable_discards_exit:
+ ram_block_uncoordinated_discard_disable(false);
+
+free_container_exit:
+ object_unref(container);
+
+put_space_exit:
+ vfio_address_space_put(space);
+
+ return NULL;
+}
+
+static void vfio_user_container_disconnect(VFIOUserContainer *container)
+{
+ VFIOContainerBase *bcontainer = &container->bcontainer;
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+ VFIOAddressSpace *space = bcontainer->space;
+
+ ram_block_uncoordinated_discard_disable(false);
+
+ vfio_listener_unregister(bcontainer);
+ if (vioc->release) {
+ vioc->release(bcontainer);
+ }
+
+ object_unref(container);
+
+ vfio_address_space_put(space);
+}
+
+static bool vfio_user_device_get(VFIOUserContainer *container,
+ VFIODevice *vbasedev, Error **errp)
+{
+ struct vfio_device_info info = { .argsz = sizeof(info) };
+
+
+ if (!vfio_user_get_device_info(vbasedev->proxy, &info, errp)) {
+ return false;
+ }
+
+ vbasedev->fd = -1;
+
+ vfio_device_prepare(vbasedev, &container->bcontainer, &info);
+
+ return true;
+}
+
+/*
+ * vfio_user_device_attach: attach a device to a new container.
+ */
+static bool vfio_user_device_attach(const char *name, VFIODevice *vbasedev,
+ AddressSpace *as, Error **errp)
+{
+ VFIOUserContainer *container;
+
+ container = vfio_user_container_connect(as, vbasedev, errp);
+ if (container == NULL) {
+ error_prepend(errp, "failed to connect proxy");
+ return false;
+ }
+
+ return vfio_user_device_get(container, vbasedev, errp);
+}
+
+static void vfio_user_device_detach(VFIODevice *vbasedev)
+{
+ VFIOUserContainer *container = container_of(vbasedev->bcontainer,
+ VFIOUserContainer, bcontainer);
+
+ vfio_device_unprepare(vbasedev);
+
+ vfio_user_container_disconnect(container);
+}
+
+static int vfio_user_pci_hot_reset(VFIODevice *vbasedev, bool single)
+{
+ /* ->needs_reset is always false for vfio-user. */
+ return 0;
+}
+
+static void vfio_iommu_user_class_init(ObjectClass *klass, const void *data)
+{
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
+
+ vioc->setup = vfio_user_setup;
+ vioc->listener_begin = vfio_user_listener_begin,
+ vioc->listener_commit = vfio_user_listener_commit,
+ vioc->dma_map = vfio_user_dma_map;
+ vioc->dma_unmap = vfio_user_dma_unmap;
+ vioc->attach_device = vfio_user_device_attach;
+ vioc->detach_device = vfio_user_device_detach;
+ vioc->set_dirty_page_tracking = vfio_user_set_dirty_page_tracking;
+ vioc->query_dirty_bitmap = vfio_user_query_dirty_bitmap;
+ vioc->pci_hot_reset = vfio_user_pci_hot_reset;
+};
+
+static const TypeInfo types[] = {
+ {
+ .name = TYPE_VFIO_IOMMU_USER,
+ .parent = TYPE_VFIO_IOMMU,
+ .instance_size = sizeof(VFIOUserContainer),
+ .class_init = vfio_iommu_user_class_init,
+ },
+};
+
+DEFINE_TYPES(types)
diff --git a/hw/vfio-user/container.h b/hw/vfio-user/container.h
new file mode 100644
index 0000000..2bb1fa1
--- /dev/null
+++ b/hw/vfio-user/container.h
@@ -0,0 +1,23 @@
+/*
+ * vfio-user specific definitions.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_VFIO_USER_CONTAINER_H
+#define HW_VFIO_USER_CONTAINER_H
+
+#include "qemu/osdep.h"
+
+#include "hw/vfio/vfio-container-base.h"
+#include "hw/vfio-user/proxy.h"
+
+/* MMU container sub-class for vfio-user. */
+typedef struct VFIOUserContainer {
+ VFIOContainerBase bcontainer;
+ VFIOUserProxy *proxy;
+} VFIOUserContainer;
+
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserContainer, VFIO_IOMMU_USER);
+
+#endif /* HW_VFIO_USER_CONTAINER_H */
diff --git a/hw/vfio-user/device.c b/hw/vfio-user/device.c
new file mode 100644
index 0000000..0609a7d
--- /dev/null
+++ b/hw/vfio-user/device.c
@@ -0,0 +1,441 @@
+/*
+ * vfio protocol over a UNIX socket device handling.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/lockable.h"
+#include "qemu/thread.h"
+
+#include "hw/vfio-user/device.h"
+#include "hw/vfio-user/trace.h"
+
+/*
+ * These are to defend against a malign server trying
+ * to force us to run out of memory.
+ */
+#define VFIO_USER_MAX_REGIONS 100
+#define VFIO_USER_MAX_IRQS 50
+
+bool vfio_user_get_device_info(VFIOUserProxy *proxy,
+ struct vfio_device_info *info, Error **errp)
+{
+ VFIOUserDeviceInfo msg;
+ uint32_t argsz = sizeof(msg) - sizeof(msg.hdr);
+
+ memset(&msg, 0, sizeof(msg));
+ vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_INFO, sizeof(msg), 0);
+ msg.argsz = argsz;
+
+ if (!vfio_user_send_wait(proxy, &msg.hdr, NULL, 0, errp)) {
+ return false;
+ }
+
+ if (msg.hdr.flags & VFIO_USER_ERROR) {
+ error_setg_errno(errp, -msg.hdr.error_reply,
+ "VFIO_USER_DEVICE_GET_INFO failed");
+ return false;
+ }
+
+ trace_vfio_user_get_info(msg.num_regions, msg.num_irqs);
+
+ memcpy(info, &msg.argsz, argsz);
+
+ /* defend against a malicious server */
+ if (info->num_regions > VFIO_USER_MAX_REGIONS ||
+ info->num_irqs > VFIO_USER_MAX_IRQS) {
+ error_setg_errno(errp, EINVAL, "invalid reply");
+ return false;
+ }
+
+ return true;
+}
+
+void vfio_user_device_reset(VFIOUserProxy *proxy)
+{
+ Error *local_err = NULL;
+ VFIOUserHdr hdr;
+
+ vfio_user_request_msg(&hdr, VFIO_USER_DEVICE_RESET, sizeof(hdr), 0);
+
+ if (!vfio_user_send_wait(proxy, &hdr, NULL, 0, &local_err)) {
+ error_prepend(&local_err, "%s: ", __func__);
+ error_report_err(local_err);
+ return;
+ }
+
+ if (hdr.flags & VFIO_USER_ERROR) {
+ error_printf("reset reply error %d\n", hdr.error_reply);
+ }
+}
+
+static int vfio_user_get_region_info(VFIOUserProxy *proxy,
+ struct vfio_region_info *info,
+ VFIOUserFDs *fds)
+{
+ g_autofree VFIOUserRegionInfo *msgp = NULL;
+ Error *local_err = NULL;
+ uint32_t size;
+
+ /* data returned can be larger than vfio_region_info */
+ if (info->argsz < sizeof(*info)) {
+ error_printf("vfio_user_get_region_info argsz too small\n");
+ return -E2BIG;
+ }
+ if (fds != NULL && fds->send_fds != 0) {
+ error_printf("vfio_user_get_region_info can't send FDs\n");
+ return -EINVAL;
+ }
+
+ size = info->argsz + sizeof(VFIOUserHdr);
+ msgp = g_malloc0(size);
+
+ vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_GET_REGION_INFO,
+ sizeof(*msgp), 0);
+ msgp->argsz = info->argsz;
+ msgp->index = info->index;
+
+ if (!vfio_user_send_wait(proxy, &msgp->hdr, fds, size, &local_err)) {
+ error_prepend(&local_err, "%s: ", __func__);
+ error_report_err(local_err);
+ return -EFAULT;
+ }
+
+ if (msgp->hdr.flags & VFIO_USER_ERROR) {
+ return -msgp->hdr.error_reply;
+ }
+ trace_vfio_user_get_region_info(msgp->index, msgp->flags, msgp->size);
+
+ memcpy(info, &msgp->argsz, info->argsz);
+
+ /*
+ * If at least one region is directly mapped into the VM, then we can no
+ * longer rely on the sequential nature of vfio-user request handling to
+ * ensure that posted writes are completed before a subsequent read. In this
+ * case, disable posted write support. This is a per-device property, not
+ * per-region.
+ */
+ if (info->flags & VFIO_REGION_INFO_FLAG_MMAP) {
+ vfio_user_disable_posted_writes(proxy);
+ }
+
+ return 0;
+}
+
+static int vfio_user_device_io_get_region_info(VFIODevice *vbasedev,
+ struct vfio_region_info *info,
+ int *fd)
+{
+ VFIOUserFDs fds = { 0, 1, fd};
+ int ret;
+
+ if (info->index > vbasedev->num_regions) {
+ return -EINVAL;
+ }
+
+ ret = vfio_user_get_region_info(vbasedev->proxy, info, &fds);
+ if (ret) {
+ return ret;
+ }
+
+ /* cap_offset in valid area */
+ if ((info->flags & VFIO_REGION_INFO_FLAG_CAPS) &&
+ (info->cap_offset < sizeof(*info) || info->cap_offset > info->argsz)) {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int vfio_user_device_io_get_irq_info(VFIODevice *vbasedev,
+ struct vfio_irq_info *info)
+{
+ VFIOUserProxy *proxy = vbasedev->proxy;
+ Error *local_err = NULL;
+ VFIOUserIRQInfo msg;
+
+ memset(&msg, 0, sizeof(msg));
+ vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_IRQ_INFO,
+ sizeof(msg), 0);
+ msg.argsz = info->argsz;
+ msg.index = info->index;
+
+ if (!vfio_user_send_wait(proxy, &msg.hdr, NULL, 0, &local_err)) {
+ error_prepend(&local_err, "%s: ", __func__);
+ error_report_err(local_err);
+ return -EFAULT;
+ }
+
+ if (msg.hdr.flags & VFIO_USER_ERROR) {
+ return -msg.hdr.error_reply;
+ }
+ trace_vfio_user_get_irq_info(msg.index, msg.flags, msg.count);
+
+ memcpy(info, &msg.argsz, sizeof(*info));
+ return 0;
+}
+
+static int irq_howmany(int *fdp, uint32_t cur, uint32_t max)
+{
+ int n = 0;
+
+ if (fdp[cur] != -1) {
+ do {
+ n++;
+ } while (n < max && fdp[cur + n] != -1);
+ } else {
+ do {
+ n++;
+ } while (n < max && fdp[cur + n] == -1);
+ }
+
+ return n;
+}
+
+static int vfio_user_device_io_set_irqs(VFIODevice *vbasedev,
+ struct vfio_irq_set *irq)
+{
+ VFIOUserProxy *proxy = vbasedev->proxy;
+ g_autofree VFIOUserIRQSet *msgp = NULL;
+ uint32_t size, nfds, send_fds, sent_fds, max;
+ Error *local_err = NULL;
+
+ if (irq->argsz < sizeof(*irq)) {
+ error_printf("vfio_user_set_irqs argsz too small\n");
+ return -EINVAL;
+ }
+
+ /*
+ * Handle simple case
+ */
+ if ((irq->flags & VFIO_IRQ_SET_DATA_EVENTFD) == 0) {
+ size = sizeof(VFIOUserHdr) + irq->argsz;
+ msgp = g_malloc0(size);
+
+ vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS, size, 0);
+ msgp->argsz = irq->argsz;
+ msgp->flags = irq->flags;
+ msgp->index = irq->index;
+ msgp->start = irq->start;
+ msgp->count = irq->count;
+ trace_vfio_user_set_irqs(msgp->index, msgp->start, msgp->count,
+ msgp->flags);
+
+ if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, &local_err)) {
+ error_prepend(&local_err, "%s: ", __func__);
+ error_report_err(local_err);
+ return -EFAULT;
+ }
+
+ if (msgp->hdr.flags & VFIO_USER_ERROR) {
+ return -msgp->hdr.error_reply;
+ }
+
+ return 0;
+ }
+
+ /*
+ * Calculate the number of FDs to send
+ * and adjust argsz
+ */
+ nfds = (irq->argsz - sizeof(*irq)) / sizeof(int);
+ irq->argsz = sizeof(*irq);
+ msgp = g_malloc0(sizeof(*msgp));
+ /*
+ * Send in chunks if over max_send_fds
+ */
+ for (sent_fds = 0; nfds > sent_fds; sent_fds += send_fds) {
+ VFIOUserFDs *arg_fds, loop_fds;
+
+ /* must send all valid FDs or all invalid FDs in single msg */
+ max = nfds - sent_fds;
+ if (max > proxy->max_send_fds) {
+ max = proxy->max_send_fds;
+ }
+ send_fds = irq_howmany((int *)irq->data, sent_fds, max);
+
+ vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS,
+ sizeof(*msgp), 0);
+ msgp->argsz = irq->argsz;
+ msgp->flags = irq->flags;
+ msgp->index = irq->index;
+ msgp->start = irq->start + sent_fds;
+ msgp->count = send_fds;
+ trace_vfio_user_set_irqs(msgp->index, msgp->start, msgp->count,
+ msgp->flags);
+
+ loop_fds.send_fds = send_fds;
+ loop_fds.recv_fds = 0;
+ loop_fds.fds = (int *)irq->data + sent_fds;
+ arg_fds = loop_fds.fds[0] != -1 ? &loop_fds : NULL;
+
+ if (!vfio_user_send_wait(proxy, &msgp->hdr, arg_fds, 0, &local_err)) {
+ error_prepend(&local_err, "%s: ", __func__);
+ error_report_err(local_err);
+ return -EFAULT;
+ }
+
+ if (msgp->hdr.flags & VFIO_USER_ERROR) {
+ return -msgp->hdr.error_reply;
+ }
+ }
+
+ return 0;
+}
+
+static int vfio_user_device_io_region_read(VFIODevice *vbasedev, uint8_t index,
+ off_t off, uint32_t count,
+ void *data)
+{
+ g_autofree VFIOUserRegionRW *msgp = NULL;
+ VFIOUserProxy *proxy = vbasedev->proxy;
+ int size = sizeof(*msgp) + count;
+ Error *local_err = NULL;
+
+ if (count > proxy->max_xfer_size) {
+ return -EINVAL;
+ }
+
+ msgp = g_malloc0(size);
+ vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_READ, sizeof(*msgp), 0);
+ msgp->offset = off;
+ msgp->region = index;
+ msgp->count = count;
+ trace_vfio_user_region_rw(msgp->region, msgp->offset, msgp->count);
+
+ if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, size, &local_err)) {
+ error_prepend(&local_err, "%s: ", __func__);
+ error_report_err(local_err);
+ return -EFAULT;
+ }
+
+ if (msgp->hdr.flags & VFIO_USER_ERROR) {
+ return -msgp->hdr.error_reply;
+ } else if (msgp->count > count) {
+ return -E2BIG;
+ } else {
+ memcpy(data, &msgp->data, msgp->count);
+ }
+
+ return msgp->count;
+}
+
+/*
+ * If this is a posted write, and VFIO_PROXY_NO_POST is not set, then we are OK
+ * to send the write to the socket without waiting for the server's reply:
+ * a subsequent read (of any region) will not pass the posted write, as all
+ * messages are handled sequentially.
+ */
+static int vfio_user_device_io_region_write(VFIODevice *vbasedev, uint8_t index,
+ off_t off, unsigned count,
+ void *data, bool post)
+{
+ VFIOUserRegionRW *msgp = NULL;
+ VFIOUserProxy *proxy = vbasedev->proxy;
+ int size = sizeof(*msgp) + count;
+ Error *local_err = NULL;
+ bool can_multi;
+ int flags = 0;
+ int ret;
+
+ if (count > proxy->max_xfer_size) {
+ return -EINVAL;
+ }
+
+ if (proxy->flags & VFIO_PROXY_NO_POST) {
+ post = false;
+ }
+
+ if (post) {
+ flags |= VFIO_USER_NO_REPLY;
+ }
+
+ /* write eligible to be in a WRITE_MULTI msg ? */
+ can_multi = (proxy->flags & VFIO_PROXY_USE_MULTI) && post &&
+ count <= VFIO_USER_MULTI_DATA;
+
+ /*
+ * This should be a rare case, so first check without the lock,
+ * if we're wrong, vfio_send_queued() will flush any posted writes
+ * we missed here
+ */
+ if (proxy->wr_multi != NULL ||
+ (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi)) {
+
+ /*
+ * re-check with lock
+ *
+ * if already building a WRITE_MULTI msg,
+ * add this one if possible else flush pending before
+ * sending the current one
+ *
+ * else if outgoing queue is over the highwater,
+ * start a new WRITE_MULTI message
+ */
+ WITH_QEMU_LOCK_GUARD(&proxy->lock) {
+ if (proxy->wr_multi != NULL) {
+ if (can_multi) {
+ vfio_user_add_multi(proxy, index, off, count, data);
+ return count;
+ }
+ vfio_user_flush_multi(proxy);
+ } else if (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi) {
+ vfio_user_create_multi(proxy);
+ vfio_user_add_multi(proxy, index, off, count, data);
+ return count;
+ }
+ }
+ }
+
+ msgp = g_malloc0(size);
+ vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_WRITE, size, flags);
+ msgp->offset = off;
+ msgp->region = index;
+ msgp->count = count;
+ memcpy(&msgp->data, data, count);
+ trace_vfio_user_region_rw(msgp->region, msgp->offset, msgp->count);
+
+ /* async send will free msg after it's sent */
+ if (post) {
+ if (!vfio_user_send_async(proxy, &msgp->hdr, NULL, &local_err)) {
+ error_prepend(&local_err, "%s: ", __func__);
+ error_report_err(local_err);
+ return -EFAULT;
+ }
+
+ return count;
+ }
+
+ if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, &local_err)) {
+ error_prepend(&local_err, "%s: ", __func__);
+ error_report_err(local_err);
+ g_free(msgp);
+ return -EFAULT;
+ }
+
+ if (msgp->hdr.flags & VFIO_USER_ERROR) {
+ ret = -msgp->hdr.error_reply;
+ } else {
+ ret = count;
+ }
+
+ g_free(msgp);
+ return ret;
+}
+
+/*
+ * Socket-based io_ops
+ */
+VFIODeviceIOOps vfio_user_device_io_ops_sock = {
+ .get_region_info = vfio_user_device_io_get_region_info,
+ .get_irq_info = vfio_user_device_io_get_irq_info,
+ .set_irqs = vfio_user_device_io_set_irqs,
+ .region_read = vfio_user_device_io_region_read,
+ .region_write = vfio_user_device_io_region_write,
+
+};
diff --git a/hw/vfio-user/device.h b/hw/vfio-user/device.h
new file mode 100644
index 0000000..d183a39
--- /dev/null
+++ b/hw/vfio-user/device.h
@@ -0,0 +1,24 @@
+#ifndef VFIO_USER_DEVICE_H
+#define VFIO_USER_DEVICE_H
+
+/*
+ * vfio protocol over a UNIX socket device handling.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "linux/vfio.h"
+
+#include "hw/vfio-user/proxy.h"
+
+bool vfio_user_get_device_info(VFIOUserProxy *proxy,
+ struct vfio_device_info *info, Error **errp);
+
+void vfio_user_device_reset(VFIOUserProxy *proxy);
+
+extern VFIODeviceIOOps vfio_user_device_io_ops_sock;
+
+#endif /* VFIO_USER_DEVICE_H */
diff --git a/hw/vfio-user/meson.build b/hw/vfio-user/meson.build
new file mode 100644
index 0000000..2ed0ae5
--- /dev/null
+++ b/hw/vfio-user/meson.build
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+vfio_user_ss = ss.source_set()
+vfio_user_ss.add(files(
+ 'container.c',
+ 'device.c',
+ 'pci.c',
+ 'proxy.c',
+))
+
+system_ss.add_all(when: 'CONFIG_VFIO_USER', if_true: vfio_user_ss)
diff --git a/hw/vfio-user/pci.c b/hw/vfio-user/pci.c
new file mode 100644
index 0000000..be71c77
--- /dev/null
+++ b/hw/vfio-user/pci.c
@@ -0,0 +1,475 @@
+/*
+ * vfio PCI device over a UNIX socket.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <sys/ioctl.h>
+#include "qemu/osdep.h"
+#include "qapi-visit-sockets.h"
+#include "qemu/error-report.h"
+
+#include "hw/qdev-properties.h"
+#include "hw/vfio/pci.h"
+#include "hw/vfio-user/device.h"
+#include "hw/vfio-user/proxy.h"
+
+#define TYPE_VFIO_USER_PCI "vfio-user-pci"
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserPCIDevice, VFIO_USER_PCI)
+
+struct VFIOUserPCIDevice {
+ VFIOPCIDevice device;
+ SocketAddress *socket;
+ bool send_queued; /* all sends are queued */
+ uint32_t wait_time; /* timeout for message replies */
+ bool no_post; /* all region writes are sync */
+};
+
+/*
+ * The server maintains the device's pending interrupts,
+ * via its MSIX table and PBA, so we treat these accesses
+ * like PCI config space and forward them.
+ */
+static uint64_t vfio_user_pba_read(void *opaque, hwaddr addr,
+ unsigned size)
+{
+ VFIOPCIDevice *vdev = opaque;
+ VFIORegion *region = &vdev->bars[vdev->msix->pba_bar].region;
+ uint64_t data;
+
+ /* server copy is what matters */
+ data = vfio_region_read(region, addr + vdev->msix->pba_offset, size);
+ return data;
+}
+
+static void vfio_user_pba_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size)
+{
+ /* dropped */
+}
+
+static const MemoryRegionOps vfio_user_pba_ops = {
+ .read = vfio_user_pba_read,
+ .write = vfio_user_pba_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static void vfio_user_msix_setup(VFIOPCIDevice *vdev)
+{
+ MemoryRegion *vfio_reg, *msix_reg, *pba_reg;
+
+ pba_reg = g_new0(MemoryRegion, 1);
+ vdev->msix->pba_region = pba_reg;
+
+ vfio_reg = vdev->bars[vdev->msix->pba_bar].mr;
+ msix_reg = &vdev->pdev.msix_pba_mmio;
+ memory_region_init_io(pba_reg, OBJECT(vdev), &vfio_user_pba_ops, vdev,
+ "VFIO MSIX PBA", int128_get64(msix_reg->size));
+ memory_region_add_subregion_overlap(vfio_reg, vdev->msix->pba_offset,
+ pba_reg, 1);
+}
+
+static void vfio_user_msix_teardown(VFIOPCIDevice *vdev)
+{
+ MemoryRegion *mr, *sub;
+
+ mr = vdev->bars[vdev->msix->pba_bar].mr;
+ sub = vdev->msix->pba_region;
+ memory_region_del_subregion(mr, sub);
+
+ g_free(vdev->msix->pba_region);
+ vdev->msix->pba_region = NULL;
+}
+
+static void vfio_user_dma_read(VFIOPCIDevice *vdev, VFIOUserDMARW *msg)
+{
+ PCIDevice *pdev = &vdev->pdev;
+ VFIOUserProxy *proxy = vdev->vbasedev.proxy;
+ VFIOUserDMARW *res;
+ MemTxResult r;
+ size_t size;
+
+ if (msg->hdr.size < sizeof(*msg)) {
+ vfio_user_send_error(proxy, &msg->hdr, EINVAL);
+ return;
+ }
+ if (msg->count > proxy->max_xfer_size) {
+ vfio_user_send_error(proxy, &msg->hdr, E2BIG);
+ return;
+ }
+
+ /* switch to our own message buffer */
+ size = msg->count + sizeof(VFIOUserDMARW);
+ res = g_malloc0(size);
+ memcpy(res, msg, sizeof(*res));
+ g_free(msg);
+
+ r = pci_dma_read(pdev, res->offset, &res->data, res->count);
+
+ switch (r) {
+ case MEMTX_OK:
+ if (res->hdr.flags & VFIO_USER_NO_REPLY) {
+ g_free(res);
+ return;
+ }
+ vfio_user_send_reply(proxy, &res->hdr, size);
+ break;
+ case MEMTX_ERROR:
+ vfio_user_send_error(proxy, &res->hdr, EFAULT);
+ break;
+ case MEMTX_DECODE_ERROR:
+ vfio_user_send_error(proxy, &res->hdr, ENODEV);
+ break;
+ case MEMTX_ACCESS_ERROR:
+ vfio_user_send_error(proxy, &res->hdr, EPERM);
+ break;
+ default:
+ error_printf("vfio_user_dma_read unknown error %d\n", r);
+ vfio_user_send_error(vdev->vbasedev.proxy, &res->hdr, EINVAL);
+ }
+}
+
+static void vfio_user_dma_write(VFIOPCIDevice *vdev, VFIOUserDMARW *msg)
+{
+ PCIDevice *pdev = &vdev->pdev;
+ VFIOUserProxy *proxy = vdev->vbasedev.proxy;
+ MemTxResult r;
+
+ if (msg->hdr.size < sizeof(*msg)) {
+ vfio_user_send_error(proxy, &msg->hdr, EINVAL);
+ return;
+ }
+ /* make sure transfer count isn't larger than the message data */
+ if (msg->count > msg->hdr.size - sizeof(*msg)) {
+ vfio_user_send_error(proxy, &msg->hdr, E2BIG);
+ return;
+ }
+
+ r = pci_dma_write(pdev, msg->offset, &msg->data, msg->count);
+
+ switch (r) {
+ case MEMTX_OK:
+ if ((msg->hdr.flags & VFIO_USER_NO_REPLY) == 0) {
+ vfio_user_send_reply(proxy, &msg->hdr, sizeof(msg->hdr));
+ } else {
+ g_free(msg);
+ }
+ break;
+ case MEMTX_ERROR:
+ vfio_user_send_error(proxy, &msg->hdr, EFAULT);
+ break;
+ case MEMTX_DECODE_ERROR:
+ vfio_user_send_error(proxy, &msg->hdr, ENODEV);
+ break;
+ case MEMTX_ACCESS_ERROR:
+ vfio_user_send_error(proxy, &msg->hdr, EPERM);
+ break;
+ default:
+ error_printf("vfio_user_dma_write unknown error %d\n", r);
+ vfio_user_send_error(vdev->vbasedev.proxy, &msg->hdr, EINVAL);
+ }
+}
+
+/*
+ * Incoming request message callback.
+ *
+ * Runs off main loop, so BQL held.
+ */
+static void vfio_user_pci_process_req(void *opaque, VFIOUserMsg *msg)
+{
+ VFIOPCIDevice *vdev = opaque;
+ VFIOUserHdr *hdr = msg->hdr;
+
+ /* no incoming PCI requests pass FDs */
+ if (msg->fds != NULL) {
+ vfio_user_send_error(vdev->vbasedev.proxy, hdr, EINVAL);
+ vfio_user_putfds(msg);
+ return;
+ }
+
+ switch (hdr->command) {
+ case VFIO_USER_DMA_READ:
+ vfio_user_dma_read(vdev, (VFIOUserDMARW *)hdr);
+ break;
+ case VFIO_USER_DMA_WRITE:
+ vfio_user_dma_write(vdev, (VFIOUserDMARW *)hdr);
+ break;
+ default:
+ error_printf("vfio_user_pci_process_req unknown cmd %d\n",
+ hdr->command);
+ vfio_user_send_error(vdev->vbasedev.proxy, hdr, ENOSYS);
+ }
+}
+
+/*
+ * Emulated devices don't use host hot reset
+ */
+static void vfio_user_compute_needs_reset(VFIODevice *vbasedev)
+{
+ vbasedev->needs_reset = false;
+}
+
+static Object *vfio_user_pci_get_object(VFIODevice *vbasedev)
+{
+ VFIOUserPCIDevice *vdev = container_of(vbasedev, VFIOUserPCIDevice,
+ device.vbasedev);
+
+ return OBJECT(vdev);
+}
+
+static VFIODeviceOps vfio_user_pci_ops = {
+ .vfio_compute_needs_reset = vfio_user_compute_needs_reset,
+ .vfio_eoi = vfio_pci_intx_eoi,
+ .vfio_get_object = vfio_user_pci_get_object,
+ /* No live migration support yet. */
+ .vfio_save_config = NULL,
+ .vfio_load_config = NULL,
+};
+
+static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
+{
+ ERRP_GUARD();
+ VFIOUserPCIDevice *udev = VFIO_USER_PCI(pdev);
+ VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+ VFIODevice *vbasedev = &vdev->vbasedev;
+ const char *sock_name;
+ AddressSpace *as;
+ SocketAddress addr;
+ VFIOUserProxy *proxy;
+
+ if (!udev->socket) {
+ error_setg(errp, "No socket specified");
+ error_append_hint(errp, "e.g. -device '{"
+ "\"driver\":\"vfio-user-pci\", "
+ "\"socket\": {\"path\": \"/tmp/vfio-user.sock\", "
+ "\"type\": \"unix\"}'"
+ "}'\n");
+ return;
+ }
+
+ sock_name = udev->socket->u.q_unix.path;
+
+ vbasedev->name = g_strdup_printf("vfio-user:%s", sock_name);
+
+ memset(&addr, 0, sizeof(addr));
+ addr.type = SOCKET_ADDRESS_TYPE_UNIX;
+ addr.u.q_unix.path = (char *)sock_name;
+ proxy = vfio_user_connect_dev(&addr, errp);
+ if (!proxy) {
+ return;
+ }
+ vbasedev->proxy = proxy;
+ vfio_user_set_handler(vbasedev, vfio_user_pci_process_req, vdev);
+
+ vbasedev->name = g_strdup_printf("vfio-user:%s", sock_name);
+
+ if (udev->send_queued) {
+ proxy->flags |= VFIO_PROXY_FORCE_QUEUED;
+ }
+
+ if (udev->no_post) {
+ proxy->flags |= VFIO_PROXY_NO_POST;
+ }
+
+ /* user specified or 5 sec default */
+ proxy->wait_time = udev->wait_time;
+
+ if (!vfio_user_validate_version(proxy, errp)) {
+ goto error;
+ }
+
+ /*
+ * Use socket-based device I/O instead of vfio kernel driver.
+ */
+ vbasedev->io_ops = &vfio_user_device_io_ops_sock;
+
+ /*
+ * vfio-user devices are effectively mdevs (don't use a host iommu).
+ */
+ vbasedev->mdev = true;
+
+ /*
+ * Enable per-region fds.
+ */
+ vbasedev->use_region_fds = true;
+
+ as = pci_device_iommu_address_space(pdev);
+ if (!vfio_device_attach_by_iommu_type(TYPE_VFIO_IOMMU_USER,
+ vbasedev->name, vbasedev,
+ as, errp)) {
+ goto error;
+ }
+
+ if (!vfio_pci_populate_device(vdev, errp)) {
+ goto error;
+ }
+
+ if (!vfio_pci_config_setup(vdev, errp)) {
+ goto error;
+ }
+
+ /*
+ * vfio_pci_config_setup will have registered the device's BARs
+ * and setup any MSIX BARs, so errors after it succeeds must
+ * use out_teardown
+ */
+
+ if (!vfio_pci_add_capabilities(vdev, errp)) {
+ goto out_teardown;
+ }
+
+ if (vdev->msix != NULL) {
+ vfio_user_msix_setup(vdev);
+ }
+
+ if (!vfio_pci_interrupt_setup(vdev, errp)) {
+ goto out_teardown;
+ }
+
+ vfio_pci_register_err_notifier(vdev);
+ vfio_pci_register_req_notifier(vdev);
+
+ return;
+
+out_teardown:
+ vfio_pci_teardown_msi(vdev);
+ vfio_pci_bars_exit(vdev);
+error:
+ error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
+ vfio_pci_put_device(vdev);
+}
+
+static void vfio_user_instance_init(Object *obj)
+{
+ PCIDevice *pci_dev = PCI_DEVICE(obj);
+ VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
+ VFIODevice *vbasedev = &vdev->vbasedev;
+
+ device_add_bootindex_property(obj, &vdev->bootindex,
+ "bootindex", NULL,
+ &pci_dev->qdev);
+ vdev->host.domain = ~0U;
+ vdev->host.bus = ~0U;
+ vdev->host.slot = ~0U;
+ vdev->host.function = ~0U;
+
+ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_user_pci_ops,
+ DEVICE(vdev), false);
+
+ vdev->nv_gpudirect_clique = 0xFF;
+
+ /*
+ * QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command
+ * line, therefore, no need to wait to realize like other devices.
+ */
+ pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
+}
+
+static void vfio_user_instance_finalize(Object *obj)
+{
+ VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
+ VFIODevice *vbasedev = &vdev->vbasedev;
+
+ if (vdev->msix != NULL) {
+ vfio_user_msix_teardown(vdev);
+ }
+
+ vfio_pci_put_device(vdev);
+
+ if (vbasedev->proxy != NULL) {
+ vfio_user_disconnect(vbasedev->proxy);
+ }
+}
+
+static void vfio_user_pci_reset(DeviceState *dev)
+{
+ VFIOPCIDevice *vdev = VFIO_PCI_BASE(dev);
+ VFIODevice *vbasedev = &vdev->vbasedev;
+
+ vfio_pci_pre_reset(vdev);
+
+ if (vbasedev->reset_works) {
+ vfio_user_device_reset(vbasedev->proxy);
+ }
+
+ vfio_pci_post_reset(vdev);
+}
+
+static const Property vfio_user_pci_dev_properties[] = {
+ DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice,
+ vendor_id, PCI_ANY_ID),
+ DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice,
+ device_id, PCI_ANY_ID),
+ DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice,
+ sub_vendor_id, PCI_ANY_ID),
+ DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
+ sub_device_id, PCI_ANY_ID),
+ DEFINE_PROP_BOOL("x-send-queued", VFIOUserPCIDevice, send_queued, false),
+ DEFINE_PROP_UINT32("x-msg-timeout", VFIOUserPCIDevice, wait_time, 5000),
+ DEFINE_PROP_BOOL("x-no-posted-writes", VFIOUserPCIDevice, no_post, false),
+};
+
+static void vfio_user_pci_set_socket(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
+{
+ VFIOUserPCIDevice *udev = VFIO_USER_PCI(obj);
+ bool success;
+
+ if (udev->device.vbasedev.proxy) {
+ error_setg(errp, "Proxy is connected");
+ return;
+ }
+
+ qapi_free_SocketAddress(udev->socket);
+
+ udev->socket = NULL;
+
+ success = visit_type_SocketAddress(v, name, &udev->socket, errp);
+
+ if (!success) {
+ return;
+ }
+
+ if (udev->socket->type != SOCKET_ADDRESS_TYPE_UNIX) {
+ error_setg(errp, "Unsupported socket type %s",
+ SocketAddressType_str(udev->socket->type));
+ qapi_free_SocketAddress(udev->socket);
+ udev->socket = NULL;
+ return;
+ }
+}
+
+static void vfio_user_pci_dev_class_init(ObjectClass *klass, const void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);
+
+ device_class_set_legacy_reset(dc, vfio_user_pci_reset);
+ device_class_set_props(dc, vfio_user_pci_dev_properties);
+
+ object_class_property_add(klass, "socket", "SocketAddress", NULL,
+ vfio_user_pci_set_socket, NULL, NULL);
+ object_class_property_set_description(klass, "socket",
+ "SocketAddress (UNIX sockets only)");
+
+ dc->desc = "VFIO over socket PCI device assignment";
+ pdc->realize = vfio_user_pci_realize;
+}
+
+static const TypeInfo vfio_user_pci_dev_info = {
+ .name = TYPE_VFIO_USER_PCI,
+ .parent = TYPE_VFIO_PCI_BASE,
+ .instance_size = sizeof(VFIOUserPCIDevice),
+ .class_init = vfio_user_pci_dev_class_init,
+ .instance_init = vfio_user_instance_init,
+ .instance_finalize = vfio_user_instance_finalize,
+};
+
+static void register_vfio_user_dev_type(void)
+{
+ type_register_static(&vfio_user_pci_dev_info);
+}
+
+ type_init(register_vfio_user_dev_type)
diff --git a/hw/vfio-user/protocol.h b/hw/vfio-user/protocol.h
new file mode 100644
index 0000000..3249a4a
--- /dev/null
+++ b/hw/vfio-user/protocol.h
@@ -0,0 +1,242 @@
+#ifndef VFIO_USER_PROTOCOL_H
+#define VFIO_USER_PROTOCOL_H
+
+/*
+ * vfio protocol over a UNIX socket.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * Each message has a standard header that describes the command
+ * being sent, which is almost always a VFIO ioctl().
+ *
+ * The header may be followed by command-specific data, such as the
+ * region and offset info for read and write commands.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+typedef struct {
+ uint16_t id;
+ uint16_t command;
+ uint32_t size;
+ uint32_t flags;
+ uint32_t error_reply;
+} VFIOUserHdr;
+
+/* VFIOUserHdr commands */
+enum vfio_user_command {
+ VFIO_USER_VERSION = 1,
+ VFIO_USER_DMA_MAP = 2,
+ VFIO_USER_DMA_UNMAP = 3,
+ VFIO_USER_DEVICE_GET_INFO = 4,
+ VFIO_USER_DEVICE_GET_REGION_INFO = 5,
+ VFIO_USER_DEVICE_GET_REGION_IO_FDS = 6,
+ VFIO_USER_DEVICE_GET_IRQ_INFO = 7,
+ VFIO_USER_DEVICE_SET_IRQS = 8,
+ VFIO_USER_REGION_READ = 9,
+ VFIO_USER_REGION_WRITE = 10,
+ VFIO_USER_DMA_READ = 11,
+ VFIO_USER_DMA_WRITE = 12,
+ VFIO_USER_DEVICE_RESET = 13,
+ VFIO_USER_DIRTY_PAGES = 14,
+ VFIO_USER_REGION_WRITE_MULTI = 15,
+ VFIO_USER_MAX,
+};
+
+/* VFIOUserHdr flags */
+#define VFIO_USER_REQUEST 0x0
+#define VFIO_USER_REPLY 0x1
+#define VFIO_USER_TYPE 0xF
+
+#define VFIO_USER_NO_REPLY 0x10
+#define VFIO_USER_ERROR 0x20
+
+
+/*
+ * VFIO_USER_VERSION
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint16_t major;
+ uint16_t minor;
+ char capabilities[];
+} VFIOUserVersion;
+
+#define VFIO_USER_MAJOR_VER 0
+#define VFIO_USER_MINOR_VER 0
+
+#define VFIO_USER_CAP "capabilities"
+
+/* "capabilities" members */
+#define VFIO_USER_CAP_MAX_FDS "max_msg_fds"
+#define VFIO_USER_CAP_MAX_XFER "max_data_xfer_size"
+#define VFIO_USER_CAP_PGSIZES "pgsizes"
+#define VFIO_USER_CAP_MAP_MAX "max_dma_maps"
+#define VFIO_USER_CAP_MIGR "migration"
+#define VFIO_USER_CAP_MULTI "write_multiple"
+
+/* "migration" members */
+#define VFIO_USER_CAP_PGSIZE "pgsize"
+#define VFIO_USER_CAP_MAX_BITMAP "max_bitmap_size"
+
+/*
+ * Max FDs mainly comes into play when a device supports multiple interrupts
+ * where each ones uses an eventfd to inject it into the guest.
+ * It is clamped by the the number of FDs the qio channel supports in a
+ * single message.
+ */
+#define VFIO_USER_DEF_MAX_FDS 8
+#define VFIO_USER_MAX_MAX_FDS 16
+
+/*
+ * Max transfer limits the amount of data in region and DMA messages.
+ * Region R/W will be very small (limited by how much a single instruction
+ * can process) so just use a reasonable limit here.
+ */
+#define VFIO_USER_DEF_MAX_XFER (1024 * 1024)
+#define VFIO_USER_MAX_MAX_XFER (64 * 1024 * 1024)
+
+/*
+ * Default pagesizes supported is 4k.
+ */
+#define VFIO_USER_DEF_PGSIZE 4096
+
+/*
+ * Default max number of DMA mappings is stolen from the
+ * linux kernel "dma_entry_limit"
+ */
+#define VFIO_USER_DEF_MAP_MAX 65535
+
+/*
+ * Default max bitmap size is also take from the linux kernel,
+ * where usage of signed ints limits the VA range to 2^31 bytes.
+ * Dividing that by the number of bits per byte yields 256MB
+ */
+#define VFIO_USER_DEF_MAX_BITMAP (256 * 1024 * 1024)
+
+/*
+ * VFIO_USER_DMA_MAP
+ * imported from struct vfio_iommu_type1_dma_map
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint32_t argsz;
+ uint32_t flags;
+ uint64_t offset; /* FD offset */
+ uint64_t iova;
+ uint64_t size;
+} VFIOUserDMAMap;
+
+/*
+ * VFIO_USER_DMA_UNMAP
+ * imported from struct vfio_iommu_type1_dma_unmap
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint32_t argsz;
+ uint32_t flags;
+ uint64_t iova;
+ uint64_t size;
+} VFIOUserDMAUnmap;
+
+/*
+ * VFIO_USER_DEVICE_GET_INFO
+ * imported from struct vfio_device_info
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint32_t argsz;
+ uint32_t flags;
+ uint32_t num_regions;
+ uint32_t num_irqs;
+} VFIOUserDeviceInfo;
+
+/*
+ * VFIO_USER_DEVICE_GET_REGION_INFO
+ * imported from struct vfio_region_info
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint32_t argsz;
+ uint32_t flags;
+ uint32_t index;
+ uint32_t cap_offset;
+ uint64_t size;
+ uint64_t offset;
+} VFIOUserRegionInfo;
+
+/*
+ * VFIO_USER_DEVICE_GET_IRQ_INFO
+ * imported from struct vfio_irq_info
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint32_t argsz;
+ uint32_t flags;
+ uint32_t index;
+ uint32_t count;
+} VFIOUserIRQInfo;
+
+/*
+ * VFIO_USER_DEVICE_SET_IRQS
+ * imported from struct vfio_irq_set
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint32_t argsz;
+ uint32_t flags;
+ uint32_t index;
+ uint32_t start;
+ uint32_t count;
+} VFIOUserIRQSet;
+
+/*
+ * VFIO_USER_REGION_READ
+ * VFIO_USER_REGION_WRITE
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint64_t offset;
+ uint32_t region;
+ uint32_t count;
+ char data[];
+} VFIOUserRegionRW;
+
+/*
+ * VFIO_USER_DMA_READ
+ * VFIO_USER_DMA_WRITE
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint64_t offset;
+ uint32_t count;
+ char data[];
+} VFIOUserDMARW;
+
+/* imported from struct vfio_bitmap */
+typedef struct {
+ uint64_t pgsize;
+ uint64_t size;
+ char data[];
+} VFIOUserBitmap;
+
+/*
+ * VFIO_USER_REGION_WRITE_MULTI
+ */
+#define VFIO_USER_MULTI_DATA 8
+#define VFIO_USER_MULTI_MAX 200
+
+typedef struct {
+ uint64_t offset;
+ uint32_t region;
+ uint32_t count;
+ char data[VFIO_USER_MULTI_DATA];
+} VFIOUserWROne;
+
+typedef struct {
+ VFIOUserHdr hdr;
+ uint64_t wr_cnt;
+ VFIOUserWROne wrs[VFIO_USER_MULTI_MAX];
+} VFIOUserWRMulti;
+
+#endif /* VFIO_USER_PROTOCOL_H */
diff --git a/hw/vfio-user/proxy.c b/hw/vfio-user/proxy.c
new file mode 100644
index 0000000..c418954
--- /dev/null
+++ b/hw/vfio-user/proxy.c
@@ -0,0 +1,1356 @@
+/*
+ * vfio protocol over a UNIX socket.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include <sys/ioctl.h>
+
+#include "hw/vfio/vfio-device.h"
+#include "hw/vfio-user/proxy.h"
+#include "hw/vfio-user/trace.h"
+#include "qapi/error.h"
+#include "qobject/qbool.h"
+#include "qobject/qdict.h"
+#include "qobject/qjson.h"
+#include "qobject/qnum.h"
+#include "qemu/error-report.h"
+#include "qemu/lockable.h"
+#include "qemu/main-loop.h"
+#include "qemu/thread.h"
+#include "system/iothread.h"
+
+static IOThread *vfio_user_iothread;
+
+static void vfio_user_shutdown(VFIOUserProxy *proxy);
+static VFIOUserMsg *vfio_user_getmsg(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+ VFIOUserFDs *fds);
+static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg);
+
+static void vfio_user_recv(void *opaque);
+static void vfio_user_send(void *opaque);
+static void vfio_user_cb(void *opaque);
+
+static void vfio_user_request(void *opaque);
+
+static inline void vfio_user_set_error(VFIOUserHdr *hdr, uint32_t err)
+{
+ hdr->flags |= VFIO_USER_ERROR;
+ hdr->error_reply = err;
+}
+
+/*
+ * Functions called by main, CPU, or iothread threads
+ */
+
+static void vfio_user_shutdown(VFIOUserProxy *proxy)
+{
+ qio_channel_shutdown(proxy->ioc, QIO_CHANNEL_SHUTDOWN_READ, NULL);
+ qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, NULL,
+ proxy->ctx, NULL, NULL);
+}
+
+/*
+ * Same return values as qio_channel_writev_full():
+ *
+ * QIO_CHANNEL_ERR_BLOCK: *errp not set
+ * -1: *errp will be populated
+ * otherwise: bytes written
+ */
+static ssize_t vfio_user_send_qio(VFIOUserProxy *proxy, VFIOUserMsg *msg,
+ Error **errp)
+{
+ VFIOUserFDs *fds = msg->fds;
+ struct iovec iov = {
+ .iov_base = msg->hdr,
+ .iov_len = msg->hdr->size,
+ };
+ size_t numfds = 0;
+ int *fdp = NULL;
+ ssize_t ret;
+
+ if (fds != NULL && fds->send_fds != 0) {
+ numfds = fds->send_fds;
+ fdp = fds->fds;
+ }
+
+ ret = qio_channel_writev_full(proxy->ioc, &iov, 1, fdp, numfds, 0, errp);
+
+ if (ret == -1) {
+ vfio_user_set_error(msg->hdr, EIO);
+ vfio_user_shutdown(proxy);
+ }
+ trace_vfio_user_send_write(msg->hdr->id, ret);
+
+ return ret;
+}
+
+static VFIOUserMsg *vfio_user_getmsg(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+ VFIOUserFDs *fds)
+{
+ VFIOUserMsg *msg;
+
+ msg = QTAILQ_FIRST(&proxy->free);
+ if (msg != NULL) {
+ QTAILQ_REMOVE(&proxy->free, msg, next);
+ } else {
+ msg = g_malloc0(sizeof(*msg));
+ qemu_cond_init(&msg->cv);
+ }
+
+ msg->hdr = hdr;
+ msg->fds = fds;
+ return msg;
+}
+
+/*
+ * Recycle a message list entry to the free list.
+ */
+static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg)
+{
+ if (msg->type == VFIO_MSG_NONE) {
+ error_printf("vfio_user_recycle - freeing free msg\n");
+ return;
+ }
+
+ /* free msg buffer if no one is waiting to consume the reply */
+ if (msg->type == VFIO_MSG_NOWAIT || msg->type == VFIO_MSG_ASYNC) {
+ g_free(msg->hdr);
+ if (msg->fds != NULL) {
+ g_free(msg->fds);
+ }
+ }
+
+ msg->type = VFIO_MSG_NONE;
+ msg->hdr = NULL;
+ msg->fds = NULL;
+ msg->complete = false;
+ msg->pending = false;
+ QTAILQ_INSERT_HEAD(&proxy->free, msg, next);
+}
+
+VFIOUserFDs *vfio_user_getfds(int numfds)
+{
+ VFIOUserFDs *fds = g_malloc0(sizeof(*fds) + (numfds * sizeof(int)));
+
+ fds->fds = (int *)((char *)fds + sizeof(*fds));
+
+ return fds;
+}
+
+/*
+ * Functions only called by iothread
+ */
+
+/*
+ * Process a received message.
+ */
+static void vfio_user_process(VFIOUserProxy *proxy, VFIOUserMsg *msg,
+ bool isreply)
+{
+
+ /*
+ * Replies signal a waiter, if none just check for errors
+ * and free the message buffer.
+ *
+ * Requests get queued for the BH.
+ */
+ if (isreply) {
+ msg->complete = true;
+ if (msg->type == VFIO_MSG_WAIT) {
+ qemu_cond_signal(&msg->cv);
+ } else {
+ if (msg->hdr->flags & VFIO_USER_ERROR) {
+ error_printf("vfio_user_process: error reply on async ");
+ error_printf("request command %x error %s\n",
+ msg->hdr->command,
+ strerror(msg->hdr->error_reply));
+ }
+ /* youngest nowait msg has been ack'd */
+ if (proxy->last_nowait == msg) {
+ proxy->last_nowait = NULL;
+ }
+ vfio_user_recycle(proxy, msg);
+ }
+ } else {
+ QTAILQ_INSERT_TAIL(&proxy->incoming, msg, next);
+ qemu_bh_schedule(proxy->req_bh);
+ }
+}
+
+/*
+ * Complete a partial message read
+ */
+static int vfio_user_complete(VFIOUserProxy *proxy, Error **errp)
+{
+ VFIOUserMsg *msg = proxy->part_recv;
+ size_t msgleft = proxy->recv_left;
+ bool isreply;
+ char *data;
+ int ret;
+
+ data = (char *)msg->hdr + (msg->hdr->size - msgleft);
+ while (msgleft > 0) {
+ ret = qio_channel_read(proxy->ioc, data, msgleft, errp);
+
+ /* error or would block */
+ if (ret <= 0) {
+ /* try for rest on next iternation */
+ if (ret == QIO_CHANNEL_ERR_BLOCK) {
+ proxy->recv_left = msgleft;
+ }
+ return ret;
+ }
+ trace_vfio_user_recv_read(msg->hdr->id, ret);
+
+ msgleft -= ret;
+ data += ret;
+ }
+
+ /*
+ * Read complete message, process it.
+ */
+ proxy->part_recv = NULL;
+ proxy->recv_left = 0;
+ isreply = (msg->hdr->flags & VFIO_USER_TYPE) == VFIO_USER_REPLY;
+ vfio_user_process(proxy, msg, isreply);
+
+ /* return positive value */
+ return 1;
+}
+
+/*
+ * Receive and process one incoming message.
+ *
+ * For replies, find matching outgoing request and wake any waiters.
+ * For requests, queue in incoming list and run request BH.
+ */
+static int vfio_user_recv_one(VFIOUserProxy *proxy, Error **errp)
+{
+ VFIOUserMsg *msg = NULL;
+ g_autofree int *fdp = NULL;
+ VFIOUserFDs *reqfds;
+ VFIOUserHdr hdr;
+ struct iovec iov = {
+ .iov_base = &hdr,
+ .iov_len = sizeof(hdr),
+ };
+ bool isreply = false;
+ int i, ret;
+ size_t msgleft, numfds = 0;
+ char *data = NULL;
+ char *buf = NULL;
+
+ /*
+ * Complete any partial reads
+ */
+ if (proxy->part_recv != NULL) {
+ ret = vfio_user_complete(proxy, errp);
+
+ /* still not complete, try later */
+ if (ret == QIO_CHANNEL_ERR_BLOCK) {
+ return ret;
+ }
+
+ if (ret <= 0) {
+ goto fatal;
+ }
+ /* else fall into reading another msg */
+ }
+
+ /*
+ * Read header
+ */
+ ret = qio_channel_readv_full(proxy->ioc, &iov, 1, &fdp, &numfds, 0,
+ errp);
+ if (ret == QIO_CHANNEL_ERR_BLOCK) {
+ return ret;
+ }
+
+ /* read error or other side closed connection */
+ if (ret <= 0) {
+ goto fatal;
+ }
+
+ if (ret < sizeof(hdr)) {
+ error_setg(errp, "short read of header");
+ goto fatal;
+ }
+
+ /*
+ * Validate header
+ */
+ if (hdr.size < sizeof(VFIOUserHdr)) {
+ error_setg(errp, "bad header size");
+ goto fatal;
+ }
+ switch (hdr.flags & VFIO_USER_TYPE) {
+ case VFIO_USER_REQUEST:
+ isreply = false;
+ break;
+ case VFIO_USER_REPLY:
+ isreply = true;
+ break;
+ default:
+ error_setg(errp, "unknown message type");
+ goto fatal;
+ }
+ trace_vfio_user_recv_hdr(proxy->sockname, hdr.id, hdr.command, hdr.size,
+ hdr.flags);
+
+ /*
+ * For replies, find the matching pending request.
+ * For requests, reap incoming FDs.
+ */
+ if (isreply) {
+ QTAILQ_FOREACH(msg, &proxy->pending, next) {
+ if (hdr.id == msg->id) {
+ break;
+ }
+ }
+ if (msg == NULL) {
+ error_setg(errp, "unexpected reply");
+ goto err;
+ }
+ QTAILQ_REMOVE(&proxy->pending, msg, next);
+
+ /*
+ * Process any received FDs
+ */
+ if (numfds != 0) {
+ if (msg->fds == NULL || msg->fds->recv_fds < numfds) {
+ error_setg(errp, "unexpected FDs");
+ goto err;
+ }
+ msg->fds->recv_fds = numfds;
+ memcpy(msg->fds->fds, fdp, numfds * sizeof(int));
+ }
+ } else {
+ if (numfds != 0) {
+ reqfds = vfio_user_getfds(numfds);
+ memcpy(reqfds->fds, fdp, numfds * sizeof(int));
+ } else {
+ reqfds = NULL;
+ }
+ }
+
+ /*
+ * Put the whole message into a single buffer.
+ */
+ if (isreply) {
+ if (hdr.size > msg->rsize) {
+ error_setg(errp, "reply larger than recv buffer");
+ goto err;
+ }
+ *msg->hdr = hdr;
+ data = (char *)msg->hdr + sizeof(hdr);
+ } else {
+ if (hdr.size > proxy->max_xfer_size + sizeof(VFIOUserDMARW)) {
+ error_setg(errp, "vfio_user_recv request larger than max");
+ goto err;
+ }
+ buf = g_malloc0(hdr.size);
+ memcpy(buf, &hdr, sizeof(hdr));
+ data = buf + sizeof(hdr);
+ msg = vfio_user_getmsg(proxy, (VFIOUserHdr *)buf, reqfds);
+ msg->type = VFIO_MSG_REQ;
+ }
+
+ /*
+ * Read rest of message.
+ */
+ msgleft = hdr.size - sizeof(hdr);
+ while (msgleft > 0) {
+ ret = qio_channel_read(proxy->ioc, data, msgleft, errp);
+
+ /* prepare to complete read on next iternation */
+ if (ret == QIO_CHANNEL_ERR_BLOCK) {
+ proxy->part_recv = msg;
+ proxy->recv_left = msgleft;
+ return ret;
+ }
+
+ if (ret <= 0) {
+ goto fatal;
+ }
+ trace_vfio_user_recv_read(hdr.id, ret);
+
+ msgleft -= ret;
+ data += ret;
+ }
+
+ vfio_user_process(proxy, msg, isreply);
+ return 0;
+
+ /*
+ * fatal means the other side closed or we don't trust the stream
+ * err means this message is corrupt
+ */
+fatal:
+ vfio_user_shutdown(proxy);
+ proxy->state = VFIO_PROXY_ERROR;
+
+ /* set error if server side closed */
+ if (ret == 0) {
+ error_setg(errp, "server closed socket");
+ }
+
+err:
+ for (i = 0; i < numfds; i++) {
+ close(fdp[i]);
+ }
+ if (isreply && msg != NULL) {
+ /* force an error to keep sending thread from hanging */
+ vfio_user_set_error(msg->hdr, EINVAL);
+ msg->complete = true;
+ qemu_cond_signal(&msg->cv);
+ }
+ return -1;
+}
+
+static void vfio_user_recv(void *opaque)
+{
+ VFIOUserProxy *proxy = opaque;
+
+ QEMU_LOCK_GUARD(&proxy->lock);
+
+ if (proxy->state == VFIO_PROXY_CONNECTED) {
+ Error *local_err = NULL;
+
+ while (vfio_user_recv_one(proxy, &local_err) == 0) {
+ ;
+ }
+
+ if (local_err != NULL) {
+ error_report_err(local_err);
+ }
+ }
+}
+
+/*
+ * Send a single message, same return semantics as vfio_user_send_qio().
+ *
+ * Sent async messages are freed, others are moved to pending queue.
+ */
+static ssize_t vfio_user_send_one(VFIOUserProxy *proxy, Error **errp)
+{
+ VFIOUserMsg *msg;
+ ssize_t ret;
+
+ msg = QTAILQ_FIRST(&proxy->outgoing);
+ ret = vfio_user_send_qio(proxy, msg, errp);
+ if (ret < 0) {
+ return ret;
+ }
+
+ QTAILQ_REMOVE(&proxy->outgoing, msg, next);
+ proxy->num_outgoing--;
+ if (msg->type == VFIO_MSG_ASYNC) {
+ vfio_user_recycle(proxy, msg);
+ } else {
+ QTAILQ_INSERT_TAIL(&proxy->pending, msg, next);
+ msg->pending = true;
+ }
+
+ return ret;
+}
+
+/*
+ * Send messages from outgoing queue when the socket buffer has space.
+ * If we deplete 'outgoing', remove ourselves from the poll list.
+ */
+static void vfio_user_send(void *opaque)
+{
+ VFIOUserProxy *proxy = opaque;
+
+ QEMU_LOCK_GUARD(&proxy->lock);
+
+ if (proxy->state == VFIO_PROXY_CONNECTED) {
+ while (!QTAILQ_EMPTY(&proxy->outgoing)) {
+ Error *local_err = NULL;
+ int ret;
+
+ ret = vfio_user_send_one(proxy, &local_err);
+
+ if (ret == QIO_CHANNEL_ERR_BLOCK) {
+ return;
+ } else if (ret == -1) {
+ error_report_err(local_err);
+ return;
+ }
+ }
+ qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx,
+ vfio_user_recv, NULL, NULL, proxy);
+
+ /* queue empty - send any pending multi write msgs */
+ if (proxy->wr_multi != NULL) {
+ vfio_user_flush_multi(proxy);
+ }
+ }
+}
+
+static void vfio_user_cb(void *opaque)
+{
+ VFIOUserProxy *proxy = opaque;
+
+ QEMU_LOCK_GUARD(&proxy->lock);
+
+ proxy->state = VFIO_PROXY_CLOSED;
+ qemu_cond_signal(&proxy->close_cv);
+}
+
+
+/*
+ * Functions called by main or CPU threads
+ */
+
+/*
+ * Process incoming requests.
+ *
+ * The bus-specific callback has the form:
+ * request(opaque, msg)
+ * where 'opaque' was specified in vfio_user_set_handler
+ * and 'msg' is the inbound message.
+ *
+ * The callback is responsible for disposing of the message buffer,
+ * usually by re-using it when calling vfio_send_reply or vfio_send_error,
+ * both of which free their message buffer when the reply is sent.
+ *
+ * If the callback uses a new buffer, it needs to free the old one.
+ */
+static void vfio_user_request(void *opaque)
+{
+ VFIOUserProxy *proxy = opaque;
+ VFIOUserMsgQ new, free;
+ VFIOUserMsg *msg, *m1;
+
+ /* reap all incoming */
+ QTAILQ_INIT(&new);
+ WITH_QEMU_LOCK_GUARD(&proxy->lock) {
+ QTAILQ_FOREACH_SAFE(msg, &proxy->incoming, next, m1) {
+ QTAILQ_REMOVE(&proxy->incoming, msg, next);
+ QTAILQ_INSERT_TAIL(&new, msg, next);
+ }
+ }
+
+ /* process list */
+ QTAILQ_INIT(&free);
+ QTAILQ_FOREACH_SAFE(msg, &new, next, m1) {
+ QTAILQ_REMOVE(&new, msg, next);
+ trace_vfio_user_recv_request(msg->hdr->command);
+ proxy->request(proxy->req_arg, msg);
+ QTAILQ_INSERT_HEAD(&free, msg, next);
+ }
+
+ /* free list */
+ WITH_QEMU_LOCK_GUARD(&proxy->lock) {
+ QTAILQ_FOREACH_SAFE(msg, &free, next, m1) {
+ vfio_user_recycle(proxy, msg);
+ }
+ }
+}
+
+/*
+ * Messages are queued onto the proxy's outgoing list.
+ *
+ * It handles 3 types of messages:
+ *
+ * async messages - replies and posted writes
+ *
+ * There will be no reply from the server, so message
+ * buffers are freed after they're sent.
+ *
+ * nowait messages - map/unmap during address space transactions
+ *
+ * These are also sent async, but a reply is expected so that
+ * vfio_wait_reqs() can wait for the youngest nowait request.
+ * They transition from the outgoing list to the pending list
+ * when sent, and are freed when the reply is received.
+ *
+ * wait messages - all other requests
+ *
+ * The reply to these messages is waited for by their caller.
+ * They also transition from outgoing to pending when sent, but
+ * the message buffer is returned to the caller with the reply
+ * contents. The caller is responsible for freeing these messages.
+ *
+ * As an optimization, if the outgoing list and the socket send
+ * buffer are empty, the message is sent inline instead of being
+ * added to the outgoing list. The rest of the transitions are
+ * unchanged.
+ */
+static bool vfio_user_send_queued(VFIOUserProxy *proxy, VFIOUserMsg *msg,
+ Error **errp)
+{
+ int ret;
+
+ /* older coalesced writes go first */
+ if (proxy->wr_multi != NULL &&
+ ((msg->hdr->flags & VFIO_USER_TYPE) == VFIO_USER_REQUEST)) {
+ vfio_user_flush_multi(proxy);
+ }
+
+ /*
+ * Unsent outgoing msgs - add to tail
+ */
+ if (!QTAILQ_EMPTY(&proxy->outgoing)) {
+ QTAILQ_INSERT_TAIL(&proxy->outgoing, msg, next);
+ proxy->num_outgoing++;
+ return true;
+ }
+
+ /*
+ * Try inline - if blocked, queue it and kick send poller
+ */
+ if (proxy->flags & VFIO_PROXY_FORCE_QUEUED) {
+ ret = QIO_CHANNEL_ERR_BLOCK;
+ } else {
+ ret = vfio_user_send_qio(proxy, msg, errp);
+ }
+
+ if (ret == QIO_CHANNEL_ERR_BLOCK) {
+ QTAILQ_INSERT_HEAD(&proxy->outgoing, msg, next);
+ proxy->num_outgoing = 1;
+ qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx,
+ vfio_user_recv, proxy->ctx,
+ vfio_user_send, proxy);
+ return true;
+ }
+ if (ret == -1) {
+ return false;
+ }
+
+ /*
+ * Sent - free async, add others to pending
+ */
+ if (msg->type == VFIO_MSG_ASYNC) {
+ vfio_user_recycle(proxy, msg);
+ } else {
+ QTAILQ_INSERT_TAIL(&proxy->pending, msg, next);
+ msg->pending = true;
+ }
+
+ return true;
+}
+
+/*
+ * nowait send - vfio_wait_reqs() can wait for it later
+ *
+ * Returns false if we did not successfully receive a reply message, in which
+ * case @errp will be populated.
+ *
+ * In either case, ownership of @hdr and @fds is taken, and the caller must
+ * *not* free them itself.
+ */
+bool vfio_user_send_nowait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+ VFIOUserFDs *fds, int rsize, Error **errp)
+{
+ VFIOUserMsg *msg;
+
+ QEMU_LOCK_GUARD(&proxy->lock);
+
+ msg = vfio_user_getmsg(proxy, hdr, fds);
+ msg->id = hdr->id;
+ msg->rsize = rsize ? rsize : hdr->size;
+ msg->type = VFIO_MSG_NOWAIT;
+
+ if (hdr->flags & VFIO_USER_NO_REPLY) {
+ error_setg_errno(errp, EINVAL, "%s on NO_REPLY message", __func__);
+ vfio_user_recycle(proxy, msg);
+ return false;
+ }
+
+ if (!vfio_user_send_queued(proxy, msg, errp)) {
+ vfio_user_recycle(proxy, msg);
+ return false;
+ }
+
+ proxy->last_nowait = msg;
+
+ return true;
+}
+
+/*
+ * Returns false if we did not successfully receive a reply message, in which
+ * case @errp will be populated.
+ *
+ * In either case, the caller must free @hdr and @fds if needed.
+ */
+bool vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+ VFIOUserFDs *fds, int rsize, Error **errp)
+{
+ VFIOUserMsg *msg;
+ bool ok = false;
+
+ if (hdr->flags & VFIO_USER_NO_REPLY) {
+ error_setg_errno(errp, EINVAL, "%s on NO_REPLY message", __func__);
+ return false;
+ }
+
+ qemu_mutex_lock(&proxy->lock);
+
+ msg = vfio_user_getmsg(proxy, hdr, fds);
+ msg->id = hdr->id;
+ msg->rsize = rsize ? rsize : hdr->size;
+ msg->type = VFIO_MSG_WAIT;
+
+ ok = vfio_user_send_queued(proxy, msg, errp);
+
+ if (ok) {
+ while (!msg->complete) {
+ if (!qemu_cond_timedwait(&msg->cv, &proxy->lock,
+ proxy->wait_time)) {
+ VFIOUserMsgQ *list;
+
+ list = msg->pending ? &proxy->pending : &proxy->outgoing;
+ QTAILQ_REMOVE(list, msg, next);
+ error_setg_errno(errp, ETIMEDOUT,
+ "timed out waiting for reply");
+ ok = false;
+ break;
+ }
+ }
+ }
+
+ vfio_user_recycle(proxy, msg);
+
+ qemu_mutex_unlock(&proxy->lock);
+
+ return ok;
+}
+
+/*
+ * async send - msg can be queued, but will be freed when sent
+ *
+ * Returns false on failure, in which case @errp will be populated.
+ *
+ * In either case, ownership of @hdr and @fds is taken, and the caller must
+ * *not* free them itself.
+ */
+bool vfio_user_send_async(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+ VFIOUserFDs *fds, Error **errp)
+{
+ VFIOUserMsg *msg;
+
+ QEMU_LOCK_GUARD(&proxy->lock);
+
+ msg = vfio_user_getmsg(proxy, hdr, fds);
+ msg->id = hdr->id;
+ msg->rsize = 0;
+ msg->type = VFIO_MSG_ASYNC;
+
+ if (!(hdr->flags & (VFIO_USER_NO_REPLY | VFIO_USER_REPLY))) {
+ error_setg_errno(errp, EINVAL, "%s on sync message", __func__);
+ vfio_user_recycle(proxy, msg);
+ return false;
+ }
+
+ if (!vfio_user_send_queued(proxy, msg, errp)) {
+ vfio_user_recycle(proxy, msg);
+ return false;
+ }
+
+ return true;
+}
+
+void vfio_user_wait_reqs(VFIOUserProxy *proxy)
+{
+ VFIOUserMsg *msg;
+
+ /*
+ * Any DMA map/unmap requests sent in the middle
+ * of a memory region transaction were sent nowait.
+ * Wait for them here.
+ */
+ qemu_mutex_lock(&proxy->lock);
+ if (proxy->last_nowait != NULL) {
+ /*
+ * Change type to WAIT to wait for reply
+ */
+ msg = proxy->last_nowait;
+ msg->type = VFIO_MSG_WAIT;
+ proxy->last_nowait = NULL;
+ while (!msg->complete) {
+ if (!qemu_cond_timedwait(&msg->cv, &proxy->lock,
+ proxy->wait_time)) {
+ VFIOUserMsgQ *list;
+
+ list = msg->pending ? &proxy->pending : &proxy->outgoing;
+ QTAILQ_REMOVE(list, msg, next);
+ error_printf("vfio_wait_reqs - timed out\n");
+ break;
+ }
+ }
+
+ if (msg->hdr->flags & VFIO_USER_ERROR) {
+ error_printf("vfio_user_wait_reqs - error reply on async ");
+ error_printf("request: command %x error %s\n", msg->hdr->command,
+ strerror(msg->hdr->error_reply));
+ }
+
+ /*
+ * Change type back to NOWAIT to free
+ */
+ msg->type = VFIO_MSG_NOWAIT;
+ vfio_user_recycle(proxy, msg);
+ }
+
+ qemu_mutex_unlock(&proxy->lock);
+}
+
+/*
+ * Reply to an incoming request.
+ */
+void vfio_user_send_reply(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int size)
+{
+ Error *local_err = NULL;
+
+ if (size < sizeof(VFIOUserHdr)) {
+ error_printf("%s: size too small", __func__);
+ g_free(hdr);
+ return;
+ }
+
+ /*
+ * convert header to associated reply
+ */
+ hdr->flags = VFIO_USER_REPLY;
+ hdr->size = size;
+
+ if (!vfio_user_send_async(proxy, hdr, NULL, &local_err)) {
+ error_report_err(local_err);
+ }
+}
+
+/*
+ * Send an error reply to an incoming request.
+ */
+void vfio_user_send_error(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int error)
+{
+ Error *local_err = NULL;
+
+ /*
+ * convert header to associated reply
+ */
+ hdr->flags = VFIO_USER_REPLY;
+ hdr->flags |= VFIO_USER_ERROR;
+ hdr->error_reply = error;
+ hdr->size = sizeof(*hdr);
+
+ if (!vfio_user_send_async(proxy, hdr, NULL, &local_err)) {
+ error_report_err(local_err);
+ }
+}
+
+/*
+ * Close FDs erroneously received in an incoming request.
+ */
+void vfio_user_putfds(VFIOUserMsg *msg)
+{
+ VFIOUserFDs *fds = msg->fds;
+ int i;
+
+ for (i = 0; i < fds->recv_fds; i++) {
+ close(fds->fds[i]);
+ }
+ g_free(fds);
+ msg->fds = NULL;
+}
+
+void
+vfio_user_disable_posted_writes(VFIOUserProxy *proxy)
+{
+ WITH_QEMU_LOCK_GUARD(&proxy->lock) {
+ proxy->flags |= VFIO_PROXY_NO_POST;
+ }
+}
+
+static QLIST_HEAD(, VFIOUserProxy) vfio_user_sockets =
+ QLIST_HEAD_INITIALIZER(vfio_user_sockets);
+
+VFIOUserProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp)
+{
+ VFIOUserProxy *proxy;
+ QIOChannelSocket *sioc;
+ QIOChannel *ioc;
+ char *sockname;
+
+ if (addr->type != SOCKET_ADDRESS_TYPE_UNIX) {
+ error_setg(errp, "vfio_user_connect - bad address family");
+ return NULL;
+ }
+ sockname = addr->u.q_unix.path;
+
+ sioc = qio_channel_socket_new();
+ ioc = QIO_CHANNEL(sioc);
+ if (qio_channel_socket_connect_sync(sioc, addr, errp)) {
+ object_unref(OBJECT(ioc));
+ return NULL;
+ }
+ qio_channel_set_blocking(ioc, false, NULL);
+
+ proxy = g_malloc0(sizeof(VFIOUserProxy));
+ proxy->sockname = g_strdup_printf("unix:%s", sockname);
+ proxy->ioc = ioc;
+
+ /* init defaults */
+ proxy->max_xfer_size = VFIO_USER_DEF_MAX_XFER;
+ proxy->max_send_fds = VFIO_USER_DEF_MAX_FDS;
+ proxy->max_dma = VFIO_USER_DEF_MAP_MAX;
+ proxy->dma_pgsizes = VFIO_USER_DEF_PGSIZE;
+ proxy->max_bitmap = VFIO_USER_DEF_MAX_BITMAP;
+ proxy->migr_pgsize = VFIO_USER_DEF_PGSIZE;
+
+ proxy->flags = VFIO_PROXY_CLIENT;
+ proxy->state = VFIO_PROXY_CONNECTED;
+
+ qemu_mutex_init(&proxy->lock);
+ qemu_cond_init(&proxy->close_cv);
+
+ if (vfio_user_iothread == NULL) {
+ vfio_user_iothread = iothread_create("VFIO user", errp);
+ }
+
+ proxy->ctx = iothread_get_aio_context(vfio_user_iothread);
+ proxy->req_bh = qemu_bh_new(vfio_user_request, proxy);
+
+ QTAILQ_INIT(&proxy->outgoing);
+ QTAILQ_INIT(&proxy->incoming);
+ QTAILQ_INIT(&proxy->free);
+ QTAILQ_INIT(&proxy->pending);
+ QLIST_INSERT_HEAD(&vfio_user_sockets, proxy, next);
+
+ return proxy;
+}
+
+void vfio_user_set_handler(VFIODevice *vbasedev,
+ void (*handler)(void *opaque, VFIOUserMsg *msg),
+ void *req_arg)
+{
+ VFIOUserProxy *proxy = vbasedev->proxy;
+
+ proxy->request = handler;
+ proxy->req_arg = req_arg;
+ qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx,
+ vfio_user_recv, NULL, NULL, proxy);
+}
+
+void vfio_user_disconnect(VFIOUserProxy *proxy)
+{
+ VFIOUserMsg *r1, *r2;
+
+ qemu_mutex_lock(&proxy->lock);
+
+ /* our side is quitting */
+ if (proxy->state == VFIO_PROXY_CONNECTED) {
+ vfio_user_shutdown(proxy);
+ if (!QTAILQ_EMPTY(&proxy->pending)) {
+ error_printf("vfio_user_disconnect: outstanding requests\n");
+ }
+ }
+ object_unref(OBJECT(proxy->ioc));
+ proxy->ioc = NULL;
+ qemu_bh_delete(proxy->req_bh);
+ proxy->req_bh = NULL;
+
+ proxy->state = VFIO_PROXY_CLOSING;
+ QTAILQ_FOREACH_SAFE(r1, &proxy->outgoing, next, r2) {
+ qemu_cond_destroy(&r1->cv);
+ QTAILQ_REMOVE(&proxy->outgoing, r1, next);
+ g_free(r1);
+ }
+ QTAILQ_FOREACH_SAFE(r1, &proxy->incoming, next, r2) {
+ qemu_cond_destroy(&r1->cv);
+ QTAILQ_REMOVE(&proxy->incoming, r1, next);
+ g_free(r1);
+ }
+ QTAILQ_FOREACH_SAFE(r1, &proxy->pending, next, r2) {
+ qemu_cond_destroy(&r1->cv);
+ QTAILQ_REMOVE(&proxy->pending, r1, next);
+ g_free(r1);
+ }
+ QTAILQ_FOREACH_SAFE(r1, &proxy->free, next, r2) {
+ qemu_cond_destroy(&r1->cv);
+ QTAILQ_REMOVE(&proxy->free, r1, next);
+ g_free(r1);
+ }
+
+ /*
+ * Make sure the iothread isn't blocking anywhere
+ * with a ref to this proxy by waiting for a BH
+ * handler to run after the proxy fd handlers were
+ * deleted above.
+ */
+ aio_bh_schedule_oneshot(proxy->ctx, vfio_user_cb, proxy);
+ qemu_cond_wait(&proxy->close_cv, &proxy->lock);
+
+ /* we now hold the only ref to proxy */
+ qemu_mutex_unlock(&proxy->lock);
+ qemu_cond_destroy(&proxy->close_cv);
+ qemu_mutex_destroy(&proxy->lock);
+
+ QLIST_REMOVE(proxy, next);
+ if (QLIST_EMPTY(&vfio_user_sockets)) {
+ iothread_destroy(vfio_user_iothread);
+ vfio_user_iothread = NULL;
+ }
+
+ g_free(proxy->sockname);
+ g_free(proxy);
+}
+
+void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd,
+ uint32_t size, uint32_t flags)
+{
+ static uint16_t next_id;
+
+ hdr->id = qatomic_fetch_inc(&next_id);
+ hdr->command = cmd;
+ hdr->size = size;
+ hdr->flags = (flags & ~VFIO_USER_TYPE) | VFIO_USER_REQUEST;
+ hdr->error_reply = 0;
+}
+
+struct cap_entry {
+ const char *name;
+ bool (*check)(VFIOUserProxy *proxy, QObject *qobj, Error **errp);
+};
+
+static bool caps_parse(VFIOUserProxy *proxy, QDict *qdict,
+ struct cap_entry caps[], Error **errp)
+{
+ QObject *qobj;
+ struct cap_entry *p;
+
+ for (p = caps; p->name != NULL; p++) {
+ qobj = qdict_get(qdict, p->name);
+ if (qobj != NULL) {
+ if (!p->check(proxy, qobj, errp)) {
+ return false;
+ }
+ qdict_del(qdict, p->name);
+ }
+ }
+
+ /* warning, for now */
+ if (qdict_size(qdict) != 0) {
+ warn_report("spurious capabilities");
+ }
+ return true;
+}
+
+static bool check_migr_pgsize(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
+{
+ QNum *qn = qobject_to(QNum, qobj);
+ uint64_t pgsize;
+
+ if (qn == NULL || !qnum_get_try_uint(qn, &pgsize)) {
+ error_setg(errp, "malformed %s", VFIO_USER_CAP_PGSIZE);
+ return false;
+ }
+
+ /* must be larger than default */
+ if (pgsize & (VFIO_USER_DEF_PGSIZE - 1)) {
+ error_setg(errp, "pgsize 0x%"PRIx64" too small", pgsize);
+ return false;
+ }
+
+ proxy->migr_pgsize = pgsize;
+ return true;
+}
+
+static bool check_bitmap(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
+{
+ QNum *qn = qobject_to(QNum, qobj);
+ uint64_t bitmap_size;
+
+ if (qn == NULL || !qnum_get_try_uint(qn, &bitmap_size)) {
+ error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_BITMAP);
+ return false;
+ }
+
+ /* can only lower it */
+ if (bitmap_size > VFIO_USER_DEF_MAX_BITMAP) {
+ error_setg(errp, "%s too large", VFIO_USER_CAP_MAX_BITMAP);
+ return false;
+ }
+
+ proxy->max_bitmap = bitmap_size;
+ return true;
+}
+
+static struct cap_entry caps_migr[] = {
+ { VFIO_USER_CAP_PGSIZE, check_migr_pgsize },
+ { VFIO_USER_CAP_MAX_BITMAP, check_bitmap },
+ { NULL }
+};
+
+static bool check_max_fds(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
+{
+ QNum *qn = qobject_to(QNum, qobj);
+ uint64_t max_send_fds;
+
+ if (qn == NULL || !qnum_get_try_uint(qn, &max_send_fds) ||
+ max_send_fds > VFIO_USER_MAX_MAX_FDS) {
+ error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_FDS);
+ return false;
+ }
+ proxy->max_send_fds = max_send_fds;
+ return true;
+}
+
+static bool check_max_xfer(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
+{
+ QNum *qn = qobject_to(QNum, qobj);
+ uint64_t max_xfer_size;
+
+ if (qn == NULL || !qnum_get_try_uint(qn, &max_xfer_size) ||
+ max_xfer_size > VFIO_USER_MAX_MAX_XFER) {
+ error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_XFER);
+ return false;
+ }
+ proxy->max_xfer_size = max_xfer_size;
+ return true;
+}
+
+static bool check_pgsizes(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
+{
+ QNum *qn = qobject_to(QNum, qobj);
+ uint64_t pgsizes;
+
+ if (qn == NULL || !qnum_get_try_uint(qn, &pgsizes)) {
+ error_setg(errp, "malformed %s", VFIO_USER_CAP_PGSIZES);
+ return false;
+ }
+
+ /* must be larger than default */
+ if (pgsizes & (VFIO_USER_DEF_PGSIZE - 1)) {
+ error_setg(errp, "pgsize 0x%"PRIx64" too small", pgsizes);
+ return false;
+ }
+
+ proxy->dma_pgsizes = pgsizes;
+ return true;
+}
+
+static bool check_max_dma(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
+{
+ QNum *qn = qobject_to(QNum, qobj);
+ uint64_t max_dma;
+
+ if (qn == NULL || !qnum_get_try_uint(qn, &max_dma)) {
+ error_setg(errp, "malformed %s", VFIO_USER_CAP_MAP_MAX);
+ return false;
+ }
+
+ /* can only lower it */
+ if (max_dma > VFIO_USER_DEF_MAP_MAX) {
+ error_setg(errp, "%s too large", VFIO_USER_CAP_MAP_MAX);
+ return false;
+ }
+
+ proxy->max_dma = max_dma;
+ return true;
+}
+
+static bool check_migr(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
+{
+ QDict *qdict = qobject_to(QDict, qobj);
+
+ if (qdict == NULL) {
+ error_setg(errp, "malformed %s", VFIO_USER_CAP_MAX_FDS);
+ return true;
+ }
+ return caps_parse(proxy, qdict, caps_migr, errp);
+}
+
+static bool check_multi(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
+{
+ QBool *qb = qobject_to(QBool, qobj);
+
+ if (qb == NULL) {
+ error_setg(errp, "malformed %s", VFIO_USER_CAP_MULTI);
+ return false;
+ }
+ if (qbool_get_bool(qb)) {
+ proxy->flags |= VFIO_PROXY_USE_MULTI;
+ }
+ return true;
+}
+
+static struct cap_entry caps_cap[] = {
+ { VFIO_USER_CAP_MAX_FDS, check_max_fds },
+ { VFIO_USER_CAP_MAX_XFER, check_max_xfer },
+ { VFIO_USER_CAP_PGSIZES, check_pgsizes },
+ { VFIO_USER_CAP_MAP_MAX, check_max_dma },
+ { VFIO_USER_CAP_MIGR, check_migr },
+ { VFIO_USER_CAP_MULTI, check_multi },
+ { NULL }
+};
+
+static bool check_cap(VFIOUserProxy *proxy, QObject *qobj, Error **errp)
+{
+ QDict *qdict = qobject_to(QDict, qobj);
+
+ if (qdict == NULL) {
+ error_setg(errp, "malformed %s", VFIO_USER_CAP);
+ return false;
+ }
+ return caps_parse(proxy, qdict, caps_cap, errp);
+}
+
+static struct cap_entry ver_0_0[] = {
+ { VFIO_USER_CAP, check_cap },
+ { NULL }
+};
+
+static bool caps_check(VFIOUserProxy *proxy, int minor, const char *caps,
+ Error **errp)
+{
+ QObject *qobj;
+ QDict *qdict;
+ bool ret;
+
+ qobj = qobject_from_json(caps, NULL);
+ if (qobj == NULL) {
+ error_setg(errp, "malformed capabilities %s", caps);
+ return false;
+ }
+ qdict = qobject_to(QDict, qobj);
+ if (qdict == NULL) {
+ error_setg(errp, "capabilities %s not an object", caps);
+ qobject_unref(qobj);
+ return false;
+ }
+ ret = caps_parse(proxy, qdict, ver_0_0, errp);
+
+ qobject_unref(qobj);
+ return ret;
+}
+
+static GString *caps_json(void)
+{
+ QDict *dict = qdict_new();
+ QDict *capdict = qdict_new();
+ QDict *migdict = qdict_new();
+ GString *str;
+
+ qdict_put_int(migdict, VFIO_USER_CAP_PGSIZE, VFIO_USER_DEF_PGSIZE);
+ qdict_put_int(migdict, VFIO_USER_CAP_MAX_BITMAP, VFIO_USER_DEF_MAX_BITMAP);
+ qdict_put_obj(capdict, VFIO_USER_CAP_MIGR, QOBJECT(migdict));
+
+ qdict_put_int(capdict, VFIO_USER_CAP_MAX_FDS, VFIO_USER_MAX_MAX_FDS);
+ qdict_put_int(capdict, VFIO_USER_CAP_MAX_XFER, VFIO_USER_DEF_MAX_XFER);
+ qdict_put_int(capdict, VFIO_USER_CAP_PGSIZES, VFIO_USER_DEF_PGSIZE);
+ qdict_put_int(capdict, VFIO_USER_CAP_MAP_MAX, VFIO_USER_DEF_MAP_MAX);
+ qdict_put_bool(capdict, VFIO_USER_CAP_MULTI, true);
+
+ qdict_put_obj(dict, VFIO_USER_CAP, QOBJECT(capdict));
+
+ str = qobject_to_json(QOBJECT(dict));
+ qobject_unref(dict);
+ return str;
+}
+
+bool vfio_user_validate_version(VFIOUserProxy *proxy, Error **errp)
+{
+ g_autofree VFIOUserVersion *msgp = NULL;
+ GString *caps;
+ char *reply;
+ int size, caplen;
+
+ caps = caps_json();
+ caplen = caps->len + 1;
+ size = sizeof(*msgp) + caplen;
+ msgp = g_malloc0(size);
+
+ vfio_user_request_msg(&msgp->hdr, VFIO_USER_VERSION, size, 0);
+ msgp->major = VFIO_USER_MAJOR_VER;
+ msgp->minor = VFIO_USER_MINOR_VER;
+ memcpy(&msgp->capabilities, caps->str, caplen);
+ g_string_free(caps, true);
+ trace_vfio_user_version(msgp->major, msgp->minor, msgp->capabilities);
+
+ if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, errp)) {
+ return false;
+ }
+
+ if (msgp->hdr.flags & VFIO_USER_ERROR) {
+ error_setg_errno(errp, msgp->hdr.error_reply, "version reply");
+ return false;
+ }
+
+ if (msgp->major != VFIO_USER_MAJOR_VER ||
+ msgp->minor > VFIO_USER_MINOR_VER) {
+ error_setg(errp, "incompatible server version");
+ return false;
+ }
+
+ reply = msgp->capabilities;
+ if (reply[msgp->hdr.size - sizeof(*msgp) - 1] != '\0') {
+ error_setg(errp, "corrupt version reply");
+ return false;
+ }
+
+ if (!caps_check(proxy, msgp->minor, reply, errp)) {
+ return false;
+ }
+
+ trace_vfio_user_version(msgp->major, msgp->minor, msgp->capabilities);
+ return true;
+}
+
+void vfio_user_flush_multi(VFIOUserProxy *proxy)
+{
+ VFIOUserMsg *msg;
+ VFIOUserWRMulti *wm = proxy->wr_multi;
+ Error *local_err = NULL;
+
+ proxy->wr_multi = NULL;
+
+ /* adjust size for actual # of writes */
+ wm->hdr.size -= (VFIO_USER_MULTI_MAX - wm->wr_cnt) * sizeof(VFIOUserWROne);
+
+ msg = vfio_user_getmsg(proxy, &wm->hdr, NULL);
+ msg->id = wm->hdr.id;
+ msg->rsize = 0;
+ msg->type = VFIO_MSG_ASYNC;
+ trace_vfio_user_wrmulti("flush", wm->wr_cnt);
+
+ if (!vfio_user_send_queued(proxy, msg, &local_err)) {
+ error_report_err(local_err);
+ vfio_user_recycle(proxy, msg);
+ }
+}
+
+void vfio_user_create_multi(VFIOUserProxy *proxy)
+{
+ VFIOUserWRMulti *wm;
+
+ wm = g_malloc0(sizeof(*wm));
+ vfio_user_request_msg(&wm->hdr, VFIO_USER_REGION_WRITE_MULTI,
+ sizeof(*wm), VFIO_USER_NO_REPLY);
+ proxy->wr_multi = wm;
+}
+
+void vfio_user_add_multi(VFIOUserProxy *proxy, uint8_t index,
+ off_t offset, uint32_t count, void *data)
+{
+ VFIOUserWRMulti *wm = proxy->wr_multi;
+ VFIOUserWROne *w1 = &wm->wrs[wm->wr_cnt];
+
+ w1->offset = offset;
+ w1->region = index;
+ w1->count = count;
+ memcpy(&w1->data, data, count);
+
+ wm->wr_cnt++;
+ trace_vfio_user_wrmulti("add", wm->wr_cnt);
+ if (wm->wr_cnt == VFIO_USER_MULTI_MAX ||
+ proxy->num_outgoing < VFIO_USER_OUT_LOW) {
+ vfio_user_flush_multi(proxy);
+ }
+}
diff --git a/hw/vfio-user/proxy.h b/hw/vfio-user/proxy.h
new file mode 100644
index 0000000..61e64a0
--- /dev/null
+++ b/hw/vfio-user/proxy.h
@@ -0,0 +1,135 @@
+#ifndef VFIO_USER_PROXY_H
+#define VFIO_USER_PROXY_H
+
+/*
+ * vfio protocol over a UNIX socket.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "io/channel.h"
+#include "io/channel-socket.h"
+
+#include "qemu/queue.h"
+#include "qemu/sockets.h"
+#include "qemu/thread.h"
+#include "hw/vfio/vfio-device.h"
+#include "hw/vfio-user/protocol.h"
+
+typedef struct {
+ int send_fds;
+ int recv_fds;
+ int *fds;
+} VFIOUserFDs;
+
+enum msg_type {
+ VFIO_MSG_NONE,
+ VFIO_MSG_ASYNC,
+ VFIO_MSG_WAIT,
+ VFIO_MSG_NOWAIT,
+ VFIO_MSG_REQ,
+};
+
+typedef struct VFIOUserMsg {
+ QTAILQ_ENTRY(VFIOUserMsg) next;
+ VFIOUserHdr *hdr;
+ VFIOUserFDs *fds;
+ uint32_t rsize;
+ uint32_t id;
+ QemuCond cv;
+ bool complete;
+ bool pending;
+ enum msg_type type;
+} VFIOUserMsg;
+
+
+enum proxy_state {
+ VFIO_PROXY_CONNECTED = 1,
+ VFIO_PROXY_ERROR = 2,
+ VFIO_PROXY_CLOSING = 3,
+ VFIO_PROXY_CLOSED = 4,
+};
+
+typedef QTAILQ_HEAD(VFIOUserMsgQ, VFIOUserMsg) VFIOUserMsgQ;
+
+typedef struct VFIOUserProxy {
+ QLIST_ENTRY(VFIOUserProxy) next;
+ char *sockname;
+ struct QIOChannel *ioc;
+ void (*request)(void *opaque, VFIOUserMsg *msg);
+ void *req_arg;
+ uint64_t max_xfer_size;
+ uint64_t max_send_fds;
+ uint64_t max_dma;
+ uint64_t dma_pgsizes;
+ uint64_t max_bitmap;
+ uint64_t migr_pgsize;
+ int flags;
+ uint32_t wait_time;
+ QemuCond close_cv;
+ AioContext *ctx;
+ QEMUBH *req_bh;
+ bool async_ops;
+
+ /*
+ * above only changed when BQL is held
+ * below are protected by per-proxy lock
+ */
+ QemuMutex lock;
+ VFIOUserMsgQ free;
+ VFIOUserMsgQ pending;
+ VFIOUserMsgQ incoming;
+ VFIOUserMsgQ outgoing;
+ VFIOUserMsg *last_nowait;
+ VFIOUserMsg *part_recv;
+ size_t recv_left;
+ VFIOUserWRMulti *wr_multi;
+ int num_outgoing;
+ enum proxy_state state;
+} VFIOUserProxy;
+
+/* VFIOProxy flags */
+#define VFIO_PROXY_CLIENT 0x1
+#define VFIO_PROXY_FORCE_QUEUED 0x4
+#define VFIO_PROXY_NO_POST 0x8
+#define VFIO_PROXY_USE_MULTI 0x16
+
+/* coalescing high and low water marks for VFIOProxy num_outgoing */
+#define VFIO_USER_OUT_HIGH 1024
+#define VFIO_USER_OUT_LOW 128
+
+typedef struct VFIODevice VFIODevice;
+
+VFIOUserProxy *vfio_user_connect_dev(SocketAddress *addr, Error **errp);
+void vfio_user_disconnect(VFIOUserProxy *proxy);
+void vfio_user_set_handler(VFIODevice *vbasedev,
+ void (*handler)(void *opaque, VFIOUserMsg *msg),
+ void *reqarg);
+bool vfio_user_validate_version(VFIOUserProxy *proxy, Error **errp);
+
+VFIOUserFDs *vfio_user_getfds(int numfds);
+void vfio_user_putfds(VFIOUserMsg *msg);
+
+void vfio_user_disable_posted_writes(VFIOUserProxy *proxy);
+
+void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd,
+ uint32_t size, uint32_t flags);
+void vfio_user_wait_reqs(VFIOUserProxy *proxy);
+bool vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+ VFIOUserFDs *fds, int rsize, Error **errp);
+bool vfio_user_send_nowait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+ VFIOUserFDs *fds, int rsize, Error **errp);
+bool vfio_user_send_async(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+ VFIOUserFDs *fds, Error **errp);
+
+void vfio_user_send_reply(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int size);
+void vfio_user_send_error(VFIOUserProxy *proxy, VFIOUserHdr *hdr, int error);
+
+void vfio_user_flush_multi(VFIOUserProxy *proxy);
+void vfio_user_create_multi(VFIOUserProxy *proxy);
+void vfio_user_add_multi(VFIOUserProxy *proxy, uint8_t index,
+ off_t offset, uint32_t count, void *data);
+
+#endif /* VFIO_USER_PROXY_H */
diff --git a/hw/vfio-user/trace-events b/hw/vfio-user/trace-events
new file mode 100644
index 0000000..abb67f4
--- /dev/null
+++ b/hw/vfio-user/trace-events
@@ -0,0 +1,20 @@
+# See docs/devel/tracing.rst for syntax documentation.
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+# common.c
+vfio_user_recv_hdr(const char *name, uint16_t id, uint16_t cmd, uint32_t size, uint32_t flags) " (%s) id 0x%x cmd 0x%x size 0x%x flags 0x%x"
+vfio_user_recv_read(uint16_t id, int read) " id 0x%x read 0x%x"
+vfio_user_recv_request(uint16_t cmd) " command 0x%x"
+vfio_user_send_write(uint16_t id, int wrote) " id 0x%x wrote 0x%x"
+vfio_user_version(uint16_t major, uint16_t minor, const char *caps) " major %d minor %d caps: %s"
+vfio_user_get_info(uint32_t nregions, uint32_t nirqs) " #regions %d #irqs %d"
+vfio_user_get_region_info(uint32_t index, uint32_t flags, uint64_t size) " index %d flags 0x%x size 0x%"PRIx64
+vfio_user_region_rw(uint32_t region, uint64_t off, uint32_t count) " region %d offset 0x%"PRIx64" count %d"
+vfio_user_get_irq_info(uint32_t index, uint32_t flags, uint32_t count) " index %d flags 0x%x count %d"
+vfio_user_set_irqs(uint32_t index, uint32_t start, uint32_t count, uint32_t flags) " index %d start %d count %d flags 0x%x"
+vfio_user_wrmulti(const char *s, uint64_t wr_cnt) " %s count 0x%"PRIx64
+
+# container.c
+vfio_user_dma_map(uint64_t iova, uint64_t size, uint64_t off, uint32_t flags, bool async_ops) " iova 0x%"PRIx64" size 0x%"PRIx64" off 0x%"PRIx64" flags 0x%x async_ops %d"
+vfio_user_dma_unmap(uint64_t iova, uint64_t size, uint32_t flags, bool async_ops) " iova 0x%"PRIx64" size 0x%"PRIx64" flags 0x%x async_ops %d"
diff --git a/hw/vfio-user/trace.h b/hw/vfio-user/trace.h
new file mode 100644
index 0000000..9cf02d9
--- /dev/null
+++ b/hw/vfio-user/trace.h
@@ -0,0 +1,4 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include "trace/trace-hw_vfio_user.h"
diff --git a/hw/vfio/Kconfig b/hw/vfio/Kconfig
index 7cdba05..91d9023 100644
--- a/hw/vfio/Kconfig
+++ b/hw/vfio/Kconfig
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
config VFIO
bool
depends on LINUX
diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
index 785c0a0..7719f24 100644
--- a/hw/vfio/ap.c
+++ b/hw/vfio/ap.c
@@ -10,6 +10,7 @@
* directory.
*/
+#include <stdbool.h>
#include "qemu/osdep.h"
#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
#include <linux/vfio.h>
@@ -18,8 +19,10 @@
#include "hw/vfio/vfio-device.h"
#include "system/iommufd.h"
#include "hw/s390x/ap-device.h"
+#include "hw/s390x/css.h"
#include "qemu/error-report.h"
#include "qemu/event_notifier.h"
+#include "qemu/lockable.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
#include "qemu/option.h"
@@ -37,8 +40,23 @@ struct VFIOAPDevice {
APDevice apdev;
VFIODevice vdev;
EventNotifier req_notifier;
+ EventNotifier cfg_notifier;
};
+typedef struct APConfigChgEvent {
+ QTAILQ_ENTRY(APConfigChgEvent) next;
+} APConfigChgEvent;
+
+static QTAILQ_HEAD(, APConfigChgEvent) cfg_chg_events =
+ QTAILQ_HEAD_INITIALIZER(cfg_chg_events);
+
+static QemuMutex cfg_chg_events_lock;
+
+static void __attribute__((constructor)) vfio_ap_global_init(void)
+{
+ qemu_mutex_init(&cfg_chg_events_lock);
+}
+
OBJECT_DECLARE_SIMPLE_TYPE(VFIOAPDevice, VFIO_AP_DEVICE)
static void vfio_ap_compute_needs_reset(VFIODevice *vdev)
@@ -70,6 +88,57 @@ static void vfio_ap_req_notifier_handler(void *opaque)
}
}
+static void vfio_ap_cfg_chg_notifier_handler(void *opaque)
+{
+ APConfigChgEvent *cfg_chg_event;
+ VFIOAPDevice *vapdev = opaque;
+
+ if (!event_notifier_test_and_clear(&vapdev->cfg_notifier)) {
+ return;
+ }
+
+ cfg_chg_event = g_new0(APConfigChgEvent, 1);
+
+ WITH_QEMU_LOCK_GUARD(&cfg_chg_events_lock) {
+ QTAILQ_INSERT_TAIL(&cfg_chg_events, cfg_chg_event, next);
+ }
+
+ css_generate_css_crws(0);
+
+}
+
+int ap_chsc_sei_nt0_get_event(void *res)
+{
+ ChscSeiNt0Res *nt0_res = (ChscSeiNt0Res *)res;
+ APConfigChgEvent *cfg_chg_event;
+
+ WITH_QEMU_LOCK_GUARD(&cfg_chg_events_lock) {
+ if (QTAILQ_EMPTY(&cfg_chg_events)) {
+ return EVENT_INFORMATION_NOT_STORED;
+ }
+
+ cfg_chg_event = QTAILQ_FIRST(&cfg_chg_events);
+ QTAILQ_REMOVE(&cfg_chg_events, cfg_chg_event, next);
+ }
+
+ memset(nt0_res, 0, sizeof(*nt0_res));
+ g_free(cfg_chg_event);
+ nt0_res->flags |= PENDING_EVENT_INFO_BITMASK;
+ nt0_res->length = sizeof(ChscSeiNt0Res);
+ nt0_res->code = NT0_RES_RESPONSE_CODE;
+ nt0_res->nt = NT0_RES_NT_DEFAULT;
+ nt0_res->rs = NT0_RES_RS_AP_CHANGE;
+ nt0_res->cc = NT0_RES_CC_AP_CHANGE;
+
+ return EVENT_INFORMATION_STORED;
+}
+
+bool ap_chsc_sei_nt0_have_event(void)
+{
+ QEMU_LOCK_GUARD(&cfg_chg_events_lock);
+ return !QTAILQ_EMPTY(&cfg_chg_events);
+}
+
static bool vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev,
unsigned int irq, Error **errp)
{
@@ -85,6 +154,10 @@ static bool vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev,
notifier = &vapdev->req_notifier;
fd_read = vfio_ap_req_notifier_handler;
break;
+ case VFIO_AP_CFG_CHG_IRQ_INDEX:
+ notifier = &vapdev->cfg_notifier;
+ fd_read = vfio_ap_cfg_chg_notifier_handler;
+ break;
default:
error_setg(errp, "vfio: Unsupported device irq(%d)", irq);
return false;
@@ -137,6 +210,9 @@ static void vfio_ap_unregister_irq_notifier(VFIOAPDevice *vapdev,
case VFIO_AP_REQ_IRQ_INDEX:
notifier = &vapdev->req_notifier;
break;
+ case VFIO_AP_CFG_CHG_IRQ_INDEX:
+ notifier = &vapdev->cfg_notifier;
+ break;
default:
error_report("vfio: Unsupported device irq(%d)", irq);
return;
@@ -176,11 +252,20 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp)
warn_report_err(err);
}
+ if (!vfio_ap_register_irq_notifier(vapdev, VFIO_AP_CFG_CHG_IRQ_INDEX, &err))
+ {
+ /*
+ * Report this error, but do not make it a failing condition.
+ * Lack of this IRQ in the host does not prevent normal operation.
+ */
+ warn_report_err(err);
+ }
+
return;
error:
error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name);
- g_free(vbasedev->name);
+ vfio_device_free_name(vbasedev);
}
static void vfio_ap_unrealize(DeviceState *dev)
@@ -188,8 +273,9 @@ static void vfio_ap_unrealize(DeviceState *dev)
VFIOAPDevice *vapdev = VFIO_AP_DEVICE(dev);
vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX);
+ vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_CFG_CHG_IRQ_INDEX);
vfio_device_detach(&vapdev->vdev);
- g_free(vapdev->vdev.name);
+ vfio_device_free_name(&vapdev->vdev);
}
static const Property vfio_ap_properties[] = {
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index cea9d6e..9560b8d 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -619,7 +619,7 @@ out_io_notifier_err:
out_region_err:
vfio_device_detach(vbasedev);
out_attach_dev_err:
- g_free(vbasedev->name);
+ vfio_device_free_name(vbasedev);
out_unrealize:
if (cdc->unrealize) {
cdc->unrealize(cdev);
@@ -637,7 +637,7 @@ static void vfio_ccw_unrealize(DeviceState *dev)
vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX);
vfio_ccw_put_region(vcdev);
vfio_device_detach(&vcdev->vdev);
- g_free(vcdev->vdev.name);
+ vfio_device_free_name(&vcdev->vdev);
if (cdc->unrealize) {
cdc->unrealize(cdev);
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
index 1c6ca94..5630497 100644
--- a/hw/vfio/container-base.c
+++ b/hw/vfio/container-base.c
@@ -75,12 +75,21 @@ void vfio_address_space_insert(VFIOAddressSpace *space,
int vfio_container_dma_map(VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
- void *vaddr, bool readonly)
+ void *vaddr, bool readonly, MemoryRegion *mr)
{
VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+ RAMBlock *rb = mr->ram_block;
+ int mfd = rb ? qemu_ram_get_fd(rb) : -1;
+ if (mfd >= 0 && vioc->dma_map_file) {
+ unsigned long start = vaddr - qemu_ram_get_host_addr(rb);
+ unsigned long offset = qemu_ram_get_fd_offset(rb);
+
+ return vioc->dma_map_file(bcontainer, iova, size, mfd, start + offset,
+ readonly);
+ }
g_assert(vioc->dma_map);
- return vioc->dma_map(bcontainer, iova, size, vaddr, readonly);
+ return vioc->dma_map(bcontainer, iova, size, vaddr, readonly, mr);
}
int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index a9f0dba..3e13fea 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -31,10 +31,11 @@
#include "system/reset.h"
#include "trace.h"
#include "qapi/error.h"
+#include "migration/cpr.h"
+#include "migration/blocker.h"
#include "pci.h"
#include "hw/vfio/vfio-container.h"
#include "vfio-helpers.h"
-#include "vfio-cpr.h"
#include "vfio-listener.h"
#define TYPE_HOST_IOMMU_DEVICE_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE "-legacy-vfio"
@@ -135,6 +136,8 @@ static int vfio_legacy_dma_unmap_one(const VFIOContainerBase *bcontainer,
int ret;
Error *local_err = NULL;
+ g_assert(!cpr_is_incoming());
+
if (iotlb && vfio_container_dirty_tracking_is_started(bcontainer)) {
if (!vfio_container_devices_dirty_tracking_is_supported(bcontainer) &&
bcontainer->dirty_pages_supported) {
@@ -207,7 +210,8 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
}
static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
- ram_addr_t size, void *vaddr, bool readonly)
+ ram_addr_t size, void *vaddr, bool readonly,
+ MemoryRegion *mr)
{
const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
bcontainer);
@@ -425,7 +429,12 @@ static VFIOContainer *vfio_create_container(int fd, VFIOGroup *group,
return NULL;
}
- if (!vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
+ /*
+ * During CPR, just set the container type and skip the ioctls, as the
+ * container and group are already configured in the kernel.
+ */
+ if (!cpr_is_incoming() &&
+ !vfio_set_iommu(fd, group->fd, &iommu_type, errp)) {
return NULL;
}
@@ -592,6 +601,11 @@ static bool vfio_container_group_add(VFIOContainer *container, VFIOGroup *group,
group->container = container;
QLIST_INSERT_HEAD(&container->group_list, group, container_next);
vfio_group_add_kvm_device(group);
+ /*
+ * Remember the container fd for each group, so we can attach to the same
+ * container after CPR.
+ */
+ cpr_resave_fd("vfio_container_for_group", group->groupid, container->fd);
return true;
}
@@ -601,6 +615,7 @@ static void vfio_container_group_del(VFIOContainer *container, VFIOGroup *group)
group->container = NULL;
vfio_group_del_kvm_device(group);
vfio_ram_block_discard_disable(container, false);
+ cpr_delete_fd("vfio_container_for_group", group->groupid);
}
static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as,
@@ -615,17 +630,34 @@ static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as,
bool group_was_added = false;
space = vfio_address_space_get(as);
+ fd = cpr_find_fd("vfio_container_for_group", group->groupid);
- QLIST_FOREACH(bcontainer, &space->containers, next) {
- container = container_of(bcontainer, VFIOContainer, bcontainer);
- if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
- return vfio_container_group_add(container, group, errp);
+ if (!cpr_is_incoming()) {
+ QLIST_FOREACH(bcontainer, &space->containers, next) {
+ container = container_of(bcontainer, VFIOContainer, bcontainer);
+ if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
+ return vfio_container_group_add(container, group, errp);
+ }
}
- }
- fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp);
- if (fd < 0) {
- goto fail;
+ fd = qemu_open("/dev/vfio/vfio", O_RDWR, errp);
+ if (fd < 0) {
+ goto fail;
+ }
+ } else {
+ /*
+ * For incoming CPR, the group is already attached in the kernel.
+ * If a container with matching fd is found, then update the
+ * userland group list and return. If not, then after the loop,
+ * create the container struct and group list.
+ */
+ QLIST_FOREACH(bcontainer, &space->containers, next) {
+ container = container_of(bcontainer, VFIOContainer, bcontainer);
+
+ if (vfio_cpr_container_match(container, group, fd)) {
+ return vfio_container_group_add(container, group, errp);
+ }
+ }
}
ret = ioctl(fd, VFIO_GET_API_VERSION);
@@ -642,7 +674,7 @@ static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as,
new_container = true;
bcontainer = &container->bcontainer;
- if (!vfio_cpr_register_container(bcontainer, errp)) {
+ if (!vfio_legacy_cpr_register_container(container, errp)) {
goto fail;
}
@@ -660,8 +692,17 @@ static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as,
}
group_was_added = true;
- if (!vfio_listener_register(bcontainer, errp)) {
- goto fail;
+ /*
+ * If CPR, register the listener later, after all state that may
+ * affect regions and mapping boundaries has been cpr load'ed. Later,
+ * the listener will invoke its callback on each flat section and call
+ * dma_map to supply the new vaddr, and the calls will match the mappings
+ * remembered by the kernel.
+ */
+ if (!cpr_is_incoming()) {
+ if (!vfio_listener_register(bcontainer, errp)) {
+ goto fail;
+ }
}
bcontainer->initialized = true;
@@ -669,7 +710,9 @@ static bool vfio_container_connect(VFIOGroup *group, AddressSpace *as,
return true;
fail:
- vfio_listener_unregister(bcontainer);
+ if (new_container) {
+ vfio_listener_unregister(bcontainer);
+ }
if (group_was_added) {
vfio_container_group_del(container, group);
@@ -678,7 +721,7 @@ fail:
vioc->release(bcontainer);
}
if (new_container) {
- vfio_cpr_unregister_container(bcontainer);
+ vfio_legacy_cpr_unregister_container(container);
object_unref(container);
}
if (fd >= 0) {
@@ -697,6 +740,7 @@ static void vfio_container_disconnect(VFIOGroup *group)
QLIST_REMOVE(group, container_next);
group->container = NULL;
+ cpr_delete_fd("vfio_container_for_group", group->groupid);
/*
* Explicitly release the listener first before unset container,
@@ -719,7 +763,7 @@ static void vfio_container_disconnect(VFIOGroup *group)
VFIOAddressSpace *space = bcontainer->space;
trace_vfio_container_disconnect(container->fd);
- vfio_cpr_unregister_container(bcontainer);
+ vfio_legacy_cpr_unregister_container(container);
close(container->fd);
object_unref(container);
@@ -750,7 +794,7 @@ static VFIOGroup *vfio_group_get(int groupid, AddressSpace *as, Error **errp)
group = g_malloc0(sizeof(*group));
snprintf(path, sizeof(path), "/dev/vfio/%d", groupid);
- group->fd = qemu_open(path, O_RDWR, errp);
+ group->fd = cpr_open_fd(path, O_RDWR, "vfio_group", groupid, errp);
if (group->fd < 0) {
goto free_group_exit;
}
@@ -782,6 +826,7 @@ static VFIOGroup *vfio_group_get(int groupid, AddressSpace *as, Error **errp)
return group;
close_fd_exit:
+ cpr_delete_fd("vfio_group", groupid);
close(group->fd);
free_group_exit:
@@ -803,6 +848,7 @@ static void vfio_group_put(VFIOGroup *group)
vfio_container_disconnect(group);
QLIST_REMOVE(group, next);
trace_vfio_group_put(group->fd);
+ cpr_delete_fd("vfio_group", group->groupid);
close(group->fd);
g_free(group);
}
@@ -813,7 +859,7 @@ static bool vfio_device_get(VFIOGroup *group, const char *name,
g_autofree struct vfio_device_info *info = NULL;
int fd;
- fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
+ fd = vfio_cpr_group_get_device_fd(group->fd, name);
if (fd < 0) {
error_setg_errno(errp, errno, "error getting device from group %d",
group->groupid);
@@ -826,8 +872,7 @@ static bool vfio_device_get(VFIOGroup *group, const char *name,
info = vfio_get_device_info(fd);
if (!info) {
error_setg_errno(errp, errno, "error getting device info");
- close(fd);
- return false;
+ goto fail;
}
/*
@@ -841,8 +886,7 @@ static bool vfio_device_get(VFIOGroup *group, const char *name,
if (!QLIST_EMPTY(&group->device_list)) {
error_setg(errp, "Inconsistent setting of support for discarding "
"RAM (e.g., balloon) within group");
- close(fd);
- return false;
+ goto fail;
}
if (!group->ram_block_discard_allowed) {
@@ -860,6 +904,11 @@ static bool vfio_device_get(VFIOGroup *group, const char *name,
trace_vfio_device_get(name, info->flags, info->num_regions, info->num_irqs);
return true;
+
+fail:
+ close(fd);
+ cpr_delete_fd(name, 0);
+ return false;
}
static void vfio_device_put(VFIODevice *vbasedev)
@@ -870,6 +919,7 @@ static void vfio_device_put(VFIODevice *vbasedev)
QLIST_REMOVE(vbasedev, next);
vbasedev->group = NULL;
trace_vfio_device_put(vbasedev->fd);
+ cpr_delete_fd(vbasedev->name, 0);
close(vbasedev->fd);
}
@@ -939,8 +989,19 @@ static bool vfio_legacy_attach_device(const char *name, VFIODevice *vbasedev,
goto device_put_exit;
}
+ if (vbasedev->mdev) {
+ error_setg(&vbasedev->cpr.mdev_blocker,
+ "CPR does not support vfio mdev %s", vbasedev->name);
+ if (migrate_add_blocker_modes(&vbasedev->cpr.mdev_blocker, errp,
+ MIG_MODE_CPR_TRANSFER, -1) < 0) {
+ goto hiod_unref_exit;
+ }
+ }
+
return true;
+hiod_unref_exit:
+ object_unref(vbasedev->hiod);
device_put_exit:
vfio_device_put(vbasedev);
group_put_exit:
@@ -956,6 +1017,7 @@ static void vfio_legacy_detach_device(VFIODevice *vbasedev)
vfio_device_unprepare(vbasedev);
+ migrate_del_blocker(&vbasedev->cpr.mdev_blocker);
object_unref(vbasedev->hiod);
vfio_device_put(vbasedev);
vfio_group_put(group);
diff --git a/hw/vfio/cpr-iommufd.c b/hw/vfio/cpr-iommufd.c
new file mode 100644
index 0000000..148a06d
--- /dev/null
+++ b/hw/vfio/cpr-iommufd.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2024-2025 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "hw/vfio/vfio-cpr.h"
+#include "hw/vfio/vfio-device.h"
+#include "migration/blocker.h"
+#include "migration/cpr.h"
+#include "migration/migration.h"
+#include "migration/vmstate.h"
+#include "system/iommufd.h"
+#include "vfio-iommufd.h"
+#include "trace.h"
+
+typedef struct CprVFIODevice {
+ char *name;
+ unsigned int namelen;
+ uint32_t ioas_id;
+ int devid;
+ uint32_t hwpt_id;
+ QLIST_ENTRY(CprVFIODevice) next;
+} CprVFIODevice;
+
+static const VMStateDescription vmstate_cpr_vfio_device = {
+ .name = "cpr vfio device",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32(namelen, CprVFIODevice),
+ VMSTATE_VBUFFER_ALLOC_UINT32(name, CprVFIODevice, 0, NULL, namelen),
+ VMSTATE_INT32(devid, CprVFIODevice),
+ VMSTATE_UINT32(ioas_id, CprVFIODevice),
+ VMSTATE_UINT32(hwpt_id, CprVFIODevice),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+const VMStateDescription vmstate_cpr_vfio_devices = {
+ .name = CPR_STATE "/vfio devices",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (const VMStateField[]){
+ VMSTATE_QLIST_V(vfio_devices, CprState, 1, vmstate_cpr_vfio_device,
+ CprVFIODevice, next),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static void vfio_cpr_save_device(VFIODevice *vbasedev)
+{
+ CprVFIODevice *elem = g_new0(CprVFIODevice, 1);
+
+ elem->name = g_strdup(vbasedev->name);
+ elem->namelen = strlen(vbasedev->name) + 1;
+ elem->ioas_id = vbasedev->cpr.ioas_id;
+ elem->devid = vbasedev->devid;
+ elem->hwpt_id = vbasedev->cpr.hwpt_id;
+ QLIST_INSERT_HEAD(&cpr_state.vfio_devices, elem, next);
+}
+
+static CprVFIODevice *find_device(const char *name)
+{
+ CprVFIODeviceList *head = &cpr_state.vfio_devices;
+ CprVFIODevice *elem;
+
+ QLIST_FOREACH(elem, head, next) {
+ if (!strcmp(elem->name, name)) {
+ return elem;
+ }
+ }
+ return NULL;
+}
+
+static void vfio_cpr_delete_device(const char *name)
+{
+ CprVFIODevice *elem = find_device(name);
+
+ if (elem) {
+ QLIST_REMOVE(elem, next);
+ g_free(elem->name);
+ g_free(elem);
+ }
+}
+
+static bool vfio_cpr_find_device(VFIODevice *vbasedev)
+{
+ CprVFIODevice *elem = find_device(vbasedev->name);
+
+ if (elem) {
+ vbasedev->cpr.ioas_id = elem->ioas_id;
+ vbasedev->devid = elem->devid;
+ vbasedev->cpr.hwpt_id = elem->hwpt_id;
+ trace_vfio_cpr_find_device(elem->ioas_id, elem->devid, elem->hwpt_id);
+ return true;
+ }
+ return false;
+}
+
+static bool vfio_cpr_supported(IOMMUFDBackend *be, Error **errp)
+{
+ if (!iommufd_change_process_capable(be)) {
+ if (errp) {
+ error_setg(errp, "vfio iommufd backend does not support "
+ "IOMMU_IOAS_CHANGE_PROCESS");
+ }
+ return false;
+ }
+ return true;
+}
+
+static int iommufd_cpr_pre_save(void *opaque)
+{
+ IOMMUFDBackend *be = opaque;
+
+ /*
+ * The process has not changed yet, but proactively try the ioctl,
+ * and it will fail if any DMA mappings are not supported.
+ */
+ if (!iommufd_change_process_capable(be)) {
+ error_report("some memory regions do not support "
+ "IOMMU_IOAS_CHANGE_PROCESS");
+ return -1;
+ }
+ return 0;
+}
+
+static int iommufd_cpr_post_load(void *opaque, int version_id)
+{
+ IOMMUFDBackend *be = opaque;
+ Error *local_err = NULL;
+
+ if (!iommufd_change_process(be, &local_err)) {
+ error_report_err(local_err);
+ return -1;
+ }
+ return 0;
+}
+
+static const VMStateDescription iommufd_cpr_vmstate = {
+ .name = "iommufd",
+ .version_id = 0,
+ .minimum_version_id = 0,
+ .pre_save = iommufd_cpr_pre_save,
+ .post_load = iommufd_cpr_post_load,
+ .needed = cpr_incoming_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+bool vfio_iommufd_cpr_register_iommufd(IOMMUFDBackend *be, Error **errp)
+{
+ Error **cpr_blocker = &be->cpr_blocker;
+
+ if (!vfio_cpr_supported(be, cpr_blocker)) {
+ return migrate_add_blocker_modes(cpr_blocker, errp,
+ MIG_MODE_CPR_TRANSFER, -1) == 0;
+ }
+
+ vmstate_register(NULL, -1, &iommufd_cpr_vmstate, be);
+
+ return true;
+}
+
+void vfio_iommufd_cpr_unregister_iommufd(IOMMUFDBackend *be)
+{
+ vmstate_unregister(NULL, &iommufd_cpr_vmstate, be);
+ migrate_del_blocker(&be->cpr_blocker);
+}
+
+bool vfio_iommufd_cpr_register_container(VFIOIOMMUFDContainer *container,
+ Error **errp)
+{
+ VFIOContainerBase *bcontainer = &container->bcontainer;
+
+ migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier,
+ vfio_cpr_reboot_notifier,
+ MIG_MODE_CPR_REBOOT);
+
+ vfio_cpr_add_kvm_notifier();
+
+ return true;
+}
+
+void vfio_iommufd_cpr_unregister_container(VFIOIOMMUFDContainer *container)
+{
+ VFIOContainerBase *bcontainer = &container->bcontainer;
+
+ migration_remove_notifier(&bcontainer->cpr_reboot_notifier);
+}
+
+void vfio_iommufd_cpr_register_device(VFIODevice *vbasedev)
+{
+ if (!cpr_is_incoming()) {
+ /*
+ * Beware fd may have already been saved by vfio_device_set_fd,
+ * so call resave to avoid a duplicate entry.
+ */
+ cpr_resave_fd(vbasedev->name, 0, vbasedev->fd);
+ vfio_cpr_save_device(vbasedev);
+ }
+}
+
+void vfio_iommufd_cpr_unregister_device(VFIODevice *vbasedev)
+{
+ cpr_delete_fd(vbasedev->name, 0);
+ vfio_cpr_delete_device(vbasedev->name);
+}
+
+void vfio_cpr_load_device(VFIODevice *vbasedev)
+{
+ if (cpr_is_incoming()) {
+ bool ret = vfio_cpr_find_device(vbasedev);
+ g_assert(ret);
+
+ if (vbasedev->fd < 0) {
+ vbasedev->fd = cpr_find_fd(vbasedev->name, 0);
+ }
+ }
+}
diff --git a/hw/vfio/cpr-legacy.c b/hw/vfio/cpr-legacy.c
new file mode 100644
index 0000000..553b203
--- /dev/null
+++ b/hw/vfio/cpr-legacy.c
@@ -0,0 +1,284 @@
+/*
+ * Copyright (c) 2021-2025 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <sys/ioctl.h>
+#include <linux/vfio.h>
+#include "qemu/osdep.h"
+#include "hw/vfio/vfio-container.h"
+#include "hw/vfio/vfio-device.h"
+#include "hw/vfio/vfio-listener.h"
+#include "migration/blocker.h"
+#include "migration/cpr.h"
+#include "migration/migration.h"
+#include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+
+static bool vfio_dma_unmap_vaddr_all(VFIOContainer *container, Error **errp)
+{
+ struct vfio_iommu_type1_dma_unmap unmap = {
+ .argsz = sizeof(unmap),
+ .flags = VFIO_DMA_UNMAP_FLAG_VADDR | VFIO_DMA_UNMAP_FLAG_ALL,
+ .iova = 0,
+ .size = 0,
+ };
+ if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
+ error_setg_errno(errp, errno, "vfio_dma_unmap_vaddr_all");
+ return false;
+ }
+ container->cpr.vaddr_unmapped = true;
+ return true;
+}
+
+/*
+ * Set the new @vaddr for any mappings registered during cpr load.
+ * The incoming state is cleared thereafter.
+ */
+static int vfio_legacy_cpr_dma_map(const VFIOContainerBase *bcontainer,
+ hwaddr iova, ram_addr_t size, void *vaddr,
+ bool readonly, MemoryRegion *mr)
+{
+ const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
+ bcontainer);
+ struct vfio_iommu_type1_dma_map map = {
+ .argsz = sizeof(map),
+ .flags = VFIO_DMA_MAP_FLAG_VADDR,
+ .vaddr = (__u64)(uintptr_t)vaddr,
+ .iova = iova,
+ .size = size,
+ };
+
+ g_assert(cpr_is_incoming());
+
+ if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map)) {
+ return -errno;
+ }
+
+ return 0;
+}
+
+static void vfio_region_remap(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ VFIOContainer *container = container_of(listener, VFIOContainer,
+ cpr.remap_listener);
+ vfio_container_region_add(&container->bcontainer, section, true);
+}
+
+static bool vfio_cpr_supported(VFIOContainer *container, Error **errp)
+{
+ if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UPDATE_VADDR)) {
+ error_setg(errp, "VFIO container does not support VFIO_UPDATE_VADDR");
+ return false;
+
+ } else if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UNMAP_ALL)) {
+ error_setg(errp, "VFIO container does not support VFIO_UNMAP_ALL");
+ return false;
+
+ } else {
+ return true;
+ }
+}
+
+static int vfio_container_pre_save(void *opaque)
+{
+ VFIOContainer *container = opaque;
+ Error *local_err = NULL;
+
+ if (!vfio_dma_unmap_vaddr_all(container, &local_err)) {
+ error_report_err(local_err);
+ return -1;
+ }
+ return 0;
+}
+
+static int vfio_container_post_load(void *opaque, int version_id)
+{
+ VFIOContainer *container = opaque;
+ VFIOContainerBase *bcontainer = &container->bcontainer;
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+ dma_map_fn saved_dma_map = vioc->dma_map;
+ Error *local_err = NULL;
+
+ /* During incoming CPR, divert calls to dma_map. */
+ vioc->dma_map = vfio_legacy_cpr_dma_map;
+
+ if (!vfio_listener_register(bcontainer, &local_err)) {
+ error_report_err(local_err);
+ return -1;
+ }
+
+ /* Restore original dma_map function */
+ vioc->dma_map = saved_dma_map;
+
+ return 0;
+}
+
+static const VMStateDescription vfio_container_vmstate = {
+ .name = "vfio-container",
+ .version_id = 0,
+ .minimum_version_id = 0,
+ .priority = MIG_PRI_LOW, /* Must happen after devices and groups */
+ .pre_save = vfio_container_pre_save,
+ .post_load = vfio_container_post_load,
+ .needed = cpr_incoming_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier,
+ MigrationEvent *e, Error **errp)
+{
+ VFIOContainer *container =
+ container_of(notifier, VFIOContainer, cpr.transfer_notifier);
+ VFIOContainerBase *bcontainer = &container->bcontainer;
+
+ if (e->type != MIG_EVENT_PRECOPY_FAILED) {
+ return 0;
+ }
+
+ if (container->cpr.vaddr_unmapped) {
+ /*
+ * Force a call to vfio_region_remap for each mapped section by
+ * temporarily registering a listener, and temporarily diverting
+ * dma_map to vfio_legacy_cpr_dma_map. The latter restores vaddr.
+ */
+
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+ dma_map_fn saved_dma_map = vioc->dma_map;
+ vioc->dma_map = vfio_legacy_cpr_dma_map;
+
+ container->cpr.remap_listener = (MemoryListener) {
+ .name = "vfio cpr recover",
+ .region_add = vfio_region_remap
+ };
+ memory_listener_register(&container->cpr.remap_listener,
+ bcontainer->space->as);
+ memory_listener_unregister(&container->cpr.remap_listener);
+ container->cpr.vaddr_unmapped = false;
+ vioc->dma_map = saved_dma_map;
+ }
+ return 0;
+}
+
+bool vfio_legacy_cpr_register_container(VFIOContainer *container, Error **errp)
+{
+ VFIOContainerBase *bcontainer = &container->bcontainer;
+ Error **cpr_blocker = &container->cpr.blocker;
+
+ migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier,
+ vfio_cpr_reboot_notifier,
+ MIG_MODE_CPR_REBOOT);
+
+ if (!vfio_cpr_supported(container, cpr_blocker)) {
+ return migrate_add_blocker_modes(cpr_blocker, errp,
+ MIG_MODE_CPR_TRANSFER, -1) == 0;
+ }
+
+ vfio_cpr_add_kvm_notifier();
+
+ vmstate_register(NULL, -1, &vfio_container_vmstate, container);
+
+ migration_add_notifier_mode(&container->cpr.transfer_notifier,
+ vfio_cpr_fail_notifier,
+ MIG_MODE_CPR_TRANSFER);
+ return true;
+}
+
+void vfio_legacy_cpr_unregister_container(VFIOContainer *container)
+{
+ VFIOContainerBase *bcontainer = &container->bcontainer;
+
+ migration_remove_notifier(&bcontainer->cpr_reboot_notifier);
+ migrate_del_blocker(&container->cpr.blocker);
+ vmstate_unregister(NULL, &vfio_container_vmstate, container);
+ migration_remove_notifier(&container->cpr.transfer_notifier);
+}
+
+/*
+ * In old QEMU, VFIO_DMA_UNMAP_FLAG_VADDR may fail on some mapping after
+ * succeeding for others, so the latter have lost their vaddr. Call this
+ * to restore vaddr for a section with a giommu.
+ *
+ * The giommu already exists. Find it and replay it, which calls
+ * vfio_legacy_cpr_dma_map further down the stack.
+ */
+void vfio_cpr_giommu_remap(VFIOContainerBase *bcontainer,
+ MemoryRegionSection *section)
+{
+ VFIOGuestIOMMU *giommu = NULL;
+ hwaddr as_offset = section->offset_within_address_space;
+ hwaddr iommu_offset = as_offset - section->offset_within_region;
+
+ QLIST_FOREACH(giommu, &bcontainer->giommu_list, giommu_next) {
+ if (giommu->iommu_mr == IOMMU_MEMORY_REGION(section->mr) &&
+ giommu->iommu_offset == iommu_offset) {
+ break;
+ }
+ }
+ g_assert(giommu);
+ memory_region_iommu_replay(giommu->iommu_mr, &giommu->n);
+}
+
+/*
+ * In old QEMU, VFIO_DMA_UNMAP_FLAG_VADDR may fail on some mapping after
+ * succeeding for others, so the latter have lost their vaddr. Call this
+ * to restore vaddr for a section with a RamDiscardManager.
+ *
+ * The ram discard listener already exists. Call its populate function
+ * directly, which calls vfio_legacy_cpr_dma_map.
+ */
+bool vfio_cpr_ram_discard_register_listener(VFIOContainerBase *bcontainer,
+ MemoryRegionSection *section)
+{
+ VFIORamDiscardListener *vrdl =
+ vfio_find_ram_discard_listener(bcontainer, section);
+
+ g_assert(vrdl);
+ return vrdl->listener.notify_populate(&vrdl->listener, section) == 0;
+}
+
+int vfio_cpr_group_get_device_fd(int d, const char *name)
+{
+ const int id = 0;
+ int fd = cpr_find_fd(name, id);
+
+ if (fd < 0) {
+ fd = ioctl(d, VFIO_GROUP_GET_DEVICE_FD, name);
+ if (fd >= 0) {
+ cpr_save_fd(name, id, fd);
+ }
+ }
+ return fd;
+}
+
+static bool same_device(int fd1, int fd2)
+{
+ struct stat st1, st2;
+
+ return !fstat(fd1, &st1) && !fstat(fd2, &st2) && st1.st_dev == st2.st_dev;
+}
+
+bool vfio_cpr_container_match(VFIOContainer *container, VFIOGroup *group,
+ int fd)
+{
+ if (container->fd == fd) {
+ return true;
+ }
+ if (!same_device(container->fd, fd)) {
+ return false;
+ }
+ /*
+ * Same device, different fd. This occurs when the container fd is
+ * cpr_save'd multiple times, once for each groupid, so SCM_RIGHTS
+ * produces duplicates. De-dup it.
+ */
+ cpr_delete_fd("vfio_container_for_group", group->groupid);
+ close(fd);
+ cpr_save_fd("vfio_container_for_group", group->groupid, container->fd);
+ return true;
+}
diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c
index 3214184..af0f12a 100644
--- a/hw/vfio/cpr.c
+++ b/hw/vfio/cpr.c
@@ -7,13 +7,16 @@
#include "qemu/osdep.h"
#include "hw/vfio/vfio-device.h"
-#include "migration/misc.h"
+#include "hw/vfio/vfio-cpr.h"
+#include "hw/vfio/pci.h"
+#include "hw/pci/msix.h"
+#include "hw/pci/msi.h"
+#include "migration/cpr.h"
#include "qapi/error.h"
#include "system/runstate.h"
-#include "vfio-cpr.h"
-static int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier,
- MigrationEvent *e, Error **errp)
+int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier,
+ MigrationEvent *e, Error **errp)
{
if (e->type == MIG_EVENT_PRECOPY_SETUP &&
!runstate_check(RUN_STATE_SUSPENDED) && !vm_get_suspended()) {
@@ -26,15 +29,172 @@ static int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier,
return 0;
}
-bool vfio_cpr_register_container(VFIOContainerBase *bcontainer, Error **errp)
+#define STRDUP_VECTOR_FD_NAME(vdev, name) \
+ g_strdup_printf("%s_%s", (vdev)->vbasedev.name, (name))
+
+void vfio_cpr_save_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr,
+ int fd)
+{
+ g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name);
+ cpr_save_fd(fdname, nr, fd);
+}
+
+int vfio_cpr_load_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr)
+{
+ g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name);
+ return cpr_find_fd(fdname, nr);
+}
+
+void vfio_cpr_delete_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr)
{
- migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier,
- vfio_cpr_reboot_notifier,
- MIG_MODE_CPR_REBOOT);
- return true;
+ g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name);
+ cpr_delete_fd(fdname, nr);
+}
+
+static void vfio_cpr_claim_vectors(VFIOPCIDevice *vdev, int nr_vectors,
+ bool msix)
+{
+ int i, fd;
+ bool pending = false;
+ PCIDevice *pdev = &vdev->pdev;
+
+ vdev->nr_vectors = nr_vectors;
+ vdev->msi_vectors = g_new0(VFIOMSIVector, nr_vectors);
+ vdev->interrupt = msix ? VFIO_INT_MSIX : VFIO_INT_MSI;
+
+ vfio_pci_prepare_kvm_msi_virq_batch(vdev);
+
+ for (i = 0; i < nr_vectors; i++) {
+ VFIOMSIVector *vector = &vdev->msi_vectors[i];
+
+ fd = vfio_cpr_load_vector_fd(vdev, "interrupt", i);
+ if (fd >= 0) {
+ vfio_pci_vector_init(vdev, i);
+ vfio_pci_msi_set_handler(vdev, i);
+ }
+
+ if (vfio_cpr_load_vector_fd(vdev, "kvm_interrupt", i) >= 0) {
+ vfio_pci_add_kvm_msi_virq(vdev, vector, i, msix);
+ } else {
+ vdev->msi_vectors[i].virq = -1;
+ }
+
+ if (msix && msix_is_pending(pdev, i) && msix_is_masked(pdev, i)) {
+ set_bit(i, vdev->msix->pending);
+ pending = true;
+ }
+ }
+
+ vfio_pci_commit_kvm_msi_virq_batch(vdev);
+
+ if (msix) {
+ memory_region_set_enabled(&pdev->msix_pba_mmio, pending);
+ }
}
-void vfio_cpr_unregister_container(VFIOContainerBase *bcontainer)
+/*
+ * The kernel may change non-emulated config bits. Exclude them from the
+ * changed-bits check in get_pci_config_device.
+ */
+static int vfio_cpr_pci_pre_load(void *opaque)
{
- migration_remove_notifier(&bcontainer->cpr_reboot_notifier);
+ VFIOPCIDevice *vdev = opaque;
+ PCIDevice *pdev = &vdev->pdev;
+ int size = MIN(pci_config_size(pdev), vdev->config_size);
+ int i;
+
+ for (i = 0; i < size; i++) {
+ pdev->cmask[i] &= vdev->emulated_config_bits[i];
+ }
+
+ return 0;
+}
+
+static int vfio_cpr_pci_post_load(void *opaque, int version_id)
+{
+ VFIOPCIDevice *vdev = opaque;
+ PCIDevice *pdev = &vdev->pdev;
+ int nr_vectors;
+
+ if (msix_enabled(pdev)) {
+ vfio_pci_msix_set_notifiers(vdev);
+ nr_vectors = vdev->msix->entries;
+ vfio_cpr_claim_vectors(vdev, nr_vectors, true);
+
+ } else if (msi_enabled(pdev)) {
+ nr_vectors = msi_nr_vectors_allocated(pdev);
+ vfio_cpr_claim_vectors(vdev, nr_vectors, false);
+
+ } else if (vfio_pci_read_config(pdev, PCI_INTERRUPT_PIN, 1)) {
+ Error *local_err = NULL;
+ if (!vfio_pci_intx_enable(vdev, &local_err)) {
+ error_report_err(local_err);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static bool pci_msix_present(void *opaque, int version_id)
+{
+ PCIDevice *pdev = opaque;
+
+ return msix_present(pdev);
+}
+
+static const VMStateDescription vfio_intx_vmstate = {
+ .name = "vfio-cpr-intx",
+ .version_id = 0,
+ .minimum_version_id = 0,
+ .fields = (VMStateField[]) {
+ VMSTATE_BOOL(pending, VFIOINTx),
+ VMSTATE_UINT32(route.mode, VFIOINTx),
+ VMSTATE_INT32(route.irq, VFIOINTx),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+#define VMSTATE_VFIO_INTX(_field, _state) { \
+ .name = (stringify(_field)), \
+ .size = sizeof(VFIOINTx), \
+ .vmsd = &vfio_intx_vmstate, \
+ .flags = VMS_STRUCT, \
+ .offset = vmstate_offset_value(_state, _field, VFIOINTx), \
+}
+
+const VMStateDescription vfio_cpr_pci_vmstate = {
+ .name = "vfio-cpr-pci",
+ .version_id = 0,
+ .minimum_version_id = 0,
+ .pre_load = vfio_cpr_pci_pre_load,
+ .post_load = vfio_cpr_pci_post_load,
+ .needed = cpr_incoming_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice),
+ VMSTATE_MSIX_TEST(pdev, VFIOPCIDevice, pci_msix_present),
+ VMSTATE_VFIO_INTX(intx, VFIOPCIDevice),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static NotifierWithReturn kvm_close_notifier;
+
+static int vfio_cpr_kvm_close_notifier(NotifierWithReturn *notifier,
+ MigrationEvent *e,
+ Error **errp)
+{
+ if (e->type == MIG_EVENT_PRECOPY_DONE) {
+ vfio_kvm_device_close();
+ }
+ return 0;
+}
+
+void vfio_cpr_add_kvm_notifier(void)
+{
+ if (!kvm_close_notifier.notify) {
+ migration_add_notifier_mode(&kvm_close_notifier,
+ vfio_cpr_kvm_close_notifier,
+ MIG_MODE_CPR_TRANSFER);
+ }
}
diff --git a/hw/vfio/device.c b/hw/vfio/device.c
index 9fba2c7..96cf214 100644
--- a/hw/vfio/device.c
+++ b/hw/vfio/device.c
@@ -28,6 +28,8 @@
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/units.h"
+#include "migration/cpr.h"
+#include "migration/blocker.h"
#include "monitor/monitor.h"
#include "vfio-helpers.h"
@@ -200,6 +202,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
struct vfio_region_info **info)
{
size_t argsz = sizeof(struct vfio_region_info);
+ int fd = -1;
int ret;
/* check cache */
@@ -214,7 +217,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
retry:
(*info)->argsz = argsz;
- ret = vbasedev->io_ops->get_region_info(vbasedev, *info);
+ ret = vbasedev->io_ops->get_region_info(vbasedev, *info, &fd);
if (ret != 0) {
g_free(*info);
*info = NULL;
@@ -225,15 +228,30 @@ retry:
argsz = (*info)->argsz;
*info = g_realloc(*info, argsz);
+ if (fd != -1) {
+ close(fd);
+ fd = -1;
+ }
+
goto retry;
}
/* fill cache */
vbasedev->reginfo[index] = *info;
+ if (vbasedev->region_fds != NULL) {
+ vbasedev->region_fds[index] = fd;
+ }
return 0;
}
+int vfio_device_get_region_fd(VFIODevice *vbasedev, int index)
+{
+ return vbasedev->region_fds ?
+ vbasedev->region_fds[index] :
+ vbasedev->fd;
+}
+
int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type,
uint32_t subtype, struct vfio_region_info **info)
{
@@ -300,28 +318,40 @@ bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
error_setg(errp, "Use FD passing only with iommufd backend");
return false;
}
- /*
- * Give a name with fd so any function printing out vbasedev->name
- * will not break.
- */
if (!vbasedev->name) {
- vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
+
+ if (vbasedev->dev->id) {
+ vbasedev->name = g_strdup(vbasedev->dev->id);
+ return true;
+ } else {
+ /*
+ * Assign a name so any function printing it will not break.
+ * The fd number changes across processes, so this cannot be
+ * used as an invariant name for CPR.
+ */
+ vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
+ error_setg(&vbasedev->cpr.id_blocker,
+ "vfio device with fd=%d needs an id property",
+ vbasedev->fd);
+ return migrate_add_blocker_modes(&vbasedev->cpr.id_blocker,
+ errp, MIG_MODE_CPR_TRANSFER,
+ -1) == 0;
+ }
}
}
return true;
}
-void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
+void vfio_device_free_name(VFIODevice *vbasedev)
{
- ERRP_GUARD();
- int fd = monitor_fd_param(monitor_cur(), str, errp);
+ g_clear_pointer(&vbasedev->name, g_free);
+ migrate_del_blocker(&vbasedev->cpr.id_blocker);
+}
- if (fd < 0) {
- error_prepend(errp, "Could not parse remote object fd %s:", str);
- return;
- }
- vbasedev->fd = fd;
+void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
+{
+ vbasedev->fd = cpr_get_fd_param(vbasedev->dev->id, str, 0, errp);
}
static VFIODeviceIOOps vfio_device_io_ops_ioctl;
@@ -334,6 +364,7 @@ void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
vbasedev->io_ops = &vfio_device_io_ops_ioctl;
vbasedev->dev = dev;
vbasedev->fd = -1;
+ vbasedev->use_region_fds = false;
vbasedev->ram_block_discard_allowed = ram_discard;
}
@@ -444,6 +475,9 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
vbasedev->reginfo = g_new0(struct vfio_region_info *,
vbasedev->num_regions);
+ if (vbasedev->use_region_fds) {
+ vbasedev->region_fds = g_new0(int, vbasedev->num_regions);
+ }
}
void vfio_device_unprepare(VFIODevice *vbasedev)
@@ -452,9 +486,14 @@ void vfio_device_unprepare(VFIODevice *vbasedev)
for (i = 0; i < vbasedev->num_regions; i++) {
g_free(vbasedev->reginfo[i]);
+ if (vbasedev->region_fds != NULL && vbasedev->region_fds[i] != -1) {
+ close(vbasedev->region_fds[i]);
+ }
+
}
- g_free(vbasedev->reginfo);
- vbasedev->reginfo = NULL;
+
+ g_clear_pointer(&vbasedev->reginfo, g_free);
+ g_clear_pointer(&vbasedev->region_fds, g_free);
QLIST_REMOVE(vbasedev, container_next);
QLIST_REMOVE(vbasedev, global_next);
@@ -476,10 +515,13 @@ static int vfio_device_io_device_feature(VFIODevice *vbasedev,
}
static int vfio_device_io_get_region_info(VFIODevice *vbasedev,
- struct vfio_region_info *info)
+ struct vfio_region_info *info,
+ int *fd)
{
int ret;
+ *fd = -1;
+
ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info);
return ret < 0 ? -errno : ret;
@@ -522,7 +564,8 @@ static int vfio_device_io_region_read(VFIODevice *vbasedev, uint8_t index,
}
static int vfio_device_io_region_write(VFIODevice *vbasedev, uint8_t index,
- off_t off, uint32_t size, void *data)
+ off_t off, uint32_t size, void *data,
+ bool post)
{
struct vfio_region_info *info;
int ret;
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index d0dbab1..9a5f621 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -117,6 +117,17 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
int vfio_kvm_device_fd = -1;
#endif
+void vfio_kvm_device_close(void)
+{
+#ifdef CONFIG_KVM
+ kvm_close();
+ if (vfio_kvm_device_fd != -1) {
+ close(vfio_kvm_device_fd);
+ vfio_kvm_device_fd = -1;
+ }
+#endif
+}
+
int vfio_kvm_device_add_fd(int fd, Error **errp)
{
#ifdef CONFIG_KVM
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
index e7952d1..e7a9d1f 100644
--- a/hw/vfio/igd.c
+++ b/hw/vfio/igd.c
@@ -187,23 +187,21 @@ static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
}
static bool vfio_pci_igd_opregion_detect(VFIOPCIDevice *vdev,
- struct vfio_region_info **opregion,
- Error **errp)
+ struct vfio_region_info **opregion)
{
int ret;
- /* Hotplugging is not supported for opregion access */
- if (vdev->pdev.qdev.hotplugged) {
- error_setg(errp, "IGD OpRegion is not supported on hotplugged device");
- return false;
- }
-
ret = vfio_device_get_region_info_type(&vdev->vbasedev,
VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, opregion);
if (ret) {
- error_setg_errno(errp, -ret,
- "Device does not supports IGD OpRegion feature");
+ return false;
+ }
+
+ /* Hotplugging is not supported for opregion access */
+ if (vdev->pdev.qdev.hotplugged) {
+ warn_report("IGD device detected, but OpRegion is not supported "
+ "on hotplugged device.");
return false;
}
@@ -524,7 +522,7 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
}
/* IGD device always comes with OpRegion */
- if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) {
+ if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) {
return true;
}
info_report("OpRegion detected on Intel display %x.", vdev->device_id);
@@ -695,7 +693,7 @@ static bool vfio_pci_kvmgt_config_quirk(VFIOPCIDevice *vdev, Error **errp)
return true;
}
- if (!vfio_pci_igd_opregion_detect(vdev, &opregion, errp)) {
+ if (!vfio_pci_igd_opregion_detect(vdev, &opregion)) {
/* Should never reach here, KVMGT always emulates OpRegion */
return false;
}
diff --git a/hw/vfio/iommufd-stubs.c b/hw/vfio/iommufd-stubs.c
new file mode 100644
index 0000000..0be5276
--- /dev/null
+++ b/hw/vfio/iommufd-stubs.c
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2025 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "migration/cpr.h"
+#include "migration/vmstate.h"
+
+const VMStateDescription vmstate_cpr_vfio_devices = {
+ .name = CPR_STATE "/vfio devices",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (const VMStateField[]){
+ VMSTATE_END_OF_LIST()
+ }
+};
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index af1c7ab..48c590b 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -21,20 +21,22 @@
#include "qapi/error.h"
#include "system/iommufd.h"
#include "hw/qdev-core.h"
+#include "hw/vfio/vfio-cpr.h"
#include "system/reset.h"
#include "qemu/cutils.h"
#include "qemu/chardev_open.h"
+#include "migration/cpr.h"
#include "pci.h"
#include "vfio-iommufd.h"
#include "vfio-helpers.h"
-#include "vfio-cpr.h"
#include "vfio-listener.h"
#define TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO \
TYPE_HOST_IOMMU_DEVICE_IOMMUFD "-vfio"
static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
- ram_addr_t size, void *vaddr, bool readonly)
+ ram_addr_t size, void *vaddr, bool readonly,
+ MemoryRegion *mr)
{
const VFIOIOMMUFDContainer *container =
container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
@@ -44,6 +46,18 @@ static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
iova, size, vaddr, readonly);
}
+static int iommufd_cdev_map_file(const VFIOContainerBase *bcontainer,
+ hwaddr iova, ram_addr_t size,
+ int fd, unsigned long start, bool readonly)
+{
+ const VFIOIOMMUFDContainer *container =
+ container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
+
+ return iommufd_backend_map_file_dma(container->be,
+ container->ioas_id,
+ iova, size, fd, start, readonly);
+}
+
static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
IOMMUTLBEntry *iotlb, bool unmap_all)
@@ -108,6 +122,10 @@ static bool iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp)
goto err_kvm_device_add;
}
+ if (cpr_is_incoming()) {
+ goto skip_bind;
+ }
+
/* Bind device to iommufd */
bind.iommufd = iommufd->fd;
if (ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind)) {
@@ -119,6 +137,8 @@ static bool iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp)
vbasedev->devid = bind.out_devid;
trace_iommufd_cdev_connect_and_bind(bind.iommufd, vbasedev->name,
vbasedev->fd, vbasedev->devid);
+
+skip_bind:
return true;
err_bind:
iommufd_cdev_kvm_device_del(vbasedev);
@@ -312,7 +332,14 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
/* Try to find a domain */
QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
- ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
+ if (!cpr_is_incoming()) {
+ ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
+ } else if (vbasedev->cpr.hwpt_id == hwpt->hwpt_id) {
+ ret = 0;
+ } else {
+ continue;
+ }
+
if (ret) {
/* -EINVAL means the domain is incompatible with the device. */
if (ret == -EINVAL) {
@@ -329,6 +356,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
return false;
} else {
vbasedev->hwpt = hwpt;
+ vbasedev->cpr.hwpt_id = hwpt->hwpt_id;
QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
return true;
@@ -351,6 +379,11 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
}
+ if (cpr_is_incoming()) {
+ hwpt_id = vbasedev->cpr.hwpt_id;
+ goto skip_alloc;
+ }
+
if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid,
container->ioas_id, flags,
IOMMU_HWPT_DATA_NONE, 0, NULL,
@@ -358,19 +391,20 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
return false;
}
+ ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp);
+ if (ret) {
+ iommufd_backend_free_id(container->be, hwpt_id);
+ return false;
+ }
+
+skip_alloc:
hwpt = g_malloc0(sizeof(*hwpt));
hwpt->hwpt_id = hwpt_id;
hwpt->hwpt_flags = flags;
QLIST_INIT(&hwpt->device_list);
- ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
- if (ret) {
- iommufd_backend_free_id(container->be, hwpt->hwpt_id);
- g_free(hwpt);
- return false;
- }
-
vbasedev->hwpt = hwpt;
+ vbasedev->cpr.hwpt_id = hwpt->hwpt_id;
vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next);
@@ -408,7 +442,9 @@ static bool iommufd_cdev_attach_container(VFIODevice *vbasedev,
return iommufd_cdev_autodomains_get(vbasedev, container, errp);
}
- return !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
+ /* If CPR, we are already attached to ioas_id. */
+ return cpr_is_incoming() ||
+ !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
}
static void iommufd_cdev_detach_container(VFIODevice *vbasedev,
@@ -433,7 +469,7 @@ static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container)
if (!QLIST_EMPTY(&bcontainer->device_list)) {
return;
}
- vfio_cpr_unregister_container(bcontainer);
+ vfio_iommufd_cpr_unregister_container(container);
vfio_listener_unregister(bcontainer);
iommufd_backend_free_id(container->be, container->ioas_id);
object_unref(container);
@@ -497,11 +533,14 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
VFIOAddressSpace *space;
struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
int ret, devfd;
+ bool res;
uint32_t ioas_id;
Error *err = NULL;
const VFIOIOMMUClass *iommufd_vioc =
VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
+ vfio_cpr_load_device(vbasedev);
+
if (vbasedev->fd < 0) {
devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
if (devfd < 0) {
@@ -525,7 +564,16 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
vbasedev->iommufd != container->be) {
continue;
}
- if (!iommufd_cdev_attach_container(vbasedev, container, &err)) {
+
+ if (!cpr_is_incoming()) {
+ res = iommufd_cdev_attach_container(vbasedev, container, &err);
+ } else if (vbasedev->cpr.ioas_id == container->ioas_id) {
+ res = true;
+ } else {
+ continue;
+ }
+
+ if (!res) {
const char *msg = error_get_pretty(err);
trace_iommufd_cdev_fail_attach_existing_container(msg);
@@ -542,6 +590,11 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
}
}
+ if (cpr_is_incoming()) {
+ ioas_id = vbasedev->cpr.ioas_id;
+ goto skip_ioas_alloc;
+ }
+
/* Need to allocate a new dedicated container */
if (!iommufd_backend_alloc_ioas(vbasedev->iommufd, &ioas_id, errp)) {
goto err_alloc_ioas;
@@ -549,10 +602,12 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id);
+skip_ioas_alloc:
container = VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD));
container->be = vbasedev->iommufd;
container->ioas_id = ioas_id;
QLIST_INIT(&container->hwpt_list);
+ vbasedev->cpr.ioas_id = ioas_id;
bcontainer = &container->bcontainer;
vfio_address_space_insert(space, bcontainer);
@@ -579,7 +634,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
goto err_listener_register;
}
- if (!vfio_cpr_register_container(bcontainer, errp)) {
+ if (!vfio_iommufd_cpr_register_container(container, errp)) {
goto err_listener_register;
}
@@ -592,6 +647,10 @@ found_container:
goto err_listener_register;
}
+ /*
+ * Do not move this code before attachment! The nested IOMMU support
+ * needs device and hwpt id which are generated only after attachment.
+ */
if (!vfio_device_hiod_create_and_realize(vbasedev,
TYPE_HOST_IOMMU_DEVICE_IOMMUFD_VFIO, errp)) {
goto err_listener_register;
@@ -606,6 +665,7 @@ found_container:
}
vfio_device_prepare(vbasedev, bcontainer, &dev_info);
+ vfio_iommufd_cpr_register_device(vbasedev);
trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs,
vbasedev->num_regions, vbasedev->flags);
@@ -643,6 +703,7 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev)
iommufd_cdev_container_destroy(container);
vfio_address_space_put(space);
+ vfio_iommufd_cpr_unregister_device(vbasedev);
iommufd_cdev_unbind_and_disconnect(vbasedev);
close(vbasedev->fd);
}
@@ -802,6 +863,7 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data)
VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
vioc->dma_map = iommufd_cdev_map;
+ vioc->dma_map_file = iommufd_cdev_map_file;
vioc->dma_unmap = iommufd_cdev_unmap;
vioc->attach_device = iommufd_cdev_attach;
vioc->detach_device = iommufd_cdev_detach;
@@ -810,21 +872,38 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data)
vioc->query_dirty_bitmap = iommufd_query_dirty_bitmap;
};
+static bool
+host_iommu_device_iommufd_vfio_attach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ uint32_t hwpt_id, Error **errp)
+{
+ VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;
+
+ return !iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp);
+}
+
+static bool
+host_iommu_device_iommufd_vfio_detach_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+ Error **errp)
+{
+ VFIODevice *vbasedev = HOST_IOMMU_DEVICE(idev)->agent;
+
+ return iommufd_cdev_detach_ioas_hwpt(vbasedev, errp);
+}
+
static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
Error **errp)
{
VFIODevice *vdev = opaque;
+ HostIOMMUDeviceIOMMUFD *idev;
HostIOMMUDeviceCaps *caps = &hiod->caps;
+ VendorCaps *vendor_caps = &caps->vendor_caps;
enum iommu_hw_info_type type;
- union {
- struct iommu_hw_info_vtd vtd;
- } data;
uint64_t hw_caps;
hiod->agent = opaque;
- if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid,
- &type, &data, sizeof(data),
+ if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid, &type,
+ vendor_caps, sizeof(*vendor_caps),
&hw_caps, errp)) {
return false;
}
@@ -833,6 +912,11 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice *hiod, void *opaque,
caps->type = type;
caps->hw_caps = hw_caps;
+ idev = HOST_IOMMU_DEVICE_IOMMUFD(hiod);
+ idev->iommufd = vdev->iommufd;
+ idev->devid = vdev->devid;
+ idev->hwpt_id = vdev->hwpt->hwpt_id;
+
return true;
}
@@ -858,10 +942,14 @@ hiod_iommufd_vfio_get_page_size_mask(HostIOMMUDevice *hiod)
static void hiod_iommufd_vfio_class_init(ObjectClass *oc, const void *data)
{
HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_CLASS(oc);
+ HostIOMMUDeviceIOMMUFDClass *idevc = HOST_IOMMU_DEVICE_IOMMUFD_CLASS(oc);
hiodc->realize = hiod_iommufd_vfio_realize;
hiodc->get_iova_ranges = hiod_iommufd_vfio_get_iova_ranges;
hiodc->get_page_size_mask = hiod_iommufd_vfio_get_page_size_mask;
+
+ idevc->attach_hwpt = host_iommu_device_iommufd_vfio_attach_hwpt;
+ idevc->detach_hwpt = host_iommu_device_iommufd_vfio_detach_hwpt;
};
static const TypeInfo types[] = {
diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c
index bfacb3d..f498e23 100644
--- a/hw/vfio/listener.c
+++ b/hw/vfio/listener.c
@@ -90,16 +90,17 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
section->offset_within_address_space & (1ULL << 63);
}
-/* Called with rcu_read_lock held. */
-static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
- ram_addr_t *ram_addr, bool *read_only,
- Error **errp)
+/*
+ * Called with rcu_read_lock held.
+ * The returned MemoryRegion must not be accessed after calling rcu_read_unlock.
+ */
+static MemoryRegion *vfio_translate_iotlb(IOMMUTLBEntry *iotlb, hwaddr *xlat_p,
+ Error **errp)
{
- bool ret, mr_has_discard_manager;
+ MemoryRegion *mr;
- ret = memory_get_xlat_addr(iotlb, vaddr, ram_addr, read_only,
- &mr_has_discard_manager, errp);
- if (ret && mr_has_discard_manager) {
+ mr = memory_translate_iotlb(iotlb, xlat_p, errp);
+ if (mr && memory_region_has_ram_discard_manager(mr)) {
/*
* Malicious VMs might trigger discarding of IOMMU-mapped memory. The
* pages will remain pinned inside vfio until unmapped, resulting in a
@@ -118,7 +119,7 @@ static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
" intended via an IOMMU. It's possible to mitigate "
" by setting/adjusting RLIMIT_MEMLOCK.");
}
- return ret;
+ return mr;
}
static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
@@ -126,6 +127,8 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
VFIOContainerBase *bcontainer = giommu->bcontainer;
hwaddr iova = iotlb->iova + giommu->iommu_offset;
+ MemoryRegion *mr;
+ hwaddr xlat;
void *vaddr;
int ret;
Error *local_err = NULL;
@@ -150,10 +153,14 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
bool read_only;
- if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, &local_err)) {
+ mr = vfio_translate_iotlb(iotlb, &xlat, &local_err);
+ if (!mr) {
error_report_err(local_err);
goto out;
}
+ vaddr = memory_region_get_ram_ptr(mr) + xlat;
+ read_only = !(iotlb->perm & IOMMU_WO) || mr->readonly;
+
/*
* vaddr is only valid until rcu_read_unlock(). But after
* vfio_dma_map has set up the mapping the pages will be
@@ -163,7 +170,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
*/
ret = vfio_container_dma_map(bcontainer, iova,
iotlb->addr_mask + 1, vaddr,
- read_only);
+ read_only, mr);
if (ret) {
error_report("vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", "
"0x%"HWADDR_PRIx", %p) = %d (%s)",
@@ -233,7 +240,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
vaddr = memory_region_get_ram_ptr(section->mr) + start;
ret = vfio_container_dma_map(bcontainer, iova, next - start,
- vaddr, section->readonly);
+ vaddr, section->readonly, section->mr);
if (ret) {
/* Rollback */
vfio_ram_discard_notify_discard(rdl, section);
@@ -430,7 +437,7 @@ static void vfio_listener_commit(MemoryListener *listener)
listener);
void (*listener_commit)(VFIOContainerBase *bcontainer);
- listener_commit = VFIO_IOMMU_GET_CLASS(bcontainer)->listener_begin;
+ listener_commit = VFIO_IOMMU_GET_CLASS(bcontainer)->listener_commit;
if (listener_commit) {
listener_commit(bcontainer);
@@ -449,11 +456,38 @@ static void vfio_device_error_append(VFIODevice *vbasedev, Error **errp)
}
}
+VFIORamDiscardListener *vfio_find_ram_discard_listener(
+ VFIOContainerBase *bcontainer, MemoryRegionSection *section)
+{
+ VFIORamDiscardListener *vrdl = NULL;
+
+ QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) {
+ if (vrdl->mr == section->mr &&
+ vrdl->offset_within_address_space ==
+ section->offset_within_address_space) {
+ break;
+ }
+ }
+
+ if (!vrdl) {
+ hw_error("vfio: Trying to sync missing RAM discard listener");
+ /* does not return */
+ }
+ return vrdl;
+}
+
static void vfio_listener_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
listener);
+ vfio_container_region_add(bcontainer, section, false);
+}
+
+void vfio_container_region_add(VFIOContainerBase *bcontainer,
+ MemoryRegionSection *section,
+ bool cpr_remap)
+{
hwaddr iova, end;
Int128 llend, llsize;
void *vaddr;
@@ -489,6 +523,11 @@ static void vfio_listener_region_add(MemoryListener *listener,
int iommu_idx;
trace_vfio_listener_region_add_iommu(section->mr->name, iova, end);
+
+ if (cpr_remap) {
+ vfio_cpr_giommu_remap(bcontainer, section);
+ }
+
/*
* FIXME: For VFIO iommu types which have KVM acceleration to
* avoid bouncing all map/unmaps through qemu this way, this
@@ -531,7 +570,12 @@ static void vfio_listener_region_add(MemoryListener *listener,
* about changes.
*/
if (memory_region_has_ram_discard_manager(section->mr)) {
- vfio_ram_discard_register_listener(bcontainer, section);
+ if (!cpr_remap) {
+ vfio_ram_discard_register_listener(bcontainer, section);
+ } else if (!vfio_cpr_ram_discard_register_listener(bcontainer,
+ section)) {
+ goto fail;
+ }
return;
}
@@ -557,7 +601,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
}
ret = vfio_container_dma_map(bcontainer, iova, int128_get64(llsize),
- vaddr, section->readonly);
+ vaddr, section->readonly, section->mr);
if (ret) {
error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", "
"0x%"HWADDR_PRIx", %p) = %d (%s)",
@@ -1010,6 +1054,8 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
ram_addr_t translated_addr;
Error *local_err = NULL;
int ret = -EINVAL;
+ MemoryRegion *mr;
+ hwaddr xlat;
trace_vfio_iommu_map_dirty_notify(iova, iova + iotlb->addr_mask);
@@ -1021,9 +1067,11 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
}
rcu_read_lock();
- if (!vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL, &local_err)) {
+ mr = vfio_translate_iotlb(iotlb, &xlat, &local_err);
+ if (!mr) {
goto out_unlock;
}
+ translated_addr = memory_region_get_ram_addr(mr) + xlat;
ret = vfio_container_query_dirty_bitmap(bcontainer, iova, iotlb->addr_mask + 1,
translated_addr, &local_err);
@@ -1075,19 +1123,8 @@ vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainerBase *bcontainer,
MemoryRegionSection *section)
{
RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
- VFIORamDiscardListener *vrdl = NULL;
-
- QLIST_FOREACH(vrdl, &bcontainer->vrdl_list, next) {
- if (vrdl->mr == section->mr &&
- vrdl->offset_within_address_space ==
- section->offset_within_address_space) {
- break;
- }
- }
-
- if (!vrdl) {
- hw_error("vfio: Trying to sync missing RAM discard listener");
- }
+ VFIORamDiscardListener *vrdl =
+ vfio_find_ram_discard_listener(bcontainer, section);
/*
* We only want/can synchronize the bitmap for actually mapped parts -
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
index bccb050..bfaf6be 100644
--- a/hw/vfio/meson.build
+++ b/hw/vfio/meson.build
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
vfio_ss = ss.source_set()
vfio_ss.add(files(
'listener.c',
@@ -21,6 +23,7 @@ system_ss.add(when: 'CONFIG_VFIO_XGMAC', if_true: files('calxeda-xgmac.c'))
system_ss.add(when: 'CONFIG_VFIO_AMD_XGBE', if_true: files('amd-xgbe.c'))
system_ss.add(when: 'CONFIG_VFIO', if_true: files(
'cpr.c',
+ 'cpr-legacy.c',
'device.c',
'migration.c',
'migration-multifd.c',
@@ -28,7 +31,9 @@ system_ss.add(when: 'CONFIG_VFIO', if_true: files(
))
system_ss.add(when: ['CONFIG_VFIO', 'CONFIG_IOMMUFD'], if_true: files(
'iommufd.c',
+ 'cpr-iommufd.c',
))
+system_ss.add(when: 'CONFIG_IOMMUFD', if_false: files('iommufd-stubs.c'))
system_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
'display.c',
))
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index a1bfdfe..1093b28 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -29,7 +29,9 @@
#include "hw/pci/pci_bridge.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
+#include "hw/vfio/vfio-cpr.h"
#include "migration/vmstate.h"
+#include "migration/cpr.h"
#include "qobject/qdict.h"
#include "qemu/error-report.h"
#include "qemu/main-loop.h"
@@ -56,6 +58,36 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
static void vfio_msi_disable_common(VFIOPCIDevice *vdev);
+/* Create new or reuse existing eventfd */
+static bool vfio_notifier_init(VFIOPCIDevice *vdev, EventNotifier *e,
+ const char *name, int nr, Error **errp)
+{
+ int fd, ret;
+
+ fd = vfio_cpr_load_vector_fd(vdev, name, nr);
+ if (fd >= 0) {
+ event_notifier_init_fd(e, fd);
+ return true;
+ }
+
+ ret = event_notifier_init(e, 0);
+ if (ret) {
+ error_setg_errno(errp, -ret, "vfio_notifier_init %s failed", name);
+ return false;
+ }
+
+ fd = event_notifier_get_fd(e);
+ vfio_cpr_save_vector_fd(vdev, name, nr, fd);
+ return true;
+}
+
+static void vfio_notifier_cleanup(VFIOPCIDevice *vdev, EventNotifier *e,
+ const char *name, int nr)
+{
+ vfio_cpr_delete_vector_fd(vdev, name, nr);
+ event_notifier_cleanup(e);
+}
+
/*
* Disabling BAR mmaping can be slow, but toggling it around INTx can
* also be a huge overhead. We try to get the best of both worlds by
@@ -103,7 +135,7 @@ static void vfio_intx_interrupt(void *opaque)
}
}
-static void vfio_intx_eoi(VFIODevice *vbasedev)
+void vfio_pci_intx_eoi(VFIODevice *vbasedev)
{
VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
@@ -111,7 +143,7 @@ static void vfio_intx_eoi(VFIODevice *vbasedev)
return;
}
- trace_vfio_intx_eoi(vbasedev->name);
+ trace_vfio_pci_intx_eoi(vbasedev->name);
vdev->intx.pending = false;
pci_irq_deassert(&vdev->pdev);
@@ -136,8 +168,7 @@ static bool vfio_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp)
pci_irq_deassert(&vdev->pdev);
/* Get an eventfd for resample/unmask */
- if (event_notifier_init(&vdev->intx.unmask, 0)) {
- error_setg(errp, "event_notifier_init failed eoi");
+ if (!vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0, errp)) {
goto fail;
}
@@ -169,7 +200,7 @@ fail_vfio:
kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vdev->intx.interrupt,
vdev->intx.route.irq);
fail_irqfd:
- event_notifier_cleanup(&vdev->intx.unmask);
+ vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0);
fail:
qemu_set_fd_handler(irq_fd, vfio_intx_interrupt, NULL, vdev);
vfio_device_irq_unmask(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX);
@@ -179,6 +210,36 @@ fail:
#endif
}
+static bool vfio_cpr_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp)
+{
+#ifdef CONFIG_KVM
+ if (vdev->no_kvm_intx || !kvm_irqfds_enabled() ||
+ vdev->intx.route.mode != PCI_INTX_ENABLED ||
+ !kvm_resamplefds_enabled()) {
+ return true;
+ }
+
+ if (!vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0, errp)) {
+ return false;
+ }
+
+ if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
+ &vdev->intx.interrupt,
+ &vdev->intx.unmask,
+ vdev->intx.route.irq)) {
+ error_setg_errno(errp, errno, "failed to setup resample irqfd");
+ vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0);
+ return false;
+ }
+
+ vdev->intx.kvm_accel = true;
+ trace_vfio_intx_enable_kvm(vdev->vbasedev.name);
+ return true;
+#else
+ return true;
+#endif
+}
+
static void vfio_intx_disable_kvm(VFIOPCIDevice *vdev)
{
#ifdef CONFIG_KVM
@@ -201,7 +262,7 @@ static void vfio_intx_disable_kvm(VFIOPCIDevice *vdev)
}
/* We only need to close the eventfd for VFIO to cleanup the kernel side */
- event_notifier_cleanup(&vdev->intx.unmask);
+ vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0);
/* QEMU starts listening for interrupt events. */
qemu_set_fd_handler(event_notifier_get_fd(&vdev->intx.interrupt),
@@ -236,7 +297,7 @@ static void vfio_intx_update(VFIOPCIDevice *vdev, PCIINTxRoute *route)
}
/* Re-enable the interrupt in cased we missed an EOI */
- vfio_intx_eoi(&vdev->vbasedev);
+ vfio_pci_intx_eoi(&vdev->vbasedev);
}
static void vfio_intx_routing_notifier(PCIDevice *pdev)
@@ -268,14 +329,19 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1);
Error *err = NULL;
int32_t fd;
- int ret;
if (!pin) {
return true;
}
- vfio_disable_interrupts(vdev);
+ /*
+ * Do not alter interrupt state during vfio_realize and cpr load.
+ * The incoming state is cleared thereafter.
+ */
+ if (!cpr_is_incoming()) {
+ vfio_disable_interrupts(vdev);
+ }
vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */
pci_config_set_interrupt_pin(vdev->pdev.config, pin);
@@ -291,18 +357,25 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
}
#endif
- ret = event_notifier_init(&vdev->intx.interrupt, 0);
- if (ret) {
- error_setg_errno(errp, -ret, "event_notifier_init failed");
+ if (!vfio_notifier_init(vdev, &vdev->intx.interrupt, "intx-interrupt", 0,
+ errp)) {
return false;
}
fd = event_notifier_get_fd(&vdev->intx.interrupt);
qemu_set_fd_handler(fd, vfio_intx_interrupt, NULL, vdev);
+
+ if (cpr_is_incoming()) {
+ if (!vfio_cpr_intx_enable_kvm(vdev, &err)) {
+ warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
+ }
+ goto skip_signaling;
+ }
+
if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0,
VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) {
qemu_set_fd_handler(fd, NULL, NULL, vdev);
- event_notifier_cleanup(&vdev->intx.interrupt);
+ vfio_notifier_cleanup(vdev, &vdev->intx.interrupt, "intx-interrupt", 0);
return false;
}
@@ -310,6 +383,7 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
}
+skip_signaling:
vdev->interrupt = VFIO_INT_INTx;
trace_vfio_intx_enable(vdev->vbasedev.name);
@@ -329,13 +403,18 @@ static void vfio_intx_disable(VFIOPCIDevice *vdev)
fd = event_notifier_get_fd(&vdev->intx.interrupt);
qemu_set_fd_handler(fd, NULL, NULL, vdev);
- event_notifier_cleanup(&vdev->intx.interrupt);
+ vfio_notifier_cleanup(vdev, &vdev->intx.interrupt, "intx-interrupt", 0);
vdev->interrupt = VFIO_INT_NONE;
trace_vfio_intx_disable(vdev->vbasedev.name);
}
+bool vfio_pci_intx_enable(VFIOPCIDevice *vdev, Error **errp)
+{
+ return vfio_intx_enable(vdev, errp);
+}
+
/*
* MSI/X
*/
@@ -374,6 +453,14 @@ static void vfio_msi_interrupt(void *opaque)
notify(&vdev->pdev, nr);
}
+void vfio_pci_msi_set_handler(VFIOPCIDevice *vdev, int nr)
+{
+ VFIOMSIVector *vector = &vdev->msi_vectors[nr];
+ int fd = event_notifier_get_fd(&vector->interrupt);
+
+ qemu_set_fd_handler(fd, vfio_msi_interrupt, NULL, vector);
+}
+
/*
* Get MSI-X enabled, but no vector enabled, by setting vector 0 with an invalid
* fd to kernel.
@@ -460,8 +547,8 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix)
return ret;
}
-static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
- int vector_n, bool msix)
+void vfio_pci_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
+ int vector_n, bool msix)
{
if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi)) {
return;
@@ -471,13 +558,16 @@ static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
vector_n, &vdev->pdev);
}
-static void vfio_connect_kvm_msi_virq(VFIOMSIVector *vector)
+static void vfio_connect_kvm_msi_virq(VFIOMSIVector *vector, int nr)
{
+ const char *name = "kvm_interrupt";
+
if (vector->virq < 0) {
return;
}
- if (event_notifier_init(&vector->kvm_interrupt, 0)) {
+ if (!vfio_notifier_init(vector->vdev, &vector->kvm_interrupt, name, nr,
+ NULL)) {
goto fail_notifier;
}
@@ -489,19 +579,20 @@ static void vfio_connect_kvm_msi_virq(VFIOMSIVector *vector)
return;
fail_kvm:
- event_notifier_cleanup(&vector->kvm_interrupt);
+ vfio_notifier_cleanup(vector->vdev, &vector->kvm_interrupt, name, nr);
fail_notifier:
kvm_irqchip_release_virq(kvm_state, vector->virq);
vector->virq = -1;
}
-static void vfio_remove_kvm_msi_virq(VFIOMSIVector *vector)
+static void vfio_remove_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
+ int nr)
{
kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt,
vector->virq);
kvm_irqchip_release_virq(kvm_state, vector->virq);
vector->virq = -1;
- event_notifier_cleanup(&vector->kvm_interrupt);
+ vfio_notifier_cleanup(vdev, &vector->kvm_interrupt, "kvm_interrupt", nr);
}
static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg,
@@ -511,6 +602,43 @@ static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg,
kvm_irqchip_commit_routes(kvm_state);
}
+static void set_irq_signalling(VFIODevice *vbasedev, VFIOMSIVector *vector,
+ unsigned int nr)
+{
+ Error *err = NULL;
+ int32_t fd;
+
+ if (vector->virq >= 0) {
+ fd = event_notifier_get_fd(&vector->kvm_interrupt);
+ } else {
+ fd = event_notifier_get_fd(&vector->interrupt);
+ }
+
+ if (!vfio_device_irq_set_signaling(vbasedev, VFIO_PCI_MSIX_IRQ_INDEX, nr,
+ VFIO_IRQ_SET_ACTION_TRIGGER,
+ fd, &err)) {
+ error_reportf_err(err, VFIO_MSG_PREFIX, vbasedev->name);
+ }
+}
+
+void vfio_pci_vector_init(VFIOPCIDevice *vdev, int nr)
+{
+ VFIOMSIVector *vector = &vdev->msi_vectors[nr];
+ PCIDevice *pdev = &vdev->pdev;
+ Error *local_err = NULL;
+
+ vector->vdev = vdev;
+ vector->virq = -1;
+ if (!vfio_notifier_init(vdev, &vector->interrupt, "interrupt", nr,
+ &local_err)) {
+ error_report_err(local_err);
+ }
+ vector->use = true;
+ if (vdev->interrupt == VFIO_INT_MSIX) {
+ msix_vector_use(pdev, nr);
+ }
+}
+
static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
MSIMessage *msg, IOHandler *handler)
{
@@ -524,13 +652,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
vector = &vdev->msi_vectors[nr];
if (!vector->use) {
- vector->vdev = vdev;
- vector->virq = -1;
- if (event_notifier_init(&vector->interrupt, 0)) {
- error_report("vfio: Error: event_notifier_init failed");
- }
- vector->use = true;
- msix_vector_use(pdev, nr);
+ vfio_pci_vector_init(vdev, nr);
}
qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
@@ -542,19 +664,19 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
*/
if (vector->virq >= 0) {
if (!msg) {
- vfio_remove_kvm_msi_virq(vector);
+ vfio_remove_kvm_msi_virq(vdev, vector, nr);
} else {
vfio_update_kvm_msi_virq(vector, *msg, pdev);
}
} else {
if (msg) {
if (vdev->defer_kvm_irq_routing) {
- vfio_add_kvm_msi_virq(vdev, vector, nr, true);
+ vfio_pci_add_kvm_msi_virq(vdev, vector, nr, true);
} else {
vfio_route_change = kvm_irqchip_begin_route_changes(kvm_state);
- vfio_add_kvm_msi_virq(vdev, vector, nr, true);
+ vfio_pci_add_kvm_msi_virq(vdev, vector, nr, true);
kvm_irqchip_commit_route_changes(&vfio_route_change);
- vfio_connect_kvm_msi_virq(vector);
+ vfio_connect_kvm_msi_virq(vector, nr);
}
}
}
@@ -583,21 +705,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
strerror(-ret));
}
} else {
- Error *err = NULL;
- int32_t fd;
-
- if (vector->virq >= 0) {
- fd = event_notifier_get_fd(&vector->kvm_interrupt);
- } else {
- fd = event_notifier_get_fd(&vector->interrupt);
- }
-
- if (!vfio_device_irq_set_signaling(&vdev->vbasedev,
- VFIO_PCI_MSIX_IRQ_INDEX, nr,
- VFIO_IRQ_SET_ACTION_TRIGGER, fd,
- &err)) {
- error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
- }
+ set_irq_signalling(&vdev->vbasedev, vector, nr);
}
}
@@ -615,6 +723,15 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
static int vfio_msix_vector_use(PCIDevice *pdev,
unsigned int nr, MSIMessage msg)
{
+ /*
+ * Ignore the callback from msix_set_vector_notifiers during resume.
+ * The necessary subset of these actions is called from
+ * vfio_cpr_claim_vectors during post load.
+ */
+ if (cpr_is_incoming()) {
+ return 0;
+ }
+
return vfio_msix_vector_do_use(pdev, nr, &msg, vfio_msi_interrupt);
}
@@ -645,14 +762,20 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
}
}
-static void vfio_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev)
+void vfio_pci_msix_set_notifiers(VFIOPCIDevice *vdev)
+{
+ msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,
+ vfio_msix_vector_release, NULL);
+}
+
+void vfio_pci_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev)
{
assert(!vdev->defer_kvm_irq_routing);
vdev->defer_kvm_irq_routing = true;
vfio_route_change = kvm_irqchip_begin_route_changes(kvm_state);
}
-static void vfio_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev)
+void vfio_pci_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev)
{
int i;
@@ -662,7 +785,7 @@ static void vfio_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev)
kvm_irqchip_commit_route_changes(&vfio_route_change);
for (i = 0; i < vdev->nr_vectors; i++) {
- vfio_connect_kvm_msi_virq(&vdev->msi_vectors[i]);
+ vfio_connect_kvm_msi_virq(&vdev->msi_vectors[i], i);
}
}
@@ -682,14 +805,14 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev)
* routes once rather than per vector provides a substantial
* performance improvement.
*/
- vfio_prepare_kvm_msi_virq_batch(vdev);
+ vfio_pci_prepare_kvm_msi_virq_batch(vdev);
if (msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,
vfio_msix_vector_release, NULL)) {
error_report("vfio: msix_set_vector_notifiers failed");
}
- vfio_commit_kvm_msi_virq_batch(vdev);
+ vfio_pci_commit_kvm_msi_virq_batch(vdev);
if (vdev->nr_vectors) {
ret = vfio_enable_vectors(vdev, true);
@@ -733,19 +856,21 @@ retry:
* Deferring to commit the KVM routes once rather than per vector
* provides a substantial performance improvement.
*/
- vfio_prepare_kvm_msi_virq_batch(vdev);
+ vfio_pci_prepare_kvm_msi_virq_batch(vdev);
vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->nr_vectors);
for (i = 0; i < vdev->nr_vectors; i++) {
VFIOMSIVector *vector = &vdev->msi_vectors[i];
+ Error *local_err = NULL;
vector->vdev = vdev;
vector->virq = -1;
vector->use = true;
- if (event_notifier_init(&vector->interrupt, 0)) {
- error_report("vfio: Error: event_notifier_init failed");
+ if (!vfio_notifier_init(vdev, &vector->interrupt, "interrupt", i,
+ &local_err)) {
+ error_report_err(local_err);
}
qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
@@ -755,10 +880,10 @@ retry:
* Attempt to enable route through KVM irqchip,
* default to userspace handling if unavailable.
*/
- vfio_add_kvm_msi_virq(vdev, vector, i, false);
+ vfio_pci_add_kvm_msi_virq(vdev, vector, i, false);
}
- vfio_commit_kvm_msi_virq_batch(vdev);
+ vfio_pci_commit_kvm_msi_virq_batch(vdev);
/* Set interrupt type prior to possible interrupts */
vdev->interrupt = VFIO_INT_MSI;
@@ -801,11 +926,11 @@ static void vfio_msi_disable_common(VFIOPCIDevice *vdev)
VFIOMSIVector *vector = &vdev->msi_vectors[i];
if (vdev->msi_vectors[i].use) {
if (vector->virq >= 0) {
- vfio_remove_kvm_msi_virq(vector);
+ vfio_remove_kvm_msi_virq(vdev, vector, i);
}
qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
NULL, NULL, NULL);
- event_notifier_cleanup(&vector->interrupt);
+ vfio_notifier_cleanup(vdev, &vector->interrupt, "interrupt", i);
}
}
@@ -984,7 +1109,7 @@ static int vfio_pci_config_space_write(VFIOPCIDevice *vdev, off_t offset,
{
return vdev->vbasedev.io_ops->region_write(&vdev->vbasedev,
VFIO_PCI_CONFIG_REGION_INDEX,
- offset, size, data);
+ offset, size, data, false);
}
static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
@@ -1738,7 +1863,7 @@ static bool vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp)
return true;
}
-static void vfio_teardown_msi(VFIOPCIDevice *vdev)
+void vfio_pci_teardown_msi(VFIOPCIDevice *vdev)
{
msi_uninit(&vdev->pdev);
@@ -1788,6 +1913,9 @@ static void vfio_bar_prepare(VFIOPCIDevice *vdev, int nr)
bar->type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK :
~PCI_BASE_ADDRESS_MEM_MASK);
bar->size = bar->region.size;
+
+ /* IO regions are sync, memory can be async */
+ bar->region.post_wr = (bar->ioport == 0);
}
static void vfio_bars_prepare(VFIOPCIDevice *vdev)
@@ -1834,7 +1962,7 @@ static void vfio_bars_register(VFIOPCIDevice *vdev)
}
}
-static void vfio_bars_exit(VFIOPCIDevice *vdev)
+void vfio_pci_bars_exit(VFIOPCIDevice *vdev)
{
int i;
@@ -2425,7 +2553,7 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev)
g_free(config);
}
-static bool vfio_add_capabilities(VFIOPCIDevice *vdev, Error **errp)
+bool vfio_pci_add_capabilities(VFIOPCIDevice *vdev, Error **errp)
{
PCIDevice *pdev = &vdev->pdev;
@@ -2701,7 +2829,7 @@ static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f)
static VFIODeviceOps vfio_pci_ops = {
.vfio_compute_needs_reset = vfio_pci_compute_needs_reset,
.vfio_hot_reset_multi = vfio_pci_hot_reset_multi,
- .vfio_eoi = vfio_intx_eoi,
+ .vfio_eoi = vfio_pci_intx_eoi,
.vfio_get_object = vfio_pci_get_object,
.vfio_save_config = vfio_pci_save_config,
.vfio_load_config = vfio_pci_load_config,
@@ -2772,7 +2900,7 @@ bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
return true;
}
-static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
+bool vfio_pci_populate_device(VFIOPCIDevice *vdev, Error **errp)
{
VFIODevice *vbasedev = &vdev->vbasedev;
struct vfio_region_info *reg_info = NULL;
@@ -2818,7 +2946,7 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
return false;
}
- trace_vfio_populate_device_config(vdev->vbasedev.name,
+ trace_vfio_pci_populate_device_config(vdev->vbasedev.name,
(unsigned long)reg_info->size,
(unsigned long)reg_info->offset,
(unsigned long)reg_info->flags);
@@ -2840,7 +2968,7 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
ret = vfio_device_get_irq_info(vbasedev, VFIO_PCI_ERR_IRQ_INDEX, &irq_info);
if (ret) {
/* This can fail for an old kernel or legacy PCI dev */
- trace_vfio_populate_device_get_irq_info_failure(strerror(-ret));
+ trace_vfio_pci_populate_device_get_irq_info_failure(strerror(-ret));
} else if (irq_info.count == 1) {
vdev->pci_aer = true;
} else {
@@ -2852,11 +2980,23 @@ static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
return true;
}
-static void vfio_pci_put_device(VFIOPCIDevice *vdev)
+void vfio_pci_put_device(VFIOPCIDevice *vdev)
{
+ vfio_display_finalize(vdev);
+ vfio_bars_finalize(vdev);
+ g_free(vdev->emulated_config_bits);
+ g_free(vdev->rom);
+ /*
+ * XXX Leaking igd_opregion is not an oversight, we can't remove the
+ * fw_cfg entry therefore leaking this allocation seems like the safest
+ * option.
+ *
+ * g_free(vdev->igd_opregion);
+ */
+
vfio_device_detach(&vdev->vbasedev);
- g_free(vdev->vbasedev.name);
+ vfio_device_free_name(&vdev->vbasedev);
g_free(vdev->msix);
}
@@ -2888,7 +3028,7 @@ static void vfio_err_notifier_handler(void *opaque)
* and continue after disabling error recovery support for the
* device.
*/
-static void vfio_register_err_notifier(VFIOPCIDevice *vdev)
+void vfio_pci_register_err_notifier(VFIOPCIDevice *vdev)
{
Error *err = NULL;
int32_t fd;
@@ -2897,8 +3037,9 @@ static void vfio_register_err_notifier(VFIOPCIDevice *vdev)
return;
}
- if (event_notifier_init(&vdev->err_notifier, 0)) {
- error_report("vfio: Unable to init event notifier for error detection");
+ if (!vfio_notifier_init(vdev, &vdev->err_notifier, "err_notifier", 0,
+ &err)) {
+ error_report_err(err);
vdev->pci_aer = false;
return;
}
@@ -2906,11 +3047,16 @@ static void vfio_register_err_notifier(VFIOPCIDevice *vdev)
fd = event_notifier_get_fd(&vdev->err_notifier);
qemu_set_fd_handler(fd, vfio_err_notifier_handler, NULL, vdev);
+ /* Do not alter irq_signaling during vfio_realize for cpr */
+ if (cpr_is_incoming()) {
+ return;
+ }
+
if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_ERR_IRQ_INDEX, 0,
VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
qemu_set_fd_handler(fd, NULL, NULL, vdev);
- event_notifier_cleanup(&vdev->err_notifier);
+ vfio_notifier_cleanup(vdev, &vdev->err_notifier, "err_notifier", 0);
vdev->pci_aer = false;
}
}
@@ -2929,7 +3075,7 @@ static void vfio_unregister_err_notifier(VFIOPCIDevice *vdev)
}
qemu_set_fd_handler(event_notifier_get_fd(&vdev->err_notifier),
NULL, NULL, vdev);
- event_notifier_cleanup(&vdev->err_notifier);
+ vfio_notifier_cleanup(vdev, &vdev->err_notifier, "err_notifier", 0);
}
static void vfio_req_notifier_handler(void *opaque)
@@ -2947,7 +3093,7 @@ static void vfio_req_notifier_handler(void *opaque)
}
}
-static void vfio_register_req_notifier(VFIOPCIDevice *vdev)
+void vfio_pci_register_req_notifier(VFIOPCIDevice *vdev)
{
struct vfio_irq_info irq_info;
Error *err = NULL;
@@ -2964,19 +3110,26 @@ static void vfio_register_req_notifier(VFIOPCIDevice *vdev)
return;
}
- if (event_notifier_init(&vdev->req_notifier, 0)) {
- error_report("vfio: Unable to init event notifier for device request");
+ if (!vfio_notifier_init(vdev, &vdev->req_notifier, "req_notifier", 0,
+ &err)) {
+ error_report_err(err);
return;
}
fd = event_notifier_get_fd(&vdev->req_notifier);
qemu_set_fd_handler(fd, vfio_req_notifier_handler, NULL, vdev);
+ /* Do not alter irq_signaling during vfio_realize for cpr */
+ if (cpr_is_incoming()) {
+ vdev->req_enabled = true;
+ return;
+ }
+
if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_REQ_IRQ_INDEX, 0,
VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
qemu_set_fd_handler(fd, NULL, NULL, vdev);
- event_notifier_cleanup(&vdev->req_notifier);
+ vfio_notifier_cleanup(vdev, &vdev->req_notifier, "req_notifier", 0);
} else {
vdev->req_enabled = true;
}
@@ -2996,15 +3149,28 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
}
qemu_set_fd_handler(event_notifier_get_fd(&vdev->req_notifier),
NULL, NULL, vdev);
- event_notifier_cleanup(&vdev->req_notifier);
+ vfio_notifier_cleanup(vdev, &vdev->req_notifier, "req_notifier", 0);
vdev->req_enabled = false;
}
-static bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp)
+bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp)
{
PCIDevice *pdev = &vdev->pdev;
VFIODevice *vbasedev = &vdev->vbasedev;
+ uint32_t config_space_size;
+ int ret;
+
+ config_space_size = MIN(pci_config_size(&vdev->pdev), vdev->config_size);
+
+ /* Get a copy of config space */
+ ret = vfio_pci_config_space_read(vdev, 0, config_space_size,
+ vdev->pdev.config);
+ if (ret < (int)config_space_size) {
+ ret = ret < 0 ? -ret : EFAULT;
+ error_setg_errno(errp, ret, "failed to read device config space");
+ return false;
+ }
/* vfio emulates a lot for us, but some bits need extra love */
vdev->emulated_config_bits = g_malloc0(vdev->config_size);
@@ -3094,7 +3260,7 @@ static bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp)
return true;
}
-static bool vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp)
+bool vfio_pci_interrupt_setup(VFIOPCIDevice *vdev, Error **errp)
{
PCIDevice *pdev = &vdev->pdev;
@@ -3116,7 +3282,13 @@ static bool vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp)
vfio_intx_routing_notifier);
vdev->irqchip_change_notifier.notify = vfio_irqchip_change;
kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier);
- if (!vfio_intx_enable(vdev, errp)) {
+
+ /*
+ * During CPR, do not call vfio_intx_enable at this time. Instead,
+ * call it from vfio_pci_post_load after the intx routing data has
+ * been loaded from vmstate.
+ */
+ if (!cpr_is_incoming() && !vfio_intx_enable(vdev, errp)) {
timer_free(vdev->intx.mmap_timer);
pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
@@ -3126,15 +3298,14 @@ static bool vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp)
return true;
}
-static void vfio_realize(PCIDevice *pdev, Error **errp)
+static void vfio_pci_realize(PCIDevice *pdev, Error **errp)
{
ERRP_GUARD();
VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
VFIODevice *vbasedev = &vdev->vbasedev;
- int i, ret;
+ int i;
char uuid[UUID_STR_LEN];
g_autofree char *name = NULL;
- uint32_t config_space_size;
if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
if (!(~vdev->host.domain || ~vdev->host.bus ||
@@ -3185,18 +3356,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
goto error;
}
- if (!vfio_populate_device(vdev, errp)) {
- goto error;
- }
-
- config_space_size = MIN(pci_config_size(&vdev->pdev), vdev->config_size);
-
- /* Get a copy of config space */
- ret = vfio_pci_config_space_read(vdev, 0, config_space_size,
- vdev->pdev.config);
- if (ret < (int)config_space_size) {
- ret = ret < 0 ? -ret : EFAULT;
- error_setg_errno(errp, ret, "failed to read device config space");
+ if (!vfio_pci_populate_device(vdev, errp)) {
goto error;
}
@@ -3210,7 +3370,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
goto out_teardown;
}
- if (!vfio_add_capabilities(vdev, errp)) {
+ if (!vfio_pci_add_capabilities(vdev, errp)) {
goto out_unset_idev;
}
@@ -3226,7 +3386,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
vfio_bar_quirk_setup(vdev, i);
}
- if (!vfio_interrupt_setup(vdev, errp)) {
+ if (!vfio_pci_interrupt_setup(vdev, errp)) {
goto out_unset_idev;
}
@@ -3270,8 +3430,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
}
}
- vfio_register_err_notifier(vdev);
- vfio_register_req_notifier(vdev);
+ vfio_pci_register_err_notifier(vdev);
+ vfio_pci_register_req_notifier(vdev);
vfio_setup_resetfn_quirk(vdev);
return;
@@ -3292,8 +3452,8 @@ out_unset_idev:
pci_device_unset_iommu_device(pdev);
}
out_teardown:
- vfio_teardown_msi(vdev);
- vfio_bars_exit(vdev);
+ vfio_pci_teardown_msi(vdev);
+ vfio_pci_bars_exit(vdev);
error:
error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name);
}
@@ -3302,17 +3462,6 @@ static void vfio_instance_finalize(Object *obj)
{
VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
- vfio_display_finalize(vdev);
- vfio_bars_finalize(vdev);
- g_free(vdev->emulated_config_bits);
- g_free(vdev->rom);
- /*
- * XXX Leaking igd_opregion is not an oversight, we can't remove the
- * fw_cfg entry therefore leaking this allocation seems like the safest
- * option.
- *
- * g_free(vdev->igd_opregion);
- */
vfio_pci_put_device(vdev);
}
@@ -3331,9 +3480,9 @@ static void vfio_exitfn(PCIDevice *pdev)
if (vdev->intx.mmap_timer) {
timer_free(vdev->intx.mmap_timer);
}
- vfio_teardown_msi(vdev);
+ vfio_pci_teardown_msi(vdev);
vfio_pci_disable_rp_atomics(vdev);
- vfio_bars_exit(vdev);
+ vfio_pci_bars_exit(vdev);
vfio_migration_exit(vbasedev);
if (!vbasedev->mdev) {
pci_device_unset_iommu_device(pdev);
@@ -3344,6 +3493,11 @@ static void vfio_pci_reset(DeviceState *dev)
{
VFIOPCIDevice *vdev = VFIO_PCI_BASE(dev);
+ /* Do not reset the device during qemu_system_reset prior to cpr load */
+ if (cpr_is_incoming()) {
+ return;
+ }
+
trace_vfio_pci_reset(vdev->vbasedev.name);
vfio_pci_pre_reset(vdev);
@@ -3401,6 +3555,13 @@ static void vfio_instance_init(Object *obj)
/* QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command
* line, therefore, no need to wait to realize like other devices */
pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
+
+ /*
+ * A device that is resuming for cpr is already configured, so do not
+ * reset it during qemu_system_reset prior to cpr load, else interrupts
+ * may be lost.
+ */
+ pci_dev->cap_present |= QEMU_PCI_SKIP_RESET_ON_CPR;
}
static void vfio_pci_base_dev_class_init(ObjectClass *klass, const void *data)
@@ -3418,7 +3579,7 @@ static void vfio_pci_base_dev_class_init(ObjectClass *klass, const void *data)
static const TypeInfo vfio_pci_base_dev_info = {
.name = TYPE_VFIO_PCI_BASE,
.parent = TYPE_PCI_DEVICE,
- .instance_size = 0,
+ .instance_size = sizeof(VFIOPCIDevice),
.abstract = true,
.class_init = vfio_pci_base_dev_class_init,
.interfaces = (const InterfaceInfo[]) {
@@ -3513,8 +3674,9 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data)
#ifdef CONFIG_IOMMUFD
object_class_property_add_str(klass, "fd", NULL, vfio_pci_set_fd);
#endif
+ dc->vmsd = &vfio_cpr_pci_vmstate;
dc->desc = "VFIO-based PCI device assignment";
- pdc->realize = vfio_realize;
+ pdc->realize = vfio_pci_realize;
object_class_property_set_description(klass, /* 1.3 */
"host",
@@ -3640,7 +3802,6 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, const void *data)
static const TypeInfo vfio_pci_dev_info = {
.name = TYPE_VFIO_PCI,
.parent = TYPE_VFIO_PCI_BASE,
- .instance_size = sizeof(VFIOPCIDevice),
.class_init = vfio_pci_dev_class_init,
.instance_init = vfio_instance_init,
.instance_finalize = vfio_instance_finalize,
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 5ce0fb9..495fae7 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -116,6 +116,7 @@ typedef struct VFIOMSIXInfo {
uint32_t pba_offset;
unsigned long *pending;
bool noresize;
+ MemoryRegion *pba_region;
} VFIOMSIXInfo;
/*
@@ -210,6 +211,16 @@ static inline bool vfio_is_vga(VFIOPCIDevice *vdev)
return class == PCI_CLASS_DISPLAY_VGA;
}
+/* MSI/MSI-X/INTx */
+void vfio_pci_vector_init(VFIOPCIDevice *vdev, int nr);
+void vfio_pci_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
+ int vector_n, bool msix);
+void vfio_pci_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev);
+void vfio_pci_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev);
+bool vfio_pci_intx_enable(VFIOPCIDevice *vdev, Error **errp);
+void vfio_pci_msix_set_notifiers(VFIOPCIDevice *vdev);
+void vfio_pci_msi_set_handler(VFIOPCIDevice *vdev, int nr);
+
uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
void vfio_pci_write_config(PCIDevice *pdev,
uint32_t addr, uint32_t val, int len);
@@ -248,4 +259,15 @@ void vfio_display_finalize(VFIOPCIDevice *vdev);
extern const VMStateDescription vfio_display_vmstate;
+void vfio_pci_bars_exit(VFIOPCIDevice *vdev);
+bool vfio_pci_add_capabilities(VFIOPCIDevice *vdev, Error **errp);
+bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp);
+bool vfio_pci_interrupt_setup(VFIOPCIDevice *vdev, Error **errp);
+void vfio_pci_intx_eoi(VFIODevice *vbasedev);
+void vfio_pci_put_device(VFIOPCIDevice *vdev);
+bool vfio_pci_populate_device(VFIOPCIDevice *vdev, Error **errp);
+void vfio_pci_register_err_notifier(VFIOPCIDevice *vdev);
+void vfio_pci_register_req_notifier(VFIOPCIDevice *vdev);
+void vfio_pci_teardown_msi(VFIOPCIDevice *vdev);
+
#endif /* HW_VFIO_VFIO_PCI_H */
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index 9a21f2e..5c1795a 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -530,7 +530,7 @@ static bool vfio_base_device_init(VFIODevice *vbasedev, Error **errp)
{
/* @fd takes precedence over @sysfsdev which takes precedence over @host */
if (vbasedev->fd < 0 && vbasedev->sysfsdev) {
- g_free(vbasedev->name);
+ vfio_device_free_name(vbasedev);
vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
} else if (vbasedev->fd < 0) {
if (!vbasedev->name || strchr(vbasedev->name, '/')) {
diff --git a/hw/vfio/region.c b/hw/vfio/region.c
index 34752c3..d04c57d 100644
--- a/hw/vfio/region.c
+++ b/hw/vfio/region.c
@@ -66,7 +66,7 @@ void vfio_region_write(void *opaque, hwaddr addr,
}
ret = vbasedev->io_ops->region_write(vbasedev, region->nr,
- addr, size, &buf);
+ addr, size, &buf, region->post_wr);
if (ret != size) {
error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64
",%d) failed: %s",
@@ -200,6 +200,7 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
region->size = info->size;
region->fd_offset = info->offset;
region->nr = index;
+ region->post_wr = false;
if (region->size) {
region->mem = g_new0(MemoryRegion, 1);
@@ -241,6 +242,7 @@ int vfio_region_mmap(VFIORegion *region)
{
int i, ret, prot = 0;
char *name;
+ int fd;
if (!region->mem) {
return 0;
@@ -271,14 +273,15 @@ int vfio_region_mmap(VFIORegion *region)
goto no_mmap;
}
+ fd = vfio_device_get_region_fd(region->vbasedev, region->nr);
+
map_align = (void *)ROUND_UP((uintptr_t)map_base, (uintptr_t)align);
munmap(map_base, map_align - map_base);
munmap(map_align + region->mmaps[i].size,
align - (map_align - map_base));
region->mmaps[i].mmap = mmap(map_align, region->mmaps[i].size, prot,
- MAP_SHARED | MAP_FIXED,
- region->vbasedev->fd,
+ MAP_SHARED | MAP_FIXED, fd,
region->fd_offset +
region->mmaps[i].offset);
if (region->mmaps[i].mmap == MAP_FAILED) {
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index e90ec9b..8ec0ad0 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -1,8 +1,10 @@
# See docs/devel/tracing.rst for syntax documentation.
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
# pci.c
vfio_intx_interrupt(const char *name, char line) " (%s) Pin %c"
-vfio_intx_eoi(const char *name) " (%s) EOI"
+vfio_pci_intx_eoi(const char *name) " (%s) EOI"
vfio_intx_enable_kvm(const char *name) " (%s) KVM INTx accel enabled"
vfio_intx_disable_kvm(const char *name) " (%s) KVM INTx accel disabled"
vfio_intx_update(const char *name, int new_irq, int target_irq) " (%s) IRQ moved %d -> %d"
@@ -35,8 +37,8 @@ vfio_pci_hot_reset(const char *name, const char *type) " (%s) %s"
vfio_pci_hot_reset_has_dep_devices(const char *name) "%s: hot reset dependent devices:"
vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int group_id) "\t%04x:%02x:%02x.%x group %d"
vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s"
-vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device '%s' config: size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
-vfio_populate_device_get_irq_info_failure(const char *errstr) "VFIO_DEVICE_GET_IRQ_INFO failure: %s"
+vfio_pci_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device '%s' config: size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
+vfio_pci_populate_device_get_irq_info_failure(const char *errstr) "VFIO_DEVICE_GET_IRQ_INFO failure: %s"
vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d"
vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x"
vfio_pci_reset(const char *name) " (%s)"
@@ -195,6 +197,9 @@ iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD con
iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d"
iommufd_cdev_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int dev_id) "\t%04x:%02x:%02x.%x devid %d"
+# cpr-iommufd.c
+vfio_cpr_find_device(uint32_t ioas_id, int devid, uint32_t hwpt_id) "ioas_id %u, devid %d, hwpt_id %u"
+
# device.c
vfio_device_get_region_info_type(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x"
vfio_device_reset_handler(void) ""
diff --git a/hw/vfio/trace.h b/hw/vfio/trace.h
index 5a343aa..b34b61d 100644
--- a/hw/vfio/trace.h
+++ b/hw/vfio/trace.h
@@ -1 +1,4 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
#include "trace/trace-hw_vfio.h"
diff --git a/hw/vfio/vfio-cpr.h b/hw/vfio/vfio-cpr.h
deleted file mode 100644
index 134b83a..0000000
--- a/hw/vfio/vfio-cpr.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * VFIO CPR
- *
- * Copyright (c) 2025 Oracle and/or its affiliates.
- *
- * SPDX-License-Identifier: GPL-2.0-or-later
- */
-
-#ifndef HW_VFIO_CPR_H
-#define HW_VFIO_CPR_H
-
-bool vfio_cpr_register_container(VFIOContainerBase *bcontainer, Error **errp);
-void vfio_cpr_unregister_container(VFIOContainerBase *bcontainer);
-
-#endif /* HW_VFIO_CPR_H */
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 1ab2c11..7061b6e 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -209,6 +209,8 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
int ret;
Int128 llend;
Error *local_err = NULL;
+ MemoryRegion *mr;
+ hwaddr xlat;
if (iotlb->target_as != &address_space_memory) {
error_report("Wrong target AS \"%s\", only system memory is allowed",
@@ -228,11 +230,14 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
bool read_only;
- if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL,
- &local_err)) {
+ mr = memory_translate_iotlb(iotlb, &xlat, &local_err);
+ if (!mr) {
error_report_err(local_err);
return;
}
+ vaddr = memory_region_get_ram_ptr(mr) + xlat;
+ read_only = !(iotlb->perm & IOMMU_WO) || mr->readonly;
+
ret = vhost_vdpa_dma_map(s, VHOST_VDPA_GUEST_PA_ASID, iova,
iotlb->addr_mask + 1, vaddr, read_only);
if (ret) {
@@ -594,6 +599,36 @@ static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v)
v->shadow_vqs = g_steal_pointer(&shadow_vqs);
}
+static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
+{
+ struct vhost_vdpa *v = dev->opaque;
+
+ uint64_t features;
+ uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
+ 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
+ 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID |
+ 0x1ULL << VHOST_BACKEND_F_SUSPEND;
+ int r;
+
+ if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
+ return -EFAULT;
+ }
+
+ features &= f;
+
+ if (vhost_vdpa_first_dev(dev)) {
+ r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
+ if (r) {
+ return -EFAULT;
+ }
+ }
+
+ dev->backend_cap = features;
+ v->shared->backend_cap = features;
+
+ return 0;
+}
+
static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
{
struct vhost_vdpa *v = opaque;
@@ -603,7 +638,12 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
v->dev = dev;
dev->opaque = opaque ;
- v->shared->listener = vhost_vdpa_memory_listener;
+
+ ret = vhost_vdpa_set_backend_cap(dev);
+ if (unlikely(ret != 0)) {
+ return ret;
+ }
+
vhost_vdpa_init_svq(dev, v);
error_propagate(&dev->migration_blocker, v->migration_blocker);
@@ -639,6 +679,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
VIRTIO_CONFIG_S_DRIVER);
+ v->shared->listener = vhost_vdpa_memory_listener;
return 0;
}
@@ -841,36 +882,6 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev,
return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
}
-static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
-{
- struct vhost_vdpa *v = dev->opaque;
-
- uint64_t features;
- uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
- 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
- 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID |
- 0x1ULL << VHOST_BACKEND_F_SUSPEND;
- int r;
-
- if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
- return -EFAULT;
- }
-
- features &= f;
-
- if (vhost_vdpa_first_dev(dev)) {
- r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
- if (r) {
- return -EFAULT;
- }
- }
-
- dev->backend_cap = features;
- v->shared->backend_cap = features;
-
- return 0;
-}
-
static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
uint32_t *device_id)
{
@@ -888,8 +899,14 @@ static int vhost_vdpa_reset_device(struct vhost_dev *dev)
ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
trace_vhost_vdpa_reset_device(dev);
+ if (ret) {
+ return ret;
+ }
+
+ memory_listener_unregister(&v->shared->listener);
+ v->shared->listener_registered = false;
v->suspended = false;
- return ret;
+ return 0;
}
static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
@@ -1373,7 +1390,15 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
"IOMMU and try again");
return -1;
}
- memory_listener_register(&v->shared->listener, dev->vdev->dma_as);
+ if (v->shared->listener_registered &&
+ dev->vdev->dma_as != v->shared->listener.address_space) {
+ memory_listener_unregister(&v->shared->listener);
+ v->shared->listener_registered = false;
+ }
+ if (!v->shared->listener_registered) {
+ memory_listener_register(&v->shared->listener, dev->vdev->dma_as);
+ v->shared->listener_registered = true;
+ }
return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
}
@@ -1383,8 +1408,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
static void vhost_vdpa_reset_status(struct vhost_dev *dev)
{
- struct vhost_vdpa *v = dev->opaque;
-
if (!vhost_vdpa_last_dev(dev)) {
return;
}
@@ -1392,7 +1415,6 @@ static void vhost_vdpa_reset_status(struct vhost_dev *dev)
vhost_vdpa_reset_device(dev);
vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
VIRTIO_CONFIG_S_DRIVER);
- memory_listener_unregister(&v->shared->listener);
}
static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
@@ -1526,12 +1548,27 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev,
static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
+ int r;
+ struct vhost_vdpa *v;
+
if (!vhost_vdpa_first_dev(dev)) {
return 0;
}
trace_vhost_vdpa_set_owner(dev);
- return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
+ r = vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
+ if (unlikely(r < 0)) {
+ return r;
+ }
+
+ /*
+ * Being optimistic and listening address space memory. If the device
+ * uses vIOMMU, it is changed at vhost_vdpa_dev_start.
+ */
+ v = dev->opaque;
+ memory_listener_register(&v->shared->listener, &address_space_memory);
+ v->shared->listener_registered = true;
+ return 0;
}
static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
@@ -1563,7 +1600,6 @@ const VhostOps vdpa_ops = {
.vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
.vhost_set_vring_call = vhost_vdpa_set_vring_call,
.vhost_get_features = vhost_vdpa_get_features,
- .vhost_set_backend_cap = vhost_vdpa_set_backend_cap,
.vhost_set_owner = vhost_vdpa_set_owner,
.vhost_set_vring_endian = NULL,
.vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
index a3d1a67..c46f6f9 100644
--- a/hw/virtio/virtio-mem.c
+++ b/hw/virtio/virtio-mem.c
@@ -244,28 +244,6 @@ static int virtio_mem_for_each_plugged_range(VirtIOMEM *vmem, void *arg,
return ret;
}
-/*
- * Adjust the memory section to cover the intersection with the given range.
- *
- * Returns false if the intersection is empty, otherwise returns true.
- */
-static bool virtio_mem_intersect_memory_section(MemoryRegionSection *s,
- uint64_t offset, uint64_t size)
-{
- uint64_t start = MAX(s->offset_within_region, offset);
- uint64_t end = MIN(s->offset_within_region + int128_get64(s->size),
- offset + size);
-
- if (end <= start) {
- return false;
- }
-
- s->offset_within_address_space += start - s->offset_within_region;
- s->offset_within_region = start;
- s->size = int128_make64(end - start);
- return true;
-}
-
typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg);
static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
@@ -287,7 +265,7 @@ static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem,
first_bit + 1) - 1;
size = (last_bit - first_bit + 1) * vmem->block_size;
- if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
break;
}
ret = cb(&tmp, arg);
@@ -319,7 +297,7 @@ static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem,
first_bit + 1) - 1;
size = (last_bit - first_bit + 1) * vmem->block_size;
- if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
break;
}
ret = cb(&tmp, arg);
@@ -355,7 +333,7 @@ static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset,
QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
MemoryRegionSection tmp = *rdl->section;
- if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
continue;
}
rdl->notify_discard(rdl, &tmp);
@@ -371,7 +349,7 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
QLIST_FOREACH(rdl, &vmem->rdl_list, next) {
MemoryRegionSection tmp = *rdl->section;
- if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
continue;
}
ret = rdl->notify_populate(rdl, &tmp);
@@ -388,7 +366,7 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset,
if (rdl2 == rdl) {
break;
}
- if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) {
+ if (!memory_region_section_intersect_range(&tmp, offset, size)) {
continue;
}
rdl2->notify_discard(rdl2, &tmp);
@@ -1070,6 +1048,17 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
}
/*
+ * Set ourselves as RamDiscardManager before the plug handler maps the
+ * memory region and exposes it via an address space.
+ */
+ if (memory_region_set_ram_discard_manager(&vmem->memdev->mr,
+ RAM_DISCARD_MANAGER(vmem))) {
+ error_setg(errp, "Failed to set RamDiscardManager");
+ ram_block_coordinated_discard_require(false);
+ return;
+ }
+
+ /*
* We don't know at this point whether shared RAM is migrated using
* QEMU or migrated using the file content. "x-ignore-shared" will be
* configured after realizing the device. So in case we have an
@@ -1083,6 +1072,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));
if (ret) {
error_setg_errno(errp, -ret, "Unexpected error discarding RAM");
+ memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
ram_block_coordinated_discard_require(false);
return;
}
@@ -1144,13 +1134,6 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
vmem->system_reset = VIRTIO_MEM_SYSTEM_RESET(obj);
vmem->system_reset->vmem = vmem;
qemu_register_resettable(obj);
-
- /*
- * Set ourselves as RamDiscardManager before the plug handler maps the
- * memory region and exposes it via an address space.
- */
- memory_region_set_ram_discard_manager(&vmem->memdev->mr,
- RAM_DISCARD_MANAGER(vmem));
}
static void virtio_mem_device_unrealize(DeviceState *dev)
@@ -1158,12 +1141,6 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VirtIOMEM *vmem = VIRTIO_MEM(dev);
- /*
- * The unplug handler unmapped the memory region, it cannot be
- * found via an address space anymore. Unset ourselves.
- */
- memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
-
qemu_unregister_resettable(OBJECT(vmem->system_reset));
object_unref(OBJECT(vmem->system_reset));
@@ -1176,6 +1153,11 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
virtio_del_queue(vdev, 0);
virtio_cleanup(vdev);
g_free(vmem->bitmap);
+ /*
+ * The unplug handler unmapped the memory region, it cannot be
+ * found via an address space anymore. Unset ourselves.
+ */
+ memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL);
ram_block_coordinated_discard_require(false);
}
@@ -1750,7 +1732,7 @@ static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm,
}
struct VirtIOMEMReplayData {
- void *fn;
+ ReplayRamDiscardState fn;
void *opaque;
};
@@ -1758,12 +1740,12 @@ static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg)
{
struct VirtIOMEMReplayData *data = arg;
- return ((ReplayRamPopulate)data->fn)(s, data->opaque);
+ return data->fn(s, data->opaque);
}
static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm,
MemoryRegionSection *s,
- ReplayRamPopulate replay_fn,
+ ReplayRamDiscardState replay_fn,
void *opaque)
{
const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
@@ -1782,14 +1764,13 @@ static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s,
{
struct VirtIOMEMReplayData *data = arg;
- ((ReplayRamDiscard)data->fn)(s, data->opaque);
- return 0;
+ return data->fn(s, data->opaque);
}
-static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
- MemoryRegionSection *s,
- ReplayRamDiscard replay_fn,
- void *opaque)
+static int virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
+ MemoryRegionSection *s,
+ ReplayRamDiscardState replay_fn,
+ void *opaque)
{
const VirtIOMEM *vmem = VIRTIO_MEM(rdm);
struct VirtIOMEMReplayData data = {
@@ -1798,8 +1779,8 @@ static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm,
};
g_assert(s->mr == &vmem->memdev->mr);
- virtio_mem_for_each_unplugged_section(vmem, s, &data,
- virtio_mem_rdm_replay_discarded_cb);
+ return virtio_mem_for_each_unplugged_section(vmem, s, &data,
+ virtio_mem_rdm_replay_discarded_cb);
}
static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm,
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 9b48aa8..fba2372 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -146,9 +146,7 @@ static const VMStateDescription vmstate_virtio_pci = {
static bool virtio_pci_has_extra_state(DeviceState *d)
{
- VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
-
- return proxy->flags & VIRTIO_PCI_FLAG_MIGRATE_EXTRA;
+ return true;
}
static void virtio_pci_save_extra_state(DeviceState *d, QEMUFile *f)
@@ -1215,7 +1213,12 @@ static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign,
static bool virtio_pci_query_guest_notifiers(DeviceState *d)
{
VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
- return msix_enabled(&proxy->pci_dev);
+
+ if (msix_enabled(&proxy->pci_dev)) {
+ return true;
+ } else {
+ return pci_irq_disabled(&proxy->pci_dev);
+ }
}
static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
@@ -2363,12 +2366,8 @@ static void virtio_pci_bus_reset_hold(Object *obj, ResetType type)
static const Property virtio_pci_properties[] = {
DEFINE_PROP_BIT("virtio-pci-bus-master-bug-migration", VirtIOPCIProxy, flags,
VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, false),
- DEFINE_PROP_BIT("migrate-extra", VirtIOPCIProxy, flags,
- VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, true),
DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags,
VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, false),
- DEFINE_PROP_BIT("x-disable-pcie", VirtIOPCIProxy, flags,
- VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false),
DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags,
VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false),
DEFINE_PROP_BOOL("x-ignore-backend-features", VirtIOPCIProxy,
@@ -2397,8 +2396,7 @@ static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp)
VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev);
PCIDevice *pci_dev = &proxy->pci_dev;
- if (!(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_PCIE) &&
- virtio_pci_modern(proxy)) {
+ if (virtio_pci_modern(proxy)) {
pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
}
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 2e98cec..82a285a 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -205,6 +205,15 @@ static const char *virtio_id_to_name(uint16_t device_id)
return name;
}
+static void virtio_check_indirect_feature(VirtIODevice *vdev)
+{
+ if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "Device %s: indirect_desc was not negotiated!\n",
+ vdev->name);
+ }
+}
+
/* Called within call_rcu(). */
static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
{
@@ -1680,8 +1689,8 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
VirtIODevice *vdev = vq->vdev;
VirtQueueElement *elem = NULL;
unsigned out_num, in_num, elem_entries;
- hwaddr addr[VIRTQUEUE_MAX_SIZE];
- struct iovec iov[VIRTQUEUE_MAX_SIZE];
+ hwaddr QEMU_UNINITIALIZED addr[VIRTQUEUE_MAX_SIZE];
+ struct iovec QEMU_UNINITIALIZED iov[VIRTQUEUE_MAX_SIZE];
VRingDesc desc;
int rc;
@@ -1733,6 +1742,7 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
virtio_error(vdev, "Invalid size for indirect buffer table");
goto done;
}
+ virtio_check_indirect_feature(vdev);
/* loop over the indirect descriptor table */
len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
@@ -1826,8 +1836,8 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
VirtIODevice *vdev = vq->vdev;
VirtQueueElement *elem = NULL;
unsigned out_num, in_num, elem_entries;
- hwaddr addr[VIRTQUEUE_MAX_SIZE];
- struct iovec iov[VIRTQUEUE_MAX_SIZE];
+ hwaddr QEMU_UNINITIALIZED addr[VIRTQUEUE_MAX_SIZE];
+ struct iovec QEMU_UNINITIALIZED iov[VIRTQUEUE_MAX_SIZE];
VRingPackedDesc desc;
uint16_t id;
int rc;
@@ -1870,6 +1880,7 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
virtio_error(vdev, "Invalid size for indirect buffer table");
goto done;
}
+ virtio_check_indirect_feature(vdev);
/* loop over the indirect descriptor table */
len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,