aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
Diffstat (limited to 'hw')
-rw-r--r--hw/arm/Kconfig1
-rw-r--r--hw/arm/aspeed.c285
-rw-r--r--hw/arm/aspeed_eeprom.c21
-rw-r--r--hw/arm/aspeed_eeprom.h3
-rw-r--r--hw/arm/highbank.c2
-rw-r--r--hw/i386/Kconfig2
-rw-r--r--hw/misc/aspeed_scu.c22
-rw-r--r--hw/misc/aspeed_sdmc.c3
-rw-r--r--hw/vfio-user/container.c11
-rw-r--r--hw/vfio/ap.c4
-rw-r--r--hw/vfio/ccw.c4
-rw-r--r--hw/vfio/container-base.c9
-rw-r--r--hw/vfio/cpr-iommufd.c225
-rw-r--r--hw/vfio/cpr-legacy.c25
-rw-r--r--hw/vfio/cpr.c144
-rw-r--r--hw/vfio/device.c40
-rw-r--r--hw/vfio/helpers.c11
-rw-r--r--hw/vfio/iommufd-stubs.c18
-rw-r--r--hw/vfio/iommufd.c81
-rw-r--r--hw/vfio/meson.build2
-rw-r--r--hw/vfio/pci.c109
-rw-r--r--hw/vfio/pci.h2
-rw-r--r--hw/vfio/platform.c2
-rw-r--r--hw/vfio/trace-events3
24 files changed, 957 insertions, 72 deletions
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index f543d94..6ea8653 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -532,6 +532,7 @@ config ASPEED_SOC
select I2C
select DPS310
select PCA9552
+ select PCA9554
select SERIAL_MM
select SMBUS_EEPROM
select PCA954X
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index d0b3336..c31bbe7 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -19,6 +19,7 @@
#include "hw/i2c/i2c_mux_pca954x.h"
#include "hw/i2c/smbus_eeprom.h"
#include "hw/gpio/pca9552.h"
+#include "hw/gpio/pca9554.h"
#include "hw/nvram/eeprom_at24c.h"
#include "hw/sensor/tmp105.h"
#include "hw/misc/led.h"
@@ -197,9 +198,12 @@ struct AspeedMachineState {
#define FUJI_BMC_HW_STRAP2 0x00000000
/* Bletchley hardware value */
-/* TODO: Leave same as EVB for now. */
-#define BLETCHLEY_BMC_HW_STRAP1 AST2600_EVB_HW_STRAP1
-#define BLETCHLEY_BMC_HW_STRAP2 AST2600_EVB_HW_STRAP2
+#define BLETCHLEY_BMC_HW_STRAP1 0x00002000
+#define BLETCHLEY_BMC_HW_STRAP2 0x00000801
+
+/* GB200NVL hardware value */
+#define GB200NVL_BMC_HW_STRAP1 AST2600_EVB_HW_STRAP1
+#define GB200NVL_BMC_HW_STRAP2 AST2600_EVB_HW_STRAP2
/* Qualcomm DC-SCM hardware value */
#define QCOM_DC_SCM_V1_BMC_HW_STRAP1 0x00000000
@@ -465,6 +469,8 @@ static void aspeed_machine_init(MachineState *machine)
aspeed_board_init_flashes(&bmc->soc->spi[0],
bmc->spi_model ? bmc->spi_model : amc->spi_model,
1, amc->num_cs);
+ aspeed_board_init_flashes(&bmc->soc->spi[1],
+ amc->spi2_model, 1, amc->num_cs2);
}
if (machine->kernel_filename && sc->num_cpus > 1) {
@@ -645,6 +651,12 @@ static void create_pca9552(AspeedSoCState *soc, int bus_id, int addr)
TYPE_PCA9552, addr);
}
+static I2CSlave *create_pca9554(AspeedSoCState *soc, int bus_id, int addr)
+{
+ return i2c_slave_create_simple(aspeed_i2c_get_bus(&soc->i2c, bus_id),
+ TYPE_PCA9554, addr);
+}
+
static void sonorapass_bmc_i2c_init(AspeedMachineState *bmc)
{
AspeedSoCState *soc = bmc->soc;
@@ -1003,6 +1015,180 @@ static void fuji_bmc_i2c_init(AspeedMachineState *bmc)
}
#define TYPE_TMP421 "tmp421"
+#define TYPE_DS1338 "ds1338"
+
+/* Catalina hardware value */
+#define CATALINA_BMC_HW_STRAP1 0x00002002
+#define CATALINA_BMC_HW_STRAP2 0x00000800
+
+#define CATALINA_BMC_RAM_SIZE ASPEED_RAM_SIZE(2 * GiB)
+
+static void catalina_bmc_i2c_init(AspeedMachineState *bmc)
+{
+ /* Reference from v6.16-rc2 aspeed-bmc-facebook-catalina.dts */
+
+ AspeedSoCState *soc = bmc->soc;
+ I2CBus *i2c[16] = {};
+ I2CSlave *i2c_mux;
+
+ /* busses 0-15 are all used. */
+ for (int i = 0; i < ARRAY_SIZE(i2c); i++) {
+ i2c[i] = aspeed_i2c_get_bus(&soc->i2c, i);
+ }
+
+ /* &i2c0 */
+ /* i2c-mux@71 (PCA9546) on i2c0 */
+ i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x71);
+
+ /* i2c-mux@72 (PCA9546) on i2c0 */
+ i2c_mux = i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x72);
+
+ /* i2c0mux1ch1 */
+ /* io_expander7 - pca9535@20 */
+ i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 1),
+ TYPE_PCA9552, 0x20);
+ /* eeprom@50 */
+ at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 1), 0x50, 8 * KiB);
+
+ /* i2c-mux@73 (PCA9546) on i2c0 */
+ i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x73);
+
+ /* i2c-mux@75 (PCA9546) on i2c0 */
+ i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x75);
+
+ /* i2c-mux@76 (PCA9546) on i2c0 */
+ i2c_mux = i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x76);
+
+ /* i2c0mux4ch1 */
+ /* io_expander8 - pca9535@21 */
+ i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 1),
+ TYPE_PCA9552, 0x21);
+ /* eeprom@50 */
+ at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 1), 0x50, 8 * KiB);
+
+ /* i2c-mux@77 (PCA9546) on i2c0 */
+ i2c_slave_create_simple(i2c[0], TYPE_PCA9546, 0x77);
+
+
+ /* &i2c1 */
+ /* i2c-mux@70 (PCA9548) on i2c1 */
+ i2c_mux = i2c_slave_create_simple(i2c[1], TYPE_PCA9548, 0x70);
+ /* i2c1mux0ch0 */
+ /* ina238@41 - no model */
+ /* ina238@42 - no model */
+ /* ina238@44 - no model */
+ /* i2c1mux0ch1 */
+ /* ina238@41 - no model */
+ /* ina238@43 - no model */
+ /* i2c1mux0ch4 */
+ /* ltc4287@42 - no model */
+ /* ltc4287@43 - no model */
+
+ /* i2c1mux0ch5 */
+ /* eeprom@54 */
+ at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 5), 0x54, 8 * KiB);
+ /* tpm75@4f */
+ i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 5), TYPE_TMP75, 0x4f);
+
+ /* i2c1mux0ch6 */
+ /* io_expander5 - pca9554@27 */
+ i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 6),
+ TYPE_PCA9554, 0x27);
+ /* io_expander6 - pca9555@25 */
+ i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 6),
+ TYPE_PCA9552, 0x25);
+ /* eeprom@51 */
+ at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 6), 0x51, 8 * KiB);
+
+ /* i2c1mux0ch7 */
+ /* eeprom@53 */
+ at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 7), 0x53, 8 * KiB);
+ /* temperature-sensor@4b - tmp75 */
+ i2c_slave_create_simple(pca954x_i2c_get_bus(i2c_mux, 7), TYPE_TMP75, 0x4b);
+
+ /* &i2c2 */
+ /* io_expander0 - pca9555@20 */
+ i2c_slave_create_simple(i2c[2], TYPE_PCA9552, 0x20);
+ /* io_expander0 - pca9555@21 */
+ i2c_slave_create_simple(i2c[2], TYPE_PCA9552, 0x21);
+ /* io_expander0 - pca9555@27 */
+ i2c_slave_create_simple(i2c[2], TYPE_PCA9552, 0x27);
+ /* eeprom@50 */
+ at24c_eeprom_init(i2c[2], 0x50, 8 * KiB);
+ /* eeprom@51 */
+ at24c_eeprom_init(i2c[2], 0x51, 8 * KiB);
+
+ /* &i2c5 */
+ /* i2c-mux@70 (PCA9548) on i2c5 */
+ i2c_mux = i2c_slave_create_simple(i2c[5], TYPE_PCA9548, 0x70);
+ /* i2c5mux0ch6 */
+ /* eeprom@52 */
+ at24c_eeprom_init(pca954x_i2c_get_bus(i2c_mux, 6), 0x52, 8 * KiB);
+ /* i2c5mux0ch7 */
+ /* ina230@40 - no model */
+ /* ina230@41 - no model */
+ /* ina230@44 - no model */
+ /* ina230@45 - no model */
+
+ /* &i2c6 */
+ /* io_expander3 - pca9555@21 */
+ i2c_slave_create_simple(i2c[6], TYPE_PCA9552, 0x21);
+ /* rtc@6f - nct3018y */
+ i2c_slave_create_simple(i2c[6], TYPE_DS1338, 0x6f);
+
+ /* &i2c9 */
+ /* io_expander4 - pca9555@4f */
+ i2c_slave_create_simple(i2c[9], TYPE_PCA9552, 0x4f);
+ /* temperature-sensor@4b - tpm75 */
+ i2c_slave_create_simple(i2c[9], TYPE_TMP75, 0x4b);
+ /* eeprom@50 */
+ at24c_eeprom_init(i2c[9], 0x50, 8 * KiB);
+ /* eeprom@56 */
+ at24c_eeprom_init(i2c[9], 0x56, 8 * KiB);
+
+ /* &i2c10 */
+ /* temperature-sensor@1f - tpm421 */
+ i2c_slave_create_simple(i2c[10], TYPE_TMP421, 0x1f);
+ /* eeprom@50 */
+ at24c_eeprom_init(i2c[10], 0x50, 8 * KiB);
+
+ /* &i2c11 */
+ /* ssif-bmc@10 - no model */
+
+ /* &i2c12 */
+ /* eeprom@50 */
+ at24c_eeprom_init(i2c[12], 0x50, 8 * KiB);
+
+ /* &i2c13 */
+ /* eeprom@50 */
+ at24c_eeprom_init(i2c[13], 0x50, 8 * KiB);
+ /* eeprom@54 */
+ at24c_eeprom_init(i2c[13], 0x54, 256);
+ /* eeprom@55 */
+ at24c_eeprom_init(i2c[13], 0x55, 256);
+ /* eeprom@57 */
+ at24c_eeprom_init(i2c[13], 0x57, 256);
+
+ /* &i2c14 */
+ /* io_expander9 - pca9555@10 */
+ i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x10);
+ /* io_expander10 - pca9555@11 */
+ i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x11);
+ /* io_expander11 - pca9555@12 */
+ i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x12);
+ /* io_expander12 - pca9555@13 */
+ i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x13);
+ /* io_expander13 - pca9555@14 */
+ i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x14);
+ /* io_expander14 - pca9555@15 */
+ i2c_slave_create_simple(i2c[14], TYPE_PCA9552, 0x15);
+
+ /* &i2c15 */
+ /* temperature-sensor@1f - tmp421 */
+ i2c_slave_create_simple(i2c[15], TYPE_TMP421, 0x1f);
+ /* eeprom@52 */
+ at24c_eeprom_init(i2c[15], 0x52, 8 * KiB);
+}
static void bletchley_bmc_i2c_init(AspeedMachineState *bmc)
{
@@ -1050,6 +1236,45 @@ static void bletchley_bmc_i2c_init(AspeedMachineState *bmc)
i2c_slave_create_simple(i2c[12], TYPE_PCA9552, 0x67);
}
+
+static void gb200nvl_bmc_i2c_init(AspeedMachineState *bmc)
+{
+ AspeedSoCState *soc = bmc->soc;
+ I2CBus *i2c[15] = {};
+ DeviceState *dev;
+ for (int i = 0; i < sizeof(i2c) / sizeof(i2c[0]); i++) {
+ if ((i == 11) || (i == 12) || (i == 13)) {
+ continue;
+ }
+ i2c[i] = aspeed_i2c_get_bus(&soc->i2c, i);
+ }
+
+ /* Bus 5 Expander */
+ create_pca9554(soc, 4, 0x21);
+
+ /* Mux I2c Expanders */
+ i2c_slave_create_simple(i2c[5], "pca9546", 0x71);
+ i2c_slave_create_simple(i2c[5], "pca9546", 0x72);
+ i2c_slave_create_simple(i2c[5], "pca9546", 0x73);
+ i2c_slave_create_simple(i2c[5], "pca9546", 0x75);
+ i2c_slave_create_simple(i2c[5], "pca9546", 0x76);
+ i2c_slave_create_simple(i2c[5], "pca9546", 0x77);
+
+ /* Bus 10 */
+ dev = DEVICE(create_pca9554(soc, 9, 0x20));
+
+ /* Set FPGA_READY */
+ object_property_set_str(OBJECT(dev), "pin1", "high", &error_fatal);
+
+ create_pca9554(soc, 9, 0x21);
+ at24c_eeprom_init(i2c[9], 0x50, 64 * KiB);
+ at24c_eeprom_init(i2c[9], 0x51, 64 * KiB);
+
+ /* Bus 11 */
+ at24c_eeprom_init_rom(i2c[10], 0x50, 256, gb200nvl_bmc_fruid,
+ gb200nvl_bmc_fruid_len);
+}
+
static void fby35_i2c_init(AspeedMachineState *bmc)
{
AspeedSoCState *soc = bmc->soc;
@@ -1585,6 +1810,52 @@ static void aspeed_machine_bletchley_class_init(ObjectClass *oc,
aspeed_machine_class_init_cpus_defaults(mc);
}
+static void aspeed_machine_catalina_class_init(ObjectClass *oc,
+ const void *data)
+{
+ MachineClass *mc = MACHINE_CLASS(oc);
+ AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
+
+ mc->desc = "Facebook Catalina BMC (Cortex-A7)";
+ amc->soc_name = "ast2600-a3";
+ amc->hw_strap1 = CATALINA_BMC_HW_STRAP1;
+ amc->hw_strap2 = CATALINA_BMC_HW_STRAP2;
+ amc->fmc_model = "w25q01jvq";
+ amc->spi_model = NULL;
+ amc->num_cs = 2;
+ amc->macs_mask = ASPEED_MAC2_ON;
+ amc->i2c_init = catalina_bmc_i2c_init;
+ mc->auto_create_sdcard = true;
+ mc->default_ram_size = CATALINA_BMC_RAM_SIZE;
+ aspeed_machine_class_init_cpus_defaults(mc);
+ aspeed_machine_ast2600_class_emmc_init(oc);
+}
+
+#define GB200NVL_BMC_RAM_SIZE ASPEED_RAM_SIZE(1 * GiB)
+
+static void aspeed_machine_gb200nvl_class_init(ObjectClass *oc,
+ const void *data)
+{
+ MachineClass *mc = MACHINE_CLASS(oc);
+ AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
+
+ mc->desc = "Nvidia GB200NVL BMC (Cortex-A7)";
+ amc->soc_name = "ast2600-a3";
+ amc->hw_strap1 = GB200NVL_BMC_HW_STRAP1;
+ amc->hw_strap2 = GB200NVL_BMC_HW_STRAP2;
+ amc->fmc_model = "mx66u51235f";
+ amc->spi_model = "mx66u51235f";
+ amc->num_cs = 2;
+
+ amc->spi2_model = "mx66u51235f";
+ amc->num_cs2 = 1;
+ amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON;
+ amc->i2c_init = gb200nvl_bmc_i2c_init;
+ mc->default_ram_size = GB200NVL_BMC_RAM_SIZE;
+ aspeed_machine_class_init_cpus_defaults(mc);
+ aspeed_machine_ast2600_class_emmc_init(oc);
+}
+
static void fby35_reset(MachineState *state, ResetType type)
{
AspeedMachineState *bmc = ASPEED_MACHINE(state);
@@ -1878,6 +2149,14 @@ static const TypeInfo aspeed_machine_types[] = {
.parent = TYPE_ASPEED_MACHINE,
.class_init = aspeed_machine_bletchley_class_init,
}, {
+ .name = MACHINE_TYPE_NAME("gb200nvl-bmc"),
+ .parent = TYPE_ASPEED_MACHINE,
+ .class_init = aspeed_machine_gb200nvl_class_init,
+ }, {
+ .name = MACHINE_TYPE_NAME("catalina-bmc"),
+ .parent = TYPE_ASPEED_MACHINE,
+ .class_init = aspeed_machine_catalina_class_init,
+ }, {
.name = MACHINE_TYPE_NAME("fby35-bmc"),
.parent = MACHINE_TYPE_NAME("ast2600-evb"),
.class_init = aspeed_machine_fby35_class_init,
diff --git a/hw/arm/aspeed_eeprom.c b/hw/arm/aspeed_eeprom.c
index daa3d32..8bbbdec 100644
--- a/hw/arm/aspeed_eeprom.c
+++ b/hw/arm/aspeed_eeprom.c
@@ -162,6 +162,25 @@ const uint8_t rainier_bmc_fruid[] = {
0x31, 0x50, 0x46, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
};
+const uint8_t gb200nvl_bmc_fruid[] = {
+ 0x01, 0x00, 0x00, 0x01, 0x0b, 0x00, 0x00, 0xf3, 0x01, 0x0a, 0x19, 0x1f,
+ 0x0f, 0xe6, 0xc6, 0x4e, 0x56, 0x49, 0x44, 0x49, 0x41, 0xc5, 0x50, 0x33,
+ 0x38, 0x30, 0x39, 0xcd, 0x31, 0x35, 0x38, 0x33, 0x33, 0x32, 0x34, 0x38,
+ 0x30, 0x30, 0x31, 0x35, 0x30, 0xd2, 0x36, 0x39, 0x39, 0x2d, 0x31, 0x33,
+ 0x38, 0x30, 0x39, 0x2d, 0x30, 0x34, 0x30, 0x34, 0x2d, 0x36, 0x30, 0x30,
+ 0xc0, 0x01, 0x01, 0xd6, 0x4d, 0x41, 0x43, 0x3a, 0x20, 0x33, 0x43, 0x3a,
+ 0x36, 0x44, 0x3a, 0x36, 0x36, 0x3a, 0x31, 0x34, 0x3a, 0x43, 0x38, 0x3a,
+ 0x37, 0x41, 0xc1, 0x3b, 0x01, 0x09, 0x19, 0xc6, 0x4e, 0x56, 0x49, 0x44,
+ 0x49, 0x41, 0xc9, 0x50, 0x33, 0x38, 0x30, 0x39, 0x2d, 0x42, 0x4d, 0x43,
+ 0xd2, 0x36, 0x39, 0x39, 0x2d, 0x31, 0x33, 0x38, 0x30, 0x39, 0x2d, 0x30,
+ 0x34, 0x30, 0x34, 0x2d, 0x36, 0x30, 0x30, 0xc4, 0x41, 0x45, 0x2e, 0x31,
+ 0xcd, 0x31, 0x35, 0x38, 0x33, 0x33, 0x32, 0x34, 0x38, 0x30, 0x30, 0x31,
+ 0x35, 0x30, 0xc0, 0xc4, 0x76, 0x30, 0x2e, 0x31, 0xc1, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0xb4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+
+};
+
const size_t tiogapass_bmc_fruid_len = sizeof(tiogapass_bmc_fruid);
const size_t fby35_nic_fruid_len = sizeof(fby35_nic_fruid);
const size_t fby35_bb_fruid_len = sizeof(fby35_bb_fruid);
@@ -169,3 +188,5 @@ const size_t fby35_bmc_fruid_len = sizeof(fby35_bmc_fruid);
const size_t yosemitev2_bmc_fruid_len = sizeof(yosemitev2_bmc_fruid);
const size_t rainier_bb_fruid_len = sizeof(rainier_bb_fruid);
const size_t rainier_bmc_fruid_len = sizeof(rainier_bmc_fruid);
+const size_t gb200nvl_bmc_fruid_len = sizeof(gb200nvl_bmc_fruid);
+
diff --git a/hw/arm/aspeed_eeprom.h b/hw/arm/aspeed_eeprom.h
index f08c16e..3ed9bc1 100644
--- a/hw/arm/aspeed_eeprom.h
+++ b/hw/arm/aspeed_eeprom.h
@@ -26,4 +26,7 @@ extern const size_t rainier_bb_fruid_len;
extern const uint8_t rainier_bmc_fruid[];
extern const size_t rainier_bmc_fruid_len;
+extern const uint8_t gb200nvl_bmc_fruid[];
+extern const size_t gb200nvl_bmc_fruid_len;
+
#endif
diff --git a/hw/arm/highbank.c b/hw/arm/highbank.c
index 3ae26eb..165c0b7 100644
--- a/hw/arm/highbank.c
+++ b/hw/arm/highbank.c
@@ -357,6 +357,7 @@ static void highbank_class_init(ObjectClass *oc, const void *data)
mc->max_cpus = 4;
mc->ignore_memory_transaction_failures = true;
mc->default_ram_id = "highbank.dram";
+ mc->deprecation_reason = "no known users left for this machine";
}
static const TypeInfo highbank_type = {
@@ -381,6 +382,7 @@ static void midway_class_init(ObjectClass *oc, const void *data)
mc->max_cpus = 4;
mc->ignore_memory_transaction_failures = true;
mc->default_ram_id = "highbank.dram";
+ mc->deprecation_reason = "no known users left for this machine";
}
static const TypeInfo midway_type = {
diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
index eb65bda..14d23e2 100644
--- a/hw/i386/Kconfig
+++ b/hw/i386/Kconfig
@@ -13,7 +13,7 @@ config SGX
config TDX
bool
select X86_FW_OVMF
- depends on KVM
+ depends on KVM && X86_64
config PC
bool
diff --git a/hw/misc/aspeed_scu.c b/hw/misc/aspeed_scu.c
index 4930e00..a0ab5ee 100644
--- a/hw/misc/aspeed_scu.c
+++ b/hw/misc/aspeed_scu.c
@@ -91,6 +91,7 @@
#define BMC_DEV_ID TO_REG(0x1A4)
#define AST2600_PROT_KEY TO_REG(0x00)
+#define AST2600_PROT_KEY2 TO_REG(0x10)
#define AST2600_SILICON_REV TO_REG(0x04)
#define AST2600_SILICON_REV2 TO_REG(0x14)
#define AST2600_SYS_RST_CTRL TO_REG(0x40)
@@ -176,6 +177,7 @@
#define AST2700_SCUIO_UARTCLK_GEN TO_REG(0x330)
#define AST2700_SCUIO_HUARTCLK_GEN TO_REG(0x334)
#define AST2700_SCUIO_CLK_DUTY_MEAS_RST TO_REG(0x388)
+#define AST2700_SCUIO_FREQ_CNT_CTL TO_REG(0x3A0)
#define SCU_IO_REGION_SIZE 0x1000
@@ -722,6 +724,8 @@ static void aspeed_ast2600_scu_write(void *opaque, hwaddr offset,
int reg = TO_REG(offset);
/* Truncate here so bitwise operations below behave as expected */
uint32_t data = data64;
+ bool prot_data_state = data == ASPEED_SCU_PROT_KEY;
+ bool unlocked = s->regs[AST2600_PROT_KEY] && s->regs[AST2600_PROT_KEY2];
if (reg >= ASPEED_AST2600_SCU_NR_REGS) {
qemu_log_mask(LOG_GUEST_ERROR,
@@ -730,15 +734,24 @@ static void aspeed_ast2600_scu_write(void *opaque, hwaddr offset,
return;
}
- if (reg > PROT_KEY && !s->regs[PROT_KEY]) {
+ if ((reg != AST2600_PROT_KEY && reg != AST2600_PROT_KEY2) && !unlocked) {
qemu_log_mask(LOG_GUEST_ERROR, "%s: SCU is locked!\n", __func__);
+ return;
}
trace_aspeed_scu_write(offset, size, data);
switch (reg) {
case AST2600_PROT_KEY:
- s->regs[reg] = (data == ASPEED_SCU_PROT_KEY) ? 1 : 0;
+ /*
+ * Writing a value to SCU000 will modify both protection
+ * registers to each protection register individually.
+ */
+ s->regs[AST2600_PROT_KEY] = prot_data_state;
+ s->regs[AST2600_PROT_KEY2] = prot_data_state;
+ return;
+ case AST2600_PROT_KEY2:
+ s->regs[AST2600_PROT_KEY2] = prot_data_state;
return;
case AST2600_HW_STRAP1:
case AST2600_HW_STRAP2:
@@ -1022,6 +1035,10 @@ static void aspeed_ast2700_scuio_write(void *opaque, hwaddr offset,
s->regs[reg - 1] ^= data;
updated = true;
break;
+ case AST2700_SCUIO_FREQ_CNT_CTL:
+ s->regs[reg] = deposit32(s->regs[reg], 6, 1, !!(data & BIT(1)));
+ updated = true;
+ break;
default:
qemu_log_mask(LOG_GUEST_ERROR,
"%s: Unhandled write at offset 0x%" HWADDR_PRIx "\n",
@@ -1066,6 +1083,7 @@ static const uint32_t ast2700_a0_resets_io[ASPEED_AST2700_SCU_NR_REGS] = {
[AST2700_SCUIO_UARTCLK_GEN] = 0x00014506,
[AST2700_SCUIO_HUARTCLK_GEN] = 0x000145c0,
[AST2700_SCUIO_CLK_DUTY_MEAS_RST] = 0x0c9100d2,
+ [AST2700_SCUIO_FREQ_CNT_CTL] = 0x00000080,
};
static void aspeed_2700_scuio_class_init(ObjectClass *klass, const void *data)
diff --git a/hw/misc/aspeed_sdmc.c b/hw/misc/aspeed_sdmc.c
index f04d993..dff7cc3 100644
--- a/hw/misc/aspeed_sdmc.c
+++ b/hw/misc/aspeed_sdmc.c
@@ -570,6 +570,9 @@ static void aspeed_2700_sdmc_reset(DeviceState *dev)
/* Set ram size bit and defaults values */
s->regs[R_MAIN_CONF] = asc->compute_conf(s, 0);
+ /* Skipping dram init */
+ s->regs[R_MAIN_CONTROL] = BIT(16);
+
if (s->unlocked) {
s->regs[R_2700_PROT] = PROT_UNLOCKED;
}
diff --git a/hw/vfio-user/container.c b/hw/vfio-user/container.c
index 3133fef..d318e6a 100644
--- a/hw/vfio-user/container.c
+++ b/hw/vfio-user/container.c
@@ -13,7 +13,6 @@
#include "hw/vfio-user/container.h"
#include "hw/vfio-user/device.h"
#include "hw/vfio-user/trace.h"
-#include "hw/vfio/vfio-cpr.h"
#include "hw/vfio/vfio-device.h"
#include "hw/vfio/vfio-listener.h"
#include "qapi/error.h"
@@ -225,14 +224,10 @@ vfio_user_container_connect(AddressSpace *as, VFIODevice *vbasedev,
bcontainer = &container->bcontainer;
- if (!vfio_cpr_register_container(bcontainer, errp)) {
- goto free_container_exit;
- }
-
ret = ram_block_uncoordinated_discard_disable(true);
if (ret) {
error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
- goto unregister_container_exit;
+ goto free_container_exit;
}
vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
@@ -261,9 +256,6 @@ listener_release_exit:
enable_discards_exit:
ram_block_uncoordinated_discard_disable(false);
-unregister_container_exit:
- vfio_cpr_unregister_container(bcontainer);
-
free_container_exit:
object_unref(container);
@@ -286,7 +278,6 @@ static void vfio_user_container_disconnect(VFIOUserContainer *container)
vioc->release(bcontainer);
}
- vfio_cpr_unregister_container(bcontainer);
object_unref(container);
vfio_address_space_put(space);
diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
index 1df4438..7719f24 100644
--- a/hw/vfio/ap.c
+++ b/hw/vfio/ap.c
@@ -265,7 +265,7 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp)
error:
error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name);
- g_free(vbasedev->name);
+ vfio_device_free_name(vbasedev);
}
static void vfio_ap_unrealize(DeviceState *dev)
@@ -275,7 +275,7 @@ static void vfio_ap_unrealize(DeviceState *dev)
vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX);
vfio_ap_unregister_irq_notifier(vapdev, VFIO_AP_CFG_CHG_IRQ_INDEX);
vfio_device_detach(&vapdev->vdev);
- g_free(vapdev->vdev.name);
+ vfio_device_free_name(&vapdev->vdev);
}
static const Property vfio_ap_properties[] = {
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index cea9d6e..9560b8d 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -619,7 +619,7 @@ out_io_notifier_err:
out_region_err:
vfio_device_detach(vbasedev);
out_attach_dev_err:
- g_free(vbasedev->name);
+ vfio_device_free_name(vbasedev);
out_unrealize:
if (cdc->unrealize) {
cdc->unrealize(cdev);
@@ -637,7 +637,7 @@ static void vfio_ccw_unrealize(DeviceState *dev)
vfio_ccw_unregister_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX);
vfio_ccw_put_region(vcdev);
vfio_device_detach(&vcdev->vdev);
- g_free(vcdev->vdev.name);
+ vfio_device_free_name(&vcdev->vdev);
if (cdc->unrealize) {
cdc->unrealize(cdev);
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
index d834bd4..5630497 100644
--- a/hw/vfio/container-base.c
+++ b/hw/vfio/container-base.c
@@ -78,7 +78,16 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer,
void *vaddr, bool readonly, MemoryRegion *mr)
{
VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+ RAMBlock *rb = mr->ram_block;
+ int mfd = rb ? qemu_ram_get_fd(rb) : -1;
+ if (mfd >= 0 && vioc->dma_map_file) {
+ unsigned long start = vaddr - qemu_ram_get_host_addr(rb);
+ unsigned long offset = qemu_ram_get_fd_offset(rb);
+
+ return vioc->dma_map_file(bcontainer, iova, size, mfd, start + offset,
+ readonly);
+ }
g_assert(vioc->dma_map);
return vioc->dma_map(bcontainer, iova, size, vaddr, readonly, mr);
}
diff --git a/hw/vfio/cpr-iommufd.c b/hw/vfio/cpr-iommufd.c
new file mode 100644
index 0000000..148a06d
--- /dev/null
+++ b/hw/vfio/cpr-iommufd.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2024-2025 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "hw/vfio/vfio-cpr.h"
+#include "hw/vfio/vfio-device.h"
+#include "migration/blocker.h"
+#include "migration/cpr.h"
+#include "migration/migration.h"
+#include "migration/vmstate.h"
+#include "system/iommufd.h"
+#include "vfio-iommufd.h"
+#include "trace.h"
+
+typedef struct CprVFIODevice {
+ char *name;
+ unsigned int namelen;
+ uint32_t ioas_id;
+ int devid;
+ uint32_t hwpt_id;
+ QLIST_ENTRY(CprVFIODevice) next;
+} CprVFIODevice;
+
+static const VMStateDescription vmstate_cpr_vfio_device = {
+ .name = "cpr vfio device",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32(namelen, CprVFIODevice),
+ VMSTATE_VBUFFER_ALLOC_UINT32(name, CprVFIODevice, 0, NULL, namelen),
+ VMSTATE_INT32(devid, CprVFIODevice),
+ VMSTATE_UINT32(ioas_id, CprVFIODevice),
+ VMSTATE_UINT32(hwpt_id, CprVFIODevice),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+const VMStateDescription vmstate_cpr_vfio_devices = {
+ .name = CPR_STATE "/vfio devices",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (const VMStateField[]){
+ VMSTATE_QLIST_V(vfio_devices, CprState, 1, vmstate_cpr_vfio_device,
+ CprVFIODevice, next),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static void vfio_cpr_save_device(VFIODevice *vbasedev)
+{
+ CprVFIODevice *elem = g_new0(CprVFIODevice, 1);
+
+ elem->name = g_strdup(vbasedev->name);
+ elem->namelen = strlen(vbasedev->name) + 1;
+ elem->ioas_id = vbasedev->cpr.ioas_id;
+ elem->devid = vbasedev->devid;
+ elem->hwpt_id = vbasedev->cpr.hwpt_id;
+ QLIST_INSERT_HEAD(&cpr_state.vfio_devices, elem, next);
+}
+
+static CprVFIODevice *find_device(const char *name)
+{
+ CprVFIODeviceList *head = &cpr_state.vfio_devices;
+ CprVFIODevice *elem;
+
+ QLIST_FOREACH(elem, head, next) {
+ if (!strcmp(elem->name, name)) {
+ return elem;
+ }
+ }
+ return NULL;
+}
+
+static void vfio_cpr_delete_device(const char *name)
+{
+ CprVFIODevice *elem = find_device(name);
+
+ if (elem) {
+ QLIST_REMOVE(elem, next);
+ g_free(elem->name);
+ g_free(elem);
+ }
+}
+
+static bool vfio_cpr_find_device(VFIODevice *vbasedev)
+{
+ CprVFIODevice *elem = find_device(vbasedev->name);
+
+ if (elem) {
+ vbasedev->cpr.ioas_id = elem->ioas_id;
+ vbasedev->devid = elem->devid;
+ vbasedev->cpr.hwpt_id = elem->hwpt_id;
+ trace_vfio_cpr_find_device(elem->ioas_id, elem->devid, elem->hwpt_id);
+ return true;
+ }
+ return false;
+}
+
+static bool vfio_cpr_supported(IOMMUFDBackend *be, Error **errp)
+{
+ if (!iommufd_change_process_capable(be)) {
+ if (errp) {
+ error_setg(errp, "vfio iommufd backend does not support "
+ "IOMMU_IOAS_CHANGE_PROCESS");
+ }
+ return false;
+ }
+ return true;
+}
+
+static int iommufd_cpr_pre_save(void *opaque)
+{
+ IOMMUFDBackend *be = opaque;
+
+ /*
+ * The process has not changed yet, but proactively try the ioctl,
+ * and it will fail if any DMA mappings are not supported.
+ */
+ if (!iommufd_change_process_capable(be)) {
+ error_report("some memory regions do not support "
+ "IOMMU_IOAS_CHANGE_PROCESS");
+ return -1;
+ }
+ return 0;
+}
+
+static int iommufd_cpr_post_load(void *opaque, int version_id)
+{
+ IOMMUFDBackend *be = opaque;
+ Error *local_err = NULL;
+
+ if (!iommufd_change_process(be, &local_err)) {
+ error_report_err(local_err);
+ return -1;
+ }
+ return 0;
+}
+
+static const VMStateDescription iommufd_cpr_vmstate = {
+ .name = "iommufd",
+ .version_id = 0,
+ .minimum_version_id = 0,
+ .pre_save = iommufd_cpr_pre_save,
+ .post_load = iommufd_cpr_post_load,
+ .needed = cpr_incoming_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+bool vfio_iommufd_cpr_register_iommufd(IOMMUFDBackend *be, Error **errp)
+{
+ Error **cpr_blocker = &be->cpr_blocker;
+
+ if (!vfio_cpr_supported(be, cpr_blocker)) {
+ return migrate_add_blocker_modes(cpr_blocker, errp,
+ MIG_MODE_CPR_TRANSFER, -1) == 0;
+ }
+
+ vmstate_register(NULL, -1, &iommufd_cpr_vmstate, be);
+
+ return true;
+}
+
+void vfio_iommufd_cpr_unregister_iommufd(IOMMUFDBackend *be)
+{
+ vmstate_unregister(NULL, &iommufd_cpr_vmstate, be);
+ migrate_del_blocker(&be->cpr_blocker);
+}
+
+bool vfio_iommufd_cpr_register_container(VFIOIOMMUFDContainer *container,
+ Error **errp)
+{
+ VFIOContainerBase *bcontainer = &container->bcontainer;
+
+ migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier,
+ vfio_cpr_reboot_notifier,
+ MIG_MODE_CPR_REBOOT);
+
+ vfio_cpr_add_kvm_notifier();
+
+ return true;
+}
+
+void vfio_iommufd_cpr_unregister_container(VFIOIOMMUFDContainer *container)
+{
+ VFIOContainerBase *bcontainer = &container->bcontainer;
+
+ migration_remove_notifier(&bcontainer->cpr_reboot_notifier);
+}
+
+void vfio_iommufd_cpr_register_device(VFIODevice *vbasedev)
+{
+ if (!cpr_is_incoming()) {
+ /*
+ * Beware fd may have already been saved by vfio_device_set_fd,
+ * so call resave to avoid a duplicate entry.
+ */
+ cpr_resave_fd(vbasedev->name, 0, vbasedev->fd);
+ vfio_cpr_save_device(vbasedev);
+ }
+}
+
+void vfio_iommufd_cpr_unregister_device(VFIODevice *vbasedev)
+{
+ cpr_delete_fd(vbasedev->name, 0);
+ vfio_cpr_delete_device(vbasedev->name);
+}
+
+void vfio_cpr_load_device(VFIODevice *vbasedev)
+{
+ if (cpr_is_incoming()) {
+ bool ret = vfio_cpr_find_device(vbasedev);
+ g_assert(ret);
+
+ if (vbasedev->fd < 0) {
+ vbasedev->fd = cpr_find_fd(vbasedev->name, 0);
+ }
+ }
+}
diff --git a/hw/vfio/cpr-legacy.c b/hw/vfio/cpr-legacy.c
index a84c324..553b203 100644
--- a/hw/vfio/cpr-legacy.c
+++ b/hw/vfio/cpr-legacy.c
@@ -99,20 +99,21 @@ static int vfio_container_post_load(void *opaque, int version_id)
{
VFIOContainer *container = opaque;
VFIOContainerBase *bcontainer = &container->bcontainer;
- VFIOGroup *group;
+ VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+ dma_map_fn saved_dma_map = vioc->dma_map;
Error *local_err = NULL;
+ /* During incoming CPR, divert calls to dma_map. */
+ vioc->dma_map = vfio_legacy_cpr_dma_map;
+
if (!vfio_listener_register(bcontainer, &local_err)) {
error_report_err(local_err);
return -1;
}
- QLIST_FOREACH(group, &container->group_list, container_next) {
- VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+ /* Restore original dma_map function */
+ vioc->dma_map = saved_dma_map;
- /* Restore original dma_map function */
- vioc->dma_map = container->cpr.saved_dma_map;
- }
return 0;
}
@@ -148,6 +149,7 @@ static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier,
*/
VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+ dma_map_fn saved_dma_map = vioc->dma_map;
vioc->dma_map = vfio_legacy_cpr_dma_map;
container->cpr.remap_listener = (MemoryListener) {
@@ -158,7 +160,7 @@ static int vfio_cpr_fail_notifier(NotifierWithReturn *notifier,
bcontainer->space->as);
memory_listener_unregister(&container->cpr.remap_listener);
container->cpr.vaddr_unmapped = false;
- vioc->dma_map = container->cpr.saved_dma_map;
+ vioc->dma_map = saved_dma_map;
}
return 0;
}
@@ -177,14 +179,9 @@ bool vfio_legacy_cpr_register_container(VFIOContainer *container, Error **errp)
MIG_MODE_CPR_TRANSFER, -1) == 0;
}
- vmstate_register(NULL, -1, &vfio_container_vmstate, container);
+ vfio_cpr_add_kvm_notifier();
- /* During incoming CPR, divert calls to dma_map. */
- if (cpr_is_incoming()) {
- VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
- container->cpr.saved_dma_map = vioc->dma_map;
- vioc->dma_map = vfio_legacy_cpr_dma_map;
- }
+ vmstate_register(NULL, -1, &vfio_container_vmstate, container);
migration_add_notifier_mode(&container->cpr.transfer_notifier,
vfio_cpr_fail_notifier,
diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c
index fdbb58e..af0f12a 100644
--- a/hw/vfio/cpr.c
+++ b/hw/vfio/cpr.c
@@ -9,6 +9,8 @@
#include "hw/vfio/vfio-device.h"
#include "hw/vfio/vfio-cpr.h"
#include "hw/vfio/pci.h"
+#include "hw/pci/msix.h"
+#include "hw/pci/msi.h"
#include "migration/cpr.h"
#include "qapi/error.h"
#include "system/runstate.h"
@@ -27,17 +29,67 @@ int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier,
return 0;
}
-bool vfio_cpr_register_container(VFIOContainerBase *bcontainer, Error **errp)
+#define STRDUP_VECTOR_FD_NAME(vdev, name) \
+ g_strdup_printf("%s_%s", (vdev)->vbasedev.name, (name))
+
+void vfio_cpr_save_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr,
+ int fd)
+{
+ g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name);
+ cpr_save_fd(fdname, nr, fd);
+}
+
+int vfio_cpr_load_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr)
+{
+ g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name);
+ return cpr_find_fd(fdname, nr);
+}
+
+void vfio_cpr_delete_vector_fd(VFIOPCIDevice *vdev, const char *name, int nr)
{
- migration_add_notifier_mode(&bcontainer->cpr_reboot_notifier,
- vfio_cpr_reboot_notifier,
- MIG_MODE_CPR_REBOOT);
- return true;
+ g_autofree char *fdname = STRDUP_VECTOR_FD_NAME(vdev, name);
+ cpr_delete_fd(fdname, nr);
}
-void vfio_cpr_unregister_container(VFIOContainerBase *bcontainer)
+static void vfio_cpr_claim_vectors(VFIOPCIDevice *vdev, int nr_vectors,
+ bool msix)
{
- migration_remove_notifier(&bcontainer->cpr_reboot_notifier);
+ int i, fd;
+ bool pending = false;
+ PCIDevice *pdev = &vdev->pdev;
+
+ vdev->nr_vectors = nr_vectors;
+ vdev->msi_vectors = g_new0(VFIOMSIVector, nr_vectors);
+ vdev->interrupt = msix ? VFIO_INT_MSIX : VFIO_INT_MSI;
+
+ vfio_pci_prepare_kvm_msi_virq_batch(vdev);
+
+ for (i = 0; i < nr_vectors; i++) {
+ VFIOMSIVector *vector = &vdev->msi_vectors[i];
+
+ fd = vfio_cpr_load_vector_fd(vdev, "interrupt", i);
+ if (fd >= 0) {
+ vfio_pci_vector_init(vdev, i);
+ vfio_pci_msi_set_handler(vdev, i);
+ }
+
+ if (vfio_cpr_load_vector_fd(vdev, "kvm_interrupt", i) >= 0) {
+ vfio_pci_add_kvm_msi_virq(vdev, vector, i, msix);
+ } else {
+ vdev->msi_vectors[i].virq = -1;
+ }
+
+ if (msix && msix_is_pending(pdev, i) && msix_is_masked(pdev, i)) {
+ set_bit(i, vdev->msix->pending);
+ pending = true;
+ }
+ }
+
+ vfio_pci_commit_kvm_msi_virq_batch(vdev);
+
+ if (msix) {
+ memory_region_set_enabled(&pdev->msix_pba_mmio, pending);
+ }
}
/*
@@ -58,13 +110,91 @@ static int vfio_cpr_pci_pre_load(void *opaque)
return 0;
}
+static int vfio_cpr_pci_post_load(void *opaque, int version_id)
+{
+ VFIOPCIDevice *vdev = opaque;
+ PCIDevice *pdev = &vdev->pdev;
+ int nr_vectors;
+
+ if (msix_enabled(pdev)) {
+ vfio_pci_msix_set_notifiers(vdev);
+ nr_vectors = vdev->msix->entries;
+ vfio_cpr_claim_vectors(vdev, nr_vectors, true);
+
+ } else if (msi_enabled(pdev)) {
+ nr_vectors = msi_nr_vectors_allocated(pdev);
+ vfio_cpr_claim_vectors(vdev, nr_vectors, false);
+
+ } else if (vfio_pci_read_config(pdev, PCI_INTERRUPT_PIN, 1)) {
+ Error *local_err = NULL;
+ if (!vfio_pci_intx_enable(vdev, &local_err)) {
+ error_report_err(local_err);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static bool pci_msix_present(void *opaque, int version_id)
+{
+ PCIDevice *pdev = opaque;
+
+ return msix_present(pdev);
+}
+
+static const VMStateDescription vfio_intx_vmstate = {
+ .name = "vfio-cpr-intx",
+ .version_id = 0,
+ .minimum_version_id = 0,
+ .fields = (VMStateField[]) {
+ VMSTATE_BOOL(pending, VFIOINTx),
+ VMSTATE_UINT32(route.mode, VFIOINTx),
+ VMSTATE_INT32(route.irq, VFIOINTx),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+#define VMSTATE_VFIO_INTX(_field, _state) { \
+ .name = (stringify(_field)), \
+ .size = sizeof(VFIOINTx), \
+ .vmsd = &vfio_intx_vmstate, \
+ .flags = VMS_STRUCT, \
+ .offset = vmstate_offset_value(_state, _field, VFIOINTx), \
+}
+
const VMStateDescription vfio_cpr_pci_vmstate = {
.name = "vfio-cpr-pci",
.version_id = 0,
.minimum_version_id = 0,
.pre_load = vfio_cpr_pci_pre_load,
+ .post_load = vfio_cpr_pci_post_load,
.needed = cpr_incoming_needed,
.fields = (VMStateField[]) {
+ VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice),
+ VMSTATE_MSIX_TEST(pdev, VFIOPCIDevice, pci_msix_present),
+ VMSTATE_VFIO_INTX(intx, VFIOPCIDevice),
VMSTATE_END_OF_LIST()
}
};
+
+static NotifierWithReturn kvm_close_notifier;
+
+static int vfio_cpr_kvm_close_notifier(NotifierWithReturn *notifier,
+ MigrationEvent *e,
+ Error **errp)
+{
+ if (e->type == MIG_EVENT_PRECOPY_DONE) {
+ vfio_kvm_device_close();
+ }
+ return 0;
+}
+
+void vfio_cpr_add_kvm_notifier(void)
+{
+ if (!kvm_close_notifier.notify) {
+ migration_add_notifier_mode(&kvm_close_notifier,
+ vfio_cpr_kvm_close_notifier,
+ MIG_MODE_CPR_TRANSFER);
+ }
+}
diff --git a/hw/vfio/device.c b/hw/vfio/device.c
index d91c695..96cf214 100644
--- a/hw/vfio/device.c
+++ b/hw/vfio/device.c
@@ -28,6 +28,8 @@
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/units.h"
+#include "migration/cpr.h"
+#include "migration/blocker.h"
#include "monitor/monitor.h"
#include "vfio-helpers.h"
@@ -316,28 +318,40 @@ bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
error_setg(errp, "Use FD passing only with iommufd backend");
return false;
}
- /*
- * Give a name with fd so any function printing out vbasedev->name
- * will not break.
- */
if (!vbasedev->name) {
- vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
+
+ if (vbasedev->dev->id) {
+ vbasedev->name = g_strdup(vbasedev->dev->id);
+ return true;
+ } else {
+ /*
+ * Assign a name so any function printing it will not break.
+ * The fd number changes across processes, so this cannot be
+ * used as an invariant name for CPR.
+ */
+ vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
+ error_setg(&vbasedev->cpr.id_blocker,
+ "vfio device with fd=%d needs an id property",
+ vbasedev->fd);
+ return migrate_add_blocker_modes(&vbasedev->cpr.id_blocker,
+ errp, MIG_MODE_CPR_TRANSFER,
+ -1) == 0;
+ }
}
}
return true;
}
-void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
+void vfio_device_free_name(VFIODevice *vbasedev)
{
- ERRP_GUARD();
- int fd = monitor_fd_param(monitor_cur(), str, errp);
+ g_clear_pointer(&vbasedev->name, g_free);
+ migrate_del_blocker(&vbasedev->cpr.id_blocker);
+}
- if (fd < 0) {
- error_prepend(errp, "Could not parse remote object fd %s:", str);
- return;
- }
- vbasedev->fd = fd;
+void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
+{
+ vbasedev->fd = cpr_get_fd_param(vbasedev->dev->id, str, 0, errp);
}
static VFIODeviceIOOps vfio_device_io_ops_ioctl;
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index d0dbab1..9a5f621 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -117,6 +117,17 @@ bool vfio_get_info_dma_avail(struct vfio_iommu_type1_info *info,
int vfio_kvm_device_fd = -1;
#endif
+void vfio_kvm_device_close(void)
+{
+#ifdef CONFIG_KVM
+ kvm_close();
+ if (vfio_kvm_device_fd != -1) {
+ close(vfio_kvm_device_fd);
+ vfio_kvm_device_fd = -1;
+ }
+#endif
+}
+
int vfio_kvm_device_add_fd(int fd, Error **errp)
{
#ifdef CONFIG_KVM
diff --git a/hw/vfio/iommufd-stubs.c b/hw/vfio/iommufd-stubs.c
new file mode 100644
index 0000000..0be5276
--- /dev/null
+++ b/hw/vfio/iommufd-stubs.c
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2025 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "migration/cpr.h"
+#include "migration/vmstate.h"
+
+const VMStateDescription vmstate_cpr_vfio_devices = {
+ .name = CPR_STATE "/vfio devices",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (const VMStateField[]){
+ VMSTATE_END_OF_LIST()
+ }
+};
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index d3efef7..48c590b 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -25,6 +25,7 @@
#include "system/reset.h"
#include "qemu/cutils.h"
#include "qemu/chardev_open.h"
+#include "migration/cpr.h"
#include "pci.h"
#include "vfio-iommufd.h"
#include "vfio-helpers.h"
@@ -45,6 +46,18 @@ static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
iova, size, vaddr, readonly);
}
+static int iommufd_cdev_map_file(const VFIOContainerBase *bcontainer,
+ hwaddr iova, ram_addr_t size,
+ int fd, unsigned long start, bool readonly)
+{
+ const VFIOIOMMUFDContainer *container =
+ container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
+
+ return iommufd_backend_map_file_dma(container->be,
+ container->ioas_id,
+ iova, size, fd, start, readonly);
+}
+
static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
IOMMUTLBEntry *iotlb, bool unmap_all)
@@ -109,6 +122,10 @@ static bool iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp)
goto err_kvm_device_add;
}
+ if (cpr_is_incoming()) {
+ goto skip_bind;
+ }
+
/* Bind device to iommufd */
bind.iommufd = iommufd->fd;
if (ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind)) {
@@ -120,6 +137,8 @@ static bool iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp)
vbasedev->devid = bind.out_devid;
trace_iommufd_cdev_connect_and_bind(bind.iommufd, vbasedev->name,
vbasedev->fd, vbasedev->devid);
+
+skip_bind:
return true;
err_bind:
iommufd_cdev_kvm_device_del(vbasedev);
@@ -313,7 +332,14 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
/* Try to find a domain */
QLIST_FOREACH(hwpt, &container->hwpt_list, next) {
- ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
+ if (!cpr_is_incoming()) {
+ ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
+ } else if (vbasedev->cpr.hwpt_id == hwpt->hwpt_id) {
+ ret = 0;
+ } else {
+ continue;
+ }
+
if (ret) {
/* -EINVAL means the domain is incompatible with the device. */
if (ret == -EINVAL) {
@@ -330,6 +356,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
return false;
} else {
vbasedev->hwpt = hwpt;
+ vbasedev->cpr.hwpt_id = hwpt->hwpt_id;
QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
return true;
@@ -352,6 +379,11 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
}
+ if (cpr_is_incoming()) {
+ hwpt_id = vbasedev->cpr.hwpt_id;
+ goto skip_alloc;
+ }
+
if (!iommufd_backend_alloc_hwpt(iommufd, vbasedev->devid,
container->ioas_id, flags,
IOMMU_HWPT_DATA_NONE, 0, NULL,
@@ -359,19 +391,20 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
return false;
}
+ ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt_id, errp);
+ if (ret) {
+ iommufd_backend_free_id(container->be, hwpt_id);
+ return false;
+ }
+
+skip_alloc:
hwpt = g_malloc0(sizeof(*hwpt));
hwpt->hwpt_id = hwpt_id;
hwpt->hwpt_flags = flags;
QLIST_INIT(&hwpt->device_list);
- ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
- if (ret) {
- iommufd_backend_free_id(container->be, hwpt->hwpt_id);
- g_free(hwpt);
- return false;
- }
-
vbasedev->hwpt = hwpt;
+ vbasedev->cpr.hwpt_id = hwpt->hwpt_id;
vbasedev->iommu_dirty_tracking = iommufd_hwpt_dirty_tracking(hwpt);
QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next);
@@ -409,7 +442,9 @@ static bool iommufd_cdev_attach_container(VFIODevice *vbasedev,
return iommufd_cdev_autodomains_get(vbasedev, container, errp);
}
- return !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
+ /* If CPR, we are already attached to ioas_id. */
+ return cpr_is_incoming() ||
+ !iommufd_cdev_attach_ioas_hwpt(vbasedev, container->ioas_id, errp);
}
static void iommufd_cdev_detach_container(VFIODevice *vbasedev,
@@ -434,7 +469,7 @@ static void iommufd_cdev_container_destroy(VFIOIOMMUFDContainer *container)
if (!QLIST_EMPTY(&bcontainer->device_list)) {
return;
}
- vfio_cpr_unregister_container(bcontainer);
+ vfio_iommufd_cpr_unregister_container(container);
vfio_listener_unregister(bcontainer);
iommufd_backend_free_id(container->be, container->ioas_id);
object_unref(container);
@@ -498,11 +533,14 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
VFIOAddressSpace *space;
struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
int ret, devfd;
+ bool res;
uint32_t ioas_id;
Error *err = NULL;
const VFIOIOMMUClass *iommufd_vioc =
VFIO_IOMMU_CLASS(object_class_by_name(TYPE_VFIO_IOMMU_IOMMUFD));
+ vfio_cpr_load_device(vbasedev);
+
if (vbasedev->fd < 0) {
devfd = iommufd_cdev_getfd(vbasedev->sysfsdev, errp);
if (devfd < 0) {
@@ -526,7 +564,16 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
vbasedev->iommufd != container->be) {
continue;
}
- if (!iommufd_cdev_attach_container(vbasedev, container, &err)) {
+
+ if (!cpr_is_incoming()) {
+ res = iommufd_cdev_attach_container(vbasedev, container, &err);
+ } else if (vbasedev->cpr.ioas_id == container->ioas_id) {
+ res = true;
+ } else {
+ continue;
+ }
+
+ if (!res) {
const char *msg = error_get_pretty(err);
trace_iommufd_cdev_fail_attach_existing_container(msg);
@@ -543,6 +590,11 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
}
}
+ if (cpr_is_incoming()) {
+ ioas_id = vbasedev->cpr.ioas_id;
+ goto skip_ioas_alloc;
+ }
+
/* Need to allocate a new dedicated container */
if (!iommufd_backend_alloc_ioas(vbasedev->iommufd, &ioas_id, errp)) {
goto err_alloc_ioas;
@@ -550,10 +602,12 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id);
+skip_ioas_alloc:
container = VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD));
container->be = vbasedev->iommufd;
container->ioas_id = ioas_id;
QLIST_INIT(&container->hwpt_list);
+ vbasedev->cpr.ioas_id = ioas_id;
bcontainer = &container->bcontainer;
vfio_address_space_insert(space, bcontainer);
@@ -580,7 +634,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
goto err_listener_register;
}
- if (!vfio_cpr_register_container(bcontainer, errp)) {
+ if (!vfio_iommufd_cpr_register_container(container, errp)) {
goto err_listener_register;
}
@@ -611,6 +665,7 @@ found_container:
}
vfio_device_prepare(vbasedev, bcontainer, &dev_info);
+ vfio_iommufd_cpr_register_device(vbasedev);
trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs,
vbasedev->num_regions, vbasedev->flags);
@@ -648,6 +703,7 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev)
iommufd_cdev_container_destroy(container);
vfio_address_space_put(space);
+ vfio_iommufd_cpr_unregister_device(vbasedev);
iommufd_cdev_unbind_and_disconnect(vbasedev);
close(vbasedev->fd);
}
@@ -807,6 +863,7 @@ static void vfio_iommu_iommufd_class_init(ObjectClass *klass, const void *data)
VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
vioc->dma_map = iommufd_cdev_map;
+ vioc->dma_map_file = iommufd_cdev_map_file;
vioc->dma_unmap = iommufd_cdev_unmap;
vioc->attach_device = iommufd_cdev_attach;
vioc->detach_device = iommufd_cdev_detach;
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
index 63ea393..bfaf6be 100644
--- a/hw/vfio/meson.build
+++ b/hw/vfio/meson.build
@@ -31,7 +31,9 @@ system_ss.add(when: 'CONFIG_VFIO', if_true: files(
))
system_ss.add(when: ['CONFIG_VFIO', 'CONFIG_IOMMUFD'], if_true: files(
'iommufd.c',
+ 'cpr-iommufd.c',
))
+system_ss.add(when: 'CONFIG_IOMMUFD', if_false: files('iommufd-stubs.c'))
system_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
'display.c',
))
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index fa25bde..1093b28 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -29,6 +29,7 @@
#include "hw/pci/pci_bridge.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"
+#include "hw/vfio/vfio-cpr.h"
#include "migration/vmstate.h"
#include "migration/cpr.h"
#include "qobject/qdict.h"
@@ -57,20 +58,33 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
static void vfio_msi_disable_common(VFIOPCIDevice *vdev);
+/* Create new or reuse existing eventfd */
static bool vfio_notifier_init(VFIOPCIDevice *vdev, EventNotifier *e,
const char *name, int nr, Error **errp)
{
- int ret = event_notifier_init(e, 0);
+ int fd, ret;
+ fd = vfio_cpr_load_vector_fd(vdev, name, nr);
+ if (fd >= 0) {
+ event_notifier_init_fd(e, fd);
+ return true;
+ }
+
+ ret = event_notifier_init(e, 0);
if (ret) {
error_setg_errno(errp, -ret, "vfio_notifier_init %s failed", name);
+ return false;
}
- return !ret;
+
+ fd = event_notifier_get_fd(e);
+ vfio_cpr_save_vector_fd(vdev, name, nr, fd);
+ return true;
}
static void vfio_notifier_cleanup(VFIOPCIDevice *vdev, EventNotifier *e,
const char *name, int nr)
{
+ vfio_cpr_delete_vector_fd(vdev, name, nr);
event_notifier_cleanup(e);
}
@@ -196,6 +210,36 @@ fail:
#endif
}
+static bool vfio_cpr_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp)
+{
+#ifdef CONFIG_KVM
+ if (vdev->no_kvm_intx || !kvm_irqfds_enabled() ||
+ vdev->intx.route.mode != PCI_INTX_ENABLED ||
+ !kvm_resamplefds_enabled()) {
+ return true;
+ }
+
+ if (!vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0, errp)) {
+ return false;
+ }
+
+ if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
+ &vdev->intx.interrupt,
+ &vdev->intx.unmask,
+ vdev->intx.route.irq)) {
+ error_setg_errno(errp, errno, "failed to setup resample irqfd");
+ vfio_notifier_cleanup(vdev, &vdev->intx.unmask, "intx-unmask", 0);
+ return false;
+ }
+
+ vdev->intx.kvm_accel = true;
+ trace_vfio_intx_enable_kvm(vdev->vbasedev.name);
+ return true;
+#else
+ return true;
+#endif
+}
+
static void vfio_intx_disable_kvm(VFIOPCIDevice *vdev)
{
#ifdef CONFIG_KVM
@@ -291,7 +335,13 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
return true;
}
- vfio_disable_interrupts(vdev);
+ /*
+ * Do not alter interrupt state during vfio_realize and cpr load.
+ * The incoming state is cleared thereafter.
+ */
+ if (!cpr_is_incoming()) {
+ vfio_disable_interrupts(vdev);
+ }
vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */
pci_config_set_interrupt_pin(vdev->pdev.config, pin);
@@ -314,6 +364,14 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
fd = event_notifier_get_fd(&vdev->intx.interrupt);
qemu_set_fd_handler(fd, vfio_intx_interrupt, NULL, vdev);
+
+ if (cpr_is_incoming()) {
+ if (!vfio_cpr_intx_enable_kvm(vdev, &err)) {
+ warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
+ }
+ goto skip_signaling;
+ }
+
if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0,
VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) {
qemu_set_fd_handler(fd, NULL, NULL, vdev);
@@ -325,6 +383,7 @@ static bool vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
}
+skip_signaling:
vdev->interrupt = VFIO_INT_INTx;
trace_vfio_intx_enable(vdev->vbasedev.name);
@@ -394,6 +453,14 @@ static void vfio_msi_interrupt(void *opaque)
notify(&vdev->pdev, nr);
}
+void vfio_pci_msi_set_handler(VFIOPCIDevice *vdev, int nr)
+{
+ VFIOMSIVector *vector = &vdev->msi_vectors[nr];
+ int fd = event_notifier_get_fd(&vector->interrupt);
+
+ qemu_set_fd_handler(fd, vfio_msi_interrupt, NULL, vector);
+}
+
/*
* Get MSI-X enabled, but no vector enabled, by setting vector 0 with an invalid
* fd to kernel.
@@ -656,6 +723,15 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
static int vfio_msix_vector_use(PCIDevice *pdev,
unsigned int nr, MSIMessage msg)
{
+ /*
+ * Ignore the callback from msix_set_vector_notifiers during resume.
+ * The necessary subset of these actions is called from
+ * vfio_cpr_claim_vectors during post load.
+ */
+ if (cpr_is_incoming()) {
+ return 0;
+ }
+
return vfio_msix_vector_do_use(pdev, nr, &msg, vfio_msi_interrupt);
}
@@ -686,6 +762,12 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
}
}
+void vfio_pci_msix_set_notifiers(VFIOPCIDevice *vdev)
+{
+ msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,
+ vfio_msix_vector_release, NULL);
+}
+
void vfio_pci_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev)
{
assert(!vdev->defer_kvm_irq_routing);
@@ -2914,7 +2996,7 @@ void vfio_pci_put_device(VFIOPCIDevice *vdev)
vfio_device_detach(&vdev->vbasedev);
- g_free(vdev->vbasedev.name);
+ vfio_device_free_name(&vdev->vbasedev);
g_free(vdev->msix);
}
@@ -2965,6 +3047,11 @@ void vfio_pci_register_err_notifier(VFIOPCIDevice *vdev)
fd = event_notifier_get_fd(&vdev->err_notifier);
qemu_set_fd_handler(fd, vfio_err_notifier_handler, NULL, vdev);
+ /* Do not alter irq_signaling during vfio_realize for cpr */
+ if (cpr_is_incoming()) {
+ return;
+ }
+
if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_ERR_IRQ_INDEX, 0,
VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
@@ -3032,6 +3119,12 @@ void vfio_pci_register_req_notifier(VFIOPCIDevice *vdev)
fd = event_notifier_get_fd(&vdev->req_notifier);
qemu_set_fd_handler(fd, vfio_req_notifier_handler, NULL, vdev);
+ /* Do not alter irq_signaling during vfio_realize for cpr */
+ if (cpr_is_incoming()) {
+ vdev->req_enabled = true;
+ return;
+ }
+
if (!vfio_device_irq_set_signaling(&vdev->vbasedev, VFIO_PCI_REQ_IRQ_INDEX, 0,
VFIO_IRQ_SET_ACTION_TRIGGER, fd, &err)) {
error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
@@ -3189,7 +3282,13 @@ bool vfio_pci_interrupt_setup(VFIOPCIDevice *vdev, Error **errp)
vfio_intx_routing_notifier);
vdev->irqchip_change_notifier.notify = vfio_irqchip_change;
kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier);
- if (!vfio_intx_enable(vdev, errp)) {
+
+ /*
+ * During CPR, do not call vfio_intx_enable at this time. Instead,
+ * call it from vfio_pci_post_load after the intx routing data has
+ * been loaded from vmstate.
+ */
+ if (!cpr_is_incoming() && !vfio_intx_enable(vdev, errp)) {
timer_free(vdev->intx.mmap_timer);
pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 5ba7330..495fae7 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -218,6 +218,8 @@ void vfio_pci_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
void vfio_pci_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev);
void vfio_pci_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev);
bool vfio_pci_intx_enable(VFIOPCIDevice *vdev, Error **errp);
+void vfio_pci_msix_set_notifiers(VFIOPCIDevice *vdev);
+void vfio_pci_msi_set_handler(VFIOPCIDevice *vdev, int nr);
uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
void vfio_pci_write_config(PCIDevice *pdev,
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index 9a21f2e..5c1795a 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -530,7 +530,7 @@ static bool vfio_base_device_init(VFIODevice *vbasedev, Error **errp)
{
/* @fd takes precedence over @sysfsdev which takes precedence over @host */
if (vbasedev->fd < 0 && vbasedev->sysfsdev) {
- g_free(vbasedev->name);
+ vfio_device_free_name(vbasedev);
vbasedev->name = g_path_get_basename(vbasedev->sysfsdev);
} else if (vbasedev->fd < 0) {
if (!vbasedev->name || strchr(vbasedev->name, '/')) {
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index e1728c4..8ec0ad0 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -197,6 +197,9 @@ iommufd_cdev_alloc_ioas(int iommufd, int ioas_id) " [iommufd=%d] new IOMMUFD con
iommufd_cdev_device_info(char *name, int devfd, int num_irqs, int num_regions, int flags) " %s (%d) num_irqs=%d num_regions=%d flags=%d"
iommufd_cdev_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int dev_id) "\t%04x:%02x:%02x.%x devid %d"
+# cpr-iommufd.c
+vfio_cpr_find_device(uint32_t ioas_id, int devid, uint32_t hwpt_id) "ioas_id %u, devid %d, hwpt_id %u"
+
# device.c
vfio_device_get_region_info_type(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%08x"
vfio_device_reset_handler(void) ""