diff options
Diffstat (limited to 'include/hw')
45 files changed, 729 insertions, 99 deletions
diff --git a/include/hw/arm/boot.h b/include/hw/arm/boot.h index b12bf61..a2e22bd 100644 --- a/include/hw/arm/boot.h +++ b/include/hw/arm/boot.h @@ -132,6 +132,9 @@ struct arm_boot_info { bool secure_board_setup; arm_endianness endianness; + + /* CPU having loaded the kernel and that should be the first to boot. */ + ARMCPU *primary_cpu; }; /** diff --git a/include/hw/arm/npcm8xx.h b/include/hw/arm/npcm8xx.h index 3436abf..a8377db 100644 --- a/include/hw/arm/npcm8xx.h +++ b/include/hw/arm/npcm8xx.h @@ -28,7 +28,8 @@ #include "hw/misc/npcm7xx_mft.h" #include "hw/misc/npcm7xx_pwm.h" #include "hw/misc/npcm7xx_rng.h" -#include "hw/net/npcm7xx_emc.h" +#include "hw/net/npcm_gmac.h" +#include "hw/net/npcm_pcs.h" #include "hw/nvram/npcm7xx_otp.h" #include "hw/sd/npcm7xx_sdhci.h" #include "hw/timer/npcm7xx_timer.h" @@ -99,6 +100,8 @@ struct NPCM8xxState { EHCISysBusState ehci[2]; OHCISysBusState ohci[2]; NPCM7xxFIUState fiu[3]; + NPCMGMACState gmac[4]; + NPCMPCSState pcs; NPCM7xxSDHCIState mmc; NPCMPSPIState pspi; }; diff --git a/include/hw/block/flash.h b/include/hw/block/flash.h index 5fd67f5..3671f01 100644 --- a/include/hw/block/flash.h +++ b/include/hw/block/flash.h @@ -44,24 +44,6 @@ PFlashCFI02 *pflash_cfi02_register(hwaddr base, uint16_t unlock_addr1, int be); -/* nand.c */ -DeviceState *nand_init(BlockBackend *blk, int manf_id, int chip_id); -void nand_setpins(DeviceState *dev, uint8_t cle, uint8_t ale, - uint8_t ce, uint8_t wp, uint8_t gnd); -void nand_getpins(DeviceState *dev, int *rb); -void nand_setio(DeviceState *dev, uint32_t value); -uint32_t nand_getio(DeviceState *dev); -uint32_t nand_getbuswidth(DeviceState *dev); - -#define NAND_MFR_TOSHIBA 0x98 -#define NAND_MFR_SAMSUNG 0xec -#define NAND_MFR_FUJITSU 0x04 -#define NAND_MFR_NATIONAL 0x8f -#define NAND_MFR_RENESAS 0x07 -#define NAND_MFR_STMICRO 0x20 -#define NAND_MFR_HYNIX 0xad -#define NAND_MFR_MICRON 0x2c - /* m25p80.c */ #define TYPE_M25P80 "m25p80-generic" diff --git a/include/hw/boards.h 
b/include/hw/boards.h index a7b1fcf..f424b2b 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -286,8 +286,7 @@ struct MachineClass { no_parallel:1, no_floppy:1, no_cdrom:1, - pci_allow_0_address:1, - legacy_fw_cfg_order:1; + pci_allow_0_address:1; bool auto_create_sdcard; bool is_default; const char *default_machine_opts; @@ -863,10 +862,4 @@ extern const size_t hw_compat_2_7_len; extern GlobalProperty hw_compat_2_6[]; extern const size_t hw_compat_2_6_len; -extern GlobalProperty hw_compat_2_5[]; -extern const size_t hw_compat_2_5_len; - -extern GlobalProperty hw_compat_2_4[]; -extern const size_t hw_compat_2_4_len; - #endif diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h index 1e87f7d..33296a1 100644 --- a/include/hw/core/cpu.h +++ b/include/hw/core/cpu.h @@ -1126,4 +1126,10 @@ extern const VMStateDescription vmstate_cpu_common; #define UNASSIGNED_CPU_INDEX -1 #define UNASSIGNED_CLUSTER_INDEX -1 +enum CacheType { + DATA_CACHE, + INSTRUCTION_CACHE, + UNIFIED_CACHE +}; + #endif diff --git a/include/hw/core/resetcontainer.h b/include/hw/core/resetcontainer.h index 23db0c7..daeb18c 100644 --- a/include/hw/core/resetcontainer.h +++ b/include/hw/core/resetcontainer.h @@ -20,7 +20,7 @@ #include "qom/object.h" #define TYPE_RESETTABLE_CONTAINER "resettable-container" -OBJECT_DECLARE_TYPE(ResettableContainer, ResettableContainerClass, RESETTABLE_CONTAINER) +OBJECT_DECLARE_SIMPLE_TYPE(ResettableContainer, RESETTABLE_CONTAINER) /** * resettable_container_add: Add a resettable object to the container diff --git a/include/hw/gpio/aspeed_gpio.h b/include/hw/gpio/aspeed_gpio.h index e1e6c54..e6b2fe7 100644 --- a/include/hw/gpio/aspeed_gpio.h +++ b/include/hw/gpio/aspeed_gpio.h @@ -70,7 +70,7 @@ typedef struct AspeedGPIOReg { } AspeedGPIOReg; struct AspeedGPIOClass { - SysBusDevice parent_obj; + SysBusDeviceClass parent_class; const GPIOSetProperties *props; uint32_t nr_gpio_pins; uint32_t nr_gpio_sets; diff --git a/include/hw/i2c/aspeed_i2c.h 
b/include/hw/i2c/aspeed_i2c.h index 2c4c81b..2daacc1 100644 --- a/include/hw/i2c/aspeed_i2c.h +++ b/include/hw/i2c/aspeed_i2c.h @@ -14,8 +14,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * with this program; if not, see <https://www.gnu.org/licenses/>. */ #ifndef ASPEED_I2C_H diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 9563674..79b72c5 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -107,7 +107,6 @@ struct PCMachineClass { /* RAM / address space compat: */ bool gigabyte_align; bool has_reserved_memory; - bool broken_reserved_end; bool enforce_amd_1tb_hole; bool isa_bios_alias; @@ -299,12 +298,6 @@ extern const size_t pc_compat_2_7_len; extern GlobalProperty pc_compat_2_6[]; extern const size_t pc_compat_2_6_len; -extern GlobalProperty pc_compat_2_5[]; -extern const size_t pc_compat_2_5_len; - -extern GlobalProperty pc_compat_2_4[]; -extern const size_t pc_compat_2_4_len; - #define DEFINE_PC_MACHINE(suffix, namestr, initfn, optsfn) \ static void pc_machine_##suffix##_class_init(ObjectClass *oc, \ const void *data) \ diff --git a/include/hw/i386/tdvf.h b/include/hw/i386/tdvf.h new file mode 100644 index 0000000..e75c8d1 --- /dev/null +++ b/include/hw/i386/tdvf.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2025 Intel Corporation + * Author: Isaku Yamahata <isaku.yamahata at gmail.com> + * <isaku.yamahata at intel.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_I386_TDVF_H +#define HW_I386_TDVF_H + +#include "qemu/osdep.h" + +#define TDVF_SECTION_TYPE_BFV 0 +#define TDVF_SECTION_TYPE_CFV 1 +#define TDVF_SECTION_TYPE_TD_HOB 2 +#define TDVF_SECTION_TYPE_TEMP_MEM 3 + +#define TDVF_SECTION_ATTRIBUTES_MR_EXTEND (1U << 0) +#define TDVF_SECTION_ATTRIBUTES_PAGE_AUG (1U << 1) + +typedef struct 
TdxFirmwareEntry { + uint32_t data_offset; + uint32_t data_len; + uint64_t address; + uint64_t size; + uint32_t type; + uint32_t attributes; + + void *mem_ptr; +} TdxFirmwareEntry; + +typedef struct TdxFirmware { + void *mem_ptr; + + uint32_t nr_entries; + TdxFirmwareEntry *entries; +} TdxFirmware; + +#define for_each_tdx_fw_entry(fw, e) \ + for (e = (fw)->entries; e != (fw)->entries + (fw)->nr_entries; e++) + +int tdvf_parse_metadata(TdxFirmware *fw, void *flash_ptr, int size); + +#endif /* HW_I386_TDVF_H */ diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h index 258b134..fc460b8 100644 --- a/include/hw/i386/x86.h +++ b/include/hw/i386/x86.h @@ -27,13 +27,8 @@ #include "qom/object.h" struct X86MachineClass { - /*< private >*/ MachineClass parent; - /*< public >*/ - - /* TSC rate migration: */ - bool save_tsc_khz; /* use DMA capable linuxboot option rom */ bool fwcfg_dma_enabled; /* CPU and apic information: */ diff --git a/include/hw/intc/arm_gic.h b/include/hw/intc/arm_gic.h index 48f6a51..be923f7 100644 --- a/include/hw/intc/arm_gic.h +++ b/include/hw/intc/arm_gic.h @@ -27,6 +27,9 @@ * implement the security extensions * + QOM property "has-virtualization-extensions": set true if the GIC should * implement the virtualization extensions + * + QOM property "first-cpu-index": index of the first cpu attached to the + * GIC (default 0). The CPUs connected to the GIC are assumed to be + * first-cpu-index, first-cpu-index + 1, ... first-cpu-index + num-cpu - 1. * + unnamed GPIO inputs: (where P is number of SPIs, i.e. num-irq - 32) * [0..P-1] SPIs * [P..P+31] PPIs for CPU 0 diff --git a/include/hw/intc/arm_gic_common.h b/include/hw/intc/arm_gic_common.h index 97fea41..93a3cc2 100644 --- a/include/hw/intc/arm_gic_common.h +++ b/include/hw/intc/arm_gic_common.h @@ -129,6 +129,8 @@ struct GICState { uint32_t num_lrs; uint32_t num_cpu; + /* cpu_index of the first CPU, attached to this GIC. 
*/ + uint32_t first_cpu_index; MemoryRegion iomem; /* Distributor */ /* This is just so we can have an opaque pointer which identifies diff --git a/include/hw/intc/loongarch_extioi.h b/include/hw/intc/loongarch_extioi.h index 4a6ae90..9be1d73 100644 --- a/include/hw/intc/loongarch_extioi.h +++ b/include/hw/intc/loongarch_extioi.h @@ -15,6 +15,7 @@ OBJECT_DECLARE_TYPE(LoongArchExtIOIState, LoongArchExtIOIClass, LOONGARCH_EXTIOI struct LoongArchExtIOIState { LoongArchExtIOICommonState parent_obj; + int dev_fd; }; struct LoongArchExtIOIClass { @@ -25,4 +26,8 @@ struct LoongArchExtIOIClass { ResettablePhases parent_phases; }; +void kvm_extioi_realize(DeviceState *dev, Error **errp); +int kvm_extioi_get(void *opaque); +int kvm_extioi_put(void *opaque, int version_id); + #endif /* LOONGARCH_EXTIOI_H */ diff --git a/include/hw/intc/loongarch_extioi_common.h b/include/hw/intc/loongarch_extioi_common.h index 735bfee..dca25ff 100644 --- a/include/hw/intc/loongarch_extioi_common.h +++ b/include/hw/intc/loongarch_extioi_common.h @@ -35,7 +35,7 @@ #define EXTIOI_ISR_START (0x700 - APIC_OFFSET) #define EXTIOI_ISR_END (0x720 - APIC_OFFSET) #define EXTIOI_COREISR_START (0x800 - APIC_OFFSET) -#define EXTIOI_COREISR_END (0xB20 - APIC_OFFSET) +#define EXTIOI_COREISR_END (0x820 - APIC_OFFSET) #define EXTIOI_COREMAP_START (0xC00 - APIC_OFFSET) #define EXTIOI_COREMAP_END (0xD00 - APIC_OFFSET) #define EXTIOI_SIZE 0x800 diff --git a/include/hw/intc/loongarch_ipi.h b/include/hw/intc/loongarch_ipi.h index a7c6bf8..5175a6b 100644 --- a/include/hw/intc/loongarch_ipi.h +++ b/include/hw/intc/loongarch_ipi.h @@ -16,6 +16,7 @@ OBJECT_DECLARE_TYPE(LoongarchIPIState, LoongarchIPIClass, LOONGARCH_IPI) struct LoongarchIPIState { LoongsonIPICommonState parent_obj; + int dev_fd; }; struct LoongarchIPIClass { @@ -24,4 +25,8 @@ struct LoongarchIPIClass { ResettablePhases parent_phases; }; +void kvm_ipi_realize(DeviceState *dev, Error **errp); +int kvm_ipi_get(void *opaque); +int kvm_ipi_put(void *opaque, 
int version_id); + #endif diff --git a/include/hw/intc/loongarch_pch_pic.h b/include/hw/intc/loongarch_pch_pic.h index 839a59a..a46b6f8 100644 --- a/include/hw/intc/loongarch_pch_pic.h +++ b/include/hw/intc/loongarch_pch_pic.h @@ -16,6 +16,7 @@ OBJECT_DECLARE_TYPE(LoongarchPICState, LoongarchPICClass, LOONGARCH_PIC) struct LoongarchPICState { LoongArchPICCommonState parent_obj; + int dev_fd; }; struct LoongarchPICClass { @@ -25,4 +26,8 @@ struct LoongarchPICClass { ResettablePhases parent_phases; }; +void kvm_pic_realize(DeviceState *dev, Error **errp); +int kvm_pic_get(void *opaque); +int kvm_pic_put(void *opaque, int version_id); + #endif /* HW_LOONGARCH_PCH_PIC_H */ diff --git a/include/hw/intc/loongarch_pic_common.h b/include/hw/intc/loongarch_pic_common.h index 9349a05..f774c97 100644 --- a/include/hw/intc/loongarch_pic_common.h +++ b/include/hw/intc/loongarch_pic_common.h @@ -23,6 +23,7 @@ #define PCH_PIC_ROUTE_ENTRY_END 0x13f #define PCH_PIC_HTMSI_VEC 0x200 #define PCH_PIC_HTMSI_VEC_END 0x23f +#define PCH_PIC_INT_REQUEST 0x380 #define PCH_PIC_INT_STATUS 0x3a0 #define PCH_PIC_INT_POL 0x3e0 diff --git a/include/hw/intc/loongson_ipi_common.h b/include/hw/intc/loongson_ipi_common.h index b587f9c..e58ce2a 100644 --- a/include/hw/intc/loongson_ipi_common.h +++ b/include/hw/intc/loongson_ipi_common.h @@ -48,6 +48,8 @@ struct LoongsonIPICommonClass { AddressSpace *(*get_iocsr_as)(CPUState *cpu); int (*cpu_by_arch_id)(LoongsonIPICommonState *lics, int64_t id, int *index, CPUState **pcs); + int (*pre_save)(void *opaque); + int (*post_load)(void *opaque, int version_id); }; MemTxResult loongson_ipi_core_readl(void *opaque, hwaddr addr, uint64_t *data, diff --git a/include/hw/loader.h b/include/hw/loader.h index d280dc3..c96b5e1 100644 --- a/include/hw/loader.h +++ b/include/hw/loader.h @@ -270,8 +270,6 @@ int rom_add_elf_program(const char *name, GMappedFile *mapped_file, void *data, AddressSpace *as); int rom_check_and_register_reset(void); void rom_set_fw(FWCfgState 
*f); -void rom_set_order_override(int order); -void rom_reset_order_override(void); /** * rom_transaction_begin: diff --git a/include/hw/loongarch/boot.h b/include/hw/loongarch/boot.h index b3b870d..9819f7f 100644 --- a/include/hw/loongarch/boot.h +++ b/include/hw/loongarch/boot.h @@ -102,11 +102,10 @@ struct loongarch_boot_info { const char *kernel_cmdline; const char *initrd_filename; uint64_t a0, a1, a2; + uint64_t initrd_addr; + uint64_t initrd_size; }; -extern struct memmap_entry *memmap_table; -extern unsigned memmap_entries; - struct memmap_entry { uint64_t address; uint64_t length; diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h index 2b7d199..602feab 100644 --- a/include/hw/loongarch/virt.h +++ b/include/hw/loongarch/virt.h @@ -63,6 +63,8 @@ struct LoongArchVirtMachineState { struct loongarch_boot_info bootinfo; DeviceState *ipi; DeviceState *extioi; + struct memmap_entry *memmap_table; + unsigned int memmap_entries; }; #define TYPE_LOONGARCH_VIRT_MACHINE MACHINE_TYPE_NAME("virt") diff --git a/include/hw/misc/aspeed_hace.h b/include/hw/misc/aspeed_hace.h index 5d4aa19..d5d07c6 100644 --- a/include/hw/misc/aspeed_hace.h +++ b/include/hw/misc/aspeed_hace.h @@ -22,7 +22,6 @@ OBJECT_DECLARE_TYPE(AspeedHACEState, AspeedHACEClass, ASPEED_HACE) -#define ASPEED_HACE_NR_REGS (0x64 >> 2) #define ASPEED_HACE_MAX_SG 256 /* max number of entries */ struct AspeedHACEState { @@ -31,10 +30,8 @@ struct AspeedHACEState { MemoryRegion iomem; qemu_irq irq; - struct iovec iov_cache[ASPEED_HACE_MAX_SG]; - uint32_t regs[ASPEED_HACE_NR_REGS]; + uint32_t *regs; uint32_t total_req_len; - uint32_t iov_count; MemoryRegion *dram_mr; AddressSpace dram_as; @@ -46,11 +43,17 @@ struct AspeedHACEState { struct AspeedHACEClass { SysBusDeviceClass parent_class; + const MemoryRegionOps *reg_ops; uint32_t src_mask; uint32_t dest_mask; uint32_t key_mask; uint32_t hash_mask; + uint64_t nr_regs; bool raise_crypt_interrupt_workaround; + uint32_t src_hi_mask; + uint32_t 
dest_hi_mask; + uint32_t key_hi_mask; + bool has_dma64; }; #endif /* ASPEED_HACE_H */ diff --git a/include/hw/nvram/fw_cfg.h b/include/hw/nvram/fw_cfg.h index 47578cc..d41b932 100644 --- a/include/hw/nvram/fw_cfg.h +++ b/include/hw/nvram/fw_cfg.h @@ -42,14 +42,6 @@ struct FWCfgDataGeneratorClass { typedef struct fw_cfg_file FWCfgFile; -#define FW_CFG_ORDER_OVERRIDE_VGA 70 -#define FW_CFG_ORDER_OVERRIDE_NIC 80 -#define FW_CFG_ORDER_OVERRIDE_USER 100 -#define FW_CFG_ORDER_OVERRIDE_DEVICE 110 - -void fw_cfg_set_order_override(FWCfgState *fw_cfg, int order); -void fw_cfg_reset_order_override(FWCfgState *fw_cfg); - typedef struct FWCfgFiles { uint32_t count; FWCfgFile f[]; @@ -75,8 +67,6 @@ struct FWCfgState { uint32_t cur_offset; Notifier machine_ready; - int fw_cfg_order_override; - bool dma_enabled; dma_addr_t dma_addr; AddressSpace *dma_as; diff --git a/include/hw/pci/msix.h b/include/hw/pci/msix.h index 0e6f257..11ef945 100644 --- a/include/hw/pci/msix.h +++ b/include/hw/pci/msix.h @@ -32,6 +32,7 @@ int msix_present(PCIDevice *dev); bool msix_is_masked(PCIDevice *dev, unsigned vector); void msix_set_pending(PCIDevice *dev, unsigned vector); void msix_clr_pending(PCIDevice *dev, int vector); +int msix_is_pending(PCIDevice *dev, unsigned vector); void msix_vector_use(PCIDevice *dev, unsigned vector); void msix_vector_unuse(PCIDevice *dev, unsigned vector); diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index c2fe6ca..df3cc7b 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -222,6 +222,8 @@ enum { QEMU_PCIE_EXT_TAG = (1 << QEMU_PCIE_EXT_TAG_BITNR), #define QEMU_PCI_CAP_PM_BITNR 14 QEMU_PCI_CAP_PM = (1 << QEMU_PCI_CAP_PM_BITNR), +#define QEMU_PCI_SKIP_RESET_ON_CPR_BITNR 15 + QEMU_PCI_SKIP_RESET_ON_CPR = (1 << QEMU_PCI_SKIP_RESET_ON_CPR_BITNR), }; typedef struct PCIINTxRoute { @@ -375,6 +377,28 @@ void pci_bus_get_w64_range(PCIBus *bus, Range *range); void pci_device_deassert_intx(PCIDevice *dev); +/* Page Request Interface */ +typedef enum { + 
IOMMU_PRI_RESP_SUCCESS, + IOMMU_PRI_RESP_INVALID_REQUEST, + IOMMU_PRI_RESP_FAILURE, +} IOMMUPRIResponseCode; + +typedef struct IOMMUPRIResponse { + IOMMUPRIResponseCode response_code; + uint16_t prgi; +} IOMMUPRIResponse; + +struct IOMMUPRINotifier; + +typedef void (*IOMMUPRINotify)(struct IOMMUPRINotifier *notifier, + IOMMUPRIResponse *response); + +typedef struct IOMMUPRINotifier { + IOMMUPRINotify notify; +} IOMMUPRINotifier; + +#define PCI_PRI_PRGI_MASK 0x1ffU /** * struct PCIIOMMUOps: callbacks structure for specific IOMMU handlers @@ -429,6 +453,179 @@ typedef struct PCIIOMMUOps { * @devfn: device and function number of the PCI device. */ void (*unset_iommu_device)(PCIBus *bus, void *opaque, int devfn); + /** + * @get_iotlb_info: get properties required to initialize a device IOTLB. + * + * Callback required if devices are allowed to cache translations. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @addr_width: the address width of the IOMMU (output parameter). + * + * @min_page_size: the page size of the IOMMU (output parameter). + */ + void (*get_iotlb_info)(void *opaque, uint8_t *addr_width, + uint32_t *min_page_size); + /** + * @init_iotlb_notifier: initialize an IOMMU notifier. + * + * Optional callback. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + * + * @n: the notifier to be initialized. + * + * @fn: the callback to be installed. + * + * @user_opaque: a user pointer that can be used to track a state. + */ + void (*init_iotlb_notifier)(PCIBus *bus, void *opaque, int devfn, + IOMMUNotifier *n, IOMMUNotify fn, + void *user_opaque); + /** + * @register_iotlb_notifier: setup an IOTLB invalidation notifier. + * + * Callback required if devices are allowed to cache translations. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). 
+ * + * @devfn: device and function number of the PCI device. + * + * @pasid: the pasid of the address space to watch. + * + * @n: the notifier to register. + */ + void (*register_iotlb_notifier)(PCIBus *bus, void *opaque, int devfn, + uint32_t pasid, IOMMUNotifier *n); + /** + * @unregister_iotlb_notifier: remove an IOTLB invalidation notifier. + * + * Callback required if devices are allowed to cache translations. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + * + * @pasid: the pasid of the address space to stop watching. + * + * @n: the notifier to unregister. + */ + void (*unregister_iotlb_notifier)(PCIBus *bus, void *opaque, int devfn, + uint32_t pasid, IOMMUNotifier *n); + /** + * @ats_request_translation: issue an ATS request. + * + * Callback required if devices are allowed to use the address + * translation service. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + * + * @pasid: the pasid of the address space to use for the request. + * + * @priv_req: privileged mode bit (PASID TLP). + * + * @exec_req: execute request bit (PASID TLP). + * + * @addr: start address of the memory range to be translated. + * + * @length: length of the memory range in bytes. + * + * @no_write: request a read-only translation (if supported). + * + * @result: buffer in which the TLB entries will be stored. + * + * @result_length: result buffer length. + * + * @err_count: number of untranslated subregions. + * + * Returns: the number of translations stored in the result buffer, or + * -ENOMEM if the buffer is not large enough. 
+ */ + ssize_t (*ats_request_translation)(PCIBus *bus, void *opaque, int devfn, + uint32_t pasid, bool priv_req, + bool exec_req, hwaddr addr, + size_t length, bool no_write, + IOMMUTLBEntry *result, + size_t result_length, + uint32_t *err_count); + /** + * @pri_register_notifier: setup the PRI completion callback. + * + * Callback required if devices are allowed to use the page request + * interface. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + * + * @pasid: the pasid of the address space to track. + * + * @notifier: the notifier to register. + */ + void (*pri_register_notifier)(PCIBus *bus, void *opaque, int devfn, + uint32_t pasid, IOMMUPRINotifier *notifier); + /** + * @pri_unregister_notifier: remove the PRI completion callback. + * + * Callback required if devices are allowed to use the page request + * interface. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + * + * @pasid: the pasid of the address space to stop tracking. + */ + void (*pri_unregister_notifier)(PCIBus *bus, void *opaque, int devfn, + uint32_t pasid); + /** + * @pri_request_page: issue a PRI request. + * + * Callback required if devices are allowed to use the page request + * interface. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + * + * @pasid: the pasid of the address space to use for the request. + * + * @priv_req: privileged mode bit (PASID TLP). + * + * @exec_req: execute request bit (PASID TLP). + * + * @addr: untranslated address of the requested page. + * + * @lpig: last page in group. + * + * @prgi: page request group index. + * + * @is_read: request read access. + * + * @is_write: request write access. 
+ */ + int (*pri_request_page)(PCIBus *bus, void *opaque, int devfn, + uint32_t pasid, bool priv_req, bool exec_req, + hwaddr addr, bool lpig, uint16_t prgi, bool is_read, + bool is_write); } PCIIOMMUOps; AddressSpace *pci_device_iommu_address_space(PCIDevice *dev); @@ -437,6 +634,126 @@ bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod, void pci_device_unset_iommu_device(PCIDevice *dev); /** + * pci_iommu_get_iotlb_info: get properties required to initialize a + * device IOTLB. + * + * Returns 0 on success, or a negative errno otherwise. + * + * @dev: the device that wants to get the information. + * @addr_width: the address width of the IOMMU (output parameter). + * @min_page_size: the page size of the IOMMU (output parameter). + */ +int pci_iommu_get_iotlb_info(PCIDevice *dev, uint8_t *addr_width, + uint32_t *min_page_size); + +/** + * pci_iommu_init_iotlb_notifier: initialize an IOMMU notifier. + * + * This function is used by devices before registering an IOTLB notifier. + * + * @dev: the device. + * @n: the notifier to be initialized. + * @fn: the callback to be installed. + * @opaque: a user pointer that can be used to track a state. + */ +int pci_iommu_init_iotlb_notifier(PCIDevice *dev, IOMMUNotifier *n, + IOMMUNotify fn, void *opaque); + +/** + * pci_ats_request_translation: perform an ATS request. + * + * Returns the number of translations stored in @result in case of success, + * a negative error code otherwise. + * -ENOMEM is returned when the result buffer is not large enough to store + * all the translations. + * + * @dev: the ATS-capable PCI device. + * @pasid: the pasid of the address space in which the translation will be done. + * @priv_req: privileged mode bit (PASID TLP). + * @exec_req: execute request bit (PASID TLP). + * @addr: start address of the memory range to be translated. + * @length: length of the memory range in bytes. + * @no_write: request a read-only translation (if supported). 
+ * @result: buffer in which the TLB entries will be stored. + * @result_length: result buffer length. + * @err_count: number of untranslated subregions. + */ +ssize_t pci_ats_request_translation(PCIDevice *dev, uint32_t pasid, + bool priv_req, bool exec_req, + hwaddr addr, size_t length, + bool no_write, IOMMUTLBEntry *result, + size_t result_length, + uint32_t *err_count); + +/** + * pci_pri_request_page: perform a PRI request. + * + * Returns 0 if the PRI request has been sent to the guest OS, + * an error code otherwise. + * + * @dev: the PRI-capable PCI device. + * @pasid: the pasid of the address space in which the translation will be done. + * @priv_req: privileged mode bit (PASID TLP). + * @exec_req: execute request bit (PASID TLP). + * @addr: untranslated address of the requested page. + * @lpig: last page in group. + * @prgi: page request group index. + * @is_read: request read access. + * @is_write: request write access. + */ +int pci_pri_request_page(PCIDevice *dev, uint32_t pasid, bool priv_req, + bool exec_req, hwaddr addr, bool lpig, + uint16_t prgi, bool is_read, bool is_write); + +/** + * pci_pri_register_notifier: register the PRI callback for a given address + * space. + * + * Returns 0 on success, an error code otherwise. + * + * @dev: the PRI-capable PCI device. + * @pasid: the pasid of the address space to track. + * @notifier: the notifier to register. + */ +int pci_pri_register_notifier(PCIDevice *dev, uint32_t pasid, + IOMMUPRINotifier *notifier); + +/** + * pci_pri_unregister_notifier: remove the PRI callback from a given address + * space. + * + * @dev: the PRI-capable PCI device. + * @pasid: the pasid of the address space to stop tracking. + */ +void pci_pri_unregister_notifier(PCIDevice *dev, uint32_t pasid); + +/** + * pci_iommu_register_iotlb_notifier: register a notifier for changes to + * IOMMU translation entries in a specific address space. + * + * Returns 0 on success, or a negative errno otherwise. 
+ * + * @dev: the device that wants to get notified. + * @pasid: the pasid of the address space to track. + * @n: the notifier to register. + */ +int pci_iommu_register_iotlb_notifier(PCIDevice *dev, uint32_t pasid, + IOMMUNotifier *n); + +/** + * pci_iommu_unregister_iotlb_notifier: unregister a notifier that has been + * registered with pci_iommu_register_iotlb_notifier. + * + * Returns 0 on success, or a negative errno otherwise. + * + * @dev: the device that wants to stop notifications. + * @pasid: the pasid of the address space to stop tracking. + * @n: the notifier to unregister. + */ +int pci_iommu_unregister_iotlb_notifier(PCIDevice *dev, uint32_t pasid, + IOMMUNotifier *n); + +/** * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus * * Let PCI host bridges define specific operations. @@ -668,6 +985,7 @@ void lsi53c8xx_handle_legacy_cmdline(DeviceState *lsi_dev); qemu_irq pci_allocate_irq(PCIDevice *pci_dev); void pci_set_irq(PCIDevice *pci_dev, int level); +int pci_irq_disabled(PCIDevice *d); static inline void pci_irq_assert(PCIDevice *pci_dev) { diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h index b0f5204..8cdacbc 100644 --- a/include/hw/pci/pci_bridge.h +++ b/include/hw/pci/pci_bridge.h @@ -14,8 +14,8 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * along with this program; if not, see + * <https://www.gnu.org/licenses/>. 
* * split out pci bus specific stuff from pci.[hc] to pci_bridge.[hc] * Copyright (c) 2009 Isaku Yamahata <yamahata at valinux co jp> diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h index e41d95b..eee0338 100644 --- a/include/hw/pci/pci_device.h +++ b/include/hw/pci/pci_device.h @@ -90,6 +90,7 @@ struct PCIDevice { char name[64]; PCIIORegion io_regions[PCI_NUM_REGIONS]; AddressSpace bus_master_as; + bool is_master; MemoryRegion bus_master_container_region; MemoryRegion bus_master_enable_region; diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h index 70a5de0..ff6ce08 100644 --- a/include/hw/pci/pcie.h +++ b/include/hw/pci/pcie.h @@ -70,8 +70,10 @@ struct PCIExpressDevice { uint16_t aer_cap; PCIEAERLog aer_log; - /* Offset of ATS capability in config space */ + /* Offset of ATS, PRI and PASID capabilities in config space */ uint16_t ats_cap; + uint16_t pasid_cap; + uint16_t pri_cap; /* ACS */ uint16_t acs_cap; @@ -150,4 +152,13 @@ void pcie_cap_slot_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); + +void pcie_pasid_init(PCIDevice *dev, uint16_t offset, uint8_t pasid_width, + bool exec_perm, bool priv_mod); +void pcie_pri_init(PCIDevice *dev, uint16_t offset, uint32_t outstanding_pr_cap, + bool prg_response_pasid_req); + +bool pcie_pri_enabled(const PCIDevice *dev); +bool pcie_pasid_enabled(const PCIDevice *dev); +bool pcie_ats_enabled(const PCIDevice *dev); #endif /* QEMU_PCIE_H */ diff --git a/include/hw/pci/pcie_regs.h b/include/hw/pci/pcie_regs.h index 9d3b686..33a2222 100644 --- a/include/hw/pci/pcie_regs.h +++ b/include/hw/pci/pcie_regs.h @@ -86,6 +86,14 @@ typedef enum PCIExpLinkWidth { #define PCI_ARI_VER 1 #define PCI_ARI_SIZEOF 8 +/* PASID */ +#define PCI_PASID_VER 1 +#define PCI_EXT_CAP_PASID_MAX_WIDTH 20 +#define PCI_PASID_CAP_WIDTH_SHIFT 8 + +/* PRI */ +#define PCI_PRI_VER 1 + /* AER */ #define 
PCI_ERR_VER 2 #define PCI_ERR_SIZEOF 0x48 diff --git a/include/hw/riscv/iommu.h b/include/hw/riscv/iommu.h index b03339d..8a8acfc 100644 --- a/include/hw/riscv/iommu.h +++ b/include/hw/riscv/iommu.h @@ -30,14 +30,12 @@ typedef struct RISCVIOMMUState RISCVIOMMUState; typedef struct RISCVIOMMUSpace RISCVIOMMUSpace; #define TYPE_RISCV_IOMMU_PCI "riscv-iommu-pci" -OBJECT_DECLARE_TYPE(RISCVIOMMUStatePci, RISCVIOMMUPciClass, RISCV_IOMMU_PCI) +OBJECT_DECLARE_SIMPLE_TYPE(RISCVIOMMUStatePci, RISCV_IOMMU_PCI) typedef struct RISCVIOMMUStatePci RISCVIOMMUStatePci; -typedef struct RISCVIOMMUPciClass RISCVIOMMUPciClass; #define TYPE_RISCV_IOMMU_SYS "riscv-iommu-device" -OBJECT_DECLARE_TYPE(RISCVIOMMUStateSys, RISCVIOMMUSysClass, RISCV_IOMMU_SYS) +OBJECT_DECLARE_SIMPLE_TYPE(RISCVIOMMUStateSys, RISCV_IOMMU_SYS) typedef struct RISCVIOMMUStateSys RISCVIOMMUStateSys; -typedef struct RISCVIOMMUSysClass RISCVIOMMUSysClass; #define FDT_IRQ_TYPE_EDGE_LOW 1 diff --git a/include/hw/s390x/ap-bridge.h b/include/hw/s390x/ap-bridge.h index 470e439..7efc529 100644 --- a/include/hw/s390x/ap-bridge.h +++ b/include/hw/s390x/ap-bridge.h @@ -16,4 +16,43 @@ void s390_init_ap(void); +typedef struct ChscSeiNt0Res { + uint16_t length; + uint16_t code; + uint8_t reserved1; + uint16_t reserved2; + uint8_t nt; +#define PENDING_EVENT_INFO_BITMASK 0x80; + uint8_t flags; + uint8_t reserved3; + uint8_t rs; + uint8_t cc; +} QEMU_PACKED ChscSeiNt0Res; + +#define NT0_RES_RESPONSE_CODE 1 +#define NT0_RES_NT_DEFAULT 0 +#define NT0_RES_RS_AP_CHANGE 5 +#define NT0_RES_CC_AP_CHANGE 3 + +#define EVENT_INFORMATION_NOT_STORED 1 +#define EVENT_INFORMATION_STORED 0 + +/** + * ap_chsc_sei_nt0_get_event - Retrieve the next pending AP config + * change event + * @res: Pointer to a ChscSeiNt0Res struct to be filled with event + * data + * + * This function checks for any pending AP config change events and, + * if present, populates the provided response structure with the + * appropriate SEI NT0 fields. 
+ * + * Return: + * EVENT_INFORMATION_STORED - An event was available and written to @res + * EVENT_INFORMATION_NOT_STORED - No event was available + */ +int ap_chsc_sei_nt0_get_event(void *res); + +bool ap_chsc_sei_nt0_have_event(void); + #endif diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h index 9283c94..d5e9aa4 100644 --- a/include/hw/s390x/cpu-topology.h +++ b/include/hw/s390x/cpu-topology.h @@ -13,7 +13,7 @@ #include "qemu/queue.h" #include "hw/boards.h" -#include "qapi/qapi-types-machine-target.h" +#include "qapi/qapi-types-machine-s390x.h" #define S390_TOPOLOGY_CPU_IFL 0x03 diff --git a/include/hw/s390x/event-facility.h b/include/hw/s390x/event-facility.h index ff874e7..eac7a51 100644 --- a/include/hw/s390x/event-facility.h +++ b/include/hw/s390x/event-facility.h @@ -25,6 +25,7 @@ #define SCLP_EVENT_MESSAGE 0x02 #define SCLP_EVENT_CONFIG_MGT_DATA 0x04 #define SCLP_EVENT_PMSGCMD 0x09 +#define SCLP_EVENT_CTRL_PGM_ID 0x0b #define SCLP_EVENT_ASCII_CONSOLE_DATA 0x1a #define SCLP_EVENT_SIGNAL_QUIESCE 0x1d @@ -35,6 +36,7 @@ #define SCLP_EVENT_MASK_MSG SCLP_EVMASK(SCLP_EVENT_MESSAGE) #define SCLP_EVENT_MASK_CONFIG_MGT_DATA SCLP_EVMASK(SCLP_EVENT_CONFIG_MGT_DATA) #define SCLP_EVENT_MASK_PMSGCMD SCLP_EVMASK(SCLP_EVENT_PMSGCMD) +#define SCLP_EVENT_MASK_CTRL_PGM_ID SCLP_EVMASK(SCLP_EVENT_CTRL_PGM_ID) #define SCLP_EVENT_MASK_MSG_ASCII SCLP_EVMASK(SCLP_EVENT_ASCII_CONSOLE_DATA) #define SCLP_EVENT_MASK_SIGNAL_QUIESCE SCLP_EVMASK(SCLP_EVENT_SIGNAL_QUIESCE) @@ -191,6 +193,21 @@ struct SCLPEventClass { bool (*can_handle_event)(uint8_t type); }; +#define TYPE_SCLP_EVENT_CPI "sclpcpi" +typedef struct SCLPEventCPIClass SCLPEventCPIClass; +typedef struct SCLPEventCPI SCLPEventCPI; +OBJECT_DECLARE_TYPE(SCLPEventCPI, SCLPEventCPIClass, + SCLP_EVENT_CPI) + +struct SCLPEventCPI { + SCLPEvent event; + uint8_t system_type[8]; + uint8_t system_name[8]; + uint64_t system_level; + uint8_t sysplex_name[8]; + uint64_t timestamp; +}; + #define 
TYPE_SCLP_EVENT_FACILITY "s390-sclp-event-facility" typedef struct SCLPEventFacility SCLPEventFacility; typedef struct SCLPEventFacilityClass SCLPEventFacilityClass; diff --git a/include/hw/s390x/s390-virtio-ccw.h b/include/hw/s390x/s390-virtio-ccw.h index 321b26d..526078a 100644 --- a/include/hw/s390x/s390-virtio-ccw.h +++ b/include/hw/s390x/s390-virtio-ccw.h @@ -54,6 +54,7 @@ struct S390CcwMachineClass { /*< public >*/ int max_threads; + bool use_cpi; }; #endif diff --git a/include/hw/timer/aspeed_timer.h b/include/hw/timer/aspeed_timer.h index 767cae4..a850625 100644 --- a/include/hw/timer/aspeed_timer.h +++ b/include/hw/timer/aspeed_timer.h @@ -16,8 +16,7 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * with this program; if not, see <https://www.gnu.org/licenses/>. 
*/ #ifndef ASPEED_TIMER_H #define ASPEED_TIMER_H diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h index 3d392b0..3cd86ec 100644 --- a/include/hw/vfio/vfio-container-base.h +++ b/include/hw/vfio/vfio-container-base.h @@ -78,7 +78,7 @@ void vfio_address_space_insert(VFIOAddressSpace *space, int vfio_container_dma_map(VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, - void *vaddr, bool readonly); + void *vaddr, bool readonly, MemoryRegion *mr); int vfio_container_dma_unmap(VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, IOMMUTLBEntry *iotlb, bool unmap_all); @@ -109,19 +109,64 @@ vfio_container_get_page_size_mask(const VFIOContainerBase *bcontainer) #define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy" #define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr" #define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd" +#define TYPE_VFIO_IOMMU_USER TYPE_VFIO_IOMMU "-user" OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU) struct VFIOIOMMUClass { ObjectClass parent_class; - /* basic feature */ + /** + * @setup + * + * Perform basic setup of the container, including configuring IOMMU + * capabilities, IOVA ranges, supported page sizes, etc. + * + * @bcontainer: #VFIOContainerBase + * @errp: pointer to Error*, to store an error if it happens. + * + * Returns true to indicate success and false for error. + */ bool (*setup)(VFIOContainerBase *bcontainer, Error **errp); + + /** + * @listener_begin + * + * Called at the beginning of an address space update transaction. + * See #MemoryListener. + * + * @bcontainer: #VFIOContainerBase + */ void (*listener_begin)(VFIOContainerBase *bcontainer); + + /** + * @listener_commit + * + * Called at the end of an address space update transaction, + * See #MemoryListener. + * + * @bcontainer: #VFIOContainerBase + */ void (*listener_commit)(VFIOContainerBase *bcontainer); + + /** + * @dma_map + * + * Map an address range into the container. 
Note that the memory region is + * referenced within an RCU read lock region across this call. + * + * @bcontainer: #VFIOContainerBase to use + * @iova: start address to map + * @size: size of the range to map + * @vaddr: process virtual address of mapping + * @readonly: true if mapping should be readonly + * @mr: the memory region for this mapping + * + * Returns 0 to indicate success and -errno otherwise. + */ int (*dma_map)(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, - void *vaddr, bool readonly); + void *vaddr, bool readonly, MemoryRegion *mr); /** * @dma_unmap * @@ -132,12 +177,38 @@ struct VFIOIOMMUClass { * @size: size of the range to unmap * @iotlb: The IOMMU TLB mapping entry (or NULL) * @unmap_all: if set, unmap the entire address space + * + * Returns 0 to indicate success and -errno otherwise. */ int (*dma_unmap)(const VFIOContainerBase *bcontainer, hwaddr iova, ram_addr_t size, IOMMUTLBEntry *iotlb, bool unmap_all); + + + /** + * @attach_device + * + * Associate the given device with a container and do some related + * initialization of the device context. + * + * @name: name of the device + * @vbasedev: the device + * @as: address space to use + * @errp: pointer to Error*, to store an error if it happens. + * + * Returns true to indicate success and false for error. + */ bool (*attach_device)(const char *name, VFIODevice *vbasedev, AddressSpace *as, Error **errp); + + /** + * @detach_device + * + * Detach the given device from its container and clean up any necessary + * state. + * + * @vbasedev: the device to disassociate + */ void (*detach_device)(VFIODevice *vbasedev); /* migration feature */ @@ -152,7 +223,7 @@ struct VFIOIOMMUClass { * @start: indicates whether to start or stop dirty pages tracking * @errp: pointer to Error*, to store an error if it happens. * - * Returns zero to indicate success and negative for error + * Returns zero to indicate success and negative for error. 
*/ int (*set_dirty_page_tracking)(const VFIOContainerBase *bcontainer, bool start, Error **errp); @@ -167,7 +238,7 @@ struct VFIOIOMMUClass { * @size: size of iova range * @errp: pointer to Error*, to store an error if it happens. * - * Returns zero to indicate success and negative for error + * Returns zero to indicate success and negative for error. */ int (*query_dirty_bitmap)(const VFIOContainerBase *bcontainer, VFIOBitmap *vbmap, hwaddr iova, hwaddr size, Error **errp); @@ -183,4 +254,10 @@ struct VFIOIOMMUClass { void (*release)(VFIOContainerBase *bcontainer); }; +VFIORamDiscardListener *vfio_find_ram_discard_listener( + VFIOContainerBase *bcontainer, MemoryRegionSection *section); + +void vfio_container_region_add(VFIOContainerBase *bcontainer, + MemoryRegionSection *section, bool cpr_remap); + #endif /* HW_VFIO_VFIO_CONTAINER_BASE_H */ diff --git a/include/hw/vfio/vfio-container.h b/include/hw/vfio/vfio-container.h index afc498d..21e5807 100644 --- a/include/hw/vfio/vfio-container.h +++ b/include/hw/vfio/vfio-container.h @@ -10,6 +10,7 @@ #define HW_VFIO_CONTAINER_H #include "hw/vfio/vfio-container-base.h" +#include "hw/vfio/vfio-cpr.h" typedef struct VFIOContainer VFIOContainer; typedef struct VFIODevice VFIODevice; @@ -29,6 +30,7 @@ typedef struct VFIOContainer { int fd; /* /dev/vfio/vfio, empowered by the attached groups */ unsigned iommu_type; QLIST_HEAD(, VFIOGroup) group_list; + VFIOContainerCPR cpr; } VFIOContainer; OBJECT_DECLARE_SIMPLE_TYPE(VFIOContainer, VFIO_IOMMU_LEGACY); diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h new file mode 100644 index 0000000..8bf85b9 --- /dev/null +++ b/include/hw/vfio/vfio-cpr.h @@ -0,0 +1,57 @@ +/* + * VFIO CPR + * + * Copyright (c) 2025 Oracle and/or its affiliates. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef HW_VFIO_VFIO_CPR_H +#define HW_VFIO_VFIO_CPR_H + +#include "migration/misc.h" +#include "system/memory.h" + +struct VFIOContainer; +struct VFIOContainerBase; +struct VFIOGroup; + +typedef struct VFIOContainerCPR { + Error *blocker; + bool vaddr_unmapped; + NotifierWithReturn transfer_notifier; + MemoryListener remap_listener; + int (*saved_dma_map)(const struct VFIOContainerBase *bcontainer, + hwaddr iova, ram_addr_t size, + void *vaddr, bool readonly, MemoryRegion *mr); +} VFIOContainerCPR; + +typedef struct VFIODeviceCPR { + Error *mdev_blocker; +} VFIODeviceCPR; + +bool vfio_legacy_cpr_register_container(struct VFIOContainer *container, + Error **errp); +void vfio_legacy_cpr_unregister_container(struct VFIOContainer *container); + +int vfio_cpr_reboot_notifier(NotifierWithReturn *notifier, MigrationEvent *e, + Error **errp); + +bool vfio_cpr_register_container(struct VFIOContainerBase *bcontainer, + Error **errp); +void vfio_cpr_unregister_container(struct VFIOContainerBase *bcontainer); + +int vfio_cpr_group_get_device_fd(int d, const char *name); + +bool vfio_cpr_container_match(struct VFIOContainer *container, + struct VFIOGroup *group, int fd); + +void vfio_cpr_giommu_remap(struct VFIOContainerBase *bcontainer, + MemoryRegionSection *section); + +bool vfio_cpr_ram_discard_register_listener( + struct VFIOContainerBase *bcontainer, MemoryRegionSection *section); + +extern const VMStateDescription vfio_cpr_pci_vmstate; + +#endif /* HW_VFIO_VFIO_CPR_H */ diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h index 8bcb3c1..c616652 100644 --- a/include/hw/vfio/vfio-device.h +++ b/include/hw/vfio/vfio-device.h @@ -28,6 +28,7 @@ #endif #include "system/system.h" #include "hw/vfio/vfio-container-base.h" +#include "hw/vfio/vfio-cpr.h" #include "system/host_iommu_device.h" #include "system/iommufd.h" @@ -46,6 +47,7 @@ typedef struct VFIOMigration VFIOMigration; typedef struct 
IOMMUFDBackend IOMMUFDBackend; typedef struct VFIOIOASHwpt VFIOIOASHwpt; +typedef struct VFIOUserProxy VFIOUserProxy; typedef struct VFIODevice { QLIST_ENTRY(VFIODevice) next; @@ -66,6 +68,7 @@ typedef struct VFIODevice { OnOffAuto enable_migration; OnOffAuto migration_multifd_transfer; bool migration_events; + bool use_region_fds; VFIODeviceOps *ops; VFIODeviceIOOps *io_ops; unsigned int num_irqs; @@ -84,6 +87,9 @@ typedef struct VFIODevice { VFIOIOASHwpt *hwpt; QLIST_ENTRY(VFIODevice) hwpt_next; struct vfio_region_info **reginfo; + int *region_fds; + VFIODeviceCPR cpr; + VFIOUserProxy *proxy; } VFIODevice; struct VFIODeviceOps { @@ -164,36 +170,64 @@ struct VFIODeviceIOOps { * @device_feature * * Fill in feature info for the given device. + * + * @vdev: #VFIODevice to use + * @feat: feature information to fill in + * + * Returns 0 on success or -errno. */ - int (*device_feature)(VFIODevice *vdev, struct vfio_device_feature *); + int (*device_feature)(VFIODevice *vdev, struct vfio_device_feature *feat); /** * @get_region_info * - * Fill in @info with information on the region given by @info->index. + * Get the information for a given region on the device. + * + * @vdev: #VFIODevice to use + * @info: set @info->index to the region index to look up; the rest of the + * struct will be filled in on success + * @fd: pointer to the fd for the region; will be -1 if not found + * + * Returns 0 on success or -errno. */ int (*get_region_info)(VFIODevice *vdev, - struct vfio_region_info *info); + struct vfio_region_info *info, int *fd); /** * @get_irq_info * - * Fill in @irq with information on the IRQ given by @info->index. + * @vdev: #VFIODevice to use + * @irq: set @irq->index to the IRQ index to look up; the rest of the struct + * will be filled in on success + * + * Returns 0 on success or -errno. */ int (*get_irq_info)(VFIODevice *vdev, struct vfio_irq_info *irq); /** * @set_irqs * - * Configure IRQs as defined by @irqs. + * Configure IRQs. 
+ * + * @vdev: #VFIODevice to use + * @irqs: IRQ configuration as defined by VFIO docs. + * + * Returns 0 on success or -errno. */ int (*set_irqs)(VFIODevice *vdev, struct vfio_irq_set *irqs); /** * @region_read * - * Read @size bytes from the region @nr at offset @off into the buffer - * @data. + * Read part of a region. + * + * @vdev: #VFIODevice to use + * @nr: region index + * @off: offset within the region + * @size: size in bytes to read + * @data: buffer to read into + * + * Returns number of bytes read on success or -errno. */ int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size, void *data); @@ -201,11 +235,19 @@ struct VFIODeviceIOOps { /** * @region_write * - * Write @size bytes to the region @nr at offset @off from the buffer - * @data. + * Write part of a region. + * + * @vdev: #VFIODevice to use + * @nr: region index + * @off: offset within the region + * @size: size in bytes to write + * @data: buffer to write from + * @post: true if this is a posted write + * + * Returns number of bytes written on success or -errno. */ int (*region_write)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size, - void *data); + void *data, bool post); }; void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer, @@ -217,6 +259,18 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index, struct vfio_region_info **info); int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type, uint32_t subtype, struct vfio_region_info **info); + +/** + * Return the fd for mapping this region. This is either the device's fd (for + * e.g. kernel vfio), or a per-region fd (for vfio-user). + * + * @vbasedev: #VFIODevice to use + * @index: region index + * + * Returns the fd. 
+ */ +int vfio_device_get_region_fd(VFIODevice *vbasedev, int index); + bool vfio_device_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type); int vfio_device_get_irq_info(VFIODevice *vbasedev, int index, diff --git a/include/hw/vfio/vfio-region.h b/include/hw/vfio/vfio-region.h index cbffb26..ede6e0c 100644 --- a/include/hw/vfio/vfio-region.h +++ b/include/hw/vfio/vfio-region.h @@ -29,6 +29,7 @@ typedef struct VFIORegion { uint32_t nr_mmaps; VFIOMmap *mmaps; uint8_t nr; /* cache the region number for debug */ + bool post_wr; /* writes can be posted */ } VFIORegion; diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h index 0a9575b..449bf5c 100644 --- a/include/hw/virtio/vhost-vdpa.h +++ b/include/hw/virtio/vhost-vdpa.h @@ -43,7 +43,21 @@ typedef struct vhost_vdpa_shared { struct vhost_vdpa_iova_range iova_range; QLIST_HEAD(, vdpa_iommu) iommu_list; - /* IOVA mapping used by the Shadow Virtqueue */ + /* + * IOVA mapping used by the Shadow Virtqueue + * + * It is shared among all ASID for simplicity, whether CVQ shares ASID with + * guest or not: + * - Memory listener needs access to guest's memory addresses allocated in + * the IOVA tree. + * - There should be plenty of IOVA address space for both ASID not to + * worry about collisions between them. Guest's translations are still + * validated with virtio virtqueue_pop so there is no risk for the guest + * to access memory that it shouldn't. + * + * To allocate an IOVA tree per ASID is doable but it complicates the code + * and it is not worth it for the moment. + */ VhostIOVATree *iova_tree; /* Copy of backend features */ @@ -51,6 +65,12 @@ typedef struct vhost_vdpa_shared { bool iotlb_batch_begin_sent; + /* + * The memory listener has been registered, so DMA maps have been sent to + * the device. 
+ */ + bool listener_registered; + /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */ bool shadow_data; diff --git a/include/hw/virtio/virtio-mem.h b/include/hw/virtio/virtio-mem.h index bc4f787..e0ab31b 100644 --- a/include/hw/virtio/virtio-mem.h +++ b/include/hw/virtio/virtio-mem.h @@ -134,7 +134,7 @@ struct VirtioMemSystemReset { struct VirtIOMEMClass { /* private */ - VirtIODevice parent; + VirtioDeviceClass parent_class; /* public */ void (*fill_device_info)(const VirtIOMEM *vmen, VirtioMEMDeviceInfo *vi); diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h index 1dbc385..eab5394 100644 --- a/include/hw/virtio/virtio-pci.h +++ b/include/hw/virtio/virtio-pci.h @@ -32,9 +32,7 @@ DECLARE_OBJ_CHECKERS(VirtioPCIBusState, VirtioPCIBusClass, enum { VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, - VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, - VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, VIRTIO_PCI_FLAG_ATS_BIT, VIRTIO_PCI_FLAG_INIT_DEVERR_BIT, @@ -54,12 +52,6 @@ enum { * vcpu thread using ioeventfd for some devices. */ #define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT) -/* virtio version flags */ -#define VIRTIO_PCI_FLAG_DISABLE_PCIE (1 << VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT) - -/* migrate extra state */ -#define VIRTIO_PCI_FLAG_MIGRATE_EXTRA (1 << VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT) - /* have pio notification for modern device ? 
*/ #define VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY \ (1 << VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT) diff --git a/include/hw/virtio/virtio-pmem.h b/include/hw/virtio/virtio-pmem.h index fc4fd1f..9cce600 100644 --- a/include/hw/virtio/virtio-pmem.h +++ b/include/hw/virtio/virtio-pmem.h @@ -36,7 +36,7 @@ struct VirtIOPMEM { struct VirtIOPMEMClass { /* private */ - VirtIODevice parent; + VirtioDeviceClass parent_class; /* public */ void (*fill_device_info)(const VirtIOPMEM *pmem, VirtioPMEMDeviceInfo *vi); |