diff options
author | Stefan Hajnoczi <stefanha@redhat.com> | 2025-03-05 21:56:46 +0800 |
---|---|---|
committer | Stefan Hajnoczi <stefanha@redhat.com> | 2025-03-05 21:56:46 +0800 |
commit | 9ee727802012ddb32e193d84052a44e382088277 (patch) | |
tree | 1f0b568403bff2387dcd63c38759a5ab66497292 /hw | |
parent | f5e6e13124440797308d2c044f44d9e655fcb74d (diff) | |
parent | 4db19d5b21e058e6eb3474b6be470d1184afaa9e (diff) | |
download | qemu-9ee727802012ddb32e193d84052a44e382088277.zip qemu-9ee727802012ddb32e193d84052a44e382088277.tar.gz qemu-9ee727802012ddb32e193d84052a44e382088277.tar.bz2 |
Merge tag 'pull-riscv-to-apply-20250305-1' of https://github.com/alistair23/qemu into staging
Third RISC-V PR for 10.0
* CSR coverity fixes
* Fix unexpected behavior of vector reduction instructions when vl is 0
* Fix incorrect vlen comparison in prop_vlen_set
* Throw debug exception before page fault
* Remove redundant "hart_idx" masking from APLIC
* Add support for Control Transfer Records Ext
* Remove redundant struct members from the IOMMU
* Remove duplicate definitions from the IOMMU
* Fix tick_offset migration for Goldfish RTC
* Add serial alias in virt machine DTB
* Remove Bin Meng from RISC-V maintainers
* Add support for Control Transfer Records Ext
* Log guest errors when reserved bits are set in PTEs
* Add missing Sdtrig disas CSRs
* Correct the hpmevent sscofpmf mask
* Mask upper sscofpmf bits during validation
* Remove warnings about Smdbltrp/Smrnmi being disabled
* Respect mseccfg.RLB bit for TOR mode PMP entry
* Update KVM support to Linux 6.14-rc3
* IOMMU HPM support
* Support Sscofpmf/Svade/Svadu/Smnpm/Ssnpm extensions in KVM
* Add --ignore-family option to binfmt
* Refinement for AIA with KVM acceleration
* Reset time changes for KVM
# -----BEGIN PGP SIGNATURE-----
#
# iQIzBAABCAAdFiEEaukCtqfKh31tZZKWr3yVEwxTgBMFAmfHrkEACgkQr3yVEwxT
# gBNGTA/+N9nBPZt5cv0E/0EDZMQS8RQrQvz1yHRgAXOq8RnOdcL72v8wovGAfnVu
# l0BXDoVBvw4f2Xm9Q4ptlfH8HAefCeQ4E/K9j5Lwxr8OqZHFg6e+JQIyZOt6wBWI
# hJbz1/laJIbXq3cGgwcE/l0aGfb2UAAsA4dsZVt/MnjAV8GS7BF9RCkgCPxD4FZA
# 0PLiq9dF+4o4q7PxnxAbUVz/uhLzqmcnQemQFHbf9Wms3tZEDKmPSoKP/v+01Rkw
# tm+cgy7OocpgygbMc0nykYG50P+raUBSesk/jFGeKj8cU4IeMuzDsVPWcd4rG+0X
# Z+nENfOY7vOqMCXgaQCW2r4vEQx2Gj0yQG6xmVAemRWzFHJdz5W01/uUSHzJSB+L
# +VbAH55HYKr6sbgecqInQ/rsHKyw6D5QFcj/guz+kvhsH9rJ5q60uywrWL5OEuaK
# vKv7cSZghlf9bwy6soassXxk8z+j4psJ7WnnVpynNKMew9yFFDhayuIFbo9952gH
# 3+NCm2cQrkTYJOXAJwkxBD+I4AXxNSuxNjaVANk9q80uqbT9JiHM7pcvbJI00Fji
# OutJSPYtVXEin9Ev3sJ05YQHsIcZ/Noi3O5IdaRI0AMk/8gyGyhFCVgSpV52dH59
# HguPK05e5cW/xgElGUPHrU+UtzE05p18HnSoVPclF/B5rc8QXN0=
# =dobk
# -----END PGP SIGNATURE-----
# gpg: Signature made Wed 05 Mar 2025 09:52:01 HKT
# gpg: using RSA key 6AE902B6A7CA877D6D659296AF7C95130C538013
# gpg: Good signature from "Alistair Francis <alistair@alistair23.me>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg: There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 6AE9 02B6 A7CA 877D 6D65 9296 AF7C 9513 0C53 8013
* tag 'pull-riscv-to-apply-20250305-1' of https://github.com/alistair23/qemu: (59 commits)
target/riscv/kvm: add missing KVM CSRs
target/riscv/kvm: add kvm_riscv_reset_regs_csr()
target/riscv/cpu: remove unneeded !kvm_enabled() check
hw/intc/aplic: refine kvm_msicfgaddr
hw/intc/aplic: refine the APLIC realize
hw/intc/imsic: refine the IMSIC realize
binfmt: Add --ignore-family option
binfmt: Normalize host CPU architecture
binfmt: Shuffle things around
target/riscv/kvm: Add some exts support
docs/specs/riscv-iommu.rst: add HPM support info
hw/riscv: add IOMMU HPM trace events
hw/riscv/riscv-iommu.c: add RISCV_IOMMU_CAP_HPM cap
hw/riscv/riscv-iommu: add hpm events mmio write
hw/riscv/riscv-iommu: add IOHPMCYCLES mmio write
hw/riscv/riscv-iommu: add IOCOUNTINH mmio writes
hw/riscv/riscv-iommu: instantiate hpm_timer
hw/riscv/riscv-iommu: add riscv_iommu_hpm_incr_ctr()
hw/riscv/riscv-iommu: add riscv-iommu-hpm file
hw/riscv/riscv-iommu-bits.h: HPM bits
...
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'hw')
-rw-r--r-- | hw/intc/riscv_aplic.c | 74 | ||||
-rw-r--r-- | hw/intc/riscv_imsic.c | 47 | ||||
-rw-r--r-- | hw/riscv/meson.build | 3 | ||||
-rw-r--r-- | hw/riscv/riscv-iommu-bits.h | 69 | ||||
-rw-r--r-- | hw/riscv/riscv-iommu-hpm.c | 381 | ||||
-rw-r--r-- | hw/riscv/riscv-iommu-hpm.h | 33 | ||||
-rw-r--r-- | hw/riscv/riscv-iommu.c | 131 | ||||
-rw-r--r-- | hw/riscv/riscv-iommu.h | 32 | ||||
-rw-r--r-- | hw/riscv/trace-events | 5 | ||||
-rw-r--r-- | hw/riscv/virt.c | 3 | ||||
-rw-r--r-- | hw/rtc/goldfish_rtc.c | 43 |
11 files changed, 697 insertions, 124 deletions
diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c index 4866649..5964cde 100644 --- a/hw/intc/riscv_aplic.c +++ b/hw/intc/riscv_aplic.c @@ -181,8 +181,10 @@ void riscv_aplic_set_kvm_msicfgaddr(RISCVAPLICState *aplic, hwaddr addr) { #ifdef CONFIG_KVM if (riscv_use_emulated_aplic(aplic->msimode)) { + addr >>= APLIC_xMSICFGADDR_PPN_SHIFT; aplic->kvm_msicfgaddr = extract64(addr, 0, 32); - aplic->kvm_msicfgaddrH = extract64(addr, 32, 32); + aplic->kvm_msicfgaddrH = extract64(addr, 32, 32) & + APLIC_xMSICFGADDRH_VALID_MASK; } #endif } @@ -403,12 +405,17 @@ static void riscv_aplic_msi_send(RISCVAPLICState *aplic, } } - if (aplic->mmode) { - msicfgaddr = aplic_m->mmsicfgaddr; - msicfgaddrH = aplic_m->mmsicfgaddrH; + if (aplic->kvm_splitmode) { + msicfgaddr = aplic->kvm_msicfgaddr; + msicfgaddrH = ((uint64_t)aplic->kvm_msicfgaddrH << 32); } else { - msicfgaddr = aplic_m->smsicfgaddr; - msicfgaddrH = aplic_m->smsicfgaddrH; + if (aplic->mmode) { + msicfgaddr = aplic_m->mmsicfgaddr; + msicfgaddrH = aplic_m->mmsicfgaddrH; + } else { + msicfgaddr = aplic_m->smsicfgaddr; + msicfgaddrH = aplic_m->smsicfgaddrH; + } } lhxs = (msicfgaddrH >> APLIC_xMSICFGADDRH_LHXS_SHIFT) & @@ -421,7 +428,6 @@ static void riscv_aplic_msi_send(RISCVAPLICState *aplic, APLIC_xMSICFGADDRH_HHXW_MASK; group_idx = hart_idx >> lhxw; - hart_idx &= APLIC_xMSICFGADDR_PPN_LHX_MASK(lhxw); addr = msicfgaddr; addr |= ((uint64_t)(msicfgaddrH & APLIC_xMSICFGADDRH_BAPPN_MASK)) << 32; @@ -432,11 +438,6 @@ static void riscv_aplic_msi_send(RISCVAPLICState *aplic, addr |= (uint64_t)(guest_idx & APLIC_xMSICFGADDR_PPN_HART(lhxs)); addr <<= APLIC_xMSICFGADDR_PPN_SHIFT; - if (aplic->kvm_splitmode) { - addr |= aplic->kvm_msicfgaddr; - addr |= ((uint64_t)aplic->kvm_msicfgaddrH << 32); - } - address_space_stl_le(&address_space_memory, addr, eiid, MEMTXATTRS_UNSPECIFIED, &result); if (result != MEMTX_OK) { @@ -894,6 +895,26 @@ static void riscv_aplic_realize(DeviceState *dev, Error **errp) RISCVAPLICState *aplic = 
RISCV_APLIC(dev); if (riscv_use_emulated_aplic(aplic->msimode)) { + /* Create output IRQ lines for non-MSI mode */ + if (!aplic->msimode) { + /* Claim the CPU interrupt to be triggered by this APLIC */ + for (i = 0; i < aplic->num_harts; i++) { + RISCVCPU *cpu; + + cpu = RISCV_CPU(cpu_by_arch_id(aplic->hartid_base + i)); + if (riscv_cpu_claim_interrupts(cpu, + (aplic->mmode) ? MIP_MEIP : MIP_SEIP) < 0) { + error_report("%s already claimed", + (aplic->mmode) ? "MEIP" : "SEIP"); + exit(1); + } + } + + aplic->external_irqs = g_malloc(sizeof(qemu_irq) * + aplic->num_harts); + qdev_init_gpio_out(dev, aplic->external_irqs, aplic->num_harts); + } + aplic->bitfield_words = (aplic->num_irqs + 31) >> 5; aplic->sourcecfg = g_new0(uint32_t, aplic->num_irqs); aplic->state = g_new0(uint32_t, aplic->num_irqs); @@ -928,23 +949,6 @@ static void riscv_aplic_realize(DeviceState *dev, Error **errp) } } - /* Create output IRQ lines for non-MSI mode */ - if (!aplic->msimode) { - aplic->external_irqs = g_malloc(sizeof(qemu_irq) * aplic->num_harts); - qdev_init_gpio_out(dev, aplic->external_irqs, aplic->num_harts); - - /* Claim the CPU interrupt to be triggered by this APLIC */ - for (i = 0; i < aplic->num_harts; i++) { - RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(aplic->hartid_base + i)); - if (riscv_cpu_claim_interrupts(cpu, - (aplic->mmode) ? MIP_MEIP : MIP_SEIP) < 0) { - error_report("%s already claimed", - (aplic->mmode) ? 
"MEIP" : "SEIP"); - exit(1); - } - } - } - msi_nonbroken = true; } @@ -1068,15 +1072,15 @@ DeviceState *riscv_aplic_create(hwaddr addr, hwaddr size, if (riscv_use_emulated_aplic(msimode)) { sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); - } - if (!msimode) { - for (i = 0; i < num_harts; i++) { - CPUState *cpu = cpu_by_arch_id(hartid_base + i); + if (!msimode) { + for (i = 0; i < num_harts; i++) { + CPUState *cpu = cpu_by_arch_id(hartid_base + i); - qdev_connect_gpio_out_named(dev, NULL, i, - qdev_get_gpio_in(DEVICE(cpu), + qdev_connect_gpio_out_named(dev, NULL, i, + qdev_get_gpio_in(DEVICE(cpu), (mmode) ? IRQ_M_EXT : IRQ_S_EXT)); + } } } diff --git a/hw/intc/riscv_imsic.c b/hw/intc/riscv_imsic.c index dc8162c..241b12f 100644 --- a/hw/intc/riscv_imsic.c +++ b/hw/intc/riscv_imsic.c @@ -349,7 +349,19 @@ static void riscv_imsic_realize(DeviceState *dev, Error **errp) CPUState *cpu = cpu_by_arch_id(imsic->hartid); CPURISCVState *env = cpu ? cpu_env(cpu) : NULL; + /* Claim the CPU interrupt to be triggered by this IMSIC */ + if (riscv_cpu_claim_interrupts(rcpu, + (imsic->mmode) ? MIP_MEIP : MIP_SEIP) < 0) { + error_setg(errp, "%s already claimed", + (imsic->mmode) ? "MEIP" : "SEIP"); + return; + } + if (!kvm_irqchip_in_kernel()) { + /* Create output IRQ lines */ + imsic->external_irqs = g_malloc(sizeof(qemu_irq) * imsic->num_pages); + qdev_init_gpio_out(dev, imsic->external_irqs, imsic->num_pages); + imsic->num_eistate = imsic->num_pages * imsic->num_irqs; imsic->eidelivery = g_new0(uint32_t, imsic->num_pages); imsic->eithreshold = g_new0(uint32_t, imsic->num_pages); @@ -361,18 +373,6 @@ static void riscv_imsic_realize(DeviceState *dev, Error **errp) IMSIC_MMIO_SIZE(imsic->num_pages)); sysbus_init_mmio(SYS_BUS_DEVICE(dev), &imsic->mmio); - /* Claim the CPU interrupt to be triggered by this IMSIC */ - if (riscv_cpu_claim_interrupts(rcpu, - (imsic->mmode) ? MIP_MEIP : MIP_SEIP) < 0) { - error_setg(errp, "%s already claimed", - (imsic->mmode) ? 
"MEIP" : "SEIP"); - return; - } - - /* Create output IRQ lines */ - imsic->external_irqs = g_malloc(sizeof(qemu_irq) * imsic->num_pages); - qdev_init_gpio_out(dev, imsic->external_irqs, imsic->num_pages); - /* Force select AIA feature and setup CSR read-modify-write callback */ if (env) { if (!imsic->mmode) { @@ -381,8 +381,11 @@ static void riscv_imsic_realize(DeviceState *dev, Error **errp) } else { rcpu->cfg.ext_smaia = true; } - riscv_cpu_set_aia_ireg_rmw_fn(env, (imsic->mmode) ? PRV_M : PRV_S, - riscv_imsic_rmw, imsic); + + if (!kvm_irqchip_in_kernel()) { + riscv_cpu_set_aia_ireg_rmw_fn(env, (imsic->mmode) ? PRV_M : PRV_S, + riscv_imsic_rmw, imsic); + } } msi_nonbroken = true; @@ -464,15 +467,17 @@ DeviceState *riscv_imsic_create(hwaddr addr, uint32_t hartid, bool mmode, sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); - for (i = 0; i < num_pages; i++) { - if (!i) { - qdev_connect_gpio_out_named(dev, NULL, i, - qdev_get_gpio_in(DEVICE(cpu), + if (!kvm_irqchip_in_kernel()) { + for (i = 0; i < num_pages; i++) { + if (!i) { + qdev_connect_gpio_out_named(dev, NULL, i, + qdev_get_gpio_in(DEVICE(cpu), (mmode) ? 
IRQ_M_EXT : IRQ_S_EXT)); - } else { - qdev_connect_gpio_out_named(dev, NULL, i, - qdev_get_gpio_in(DEVICE(cpu), + } else { + qdev_connect_gpio_out_named(dev, NULL, i, + qdev_get_gpio_in(DEVICE(cpu), IRQ_LOCAL_MAX + i - 1)); + } } } diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build index 3c7e083..c22f3a7 100644 --- a/hw/riscv/meson.build +++ b/hw/riscv/meson.build @@ -10,7 +10,8 @@ riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: files('sifive_u.c')) riscv_ss.add(when: 'CONFIG_SPIKE', if_true: files('spike.c')) riscv_ss.add(when: 'CONFIG_MICROCHIP_PFSOC', if_true: files('microchip_pfsoc.c')) riscv_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c')) -riscv_ss.add(when: 'CONFIG_RISCV_IOMMU', if_true: files('riscv-iommu.c', 'riscv-iommu-pci.c', 'riscv-iommu-sys.c')) +riscv_ss.add(when: 'CONFIG_RISCV_IOMMU', if_true: files( + 'riscv-iommu.c', 'riscv-iommu-pci.c', 'riscv-iommu-sys.c', 'riscv-iommu-hpm.c')) riscv_ss.add(when: 'CONFIG_MICROBLAZE_V', if_true: files('microblaze-v-generic.c')) hw_arch += {'riscv': riscv_ss} diff --git a/hw/riscv/riscv-iommu-bits.h b/hw/riscv/riscv-iommu-bits.h index 485f36b..b7cb1bc 100644 --- a/hw/riscv/riscv-iommu-bits.h +++ b/hw/riscv/riscv-iommu-bits.h @@ -50,8 +50,14 @@ struct riscv_iommu_pq_record { #define RISCV_IOMMU_PREQ_HDR_PRIV BIT_ULL(33) #define RISCV_IOMMU_PREQ_HDR_EXEC BIT_ULL(34) #define RISCV_IOMMU_PREQ_HDR_DID GENMASK_ULL(63, 40) + /* Payload fields */ +#define RISCV_IOMMU_PREQ_PAYLOAD_R BIT_ULL(0) +#define RISCV_IOMMU_PREQ_PAYLOAD_W BIT_ULL(1) +#define RISCV_IOMMU_PREQ_PAYLOAD_L BIT_ULL(2) #define RISCV_IOMMU_PREQ_PAYLOAD_M GENMASK_ULL(2, 0) +#define RISCV_IOMMU_PREQ_PRG_INDEX GENMASK_ULL(11, 3) +#define RISCV_IOMMU_PREQ_UADDR GENMASK_ULL(63, 12) /* Common field positions */ #define RISCV_IOMMU_PPN_FIELD GENMASK_ULL(53, 10) @@ -82,6 +88,7 @@ struct riscv_iommu_pq_record { #define RISCV_IOMMU_CAP_ATS BIT_ULL(25) #define RISCV_IOMMU_CAP_T2GPA BIT_ULL(26) #define RISCV_IOMMU_CAP_IGS GENMASK_ULL(29, 28) 
+#define RISCV_IOMMU_CAP_HPM BIT_ULL(30) #define RISCV_IOMMU_CAP_DBG BIT_ULL(31) #define RISCV_IOMMU_CAP_PAS GENMASK_ULL(37, 32) #define RISCV_IOMMU_CAP_PD8 BIT_ULL(38) @@ -191,6 +198,52 @@ enum { RISCV_IOMMU_INTR_COUNT }; +#define RISCV_IOMMU_IOCOUNT_NUM 31 + +/* 5.19 Performance monitoring counter overflow status (32bits) */ +#define RISCV_IOMMU_REG_IOCOUNTOVF 0x0058 +#define RISCV_IOMMU_IOCOUNTOVF_CY BIT(0) + +/* 5.20 Performance monitoring counter inhibits (32bits) */ +#define RISCV_IOMMU_REG_IOCOUNTINH 0x005C +#define RISCV_IOMMU_IOCOUNTINH_CY BIT(0) + +/* 5.21 Performance monitoring cycles counter (64bits) */ +#define RISCV_IOMMU_REG_IOHPMCYCLES 0x0060 +#define RISCV_IOMMU_IOHPMCYCLES_COUNTER GENMASK_ULL(62, 0) +#define RISCV_IOMMU_IOHPMCYCLES_OVF BIT_ULL(63) + +/* 5.22 Performance monitoring event counters (31 * 64bits) */ +#define RISCV_IOMMU_REG_IOHPMCTR_BASE 0x0068 +#define RISCV_IOMMU_REG_IOHPMCTR(_n) \ + (RISCV_IOMMU_REG_IOHPMCTR_BASE + (_n * 0x8)) + +/* 5.23 Performance monitoring event selectors (31 * 64bits) */ +#define RISCV_IOMMU_REG_IOHPMEVT_BASE 0x0160 +#define RISCV_IOMMU_REG_IOHPMEVT(_n) \ + (RISCV_IOMMU_REG_IOHPMEVT_BASE + (_n * 0x8)) +#define RISCV_IOMMU_IOHPMEVT_EVENT_ID GENMASK_ULL(14, 0) +#define RISCV_IOMMU_IOHPMEVT_DMASK BIT_ULL(15) +#define RISCV_IOMMU_IOHPMEVT_PID_PSCID GENMASK_ULL(35, 16) +#define RISCV_IOMMU_IOHPMEVT_DID_GSCID GENMASK_ULL(59, 36) +#define RISCV_IOMMU_IOHPMEVT_PV_PSCV BIT_ULL(60) +#define RISCV_IOMMU_IOHPMEVT_DV_GSCV BIT_ULL(61) +#define RISCV_IOMMU_IOHPMEVT_IDT BIT_ULL(62) +#define RISCV_IOMMU_IOHPMEVT_OF BIT_ULL(63) + +enum RISCV_IOMMU_HPMEVENT_id { + RISCV_IOMMU_HPMEVENT_INVALID = 0, + RISCV_IOMMU_HPMEVENT_URQ = 1, + RISCV_IOMMU_HPMEVENT_TRQ = 2, + RISCV_IOMMU_HPMEVENT_ATS_RQ = 3, + RISCV_IOMMU_HPMEVENT_TLB_MISS = 4, + RISCV_IOMMU_HPMEVENT_DD_WALK = 5, + RISCV_IOMMU_HPMEVENT_PD_WALK = 6, + RISCV_IOMMU_HPMEVENT_S_VS_WALKS = 7, + RISCV_IOMMU_HPMEVENT_G_WALKS = 8, + RISCV_IOMMU_HPMEVENT_MAX = 9 +}; + /* 5.24 
Translation request IOVA (64bits) */ #define RISCV_IOMMU_REG_TR_REQ_IOVA 0x0258 @@ -382,22 +435,6 @@ enum riscv_iommu_fq_ttypes { RISCV_IOMMU_FW_TTYPE_PCIE_MSG_REQ = 9, }; -/* Header fields */ -#define RISCV_IOMMU_PREQ_HDR_PID GENMASK_ULL(31, 12) -#define RISCV_IOMMU_PREQ_HDR_PV BIT_ULL(32) -#define RISCV_IOMMU_PREQ_HDR_PRIV BIT_ULL(33) -#define RISCV_IOMMU_PREQ_HDR_EXEC BIT_ULL(34) -#define RISCV_IOMMU_PREQ_HDR_DID GENMASK_ULL(63, 40) - -/* Payload fields */ -#define RISCV_IOMMU_PREQ_PAYLOAD_R BIT_ULL(0) -#define RISCV_IOMMU_PREQ_PAYLOAD_W BIT_ULL(1) -#define RISCV_IOMMU_PREQ_PAYLOAD_L BIT_ULL(2) -#define RISCV_IOMMU_PREQ_PAYLOAD_M GENMASK_ULL(2, 0) -#define RISCV_IOMMU_PREQ_PRG_INDEX GENMASK_ULL(11, 3) -#define RISCV_IOMMU_PREQ_UADDR GENMASK_ULL(63, 12) - - /* * struct riscv_iommu_msi_pte - MSI Page Table Entry */ diff --git a/hw/riscv/riscv-iommu-hpm.c b/hw/riscv/riscv-iommu-hpm.c new file mode 100644 index 0000000..c5034bf --- /dev/null +++ b/hw/riscv/riscv-iommu-hpm.c @@ -0,0 +1,381 @@ +/* + * RISC-V IOMMU - Hardware Performance Monitor (HPM) helpers + * + * Copyright (C) 2022-2023 Rivos Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qemu/timer.h" +#include "cpu_bits.h" +#include "riscv-iommu-hpm.h" +#include "riscv-iommu.h" +#include "riscv-iommu-bits.h" +#include "trace.h" + +/* For now we assume IOMMU HPM frequency to be 1GHz so 1-cycle is of 1-ns. 
*/ +static inline uint64_t get_cycles(void) +{ + return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); +} + +uint64_t riscv_iommu_hpmcycle_read(RISCVIOMMUState *s) +{ + const uint64_t cycle = riscv_iommu_reg_get64( + s, RISCV_IOMMU_REG_IOHPMCYCLES); + const uint32_t inhibit = riscv_iommu_reg_get32( + s, RISCV_IOMMU_REG_IOCOUNTINH); + const uint64_t ctr_prev = s->hpmcycle_prev; + const uint64_t ctr_val = s->hpmcycle_val; + + trace_riscv_iommu_hpm_read(cycle, inhibit, ctr_prev, ctr_val); + + if (get_field(inhibit, RISCV_IOMMU_IOCOUNTINH_CY)) { + /* + * Counter should not increment if inhibit bit is set. We can't really + * stop the QEMU_CLOCK_VIRTUAL, so we just return the last updated + * counter value to indicate that counter was not incremented. + */ + return (ctr_val & RISCV_IOMMU_IOHPMCYCLES_COUNTER) | + (cycle & RISCV_IOMMU_IOHPMCYCLES_OVF); + } + + return (ctr_val + get_cycles() - ctr_prev) | + (cycle & RISCV_IOMMU_IOHPMCYCLES_OVF); +} + +static void hpm_incr_ctr(RISCVIOMMUState *s, uint32_t ctr_idx) +{ + const uint32_t off = ctr_idx << 3; + uint64_t cntr_val; + + cntr_val = ldq_le_p(&s->regs_rw[RISCV_IOMMU_REG_IOHPMCTR_BASE + off]); + stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_IOHPMCTR_BASE + off], cntr_val + 1); + + trace_riscv_iommu_hpm_incr_ctr(cntr_val); + + /* Handle the overflow scenario. */ + if (cntr_val == UINT64_MAX) { + /* + * Generate interrupt only if OF bit is clear. +1 to offset the cycle + * register OF bit. 
+ */ + const uint32_t ovf = + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IOCOUNTOVF, + BIT(ctr_idx + 1), 0); + if (!get_field(ovf, BIT(ctr_idx + 1))) { + riscv_iommu_reg_mod64(s, + RISCV_IOMMU_REG_IOHPMEVT_BASE + off, + RISCV_IOMMU_IOHPMEVT_OF, + 0); + riscv_iommu_notify(s, RISCV_IOMMU_INTR_PM); + } + } +} + +void riscv_iommu_hpm_incr_ctr(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, + unsigned event_id) +{ + const uint32_t inhibit = riscv_iommu_reg_get32( + s, RISCV_IOMMU_REG_IOCOUNTINH); + uint32_t did_gscid; + uint32_t pid_pscid; + uint32_t ctr_idx; + gpointer value; + uint32_t ctrs; + uint64_t evt; + + if (!(s->cap & RISCV_IOMMU_CAP_HPM)) { + return; + } + + value = g_hash_table_lookup(s->hpm_event_ctr_map, + GUINT_TO_POINTER(event_id)); + if (value == NULL) { + return; + } + + for (ctrs = GPOINTER_TO_UINT(value); ctrs != 0; ctrs &= ctrs - 1) { + ctr_idx = ctz32(ctrs); + if (get_field(inhibit, BIT(ctr_idx + 1))) { + continue; + } + + evt = riscv_iommu_reg_get64(s, + RISCV_IOMMU_REG_IOHPMEVT_BASE + (ctr_idx << 3)); + + /* + * It's quite possible that event ID has been changed in counter + * but hashtable hasn't been updated yet. We don't want to increment + * counter for the old event ID. + */ + if (event_id != get_field(evt, RISCV_IOMMU_IOHPMEVT_EVENT_ID)) { + continue; + } + + if (get_field(evt, RISCV_IOMMU_IOHPMEVT_IDT)) { + did_gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID); + pid_pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID); + } else { + did_gscid = ctx->devid; + pid_pscid = ctx->process_id; + } + + if (get_field(evt, RISCV_IOMMU_IOHPMEVT_PV_PSCV)) { + /* + * If the transaction does not have a valid process_id, counter + * increments if device_id matches DID_GSCID. If the transaction + * has a valid process_id, counter increments if device_id + * matches DID_GSCID and process_id matches PID_PSCID. See + * IOMMU Specification, Chapter 5.23. Performance-monitoring + * event selector. 
+ */ + if (ctx->process_id && + get_field(evt, RISCV_IOMMU_IOHPMEVT_PID_PSCID) != pid_pscid) { + continue; + } + } + + if (get_field(evt, RISCV_IOMMU_IOHPMEVT_DV_GSCV)) { + uint32_t mask = ~0; + + if (get_field(evt, RISCV_IOMMU_IOHPMEVT_DMASK)) { + /* + * 1001 1011 mask = GSCID + * 0000 0111 mask = mask ^ (mask + 1) + * 1111 1000 mask = ~mask; + */ + mask = get_field(evt, RISCV_IOMMU_IOHPMEVT_DID_GSCID); + mask = mask ^ (mask + 1); + mask = ~mask; + } + + if ((get_field(evt, RISCV_IOMMU_IOHPMEVT_DID_GSCID) & mask) != + (did_gscid & mask)) { + continue; + } + } + + hpm_incr_ctr(s, ctr_idx); + } +} + +/* Timer callback for cycle counter overflow. */ +void riscv_iommu_hpm_timer_cb(void *priv) +{ + RISCVIOMMUState *s = priv; + const uint32_t inhibit = riscv_iommu_reg_get32( + s, RISCV_IOMMU_REG_IOCOUNTINH); + uint32_t ovf; + + if (get_field(inhibit, RISCV_IOMMU_IOCOUNTINH_CY)) { + return; + } + + if (s->irq_overflow_left > 0) { + uint64_t irq_trigger_at = + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + s->irq_overflow_left; + timer_mod_anticipate_ns(s->hpm_timer, irq_trigger_at); + s->irq_overflow_left = 0; + return; + } + + ovf = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTOVF); + if (!get_field(ovf, RISCV_IOMMU_IOCOUNTOVF_CY)) { + /* + * We don't need to set hpmcycle_val to zero and update hpmcycle_prev to + * current clock value. The way we calculate iohpmcycs will overflow + * and return the correct value. This avoids the need to synchronize + * timer callback and write callback. 
+ */ + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IOCOUNTOVF, + RISCV_IOMMU_IOCOUNTOVF_CY, 0); + riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_IOHPMCYCLES, + RISCV_IOMMU_IOHPMCYCLES_OVF, 0); + riscv_iommu_notify(s, RISCV_IOMMU_INTR_PM); + } +} + +static void hpm_setup_timer(RISCVIOMMUState *s, uint64_t value) +{ + const uint32_t inhibit = riscv_iommu_reg_get32( + s, RISCV_IOMMU_REG_IOCOUNTINH); + uint64_t overflow_at, overflow_ns; + + if (get_field(inhibit, RISCV_IOMMU_IOCOUNTINH_CY)) { + return; + } + + /* + * We are using INT64_MAX here instead of UINT64_MAX because cycle counter + * has 63-bit precision and INT64_MAX is the maximum it can store. + */ + if (value) { + overflow_ns = INT64_MAX - value + 1; + } else { + overflow_ns = INT64_MAX; + } + + overflow_at = (uint64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + overflow_ns; + + if (overflow_at > INT64_MAX) { + s->irq_overflow_left = overflow_at - INT64_MAX; + overflow_at = INT64_MAX; + } + + timer_mod_anticipate_ns(s->hpm_timer, overflow_at); +} + +/* Updates the internal cycle counter state when iocntinh:CY is changed. */ +void riscv_iommu_process_iocntinh_cy(RISCVIOMMUState *s, bool prev_cy_inh) +{ + const uint32_t inhibit = riscv_iommu_reg_get32( + s, RISCV_IOMMU_REG_IOCOUNTINH); + + /* We only need to process CY bit toggle. */ + if (!(inhibit ^ prev_cy_inh)) { + return; + } + + trace_riscv_iommu_hpm_iocntinh_cy(prev_cy_inh); + + if (!(inhibit & RISCV_IOMMU_IOCOUNTINH_CY)) { + /* + * Cycle counter is enabled. Just start the timer again and update + * the clock snapshot value to point to the current time to make + * sure iohpmcycles read is correct. + */ + s->hpmcycle_prev = get_cycles(); + hpm_setup_timer(s, s->hpmcycle_val); + } else { + /* + * Cycle counter is disabled. Stop the timer and update the cycle + * counter to record the current value which is last programmed + * value + the cycles passed so far. 
+ */ + s->hpmcycle_val = s->hpmcycle_val + (get_cycles() - s->hpmcycle_prev); + timer_del(s->hpm_timer); + } +} + +void riscv_iommu_process_hpmcycle_write(RISCVIOMMUState *s) +{ + const uint64_t val = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_IOHPMCYCLES); + const uint32_t ovf = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTOVF); + + trace_riscv_iommu_hpm_cycle_write(ovf, val); + + /* + * Clear OF bit in IOCNTOVF if it's being cleared in IOHPMCYCLES register. + */ + if (get_field(ovf, RISCV_IOMMU_IOCOUNTOVF_CY) && + !get_field(val, RISCV_IOMMU_IOHPMCYCLES_OVF)) { + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IOCOUNTOVF, 0, + RISCV_IOMMU_IOCOUNTOVF_CY); + } + + s->hpmcycle_val = val & ~RISCV_IOMMU_IOHPMCYCLES_OVF; + s->hpmcycle_prev = get_cycles(); + hpm_setup_timer(s, s->hpmcycle_val); +} + +static inline bool check_valid_event_id(unsigned event_id) +{ + return event_id > RISCV_IOMMU_HPMEVENT_INVALID && + event_id < RISCV_IOMMU_HPMEVENT_MAX; +} + +static gboolean hpm_event_equal(gpointer key, gpointer value, gpointer udata) +{ + uint32_t *pair = udata; + + if (GPOINTER_TO_UINT(value) & (1 << pair[0])) { + pair[1] = GPOINTER_TO_UINT(key); + return true; + } + + return false; +} + +/* Caller must check ctr_idx against hpm_ctrs to see if it's supported or not. */ +static void update_event_map(RISCVIOMMUState *s, uint64_t value, + uint32_t ctr_idx) +{ + unsigned event_id = get_field(value, RISCV_IOMMU_IOHPMEVT_EVENT_ID); + uint32_t pair[2] = { ctr_idx, RISCV_IOMMU_HPMEVENT_INVALID }; + uint32_t new_value = 1 << ctr_idx; + gpointer data; + + /* + * If EventID field is RISCV_IOMMU_HPMEVENT_INVALID + * remove the current mapping. 
+ */ + if (event_id == RISCV_IOMMU_HPMEVENT_INVALID) { + data = g_hash_table_find(s->hpm_event_ctr_map, hpm_event_equal, pair); + + new_value = GPOINTER_TO_UINT(data) & ~(new_value); + if (new_value != 0) { + g_hash_table_replace(s->hpm_event_ctr_map, + GUINT_TO_POINTER(pair[1]), + GUINT_TO_POINTER(new_value)); + } else { + g_hash_table_remove(s->hpm_event_ctr_map, + GUINT_TO_POINTER(pair[1])); + } + + return; + } + + /* Update the counter mask if the event is already enabled. */ + if (g_hash_table_lookup_extended(s->hpm_event_ctr_map, + GUINT_TO_POINTER(event_id), + NULL, + &data)) { + new_value |= GPOINTER_TO_UINT(data); + } + + g_hash_table_insert(s->hpm_event_ctr_map, + GUINT_TO_POINTER(event_id), + GUINT_TO_POINTER(new_value)); +} + +void riscv_iommu_process_hpmevt_write(RISCVIOMMUState *s, uint32_t evt_reg) +{ + const uint32_t ctr_idx = (evt_reg - RISCV_IOMMU_REG_IOHPMEVT_BASE) >> 3; + const uint32_t ovf = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTOVF); + uint64_t val = riscv_iommu_reg_get64(s, evt_reg); + + if (ctr_idx >= s->hpm_cntrs) { + return; + } + + trace_riscv_iommu_hpm_evt_write(ctr_idx, ovf, val); + + /* Clear OF bit in IOCNTOVF if it's being cleared in IOHPMEVT register. */ + if (get_field(ovf, BIT(ctr_idx + 1)) && + !get_field(val, RISCV_IOMMU_IOHPMEVT_OF)) { + /* +1 to offset CYCLE register OF bit. */ + riscv_iommu_reg_mod32( + s, RISCV_IOMMU_REG_IOCOUNTOVF, 0, BIT(ctr_idx + 1)); + } + + if (!check_valid_event_id(get_field(val, RISCV_IOMMU_IOHPMEVT_EVENT_ID))) { + /* Reset EventID (WARL) field to invalid. 
*/ + val = set_field(val, RISCV_IOMMU_IOHPMEVT_EVENT_ID, + RISCV_IOMMU_HPMEVENT_INVALID); + riscv_iommu_reg_set64(s, evt_reg, val); + } + + update_event_map(s, val, ctr_idx); +} diff --git a/hw/riscv/riscv-iommu-hpm.h b/hw/riscv/riscv-iommu-hpm.h new file mode 100644 index 0000000..5fc4ef2 --- /dev/null +++ b/hw/riscv/riscv-iommu-hpm.h @@ -0,0 +1,33 @@ +/* + * RISC-V IOMMU - Hardware Performance Monitor (HPM) helpers + * + * Copyright (C) 2022-2023 Rivos Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef HW_RISCV_IOMMU_HPM_H +#define HW_RISCV_IOMMU_HPM_H + +#include "qom/object.h" +#include "hw/riscv/riscv-iommu.h" + +uint64_t riscv_iommu_hpmcycle_read(RISCVIOMMUState *s); +void riscv_iommu_hpm_incr_ctr(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, + unsigned event_id); +void riscv_iommu_hpm_timer_cb(void *priv); +void riscv_iommu_process_iocntinh_cy(RISCVIOMMUState *s, bool prev_cy_inh); +void riscv_iommu_process_hpmcycle_write(RISCVIOMMUState *s); +void riscv_iommu_process_hpmevt_write(RISCVIOMMUState *s, uint32_t evt_reg); + +#endif diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c index e7568ca..d46beb2 100644 --- a/hw/riscv/riscv-iommu.c +++ b/hw/riscv/riscv-iommu.c @@ -29,6 +29,7 @@ #include "cpu_bits.h" #include "riscv-iommu.h" #include "riscv-iommu-bits.h" +#include "riscv-iommu-hpm.h" #include "trace.h" #define LIMIT_CACHE_CTX (1U << 7) @@ -38,7 +39,6 @@ #define PPN_PHYS(ppn) ((ppn) << TARGET_PAGE_BITS) #define PPN_DOWN(phy) ((phy) >> TARGET_PAGE_BITS) -typedef struct RISCVIOMMUContext RISCVIOMMUContext; typedef struct RISCVIOMMUEntry RISCVIOMMUEntry; /* Device assigned I/O address space */ @@ -51,19 +51,6 @@ struct RISCVIOMMUSpace { QLIST_ENTRY(RISCVIOMMUSpace) list; }; -/* Device translation context state. */ -struct RISCVIOMMUContext { - uint64_t devid:24; /* Requester Id, AKA device_id */ - uint64_t process_id:20; /* Process ID. 
PASID for PCIe */ - uint64_t tc; /* Translation Control */ - uint64_t ta; /* Translation Attributes */ - uint64_t satp; /* S-Stage address translation and protection */ - uint64_t gatp; /* G-Stage address translation and protection */ - uint64_t msi_addr_mask; /* MSI filtering - address mask */ - uint64_t msi_addr_pattern; /* MSI filtering - address pattern */ - uint64_t msiptp; /* MSI redirection page table pointer */ -}; - typedef enum RISCVIOMMUTransTag { RISCV_IOMMU_TRANS_TAG_BY, /* Bypass */ RISCV_IOMMU_TRANS_TAG_SS, /* Single Stage */ @@ -100,7 +87,7 @@ static uint8_t riscv_iommu_get_icvec_vector(uint32_t icvec, uint32_t vec_type) } } -static void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type) +void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type) { uint32_t ipsr, icvec, vector; @@ -422,6 +409,13 @@ static int riscv_iommu_spa_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, } } + + if (pass == S_STAGE) { + riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_S_VS_WALKS); + } else { + riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_G_WALKS); + } + /* Read page table entry */ if (sc[pass].ptesize == 4) { uint32_t pte32 = 0; @@ -940,6 +934,7 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx) /* Device directory tree walk */ for (; depth-- > 0; ) { + riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_DD_WALK); /* * Select device id index bits based on device directory tree level * and device context format. 
@@ -967,6 +962,8 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx) addr = PPN_PHYS(get_field(de, RISCV_IOMMU_DDTE_PPN)); } + riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_DD_WALK); + /* index into device context entry page */ addr |= (ctx->devid * dc_len) & ~TARGET_PAGE_MASK; @@ -1032,6 +1029,8 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx) } for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) { + riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK); + /* * Select process id index bits based on process directory tree * level. See IOMMU Specification, 2.2. Process-Directory-Table. @@ -1049,6 +1048,8 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx) addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PC_FSC_PPN)); } + riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK); + /* Leaf entry in PDT */ addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK; if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2, @@ -1418,6 +1419,8 @@ static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, GHashTable *iot_cache; int fault; + riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_URQ); + iot_cache = g_hash_table_ref(s->iot_cache); /* * TC[32] is reserved for custom extensions, used here to temporarily @@ -1428,6 +1431,7 @@ static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, /* Check for ATS request. */ if (iotlb->perm == IOMMU_NONE) { + riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_ATS_RQ); /* Check if ATS is disabled. */ if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS)) { enable_pri = false; @@ -1446,6 +1450,8 @@ static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, goto done; } + riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_TLB_MISS); + /* Translate using device directory / page table information. 
*/ fault = riscv_iommu_spa_fetch(s, ctx, iotlb); @@ -2018,6 +2024,27 @@ static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data) riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, ipsr_set, ipsr_clr); } +static void riscv_iommu_process_hpm_writes(RISCVIOMMUState *s, + uint32_t regb, + bool prev_cy_inh) +{ + switch (regb) { + case RISCV_IOMMU_REG_IOCOUNTINH: + riscv_iommu_process_iocntinh_cy(s, prev_cy_inh); + break; + + case RISCV_IOMMU_REG_IOHPMCYCLES: + case RISCV_IOMMU_REG_IOHPMCYCLES + 4: + riscv_iommu_process_hpmcycle_write(s); + break; + + case RISCV_IOMMU_REG_IOHPMEVT_BASE ... + RISCV_IOMMU_REG_IOHPMEVT(RISCV_IOMMU_IOCOUNT_NUM) + 4: + riscv_iommu_process_hpmevt_write(s, regb & ~7); + break; + } +} + /* * Write the resulting value of 'data' for the reg specified * by 'reg_addr', after considering read-only/read-write/write-clear @@ -2045,6 +2072,7 @@ static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr, uint32_t regb = addr & ~3; uint32_t busy = 0; uint64_t val = 0; + bool cy_inh = false; if ((addr & (size - 1)) != 0) { /* Unsupported MMIO alignment or access size */ @@ -2112,6 +2140,16 @@ static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr, busy = RISCV_IOMMU_TR_REQ_CTL_GO_BUSY; break; + case RISCV_IOMMU_REG_IOCOUNTINH: + if (addr != RISCV_IOMMU_REG_IOCOUNTINH) { + break; + } + /* Store previous value of CY bit. */ + cy_inh = !!(riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTINH) & + RISCV_IOMMU_IOCOUNTINH_CY); + break; + + default: break; } @@ -2130,6 +2168,12 @@ static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr, stl_le_p(&s->regs_rw[regb], rw | busy); } + /* Process HPM writes and update any internal state if needed. 
*/ + if (regb >= RISCV_IOMMU_REG_IOCOUNTOVF && + regb <= (RISCV_IOMMU_REG_IOHPMEVT(RISCV_IOMMU_IOCOUNT_NUM) + 4)) { + riscv_iommu_process_hpm_writes(s, regb, cy_inh); + } + if (process_fn) { process_fn(s); } @@ -2153,7 +2197,28 @@ static MemTxResult riscv_iommu_mmio_read(void *opaque, hwaddr addr, return MEMTX_ACCESS_ERROR; } - ptr = &s->regs_rw[addr]; + /* Compute cycle register value. */ + if ((addr & ~7) == RISCV_IOMMU_REG_IOHPMCYCLES) { + val = riscv_iommu_hpmcycle_read(s); + ptr = (uint8_t *)&val + (addr & 7); + } else if ((addr & ~3) == RISCV_IOMMU_REG_IOCOUNTOVF) { + /* + * Software can read RISCV_IOMMU_REG_IOCOUNTOVF before timer + * callback completes. In which case CY_OF bit in + * RISCV_IOMMU_IOHPMCYCLES_OVF would be 0. Here we take the + * CY_OF bit state from RISCV_IOMMU_REG_IOHPMCYCLES register as + * it's not dependent over the timer callback and is computed + * from cycle overflow. + */ + val = ldq_le_p(&s->regs_rw[addr]); + val |= (riscv_iommu_hpmcycle_read(s) & RISCV_IOMMU_IOHPMCYCLES_OVF) + ? RISCV_IOMMU_IOCOUNTOVF_CY + : 0; + ptr = (uint8_t *)&val + (addr & 3); + } else { + ptr = &s->regs_rw[addr]; + } + val = ldn_le_p(ptr, size); *data = val; @@ -2292,6 +2357,15 @@ static void riscv_iommu_realize(DeviceState *dev, Error **errp) RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4; } + if (s->hpm_cntrs > 0) { + /* Clip number of HPM counters to maximum supported (31). */ + if (s->hpm_cntrs > RISCV_IOMMU_IOCOUNT_NUM) { + s->hpm_cntrs = RISCV_IOMMU_IOCOUNT_NUM; + } + /* Enable hardware performance monitor interface */ + s->cap |= RISCV_IOMMU_CAP_HPM; + } + /* Out-of-reset translation mode: OFF (DMA disabled) BARE (passthrough) */ s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, s->enable_off ? RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE); @@ -2339,6 +2413,18 @@ static void riscv_iommu_realize(DeviceState *dev, Error **errp) RISCV_IOMMU_TR_REQ_CTL_GO_BUSY); } + /* If HPM registers are enabled. 
*/ + if (s->cap & RISCV_IOMMU_CAP_HPM) { + /* +1 for cycle counter bit. */ + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_IOCOUNTINH], + ~((2 << s->hpm_cntrs) - 1)); + stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_IOHPMCYCLES], 0); + memset(&s->regs_ro[RISCV_IOMMU_REG_IOHPMCTR_BASE], + 0x00, s->hpm_cntrs * 8); + memset(&s->regs_ro[RISCV_IOMMU_REG_IOHPMEVT_BASE], + 0x00, s->hpm_cntrs * 8); + } + /* Memory region for downstream access, if specified. */ if (s->target_mr) { s->target_as = g_new0(AddressSpace, 1); @@ -2353,6 +2439,12 @@ static void riscv_iommu_realize(DeviceState *dev, Error **errp) memory_region_init_io(&s->trap_mr, OBJECT(dev), &riscv_iommu_trap_ops, s, "riscv-iommu-trap", ~0ULL); address_space_init(&s->trap_as, &s->trap_mr, "riscv-iommu-trap-as"); + + if (s->cap & RISCV_IOMMU_CAP_HPM) { + s->hpm_timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, riscv_iommu_hpm_timer_cb, s); + s->hpm_event_ctr_map = g_hash_table_new(g_direct_hash, g_direct_equal); + } } static void riscv_iommu_unrealize(DeviceState *dev) @@ -2361,6 +2453,11 @@ static void riscv_iommu_unrealize(DeviceState *dev) g_hash_table_unref(s->iot_cache); g_hash_table_unref(s->ctx_cache); + + if (s->cap & RISCV_IOMMU_CAP_HPM) { + g_hash_table_unref(s->hpm_event_ctr_map); + timer_free(s->hpm_timer); + } } void riscv_iommu_reset(RISCVIOMMUState *s) @@ -2411,6 +2508,8 @@ static const Property riscv_iommu_properties[] = { DEFINE_PROP_BOOL("g-stage", RISCVIOMMUState, enable_g_stage, TRUE), DEFINE_PROP_LINK("downstream-mr", RISCVIOMMUState, target_mr, TYPE_MEMORY_REGION, MemoryRegion *), + DEFINE_PROP_UINT8("hpm-counters", RISCVIOMMUState, hpm_cntrs, + RISCV_IOMMU_IOCOUNT_NUM), }; static void riscv_iommu_class_init(ObjectClass *klass, void* data) diff --git a/hw/riscv/riscv-iommu.h b/hw/riscv/riscv-iommu.h index 9424989..a31aa62 100644 --- a/hw/riscv/riscv-iommu.h +++ b/hw/riscv/riscv-iommu.h @@ -20,6 +20,8 @@ #define HW_RISCV_IOMMU_STATE_H #include "qom/object.h" +#include "hw/qdev-properties.h" +#include "system/dma.h" 
#include "hw/riscv/iommu.h" #include "hw/riscv/riscv-iommu-bits.h" @@ -58,11 +60,6 @@ struct RISCVIOMMUState { /* interrupt notifier */ void (*notify)(RISCVIOMMUState *iommu, unsigned vector); - /* IOMMU State Machine */ - QemuThread core_proc; /* Background processing thread */ - QemuCond core_cond; /* Background processing wake up signal */ - unsigned core_exec; /* Processing thread execution actions */ - /* IOMMU target address space */ AddressSpace *target_as; MemoryRegion *target_mr; @@ -84,12 +81,37 @@ struct RISCVIOMMUState { QLIST_ENTRY(RISCVIOMMUState) iommus; QLIST_HEAD(, RISCVIOMMUSpace) spaces; + + /* HPM cycle counter */ + QEMUTimer *hpm_timer; + uint64_t hpmcycle_val; /* Current value of cycle register */ + uint64_t hpmcycle_prev; /* Saved value of QEMU_CLOCK_VIRTUAL clock */ + uint64_t irq_overflow_left; /* Value beyond INT64_MAX after overflow */ + + /* HPM event counters */ + GHashTable *hpm_event_ctr_map; /* Mapping of events to counters */ + uint8_t hpm_cntrs; }; void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus, Error **errp); void riscv_iommu_set_cap_igs(RISCVIOMMUState *s, riscv_iommu_igs_mode mode); void riscv_iommu_reset(RISCVIOMMUState *s); +void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type); + +typedef struct RISCVIOMMUContext RISCVIOMMUContext; +/* Device translation context state. */ +struct RISCVIOMMUContext { + uint64_t devid:24; /* Requester Id, AKA device_id */ + uint64_t process_id:20; /* Process ID. 
PASID for PCIe */ + uint64_t tc; /* Translation Control */ + uint64_t ta; /* Translation Attributes */ + uint64_t satp; /* S-Stage address translation and protection */ + uint64_t gatp; /* G-Stage address translation and protection */ + uint64_t msi_addr_mask; /* MSI filtering - address mask */ + uint64_t msi_addr_pattern; /* MSI filtering - address pattern */ + uint64_t msiptp; /* MSI redirection page table pointer */ +}; /* private helpers */ diff --git a/hw/riscv/trace-events b/hw/riscv/trace-events index 7bcbb03..b50b14a 100644 --- a/hw/riscv/trace-events +++ b/hw/riscv/trace-events @@ -19,3 +19,8 @@ riscv_iommu_sys_irq_sent(uint32_t vector) "IRQ sent to vector %u" riscv_iommu_sys_msi_sent(uint32_t vector, uint64_t msi_addr, uint32_t msi_data, uint32_t result) "MSI sent to vector %u msi_addr 0x%"PRIx64" msi_data 0x%x result %u" riscv_iommu_sys_reset_hold(int reset_type) "reset type %d" riscv_iommu_pci_reset_hold(int reset_type) "reset type %d" +riscv_iommu_hpm_read(uint64_t cycle, uint32_t inhibit, uint64_t ctr_prev, uint64_t ctr_val) "cycle 0x%"PRIx64" inhibit 0x%x ctr_prev 0x%"PRIx64" ctr_val 0x%"PRIx64 +riscv_iommu_hpm_incr_ctr(uint64_t cntr_val) "cntr_val 0x%"PRIx64 +riscv_iommu_hpm_iocntinh_cy(bool prev_cy_inh) "prev_cy_inh %d" +riscv_iommu_hpm_cycle_write(uint32_t ovf, uint64_t val) "ovf 0x%x val 0x%"PRIx64 +riscv_iommu_hpm_evt_write(uint32_t ctr_idx, uint32_t ovf, uint64_t val) "ctr_idx 0x%x ovf 0x%x val 0x%"PRIx64 diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index 241389d..dae46f4 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -971,6 +971,7 @@ static void create_fdt_uart(RISCVVirtState *s, const MemMapEntry *memmap, } qemu_fdt_setprop_string(ms->fdt, "/chosen", "stdout-path", name); + qemu_fdt_setprop_string(ms->fdt, "/aliases", "serial0", name); } static void create_fdt_rtc(RISCVVirtState *s, const MemMapEntry *memmap, @@ -1180,6 +1181,8 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap) qemu_fdt_setprop(ms->fdt, 
"/chosen", "rng-seed", rng_seed, sizeof(rng_seed)); + qemu_fdt_add_subnode(ms->fdt, "/aliases"); + create_fdt_flash(s, memmap); create_fdt_fw_cfg(s, memmap); create_fdt_pmu(s); diff --git a/hw/rtc/goldfish_rtc.c b/hw/rtc/goldfish_rtc.c index fa1d905..0f1b53e 100644 --- a/hw/rtc/goldfish_rtc.c +++ b/hw/rtc/goldfish_rtc.c @@ -178,38 +178,21 @@ static void goldfish_rtc_write(void *opaque, hwaddr offset, trace_goldfish_rtc_write(offset, value); } -static int goldfish_rtc_pre_save(void *opaque) -{ - uint64_t delta; - GoldfishRTCState *s = opaque; - - /* - * We want to migrate this offset, which sounds straightforward. - * Unfortunately, we cannot directly pass tick_offset because - * rtc_clock on destination Host might not be same source Host. - * - * To tackle, this we pass tick_offset relative to vm_clock from - * source Host and make it relative to rtc_clock at destination Host. - */ - delta = qemu_clock_get_ns(rtc_clock) - - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - s->tick_offset_vmstate = s->tick_offset + delta; - - return 0; -} - static int goldfish_rtc_post_load(void *opaque, int version_id) { - uint64_t delta; GoldfishRTCState *s = opaque; - /* - * We extract tick_offset from tick_offset_vmstate by doing - * reverse math compared to pre_save() function. - */ - delta = qemu_clock_get_ns(rtc_clock) - - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - s->tick_offset = s->tick_offset_vmstate - delta; + if (version_id < 3) { + /* + * Previous versions didn't migrate tick_offset directly. Instead, they + * migrated tick_offset_vmstate, which is a recalculation based on + * QEMU_CLOCK_VIRTUAL. We use tick_offset_vmstate when migrating from + * older versions. 
+ */ + uint64_t delta = qemu_clock_get_ns(rtc_clock) - + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + s->tick_offset = s->tick_offset_vmstate - delta; + } goldfish_rtc_set_alarm(s); @@ -239,8 +222,7 @@ static const MemoryRegionOps goldfish_rtc_ops[2] = { static const VMStateDescription goldfish_rtc_vmstate = { .name = TYPE_GOLDFISH_RTC, - .version_id = 2, - .pre_save = goldfish_rtc_pre_save, + .version_id = 3, .post_load = goldfish_rtc_post_load, .fields = (const VMStateField[]) { VMSTATE_UINT64(tick_offset_vmstate, GoldfishRTCState), @@ -249,6 +231,7 @@ static const VMStateDescription goldfish_rtc_vmstate = { VMSTATE_UINT32(irq_pending, GoldfishRTCState), VMSTATE_UINT32(irq_enabled, GoldfishRTCState), VMSTATE_UINT32(time_high, GoldfishRTCState), + VMSTATE_UINT64_V(tick_offset, GoldfishRTCState, 3), VMSTATE_END_OF_LIST() } }; |