aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
authorStefan Hajnoczi <stefanha@redhat.com>2025-03-05 21:56:46 +0800
committerStefan Hajnoczi <stefanha@redhat.com>2025-03-05 21:56:46 +0800
commit9ee727802012ddb32e193d84052a44e382088277 (patch)
tree1f0b568403bff2387dcd63c38759a5ab66497292 /hw
parentf5e6e13124440797308d2c044f44d9e655fcb74d (diff)
parent4db19d5b21e058e6eb3474b6be470d1184afaa9e (diff)
downloadqemu-9ee727802012ddb32e193d84052a44e382088277.zip
qemu-9ee727802012ddb32e193d84052a44e382088277.tar.gz
qemu-9ee727802012ddb32e193d84052a44e382088277.tar.bz2
Merge tag 'pull-riscv-to-apply-20250305-1' of https://github.com/alistair23/qemu into staging
Third RISC-V PR for 10.0 * CSR coverity fixes * Fix unexpected behavior of vector reduction instructions when vl is 0 * Fix incorrect vlen comparison in prop_vlen_set * Throw debug exception before page fault * Remove redundant "hart_idx" masking from APLIC * Add support for Control Transfer Records Ext * Remove redundant struct members from the IOMMU * Remove duplicate definitions from the IOMMU * Fix tick_offset migration for Goldfish RTC * Add serial alias in virt machine DTB * Remove Bin Meng from RISC-V maintainers * Add support for Control Transfer Records Ext * Log guest errors when reserved bits are set in PTEs * Add missing Sdtrig disas CSRs * Correct the hpmevent sscofpmf mask * Mask upper sscofpmf bits during validation * Remove warnings about Smdbltrp/Smrnmi being disabled * Respect mseccfg.RLB bit for TOR mode PMP entry * Update KVM support to Linux 6.14-rc3 * IOMMU HPM support * Support Sscofpmf/Svade/Svadu/Smnpm/Ssnpm extensions in KVM * Add --ignore-family option to binfmt * Refinement for AIA with KVM acceleration * Reset time changes for KVM # -----BEGIN PGP SIGNATURE----- # # iQIzBAABCAAdFiEEaukCtqfKh31tZZKWr3yVEwxTgBMFAmfHrkEACgkQr3yVEwxT # gBNGTA/+N9nBPZt5cv0E/0EDZMQS8RQrQvz1yHRgAXOq8RnOdcL72v8wovGAfnVu # l0BXDoVBvw4f2Xm9Q4ptlfH8HAefCeQ4E/K9j5Lwxr8OqZHFg6e+JQIyZOt6wBWI # hJbz1/laJIbXq3cGgwcE/l0aGfb2UAAsA4dsZVt/MnjAV8GS7BF9RCkgCPxD4FZA # 0PLiq9dF+4o4q7PxnxAbUVz/uhLzqmcnQemQFHbf9Wms3tZEDKmPSoKP/v+01Rkw # tm+cgy7OocpgygbMc0nykYG50P+raUBSesk/jFGeKj8cU4IeMuzDsVPWcd4rG+0X # Z+nENfOY7vOqMCXgaQCW2r4vEQx2Gj0yQG6xmVAemRWzFHJdz5W01/uUSHzJSB+L # +VbAH55HYKr6sbgecqInQ/rsHKyw6D5QFcj/guz+kvhsH9rJ5q60uywrWL5OEuaK # vKv7cSZghlf9bwy6soassXxk8z+j4psJ7WnnVpynNKMew9yFFDhayuIFbo9952gH # 3+NCm2cQrkTYJOXAJwkxBD+I4AXxNSuxNjaVANk9q80uqbT9JiHM7pcvbJI00Fji # OutJSPYtVXEin9Ev3sJ05YQHsIcZ/Noi3O5IdaRI0AMk/8gyGyhFCVgSpV52dH59 # HguPK05e5cW/xgElGUPHrU+UtzE05p18HnSoVPclF/B5rc8QXN0= # =dobk # -----END PGP SIGNATURE----- # gpg: Signature made Wed 05 Mar 2025 09:52:01 HKT # gpg: using RSA key 6AE902B6A7CA877D6D659296AF7C95130C538013 # gpg: Good signature from "Alistair Francis <alistair@alistair23.me>" [unknown] # gpg: WARNING: This key is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. # Primary key fingerprint: 6AE9 02B6 A7CA 877D 6D65 9296 AF7C 9513 0C53 8013 * tag 'pull-riscv-to-apply-20250305-1' of https://github.com/alistair23/qemu: (59 commits) target/riscv/kvm: add missing KVM CSRs target/riscv/kvm: add kvm_riscv_reset_regs_csr() target/riscv/cpu: remove unneeded !kvm_enabled() check hw/intc/aplic: refine kvm_msicfgaddr hw/intc/aplic: refine the APLIC realize hw/intc/imsic: refine the IMSIC realize binfmt: Add --ignore-family option binfmt: Normalize host CPU architecture binfmt: Shuffle things around target/riscv/kvm: Add some exts support docs/specs/riscv-iommu.rst: add HPM support info hw/riscv: add IOMMU HPM trace events hw/riscv/riscv-iommu.c: add RISCV_IOMMU_CAP_HPM cap hw/riscv/riscv-iommu: add hpm events mmio write hw/riscv/riscv-iommu: add IOHPMCYCLES mmio write hw/riscv/riscv-iommu: add IOCOUNTINH mmio writes hw/riscv/riscv-iommu: instantiate hpm_timer hw/riscv/riscv-iommu: add riscv_iommu_hpm_incr_ctr() hw/riscv/riscv-iommu: add riscv-iommu-hpm file hw/riscv/riscv-iommu-bits.h: HPM bits ... Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'hw')
-rw-r--r--hw/intc/riscv_aplic.c74
-rw-r--r--hw/intc/riscv_imsic.c47
-rw-r--r--hw/riscv/meson.build3
-rw-r--r--hw/riscv/riscv-iommu-bits.h69
-rw-r--r--hw/riscv/riscv-iommu-hpm.c381
-rw-r--r--hw/riscv/riscv-iommu-hpm.h33
-rw-r--r--hw/riscv/riscv-iommu.c131
-rw-r--r--hw/riscv/riscv-iommu.h32
-rw-r--r--hw/riscv/trace-events5
-rw-r--r--hw/riscv/virt.c3
-rw-r--r--hw/rtc/goldfish_rtc.c43
11 files changed, 697 insertions, 124 deletions
diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c
index 4866649..5964cde 100644
--- a/hw/intc/riscv_aplic.c
+++ b/hw/intc/riscv_aplic.c
@@ -181,8 +181,10 @@ void riscv_aplic_set_kvm_msicfgaddr(RISCVAPLICState *aplic, hwaddr addr)
{
#ifdef CONFIG_KVM
if (riscv_use_emulated_aplic(aplic->msimode)) {
+ addr >>= APLIC_xMSICFGADDR_PPN_SHIFT;
aplic->kvm_msicfgaddr = extract64(addr, 0, 32);
- aplic->kvm_msicfgaddrH = extract64(addr, 32, 32);
+ aplic->kvm_msicfgaddrH = extract64(addr, 32, 32) &
+ APLIC_xMSICFGADDRH_VALID_MASK;
}
#endif
}
@@ -403,12 +405,17 @@ static void riscv_aplic_msi_send(RISCVAPLICState *aplic,
}
}
- if (aplic->mmode) {
- msicfgaddr = aplic_m->mmsicfgaddr;
- msicfgaddrH = aplic_m->mmsicfgaddrH;
+ if (aplic->kvm_splitmode) {
+ msicfgaddr = aplic->kvm_msicfgaddr;
+ msicfgaddrH = ((uint64_t)aplic->kvm_msicfgaddrH << 32);
} else {
- msicfgaddr = aplic_m->smsicfgaddr;
- msicfgaddrH = aplic_m->smsicfgaddrH;
+ if (aplic->mmode) {
+ msicfgaddr = aplic_m->mmsicfgaddr;
+ msicfgaddrH = aplic_m->mmsicfgaddrH;
+ } else {
+ msicfgaddr = aplic_m->smsicfgaddr;
+ msicfgaddrH = aplic_m->smsicfgaddrH;
+ }
}
lhxs = (msicfgaddrH >> APLIC_xMSICFGADDRH_LHXS_SHIFT) &
@@ -421,7 +428,6 @@ static void riscv_aplic_msi_send(RISCVAPLICState *aplic,
APLIC_xMSICFGADDRH_HHXW_MASK;
group_idx = hart_idx >> lhxw;
- hart_idx &= APLIC_xMSICFGADDR_PPN_LHX_MASK(lhxw);
addr = msicfgaddr;
addr |= ((uint64_t)(msicfgaddrH & APLIC_xMSICFGADDRH_BAPPN_MASK)) << 32;
@@ -432,11 +438,6 @@ static void riscv_aplic_msi_send(RISCVAPLICState *aplic,
addr |= (uint64_t)(guest_idx & APLIC_xMSICFGADDR_PPN_HART(lhxs));
addr <<= APLIC_xMSICFGADDR_PPN_SHIFT;
- if (aplic->kvm_splitmode) {
- addr |= aplic->kvm_msicfgaddr;
- addr |= ((uint64_t)aplic->kvm_msicfgaddrH << 32);
- }
-
address_space_stl_le(&address_space_memory, addr,
eiid, MEMTXATTRS_UNSPECIFIED, &result);
if (result != MEMTX_OK) {
@@ -894,6 +895,26 @@ static void riscv_aplic_realize(DeviceState *dev, Error **errp)
RISCVAPLICState *aplic = RISCV_APLIC(dev);
if (riscv_use_emulated_aplic(aplic->msimode)) {
+ /* Create output IRQ lines for non-MSI mode */
+ if (!aplic->msimode) {
+ /* Claim the CPU interrupt to be triggered by this APLIC */
+ for (i = 0; i < aplic->num_harts; i++) {
+ RISCVCPU *cpu;
+
+ cpu = RISCV_CPU(cpu_by_arch_id(aplic->hartid_base + i));
+ if (riscv_cpu_claim_interrupts(cpu,
+ (aplic->mmode) ? MIP_MEIP : MIP_SEIP) < 0) {
+ error_report("%s already claimed",
+ (aplic->mmode) ? "MEIP" : "SEIP");
+ exit(1);
+ }
+ }
+
+ aplic->external_irqs = g_malloc(sizeof(qemu_irq) *
+ aplic->num_harts);
+ qdev_init_gpio_out(dev, aplic->external_irqs, aplic->num_harts);
+ }
+
aplic->bitfield_words = (aplic->num_irqs + 31) >> 5;
aplic->sourcecfg = g_new0(uint32_t, aplic->num_irqs);
aplic->state = g_new0(uint32_t, aplic->num_irqs);
@@ -928,23 +949,6 @@ static void riscv_aplic_realize(DeviceState *dev, Error **errp)
}
}
- /* Create output IRQ lines for non-MSI mode */
- if (!aplic->msimode) {
- aplic->external_irqs = g_malloc(sizeof(qemu_irq) * aplic->num_harts);
- qdev_init_gpio_out(dev, aplic->external_irqs, aplic->num_harts);
-
- /* Claim the CPU interrupt to be triggered by this APLIC */
- for (i = 0; i < aplic->num_harts; i++) {
- RISCVCPU *cpu = RISCV_CPU(cpu_by_arch_id(aplic->hartid_base + i));
- if (riscv_cpu_claim_interrupts(cpu,
- (aplic->mmode) ? MIP_MEIP : MIP_SEIP) < 0) {
- error_report("%s already claimed",
- (aplic->mmode) ? "MEIP" : "SEIP");
- exit(1);
- }
- }
- }
-
msi_nonbroken = true;
}
@@ -1068,15 +1072,15 @@ DeviceState *riscv_aplic_create(hwaddr addr, hwaddr size,
if (riscv_use_emulated_aplic(msimode)) {
sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr);
- }
- if (!msimode) {
- for (i = 0; i < num_harts; i++) {
- CPUState *cpu = cpu_by_arch_id(hartid_base + i);
+ if (!msimode) {
+ for (i = 0; i < num_harts; i++) {
+ CPUState *cpu = cpu_by_arch_id(hartid_base + i);
- qdev_connect_gpio_out_named(dev, NULL, i,
- qdev_get_gpio_in(DEVICE(cpu),
+ qdev_connect_gpio_out_named(dev, NULL, i,
+ qdev_get_gpio_in(DEVICE(cpu),
(mmode) ? IRQ_M_EXT : IRQ_S_EXT));
+ }
}
}
diff --git a/hw/intc/riscv_imsic.c b/hw/intc/riscv_imsic.c
index dc8162c..241b12f 100644
--- a/hw/intc/riscv_imsic.c
+++ b/hw/intc/riscv_imsic.c
@@ -349,7 +349,19 @@ static void riscv_imsic_realize(DeviceState *dev, Error **errp)
CPUState *cpu = cpu_by_arch_id(imsic->hartid);
CPURISCVState *env = cpu ? cpu_env(cpu) : NULL;
+ /* Claim the CPU interrupt to be triggered by this IMSIC */
+ if (riscv_cpu_claim_interrupts(rcpu,
+ (imsic->mmode) ? MIP_MEIP : MIP_SEIP) < 0) {
+ error_setg(errp, "%s already claimed",
+ (imsic->mmode) ? "MEIP" : "SEIP");
+ return;
+ }
+
if (!kvm_irqchip_in_kernel()) {
+ /* Create output IRQ lines */
+ imsic->external_irqs = g_malloc(sizeof(qemu_irq) * imsic->num_pages);
+ qdev_init_gpio_out(dev, imsic->external_irqs, imsic->num_pages);
+
imsic->num_eistate = imsic->num_pages * imsic->num_irqs;
imsic->eidelivery = g_new0(uint32_t, imsic->num_pages);
imsic->eithreshold = g_new0(uint32_t, imsic->num_pages);
@@ -361,18 +373,6 @@ static void riscv_imsic_realize(DeviceState *dev, Error **errp)
IMSIC_MMIO_SIZE(imsic->num_pages));
sysbus_init_mmio(SYS_BUS_DEVICE(dev), &imsic->mmio);
- /* Claim the CPU interrupt to be triggered by this IMSIC */
- if (riscv_cpu_claim_interrupts(rcpu,
- (imsic->mmode) ? MIP_MEIP : MIP_SEIP) < 0) {
- error_setg(errp, "%s already claimed",
- (imsic->mmode) ? "MEIP" : "SEIP");
- return;
- }
-
- /* Create output IRQ lines */
- imsic->external_irqs = g_malloc(sizeof(qemu_irq) * imsic->num_pages);
- qdev_init_gpio_out(dev, imsic->external_irqs, imsic->num_pages);
-
/* Force select AIA feature and setup CSR read-modify-write callback */
if (env) {
if (!imsic->mmode) {
@@ -381,8 +381,11 @@ static void riscv_imsic_realize(DeviceState *dev, Error **errp)
} else {
rcpu->cfg.ext_smaia = true;
}
- riscv_cpu_set_aia_ireg_rmw_fn(env, (imsic->mmode) ? PRV_M : PRV_S,
- riscv_imsic_rmw, imsic);
+
+ if (!kvm_irqchip_in_kernel()) {
+ riscv_cpu_set_aia_ireg_rmw_fn(env, (imsic->mmode) ? PRV_M : PRV_S,
+ riscv_imsic_rmw, imsic);
+ }
}
msi_nonbroken = true;
@@ -464,15 +467,17 @@ DeviceState *riscv_imsic_create(hwaddr addr, uint32_t hartid, bool mmode,
sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr);
- for (i = 0; i < num_pages; i++) {
- if (!i) {
- qdev_connect_gpio_out_named(dev, NULL, i,
- qdev_get_gpio_in(DEVICE(cpu),
+ if (!kvm_irqchip_in_kernel()) {
+ for (i = 0; i < num_pages; i++) {
+ if (!i) {
+ qdev_connect_gpio_out_named(dev, NULL, i,
+ qdev_get_gpio_in(DEVICE(cpu),
(mmode) ? IRQ_M_EXT : IRQ_S_EXT));
- } else {
- qdev_connect_gpio_out_named(dev, NULL, i,
- qdev_get_gpio_in(DEVICE(cpu),
+ } else {
+ qdev_connect_gpio_out_named(dev, NULL, i,
+ qdev_get_gpio_in(DEVICE(cpu),
IRQ_LOCAL_MAX + i - 1));
+ }
}
}
diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build
index 3c7e083..c22f3a7 100644
--- a/hw/riscv/meson.build
+++ b/hw/riscv/meson.build
@@ -10,7 +10,8 @@ riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: files('sifive_u.c'))
riscv_ss.add(when: 'CONFIG_SPIKE', if_true: files('spike.c'))
riscv_ss.add(when: 'CONFIG_MICROCHIP_PFSOC', if_true: files('microchip_pfsoc.c'))
riscv_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c'))
-riscv_ss.add(when: 'CONFIG_RISCV_IOMMU', if_true: files('riscv-iommu.c', 'riscv-iommu-pci.c', 'riscv-iommu-sys.c'))
+riscv_ss.add(when: 'CONFIG_RISCV_IOMMU', if_true: files(
+ 'riscv-iommu.c', 'riscv-iommu-pci.c', 'riscv-iommu-sys.c', 'riscv-iommu-hpm.c'))
riscv_ss.add(when: 'CONFIG_MICROBLAZE_V', if_true: files('microblaze-v-generic.c'))
hw_arch += {'riscv': riscv_ss}
diff --git a/hw/riscv/riscv-iommu-bits.h b/hw/riscv/riscv-iommu-bits.h
index 485f36b..b7cb1bc 100644
--- a/hw/riscv/riscv-iommu-bits.h
+++ b/hw/riscv/riscv-iommu-bits.h
@@ -50,8 +50,14 @@ struct riscv_iommu_pq_record {
#define RISCV_IOMMU_PREQ_HDR_PRIV BIT_ULL(33)
#define RISCV_IOMMU_PREQ_HDR_EXEC BIT_ULL(34)
#define RISCV_IOMMU_PREQ_HDR_DID GENMASK_ULL(63, 40)
+
/* Payload fields */
+#define RISCV_IOMMU_PREQ_PAYLOAD_R BIT_ULL(0)
+#define RISCV_IOMMU_PREQ_PAYLOAD_W BIT_ULL(1)
+#define RISCV_IOMMU_PREQ_PAYLOAD_L BIT_ULL(2)
#define RISCV_IOMMU_PREQ_PAYLOAD_M GENMASK_ULL(2, 0)
+#define RISCV_IOMMU_PREQ_PRG_INDEX GENMASK_ULL(11, 3)
+#define RISCV_IOMMU_PREQ_UADDR GENMASK_ULL(63, 12)
/* Common field positions */
#define RISCV_IOMMU_PPN_FIELD GENMASK_ULL(53, 10)
@@ -82,6 +88,7 @@ struct riscv_iommu_pq_record {
#define RISCV_IOMMU_CAP_ATS BIT_ULL(25)
#define RISCV_IOMMU_CAP_T2GPA BIT_ULL(26)
#define RISCV_IOMMU_CAP_IGS GENMASK_ULL(29, 28)
+#define RISCV_IOMMU_CAP_HPM BIT_ULL(30)
#define RISCV_IOMMU_CAP_DBG BIT_ULL(31)
#define RISCV_IOMMU_CAP_PAS GENMASK_ULL(37, 32)
#define RISCV_IOMMU_CAP_PD8 BIT_ULL(38)
@@ -191,6 +198,52 @@ enum {
RISCV_IOMMU_INTR_COUNT
};
+#define RISCV_IOMMU_IOCOUNT_NUM 31
+
+/* 5.19 Performance monitoring counter overflow status (32bits) */
+#define RISCV_IOMMU_REG_IOCOUNTOVF 0x0058
+#define RISCV_IOMMU_IOCOUNTOVF_CY BIT(0)
+
+/* 5.20 Performance monitoring counter inhibits (32bits) */
+#define RISCV_IOMMU_REG_IOCOUNTINH 0x005C
+#define RISCV_IOMMU_IOCOUNTINH_CY BIT(0)
+
+/* 5.21 Performance monitoring cycles counter (64bits) */
+#define RISCV_IOMMU_REG_IOHPMCYCLES 0x0060
+#define RISCV_IOMMU_IOHPMCYCLES_COUNTER GENMASK_ULL(62, 0)
+#define RISCV_IOMMU_IOHPMCYCLES_OVF BIT_ULL(63)
+
+/* 5.22 Performance monitoring event counters (31 * 64bits) */
+#define RISCV_IOMMU_REG_IOHPMCTR_BASE 0x0068
+#define RISCV_IOMMU_REG_IOHPMCTR(_n) \
+ (RISCV_IOMMU_REG_IOHPMCTR_BASE + (_n * 0x8))
+
+/* 5.23 Performance monitoring event selectors (31 * 64bits) */
+#define RISCV_IOMMU_REG_IOHPMEVT_BASE 0x0160
+#define RISCV_IOMMU_REG_IOHPMEVT(_n) \
+ (RISCV_IOMMU_REG_IOHPMEVT_BASE + (_n * 0x8))
+#define RISCV_IOMMU_IOHPMEVT_EVENT_ID GENMASK_ULL(14, 0)
+#define RISCV_IOMMU_IOHPMEVT_DMASK BIT_ULL(15)
+#define RISCV_IOMMU_IOHPMEVT_PID_PSCID GENMASK_ULL(35, 16)
+#define RISCV_IOMMU_IOHPMEVT_DID_GSCID GENMASK_ULL(59, 36)
+#define RISCV_IOMMU_IOHPMEVT_PV_PSCV BIT_ULL(60)
+#define RISCV_IOMMU_IOHPMEVT_DV_GSCV BIT_ULL(61)
+#define RISCV_IOMMU_IOHPMEVT_IDT BIT_ULL(62)
+#define RISCV_IOMMU_IOHPMEVT_OF BIT_ULL(63)
+
+enum RISCV_IOMMU_HPMEVENT_id {
+ RISCV_IOMMU_HPMEVENT_INVALID = 0,
+ RISCV_IOMMU_HPMEVENT_URQ = 1,
+ RISCV_IOMMU_HPMEVENT_TRQ = 2,
+ RISCV_IOMMU_HPMEVENT_ATS_RQ = 3,
+ RISCV_IOMMU_HPMEVENT_TLB_MISS = 4,
+ RISCV_IOMMU_HPMEVENT_DD_WALK = 5,
+ RISCV_IOMMU_HPMEVENT_PD_WALK = 6,
+ RISCV_IOMMU_HPMEVENT_S_VS_WALKS = 7,
+ RISCV_IOMMU_HPMEVENT_G_WALKS = 8,
+ RISCV_IOMMU_HPMEVENT_MAX = 9
+};
+
/* 5.24 Translation request IOVA (64bits) */
#define RISCV_IOMMU_REG_TR_REQ_IOVA 0x0258
@@ -382,22 +435,6 @@ enum riscv_iommu_fq_ttypes {
RISCV_IOMMU_FW_TTYPE_PCIE_MSG_REQ = 9,
};
-/* Header fields */
-#define RISCV_IOMMU_PREQ_HDR_PID GENMASK_ULL(31, 12)
-#define RISCV_IOMMU_PREQ_HDR_PV BIT_ULL(32)
-#define RISCV_IOMMU_PREQ_HDR_PRIV BIT_ULL(33)
-#define RISCV_IOMMU_PREQ_HDR_EXEC BIT_ULL(34)
-#define RISCV_IOMMU_PREQ_HDR_DID GENMASK_ULL(63, 40)
-
-/* Payload fields */
-#define RISCV_IOMMU_PREQ_PAYLOAD_R BIT_ULL(0)
-#define RISCV_IOMMU_PREQ_PAYLOAD_W BIT_ULL(1)
-#define RISCV_IOMMU_PREQ_PAYLOAD_L BIT_ULL(2)
-#define RISCV_IOMMU_PREQ_PAYLOAD_M GENMASK_ULL(2, 0)
-#define RISCV_IOMMU_PREQ_PRG_INDEX GENMASK_ULL(11, 3)
-#define RISCV_IOMMU_PREQ_UADDR GENMASK_ULL(63, 12)
-
-
/*
* struct riscv_iommu_msi_pte - MSI Page Table Entry
*/
diff --git a/hw/riscv/riscv-iommu-hpm.c b/hw/riscv/riscv-iommu-hpm.c
new file mode 100644
index 0000000..c5034bf
--- /dev/null
+++ b/hw/riscv/riscv-iommu-hpm.c
@@ -0,0 +1,381 @@
+/*
+ * RISC-V IOMMU - Hardware Performance Monitor (HPM) helpers
+ *
+ * Copyright (C) 2022-2023 Rivos Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/timer.h"
+#include "cpu_bits.h"
+#include "riscv-iommu-hpm.h"
+#include "riscv-iommu.h"
+#include "riscv-iommu-bits.h"
+#include "trace.h"
+
+/* For now we assume IOMMU HPM frequency to be 1GHz so 1-cycle is of 1-ns. */
+static inline uint64_t get_cycles(void)
+{
+ return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+}
+
+uint64_t riscv_iommu_hpmcycle_read(RISCVIOMMUState *s)
+{
+ const uint64_t cycle = riscv_iommu_reg_get64(
+ s, RISCV_IOMMU_REG_IOHPMCYCLES);
+ const uint32_t inhibit = riscv_iommu_reg_get32(
+ s, RISCV_IOMMU_REG_IOCOUNTINH);
+ const uint64_t ctr_prev = s->hpmcycle_prev;
+ const uint64_t ctr_val = s->hpmcycle_val;
+
+ trace_riscv_iommu_hpm_read(cycle, inhibit, ctr_prev, ctr_val);
+
+ if (get_field(inhibit, RISCV_IOMMU_IOCOUNTINH_CY)) {
+ /*
+ * Counter should not increment if inhibit bit is set. We can't really
+ * stop the QEMU_CLOCK_VIRTUAL, so we just return the last updated
+ * counter value to indicate that counter was not incremented.
+ */
+ return (ctr_val & RISCV_IOMMU_IOHPMCYCLES_COUNTER) |
+ (cycle & RISCV_IOMMU_IOHPMCYCLES_OVF);
+ }
+
+ return (ctr_val + get_cycles() - ctr_prev) |
+ (cycle & RISCV_IOMMU_IOHPMCYCLES_OVF);
+}
+
+static void hpm_incr_ctr(RISCVIOMMUState *s, uint32_t ctr_idx)
+{
+ const uint32_t off = ctr_idx << 3;
+ uint64_t cntr_val;
+
+ cntr_val = ldq_le_p(&s->regs_rw[RISCV_IOMMU_REG_IOHPMCTR_BASE + off]);
+ stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_IOHPMCTR_BASE + off], cntr_val + 1);
+
+ trace_riscv_iommu_hpm_incr_ctr(cntr_val);
+
+ /* Handle the overflow scenario. */
+ if (cntr_val == UINT64_MAX) {
+ /*
+ * Generate interrupt only if OF bit is clear. +1 to offset the cycle
+ * register OF bit.
+ */
+ const uint32_t ovf =
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IOCOUNTOVF,
+ BIT(ctr_idx + 1), 0);
+ if (!get_field(ovf, BIT(ctr_idx + 1))) {
+ riscv_iommu_reg_mod64(s,
+ RISCV_IOMMU_REG_IOHPMEVT_BASE + off,
+ RISCV_IOMMU_IOHPMEVT_OF,
+ 0);
+ riscv_iommu_notify(s, RISCV_IOMMU_INTR_PM);
+ }
+ }
+}
+
+void riscv_iommu_hpm_incr_ctr(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
+ unsigned event_id)
+{
+ const uint32_t inhibit = riscv_iommu_reg_get32(
+ s, RISCV_IOMMU_REG_IOCOUNTINH);
+ uint32_t did_gscid;
+ uint32_t pid_pscid;
+ uint32_t ctr_idx;
+ gpointer value;
+ uint32_t ctrs;
+ uint64_t evt;
+
+ if (!(s->cap & RISCV_IOMMU_CAP_HPM)) {
+ return;
+ }
+
+ value = g_hash_table_lookup(s->hpm_event_ctr_map,
+ GUINT_TO_POINTER(event_id));
+ if (value == NULL) {
+ return;
+ }
+
+ for (ctrs = GPOINTER_TO_UINT(value); ctrs != 0; ctrs &= ctrs - 1) {
+ ctr_idx = ctz32(ctrs);
+ if (get_field(inhibit, BIT(ctr_idx + 1))) {
+ continue;
+ }
+
+ evt = riscv_iommu_reg_get64(s,
+ RISCV_IOMMU_REG_IOHPMEVT_BASE + (ctr_idx << 3));
+
+ /*
+ * It's quite possible that event ID has been changed in counter
+ * but hashtable hasn't been updated yet. We don't want to increment
+ * counter for the old event ID.
+ */
+ if (event_id != get_field(evt, RISCV_IOMMU_IOHPMEVT_EVENT_ID)) {
+ continue;
+ }
+
+ if (get_field(evt, RISCV_IOMMU_IOHPMEVT_IDT)) {
+ did_gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID);
+ pid_pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID);
+ } else {
+ did_gscid = ctx->devid;
+ pid_pscid = ctx->process_id;
+ }
+
+ if (get_field(evt, RISCV_IOMMU_IOHPMEVT_PV_PSCV)) {
+ /*
+ * If the transaction does not have a valid process_id, counter
+ * increments if device_id matches DID_GSCID. If the transaction
+ * has a valid process_id, counter increments if device_id
+ * matches DID_GSCID and process_id matches PID_PSCID. See
+ * IOMMU Specification, Chapter 5.23. Performance-monitoring
+ * event selector.
+ */
+ if (ctx->process_id &&
+ get_field(evt, RISCV_IOMMU_IOHPMEVT_PID_PSCID) != pid_pscid) {
+ continue;
+ }
+ }
+
+ if (get_field(evt, RISCV_IOMMU_IOHPMEVT_DV_GSCV)) {
+ uint32_t mask = ~0;
+
+ if (get_field(evt, RISCV_IOMMU_IOHPMEVT_DMASK)) {
+ /*
+ * 1001 1011 mask = GSCID
+ * 0000 0111 mask = mask ^ (mask + 1)
+ * 1111 1000 mask = ~mask;
+ */
+ mask = get_field(evt, RISCV_IOMMU_IOHPMEVT_DID_GSCID);
+ mask = mask ^ (mask + 1);
+ mask = ~mask;
+ }
+
+ if ((get_field(evt, RISCV_IOMMU_IOHPMEVT_DID_GSCID) & mask) !=
+ (did_gscid & mask)) {
+ continue;
+ }
+ }
+
+ hpm_incr_ctr(s, ctr_idx);
+ }
+}
+
+/* Timer callback for cycle counter overflow. */
+void riscv_iommu_hpm_timer_cb(void *priv)
+{
+ RISCVIOMMUState *s = priv;
+ const uint32_t inhibit = riscv_iommu_reg_get32(
+ s, RISCV_IOMMU_REG_IOCOUNTINH);
+ uint32_t ovf;
+
+ if (get_field(inhibit, RISCV_IOMMU_IOCOUNTINH_CY)) {
+ return;
+ }
+
+ if (s->irq_overflow_left > 0) {
+ uint64_t irq_trigger_at =
+ qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + s->irq_overflow_left;
+ timer_mod_anticipate_ns(s->hpm_timer, irq_trigger_at);
+ s->irq_overflow_left = 0;
+ return;
+ }
+
+ ovf = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTOVF);
+ if (!get_field(ovf, RISCV_IOMMU_IOCOUNTOVF_CY)) {
+ /*
+ * We don't need to set hpmcycle_val to zero and update hpmcycle_prev to
+ * current clock value. The way we calculate iohpmcycs will overflow
+ * and return the correct value. This avoids the need to synchronize
+ * timer callback and write callback.
+ */
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IOCOUNTOVF,
+ RISCV_IOMMU_IOCOUNTOVF_CY, 0);
+ riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_IOHPMCYCLES,
+ RISCV_IOMMU_IOHPMCYCLES_OVF, 0);
+ riscv_iommu_notify(s, RISCV_IOMMU_INTR_PM);
+ }
+}
+
+static void hpm_setup_timer(RISCVIOMMUState *s, uint64_t value)
+{
+ const uint32_t inhibit = riscv_iommu_reg_get32(
+ s, RISCV_IOMMU_REG_IOCOUNTINH);
+ uint64_t overflow_at, overflow_ns;
+
+ if (get_field(inhibit, RISCV_IOMMU_IOCOUNTINH_CY)) {
+ return;
+ }
+
+ /*
+ * We are using INT64_MAX here instead to UINT64_MAX because cycle counter
+ * has 63-bit precision and INT64_MAX is the maximum it can store.
+ */
+ if (value) {
+ overflow_ns = INT64_MAX - value + 1;
+ } else {
+ overflow_ns = INT64_MAX;
+ }
+
+ overflow_at = (uint64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + overflow_ns;
+
+ if (overflow_at > INT64_MAX) {
+ s->irq_overflow_left = overflow_at - INT64_MAX;
+ overflow_at = INT64_MAX;
+ }
+
+ timer_mod_anticipate_ns(s->hpm_timer, overflow_at);
+}
+
+/* Updates the internal cycle counter state when iocntinh:CY is changed. */
+void riscv_iommu_process_iocntinh_cy(RISCVIOMMUState *s, bool prev_cy_inh)
+{
+ const uint32_t inhibit = riscv_iommu_reg_get32(
+ s, RISCV_IOMMU_REG_IOCOUNTINH);
+
+ /* We only need to process CY bit toggle. */
+ if (!(inhibit ^ prev_cy_inh)) {
+ return;
+ }
+
+ trace_riscv_iommu_hpm_iocntinh_cy(prev_cy_inh);
+
+ if (!(inhibit & RISCV_IOMMU_IOCOUNTINH_CY)) {
+ /*
+ * Cycle counter is enabled. Just start the timer again and update
+ * the clock snapshot value to point to the current time to make
+ * sure iohpmcycles read is correct.
+ */
+ s->hpmcycle_prev = get_cycles();
+ hpm_setup_timer(s, s->hpmcycle_val);
+ } else {
+ /*
+ * Cycle counter is disabled. Stop the timer and update the cycle
+ * counter to record the current value which is last programmed
+ * value + the cycles passed so far.
+ */
+ s->hpmcycle_val = s->hpmcycle_val + (get_cycles() - s->hpmcycle_prev);
+ timer_del(s->hpm_timer);
+ }
+}
+
+void riscv_iommu_process_hpmcycle_write(RISCVIOMMUState *s)
+{
+ const uint64_t val = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_IOHPMCYCLES);
+ const uint32_t ovf = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTOVF);
+
+ trace_riscv_iommu_hpm_cycle_write(ovf, val);
+
+ /*
+ * Clear OF bit in IOCNTOVF if it's being cleared in IOHPMCYCLES register.
+ */
+ if (get_field(ovf, RISCV_IOMMU_IOCOUNTOVF_CY) &&
+ !get_field(val, RISCV_IOMMU_IOHPMCYCLES_OVF)) {
+ riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IOCOUNTOVF, 0,
+ RISCV_IOMMU_IOCOUNTOVF_CY);
+ }
+
+ s->hpmcycle_val = val & ~RISCV_IOMMU_IOHPMCYCLES_OVF;
+ s->hpmcycle_prev = get_cycles();
+ hpm_setup_timer(s, s->hpmcycle_val);
+}
+
+static inline bool check_valid_event_id(unsigned event_id)
+{
+ return event_id > RISCV_IOMMU_HPMEVENT_INVALID &&
+ event_id < RISCV_IOMMU_HPMEVENT_MAX;
+}
+
+static gboolean hpm_event_equal(gpointer key, gpointer value, gpointer udata)
+{
+ uint32_t *pair = udata;
+
+ if (GPOINTER_TO_UINT(value) & (1 << pair[0])) {
+ pair[1] = GPOINTER_TO_UINT(key);
+ return true;
+ }
+
+ return false;
+}
+
+/* Caller must check ctr_idx against hpm_ctrs to see if its supported or not. */
+static void update_event_map(RISCVIOMMUState *s, uint64_t value,
+ uint32_t ctr_idx)
+{
+ unsigned event_id = get_field(value, RISCV_IOMMU_IOHPMEVT_EVENT_ID);
+ uint32_t pair[2] = { ctr_idx, RISCV_IOMMU_HPMEVENT_INVALID };
+ uint32_t new_value = 1 << ctr_idx;
+ gpointer data;
+
+ /*
+ * If EventID field is RISCV_IOMMU_HPMEVENT_INVALID
+ * remove the current mapping.
+ */
+ if (event_id == RISCV_IOMMU_HPMEVENT_INVALID) {
+ data = g_hash_table_find(s->hpm_event_ctr_map, hpm_event_equal, pair);
+
+ new_value = GPOINTER_TO_UINT(data) & ~(new_value);
+ if (new_value != 0) {
+ g_hash_table_replace(s->hpm_event_ctr_map,
+ GUINT_TO_POINTER(pair[1]),
+ GUINT_TO_POINTER(new_value));
+ } else {
+ g_hash_table_remove(s->hpm_event_ctr_map,
+ GUINT_TO_POINTER(pair[1]));
+ }
+
+ return;
+ }
+
+ /* Update the counter mask if the event is already enabled. */
+ if (g_hash_table_lookup_extended(s->hpm_event_ctr_map,
+ GUINT_TO_POINTER(event_id),
+ NULL,
+ &data)) {
+ new_value |= GPOINTER_TO_UINT(data);
+ }
+
+ g_hash_table_insert(s->hpm_event_ctr_map,
+ GUINT_TO_POINTER(event_id),
+ GUINT_TO_POINTER(new_value));
+}
+
+void riscv_iommu_process_hpmevt_write(RISCVIOMMUState *s, uint32_t evt_reg)
+{
+ const uint32_t ctr_idx = (evt_reg - RISCV_IOMMU_REG_IOHPMEVT_BASE) >> 3;
+ const uint32_t ovf = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTOVF);
+ uint64_t val = riscv_iommu_reg_get64(s, evt_reg);
+
+ if (ctr_idx >= s->hpm_cntrs) {
+ return;
+ }
+
+ trace_riscv_iommu_hpm_evt_write(ctr_idx, ovf, val);
+
+ /* Clear OF bit in IOCNTOVF if it's being cleared in IOHPMEVT register. */
+ if (get_field(ovf, BIT(ctr_idx + 1)) &&
+ !get_field(val, RISCV_IOMMU_IOHPMEVT_OF)) {
+ /* +1 to offset CYCLE register OF bit. */
+ riscv_iommu_reg_mod32(
+ s, RISCV_IOMMU_REG_IOCOUNTOVF, 0, BIT(ctr_idx + 1));
+ }
+
+ if (!check_valid_event_id(get_field(val, RISCV_IOMMU_IOHPMEVT_EVENT_ID))) {
+ /* Reset EventID (WARL) field to invalid. */
+ val = set_field(val, RISCV_IOMMU_IOHPMEVT_EVENT_ID,
+ RISCV_IOMMU_HPMEVENT_INVALID);
+ riscv_iommu_reg_set64(s, evt_reg, val);
+ }
+
+ update_event_map(s, val, ctr_idx);
+}
diff --git a/hw/riscv/riscv-iommu-hpm.h b/hw/riscv/riscv-iommu-hpm.h
new file mode 100644
index 0000000..5fc4ef2
--- /dev/null
+++ b/hw/riscv/riscv-iommu-hpm.h
@@ -0,0 +1,33 @@
+/*
+ * RISC-V IOMMU - Hardware Performance Monitor (HPM) helpers
+ *
+ * Copyright (C) 2022-2023 Rivos Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_RISCV_IOMMU_HPM_H
+#define HW_RISCV_IOMMU_HPM_H
+
+#include "qom/object.h"
+#include "hw/riscv/riscv-iommu.h"
+
+uint64_t riscv_iommu_hpmcycle_read(RISCVIOMMUState *s);
+void riscv_iommu_hpm_incr_ctr(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
+ unsigned event_id);
+void riscv_iommu_hpm_timer_cb(void *priv);
+void riscv_iommu_process_iocntinh_cy(RISCVIOMMUState *s, bool prev_cy_inh);
+void riscv_iommu_process_hpmcycle_write(RISCVIOMMUState *s);
+void riscv_iommu_process_hpmevt_write(RISCVIOMMUState *s, uint32_t evt_reg);
+
+#endif
diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c
index e7568ca..d46beb2 100644
--- a/hw/riscv/riscv-iommu.c
+++ b/hw/riscv/riscv-iommu.c
@@ -29,6 +29,7 @@
#include "cpu_bits.h"
#include "riscv-iommu.h"
#include "riscv-iommu-bits.h"
+#include "riscv-iommu-hpm.h"
#include "trace.h"
#define LIMIT_CACHE_CTX (1U << 7)
@@ -38,7 +39,6 @@
#define PPN_PHYS(ppn) ((ppn) << TARGET_PAGE_BITS)
#define PPN_DOWN(phy) ((phy) >> TARGET_PAGE_BITS)
-typedef struct RISCVIOMMUContext RISCVIOMMUContext;
typedef struct RISCVIOMMUEntry RISCVIOMMUEntry;
/* Device assigned I/O address space */
@@ -51,19 +51,6 @@ struct RISCVIOMMUSpace {
QLIST_ENTRY(RISCVIOMMUSpace) list;
};
-/* Device translation context state. */
-struct RISCVIOMMUContext {
- uint64_t devid:24; /* Requester Id, AKA device_id */
- uint64_t process_id:20; /* Process ID. PASID for PCIe */
- uint64_t tc; /* Translation Control */
- uint64_t ta; /* Translation Attributes */
- uint64_t satp; /* S-Stage address translation and protection */
- uint64_t gatp; /* G-Stage address translation and protection */
- uint64_t msi_addr_mask; /* MSI filtering - address mask */
- uint64_t msi_addr_pattern; /* MSI filtering - address pattern */
- uint64_t msiptp; /* MSI redirection page table pointer */
-};
-
typedef enum RISCVIOMMUTransTag {
RISCV_IOMMU_TRANS_TAG_BY, /* Bypass */
RISCV_IOMMU_TRANS_TAG_SS, /* Single Stage */
@@ -100,7 +87,7 @@ static uint8_t riscv_iommu_get_icvec_vector(uint32_t icvec, uint32_t vec_type)
}
}
-static void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type)
+void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type)
{
uint32_t ipsr, icvec, vector;
@@ -422,6 +409,13 @@ static int riscv_iommu_spa_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
}
}
+
+ if (pass == S_STAGE) {
+ riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_S_VS_WALKS);
+ } else {
+ riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_G_WALKS);
+ }
+
/* Read page table entry */
if (sc[pass].ptesize == 4) {
uint32_t pte32 = 0;
@@ -940,6 +934,7 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
/* Device directory tree walk */
for (; depth-- > 0; ) {
+ riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_DD_WALK);
/*
* Select device id index bits based on device directory tree level
* and device context format.
@@ -967,6 +962,8 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
addr = PPN_PHYS(get_field(de, RISCV_IOMMU_DDTE_PPN));
}
+ riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_DD_WALK);
+
/* index into device context entry page */
addr |= (ctx->devid * dc_len) & ~TARGET_PAGE_MASK;
@@ -1032,6 +1029,8 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
}
for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) {
+ riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK);
+
/*
* Select process id index bits based on process directory tree
* level. See IOMMU Specification, 2.2. Process-Directory-Table.
@@ -1049,6 +1048,8 @@ static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx)
addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PC_FSC_PPN));
}
+ riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_PD_WALK);
+
/* Leaf entry in PDT */
addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK;
if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2,
@@ -1418,6 +1419,8 @@ static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
GHashTable *iot_cache;
int fault;
+ riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_URQ);
+
iot_cache = g_hash_table_ref(s->iot_cache);
/*
* TC[32] is reserved for custom extensions, used here to temporarily
@@ -1428,6 +1431,7 @@ static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
/* Check for ATS request. */
if (iotlb->perm == IOMMU_NONE) {
+ riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_ATS_RQ);
/* Check if ATS is disabled. */
if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS)) {
enable_pri = false;
@@ -1446,6 +1450,8 @@ static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx,
goto done;
}
+ riscv_iommu_hpm_incr_ctr(s, ctx, RISCV_IOMMU_HPMEVENT_TLB_MISS);
+
/* Translate using device directory / page table information. */
fault = riscv_iommu_spa_fetch(s, ctx, iotlb);
@@ -2018,6 +2024,27 @@ static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data)
riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, ipsr_set, ipsr_clr);
}
+static void riscv_iommu_process_hpm_writes(RISCVIOMMUState *s,
+ uint32_t regb,
+ bool prev_cy_inh)
+{
+ switch (regb) {
+ case RISCV_IOMMU_REG_IOCOUNTINH:
+ riscv_iommu_process_iocntinh_cy(s, prev_cy_inh);
+ break;
+
+ case RISCV_IOMMU_REG_IOHPMCYCLES:
+ case RISCV_IOMMU_REG_IOHPMCYCLES + 4:
+ riscv_iommu_process_hpmcycle_write(s);
+ break;
+
+ case RISCV_IOMMU_REG_IOHPMEVT_BASE ...
+ RISCV_IOMMU_REG_IOHPMEVT(RISCV_IOMMU_IOCOUNT_NUM) + 4:
+ riscv_iommu_process_hpmevt_write(s, regb & ~7);
+ break;
+ }
+}
+
/*
* Write the resulting value of 'data' for the reg specified
* by 'reg_addr', after considering read-only/read-write/write-clear
@@ -2045,6 +2072,7 @@ static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr,
uint32_t regb = addr & ~3;
uint32_t busy = 0;
uint64_t val = 0;
+ bool cy_inh = false;
if ((addr & (size - 1)) != 0) {
/* Unsupported MMIO alignment or access size */
@@ -2112,6 +2140,16 @@ static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr,
busy = RISCV_IOMMU_TR_REQ_CTL_GO_BUSY;
break;
+ case RISCV_IOMMU_REG_IOCOUNTINH:
+ if (addr != RISCV_IOMMU_REG_IOCOUNTINH) {
+ break;
+ }
+ /* Store previous value of CY bit. */
+ cy_inh = !!(riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_IOCOUNTINH) &
+ RISCV_IOMMU_IOCOUNTINH_CY);
+ break;
+
+
default:
break;
}
@@ -2130,6 +2168,12 @@ static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr,
stl_le_p(&s->regs_rw[regb], rw | busy);
}
+ /* Process HPM writes and update any internal state if needed. */
+ if (regb >= RISCV_IOMMU_REG_IOCOUNTOVF &&
+ regb <= (RISCV_IOMMU_REG_IOHPMEVT(RISCV_IOMMU_IOCOUNT_NUM) + 4)) {
+ riscv_iommu_process_hpm_writes(s, regb, cy_inh);
+ }
+
if (process_fn) {
process_fn(s);
}
@@ -2153,7 +2197,28 @@ static MemTxResult riscv_iommu_mmio_read(void *opaque, hwaddr addr,
return MEMTX_ACCESS_ERROR;
}
- ptr = &s->regs_rw[addr];
+ /* Compute cycle register value. */
+ if ((addr & ~7) == RISCV_IOMMU_REG_IOHPMCYCLES) {
+ val = riscv_iommu_hpmcycle_read(s);
+ ptr = (uint8_t *)&val + (addr & 7);
+ } else if ((addr & ~3) == RISCV_IOMMU_REG_IOCOUNTOVF) {
+ /*
+ * Software can read RISCV_IOMMU_REG_IOCOUNTOVF before timer
+ * callback completes. In which case CY_OF bit in
+ * RISCV_IOMMU_IOHPMCYCLES_OVF would be 0. Here we take the
+ * CY_OF bit state from RISCV_IOMMU_REG_IOHPMCYCLES register as
+ * it's not dependent over the timer callback and is computed
+ * from cycle overflow.
+ */
+ val = ldq_le_p(&s->regs_rw[addr]);
+ val |= (riscv_iommu_hpmcycle_read(s) & RISCV_IOMMU_IOHPMCYCLES_OVF)
+ ? RISCV_IOMMU_IOCOUNTOVF_CY
+ : 0;
+ ptr = (uint8_t *)&val + (addr & 3);
+ } else {
+ ptr = &s->regs_rw[addr];
+ }
+
val = ldn_le_p(ptr, size);
*data = val;
@@ -2292,6 +2357,15 @@ static void riscv_iommu_realize(DeviceState *dev, Error **errp)
RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4;
}
+ if (s->hpm_cntrs > 0) {
+ /* Clip number of HPM counters to maximum supported (31). */
+ if (s->hpm_cntrs > RISCV_IOMMU_IOCOUNT_NUM) {
+ s->hpm_cntrs = RISCV_IOMMU_IOCOUNT_NUM;
+ }
+ /* Enable hardware performance monitor interface */
+ s->cap |= RISCV_IOMMU_CAP_HPM;
+ }
+
/* Out-of-reset translation mode: OFF (DMA disabled) BARE (passthrough) */
s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, s->enable_off ?
RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE);
@@ -2339,6 +2413,18 @@ static void riscv_iommu_realize(DeviceState *dev, Error **errp)
RISCV_IOMMU_TR_REQ_CTL_GO_BUSY);
}
+ /* If HPM registers are enabled. */
+ if (s->cap & RISCV_IOMMU_CAP_HPM) {
+ /* +1 for cycle counter bit. */
+ stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_IOCOUNTINH],
+ ~((2 << s->hpm_cntrs) - 1));
+ stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_IOHPMCYCLES], 0);
+ memset(&s->regs_ro[RISCV_IOMMU_REG_IOHPMCTR_BASE],
+ 0x00, s->hpm_cntrs * 8);
+ memset(&s->regs_ro[RISCV_IOMMU_REG_IOHPMEVT_BASE],
+ 0x00, s->hpm_cntrs * 8);
+ }
+
/* Memory region for downstream access, if specified. */
if (s->target_mr) {
s->target_as = g_new0(AddressSpace, 1);
@@ -2353,6 +2439,12 @@ static void riscv_iommu_realize(DeviceState *dev, Error **errp)
memory_region_init_io(&s->trap_mr, OBJECT(dev), &riscv_iommu_trap_ops, s,
"riscv-iommu-trap", ~0ULL);
address_space_init(&s->trap_as, &s->trap_mr, "riscv-iommu-trap-as");
+
+ if (s->cap & RISCV_IOMMU_CAP_HPM) {
+ s->hpm_timer =
+ timer_new_ns(QEMU_CLOCK_VIRTUAL, riscv_iommu_hpm_timer_cb, s);
+ s->hpm_event_ctr_map = g_hash_table_new(g_direct_hash, g_direct_equal);
+ }
}
static void riscv_iommu_unrealize(DeviceState *dev)
@@ -2361,6 +2453,11 @@ static void riscv_iommu_unrealize(DeviceState *dev)
g_hash_table_unref(s->iot_cache);
g_hash_table_unref(s->ctx_cache);
+
+ if (s->cap & RISCV_IOMMU_CAP_HPM) {
+ g_hash_table_unref(s->hpm_event_ctr_map);
+ timer_free(s->hpm_timer);
+ }
}
void riscv_iommu_reset(RISCVIOMMUState *s)
@@ -2411,6 +2508,8 @@ static const Property riscv_iommu_properties[] = {
DEFINE_PROP_BOOL("g-stage", RISCVIOMMUState, enable_g_stage, TRUE),
DEFINE_PROP_LINK("downstream-mr", RISCVIOMMUState, target_mr,
TYPE_MEMORY_REGION, MemoryRegion *),
+ DEFINE_PROP_UINT8("hpm-counters", RISCVIOMMUState, hpm_cntrs,
+ RISCV_IOMMU_IOCOUNT_NUM),
};
static void riscv_iommu_class_init(ObjectClass *klass, void* data)
diff --git a/hw/riscv/riscv-iommu.h b/hw/riscv/riscv-iommu.h
index 9424989..a31aa62 100644
--- a/hw/riscv/riscv-iommu.h
+++ b/hw/riscv/riscv-iommu.h
@@ -20,6 +20,8 @@
#define HW_RISCV_IOMMU_STATE_H
#include "qom/object.h"
+#include "hw/qdev-properties.h"
+#include "system/dma.h"
#include "hw/riscv/iommu.h"
#include "hw/riscv/riscv-iommu-bits.h"
@@ -58,11 +60,6 @@ struct RISCVIOMMUState {
/* interrupt notifier */
void (*notify)(RISCVIOMMUState *iommu, unsigned vector);
- /* IOMMU State Machine */
- QemuThread core_proc; /* Background processing thread */
- QemuCond core_cond; /* Background processing wake up signal */
- unsigned core_exec; /* Processing thread execution actions */
-
/* IOMMU target address space */
AddressSpace *target_as;
MemoryRegion *target_mr;
@@ -84,12 +81,37 @@ struct RISCVIOMMUState {
QLIST_ENTRY(RISCVIOMMUState) iommus;
QLIST_HEAD(, RISCVIOMMUSpace) spaces;
+
+ /* HPM cycle counter */
+ QEMUTimer *hpm_timer;
+ uint64_t hpmcycle_val; /* Current value of cycle register */
+ uint64_t hpmcycle_prev; /* Saved value of QEMU_CLOCK_VIRTUAL clock */
+ uint64_t irq_overflow_left; /* Value beyond INT64_MAX after overflow */
+
+ /* HPM event counters */
+ GHashTable *hpm_event_ctr_map; /* Mapping of events to counters */
+ uint8_t hpm_cntrs;
};
void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus,
Error **errp);
void riscv_iommu_set_cap_igs(RISCVIOMMUState *s, riscv_iommu_igs_mode mode);
void riscv_iommu_reset(RISCVIOMMUState *s);
+void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type);
+
+typedef struct RISCVIOMMUContext RISCVIOMMUContext;
+/* Device translation context state. */
+struct RISCVIOMMUContext {
+ uint64_t devid:24; /* Requester Id, AKA device_id */
+ uint64_t process_id:20; /* Process ID. PASID for PCIe */
+ uint64_t tc; /* Translation Control */
+ uint64_t ta; /* Translation Attributes */
+ uint64_t satp; /* S-Stage address translation and protection */
+ uint64_t gatp; /* G-Stage address translation and protection */
+ uint64_t msi_addr_mask; /* MSI filtering - address mask */
+ uint64_t msi_addr_pattern; /* MSI filtering - address pattern */
+ uint64_t msiptp; /* MSI redirection page table pointer */
+};
/* private helpers */
diff --git a/hw/riscv/trace-events b/hw/riscv/trace-events
index 7bcbb03..b50b14a 100644
--- a/hw/riscv/trace-events
+++ b/hw/riscv/trace-events
@@ -19,3 +19,8 @@ riscv_iommu_sys_irq_sent(uint32_t vector) "IRQ sent to vector %u"
riscv_iommu_sys_msi_sent(uint32_t vector, uint64_t msi_addr, uint32_t msi_data, uint32_t result) "MSI sent to vector %u msi_addr 0x%"PRIx64" msi_data 0x%x result %u"
riscv_iommu_sys_reset_hold(int reset_type) "reset type %d"
riscv_iommu_pci_reset_hold(int reset_type) "reset type %d"
+riscv_iommu_hpm_read(uint64_t cycle, uint32_t inhibit, uint64_t ctr_prev, uint64_t ctr_val) "cycle 0x%"PRIx64" inhibit 0x%x ctr_prev 0x%"PRIx64" ctr_val 0x%"PRIx64
+riscv_iommu_hpm_incr_ctr(uint64_t cntr_val) "cntr_val 0x%"PRIx64
+riscv_iommu_hpm_iocntinh_cy(bool prev_cy_inh) "prev_cy_inh %d"
+riscv_iommu_hpm_cycle_write(uint32_t ovf, uint64_t val) "ovf 0x%x val 0x%"PRIx64
+riscv_iommu_hpm_evt_write(uint32_t ctr_idx, uint32_t ovf, uint64_t val) "ctr_idx 0x%x ovf 0x%x val 0x%"PRIx64
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 241389d..dae46f4 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -971,6 +971,7 @@ static void create_fdt_uart(RISCVVirtState *s, const MemMapEntry *memmap,
}
qemu_fdt_setprop_string(ms->fdt, "/chosen", "stdout-path", name);
+ qemu_fdt_setprop_string(ms->fdt, "/aliases", "serial0", name);
}
static void create_fdt_rtc(RISCVVirtState *s, const MemMapEntry *memmap,
@@ -1180,6 +1181,8 @@ static void create_fdt(RISCVVirtState *s, const MemMapEntry *memmap)
qemu_fdt_setprop(ms->fdt, "/chosen", "rng-seed",
rng_seed, sizeof(rng_seed));
+ qemu_fdt_add_subnode(ms->fdt, "/aliases");
+
create_fdt_flash(s, memmap);
create_fdt_fw_cfg(s, memmap);
create_fdt_pmu(s);
diff --git a/hw/rtc/goldfish_rtc.c b/hw/rtc/goldfish_rtc.c
index fa1d905..0f1b53e 100644
--- a/hw/rtc/goldfish_rtc.c
+++ b/hw/rtc/goldfish_rtc.c
@@ -178,38 +178,21 @@ static void goldfish_rtc_write(void *opaque, hwaddr offset,
trace_goldfish_rtc_write(offset, value);
}
-static int goldfish_rtc_pre_save(void *opaque)
-{
- uint64_t delta;
- GoldfishRTCState *s = opaque;
-
- /*
- * We want to migrate this offset, which sounds straightforward.
- * Unfortunately, we cannot directly pass tick_offset because
- * rtc_clock on destination Host might not be same source Host.
- *
- * To tackle, this we pass tick_offset relative to vm_clock from
- * source Host and make it relative to rtc_clock at destination Host.
- */
- delta = qemu_clock_get_ns(rtc_clock) -
- qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
- s->tick_offset_vmstate = s->tick_offset + delta;
-
- return 0;
-}
-
static int goldfish_rtc_post_load(void *opaque, int version_id)
{
- uint64_t delta;
GoldfishRTCState *s = opaque;
- /*
- * We extract tick_offset from tick_offset_vmstate by doing
- * reverse math compared to pre_save() function.
- */
- delta = qemu_clock_get_ns(rtc_clock) -
- qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
- s->tick_offset = s->tick_offset_vmstate - delta;
+ if (version_id < 3) {
+ /*
+ * Previous versions didn't migrate tick_offset directly. Instead, they
+ * migrated tick_offset_vmstate, which is a recalculation based on
+ * QEMU_CLOCK_VIRTUAL. We use tick_offset_vmstate when migrating from
+ * older versions.
+ */
+ uint64_t delta = qemu_clock_get_ns(rtc_clock) -
+ qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+ s->tick_offset = s->tick_offset_vmstate - delta;
+ }
goldfish_rtc_set_alarm(s);
@@ -239,8 +222,7 @@ static const MemoryRegionOps goldfish_rtc_ops[2] = {
static const VMStateDescription goldfish_rtc_vmstate = {
.name = TYPE_GOLDFISH_RTC,
- .version_id = 2,
- .pre_save = goldfish_rtc_pre_save,
+ .version_id = 3,
.post_load = goldfish_rtc_post_load,
.fields = (const VMStateField[]) {
VMSTATE_UINT64(tick_offset_vmstate, GoldfishRTCState),
@@ -249,6 +231,7 @@ static const VMStateDescription goldfish_rtc_vmstate = {
VMSTATE_UINT32(irq_pending, GoldfishRTCState),
VMSTATE_UINT32(irq_enabled, GoldfishRTCState),
VMSTATE_UINT32(time_high, GoldfishRTCState),
+ VMSTATE_UINT64_V(tick_offset, GoldfishRTCState, 3),
VMSTATE_END_OF_LIST()
}
};