diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2024-07-19 07:02:17 +1000 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2024-07-19 07:02:17 +1000 |
commit | 23fa74974d8c96bc95cbecc0d4e2d90f984939f6 (patch) | |
tree | 7dd290177a2f87e1e588836e87feb7093dedce5a | |
parent | 0d9f1016d43302108d33d1268304a06cc3fb2021 (diff) | |
parent | 30a1690f2402e6c1582d5b3ebcf7940bfe2fad4b (diff) | |
download | qemu-23fa74974d8c96bc95cbecc0d4e2d90f984939f6.zip qemu-23fa74974d8c96bc95cbecc0d4e2d90f984939f6.tar.gz qemu-23fa74974d8c96bc95cbecc0d4e2d90f984939f6.tar.bz2 |
Merge tag 'pull-target-arm-20240718' of https://git.linaro.org/people/pmaydell/qemu-arm into staging
target-arm queue:
* Fix handling of LDAPR/STLR with negative offset
* LDAPR should honour SCTLR_ELx.nAA
* Use float_status copy in sme_fmopa_s
* hw/display/bcm2835_fb: fix fb_use_offsets condition
* hw/arm/smmuv3: Support and advertise nesting
* Use FPST_F16 for SME FMOPA (widening)
* tests/arm-cpu-features: Do not assume PMU availability
* hvf: arm: Do not advance PC when raising an exception
# -----BEGIN PGP SIGNATURE-----
#
# iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmaZFlUZHHBldGVyLm1h
# eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3iJuEACtVh1Wp93XMsL3llAZkQlx
# DUCnDCvAM2qiiTIMOqPQzeKTIkRV9aFh1YWzOtMFKai6UkBU6p1b4bPqb5SIr99G
# Ayps4+WzAHsjTqBGEpIIDWL6GqMwv9azBnRAYNb+Cg9O3SzEnCdGOKCfGYTXXPRz
# zQ1NIgqZSUC5jg3XgkU22J3VMsOUWijbzxnGXhOyemSIEhREl+t6Ns3ca3n47/jk
# JIw1g6o0mpefPPkaLq6ftVwpn1L63iYQugn4VCrIhtIoOM8vmnShbI9/GwzL4AYk
# n28nwPl948Xby13kCYmu6Slt8Rmm7M33pBDJzsVtbaeBSd44XHrov8Y1+e1FhAco
# lxrWY/2rG9HiWKGLdAeCKwVxB186DKiTmuK7lcN+eBu3VbOLjDiVE0d1bK4HqGyc
# nzA/Aq81Y9p5Z7wzX40sVFlq0j1pQDQWk6GgPfMA4ueHKEEobxC3C+k1q9m02gjQ
# qesOFzViiGe0j7JER84qqcatIaTk09xfbXL/uMZx8oP/iKa1pyMUx2blChXOXVTx
# oGkO2h3/QCpRIos8d8WM/bso16EkpraInM4748iumSLuxDxTwiIikK/hpsCLDwUN
# dLsH/hAMz+yQOFubFoRt4IlsGVnk5asmTDMb4S8RojdF2KzHuzbJMgdEOe62631g
# IOAc7Tn3TIm5MpAxXOXgJA==
# =/aEm
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 18 Jul 2024 11:19:17 PM AEST
# gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg: issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [full]
# gpg: aka "Peter Maydell <pmaydell@gmail.com>" [full]
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [full]
# gpg: aka "Peter Maydell <peter@archaic.org.uk>" [unknown]
* tag 'pull-target-arm-20240718' of https://git.linaro.org/people/pmaydell/qemu-arm: (26 commits)
hvf: arm: Do not advance PC when raising an exception
tests/arm-cpu-features: Do not assume PMU availability
tests/tcg/aarch64: Add test cases for SME FMOPA (widening)
target/arm: Use FPST_F16 for SME FMOPA (widening)
target/arm: Use float_status copy in sme_fmopa_s
hw/arm/smmu: Refactor SMMU OAS
hw/arm/smmuv3: Support and advertise nesting
hw/arm/smmuv3: Handle translation faults according to SMMUPTWEventInfo
hw/arm/smmuv3: Support nested SMMUs in smmuv3_notify_iova()
hw/arm/smmu: Support nesting in the rest of commands
hw/arm/smmu: Introduce smmu_iotlb_inv_asid_vmid
hw/arm/smmu: Support nesting in smmuv3_range_inval()
hw/arm/smmu-common: Support nested translation
hw/arm/smmu-common: Add support for nested TLB
hw/arm/smmu-common: Rework TLB lookup for nesting
hw/arm/smmuv3: Translate CD and TT using stage-2 table
hw/arm/smmu: Introduce CACHED_ENTRY_TO_ADDR
hw/arm/smmu: Consolidate ASID and VMID types
hw/arm/smmu: Split smmuv3_translate()
hw/arm/smmu: Use enum for SMMU stage
...
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r-- | hw/arm/smmu-common.c | 312 | ||||
-rw-r--r-- | hw/arm/smmuv3-internal.h | 19 | ||||
-rw-r--r-- | hw/arm/smmuv3.c | 467 | ||||
-rw-r--r-- | hw/arm/trace-events | 26 | ||||
-rw-r--r-- | hw/display/bcm2835_fb.c | 2 | ||||
-rw-r--r-- | include/hw/arm/smmu-common.h | 46 | ||||
-rw-r--r-- | target/arm/hvf/hvf.c | 1 | ||||
-rw-r--r-- | target/arm/tcg/a64.decode | 2 | ||||
-rw-r--r-- | target/arm/tcg/sme_helper.c | 2 | ||||
-rw-r--r-- | target/arm/tcg/translate-a64.c | 2 | ||||
-rw-r--r-- | target/arm/tcg/translate-sme.c | 12 | ||||
-rw-r--r-- | tests/qtest/arm-cpu-features.c | 13 | ||||
-rw-r--r-- | tests/tcg/aarch64/Makefile.target | 5 | ||||
-rw-r--r-- | tests/tcg/aarch64/sme-fmopa-1.c | 63 | ||||
-rw-r--r-- | tests/tcg/aarch64/sme-fmopa-2.c | 56 | ||||
-rw-r--r-- | tests/tcg/aarch64/sme-fmopa-3.c | 63 |
16 files changed, 846 insertions, 245 deletions
diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c index b6601cc..d73ad62 100644 --- a/hw/arm/smmu-common.c +++ b/hw/arm/smmu-common.c @@ -57,7 +57,7 @@ static gboolean smmu_iotlb_key_equal(gconstpointer v1, gconstpointer v2) (k1->vmid == k2->vmid); } -SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint16_t vmid, uint64_t iova, +SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t iova, uint8_t tg, uint8_t level) { SMMUIOTLBKey key = {.asid = asid, .vmid = vmid, .iova = iova, @@ -66,8 +66,10 @@ SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint16_t vmid, uint64_t iova, return key; } -SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, - SMMUTransTableInfo *tt, hwaddr iova) +static SMMUTLBEntry *smmu_iotlb_lookup_all_levels(SMMUState *bs, + SMMUTransCfg *cfg, + SMMUTransTableInfo *tt, + hwaddr iova) { uint8_t tg = (tt->granule_sz - 10) / 2; uint8_t inputsize = 64 - tt->tsz; @@ -88,6 +90,36 @@ SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, } level++; } + return entry; +} + +/** + * smmu_iotlb_lookup - Look up for a TLB entry. + * @bs: SMMU state which includes the TLB instance + * @cfg: Configuration of the translation + * @tt: Translation table info (granule and tsz) + * @iova: IOVA address to lookup + * + * returns a valid entry on success, otherwise NULL. + * In case of nested translation, tt can be updated to include + * the granule of the found entry as it might different from + * the IOVA granule. + */ +SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, + SMMUTransTableInfo *tt, hwaddr iova) +{ + SMMUTLBEntry *entry = NULL; + + entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova); + /* + * For nested translation also try the s2 granule, as the TLB will insert + * it if the size of s2 tlb entry was smaller. + */ + if (!entry && (cfg->stage == SMMU_NESTED) && + (cfg->s2cfg.granule_sz != tt->granule_sz)) { + tt->granule_sz = cfg->s2cfg.granule_sz; + entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova); + } if (entry) { cfg->iotlb_hits++; @@ -127,24 +159,35 @@ void smmu_iotlb_inv_all(SMMUState *s) g_hash_table_remove_all(s->iotlb); } -static gboolean smmu_hash_remove_by_asid(gpointer key, gpointer value, - gpointer user_data) +static gboolean smmu_hash_remove_by_asid_vmid(gpointer key, gpointer value, + gpointer user_data) { - uint16_t asid = *(uint16_t *)user_data; + SMMUIOTLBPageInvInfo *info = (SMMUIOTLBPageInvInfo *)user_data; SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key; - return SMMU_IOTLB_ASID(*iotlb_key) == asid; + return (SMMU_IOTLB_ASID(*iotlb_key) == info->asid) && + (SMMU_IOTLB_VMID(*iotlb_key) == info->vmid); } static gboolean smmu_hash_remove_by_vmid(gpointer key, gpointer value, gpointer user_data) { - uint16_t vmid = *(uint16_t *)user_data; + int vmid = *(int *)user_data; SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key; return SMMU_IOTLB_VMID(*iotlb_key) == vmid; } +static gboolean smmu_hash_remove_by_vmid_s1(gpointer key, gpointer value, + gpointer user_data) +{ + int vmid = *(int *)user_data; + SMMUIOTLBKey *iotlb_key = (SMMUIOTLBKey *)key; + + return (SMMU_IOTLB_VMID(*iotlb_key) == vmid) && + (SMMU_IOTLB_ASID(*iotlb_key) >= 0); +} + static gboolean smmu_hash_remove_by_asid_vmid_iova(gpointer key, gpointer value, gpointer user_data) { @@ -163,6 +206,25 @@ static gboolean smmu_hash_remove_by_asid_vmid_iova(gpointer key, gpointer value, ((entry->iova & ~info->mask) == info->iova); } +static gboolean smmu_hash_remove_by_vmid_ipa(gpointer key, gpointer value, + gpointer user_data) +{ + SMMUTLBEntry *iter = (SMMUTLBEntry *)value; + IOMMUTLBEntry *entry = &iter->entry; + SMMUIOTLBPageInvInfo *info = (SMMUIOTLBPageInvInfo *)user_data; + SMMUIOTLBKey iotlb_key = *(SMMUIOTLBKey *)key; + + if (SMMU_IOTLB_ASID(iotlb_key) >= 0) { + /* This is a stage-1 address. */ + return false; + } + if (info->vmid != SMMU_IOTLB_VMID(iotlb_key)) { + return false; + } + return ((info->iova & ~entry->addr_mask) == entry->iova) || + ((entry->iova & ~info->mask) == info->iova); +} + void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova, uint8_t tg, uint64_t num_pages, uint8_t ttl) { @@ -191,18 +253,57 @@ void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova, &info); } -void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid) +/* + * Similar to smmu_iotlb_inv_iova(), but for Stage-2, ASID is always -1, + * in Stage-1 invalidation ASID = -1, means don't care. + */ +void smmu_iotlb_inv_ipa(SMMUState *s, int vmid, dma_addr_t ipa, uint8_t tg, + uint64_t num_pages, uint8_t ttl) { - trace_smmu_iotlb_inv_asid(asid); - g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid, &asid); + uint8_t granule = tg ? tg * 2 + 10 : 12; + int asid = -1; + + if (ttl && (num_pages == 1)) { + SMMUIOTLBKey key = smmu_get_iotlb_key(asid, vmid, ipa, tg, ttl); + + if (g_hash_table_remove(s->iotlb, &key)) { + return; + } + } + + SMMUIOTLBPageInvInfo info = { + .iova = ipa, + .vmid = vmid, + .mask = (num_pages << granule) - 1}; + + g_hash_table_foreach_remove(s->iotlb, + smmu_hash_remove_by_vmid_ipa, + &info); } -void smmu_iotlb_inv_vmid(SMMUState *s, uint16_t vmid) +void smmu_iotlb_inv_asid_vmid(SMMUState *s, int asid, int vmid) +{ + SMMUIOTLBPageInvInfo info = { + .asid = asid, + .vmid = vmid, + }; + + trace_smmu_iotlb_inv_asid_vmid(asid, vmid); + g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid_vmid, &info); +} + +void smmu_iotlb_inv_vmid(SMMUState *s, int vmid) { trace_smmu_iotlb_inv_vmid(vmid); g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid, &vmid); } +inline void smmu_iotlb_inv_vmid_s1(SMMUState *s, int vmid) +{ + trace_smmu_iotlb_inv_vmid_s1(vmid); + g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid_s1, &vmid); +} + /* VMSAv8-64 Translation */ /** @@ -286,8 +387,41 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova) return NULL; } +/* Translate stage-1 table address using stage-2 page table. */ +static inline int translate_table_addr_ipa(SMMUState *bs, + dma_addr_t *table_addr, + SMMUTransCfg *cfg, + SMMUPTWEventInfo *info) +{ + dma_addr_t addr = *table_addr; + SMMUTLBEntry *cached_entry; + int asid; + + /* + * The translation table walks performed from TTB0 or TTB1 are always + * performed in IPA space if stage 2 translations are enabled. + */ + asid = cfg->asid; + cfg->stage = SMMU_STAGE_2; + cfg->asid = -1; + cached_entry = smmu_translate(bs, cfg, addr, IOMMU_RO, info); + cfg->asid = asid; + cfg->stage = SMMU_NESTED; + + if (cached_entry) { + *table_addr = CACHED_ENTRY_TO_ADDR(cached_entry, addr); + return 0; + } + + info->stage = SMMU_STAGE_2; + info->addr = addr; + info->is_ipa_descriptor = true; + return -EINVAL; +} + /** * smmu_ptw_64_s1 - VMSAv8-64 Walk of the page tables for a given IOVA + * @bs: smmu state which includes TLB instance * @cfg: translation config * @iova: iova to translate * @perm: access type @@ -299,12 +433,12 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova) * Upon success, @tlbe is filled with translated_addr and entry * permission rights. */ -static int smmu_ptw_64_s1(SMMUTransCfg *cfg, +static int smmu_ptw_64_s1(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm, SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info) { dma_addr_t baseaddr, indexmask; - int stage = cfg->stage; + SMMUStage stage = cfg->stage; SMMUTransTableInfo *tt = select_tt(cfg, iova); uint8_t level, granule_sz, inputsize, stride; @@ -318,7 +452,8 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg, inputsize = 64 - tt->tsz; level = 4 - (inputsize - 4) / stride; indexmask = VMSA_IDXMSK(inputsize, stride, level); - baseaddr = extract64(tt->ttb, 0, 48); + + baseaddr = extract64(tt->ttb, 0, cfg->oas); baseaddr &= ~indexmask; while (level < VMSA_LEVELS) { @@ -349,6 +484,11 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg, goto error; } baseaddr = get_table_pte_address(pte, granule_sz); + if (cfg->stage == SMMU_NESTED) { + if (translate_table_addr_ipa(bs, &baseaddr, cfg, info)) { + goto error; + } + } level++; continue; } else if (is_page_pte(pte, level)) { @@ -381,10 +521,21 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg, goto error; } + /* + * The address output from the translation causes a stage 1 Address + * Size fault if it exceeds the range of the effective IPA size for + * the given CD. + */ + if (gpa >= (1ULL << cfg->oas)) { + info->type = SMMU_PTW_ERR_ADDR_SIZE; + goto error; + } + tlbe->entry.translated_addr = gpa; tlbe->entry.iova = iova & ~mask; tlbe->entry.addr_mask = mask; - tlbe->entry.perm = PTE_AP_TO_PERM(ap); + tlbe->parent_perm = PTE_AP_TO_PERM(ap); + tlbe->entry.perm = tlbe->parent_perm; tlbe->level = level; tlbe->granule = granule_sz; return 0; @@ -392,7 +543,7 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg, info->type = SMMU_PTW_ERR_TRANSLATION; error: - info->stage = 1; + info->stage = SMMU_STAGE_1; tlbe->entry.perm = IOMMU_NONE; return -EINVAL; } @@ -415,7 +566,7 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg, dma_addr_t ipa, IOMMUAccessFlags perm, SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info) { - const int stage = 2; + const SMMUStage stage = SMMU_STAGE_2; int granule_sz = cfg->s2cfg.granule_sz; /* ARM DDI0487I.a: Table D8-7. */ int inputsize = 64 - cfg->s2cfg.tsz; @@ -426,8 +577,8 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg, * Get the ttb from concatenated structure. * The offset is the idx * size of each ttb(number of ptes * (sizeof(pte)) */ - uint64_t baseaddr = extract64(cfg->s2cfg.vttb, 0, 48) + (1 << stride) * - idx * sizeof(uint64_t); + uint64_t baseaddr = extract64(cfg->s2cfg.vttb, 0, cfg->s2cfg.eff_ps) + + (1 << stride) * idx * sizeof(uint64_t); dma_addr_t indexmask = VMSA_IDXMSK(inputsize, stride, level); baseaddr &= ~indexmask; @@ -438,7 +589,7 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg, */ if (ipa >= (1ULL << inputsize)) { info->type = SMMU_PTW_ERR_TRANSLATION; - goto error; + goto error_ipa; } while (level < VMSA_LEVELS) { @@ -484,13 +635,13 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg, */ if (!PTE_AF(pte) && !cfg->s2cfg.affd) { info->type = SMMU_PTW_ERR_ACCESS; - goto error; + goto error_ipa; } s2ap = PTE_AP(pte); if (is_permission_fault_s2(s2ap, perm)) { info->type = SMMU_PTW_ERR_PERMISSION; - goto error; + goto error_ipa; } /* @@ -499,28 +650,54 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg, */ if (gpa >= (1ULL << cfg->s2cfg.eff_ps)) { info->type = SMMU_PTW_ERR_ADDR_SIZE; - goto error; + goto error_ipa; } tlbe->entry.translated_addr = gpa; tlbe->entry.iova = ipa & ~mask; tlbe->entry.addr_mask = mask; - tlbe->entry.perm = s2ap; + tlbe->parent_perm = s2ap; + tlbe->entry.perm = tlbe->parent_perm; tlbe->level = level; tlbe->granule = granule_sz; return 0; } info->type = SMMU_PTW_ERR_TRANSLATION; +error_ipa: + info->addr = ipa; error: - info->stage = 2; + info->stage = SMMU_STAGE_2; tlbe->entry.perm = IOMMU_NONE; return -EINVAL; } +/* + * combine S1 and S2 TLB entries into a single entry. + * As a result the S1 entry is overriden with combined data. + */ +static void combine_tlb(SMMUTLBEntry *tlbe, SMMUTLBEntry *tlbe_s2, + dma_addr_t iova, SMMUTransCfg *cfg) +{ + if (tlbe_s2->entry.addr_mask < tlbe->entry.addr_mask) { + tlbe->entry.addr_mask = tlbe_s2->entry.addr_mask; + tlbe->granule = tlbe_s2->granule; + tlbe->level = tlbe_s2->level; + } + + tlbe->entry.translated_addr = CACHED_ENTRY_TO_ADDR(tlbe_s2, + tlbe->entry.translated_addr); + + tlbe->entry.iova = iova & ~tlbe->entry.addr_mask; + /* parent_perm has s2 perm while perm keeps s1 perm. */ + tlbe->parent_perm = tlbe_s2->entry.perm; + return; +} + /** * smmu_ptw - Walk the page tables for an IOVA, according to @cfg * + * @bs: smmu state which includes TLB instance * @cfg: translation configuration * @iova: iova to translate * @perm: tentative access type @@ -529,12 +706,16 @@ error: * * return 0 on success */ -int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm, - SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info) +int smmu_ptw(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t iova, + IOMMUAccessFlags perm, SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info) { - if (cfg->stage == 1) { - return smmu_ptw_64_s1(cfg, iova, perm, tlbe, info); - } else if (cfg->stage == 2) { + int ret; + SMMUTLBEntry tlbe_s2; + dma_addr_t ipa; + + if (cfg->stage == SMMU_STAGE_1) { + return smmu_ptw_64_s1(bs, cfg, iova, perm, tlbe, info); + } else if (cfg->stage == SMMU_STAGE_2) { /* * If bypassing stage 1(or unimplemented), the input address is passed * directly to stage 2 as IPA. If the input address of a transaction @@ -543,7 +724,7 @@ int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm, */ if (iova >= (1ULL << cfg->oas)) { info->type = SMMU_PTW_ERR_ADDR_SIZE; - info->stage = 1; + info->stage = SMMU_STAGE_1; tlbe->entry.perm = IOMMU_NONE; return -EINVAL; } @@ -551,7 +732,72 @@ int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm, return smmu_ptw_64_s2(cfg, iova, perm, tlbe, info); } - g_assert_not_reached(); + /* SMMU_NESTED. */ + ret = smmu_ptw_64_s1(bs, cfg, iova, perm, tlbe, info); + if (ret) { + return ret; + } + + ipa = CACHED_ENTRY_TO_ADDR(tlbe, iova); + ret = smmu_ptw_64_s2(cfg, ipa, perm, &tlbe_s2, info); + if (ret) { + return ret; + } + + combine_tlb(tlbe, &tlbe_s2, iova, cfg); + return 0; +} + +SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr, + IOMMUAccessFlags flag, SMMUPTWEventInfo *info) +{ + SMMUTLBEntry *cached_entry = NULL; + SMMUTransTableInfo *tt; + int status; + + /* + * Combined attributes used for TLB lookup, holds the attributes for + * the input stage. + */ + SMMUTransTableInfo tt_combined; + + if (cfg->stage == SMMU_STAGE_2) { + /* Stage2. */ + tt_combined.granule_sz = cfg->s2cfg.granule_sz; + tt_combined.tsz = cfg->s2cfg.tsz; + } else { + /* Select stage1 translation table. */ + tt = select_tt(cfg, addr); + if (!tt) { + info->type = SMMU_PTW_ERR_TRANSLATION; + info->stage = SMMU_STAGE_1; + return NULL; + } + tt_combined.granule_sz = tt->granule_sz; + tt_combined.tsz = tt->tsz; + } + + cached_entry = smmu_iotlb_lookup(bs, cfg, &tt_combined, addr); + if (cached_entry) { + if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & + cached_entry->parent_perm & IOMMU_WO)) { + info->type = SMMU_PTW_ERR_PERMISSION; + info->stage = !(cached_entry->entry.perm & IOMMU_WO) ? + SMMU_STAGE_1 : + SMMU_STAGE_2; + return NULL; + } + return cached_entry; + } + + cached_entry = g_new0(SMMUTLBEntry, 1); + status = smmu_ptw(bs, cfg, addr, flag, cached_entry, info); + if (status) { + g_free(cached_entry); + return NULL; + } + smmu_iotlb_insert(bs, cfg, cached_entry); + return cached_entry; } /** diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h index e4dd11e..0ebf2ee 100644 --- a/hw/arm/smmuv3-internal.h +++ b/hw/arm/smmuv3-internal.h @@ -32,6 +32,12 @@ typedef enum SMMUTranslationStatus { SMMU_TRANS_SUCCESS, } SMMUTranslationStatus; +typedef enum SMMUTranslationClass { + SMMU_CLASS_CD, + SMMU_CLASS_TT, + SMMU_CLASS_IN, +} SMMUTranslationClass; + /* MMIO Registers */ REG32(IDR0, 0x0) @@ -596,19 +602,6 @@ static inline int oas2bits(int oas_field) return -1; } -static inline int pa_range(STE *ste) -{ - int oas_field = MIN(STE_S2PS(ste), SMMU_IDR5_OAS); - - if (!STE_S2AA64(ste)) { - return 40; - } - - return oas2bits(oas_field); -} - -#define MAX_PA(ste) ((1 << pa_range(ste)) - 1) - /* CD fields */ #define CD_VALID(x) extract32((x)->word[0], 31, 1) diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c index 445e04d..3971976 100644 --- a/hw/arm/smmuv3.c +++ b/hw/arm/smmuv3.c @@ -34,8 +34,10 @@ #include "smmuv3-internal.h" #include "smmu-internal.h" -#define PTW_RECORD_FAULT(cfg) (((cfg)->stage == 1) ? (cfg)->record_faults : \ - (cfg)->s2cfg.record_faults) +#define PTW_RECORD_FAULT(ptw_info, cfg) (((ptw_info).stage == SMMU_STAGE_1 && \ + (cfg)->record_faults) || \ + ((ptw_info).stage == SMMU_STAGE_2 && \ + (cfg)->s2cfg.record_faults)) /** * smmuv3_trigger_irq - pulse @irq if enabled and update @@ -259,6 +261,9 @@ static void smmuv3_init_regs(SMMUv3State *s) /* Based on sys property, the stages supported in smmu will be advertised.*/ if (s->stage && !strcmp("2", s->stage)) { s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S2P, 1); + } else if (s->stage && !strcmp("nested", s->stage)) { + s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S1P, 1); + s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S2P, 1); } else { s->idr[0] = FIELD_DP32(s->idr[0], IDR0, S1P, 1); } @@ -336,14 +341,35 @@ static int smmu_get_ste(SMMUv3State *s, dma_addr_t addr, STE *buf, } +static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr, + SMMUTransCfg *cfg, + SMMUEventInfo *event, + IOMMUAccessFlags flag, + SMMUTLBEntry **out_entry, + SMMUTranslationClass class); /* @ssid > 0 not supported yet */ -static int smmu_get_cd(SMMUv3State *s, STE *ste, uint32_t ssid, - CD *buf, SMMUEventInfo *event) +static int smmu_get_cd(SMMUv3State *s, STE *ste, SMMUTransCfg *cfg, + uint32_t ssid, CD *buf, SMMUEventInfo *event) { dma_addr_t addr = STE_CTXPTR(ste); int ret, i; + SMMUTranslationStatus status; + SMMUTLBEntry *entry; trace_smmuv3_get_cd(addr); + + if (cfg->stage == SMMU_NESTED) { + status = smmuv3_do_translate(s, addr, cfg, event, + IOMMU_RO, &entry, SMMU_CLASS_CD); + + /* Same PTW faults are reported but with CLASS = CD. */ + if (status != SMMU_TRANS_SUCCESS) { + return -EINVAL; + } + + addr = CACHED_ENTRY_TO_ADDR(entry, addr); + } + /* TODO: guarantee 64-bit single-copy atomicity */ ret = dma_memory_read(&address_space_memory, addr, buf, sizeof(*buf), MEMTXATTRS_UNSPECIFIED); @@ -376,10 +402,10 @@ static bool s2t0sz_valid(SMMUTransCfg *cfg) } if (cfg->s2cfg.granule_sz == 16) { - return (cfg->s2cfg.tsz >= 64 - oas2bits(SMMU_IDR5_OAS)); + return (cfg->s2cfg.tsz >= 64 - cfg->s2cfg.eff_ps); } - return (cfg->s2cfg.tsz >= MAX(64 - oas2bits(SMMU_IDR5_OAS), 16)); + return (cfg->s2cfg.tsz >= MAX(64 - cfg->s2cfg.eff_ps, 16)); } /* @@ -400,9 +426,10 @@ static bool s2_pgtable_config_valid(uint8_t sl0, uint8_t t0sz, uint8_t gran) return nr_concat <= VMSA_MAX_S2_CONCAT; } -static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste) +static int decode_ste_s2_cfg(SMMUv3State *s, SMMUTransCfg *cfg, + STE *ste) { - cfg->stage = 2; + uint8_t oas = FIELD_EX32(s->idr[5], IDR5, OAS); if (STE_S2AA64(ste) == 0x0) { qemu_log_mask(LOG_UNIMP, @@ -436,7 +463,15 @@ static int decode_ste_s2_cfg(SMMUTransCfg *cfg, STE *ste) } /* For AA64, The effective S2PS size is capped to the OAS. */ - cfg->s2cfg.eff_ps = oas2bits(MIN(STE_S2PS(ste), SMMU_IDR5_OAS)); + cfg->s2cfg.eff_ps = oas2bits(MIN(STE_S2PS(ste), oas)); + /* + * For SMMUv3.1 and later, when OAS == IAS == 52, the stage 2 input + * range is further limited to 48 bits unless STE.S2TG indicates a + * 64KB granule. + */ + if (cfg->s2cfg.granule_sz != 16) { + cfg->s2cfg.eff_ps = MIN(cfg->s2cfg.eff_ps, 48); + } /* * It is ILLEGAL for the address in S2TTB to be outside the range * described by the effective S2PS value. @@ -486,11 +521,33 @@ bad_ste: return -EINVAL; } +static void decode_ste_config(SMMUTransCfg *cfg, uint32_t config) +{ + + if (STE_CFG_ABORT(config)) { + cfg->aborted = true; + return; + } + if (STE_CFG_BYPASS(config)) { + cfg->bypassed = true; + return; + } + + if (STE_CFG_S1_ENABLED(config)) { + cfg->stage = SMMU_STAGE_1; + } + + if (STE_CFG_S2_ENABLED(config)) { + cfg->stage |= SMMU_STAGE_2; + } +} + /* Returns < 0 in case of invalid STE, 0 otherwise */ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg, STE *ste, SMMUEventInfo *event) { uint32_t config; + uint8_t oas = FIELD_EX32(s->idr[5], IDR5, OAS); int ret; if (!STE_VALID(ste)) { @@ -502,13 +559,9 @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg, config = STE_CONFIG(ste); - if (STE_CFG_ABORT(config)) { - cfg->aborted = true; - return 0; - } + decode_ste_config(cfg, config); - if (STE_CFG_BYPASS(config)) { - cfg->bypassed = true; + if (cfg->aborted || cfg->bypassed) { return 0; } @@ -538,8 +591,8 @@ static int decode_ste(SMMUv3State *s, SMMUTransCfg *cfg, * Stage-1 OAS defaults to OAS even if not enabled as it would be used * in input address check for stage-2. */ - cfg->oas = oas2bits(SMMU_IDR5_OAS); - ret = decode_ste_s2_cfg(cfg, ste); + cfg->oas = oas2bits(oas); + ret = decode_ste_s2_cfg(s, cfg, ste); if (ret) { goto bad_ste; } @@ -658,10 +711,14 @@ static int smmu_find_ste(SMMUv3State *s, uint32_t sid, STE *ste, return 0; } -static int decode_cd(SMMUTransCfg *cfg, CD *cd, SMMUEventInfo *event) +static int decode_cd(SMMUv3State *s, SMMUTransCfg *cfg, + CD *cd, SMMUEventInfo *event) { int ret = -EINVAL; int i; + SMMUTranslationStatus status; + SMMUTLBEntry *entry; + uint8_t oas = FIELD_EX32(s->idr[5], IDR5, OAS); if (!CD_VALID(cd) || !CD_AARCH64(cd)) { goto bad_cd; @@ -678,10 +735,9 @@ static int decode_cd(SMMUTransCfg *cfg, CD *cd, SMMUEventInfo *event) /* we support only those at the moment */ cfg->aa64 = true; - cfg->stage = 1; cfg->oas = oas2bits(CD_IPS(cd)); - cfg->oas = MIN(oas2bits(SMMU_IDR5_OAS), cfg->oas); + cfg->oas = MIN(oas2bits(oas), cfg->oas); cfg->tbi = CD_TBI(cd); cfg->asid = CD_ASID(cd); cfg->affd = CD_AFFD(cd); @@ -710,11 +766,36 @@ static int decode_cd(SMMUTransCfg *cfg, CD *cd, SMMUEventInfo *event) goto bad_cd; } + /* + * An address greater than 48 bits in size can only be output from a + * TTD when, in SMMUv3.1 and later, the effective IPS is 52 and a 64KB + * granule is in use for that translation table + */ + if (tt->granule_sz != 16) { + cfg->oas = MIN(cfg->oas, 48); + } tt->tsz = tsz; tt->ttb = CD_TTB(cd, i); + if (tt->ttb & ~(MAKE_64BIT_MASK(0, cfg->oas))) { goto bad_cd; } + + /* Translate the TTBx, from IPA to PA if nesting is enabled. */ + if (cfg->stage == SMMU_NESTED) { + status = smmuv3_do_translate(s, tt->ttb, cfg, event, IOMMU_RO, + &entry, SMMU_CLASS_TT); + /* + * Same PTW faults are reported but with CLASS = TT. + * If TTBx is larger than the effective stage 1 output addres + * size, it reports C_BAD_CD, which is handled by the above case. + */ + if (status != SMMU_TRANS_SUCCESS) { + return -EINVAL; + } + tt->ttb = CACHED_ENTRY_TO_ADDR(entry, tt->ttb); + } + tt->had = CD_HAD(cd, i); trace_smmuv3_decode_cd_tt(i, tt->tsz, tt->ttb, tt->granule_sz, tt->had); } @@ -762,16 +843,16 @@ static int smmuv3_decode_config(IOMMUMemoryRegion *mr, SMMUTransCfg *cfg, return ret; } - if (cfg->aborted || cfg->bypassed || (cfg->stage == 2)) { + if (cfg->aborted || cfg->bypassed || (cfg->stage == SMMU_STAGE_2)) { return 0; } - ret = smmu_get_cd(s, &ste, 0 /* ssid */, &cd, event); + ret = smmu_get_cd(s, &ste, cfg, 0 /* ssid */, &cd, event); if (ret) { return ret; } - return decode_cd(cfg, &cd, event); + return decode_cd(s, cfg, &cd, event); } /** @@ -826,6 +907,133 @@ static void smmuv3_flush_config(SMMUDevice *sdev) g_hash_table_remove(bc->configs, sdev); } +/* Do translation with TLB lookup. */ +static SMMUTranslationStatus smmuv3_do_translate(SMMUv3State *s, hwaddr addr, + SMMUTransCfg *cfg, + SMMUEventInfo *event, + IOMMUAccessFlags flag, + SMMUTLBEntry **out_entry, + SMMUTranslationClass class) +{ + SMMUPTWEventInfo ptw_info = {}; + SMMUState *bs = ARM_SMMU(s); + SMMUTLBEntry *cached_entry = NULL; + int asid, stage; + bool desc_s2_translation = class != SMMU_CLASS_IN; + + /* + * The function uses the argument class to identify which stage is used: + * - CLASS = IN: Means an input translation, determine the stage from STE. + * - CLASS = CD: Means the addr is an IPA of the CD, and it would be + * translated using the stage-2. + * - CLASS = TT: Means the addr is an IPA of the stage-1 translation table + * and it would be translated using the stage-2. + * For the last 2 cases instead of having intrusive changes in the common + * logic, we modify the cfg to be a stage-2 translation only in case of + * nested, and then restore it after. + */ + if (desc_s2_translation) { + asid = cfg->asid; + stage = cfg->stage; + cfg->asid = -1; + cfg->stage = SMMU_STAGE_2; + } + + cached_entry = smmu_translate(bs, cfg, addr, flag, &ptw_info); + + if (desc_s2_translation) { + cfg->asid = asid; + cfg->stage = stage; + } + + if (!cached_entry) { + /* All faults from PTW has S2 field. */ + event->u.f_walk_eabt.s2 = (ptw_info.stage == SMMU_STAGE_2); + /* + * Fault class is set as follows based on "class" input to + * the function and to "ptw_info" from "smmu_translate()" + * For stage-1: + * - EABT => CLASS_TT (hardcoded) + * - other events => CLASS_IN (input to function) + * For stage-2 => CLASS_IN (input to function) + * For nested, for all events: + * - CD fetch => CLASS_CD (input to function) + * - walking stage 1 translation table => CLASS_TT (from + * is_ipa_descriptor or input in case of TTBx) + * - s2 translation => CLASS_IN (input to function) + */ + class = ptw_info.is_ipa_descriptor ? SMMU_CLASS_TT : class; + switch (ptw_info.type) { + case SMMU_PTW_ERR_WALK_EABT: + event->type = SMMU_EVT_F_WALK_EABT; + event->u.f_walk_eabt.rnw = flag & 0x1; + event->u.f_walk_eabt.class = (ptw_info.stage == SMMU_STAGE_2) ? + class : SMMU_CLASS_TT; + event->u.f_walk_eabt.addr2 = ptw_info.addr; + break; + case SMMU_PTW_ERR_TRANSLATION: + if (PTW_RECORD_FAULT(ptw_info, cfg)) { + event->type = SMMU_EVT_F_TRANSLATION; + event->u.f_translation.addr2 = ptw_info.addr; + event->u.f_translation.class = class; + event->u.f_translation.rnw = flag & 0x1; + } + break; + case SMMU_PTW_ERR_ADDR_SIZE: + if (PTW_RECORD_FAULT(ptw_info, cfg)) { + event->type = SMMU_EVT_F_ADDR_SIZE; + event->u.f_addr_size.addr2 = ptw_info.addr; + event->u.f_addr_size.class = class; + event->u.f_addr_size.rnw = flag & 0x1; + } + break; + case SMMU_PTW_ERR_ACCESS: + if (PTW_RECORD_FAULT(ptw_info, cfg)) { + event->type = SMMU_EVT_F_ACCESS; + event->u.f_access.addr2 = ptw_info.addr; + event->u.f_access.class = class; + event->u.f_access.rnw = flag & 0x1; + } + break; + case SMMU_PTW_ERR_PERMISSION: + if (PTW_RECORD_FAULT(ptw_info, cfg)) { + event->type = SMMU_EVT_F_PERMISSION; + event->u.f_permission.addr2 = ptw_info.addr; + event->u.f_permission.class = class; + event->u.f_permission.rnw = flag & 0x1; + } + break; + default: + g_assert_not_reached(); + } + return SMMU_TRANS_ERROR; + } + *out_entry = cached_entry; + return SMMU_TRANS_SUCCESS; +} + +/* + * Sets the InputAddr for an SMMU_TRANS_ERROR, as it can't be + * set from all contexts, as smmuv3_get_config() can return + * translation faults in case of nested translation (for CD + * and TTBx). But in that case the iova is not known. + */ +static void smmuv3_fixup_event(SMMUEventInfo *event, hwaddr iova) +{ + switch (event->type) { + case SMMU_EVT_F_WALK_EABT: + case SMMU_EVT_F_TRANSLATION: + case SMMU_EVT_F_ADDR_SIZE: + case SMMU_EVT_F_ACCESS: + case SMMU_EVT_F_PERMISSION: + event->u.f_walk_eabt.addr = iova; + break; + default: + break; + } +} + +/* Entry point to SMMU, does everything. */ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, IOMMUAccessFlags flag, int iommu_idx) { @@ -835,12 +1043,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, SMMUEventInfo event = {.type = SMMU_EVT_NONE, .sid = sid, .inval_ste_allowed = false}; - SMMUPTWEventInfo ptw_info = {}; SMMUTranslationStatus status; - SMMUState *bs = ARM_SMMU(s); - uint64_t page_mask, aligned_addr; - SMMUTLBEntry *cached_entry = NULL; - SMMUTransTableInfo *tt; SMMUTransCfg *cfg = NULL; IOMMUTLBEntry entry = { .target_as = &address_space_memory, @@ -849,11 +1052,7 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, .addr_mask = ~(hwaddr)0, .perm = IOMMU_NONE, }; - /* - * Combined attributes used for TLB lookup, as only one stage is supported, - * it will hold attributes based on the enabled stage. - */ - SMMUTransTableInfo tt_combined; + SMMUTLBEntry *cached_entry = NULL; qemu_mutex_lock(&s->mutex); @@ -882,116 +1081,19 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion *mr, hwaddr addr, goto epilogue; } - if (cfg->stage == 1) { - /* Select stage1 translation table. */ - tt = select_tt(cfg, addr); - if (!tt) { - if (cfg->record_faults) { - event.type = SMMU_EVT_F_TRANSLATION; - event.u.f_translation.addr = addr; - event.u.f_translation.rnw = flag & 0x1; - } - status = SMMU_TRANS_ERROR; - goto epilogue; - } - tt_combined.granule_sz = tt->granule_sz; - tt_combined.tsz = tt->tsz; - - } else { - /* Stage2. */ - tt_combined.granule_sz = cfg->s2cfg.granule_sz; - tt_combined.tsz = cfg->s2cfg.tsz; - } - /* - * TLB lookup looks for granule and input size for a translation stage, - * as only one stage is supported right now, choose the right values - * from the configuration. - */ - page_mask = (1ULL << tt_combined.granule_sz) - 1; - aligned_addr = addr & ~page_mask; - - cached_entry = smmu_iotlb_lookup(bs, cfg, &tt_combined, aligned_addr); - if (cached_entry) { - if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) { - status = SMMU_TRANS_ERROR; - /* - * We know that the TLB only contains either stage-1 or stage-2 as - * nesting is not supported. So it is sufficient to check the - * translation stage to know the TLB stage for now. - */ - event.u.f_walk_eabt.s2 = (cfg->stage == 2); - if (PTW_RECORD_FAULT(cfg)) { - event.type = SMMU_EVT_F_PERMISSION; - event.u.f_permission.addr = addr; - event.u.f_permission.rnw = flag & 0x1; - } - } else { - status = SMMU_TRANS_SUCCESS; - } - goto epilogue; - } - - cached_entry = g_new0(SMMUTLBEntry, 1); - - if (smmu_ptw(cfg, aligned_addr, flag, cached_entry, &ptw_info)) { - /* All faults from PTW has S2 field. */ - event.u.f_walk_eabt.s2 = (ptw_info.stage == 2); - g_free(cached_entry); - switch (ptw_info.type) { - case SMMU_PTW_ERR_WALK_EABT: - event.type = SMMU_EVT_F_WALK_EABT; - event.u.f_walk_eabt.addr = addr; - event.u.f_walk_eabt.rnw = flag & 0x1; - event.u.f_walk_eabt.class = 0x1; - event.u.f_walk_eabt.addr2 = ptw_info.addr; - break; - case SMMU_PTW_ERR_TRANSLATION: - if (PTW_RECORD_FAULT(cfg)) { - event.type = SMMU_EVT_F_TRANSLATION; - event.u.f_translation.addr = addr; - event.u.f_translation.rnw = flag & 0x1; - } - break; - case SMMU_PTW_ERR_ADDR_SIZE: - if (PTW_RECORD_FAULT(cfg)) { - event.type = SMMU_EVT_F_ADDR_SIZE; - event.u.f_addr_size.addr = addr; - event.u.f_addr_size.rnw = flag & 0x1; - } - break; - case SMMU_PTW_ERR_ACCESS: - if (PTW_RECORD_FAULT(cfg)) { - event.type = SMMU_EVT_F_ACCESS; - event.u.f_access.addr = addr; - event.u.f_access.rnw = flag & 0x1; - } - break; - case SMMU_PTW_ERR_PERMISSION: - if (PTW_RECORD_FAULT(cfg)) { - event.type = SMMU_EVT_F_PERMISSION; - event.u.f_permission.addr = addr; - event.u.f_permission.rnw = flag & 0x1; - } - break; - default: - g_assert_not_reached(); - } - status = SMMU_TRANS_ERROR; - } else { - smmu_iotlb_insert(bs, cfg, cached_entry); - status = SMMU_TRANS_SUCCESS; - } + status = smmuv3_do_translate(s, addr, cfg, &event, flag, + &cached_entry, SMMU_CLASS_IN); epilogue: qemu_mutex_unlock(&s->mutex); switch (status) { case SMMU_TRANS_SUCCESS: entry.perm = cached_entry->entry.perm; - entry.translated_addr = cached_entry->entry.translated_addr + - (addr & cached_entry->entry.addr_mask); + entry.translated_addr = CACHED_ENTRY_TO_ADDR(cached_entry, addr); entry.addr_mask = cached_entry->entry.addr_mask; trace_smmuv3_translate_success(mr->parent_obj.name, sid, addr, - entry.translated_addr, entry.perm); + entry.translated_addr, entry.perm, + cfg->stage); break; case SMMU_TRANS_DISABLE: entry.perm = flag; @@ -1011,6 +1113,7 @@ epilogue: entry.perm); break; case SMMU_TRANS_ERROR: + smmuv3_fixup_event(&event, addr); qemu_log_mask(LOG_GUEST_ERROR, "%s translation failed for iova=0x%"PRIx64" (%s)\n", mr->parent_obj.name, addr, smmu_event_string(event.type)); @@ -1032,27 +1135,38 @@ epilogue: * @iova: iova * @tg: translation granule (if communicated through range invalidation) * @num_pages: number of @granule sized pages (if tg != 0), otherwise 1 + * @stage: Which stage(1 or 2) is used */ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, IOMMUNotifier *n, int asid, int vmid, dma_addr_t iova, uint8_t tg, - uint64_t num_pages) + uint64_t num_pages, int stage) { SMMUDevice *sdev = container_of(mr, SMMUDevice, iommu); + SMMUEventInfo eventinfo = {.inval_ste_allowed = true}; + SMMUTransCfg *cfg = smmuv3_get_config(sdev, &eventinfo); IOMMUTLBEvent event; uint8_t granule; - SMMUv3State *s = sdev->smmu; + + if (!cfg) { + return; + } + + /* + * stage is passed from TLB invalidation commands which can be either + * stage-1 or stage-2. + * However, IOMMUTLBEvent only understands IOVA, for stage-1 or stage-2 + * SMMU instances we consider the input address as the IOVA, but when + * nesting is used, we can't mix stage-1 and stage-2 addresses, so for + * nesting only stage-1 is considered the IOVA and would be notified. + */ + if ((stage == SMMU_STAGE_2) && (cfg->stage == SMMU_NESTED)) + return; if (!tg) { - SMMUEventInfo eventinfo = {.inval_ste_allowed = true}; - SMMUTransCfg *cfg = smmuv3_get_config(sdev, &eventinfo); SMMUTransTableInfo *tt; - if (!cfg) { - return; - } - if (asid >= 0 && cfg->asid != asid) { return; } @@ -1061,7 +1175,7 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, return; } - if (STAGE1_SUPPORTED(s)) { + if (stage == SMMU_STAGE_1) { tt = select_tt(cfg, iova); if (!tt) { return; @@ -1087,7 +1201,7 @@ static void smmuv3_notify_iova(IOMMUMemoryRegion *mr, /* invalidate an asid/vmid/iova range tuple in all mr's */ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova, uint8_t tg, - uint64_t num_pages) + uint64_t num_pages, int stage) { SMMUDevice *sdev; @@ -1096,15 +1210,15 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int asid, int vmid, IOMMUNotifier *n; trace_smmuv3_inv_notifiers_iova(mr->parent_obj.name, asid, vmid, - iova, tg, num_pages); + iova, tg, num_pages, stage); IOMMU_NOTIFIER_FOREACH(n, mr) { - smmuv3_notify_iova(mr, n, asid, vmid, iova, tg, num_pages); + smmuv3_notify_iova(mr, n, asid, vmid, iova, tg, num_pages, stage); } } } -static void smmuv3_range_inval(SMMUState *s, Cmd *cmd) +static void smmuv3_range_inval(SMMUState *s, Cmd *cmd, SMMUStage stage) { dma_addr_t end, addr = CMD_ADDR(cmd); uint8_t type = CMD_TYPE(cmd); @@ -1129,9 +1243,13 @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd) } if (!tg) { - trace_smmuv3_range_inval(vmid, asid, addr, tg, 1, ttl, leaf); - smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, 1); - smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, 1, ttl); + trace_smmuv3_range_inval(vmid, asid, addr, tg, 1, ttl, leaf, stage); + smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, 1, stage); + if (stage == SMMU_STAGE_1) { + smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, 1, ttl); + } else { + smmu_iotlb_inv_ipa(s, vmid, addr, tg, 1, ttl); + } return; } @@ -1147,9 +1265,14 @@ static void smmuv3_range_inval(SMMUState *s, Cmd *cmd) uint64_t mask = dma_aligned_pow2_mask(addr, end, 64); num_pages = (mask + 1) >> granule; - trace_smmuv3_range_inval(vmid, asid, addr, tg, num_pages, ttl, leaf); - smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, num_pages); - smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, num_pages, ttl); + trace_smmuv3_range_inval(vmid, asid, addr, tg, num_pages, + ttl, leaf, stage); + smmuv3_inv_notifiers_iova(s, asid, vmid, addr, tg, num_pages, stage); + if (stage == SMMU_STAGE_1) { + smmu_iotlb_inv_iova(s, asid, vmid, addr, tg, num_pages, ttl); + } else { + smmu_iotlb_inv_ipa(s, vmid, addr, tg, num_pages, ttl); + } addr += mask + 1; } } @@ -1275,26 +1398,50 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) } case SMMU_CMD_TLBI_NH_ASID: { - uint16_t asid = CMD_ASID(&cmd); + int asid = CMD_ASID(&cmd); + int vmid = -1; if (!STAGE1_SUPPORTED(s)) { cmd_error = SMMU_CERROR_ILL; break; } + /* + * VMID is only matched when stage 2 is supported, otherwise set it + * to -1 as the value used for stage-1 only VMIDs. + */ + if (STAGE2_SUPPORTED(s)) { + vmid = CMD_VMID(&cmd); + } + trace_smmuv3_cmdq_tlbi_nh_asid(asid); smmu_inv_notifiers_all(&s->smmu_state); - smmu_iotlb_inv_asid(bs, asid); + smmu_iotlb_inv_asid_vmid(bs, asid, vmid); break; } case SMMU_CMD_TLBI_NH_ALL: + { + int vmid = -1; + if (!STAGE1_SUPPORTED(s)) { cmd_error = SMMU_CERROR_ILL; break; } + + /* + * If stage-2 is supported, invalidate for this VMID only, otherwise + * invalidate the whole thing. + */ + if (STAGE2_SUPPORTED(s)) { + vmid = CMD_VMID(&cmd); + trace_smmuv3_cmdq_tlbi_nh(vmid); + smmu_iotlb_inv_vmid_s1(bs, vmid); + break; + } QEMU_FALLTHROUGH; + } case SMMU_CMD_TLBI_NSNH_ALL: - trace_smmuv3_cmdq_tlbi_nh(); + trace_smmuv3_cmdq_tlbi_nsnh(); smmu_inv_notifiers_all(&s->smmu_state); smmu_iotlb_inv_all(bs); break; @@ -1304,11 +1451,11 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) cmd_error = SMMU_CERROR_ILL; break; } - smmuv3_range_inval(bs, &cmd); + smmuv3_range_inval(bs, &cmd, SMMU_STAGE_1); break; case SMMU_CMD_TLBI_S12_VMALL: { - uint16_t vmid = CMD_VMID(&cmd); + int vmid = CMD_VMID(&cmd); if (!STAGE2_SUPPORTED(s)) { cmd_error = SMMU_CERROR_ILL; @@ -1329,7 +1476,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s) * As currently only either s1 or s2 are supported * we can reuse same function for s2. */ - smmuv3_range_inval(bs, &cmd); + smmuv3_range_inval(bs, &cmd, SMMU_STAGE_2); break; case SMMU_CMD_TLBI_EL3_ALL: case SMMU_CMD_TLBI_EL3_VA: diff --git a/hw/arm/trace-events b/hw/arm/trace-events index f1a54a0..be6c8f7 100644 --- a/hw/arm/trace-events +++ b/hw/arm/trace-events @@ -11,13 +11,14 @@ smmu_ptw_page_pte(int stage, int level, uint64_t iova, uint64_t baseaddr, uint6 smmu_ptw_block_pte(int stage, int level, uint64_t baseaddr, uint64_t pteaddr, uint64_t pte, uint64_t iova, uint64_t gpa, int bsize_mb) "stage=%d level=%d base@=0x%"PRIx64" pte@=0x%"PRIx64" pte=0x%"PRIx64" iova=0x%"PRIx64" block address = 0x%"PRIx64" block size = %d MiB" smmu_get_pte(uint64_t baseaddr, int index, uint64_t pteaddr, uint64_t pte) "baseaddr=0x%"PRIx64" index=0x%x, pteaddr=0x%"PRIx64", pte=0x%"PRIx64 smmu_iotlb_inv_all(void) "IOTLB invalidate all" -smmu_iotlb_inv_asid(uint16_t asid) "IOTLB invalidate asid=%d" -smmu_iotlb_inv_vmid(uint16_t vmid) "IOTLB invalidate vmid=%d" -smmu_iotlb_inv_iova(uint16_t asid, uint64_t addr) "IOTLB invalidate asid=%d addr=0x%"PRIx64 +smmu_iotlb_inv_asid_vmid(int asid, int vmid) "IOTLB invalidate asid=%d vmid=%d" +smmu_iotlb_inv_vmid(int vmid) "IOTLB invalidate vmid=%d" +smmu_iotlb_inv_vmid_s1(int vmid) "IOTLB invalidate vmid=%d" +smmu_iotlb_inv_iova(int asid, uint64_t addr) "IOTLB invalidate asid=%d addr=0x%"PRIx64 smmu_inv_notifiers_mr(const char *name) "iommu mr=%s" -smmu_iotlb_lookup_hit(uint16_t asid, uint16_t vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" -smmu_iotlb_lookup_miss(uint16_t asid, uint16_t vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache MISS asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" -smmu_iotlb_insert(uint16_t asid, uint16_t vmid, uint64_t addr, uint8_t tg, uint8_t level) "IOTLB ++ asid=%d vmid=%d addr=0x%"PRIx64" tg=%d level=%d" +smmu_iotlb_lookup_hit(int asid, int vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" +smmu_iotlb_lookup_miss(int asid, int vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache MISS asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d" +smmu_iotlb_insert(int asid, int vmid, uint64_t addr, uint8_t tg, uint8_t level) "IOTLB ++ asid=%d vmid=%d addr=0x%"PRIx64" tg=%d level=%d" # smmuv3.c smmuv3_read_mmio(uint64_t addr, uint64_t val, unsigned size, uint32_t r) "addr: 0x%"PRIx64" val:0x%"PRIx64" size: 0x%x(%d)" @@ -37,7 +38,7 @@ smmuv3_get_ste(uint64_t addr) "STE addr: 0x%"PRIx64 smmuv3_translate_disable(const char *n, uint16_t sid, uint64_t addr, bool is_write) "%s sid=0x%x bypass (smmu disabled) iova:0x%"PRIx64" is_write=%d" smmuv3_translate_bypass(const char *n, uint16_t sid, uint64_t addr, bool is_write) "%s sid=0x%x STE bypass iova:0x%"PRIx64" is_write=%d" smmuv3_translate_abort(const char *n, uint16_t sid, uint64_t addr, bool is_write) "%s sid=0x%x abort on iova:0x%"PRIx64" is_write=%d" -smmuv3_translate_success(const char *n, uint16_t sid, uint64_t iova, uint64_t translated, int perm) "%s sid=0x%x iova=0x%"PRIx64" translated=0x%"PRIx64" perm=0x%x" +smmuv3_translate_success(const char *n, uint16_t sid, uint64_t iova, uint64_t translated, int perm, int stage) "%s sid=0x%x iova=0x%"PRIx64" translated=0x%"PRIx64" perm=0x%x stage=%d" smmuv3_get_cd(uint64_t addr) "CD addr: 0x%"PRIx64 smmuv3_decode_cd(uint32_t oas) "oas=%d" smmuv3_decode_cd_tt(int i, uint32_t tsz, uint64_t ttb, uint32_t granule_sz, bool had) "TT[%d]:tsz:%d ttb:0x%"PRIx64" granule_sz:%d had:%d" @@ -46,14 +47,15 @@ smmuv3_cmdq_cfgi_ste_range(int start, int end) "start=0x%x - end=0x%x" smmuv3_cmdq_cfgi_cd(uint32_t sid) "sid=0x%x" smmuv3_config_cache_hit(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache HIT for sid=0x%x (hits=%d, misses=%d, hit rate=%d)" smmuv3_config_cache_miss(uint32_t sid, uint32_t hits, uint32_t misses, uint32_t perc) "Config cache MISS for sid=0x%x (hits=%d, misses=%d, hit rate=%d)" -smmuv3_range_inval(int vmid, int asid, uint64_t addr, uint8_t tg, uint64_t num_pages, uint8_t ttl, bool leaf) "vmid=%d asid=%d addr=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" ttl=%d leaf=%d" -smmuv3_cmdq_tlbi_nh(void) "" -smmuv3_cmdq_tlbi_nh_asid(uint16_t asid) "asid=%d" -smmuv3_cmdq_tlbi_s12_vmid(uint16_t vmid) "vmid=%d" +smmuv3_range_inval(int vmid, int asid, uint64_t addr, uint8_t tg, uint64_t num_pages, uint8_t ttl, bool leaf, int stage) "vmid=%d asid=%d addr=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" ttl=%d leaf=%d stage=%d" +smmuv3_cmdq_tlbi_nh(int vmid) "vmid=%d" +smmuv3_cmdq_tlbi_nsnh(void) "" +smmuv3_cmdq_tlbi_nh_asid(int asid) "asid=%d" +smmuv3_cmdq_tlbi_s12_vmid(int vmid) "vmid=%d" smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid=0x%x" smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier node for iommu mr=%s" smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s" -smmuv3_inv_notifiers_iova(const char *name, uint16_t asid, uint16_t vmid, uint64_t iova, uint8_t tg, uint64_t num_pages) "iommu mr=%s asid=%d vmid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64 +smmuv3_inv_notifiers_iova(const char *name, int asid, int vmid, uint64_t iova, uint8_t tg, uint64_t num_pages, int stage) "iommu mr=%s asid=%d vmid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" stage=%d" # strongarm.c strongarm_uart_update_parameters(const char *label, int speed, char parity, int data_bits, int stop_bits) "%s speed=%d parity=%c data=%d stop=%d" diff --git a/hw/display/bcm2835_fb.c b/hw/display/bcm2835_fb.c index e40ed2d..650db3d 100644 --- a/hw/display/bcm2835_fb.c +++ b/hw/display/bcm2835_fb.c @@ -145,7 +145,7 @@ static bool fb_use_offsets(BCM2835FBConfig *config) * viewport size is larger than the physical screen. (It doesn't * prevent the guest setting this silly viewport setting, though...) */ - return config->xres_virtual > config->xres && + return config->xres_virtual > config->xres || config->yres_virtual > config->yres; } diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h index 687b7ca..d1a4a64 100644 --- a/include/hw/arm/smmu-common.h +++ b/include/hw/arm/smmu-common.h @@ -37,6 +37,9 @@ #define VMSA_IDXMSK(isz, strd, lvl) ((1ULL << \ VMSA_BIT_LVL(isz, strd, lvl)) - 1) +#define CACHED_ENTRY_TO_ADDR(ent, addr) ((ent)->entry.translated_addr + \ + ((addr) & (ent)->entry.addr_mask)) + /* * Page table walk error types */ @@ -49,10 +52,18 @@ typedef enum { SMMU_PTW_ERR_PERMISSION, /* Permission fault */ } SMMUPTWEventType; +/* SMMU Stage */ +typedef enum { + SMMU_STAGE_1 = 1, + SMMU_STAGE_2, + SMMU_NESTED, +} SMMUStage; + typedef struct SMMUPTWEventInfo { - int stage; + SMMUStage stage; SMMUPTWEventType type; dma_addr_t addr; /* fetched address that induced an abort, if any */ + bool is_ipa_descriptor; /* src for fault in nested translation. */ } SMMUPTWEventInfo; typedef struct SMMUTransTableInfo { @@ -67,6 +78,7 @@ typedef struct SMMUTLBEntry { IOMMUTLBEntry entry; uint8_t level; uint8_t granule; + IOMMUAccessFlags parent_perm; } SMMUTLBEntry; /* Stage-2 configuration. */ @@ -77,7 +89,7 @@ typedef struct SMMUS2Cfg { bool record_faults; /* Record fault events (S2R) */ uint8_t granule_sz; /* Granule page shift (based on S2TG) */ uint8_t eff_ps; /* Effective PA output range (based on S2PS) */ - uint16_t vmid; /* Virtual Machine ID (S2VMID) */ + int vmid; /* Virtual Machine ID (S2VMID) */ uint64_t vttb; /* Address of translation table base (S2TTB) */ } SMMUS2Cfg; @@ -88,7 +100,7 @@ typedef struct SMMUS2Cfg { */ typedef struct SMMUTransCfg { /* Shared fields between stage-1 and stage-2. */ - int stage; /* translation stage */ + SMMUStage stage; /* translation stage */ bool disabled; /* smmu is disabled */ bool bypassed; /* translation is bypassed */ bool aborted; /* translation is aborted */ @@ -101,7 +113,7 @@ typedef struct SMMUTransCfg { uint64_t ttb; /* TT base address */ uint8_t oas; /* output address width */ uint8_t tbi; /* Top Byte Ignore */ - uint16_t asid; + int asid; SMMUTransTableInfo tt[2]; /* Used by stage-2 only. */ struct SMMUS2Cfg s2cfg; @@ -125,8 +137,8 @@ typedef struct SMMUPciBus { typedef struct SMMUIOTLBKey { uint64_t iova; - uint16_t asid; - uint16_t vmid; + int asid; + int vmid; uint8_t tg; uint8_t level; } SMMUIOTLBKey; @@ -173,8 +185,16 @@ static inline uint16_t smmu_get_sid(SMMUDevice *sdev) * smmu_ptw - Perform the page table walk for a given iova / access flags * pair, according to @cfg translation config */ -int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm, - SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info); +int smmu_ptw(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t iova, + IOMMUAccessFlags perm, SMMUTLBEntry *tlbe, + SMMUPTWEventInfo *info); + +/* + * smmu_translate - Look for a translation in TLB, if not, do a PTW. + * Returns NULL on PTW error or incase of TLB permission errors. + */ +SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr, + IOMMUAccessFlags flag, SMMUPTWEventInfo *info); /** * select_tt - compute which translation table shall be used according to @@ -190,14 +210,16 @@ SMMUDevice *smmu_find_sdev(SMMUState *s, uint32_t sid); SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg, SMMUTransTableInfo *tt, hwaddr iova); void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *entry); -SMMUIOTLBKey smmu_get_iotlb_key(uint16_t asid, uint16_t vmid, uint64_t iova, +SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t iova, uint8_t tg, uint8_t level); void smmu_iotlb_inv_all(SMMUState *s); -void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid); -void smmu_iotlb_inv_vmid(SMMUState *s, uint16_t vmid); +void smmu_iotlb_inv_asid_vmid(SMMUState *s, int asid, int vmid); +void smmu_iotlb_inv_vmid(SMMUState *s, int vmid); +void smmu_iotlb_inv_vmid_s1(SMMUState *s, int vmid); void smmu_iotlb_inv_iova(SMMUState *s, int asid, int vmid, dma_addr_t iova, uint8_t tg, uint64_t num_pages, uint8_t ttl); - +void smmu_iotlb_inv_ipa(SMMUState *s, int vmid, dma_addr_t ipa, uint8_t tg, + uint64_t num_pages, uint8_t ttl); /* Unmap the range of all the notifiers registered to any IOMMU mr */ void smmu_inv_notifiers_all(SMMUState *s); diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c index ef9bc42..eb090e6 100644 --- a/target/arm/hvf/hvf.c +++ b/target/arm/hvf/hvf.c @@ -1278,6 +1278,7 @@ static int hvf_sysreg_read(CPUState *cpu, uint32_t reg, uint32_t rt) /* Call the TCG sysreg handler. This is only safe for GICv3 regs. */ if (!hvf_sysreg_read_cp(cpu, reg, &val)) { hvf_raise_exception(cpu, EXCP_UDEF, syn_uncategorized()); + return 1; } break; case SYSREG_DBGBVR0_EL1: diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode index 2922de7..62df4c4 100644 --- a/target/arm/tcg/a64.decode +++ b/target/arm/tcg/a64.decode @@ -520,7 +520,7 @@ LDAPR sz:2 111 0 00 1 0 1 11111 1100 00 rn:5 rt:5 LDRA 11 111 0 00 m:1 . 1 ......... w:1 1 rn:5 rt:5 imm=%ldra_imm &ldapr_stlr_i rn rt imm sz sign ext -@ldapr_stlr_i .. ...... .. . imm:9 .. rn:5 rt:5 &ldapr_stlr_i +@ldapr_stlr_i .. ...... .. . imm:s9 .. rn:5 rt:5 &ldapr_stlr_i STLR_i sz:2 011001 00 0 ......... 00 ..... ..... @ldapr_stlr_i sign=0 ext=0 LDAPR_i sz:2 011001 01 0 ......... 00 ..... ..... @ldapr_stlr_i sign=0 ext=0 LDAPR_i 00 011001 10 0 ......... 00 ..... ..... @ldapr_stlr_i sign=1 ext=0 sz=0 diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c index e2e0575..5a6dd76 100644 --- a/target/arm/tcg/sme_helper.c +++ b/target/arm/tcg/sme_helper.c @@ -916,7 +916,7 @@ void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn, if (pb & 1) { uint32_t *a = vza_row + H1_4(col); uint32_t *m = vzm + H1_4(col); - *a = float32_muladd(n, *m, *a, 0, vst); + *a = float32_muladd(n, *m, *a, 0, &fpst); } col += 4; pb >>= 4; diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index 559a6cd..148be28 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -3543,7 +3543,7 @@ static bool trans_LDAPR(DisasContext *s, arg_LDAPR *a) if (a->rn == 31) { gen_check_sp_alignment(s); } - mop = check_atomic_align(s, a->rn, a->sz); + mop = check_ordered_align(s, a->rn, 0, false, a->sz); clean_addr = gen_mte_check1(s, cpu_reg_sp(s, a->rn), false, a->rn != 31, mop); /* diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c index 46c7fce..185a8a9 100644 --- a/target/arm/tcg/translate-sme.c +++ b/target/arm/tcg/translate-sme.c @@ -304,6 +304,7 @@ static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz, } static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz, + ARMFPStatusFlavour e_fpst, gen_helper_gvec_5_ptr *fn) { int svl = streaming_vec_reg_size(s); @@ -319,15 +320,18 @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz, zm = vec_full_reg_ptr(s, a->zm); pn = pred_full_reg_ptr(s, a->pn); pm = pred_full_reg_ptr(s, a->pm); - fpst = fpstatus_ptr(FPST_FPCR); + fpst = fpstatus_ptr(e_fpst); fn(za, zn, zm, pn, pm, fpst, tcg_constant_i32(desc)); return true; } -TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_h) -TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s) -TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d) +TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, + MO_32, FPST_FPCR_F16, gen_helper_sme_fmopa_h) +TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, + MO_32, FPST_FPCR, gen_helper_sme_fmopa_s) +TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, + MO_64, FPST_FPCR, gen_helper_sme_fmopa_d) /* TODO: FEAT_EBF16 */ TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa) diff --git a/tests/qtest/arm-cpu-features.c b/tests/qtest/arm-cpu-features.c index 966c65d..cfd6f77 100644 --- a/tests/qtest/arm-cpu-features.c +++ b/tests/qtest/arm-cpu-features.c @@ -509,6 +509,7 @@ static void test_query_cpu_model_expansion_kvm(const void *data) assert_set_feature(qts, "host", "kvm-no-adjvtime", false); if (g_str_equal(qtest_get_arch(), "aarch64")) { + bool kvm_supports_pmu; bool kvm_supports_steal_time; bool kvm_supports_sve; char max_name[8], name[8]; @@ -537,11 +538,6 @@ static void test_query_cpu_model_expansion_kvm(const void *data) assert_has_feature_enabled(qts, "host", "aarch64"); - /* Enabling and disabling pmu should always work. */ - assert_has_feature_enabled(qts, "host", "pmu"); - assert_set_feature(qts, "host", "pmu", false); - assert_set_feature(qts, "host", "pmu", true); - /* * Some features would be enabled by default, but they're disabled * because this instance of KVM doesn't support them. Test that the @@ -551,11 +547,18 @@ static void test_query_cpu_model_expansion_kvm(const void *data) assert_has_feature(qts, "host", "sve"); resp = do_query_no_props(qts, "host"); + kvm_supports_pmu = resp_get_feature(resp, "pmu"); kvm_supports_steal_time = resp_get_feature(resp, "kvm-steal-time"); kvm_supports_sve = resp_get_feature(resp, "sve"); vls = resp_get_sve_vls(resp); qobject_unref(resp); + if (kvm_supports_pmu) { + /* If we have pmu then we should be able to toggle it. */ + assert_set_feature(qts, "host", "pmu", false); + assert_set_feature(qts, "host", "pmu", true); + } + if (kvm_supports_steal_time) { /* If we have steal-time then we should be able to toggle it. */ assert_set_feature(qts, "host", "kvm-steal-time", false); diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target index b53218e..8cc62eb 100644 --- a/tests/tcg/aarch64/Makefile.target +++ b/tests/tcg/aarch64/Makefile.target @@ -70,8 +70,9 @@ endif # SME Tests ifneq ($(CROSS_AS_HAS_ARMV9_SME),) -AARCH64_TESTS += sme-outprod1 sme-smopa-1 sme-smopa-2 -sme-outprod1 sme-smopa-1 sme-smopa-2: CFLAGS += $(CROSS_AS_HAS_ARMV9_SME) +SME_TESTS = sme-outprod1 sme-smopa-1 sme-smopa-2 sme-fmopa-1 sme-fmopa-2 sme-fmopa-3 +AARCH64_TESTS += $(SME_TESTS) +$(SME_TESTS): CFLAGS += $(CROSS_AS_HAS_ARMV9_SME) endif # System Registers Tests diff --git a/tests/tcg/aarch64/sme-fmopa-1.c b/tests/tcg/aarch64/sme-fmopa-1.c new file mode 100644 index 0000000..652c4ea --- /dev/null +++ b/tests/tcg/aarch64/sme-fmopa-1.c @@ -0,0 +1,63 @@ +/* + * SME outer product, 1 x 1. + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <stdio.h> + +static void foo(float *dst) +{ + asm(".arch_extension sme\n\t" + "smstart\n\t" + "ptrue p0.s, vl4\n\t" + "fmov z0.s, #1.0\n\t" + /* + * An outer product of a vector of 1.0 by itself should be a matrix of 1.0. + * Note that we are using tile 1 here (za1.s) rather than tile 0. + */ + "zero {za}\n\t" + "fmopa za1.s, p0/m, p0/m, z0.s, z0.s\n\t" + /* + * Read the first 4x4 sub-matrix of elements from tile 1: + * Note that za1h should be interchangeable here. + */ + "mov w12, #0\n\t" + "mova z0.s, p0/m, za1v.s[w12, #0]\n\t" + "mova z1.s, p0/m, za1v.s[w12, #1]\n\t" + "mova z2.s, p0/m, za1v.s[w12, #2]\n\t" + "mova z3.s, p0/m, za1v.s[w12, #3]\n\t" + /* + * And store them to the input pointer (dst in the C code): + */ + "st1w {z0.s}, p0, [%0]\n\t" + "add x0, x0, #16\n\t" + "st1w {z1.s}, p0, [x0]\n\t" + "add x0, x0, #16\n\t" + "st1w {z2.s}, p0, [x0]\n\t" + "add x0, x0, #16\n\t" + "st1w {z3.s}, p0, [x0]\n\t" + "smstop" + : : "r"(dst) + : "x12", "d0", "d1", "d2", "d3", "memory"); +} + +int main() +{ + float dst[16] = { }; + + foo(dst); + + for (int i = 0; i < 16; i++) { + if (dst[i] != 1.0f) { + goto failure; + } + } + /* success */ + return 0; + + failure: + for (int i = 0; i < 16; i++) { + printf("%f%c", dst[i], i % 4 == 3 ? '\n' : ' '); + } + return 1; +} diff --git a/tests/tcg/aarch64/sme-fmopa-2.c b/tests/tcg/aarch64/sme-fmopa-2.c new file mode 100644 index 0000000..15f0972 --- /dev/null +++ b/tests/tcg/aarch64/sme-fmopa-2.c @@ -0,0 +1,56 @@ +/* + * SME outer product, FZ vs FZ16 + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <stdint.h> +#include <stdio.h> + +static void test_fmopa(uint32_t *result) +{ + asm(".arch_extension sme\n\t" + "smstart\n\t" /* Z*, P* and ZArray cleared */ + "ptrue p2.b, vl16\n\t" /* Limit vector length to 16 */ + "ptrue p5.b, vl16\n\t" + "movi d0, #0x00ff\n\t" /* fp16 denormal */ + "movi d16, #0x00ff\n\t" + "mov w15, #0x0001000000\n\t" /* FZ=1, FZ16=0 */ + "msr fpcr, x15\n\t" + "fmopa za3.s, p2/m, p5/m, z16.h, z0.h\n\t" + "mov w15, #0\n\t" + "st1w {za3h.s[w15, 0]}, p2, [%0]\n\t" + "add %0, %0, #16\n\t" + "st1w {za3h.s[w15, 1]}, p2, [%0]\n\t" + "mov w15, #2\n\t" + "add %0, %0, #16\n\t" + "st1w {za3h.s[w15, 0]}, p2, [%0]\n\t" + "add %0, %0, #16\n\t" + "st1w {za3h.s[w15, 1]}, p2, [%0]\n\t" + "smstop" + : "+r"(result) : + : "x15", "x16", "p2", "p5", "d0", "d16", "memory"); +} + +int main(void) +{ + uint32_t result[4 * 4] = { }; + + test_fmopa(result); + + if (result[0] != 0x2f7e0100) { + printf("Test failed: Incorrect output in first 4 bytes\n" + "Expected: %08x\n" + "Got: %08x\n", + 0x2f7e0100, result[0]); + return 1; + } + + for (int i = 1; i < 16; ++i) { + if (result[i] != 0) { + printf("Test failed: Non-zero word at position %d\n", i); + return 1; + } + } + + return 0; +} diff --git a/tests/tcg/aarch64/sme-fmopa-3.c b/tests/tcg/aarch64/sme-fmopa-3.c new file mode 100644 index 0000000..3bfec34 --- /dev/null +++ b/tests/tcg/aarch64/sme-fmopa-3.c @@ -0,0 +1,63 @@ +/* + * SME outer product, [ 1 2 3 4 ] squared + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <math.h> + +static const float i_1234[4] = { + 1.0f, 2.0f, 3.0f, 4.0f +}; + +static const float expected[4] = { + 4.515625f, 5.750000f, 6.984375f, 8.218750f +}; + +static void test_fmopa(float *result) +{ + asm(".arch_extension sme\n\t" + "smstart\n\t" /* ZArray cleared */ + "ptrue p2.b, vl16\n\t" /* Limit vector length to 16 */ + "ld1w {z0.s}, p2/z, [%1]\n\t" + "mov w15, #0\n\t" + "mov za3h.s[w15, 0], p2/m, z0.s\n\t" + "mov za3h.s[w15, 1], p2/m, z0.s\n\t" + "mov w15, #2\n\t" + "mov za3h.s[w15, 0], p2/m, z0.s\n\t" + "mov za3h.s[w15, 1], p2/m, z0.s\n\t" + "msr fpcr, xzr\n\t" + "fmopa za3.s, p2/m, p2/m, z0.h, z0.h\n\t" + "mov w15, #0\n\t" + "st1w {za3h.s[w15, 0]}, p2, [%0]\n" + "add %0, %0, #16\n\t" + "st1w {za3h.s[w15, 1]}, p2, [%0]\n\t" + "mov w15, #2\n\t" + "add %0, %0, #16\n\t" + "st1w {za3h.s[w15, 0]}, p2, [%0]\n\t" + "add %0, %0, #16\n\t" + "st1w {za3h.s[w15, 1]}, p2, [%0]\n\t" + "smstop" + : "+r"(result) : "r"(i_1234) + : "x15", "x16", "p2", "d0", "memory"); +} + +int main(void) +{ + float result[4 * 4] = { }; + int ret = 0; + + test_fmopa(result); + + for (int i = 0; i < 4; i++) { + float actual = result[i]; + if (fabsf(actual - expected[i]) > 0.001f) { + printf("Test failed at element %d: Expected %f, got %f\n", + i, expected[i], actual); + ret = 1; + } + } + return ret; +} |