author    Peter Maydell <peter.maydell@linaro.org>   2020-06-26 18:22:36 +0100
committer Peter Maydell <peter.maydell@linaro.org>   2020-06-26 18:22:36 +0100
commit    553cf5d7c47bee05a3dec9461c1f8430316d516b
tree      c39e6a48d2746e4bf9d79cda6e3fa23a1c9d9b6d /target
parent    3591ddd39987cbdaa0cfa344a262f315abd97582
parent    c7459633baa71d1781fde4a245d6ec9ce2f008cf
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200626' into staging
target-arm queue:
* hw/arm/aspeed: improve QOM usage
* hw/misc/pca9552: trace GPIO change events
* target/arm: Implement ARMv8.5-MemTag for system emulation
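
Background for the MemTag items below: MTE gives each 16-byte granule of memory a 4-bit allocation tag and each pointer a 4-bit logical tag carried in address bits [59:56]; a checked access compares the two, and a mismatch faults or is merely recorded, depending on SCTLR_ELx.TCF. A minimal sketch of the pointer side (illustrative helpers, not code from this queue):

    #include <stdint.h>

    /* The MTE logical tag occupies bits [59:56] of the address. */
    static inline uint64_t ptr_set_tag(uint64_t ptr, unsigned tag)
    {
        return (ptr & ~(0xfULL << 56)) | ((uint64_t)(tag & 0xf) << 56);
    }

    static inline unsigned ptr_tag(uint64_t ptr)
    {
        return (ptr >> 56) & 0xf;
    }

A Checked access is valid only when ptr_tag(ptr) equals the allocation tag of the granule it addresses.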
# gpg: Signature made Fri 26 Jun 2020 16:13:27 BST
# gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg: issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE
* remotes/pmaydell/tags/pull-target-arm-20200626: (57 commits)
target/arm: Enable MTE
target/arm: Add allocation tag storage for system mode
target/arm: Create tagged ram when MTE is enabled
target/arm: Cache the Tagged bit for a page in MemTxAttrs
target/arm: Always pass cacheattr to get_phys_addr
target/arm: Set PSTATE.TCO on exception entry
target/arm: Implement data cache set allocation tags
target/arm: Complete TBI clearing for user-only for SVE
target/arm: Add mte helpers for sve scatter/gather memory ops
target/arm: Handle TBI for sve scalar + int memory ops
target/arm: Add mte helpers for sve scalar + int ff/nf loads
target/arm: Add mte helpers for sve scalar + int stores
target/arm: Add mte helpers for sve scalar + int loads
target/arm: Add arm_tlb_bti_gp
target/arm: Tidy trans_LD1R_zpri
target/arm: Use mte_check1 for sve LD1R
target/arm: Use mte_checkN for sve unpredicated stores
target/arm: Use mte_checkN for sve unpredicated loads
target/arm: Add helper_mte_check_zva
target/arm: Implement helper_mte_checkN
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'target')
 target/arm/Makefile.objs       |    1
 target/arm/cpu.c               |   81
 target/arm/cpu.h               |   50
 target/arm/cpu64.c             |    5
 target/arm/helper-a64.c        |   96
 target/arm/helper-a64.h        |   16
 target/arm/helper-sve.h        |  488
 target/arm/helper.c            |  421
 target/arm/helper.h            |    2
 target/arm/internals.h         |  153
 target/arm/m_helper.c          |   11
 target/arm/mte_helper.c        |  906
 target/arm/op_helper.c         |   16
 target/arm/sve_helper.c        |  614
 target/arm/tlb_helper.c        |   13
 target/arm/translate-a64.c     |  657
 target/arm/translate-a64.h     |    5
 target/arm/translate-sve.c     | 1420
 target/arm/translate-vfp.inc.c |    4
 target/arm/translate.c         |   16
 target/arm/translate.h         |   23
21 files changed, 4141 insertions(+), 857 deletions(-)
diff --git a/target/arm/Makefile.objs b/target/arm/Makefile.objs
index 83febd2..fa39fd7 100644
--- a/target/arm/Makefile.objs
+++ b/target/arm/Makefile.objs
@@ -86,3 +86,4 @@ obj-$(CONFIG_SOFTMMU) += psci.o
 obj-$(TARGET_AARCH64) += translate-a64.o helper-a64.o
 obj-$(TARGET_AARCH64) += translate-sve.o sve_helper.o
 obj-$(TARGET_AARCH64) += pauth_helper.o
+obj-$(TARGET_AARCH64) += mte_helper.o
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index e44e180..5050e18 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -203,6 +203,9 @@ static void arm_cpu_reset(DeviceState *dev)
      * Enable TBI0 and TBI1.  While the real kernel only enables TBI0,
      * turning on both here will produce smaller code and otherwise
      * make no difference to the user-level emulation.
+     *
+     * In sve_probe_page, we assume that this is set.
+     * Do not modify this without other changes.
      */
     env->cp15.tcr_el[1].raw_tcr = (3ULL << 37);
 #else
@@ -1249,6 +1252,25 @@ void arm_cpu_post_init(Object *obj)
     if (kvm_enabled()) {
         kvm_arm_add_vcpu_properties(obj);
     }
+
+#ifndef CONFIG_USER_ONLY
+    if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64) &&
+        cpu_isar_feature(aa64_mte, cpu)) {
+        object_property_add_link(obj, "tag-memory",
+                                 TYPE_MEMORY_REGION,
+                                 (Object **)&cpu->tag_memory,
+                                 qdev_prop_allow_set_link_before_realize,
+                                 OBJ_PROP_LINK_STRONG);
+
+        if (arm_feature(&cpu->env, ARM_FEATURE_EL3)) {
+            object_property_add_link(obj, "secure-tag-memory",
+                                     TYPE_MEMORY_REGION,
+                                     (Object **)&cpu->secure_tag_memory,
+                                     qdev_prop_allow_set_link_before_realize,
+                                     OBJ_PROP_LINK_STRONG);
+        }
+    }
+#endif
 }
 
 static void arm_cpu_finalizefn(Object *obj)
@@ -1738,18 +1760,43 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
 #ifndef CONFIG_USER_ONLY
     MachineState *ms = MACHINE(qdev_get_machine());
     unsigned int smp_cpus = ms->smp.cpus;
+    bool has_secure = cpu->has_el3 || arm_feature(env, ARM_FEATURE_M_SECURITY);
 
-    if (cpu->has_el3 || arm_feature(env, ARM_FEATURE_M_SECURITY)) {
-        cs->num_ases = 2;
+    /*
+     * We must set cs->num_ases to the final value before
+     * the first call to cpu_address_space_init.
+     */
+    if (cpu->tag_memory != NULL) {
+        cs->num_ases = 3 + has_secure;
+    } else {
+        cs->num_ases = 1 + has_secure;
+    }
 
+    if (has_secure) {
         if (!cpu->secure_memory) {
             cpu->secure_memory = cs->memory;
         }
         cpu_address_space_init(cs, ARMASIdx_S, "cpu-secure-memory",
                                cpu->secure_memory);
-    } else {
-        cs->num_ases = 1;
     }
+
+    if (cpu->tag_memory != NULL) {
+        cpu_address_space_init(cs, ARMASIdx_TagNS, "cpu-tag-memory",
+                               cpu->tag_memory);
+        if (has_secure) {
+            cpu_address_space_init(cs, ARMASIdx_TagS, "cpu-tag-memory",
+                                   cpu->secure_tag_memory);
+        }
+    } else if (cpu_isar_feature(aa64_mte, cpu)) {
+        /*
+         * Since there is no tag memory, we can't meaningfully support MTE
+         * to its fullest.  To avoid problems later, when we would come to
+         * use the tag memory, downgrade support to insns only.
+         */
+        cpu->isar.id_aa64pfr1 =
+            FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 1);
+    }
+
     cpu_address_space_init(cs, ARMASIdx_NS, "cpu-memory", cs->memory);
 
     /* No core_count specified, default to smp_cpus. */
@@ -1758,6 +1805,30 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
     }
 #endif
 
+    if (tcg_enabled()) {
+        int dcz_blocklen = 4 << cpu->dcz_blocksize;
+
+        /*
+         * We only support DCZ blocklen that fits on one page.
+         *
+         * Architectually this is always true.  However TARGET_PAGE_SIZE
+         * is variable and, for compatibility with -machine virt-2.7,
+         * is only 1KiB, as an artifact of legacy ARMv5 subpage support.
+         * But even then, while the largest architectural DCZ blocklen
+         * is 2KiB, no cpu actually uses such a large blocklen.
+         */
+        assert(dcz_blocklen <= TARGET_PAGE_SIZE);
+
+        /*
+         * We only support DCZ blocksize >= 2*TAG_GRANULE, which is to say
+         * both nibbles of each byte storing tag data may be written at once.
+         * Since TAG_GRANULE is 16, this means that blocklen must be >= 32.
+         */
+        if (cpu_isar_feature(aa64_mte, cpu)) {
+            assert(dcz_blocklen >= 2 * TAG_GRANULE);
+        }
+    }
+
     qemu_init_vcpu(cs);
     cpu_reset(cs);
 
@@ -2169,8 +2240,8 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data)
     cc->tlb_fill = arm_cpu_tlb_fill;
     cc->debug_excp_handler = arm_debug_excp_handler;
     cc->debug_check_watchpoint = arm_debug_check_watchpoint;
-#if !defined(CONFIG_USER_ONLY)
     cc->do_unaligned_access = arm_cpu_do_unaligned_access;
+#if !defined(CONFIG_USER_ONLY)
     cc->do_transaction_failed = arm_cpu_do_transaction_failed;
     cc->adjust_watchpoint_address = arm_adjust_watchpoint_address;
 #endif /* CONFIG_TCG && !CONFIG_USER_ONLY */
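
The TAG_GRANULE arithmetic behind the second assertion above: one 4-bit allocation tag covers each 16-byte granule, so tag storage packs the tags of two adjacent granules into one byte. A self-contained sketch of that mapping (illustrative only, not QEMU's actual tag-memory accessors; the nibble polarity here is arbitrary):

    #include <stdint.h>

    #define TAG_GRANULE 16

    /* Two granules (32 bytes) of data share one byte of tag storage. */
    static inline uint64_t tag_byte_offset(uint64_t addr)
    {
        return addr / (2 * TAG_GRANULE);
    }

    /* Select the nibble within that byte for this granule. */
    static inline unsigned tag_nibble_shift(uint64_t addr)
    {
        return (addr & TAG_GRANULE) ? 4 : 0;
    }

Hence the requirement dcz_blocklen >= 2 * TAG_GRANULE: a tag-clearing DC over a whole block then always writes whole tag bytes, never a lone nibble.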
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index cf66b8c..cf99dcc 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -502,6 +502,9 @@ typedef struct CPUARMState {
         uint64_t pmccfiltr_el0; /* Performance Monitor Filter Register */
         uint64_t vpidr_el2; /* Virtualization Processor ID Register */
         uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */
+        uint64_t tfsr_el[4]; /* tfsre0_el1 is index 0. */
+        uint64_t gcr_el1;
+        uint64_t rgsr_el1;
     } cp15;
 
     struct {
@@ -789,6 +792,10 @@ struct ARMCPU {
     /* MemoryRegion to use for secure physical accesses */
     MemoryRegion *secure_memory;
 
+    /* MemoryRegion to use for allocation tag accesses */
+    MemoryRegion *tag_memory;
+    MemoryRegion *secure_tag_memory;
+
     /* For v8M, pointer to the IDAU interface provided by board/SoC */
     Object *idau;
 
@@ -1282,6 +1289,7 @@ void pmu_init(ARMCPU *cpu);
 #define PSTATE_SS (1U << 21)
 #define PSTATE_PAN (1U << 22)
 #define PSTATE_UAO (1U << 23)
+#define PSTATE_TCO (1U << 25)
 #define PSTATE_V (1U << 28)
 #define PSTATE_C (1U << 29)
 #define PSTATE_Z (1U << 30)
@@ -2356,7 +2364,9 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
 #define ARM_CP_NZCV (ARM_CP_SPECIAL | 0x0300)
 #define ARM_CP_CURRENTEL (ARM_CP_SPECIAL | 0x0400)
 #define ARM_CP_DC_ZVA (ARM_CP_SPECIAL | 0x0500)
-#define ARM_LAST_SPECIAL ARM_CP_DC_ZVA
+#define ARM_CP_DC_GVA (ARM_CP_SPECIAL | 0x0600)
+#define ARM_CP_DC_GZVA (ARM_CP_SPECIAL | 0x0700)
+#define ARM_LAST_SPECIAL ARM_CP_DC_GZVA
 #define ARM_CP_FPU 0x1000
 #define ARM_CP_SVE 0x2000
 #define ARM_CP_NO_GDB 0x4000
@@ -2979,6 +2989,8 @@ typedef enum ARMMMUIdxBit {
 typedef enum ARMASIdx {
     ARMASIdx_NS = 0,
     ARMASIdx_S = 1,
+    ARMASIdx_TagNS = 2,
+    ARMASIdx_TagS = 3,
 } ARMASIdx;
 
 /* Return the Exception Level targeted by debug exceptions. */
@@ -3183,10 +3195,10 @@ typedef ARMCPU ArchCPU;
 * |              |             | TBFLAG_A32 |              |
 * |              |  +-----+----------+  TBFLAG_AM32       |
 * |  TBFLAG_ANY  |        |TBFLAG_M32|                    |
-* |              |  +-+----------+--------------|
-* |              |    |      TBFLAG_A64         |
-* +--------------+---------+---------------------------+
-*  31            20        15                          0
+* |              +-----------+----------+--------------|
+* |              |           TBFLAG_A64                 |
+* +--------------+-------------------------------------+
+*  31            20                                    0
 *
 * Unless otherwise noted, these bits are cached in env->hflags.
 */
@@ -3253,6 +3265,10 @@ FIELD(TBFLAG_A64, BT, 9, 1)
 FIELD(TBFLAG_A64, BTYPE, 10, 2)         /* Not cached. */
 FIELD(TBFLAG_A64, TBID, 12, 2)
 FIELD(TBFLAG_A64, UNPRIV, 14, 1)
+FIELD(TBFLAG_A64, ATA, 15, 1)
+FIELD(TBFLAG_A64, TCMA, 16, 2)
+FIELD(TBFLAG_A64, MTE_ACTIVE, 18, 1)
+FIELD(TBFLAG_A64, MTE0_ACTIVE, 19, 1)
 
 /**
  * cpu_mmu_index:
@@ -3385,6 +3401,20 @@ static inline uint64_t *aa64_vfp_qreg(CPUARMState *env, unsigned regno)
 /* Shared between translate-sve.c and sve_helper.c.  */
 extern const uint64_t pred_esz_masks[4];
 
+/* Helper for the macros below, validating the argument type. */
+static inline MemTxAttrs *typecheck_memtxattrs(MemTxAttrs *x)
+{
+    return x;
+}
+
+/*
+ * Lvalue macros for ARM TLB bits that we must cache in the TCG TLB.
+ * Using these should be a bit more self-documenting than using the
+ * generic target bits directly.
+ */
+#define arm_tlb_bti_gp(x) (typecheck_memtxattrs(x)->target_tlb_bit0)
+#define arm_tlb_mte_tagged(x) (typecheck_memtxattrs(x)->target_tlb_bit1)
+
 /*
  * Naming convention for isar_feature functions:
  * Functions which test 32-bit ID registers should have _aa32_ in
@@ -3814,6 +3844,16 @@ static inline bool isar_feature_aa64_bti(const ARMISARegisters *id)
     return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, BT) != 0;
 }
 
+static inline bool isar_feature_aa64_mte_insn_reg(const ARMISARegisters *id)
+{
+    return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) != 0;
+}
+
+static inline bool isar_feature_aa64_mte(const ARMISARegisters *id)
+{
+    return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) >= 2;
+}
+
 static inline bool isar_feature_aa64_pmu_8_1(const ARMISARegisters *id)
 {
     return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 4 &&
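
The two feature tests just added encode the levels of the ID_AA64PFR1.MTE field: 1 means only the instructions and system registers exist, 2 and up adds real tag checking. The same gating as a standalone sketch, with a hand-rolled field extract in place of QEMU's FIELD_EX64 (field position per the Arm ARM):

    #include <stdbool.h>
    #include <stdint.h>

    /* ID_AA64PFR1.MTE is bits [11:8]: 0 = none, 1 = insns only, >= 2 = full. */
    static unsigned pfr1_mte(uint64_t id_aa64pfr1)
    {
        return (id_aa64pfr1 >> 8) & 0xf;
    }

    static bool mte_insn_reg(uint64_t pfr1) { return pfr1_mte(pfr1) != 0; }
    static bool mte_full(uint64_t pfr1)     { return pfr1_mte(pfr1) >= 2; }

This is also how realize "downgrades" a board that provides no tag memory: writing 1 back into the field keeps the EL0-visible instructions while dropping the checking machinery.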
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index a0c1d88..a2f4733 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -654,6 +654,11 @@ static void aarch64_max_initfn(Object *obj)
 
         t = cpu->isar.id_aa64pfr1;
         t = FIELD_DP64(t, ID_AA64PFR1, BT, 1);
+        /*
+         * Begin with full support for MTE; will be downgraded to MTE=1
+         * during realize if the board provides no tag memory.
+         */
+        t = FIELD_DP64(t, ID_AA64PFR1, MTE, 2);
         cpu->isar.id_aa64pfr1 = t;
 
         t = cpu->isar.id_aa64mmfr1;
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index bc0649a..8682630 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -1119,85 +1119,41 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
      * (which matches the usual QEMU behaviour of not implementing either
      * alignment faults or any memory attribute handling).
      */
-
-    ARMCPU *cpu = env_archcpu(env);
-    uint64_t blocklen = 4 << cpu->dcz_blocksize;
+    int blocklen = 4 << env_archcpu(env)->dcz_blocksize;
     uint64_t vaddr = vaddr_in & ~(blocklen - 1);
+    int mmu_idx = cpu_mmu_index(env, false);
+    void *mem;
+
+    /*
+     * Trapless lookup.  In addition to actual invalid page, may
+     * return NULL for I/O, watchpoints, clean pages, etc.
+     */
+    mem = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
 
 #ifndef CONFIG_USER_ONLY
-    {
+    if (unlikely(!mem)) {
+        uintptr_t ra = GETPC();
+
         /*
-         * Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than
-         * the block size so we might have to do more than one TLB lookup.
-         * We know that in fact for any v8 CPU the page size is at least 4K
-         * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only
-         * 1K as an artefact of legacy v5 subpage support being present in the
-         * same QEMU executable.  So in practice the hostaddr[] array has
-         * two entries, given the current setting of TARGET_PAGE_BITS_MIN.
+         * Trap if accessing an invalid page.  DC_ZVA requires that we supply
+         * the original pointer for an invalid page.  But watchpoints require
+         * that we probe the actual space.  So do both.
          */
-        int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE);
-        void *hostaddr[DIV_ROUND_UP(2 * KiB, 1 << TARGET_PAGE_BITS_MIN)];
-        int try, i;
-        unsigned mmu_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
-
-        assert(maxidx <= ARRAY_SIZE(hostaddr));
-
-        for (try = 0; try < 2; try++) {
-
-            for (i = 0; i < maxidx; i++) {
-                hostaddr[i] = tlb_vaddr_to_host(env,
-                                                vaddr + TARGET_PAGE_SIZE * i,
-                                                1, mmu_idx);
-                if (!hostaddr[i]) {
-                    break;
-                }
-            }
-            if (i == maxidx) {
-                /*
-                 * If it's all in the TLB it's fair game for just writing to;
-                 * we know we don't need to update dirty status, etc.
-                 */
-                for (i = 0; i < maxidx - 1; i++) {
-                    memset(hostaddr[i], 0, TARGET_PAGE_SIZE);
-                }
-                memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE));
-                return;
-            }
+        (void) probe_write(env, vaddr_in, 1, mmu_idx, ra);
+        mem = probe_write(env, vaddr, blocklen, mmu_idx, ra);
+
+        if (unlikely(!mem)) {
             /*
-             * OK, try a store and see if we can populate the tlb. This
-             * might cause an exception if the memory isn't writable,
-             * in which case we will longjmp out of here. We must for
-             * this purpose use the actual register value passed to us
-             * so that we get the fault address right.
+             * The only remaining reason for mem == NULL is I/O.
+             * Just do a series of byte writes as the architecture demands.
              */
-            helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC());
-            /* Now we can populate the other TLB entries, if any */
-            for (i = 0; i < maxidx; i++) {
-                uint64_t va = vaddr + TARGET_PAGE_SIZE * i;
-                if (va != (vaddr_in & TARGET_PAGE_MASK)) {
-                    helper_ret_stb_mmu(env, va, 0, oi, GETPC());
-                }
+            for (int i = 0; i < blocklen; i++) {
+                cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra);
             }
-        }
-
-        /*
-         * Slow path (probably attempt to do this to an I/O device or
-         * similar, or clearing of a block of code we have translations
-         * cached for). Just do a series of byte writes as the architecture
-         * demands.  It's not worth trying to use a cpu_physical_memory_map(),
-         * memset(), unmap() sequence here because:
-         *  + we'd need to account for the blocksize being larger than a page
-         *  + the direct-RAM access case is almost always going to be dealt
-         *    with in the fastpath code above, so there's no speed benefit
-         *  + we would have to deal with the map returning NULL because the
-         *    bounce buffer was in use
-         */
-        for (i = 0; i < blocklen; i++) {
-            helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETPC());
+            return;
         }
     }
-#else
-    memset(g2h(vaddr), 0, blocklen);
 #endif
+
+    memset(mem, 0, blocklen);
 }
diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index 3df7c18..5b0b699 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -103,3 +103,19 @@ DEF_HELPER_FLAGS_3(autda, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(autdb, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_2(xpaci, TCG_CALL_NO_RWG_SE, i64, env, i64)
 DEF_HELPER_FLAGS_2(xpacd, TCG_CALL_NO_RWG_SE, i64, env, i64)
+
+DEF_HELPER_FLAGS_3(mte_check1, TCG_CALL_NO_WG, i64, env, i32, i64)
+DEF_HELPER_FLAGS_3(mte_checkN, TCG_CALL_NO_WG, i64, env, i32, i64)
+DEF_HELPER_FLAGS_3(mte_check_zva, TCG_CALL_NO_WG, i64, env, i32, i64)
+DEF_HELPER_FLAGS_3(irg, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_4(addsubg, TCG_CALL_NO_RWG_SE, i64, env, i64, s32, i32)
+DEF_HELPER_FLAGS_3(ldg, TCG_CALL_NO_WG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(stg, TCG_CALL_NO_WG, void, env, i64, i64)
+DEF_HELPER_FLAGS_3(stg_parallel, TCG_CALL_NO_WG, void, env, i64, i64)
+DEF_HELPER_FLAGS_2(stg_stub, TCG_CALL_NO_WG, void, env, i64)
+DEF_HELPER_FLAGS_3(st2g, TCG_CALL_NO_WG, void, env, i64, i64)
+DEF_HELPER_FLAGS_3(st2g_parallel, TCG_CALL_NO_WG, void, env, i64, i64)
+DEF_HELPER_FLAGS_2(st2g_stub, TCG_CALL_NO_WG, void, env, i64)
+DEF_HELPER_FLAGS_2(ldgm, TCG_CALL_NO_WG, i64, env, i64)
+DEF_HELPER_FLAGS_3(stgm, TCG_CALL_NO_WG, void, env, i64, i64)
+DEF_HELPER_FLAGS_3(stzgm_tags, TCG_CALL_NO_WG, void, env, i64, i64)
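
The dc_zva rewrite above is a compact instance of QEMU's three-tier store strategy: a trapless host-pointer lookup, then a trapping probe, then byte-at-a-time stores for I/O. A toy, self-contained rendering of the control flow (lookup_host stands in for tlb_vaddr_to_host; the probe and I/O tiers are elided):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static uint8_t ram[4096];                /* toy guest RAM */

    static void *lookup_host(uint64_t vaddr) /* cf. tlb_vaddr_to_host */
    {
        return vaddr < sizeof(ram) ? &ram[vaddr] : NULL;
    }

    static void zero_block(uint64_t vaddr_in, unsigned blocklen)
    {
        uint64_t vaddr = vaddr_in & ~(uint64_t)(blocklen - 1);
        void *mem = lookup_host(vaddr);      /* tier 1: trapless */
        if (!mem) {
            /* Tier 2 in QEMU is probe_write(), which may raise the guest
             * fault; tier 3 is per-byte I/O stores.  Elided here. */
            return;
        }
        memset(mem, 0, blocklen);            /* fast path: host RAM */
    }

    int main(void)
    {
        memset(ram, 0xaa, sizeof(ram));
        zero_block(0x47, 64);                /* zeroes ram[0x40..0x7f] */
        printf("%02x %02x\n", ram[0x3f], ram[0x40]);  /* prints "aa 00" */
        return 0;
    }

Note the probe of vaddr_in (the original, possibly unaligned pointer) before the probe of the aligned block in the real code: the architecture wants the fault address to be the register value, while watchpoints want the whole range probed — hence "do both".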
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 7a20075..63c4a08 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -1196,6 +1196,64 @@ DEF_HELPER_FLAGS_4(sve_ld1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ld1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
+DEF_HELPER_FLAGS_4(sve_ld1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1hsu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hdu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hds_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1hsu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hdu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1sdu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1sds_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1sds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
 DEF_HELPER_FLAGS_4(sve_ldff1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ldff1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ldff1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@@ -1227,6 +1285,55 @@ DEF_HELPER_FLAGS_4(sve_ldff1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ldff1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ldff1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
+DEF_HELPER_FLAGS_4(sve_ldff1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldff1hh_le_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hsu_le_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hdu_le_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hss_le_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hds_le_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldff1hh_be_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hsu_be_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hdu_be_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hss_be_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hds_be_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldff1ss_le_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1sdu_le_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1sds_le_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldff1ss_be_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1sdu_be_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1sds_be_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldff1dd_le_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1dd_be_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+
 DEF_HELPER_FLAGS_4(sve_ldnf1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ldnf1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@@ -1258,6 +1365,55 @@ DEF_HELPER_FLAGS_4(sve_ldnf1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ldnf1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_ldnf1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
+DEF_HELPER_FLAGS_4(sve_ldnf1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1hh_le_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hsu_le_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hdu_le_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hss_le_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hds_le_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1hh_be_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hsu_be_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hdu_be_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hss_be_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hds_be_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1ss_le_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1sdu_le_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1sds_le_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1ss_be_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1sdu_be_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1sds_be_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1dd_le_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1dd_be_r_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, tl, i32)
+
 DEF_HELPER_FLAGS_4(sve_st1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_st2bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_st3bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@@ -1305,6 +1461,53 @@ DEF_HELPER_FLAGS_4(sve_st1hd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_st1sd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 DEF_HELPER_FLAGS_4(sve_st1sd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
 
+DEF_HELPER_FLAGS_4(sve_st1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1bh_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1bs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1bd_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1hs_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1hd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1hs_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1hd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1sd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1sd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
 DEF_HELPER_FLAGS_6(sve_ldbsu_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu, TCG_CALL_NO_WG,
@@ -1414,6 +1617,115 @@ DEF_HELPER_FLAGS_6(sve_ldsds_le_zd, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 DEF_HELPER_FLAGS_6(sve_ldsds_be_zd, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 
+DEF_HELPER_FLAGS_6(sve_ldbsu_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhsu_be_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldss_le_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldss_be_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldbss_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhss_le_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhss_be_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_ldbsu_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhsu_le_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhsu_be_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldss_le_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldss_be_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldbss_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhss_le_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhss_be_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_ldbdu_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhdu_le_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhdu_be_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsdu_le_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsdu_be_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_le_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_be_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldbds_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhds_le_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhds_be_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_le_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_be_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_ldbdu_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhdu_le_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhdu_be_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsdu_le_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsdu_be_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_le_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_be_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldbds_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhds_le_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhds_be_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_le_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_be_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_ldbdu_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhdu_le_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhdu_be_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsdu_le_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsdu_be_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_le_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_be_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldbds_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhds_le_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhds_be_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_le_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_be_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+
 DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zsu, TCG_CALL_NO_WG,
@@ -1523,6 +1835,115 @@ DEF_HELPER_FLAGS_6(sve_ldffsds_le_zd, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 DEF_HELPER_FLAGS_6(sve_ldffsds_be_zd, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 
+DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffss_le_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffss_be_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffbss_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhss_le_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhss_be_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_ldffbsu_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffss_le_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffss_be_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffbss_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhss_le_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhss_be_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_ldffbdu_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_le_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_be_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffbds_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhds_le_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhds_be_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_le_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_be_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_ldffbdu_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_le_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_be_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffbds_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhds_le_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhds_be_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_le_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_be_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_ldffbdu_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_le_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_be_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffbds_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhds_le_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhds_be_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_le_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_be_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+
 DEF_HELPER_FLAGS_6(sve_stbs_zsu, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 DEF_HELPER_FLAGS_6(sve_sths_le_zsu, TCG_CALL_NO_WG,
@@ -1590,4 +2011,71 @@ DEF_HELPER_FLAGS_6(sve_stdd_le_zd, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 DEF_HELPER_FLAGS_6(sve_stdd_be_zd, TCG_CALL_NO_WG,
                    void, env, ptr, ptr, ptr, tl, i32)
 
+DEF_HELPER_FLAGS_6(sve_stbs_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sths_le_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sths_be_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stss_le_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stss_be_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_stbs_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sths_le_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sths_be_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stss_le_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stss_be_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_stbd_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sthd_le_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sthd_be_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stsd_le_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stsd_be_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_le_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_be_zsu_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_stbd_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sthd_le_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sthd_be_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stsd_le_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stsd_be_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_le_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_be_zss_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_stbd_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sthd_le_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sthd_be_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stsd_le_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stsd_be_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_le_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_be_zd_mte, TCG_CALL_NO_WG,
+                   void, env, ptr, ptr, ptr, tl, i32)
+
 DEF_HELPER_FLAGS_4(sve2_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
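
The _mte entries above double an already large helper matrix. Each name encodes, in order: register count (ld1..ld4, st1..st4), memory and destination element sizes with sign/zero extension (bb, hsu, sds, ...), endianness (le/be), addressing form (_r for scalar+immediate; _zsu/_zss/_zd for gathers by 32-bit unsigned, 32-bit signed, or 64-bit offsets), and finally the _mte suffix. At translate time one helper is picked out of a table; an illustrative, self-contained rendering of that selection style (stub functions, not the real translate-sve.c tables):

    #include <stdio.h>

    static void ld1bb_le(void)     { puts("le");     }
    static void ld1bb_be(void)     { puts("be");     }
    static void ld1bb_le_mte(void) { puts("le mte"); }
    static void ld1bb_be_mte(void) { puts("be mte"); }

    /* Indexed by [big_endian][mte_active]. */
    static void (* const ld1bb_fns[2][2])(void) = {
        { ld1bb_le, ld1bb_le_mte },
        { ld1bb_be, ld1bb_be_mte },
    };

    int main(void)
    {
        int big_endian = 0, mte_active = 1;
        ld1bb_fns[big_endian][mte_active]();  /* prints "le mte" */
        return 0;
    }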
*/ value &= valid_mask; - /* These bits change the MMU setup: + /* + * These bits change the MMU setup: * HCR_VM enables stage 2 translation * HCR_PTW forbids certain page-table setups - * HCR_DC Disables stage1 and enables stage2 translation + * HCR_DC disables stage1 and enables stage2 translation + * HCR_DCT enables tagging on (disabled) stage1 translation */ - if ((env->cp15.hcr_el2 ^ value) & (HCR_VM | HCR_PTW | HCR_DC)) { + if ((env->cp15.hcr_el2 ^ value) & (HCR_VM | HCR_PTW | HCR_DC | HCR_DCT)) { tlb_flush(CPU(cpu)); } env->cp15.hcr_el2 = value; @@ -5861,6 +5882,9 @@ static void define_arm_vh_e2h_redirects_aliases(ARMCPU *cpu) { K(3, 0, 1, 2, 0), K(3, 4, 1, 2, 0), K(3, 5, 1, 2, 0), "ZCR_EL1", "ZCR_EL2", "ZCR_EL12", isar_feature_aa64_sve }, + { K(3, 0, 5, 6, 0), K(3, 4, 5, 6, 0), K(3, 5, 5, 6, 0), + "TFSR_EL1", "TFSR_EL2", "TFSR_EL12", isar_feature_aa64_mte }, + /* TODO: ARMv8.2-SPE -- PMSCR_EL2 */ /* TODO: ARMv8.4-Trace -- TRFCR_EL2 */ }; @@ -6835,6 +6859,165 @@ static const ARMCPRegInfo dcpodp_reg[] = { }; #endif /*CONFIG_USER_ONLY*/ +static CPAccessResult access_aa64_tid5(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + if ((arm_current_el(env) < 2) && (arm_hcr_el2_eff(env) & HCR_TID5)) { + return CP_ACCESS_TRAP_EL2; + } + + return CP_ACCESS_OK; +} + +static CPAccessResult access_mte(CPUARMState *env, const ARMCPRegInfo *ri, + bool isread) +{ + int el = arm_current_el(env); + + if (el < 2 && + arm_feature(env, ARM_FEATURE_EL2) && + !(arm_hcr_el2_eff(env) & HCR_ATA)) { + return CP_ACCESS_TRAP_EL2; + } + if (el < 3 && + arm_feature(env, ARM_FEATURE_EL3) && + !(env->cp15.scr_el3 & SCR_ATA)) { + return CP_ACCESS_TRAP_EL3; + } + return CP_ACCESS_OK; +} + +static uint64_t tco_read(CPUARMState *env, const ARMCPRegInfo *ri) +{ + return env->pstate & PSTATE_TCO; +} + +static void tco_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t val) +{ + env->pstate = (env->pstate & ~PSTATE_TCO) | (val & PSTATE_TCO); +} + +static const ARMCPRegInfo mte_reginfo[] = { + { .name = "TFSRE0_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 6, .opc2 = 1, + .access = PL1_RW, .accessfn = access_mte, + .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[0]) }, + { .name = "TFSR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 6, .opc2 = 0, + .access = PL1_RW, .accessfn = access_mte, + .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[1]) }, + { .name = "TFSR_EL2", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 6, .opc2 = 0, + .access = PL2_RW, .accessfn = access_mte, + .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[2]) }, + { .name = "TFSR_EL3", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 6, .crn = 5, .crm = 6, .opc2 = 0, + .access = PL3_RW, + .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[3]) }, + { .name = "RGSR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 5, + .access = PL1_RW, .accessfn = access_mte, + .fieldoffset = offsetof(CPUARMState, cp15.rgsr_el1) }, + { .name = "GCR_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 6, + .access = PL1_RW, .accessfn = access_mte, + .fieldoffset = offsetof(CPUARMState, cp15.gcr_el1) }, + { .name = "GMID_EL1", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 1, .crn = 0, .crm = 0, .opc2 = 4, + .access = PL1_R, .accessfn = access_aa64_tid5, + .type = ARM_CP_CONST, .resetvalue = GMID_EL1_BS }, + { .name = "TCO", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 7, + 
.type = ARM_CP_NO_RAW, + .access = PL0_RW, .readfn = tco_read, .writefn = tco_write }, + { .name = "DC_IGVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 3, + .type = ARM_CP_NOP, .access = PL1_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_IGSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 4, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + { .name = "DC_IGDVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 5, + .type = ARM_CP_NOP, .access = PL1_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_IGDSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 6, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + { .name = "DC_CGSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 4, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + { .name = "DC_CGDSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 6, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + { .name = "DC_CIGSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 4, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + { .name = "DC_CIGDSW", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 6, + .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw }, + REGINFO_SENTINEL +}; + +static const ARMCPRegInfo mte_tco_ro_reginfo[] = { + { .name = "TCO", .state = ARM_CP_STATE_AA64, + .opc0 = 3, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 7, + .type = ARM_CP_CONST, .access = PL0_RW, }, + REGINFO_SENTINEL +}; + +static const ARMCPRegInfo mte_el0_cacheop_reginfo[] = { + { .name = "DC_CGVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 3, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CGDVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 5, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CGVAP", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 3, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CGDVAP", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 5, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CGVADP", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 3, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CGDVADP", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 5, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CIGVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 3, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_CIGDVAC", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 5, + .type = ARM_CP_NOP, .access = PL0_W, + .accessfn = aa64_cacheop_poc_access }, + { .name = "DC_GVA", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 3, + .access = PL0_W, .type = ARM_CP_DC_GVA, +#ifndef CONFIG_USER_ONLY + /* Avoid overhead of an access check that always passes in 
user-mode */ + .accessfn = aa64_zva_access, +#endif + }, + { .name = "DC_GZVA", .state = ARM_CP_STATE_AA64, + .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 4, + .access = PL0_W, .type = ARM_CP_DC_GZVA, +#ifndef CONFIG_USER_ONLY + /* Avoid overhead of an access check that always passes in user-mode */ + .accessfn = aa64_zva_access, +#endif + }, + REGINFO_SENTINEL +}; + #endif static CPAccessResult access_predinv(CPUARMState *env, const ARMCPRegInfo *ri, @@ -7960,6 +8143,19 @@ void register_cp_regs_for_features(ARMCPU *cpu) } } #endif /*CONFIG_USER_ONLY*/ + + /* + * If full MTE is enabled, add all of the system registers. + * If only "instructions available at EL0" are enabled, + * then define only a RAZ/WI version of PSTATE.TCO. + */ + if (cpu_isar_feature(aa64_mte, cpu)) { + define_arm_cp_regs(cpu, mte_reginfo); + define_arm_cp_regs(cpu, mte_el0_cacheop_reginfo); + } else if (cpu_isar_feature(aa64_mte_insn_reg, cpu)) { + define_arm_cp_regs(cpu, mte_tco_ro_reginfo); + define_arm_cp_regs(cpu, mte_el0_cacheop_reginfo); + } #endif if (cpu_isar_feature(any_predinv, cpu)) { @@ -9509,6 +9705,9 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs) break; } } + if (cpu_isar_feature(aa64_mte, cpu)) { + new_mode |= PSTATE_TCO; + } pstate_write(env, PSTATE_DAIF | new_mode); env->aarch64 = 1; @@ -9614,42 +9813,6 @@ void arm_cpu_do_interrupt(CPUState *cs) } #endif /* !CONFIG_USER_ONLY */ -/* Return the exception level which controls this address translation regime */ -static uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) -{ - switch (mmu_idx) { - case ARMMMUIdx_E20_0: - case ARMMMUIdx_E20_2: - case ARMMMUIdx_E20_2_PAN: - case ARMMMUIdx_Stage2: - case ARMMMUIdx_E2: - return 2; - case ARMMMUIdx_SE3: - return 3; - case ARMMMUIdx_SE10_0: - return arm_el_is_aa64(env, 3) ? 1 : 3; - case ARMMMUIdx_SE10_1: - case ARMMMUIdx_SE10_1_PAN: - case ARMMMUIdx_Stage1_E0: - case ARMMMUIdx_Stage1_E1: - case ARMMMUIdx_Stage1_E1_PAN: - case ARMMMUIdx_E10_0: - case ARMMMUIdx_E10_1: - case ARMMMUIdx_E10_1_PAN: - case ARMMMUIdx_MPrivNegPri: - case ARMMMUIdx_MUserNegPri: - case ARMMMUIdx_MPriv: - case ARMMMUIdx_MUser: - case ARMMMUIdx_MSPrivNegPri: - case ARMMMUIdx_MSUserNegPri: - case ARMMMUIdx_MSPriv: - case ARMMMUIdx_MSUser: - return 1; - default: - g_assert_not_reached(); - } -} - uint64_t arm_sctlr(CPUARMState *env, int el) { /* Only EL0 needs to be adjusted for EL1&0 or EL2&0. */ @@ -9732,15 +9895,6 @@ static inline uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx, #endif /* !CONFIG_USER_ONLY */ -/* Return the TCR controlling this translation regime */ -static inline TCR *regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx) -{ - if (mmu_idx == ARMMMUIdx_Stage2) { - return &env->cp15.vtcr_el2; - } - return &env->cp15.tcr_el[regime_el(env, mmu_idx)]; -} - /* Convert a possible stage1+2 MMU index into the appropriate * stage 1 MMU index */ @@ -10541,6 +10695,16 @@ static int aa64_va_parameter_tbid(uint64_t tcr, ARMMMUIdx mmu_idx) } } +static int aa64_va_parameter_tcma(uint64_t tcr, ARMMMUIdx mmu_idx) +{ + if (regime_has_2_ranges(mmu_idx)) { + return extract64(tcr, 57, 2); + } else { + /* Replicate the single TCMA bit so we always have 2 bits. */ + return extract32(tcr, 30, 1) * 3; + } +} + ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va, ARMMMUIdx mmu_idx, bool data) { @@ -10935,22 +11099,19 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address, } /* When in aarch64 mode, and BTI is enabled, remember GP in the IOTLB. 
*/ if (aarch64 && guarded && cpu_isar_feature(aa64_bti, cpu)) { - txattrs->target_tlb_bit0 = true; + arm_tlb_bti_gp(txattrs) = true; } - if (cacheattrs != NULL) { - if (mmu_idx == ARMMMUIdx_Stage2) { - cacheattrs->attrs = convert_stage2_attrs(env, - extract32(attrs, 0, 4)); - } else { - /* Index into MAIR registers for cache attributes */ - uint8_t attrindx = extract32(attrs, 0, 3); - uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)]; - assert(attrindx <= 7); - cacheattrs->attrs = extract64(mair, attrindx * 8, 8); - } - cacheattrs->shareability = extract32(attrs, 6, 2); + if (mmu_idx == ARMMMUIdx_Stage2) { + cacheattrs->attrs = convert_stage2_attrs(env, extract32(attrs, 0, 4)); + } else { + /* Index into MAIR registers for cache attributes */ + uint8_t attrindx = extract32(attrs, 0, 3); + uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)]; + assert(attrindx <= 7); + cacheattrs->attrs = extract64(mair, attrindx * 8, 8); } + cacheattrs->shareability = extract32(attrs, 6, 2); *phys_ptr = descaddr; *page_size_ptr = page_size; @@ -11673,9 +11834,19 @@ static uint8_t combine_cacheattr_nibble(uint8_t s1, uint8_t s2) */ static ARMCacheAttrs combine_cacheattrs(ARMCacheAttrs s1, ARMCacheAttrs s2) { - uint8_t s1lo = extract32(s1.attrs, 0, 4), s2lo = extract32(s2.attrs, 0, 4); - uint8_t s1hi = extract32(s1.attrs, 4, 4), s2hi = extract32(s2.attrs, 4, 4); + uint8_t s1lo, s2lo, s1hi, s2hi; ARMCacheAttrs ret; + bool tagged = false; + + if (s1.attrs == 0xf0) { + tagged = true; + s1.attrs = 0xff; + } + + s1lo = extract32(s1.attrs, 0, 4); + s2lo = extract32(s2.attrs, 0, 4); + s1hi = extract32(s1.attrs, 4, 4); + s2hi = extract32(s2.attrs, 4, 4); /* Combine shareability attributes (table D4-43) */ if (s1.shareability == 2 || s2.shareability == 2) { @@ -11723,6 +11894,11 @@ static ARMCacheAttrs combine_cacheattrs(ARMCacheAttrs s1, ARMCacheAttrs s2) } } + /* TODO: CombineS1S2Desc does not consider transient, only WB, RWA. */ + if (tagged && ret.attrs == 0xff) { + ret.attrs = 0xf0; + } + return ret; } @@ -11785,28 +11961,32 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, ret = get_phys_addr_lpae(env, ipa, access_type, ARMMMUIdx_Stage2, mmu_idx == ARMMMUIdx_E10_0, phys_ptr, attrs, &s2_prot, - page_size, fi, - cacheattrs != NULL ? &cacheattrs2 : NULL); + page_size, fi, &cacheattrs2); fi->s2addr = ipa; /* Combine the S1 and S2 perms. */ *prot &= s2_prot; - /* Combine the S1 and S2 cache attributes, if needed */ - if (!ret && cacheattrs != NULL) { - if (env->cp15.hcr_el2 & HCR_DC) { - /* - * HCR.DC forces the first stage attributes to - * Normal Non-Shareable, - * Inner Write-Back Read-Allocate Write-Allocate, - * Outer Write-Back Read-Allocate Write-Allocate. - */ + /* If S2 fails, return early. */ + if (ret) { + return ret; + } + + /* Combine the S1 and S2 cache attributes. */ + if (env->cp15.hcr_el2 & HCR_DC) { + /* + * HCR.DC forces the first stage attributes to + * Normal Non-Shareable, + * Inner Write-Back Read-Allocate Write-Allocate, + * Outer Write-Back Read-Allocate Write-Allocate. + * Do not overwrite Tagged within attrs. + */ + if (cacheattrs->attrs != 0xf0) { cacheattrs->attrs = 0xff; - cacheattrs->shareability = 0; } - *cacheattrs = combine_cacheattrs(*cacheattrs, cacheattrs2); + cacheattrs->shareability = 0; } - - return ret; + *cacheattrs = combine_cacheattrs(*cacheattrs, cacheattrs2); + return 0; } else { /* * For non-EL2 CPUs a stage1+stage2 translation is just stage 1. 
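
A side note on the combine_cacheattrs() hunk above: MAIR attribute 0xf0 (Tagged Normal, WB RWA) is folded to plain 0xff before the stage-1/stage-2 combine and restored only when the combined result is still 0xff, so any stage-2 downgrade silently drops the Tagged property. A minimal C sketch of that flow, with a deliberately simplified stand-in for the per-nibble combine rules (the real rules live in combine_cacheattr_nibble()):

#include <stdbool.h>
#include <stdint.h>

/* Stand-in for combine_cacheattr_nibble(): pick the weaker attribute.
 * Illustration only; good enough to show the Tagged-bit dance. */
static uint8_t weaker_nibble(uint8_t a, uint8_t b)
{
    return a < b ? a : b;
}

static uint8_t combine_tagged_attrs(uint8_t s1, uint8_t s2)
{
    bool tagged = false;
    uint8_t ret;

    if (s1 == 0xf0) {           /* Tagged Normal WB: fold to plain WB */
        tagged = true;
        s1 = 0xff;
    }
    ret = (weaker_nibble(s1 >> 4, s2 >> 4) << 4)
        | weaker_nibble(s1 & 0xf, s2 & 0xf);
    if (tagged && ret == 0xff) {
        ret = 0xf0;             /* stage 2 preserved WB: still Tagged */
    }
    return ret;                 /* any downgrade loses the Tagged bit */
}
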
@@ -11867,6 +12047,9 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, /* Definitely a real MMU, not an MPU */ if (regime_translation_disabled(env, mmu_idx)) { + uint64_t hcr; + uint8_t memattr; + /* * MMU disabled. S1 addresses within aa64 translation regimes are * still checked for bounds -- see AArch64.TranslateAddressS1Off. @@ -11904,6 +12087,27 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, *phys_ptr = address; *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; *page_size = TARGET_PAGE_SIZE; + + /* Fill in cacheattr a-la AArch64.TranslateAddressS1Off. */ + hcr = arm_hcr_el2_eff(env); + cacheattrs->shareability = 0; + if (hcr & HCR_DC) { + if (hcr & HCR_DCT) { + memattr = 0xf0; /* Tagged, Normal, WB, RWA */ + } else { + memattr = 0xff; /* Normal, WB, RWA */ + } + } else if (access_type == MMU_INST_FETCH) { + if (regime_sctlr(env, mmu_idx) & SCTLR_I) { + memattr = 0xee; /* Normal, WT, RA, NT */ + } else { + memattr = 0x44; /* Normal, NC, No */ + } + cacheattrs->shareability = 2; /* outer sharable */ + } else { + memattr = 0x00; /* Device, nGnRnE */ + } + cacheattrs->attrs = memattr; return 0; } @@ -11931,11 +12135,12 @@ hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, bool ret; ARMMMUFaultInfo fi = {}; ARMMMUIdx mmu_idx = arm_mmu_idx(env); + ARMCacheAttrs cacheattrs = {}; *attrs = (MemTxAttrs) {}; ret = get_phys_addr(env, addr, 0, mmu_idx, &phys_addr, - attrs, &prot, &page_size, &fi, NULL); + attrs, &prot, &page_size, &fi, &cacheattrs); if (ret) { return -1; @@ -12565,6 +12770,36 @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, } } + if (cpu_isar_feature(aa64_mte, env_archcpu(env))) { + /* + * Set MTE_ACTIVE if any access may be Checked, and leave clear + * if all accesses must be Unchecked: + * 1) If no TBI, then there are no tags in the address to check, + * 2) If Tag Check Override, then all accesses are Unchecked, + * 3) If Tag Check Fail == 0, then Checked access have no effect, + * 4) If no Allocation Tag Access, then all accesses are Unchecked. + */ + if (allocation_tag_access_enabled(env, el, sctlr)) { + flags = FIELD_DP32(flags, TBFLAG_A64, ATA, 1); + if (tbid + && !(env->pstate & PSTATE_TCO) + && (sctlr & (el == 0 ? SCTLR_TCF0 : SCTLR_TCF))) { + flags = FIELD_DP32(flags, TBFLAG_A64, MTE_ACTIVE, 1); + } + } + /* And again for unprivileged accesses, if required. */ + if (FIELD_EX32(flags, TBFLAG_A64, UNPRIV) + && tbid + && !(env->pstate & PSTATE_TCO) + && (sctlr & SCTLR_TCF0) + && allocation_tag_access_enabled(env, 0, sctlr)) { + flags = FIELD_DP32(flags, TBFLAG_A64, MTE0_ACTIVE, 1); + } + /* Cache TCMA as well as TBI. 
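
Why aa64_va_parameter_tcma() (earlier in this patch) multiplies the lone TCMA bit by 3 deserves a note: two-range regimes carry TCMA0/TCMA1 at TCR bits 57:58, one-range regimes have a single TCMA at bit 30, and replicating that bit (0 -> 0b00, 1 -> 0b11) lets every consumer select by bit 55 of the pointer regardless of regime. A small self-check, illustration only:

#include <assert.h>

static int tcma_select(int tcma_bits, int bit55)
{
    return (tcma_bits >> bit55) & 1;
}

int main(void)
{
    /* One-range regime with TCR.TCMA = 1: replicated to 0b11, so the
     * bit-55 selection becomes a don't-care, exactly as intended. */
    int tcma = 1 * 3;
    assert(tcma_select(tcma, 0) == 1);
    assert(tcma_select(tcma, 1) == 1);
    /* TCMA = 0 replicates to 0b00: never unchecked via TCMA. */
    assert(tcma_select(0 * 3, 0) == 0 && tcma_select(0 * 3, 1) == 0);
    return 0;
}
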
*/ + flags = FIELD_DP32(flags, TBFLAG_A64, TCMA, + aa64_va_parameter_tcma(tcr, mmu_idx)); + } + return rebuild_hflags_common(env, fp_el, mmu_idx, flags); } diff --git a/target/arm/helper.h b/target/arm/helper.h index 2a20c81..759639a 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -96,6 +96,8 @@ DEF_HELPER_FLAGS_1(rebuild_hflags_a32_newel, TCG_CALL_NO_RWG, void, env) DEF_HELPER_FLAGS_2(rebuild_hflags_a32, TCG_CALL_NO_RWG, void, env, int) DEF_HELPER_FLAGS_2(rebuild_hflags_a64, TCG_CALL_NO_RWG, void, env, int) +DEF_HELPER_FLAGS_5(probe_access, TCG_CALL_NO_WG, void, env, tl, i32, i32, i32) + DEF_HELPER_1(vfp_get_fpscr, i32, env) DEF_HELPER_2(vfp_set_fpscr, void, env, i32) diff --git a/target/arm/internals.h b/target/arm/internals.h index 4bdbc3a..ae99725 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -913,6 +913,51 @@ static inline bool regime_is_pan(CPUARMState *env, ARMMMUIdx mmu_idx) } } +/* Return the exception level which controls this address translation regime */ +static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + switch (mmu_idx) { + case ARMMMUIdx_E20_0: + case ARMMMUIdx_E20_2: + case ARMMMUIdx_E20_2_PAN: + case ARMMMUIdx_Stage2: + case ARMMMUIdx_E2: + return 2; + case ARMMMUIdx_SE3: + return 3; + case ARMMMUIdx_SE10_0: + return arm_el_is_aa64(env, 3) ? 1 : 3; + case ARMMMUIdx_SE10_1: + case ARMMMUIdx_SE10_1_PAN: + case ARMMMUIdx_Stage1_E0: + case ARMMMUIdx_Stage1_E1: + case ARMMMUIdx_Stage1_E1_PAN: + case ARMMMUIdx_E10_0: + case ARMMMUIdx_E10_1: + case ARMMMUIdx_E10_1_PAN: + case ARMMMUIdx_MPrivNegPri: + case ARMMMUIdx_MUserNegPri: + case ARMMMUIdx_MPriv: + case ARMMMUIdx_MUser: + case ARMMMUIdx_MSPrivNegPri: + case ARMMMUIdx_MSUserNegPri: + case ARMMMUIdx_MSPriv: + case ARMMMUIdx_MSUser: + return 1; + default: + g_assert_not_reached(); + } +} + +/* Return the TCR controlling this translation regime */ +static inline TCR *regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx) +{ + if (mmu_idx == ARMMMUIdx_Stage2) { + return &env->cp15.vtcr_el2; + } + return &env->cp15.tcr_el[regime_el(env, mmu_idx)]; +} + /* Return the FSR value for a debug exception (watchpoint, hardware * breakpoint or BKPT insn) targeting the specified exception level. */ @@ -1159,6 +1204,9 @@ static inline uint32_t aarch64_pstate_valid_mask(const ARMISARegisters *id) if (isar_feature_aa64_uao(id)) { valid |= PSTATE_UAO; } + if (isar_feature_aa64_mte(id)) { + valid |= PSTATE_TCO; + } return valid; } @@ -1195,6 +1243,24 @@ static inline int exception_target_el(CPUARMState *env) return target_el; } +/* Determine if allocation tags are available. */ +static inline bool allocation_tag_access_enabled(CPUARMState *env, int el, + uint64_t sctlr) +{ + if (el < 3 + && arm_feature(env, ARM_FEATURE_EL3) + && !(env->cp15.scr_el3 & SCR_ATA)) { + return false; + } + if (el < 2 + && arm_feature(env, ARM_FEATURE_EL2) + && !(arm_hcr_el2_eff(env) & HCR_ATA)) { + return false; + } + sctlr &= (el == 0 ? SCTLR_ATA0 : SCTLR_ATA); + return sctlr != 0; +} + #ifndef CONFIG_USER_ONLY /* Security attributes for an address, as returned by v8m_security_lookup. 
*/ @@ -1228,10 +1294,95 @@ bool get_phys_addr(CPUARMState *env, target_ulong address, MMUAccessType access_type, ARMMMUIdx mmu_idx, hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot, target_ulong *page_size, - ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs); + ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs) + __attribute__((nonnull)); void arm_log_exception(int idx); #endif /* !CONFIG_USER_ONLY */ +/* + * The log2 of the words in the tag block, for GMID_EL1.BS. + * The is the maximum, 256 bytes, which manipulates 64-bits of tags. + */ +#define GMID_EL1_BS 6 + +/* We associate one allocation tag per 16 bytes, the minimum. */ +#define LOG2_TAG_GRANULE 4 +#define TAG_GRANULE (1 << LOG2_TAG_GRANULE) + +/* + * The SVE simd_data field, for memory ops, contains either + * rd (5 bits) or a shift count (2 bits). + */ +#define SVE_MTEDESC_SHIFT 5 + +/* Bits within a descriptor passed to the helper_mte_check* functions. */ +FIELD(MTEDESC, MIDX, 0, 4) +FIELD(MTEDESC, TBI, 4, 2) +FIELD(MTEDESC, TCMA, 6, 2) +FIELD(MTEDESC, WRITE, 8, 1) +FIELD(MTEDESC, ESIZE, 9, 5) +FIELD(MTEDESC, TSIZE, 14, 10) /* mte_checkN only */ + +bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr); +uint64_t mte_check1(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra); +uint64_t mte_checkN(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra); + +static inline int allocation_tag_from_addr(uint64_t ptr) +{ + return extract64(ptr, 56, 4); +} + +static inline uint64_t address_with_allocation_tag(uint64_t ptr, int rtag) +{ + return deposit64(ptr, 56, 4, rtag); +} + +/* Return true if tbi bits mean that the access is checked. */ +static inline bool tbi_check(uint32_t desc, int bit55) +{ + return (desc >> (R_MTEDESC_TBI_SHIFT + bit55)) & 1; +} + +/* Return true if tcma bits mean that the access is unchecked. */ +static inline bool tcma_check(uint32_t desc, int bit55, int ptr_tag) +{ + /* + * We had extracted bit55 and ptr_tag for other reasons, so fold + * (ptr<59:55> == 00000 || ptr<59:55> == 11111) into a single test. + */ + bool match = ((ptr_tag + bit55) & 0xf) == 0; + bool tcma = (desc >> (R_MTEDESC_TCMA_SHIFT + bit55)) & 1; + return tcma && match; +} + +/* + * For TBI, ideally, we would do nothing. Proper behaviour on fault is + * for the tag to be present in the FAR_ELx register. But for user-only + * mode, we do not have a TLB with which to implement this, so we must + * remove the top byte. + */ +static inline uint64_t useronly_clean_ptr(uint64_t ptr) +{ + /* TBI is known to be enabled. 
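
The folded test in tcma_check() above is worth a sanity check: ((ptr_tag + bit55) & 0xf) == 0 holds exactly when ptr<59:55> is all-zeros or all-ones. An exhaustive verification, illustration only:

#include <assert.h>

int main(void)
{
    for (int bit55 = 0; bit55 < 2; bit55++) {
        for (int tag = 0; tag < 16; tag++) {
            int folded = ((tag + bit55) & 0xf) == 0;
            /* ptr<59:55> == 0b00000 <=> bit55 == 0 && tag == 0x0;
             * ptr<59:55> == 0b11111 <=> bit55 == 1 && tag == 0xf. */
            int direct = (bit55 == 0 && tag == 0x0) ||
                         (bit55 == 1 && tag == 0xf);
            assert(folded == direct);
        }
    }
    return 0;
}
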
*/ +#ifdef CONFIG_USER_ONLY + ptr = sextract64(ptr, 0, 56); +#endif + return ptr; +} + +static inline uint64_t useronly_maybe_clean_ptr(uint32_t desc, uint64_t ptr) +{ +#ifdef CONFIG_USER_ONLY + int64_t clean_ptr = sextract64(ptr, 0, 56); + if (tbi_check(desc, clean_ptr < 0)) { + ptr = clean_ptr; + } +#endif + return ptr; +} + #endif diff --git a/target/arm/m_helper.c b/target/arm/m_helper.c index 5e8a795..0364542 100644 --- a/target/arm/m_helper.c +++ b/target/arm/m_helper.c @@ -187,12 +187,13 @@ static bool v7m_stack_write(ARMCPU *cpu, uint32_t addr, uint32_t value, hwaddr physaddr; int prot; ARMMMUFaultInfo fi = {}; + ARMCacheAttrs cacheattrs = {}; bool secure = mmu_idx & ARM_MMU_IDX_M_S; int exc; bool exc_secure; if (get_phys_addr(env, addr, MMU_DATA_STORE, mmu_idx, &physaddr, - &attrs, &prot, &page_size, &fi, NULL)) { + &attrs, &prot, &page_size, &fi, &cacheattrs)) { /* MPU/SAU lookup failed */ if (fi.type == ARMFault_QEMU_SFault) { if (mode == STACK_LAZYFP) { @@ -279,13 +280,14 @@ static bool v7m_stack_read(ARMCPU *cpu, uint32_t *dest, uint32_t addr, hwaddr physaddr; int prot; ARMMMUFaultInfo fi = {}; + ARMCacheAttrs cacheattrs = {}; bool secure = mmu_idx & ARM_MMU_IDX_M_S; int exc; bool exc_secure; uint32_t value; if (get_phys_addr(env, addr, MMU_DATA_LOAD, mmu_idx, &physaddr, - &attrs, &prot, &page_size, &fi, NULL)) { + &attrs, &prot, &page_size, &fi, &cacheattrs)) { /* MPU/SAU lookup failed */ if (fi.type == ARMFault_QEMU_SFault) { qemu_log_mask(CPU_LOG_INT, @@ -1928,6 +1930,7 @@ static bool v7m_read_half_insn(ARMCPU *cpu, ARMMMUIdx mmu_idx, V8M_SAttributes sattrs = {}; MemTxAttrs attrs = {}; ARMMMUFaultInfo fi = {}; + ARMCacheAttrs cacheattrs = {}; MemTxResult txres; target_ulong page_size; hwaddr physaddr; @@ -1945,8 +1948,8 @@ static bool v7m_read_half_insn(ARMCPU *cpu, ARMMMUIdx mmu_idx, "...really SecureFault with SFSR.INVEP\n"); return false; } - if (get_phys_addr(env, addr, MMU_INST_FETCH, mmu_idx, - &physaddr, &attrs, &prot, &page_size, &fi, NULL)) { + if (get_phys_addr(env, addr, MMU_INST_FETCH, mmu_idx, &physaddr, + &attrs, &prot, &page_size, &fi, &cacheattrs)) { /* the MPU lookup failed */ env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_IACCVIOL_MASK; armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_MEM, env->v7m.secure); diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c new file mode 100644 index 0000000..5ea57d4 --- /dev/null +++ b/target/arm/mte_helper.c @@ -0,0 +1,906 @@ +/* + * ARM v8.5-MemTag Operations + * + * Copyright (c) 2020 Linaro, Ltd. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "internals.h" +#include "exec/exec-all.h" +#include "exec/ram_addr.h" +#include "exec/cpu_ldst.h" +#include "exec/helper-proto.h" + + +static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude) +{ + if (exclude == 0xffff) { + return 0; + } + if (offset == 0) { + while (exclude & (1 << tag)) { + tag = (tag + 1) & 15; + } + } else { + do { + do { + tag = (tag + 1) & 15; + } while (exclude & (1 << tag)); + } while (--offset > 0); + } + return tag; +} + +/** + * allocation_tag_mem: + * @env: the cpu environment + * @ptr_mmu_idx: the addressing regime to use for the virtual address + * @ptr: the virtual address for which to look up tag memory + * @ptr_access: the access to use for the virtual address + * @ptr_size: the number of bytes in the normal memory access + * @tag_access: the access to use for the tag memory + * @tag_size: the number of bytes in the tag memory access + * @ra: the return address for exception handling + * + * Our tag memory is formatted as a sequence of little-endian nibbles. + * That is, the byte at (addr >> (LOG2_TAG_GRANULE + 1)) contains two + * tags, with the tag at [3:0] for the lower addr and the tag at [7:4] + * for the higher addr. + * + * Here, resolve the physical address from the virtual address, and return + * a pointer to the corresponding tag byte. Exit with exception if the + * virtual address is not accessible for @ptr_access. + * + * The @ptr_size and @tag_size values may not have an obvious relation + * due to the alignment of @ptr, and the number of tag checks required. + * + * If there is no tag storage corresponding to @ptr, return NULL. + */ +static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx, + uint64_t ptr, MMUAccessType ptr_access, + int ptr_size, MMUAccessType tag_access, + int tag_size, uintptr_t ra) +{ +#ifdef CONFIG_USER_ONLY + /* Tag storage not implemented. */ + return NULL; +#else + uintptr_t index; + CPUIOTLBEntry *iotlbentry; + int in_page, flags; + ram_addr_t ptr_ra; + hwaddr ptr_paddr, tag_paddr, xlat; + MemoryRegion *mr; + ARMASIdx tag_asi; + AddressSpace *tag_as; + void *host; + + /* + * Probe the first byte of the virtual address. This raises an + * exception for inaccessible pages, and resolves the virtual address + * into the softmmu tlb. + * + * When RA == 0, this is for mte_probe1. The page is expected to be + * valid. Indicate to probe_access_flags no-fault, then assert that + * we received a valid page. + */ + flags = probe_access_flags(env, ptr, ptr_access, ptr_mmu_idx, + ra == 0, &host, ra); + assert(!(flags & TLB_INVALID_MASK)); + + /* + * Find the iotlbentry for ptr. This *must* be present in the TLB + * because we just found the mapping. + * TODO: Perhaps there should be a cputlb helper that returns a + * matching tlb entry + iotlb entry. + */ + index = tlb_index(env, ptr_mmu_idx, ptr); +# ifdef CONFIG_DEBUG_TCG + { + CPUTLBEntry *entry = tlb_entry(env, ptr_mmu_idx, ptr); + target_ulong comparator = (ptr_access == MMU_DATA_LOAD + ? entry->addr_read + : tlb_addr_write(entry)); + g_assert(tlb_hit(comparator, ptr)); + } +# endif + iotlbentry = &env_tlb(env)->d[ptr_mmu_idx].iotlb[index]; + + /* If the virtual page MemAttr != Tagged, access unchecked. */ + if (!arm_tlb_mte_tagged(&iotlbentry->attrs)) { + return NULL; + } + + /* + * If not backed by host ram, there is no tag storage: access unchecked. + * This is probably a guest os bug though, so log it. 
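
The nibble layout documented for allocation_tag_mem() above can be restated as a tiny standalone accessor, matching load_tag1() further down. A sketch under the patch's constants (one tag per 16-byte granule, two tags per tag byte, low nibble for the even granule):

#include <stdint.h>

#define LOG2_TAG_GRANULE 4

static unsigned tag_at(const uint8_t *tag_mem, uint64_t paddr)
{
    uint64_t byte  = paddr >> (LOG2_TAG_GRANULE + 1);    /* 2 tags per byte */
    unsigned shift = ((paddr >> LOG2_TAG_GRANULE) & 1) * 4;
    return (tag_mem[byte] >> shift) & 0xf;
}
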
+ */ + if (unlikely(flags & TLB_MMIO)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Page @ 0x%" PRIx64 " indicates Tagged Normal memory " + "but is not backed by host ram\n", ptr); + return NULL; + } + + /* + * The Normal memory access can extend to the next page. E.g. a single + * 8-byte access to the last byte of a page will check only the last + * tag on the first page. + * Any page access exception has priority over tag check exception. + */ + in_page = -(ptr | TARGET_PAGE_MASK); + if (unlikely(ptr_size > in_page)) { + void *ignore; + flags |= probe_access_flags(env, ptr + in_page, ptr_access, + ptr_mmu_idx, ra == 0, &ignore, ra); + assert(!(flags & TLB_INVALID_MASK)); + } + + /* Any debug exception has priority over a tag check exception. */ + if (unlikely(flags & TLB_WATCHPOINT)) { + int wp = ptr_access == MMU_DATA_LOAD ? BP_MEM_READ : BP_MEM_WRITE; + assert(ra != 0); + cpu_check_watchpoint(env_cpu(env), ptr, ptr_size, + iotlbentry->attrs, wp, ra); + } + + /* + * Find the physical address within the normal mem space. + * The memory region lookup must succeed because TLB_MMIO was + * not set in the cputlb lookup above. + */ + mr = memory_region_from_host(host, &ptr_ra); + tcg_debug_assert(mr != NULL); + tcg_debug_assert(memory_region_is_ram(mr)); + ptr_paddr = ptr_ra; + do { + ptr_paddr += mr->addr; + mr = mr->container; + } while (mr); + + /* Convert to the physical address in tag space. */ + tag_paddr = ptr_paddr >> (LOG2_TAG_GRANULE + 1); + + /* Look up the address in tag space. */ + tag_asi = iotlbentry->attrs.secure ? ARMASIdx_TagS : ARMASIdx_TagNS; + tag_as = cpu_get_address_space(env_cpu(env), tag_asi); + mr = address_space_translate(tag_as, tag_paddr, &xlat, NULL, + tag_access == MMU_DATA_STORE, + iotlbentry->attrs); + + /* + * Note that @mr will never be NULL. If there is nothing in the address + * space at @tag_paddr, the translation will return the unallocated memory + * region. For our purposes, the result must be ram. + */ + if (unlikely(!memory_region_is_ram(mr))) { + /* ??? Failure is a board configuration error. */ + qemu_log_mask(LOG_UNIMP, + "Tag Memory @ 0x%" HWADDR_PRIx " not found for " + "Normal Memory @ 0x%" HWADDR_PRIx "\n", + tag_paddr, ptr_paddr); + return NULL; + } + + /* + * Ensure the tag memory is dirty on write, for migration. + * Tag memory can never contain code or display memory (vga). + */ + if (tag_access == MMU_DATA_STORE) { + ram_addr_t tag_ra = memory_region_get_ram_addr(mr) + xlat; + cpu_physical_memory_set_dirty_flag(tag_ra, DIRTY_MEMORY_MIGRATION); + } + + return memory_region_get_ram_ptr(mr) + xlat; +#endif +} + +uint64_t HELPER(irg)(CPUARMState *env, uint64_t rn, uint64_t rm) +{ + int rtag; + + /* + * Our IMPDEF choice for GCR_EL1.RRND==1 is to behave as if + * GCR_EL1.RRND==0, always producing deterministic results. 
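
Before HELPER(irg) consumes it just below, the behaviour of choose_nonexcluded_tag() from the top of this file can be pinned down with a few cases. This is a standalone copy of the selection loop plus asserts, for illustration only:

#include <assert.h>
#include <stdint.h>

static int pick(int tag, int offset, uint16_t exclude)
{
    if (exclude == 0xffff) {
        return 0;                       /* everything excluded: force 0 */
    }
    if (offset == 0) {
        while (exclude & (1 << tag)) {
            tag = (tag + 1) & 15;       /* first allowed tag at or after */
        }
    } else {
        do {
            do {
                tag = (tag + 1) & 15;
            } while (exclude & (1 << tag));
        } while (--offset > 0);         /* the offset'th allowed tag after */
    }
    return tag;
}

int main(void)
{
    assert(pick(2, 0, 0x000c) == 4);    /* tags 2,3 excluded: advance to 4 */
    assert(pick(2, 2, 0x000c) == 5);    /* second allowed tag after 2 is 5 */
    assert(pick(7, 3, 0xffff) == 0);    /* fully excluded mask forces 0 */
    return 0;
}
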
+ */ + uint16_t exclude = extract32(rm | env->cp15.gcr_el1, 0, 16); + int start = extract32(env->cp15.rgsr_el1, 0, 4); + int seed = extract32(env->cp15.rgsr_el1, 8, 16); + int offset, i; + + /* RandomTag */ + for (i = offset = 0; i < 4; ++i) { + /* NextRandomTagBit */ + int top = (extract32(seed, 5, 1) ^ extract32(seed, 3, 1) ^ + extract32(seed, 2, 1) ^ extract32(seed, 0, 1)); + seed = (top << 15) | (seed >> 1); + offset |= top << i; + } + rtag = choose_nonexcluded_tag(start, offset, exclude); + env->cp15.rgsr_el1 = rtag | (seed << 8); + + return address_with_allocation_tag(rn, rtag); +} + +uint64_t HELPER(addsubg)(CPUARMState *env, uint64_t ptr, + int32_t offset, uint32_t tag_offset) +{ + int start_tag = allocation_tag_from_addr(ptr); + uint16_t exclude = extract32(env->cp15.gcr_el1, 0, 16); + int rtag = choose_nonexcluded_tag(start_tag, tag_offset, exclude); + + return address_with_allocation_tag(ptr + offset, rtag); +} + +static int load_tag1(uint64_t ptr, uint8_t *mem) +{ + int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4; + return extract32(*mem, ofs, 4); +} + +uint64_t HELPER(ldg)(CPUARMState *env, uint64_t ptr, uint64_t xt) +{ + int mmu_idx = cpu_mmu_index(env, false); + uint8_t *mem; + int rtag = 0; + + /* Trap if accessing an invalid page. */ + mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD, 1, + MMU_DATA_LOAD, 1, GETPC()); + + /* Load if page supports tags. */ + if (mem) { + rtag = load_tag1(ptr, mem); + } + + return address_with_allocation_tag(xt, rtag); +} + +static void check_tag_aligned(CPUARMState *env, uint64_t ptr, uintptr_t ra) +{ + if (unlikely(!QEMU_IS_ALIGNED(ptr, TAG_GRANULE))) { + arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE, + cpu_mmu_index(env, false), ra); + g_assert_not_reached(); + } +} + +/* For use in a non-parallel context, store to the given nibble. */ +static void store_tag1(uint64_t ptr, uint8_t *mem, int tag) +{ + int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4; + *mem = deposit32(*mem, ofs, 4, tag); +} + +/* For use in a parallel context, atomically store to the given nibble. */ +static void store_tag1_parallel(uint64_t ptr, uint8_t *mem, int tag) +{ + int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4; + uint8_t old = atomic_read(mem); + + while (1) { + uint8_t new = deposit32(old, ofs, 4, tag); + uint8_t cmp = atomic_cmpxchg(mem, old, new); + if (likely(cmp == old)) { + return; + } + old = cmp; + } +} + +typedef void stg_store1(uint64_t, uint8_t *, int); + +static inline void do_stg(CPUARMState *env, uint64_t ptr, uint64_t xt, + uintptr_t ra, stg_store1 store1) +{ + int mmu_idx = cpu_mmu_index(env, false); + uint8_t *mem; + + check_tag_aligned(env, ptr, ra); + + /* Trap if accessing an invalid page. */ + mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, TAG_GRANULE, + MMU_DATA_STORE, 1, ra); + + /* Store if page supports tags. 
*/ + if (mem) { + store1(ptr, mem, allocation_tag_from_addr(xt)); + } +} + +void HELPER(stg)(CPUARMState *env, uint64_t ptr, uint64_t xt) +{ + do_stg(env, ptr, xt, GETPC(), store_tag1); +} + +void HELPER(stg_parallel)(CPUARMState *env, uint64_t ptr, uint64_t xt) +{ + do_stg(env, ptr, xt, GETPC(), store_tag1_parallel); +} + +void HELPER(stg_stub)(CPUARMState *env, uint64_t ptr) +{ + int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + + check_tag_aligned(env, ptr, ra); + probe_write(env, ptr, TAG_GRANULE, mmu_idx, ra); +} + +static inline void do_st2g(CPUARMState *env, uint64_t ptr, uint64_t xt, + uintptr_t ra, stg_store1 store1) +{ + int mmu_idx = cpu_mmu_index(env, false); + int tag = allocation_tag_from_addr(xt); + uint8_t *mem1, *mem2; + + check_tag_aligned(env, ptr, ra); + + /* + * Trap if accessing an invalid page(s). + * This takes priority over !allocation_tag_access_enabled. + */ + if (ptr & TAG_GRANULE) { + /* Two stores unaligned mod TAG_GRANULE*2 -- modify two bytes. */ + mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, + TAG_GRANULE, MMU_DATA_STORE, 1, ra); + mem2 = allocation_tag_mem(env, mmu_idx, ptr + TAG_GRANULE, + MMU_DATA_STORE, TAG_GRANULE, + MMU_DATA_STORE, 1, ra); + + /* Store if page(s) support tags. */ + if (mem1) { + store1(TAG_GRANULE, mem1, tag); + } + if (mem2) { + store1(0, mem2, tag); + } + } else { + /* Two stores aligned mod TAG_GRANULE*2 -- modify one byte. */ + mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, + 2 * TAG_GRANULE, MMU_DATA_STORE, 1, ra); + if (mem1) { + tag |= tag << 4; + atomic_set(mem1, tag); + } + } +} + +void HELPER(st2g)(CPUARMState *env, uint64_t ptr, uint64_t xt) +{ + do_st2g(env, ptr, xt, GETPC(), store_tag1); +} + +void HELPER(st2g_parallel)(CPUARMState *env, uint64_t ptr, uint64_t xt) +{ + do_st2g(env, ptr, xt, GETPC(), store_tag1_parallel); +} + +void HELPER(st2g_stub)(CPUARMState *env, uint64_t ptr) +{ + int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + int in_page = -(ptr | TARGET_PAGE_MASK); + + check_tag_aligned(env, ptr, ra); + + if (likely(in_page >= 2 * TAG_GRANULE)) { + probe_write(env, ptr, 2 * TAG_GRANULE, mmu_idx, ra); + } else { + probe_write(env, ptr, TAG_GRANULE, mmu_idx, ra); + probe_write(env, ptr + TAG_GRANULE, TAG_GRANULE, mmu_idx, ra); + } +} + +#define LDGM_STGM_SIZE (4 << GMID_EL1_BS) + +uint64_t HELPER(ldgm)(CPUARMState *env, uint64_t ptr) +{ + int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + void *tag_mem; + + ptr = QEMU_ALIGN_DOWN(ptr, LDGM_STGM_SIZE); + + /* Trap if accessing an invalid page. */ + tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD, + LDGM_STGM_SIZE, MMU_DATA_LOAD, + LDGM_STGM_SIZE / (2 * TAG_GRANULE), ra); + + /* The tag is squashed to zero if the page does not support tags. */ + if (!tag_mem) { + return 0; + } + + QEMU_BUILD_BUG_ON(GMID_EL1_BS != 6); + /* + * We are loading 64-bits worth of tags. The ordering of elements + * within the word corresponds to a 64-bit little-endian operation. + */ + return ldq_le_p(tag_mem); +} + +void HELPER(stgm)(CPUARMState *env, uint64_t ptr, uint64_t val) +{ + int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + void *tag_mem; + + ptr = QEMU_ALIGN_DOWN(ptr, LDGM_STGM_SIZE); + + /* Trap if accessing an invalid page. 
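
The LDGM/STGM sizing above works out to exactly one 64-bit word of tag memory, which is why the ldgm/stgm helpers end in ldq_le_p()/stq_le_p(). The arithmetic, as compile-time checks (illustration only):

#include <assert.h>

#define GMID_EL1_BS     6
#define TAG_GRANULE     16
#define LDGM_STGM_SIZE  (4 << GMID_EL1_BS)   /* 256 bytes of data */

static_assert(LDGM_STGM_SIZE / TAG_GRANULE == 16,
              "16 granules, hence 16 tags");
static_assert(LDGM_STGM_SIZE / (2 * TAG_GRANULE) == 8,
              "8 tag bytes, i.e. one 64-bit load/store");
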
*/ + tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, + LDGM_STGM_SIZE, MMU_DATA_LOAD, + LDGM_STGM_SIZE / (2 * TAG_GRANULE), ra); + + /* + * Tag store only happens if the page support tags, + * and if the OS has enabled access to the tags. + */ + if (!tag_mem) { + return; + } + + QEMU_BUILD_BUG_ON(GMID_EL1_BS != 6); + /* + * We are storing 64-bits worth of tags. The ordering of elements + * within the word corresponds to a 64-bit little-endian operation. + */ + stq_le_p(tag_mem, val); +} + +void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val) +{ + uintptr_t ra = GETPC(); + int mmu_idx = cpu_mmu_index(env, false); + int log2_dcz_bytes, log2_tag_bytes; + intptr_t dcz_bytes, tag_bytes; + uint8_t *mem; + + /* + * In arm_cpu_realizefn, we assert that dcz > LOG2_TAG_GRANULE+1, + * i.e. 32 bytes, which is an unreasonably small dcz anyway, + * to make sure that we can access one complete tag byte here. + */ + log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2; + log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1); + dcz_bytes = (intptr_t)1 << log2_dcz_bytes; + tag_bytes = (intptr_t)1 << log2_tag_bytes; + ptr &= -dcz_bytes; + + mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, dcz_bytes, + MMU_DATA_STORE, tag_bytes, ra); + if (mem) { + int tag_pair = (val & 0xf) * 0x11; + memset(mem, tag_pair, tag_bytes); + } +} + +/* Record a tag check failure. */ +static void mte_check_fail(CPUARMState *env, int mmu_idx, + uint64_t dirty_ptr, uintptr_t ra) +{ + ARMMMUIdx arm_mmu_idx = core_to_aa64_mmu_idx(mmu_idx); + int el, reg_el, tcf, select; + uint64_t sctlr; + + reg_el = regime_el(env, arm_mmu_idx); + sctlr = env->cp15.sctlr_el[reg_el]; + + switch (arm_mmu_idx) { + case ARMMMUIdx_E10_0: + case ARMMMUIdx_E20_0: + el = 0; + tcf = extract64(sctlr, 38, 2); + break; + default: + el = reg_el; + tcf = extract64(sctlr, 40, 2); + } + + switch (tcf) { + case 1: + /* + * Tag check fail causes a synchronous exception. + * + * In restore_state_to_opc, we set the exception syndrome + * for the load or store operation. Unwind first so we + * may overwrite that with the syndrome for the tag check. + */ + cpu_restore_state(env_cpu(env), ra, true); + env->exception.vaddress = dirty_ptr; + raise_exception(env, EXCP_DATA_ABORT, + syn_data_abort_no_iss(el != 0, 0, 0, 0, 0, 0, 0x11), + exception_target_el(env)); + /* noreturn, but fall through to the assert anyway */ + + case 0: + /* + * Tag check fail does not affect the PE. + * We eliminate this case by not setting MTE_ACTIVE + * in tb_flags, so that we never make this runtime call. + */ + g_assert_not_reached(); + + case 2: + /* Tag check fail causes asynchronous flag set. */ + mmu_idx = arm_mmu_idx_el(env, el); + if (regime_has_2_ranges(mmu_idx)) { + select = extract64(dirty_ptr, 55, 1); + } else { + select = 0; + } + env->cp15.tfsr_el[el] |= 1 << select; + break; + + default: + /* Case 3: Reserved. */ + qemu_log_mask(LOG_GUEST_ERROR, + "Tag check failure with SCTLR_EL%d.TCF%s " + "set to reserved value %d\n", + reg_el, el ? "" : "0", tcf); + break; + } +} + +/* + * Perform an MTE checked access for a single logical or atomic access. + */ +static bool mte_probe1_int(CPUARMState *env, uint32_t desc, uint64_t ptr, + uintptr_t ra, int bit55) +{ + int mem_tag, mmu_idx, ptr_tag, size; + MMUAccessType type; + uint8_t *mem; + + ptr_tag = allocation_tag_from_addr(ptr); + + if (tcma_check(desc, bit55, ptr_tag)) { + return true; + } + + mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + type = FIELD_EX32(desc, MTEDESC, WRITE) ? 
MMU_DATA_STORE : MMU_DATA_LOAD; + size = FIELD_EX32(desc, MTEDESC, ESIZE); + + mem = allocation_tag_mem(env, mmu_idx, ptr, type, size, + MMU_DATA_LOAD, 1, ra); + if (!mem) { + return true; + } + + mem_tag = load_tag1(ptr, mem); + return ptr_tag == mem_tag; +} + +/* + * No-fault version of mte_check1, to be used by SVE for MemSingleNF. + * Returns false if the access is Checked and the check failed. This + * is only intended to probe the tag -- the validity of the page must + * be checked beforehand. + */ +bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr) +{ + int bit55 = extract64(ptr, 55, 1); + + /* If TBI is disabled, the access is unchecked. */ + if (unlikely(!tbi_check(desc, bit55))) { + return true; + } + + return mte_probe1_int(env, desc, ptr, 0, bit55); +} + +uint64_t mte_check1(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra) +{ + int bit55 = extract64(ptr, 55, 1); + + /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */ + if (unlikely(!tbi_check(desc, bit55))) { + return ptr; + } + + if (unlikely(!mte_probe1_int(env, desc, ptr, ra, bit55))) { + int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + mte_check_fail(env, mmu_idx, ptr, ra); + } + + return useronly_clean_ptr(ptr); +} + +uint64_t HELPER(mte_check1)(CPUARMState *env, uint32_t desc, uint64_t ptr) +{ + return mte_check1(env, desc, ptr, GETPC()); +} + +/* + * Perform an MTE checked access for multiple logical accesses. + */ + +/** + * checkN: + * @tag: tag memory to test + * @odd: true to begin testing at tags at odd nibble + * @cmp: the tag to compare against + * @count: number of tags to test + * + * Return the number of successful tests. + * Thus a return value < @count indicates a failure. + * + * A note about sizes: count is expected to be small. + * + * The most common use will be LDP/STP of two integer registers, + * which means 16 bytes of memory touching at most 2 tags, but + * often the access is aligned and thus just 1 tag. + * + * Using AdvSIMD LD/ST (multiple), one can access 64 bytes of memory, + * touching at most 5 tags. SVE LDR/STR (vector) with the default + * vector length is also 64 bytes; the maximum architectural length + * is 256 bytes touching at most 9 tags. + * + * The loop below uses 7 logical operations and 1 memory operation + * per tag pair. An implementation that loads an aligned word and + * uses masking to ignore adjacent tags requires 18 logical operations + * and thus does not begin to pay off until 6 tags. + * Which, according to the survey above, is unlikely to be common. + */ +static int checkN(uint8_t *mem, int odd, int cmp, int count) +{ + int n = 0, diff; + + /* Replicate the test tag and compare. */ + cmp *= 0x11; + diff = *mem++ ^ cmp; + + if (odd) { + goto start_odd; + } + + while (1) { + /* Test even tag. */ + if (unlikely((diff) & 0x0f)) { + break; + } + if (++n == count) { + break; + } + + start_odd: + /* Test odd tag. */ + if (unlikely((diff) & 0xf0)) { + break; + } + if (++n == count) { + break; + } + + diff = *mem++ ^ cmp; + } + return n; +} + +uint64_t mte_checkN(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra) +{ + int mmu_idx, ptr_tag, bit55; + uint64_t ptr_last, ptr_end, prev_page, next_page; + uint64_t tag_first, tag_end; + uint64_t tag_byte_first, tag_byte_end; + uint32_t esize, total, tag_count, tag_size, n, c; + uint8_t *mem1, *mem2; + MMUAccessType type; + + bit55 = extract64(ptr, 55, 1); + + /* If TBI is disabled, the access is unchecked, and ptr is not dirty. 
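
mte_check1() and mte_checkN() take all of their parameters through the packed MTEDESC word declared in internals.h. A hypothetical builder mirroring that FIELD() layout (the function name is invented for illustration):

#include <stdint.h>

static uint32_t make_mtedesc(uint32_t midx, uint32_t tbi, uint32_t tcma,
                             uint32_t is_write, uint32_t esize, uint32_t tsize)
{
    return (midx  << 0)      /* MIDX  [3:0]   mmu index */
         | (tbi   << 4)      /* TBI   [5:4]   */
         | (tcma  << 6)      /* TCMA  [7:6]   */
         | (is_write << 8)   /* WRITE [8]     */
         | (esize << 9)      /* ESIZE [13:9]  element size in bytes */
         | (tsize << 14);    /* TSIZE [23:14] total size, mte_checkN only */
}
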
*/ + if (unlikely(!tbi_check(desc, bit55))) { + return ptr; + } + + ptr_tag = allocation_tag_from_addr(ptr); + + if (tcma_check(desc, bit55, ptr_tag)) { + goto done; + } + + mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD; + esize = FIELD_EX32(desc, MTEDESC, ESIZE); + total = FIELD_EX32(desc, MTEDESC, TSIZE); + + /* Find the addr of the end of the access, and of the last element. */ + ptr_end = ptr + total; + ptr_last = ptr_end - esize; + + /* Round the bounds to the tag granule, and compute the number of tags. */ + tag_first = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE); + tag_end = QEMU_ALIGN_UP(ptr_last, TAG_GRANULE); + tag_count = (tag_end - tag_first) / TAG_GRANULE; + + /* Round the bounds to twice the tag granule, and compute the bytes. */ + tag_byte_first = QEMU_ALIGN_DOWN(ptr, 2 * TAG_GRANULE); + tag_byte_end = QEMU_ALIGN_UP(ptr_last, 2 * TAG_GRANULE); + + /* Locate the page boundaries. */ + prev_page = ptr & TARGET_PAGE_MASK; + next_page = prev_page + TARGET_PAGE_SIZE; + + if (likely(tag_end - prev_page <= TARGET_PAGE_SIZE)) { + /* Memory access stays on one page. */ + tag_size = (tag_byte_end - tag_byte_first) / (2 * TAG_GRANULE); + mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, total, + MMU_DATA_LOAD, tag_size, ra); + if (!mem1) { + goto done; + } + /* Perform all of the comparisons. */ + n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, tag_count); + } else { + /* Memory access crosses to next page. */ + tag_size = (next_page - tag_byte_first) / (2 * TAG_GRANULE); + mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, next_page - ptr, + MMU_DATA_LOAD, tag_size, ra); + + tag_size = (tag_byte_end - next_page) / (2 * TAG_GRANULE); + mem2 = allocation_tag_mem(env, mmu_idx, next_page, type, + ptr_end - next_page, + MMU_DATA_LOAD, tag_size, ra); + + /* + * Perform all of the comparisons. + * Note the possible but unlikely case of the operation spanning + * two pages that do not both have tagging enabled. + */ + n = c = (next_page - tag_first) / TAG_GRANULE; + if (mem1) { + n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, c); + } + if (n == c) { + if (!mem2) { + goto done; + } + n += checkN(mem2, 0, ptr_tag, tag_count - c); + } + } + + /* + * If we failed, we know which granule. Compute the element that + * is first in that granule, and signal failure on that element. + */ + if (unlikely(n < tag_count)) { + uint64_t fail_ofs; + + fail_ofs = tag_first + n * TAG_GRANULE - ptr; + fail_ofs = ROUND_UP(fail_ofs, esize); + mte_check_fail(env, mmu_idx, ptr + fail_ofs, ra); + } + + done: + return useronly_clean_ptr(ptr); +} + +uint64_t HELPER(mte_checkN)(CPUARMState *env, uint32_t desc, uint64_t ptr) +{ + return mte_checkN(env, desc, ptr, GETPC()); +} + +/* + * Perform an MTE checked access for DC_ZVA. + */ +uint64_t HELPER(mte_check_zva)(CPUARMState *env, uint32_t desc, uint64_t ptr) +{ + uintptr_t ra = GETPC(); + int log2_dcz_bytes, log2_tag_bytes; + int mmu_idx, bit55; + intptr_t dcz_bytes, tag_bytes, i; + void *mem; + uint64_t ptr_tag, mem_tag, align_ptr; + + bit55 = extract64(ptr, 55, 1); + + /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */ + if (unlikely(!tbi_check(desc, bit55))) { + return ptr; + } + + ptr_tag = allocation_tag_from_addr(ptr); + + if (tcma_check(desc, bit55, ptr_tag)) { + goto done; + } + + /* + * In arm_cpu_realizefn, we asserted that dcz > LOG2_TAG_GRANULE+1, + * i.e. 32 bytes, which is an unreasonably small dcz anyway, to make + * sure that we can access one complete tag byte here. 
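
The granule arithmetic in mte_checkN() above rewards a worked example (the addresses are invented): a 12-byte access of 2-byte elements at ptr = 0x100a gives ptr_last = 0x1014, hence tag_first = 0x1000, tag_end = 0x1020, tag_count = 2, and both tags fit in a single tag byte:

#include <assert.h>
#include <stdint.h>

#define TAG_GRANULE 16
#define ALIGN_DOWN(x, a) ((x) & ~((uint64_t)(a) - 1))
#define ALIGN_UP(x, a)   ALIGN_DOWN((x) + (a) - 1, (a))

int main(void)
{
    uint64_t ptr = 0x100a, esize = 2, total = 12;
    uint64_t ptr_last = ptr + total - esize;

    assert(ALIGN_DOWN(ptr, TAG_GRANULE) == 0x1000);      /* tag_first */
    assert(ALIGN_UP(ptr_last, TAG_GRANULE) == 0x1020);   /* tag_end */
    assert((0x1020 - 0x1000) / TAG_GRANULE == 2);        /* tag_count */
    assert((ALIGN_UP(ptr_last, 2 * TAG_GRANULE)
            - ALIGN_DOWN(ptr, 2 * TAG_GRANULE))
           / (2 * TAG_GRANULE) == 1);                    /* one tag byte */
    return 0;
}
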
+ */ + log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2; + log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1); + dcz_bytes = (intptr_t)1 << log2_dcz_bytes; + tag_bytes = (intptr_t)1 << log2_tag_bytes; + align_ptr = ptr & -dcz_bytes; + + /* + * Trap if accessing an invalid page. DC_ZVA requires that we supply + * the original pointer for an invalid page. But watchpoints require + * that we probe the actual space. So do both. + */ + mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + (void) probe_write(env, ptr, 1, mmu_idx, ra); + mem = allocation_tag_mem(env, mmu_idx, align_ptr, MMU_DATA_STORE, + dcz_bytes, MMU_DATA_LOAD, tag_bytes, ra); + if (!mem) { + goto done; + } + + /* + * Unlike the reasoning for checkN, DC_ZVA is always aligned, and thus + * it is quite easy to perform all of the comparisons at once without + * any extra masking. + * + * The most common zva block size is 64; some of the thunderx cpus use + * a block size of 128. For user-only, aarch64_max_initfn will set the + * block size to 512. Fill out the other cases for future-proofing. + * + * In order to be able to find the first miscompare later, we want the + * tag bytes to be in little-endian order. + */ + switch (log2_tag_bytes) { + case 0: /* zva_blocksize 32 */ + mem_tag = *(uint8_t *)mem; + ptr_tag *= 0x11u; + break; + case 1: /* zva_blocksize 64 */ + mem_tag = cpu_to_le16(*(uint16_t *)mem); + ptr_tag *= 0x1111u; + break; + case 2: /* zva_blocksize 128 */ + mem_tag = cpu_to_le32(*(uint32_t *)mem); + ptr_tag *= 0x11111111u; + break; + case 3: /* zva_blocksize 256 */ + mem_tag = cpu_to_le64(*(uint64_t *)mem); + ptr_tag *= 0x1111111111111111ull; + break; + + default: /* zva_blocksize 512, 1024, 2048 */ + ptr_tag *= 0x1111111111111111ull; + i = 0; + do { + mem_tag = cpu_to_le64(*(uint64_t *)(mem + i)); + if (unlikely(mem_tag != ptr_tag)) { + goto fail; + } + i += 8; + align_ptr += 16 * TAG_GRANULE; + } while (i < tag_bytes); + goto done; + } + + if (likely(mem_tag == ptr_tag)) { + goto done; + } + + fail: + /* Locate the first nibble that differs. 
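
One worked case for the replication switch above (illustration only): zva_blocksize 64 means log2_dcz_bytes = 6 and log2_tag_bytes = 1, so the block covers 64/16 = 4 granules whose tags occupy two bytes, and a pointer tag of 0xA widens to 0xAAAA for a single 16-bit compare:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    int log2_dcz_bytes = 6;                   /* 64-byte ZVA block */
    int log2_tag_bytes = log2_dcz_bytes - 5;  /* minus LOG2_TAG_GRANULE + 1 */
    uint64_t ptr_tag = 0xA;

    assert((1 << log2_tag_bytes) == 2);       /* 4 tags packed in 2 bytes */
    assert(ptr_tag * 0x1111u == 0xAAAAu);     /* tag replicated per nibble */
    return 0;
}
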
*/ + i = ctz64(mem_tag ^ ptr_tag) >> 4; + mte_check_fail(env, mmu_idx, align_ptr + i * TAG_GRANULE, ra); + + done: + return useronly_clean_ptr(ptr); +} diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c index eb0de08..b106521 100644 --- a/target/arm/op_helper.c +++ b/target/arm/op_helper.c @@ -935,3 +935,19 @@ uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i) return ((uint32_t)x >> shift) | (x << (32 - shift)); } } + +void HELPER(probe_access)(CPUARMState *env, target_ulong ptr, + uint32_t access_type, uint32_t mmu_idx, + uint32_t size) +{ + uint32_t in_page = -((uint32_t)ptr | TARGET_PAGE_SIZE); + uintptr_t ra = GETPC(); + + if (likely(size <= in_page)) { + probe_access(env, ptr, size, access_type, mmu_idx, ra); + } else { + probe_access(env, ptr, in_page, access_type, mmu_idx, ra); + probe_access(env, ptr + in_page, size - in_page, + access_type, mmu_idx, ra); + } +} diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c index e590db6..382fa82 100644 --- a/target/arm/sve_helper.c +++ b/target/arm/sve_helper.c @@ -3966,14 +3966,16 @@ static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \ static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ target_ulong addr, uintptr_t ra) \ { \ - *(TYPEE *)(vd + H(reg_off)) = (TYPEM)TLB(env, addr, ra); \ + *(TYPEE *)(vd + H(reg_off)) = \ + (TYPEM)TLB(env, useronly_clean_ptr(addr), ra); \ } #define DO_ST_TLB(NAME, H, TYPEE, TYPEM, TLB) \ static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \ target_ulong addr, uintptr_t ra) \ { \ - TLB(env, addr, (TYPEM)*(TYPEE *)(vd + H(reg_off)), ra); \ + TLB(env, useronly_clean_ptr(addr), \ + (TYPEM)*(TYPEE *)(vd + H(reg_off)), ra); \ } #define DO_LD_PRIM_1(NAME, H, TE, TM) \ @@ -4091,6 +4093,19 @@ static bool sve_probe_page(SVEHostPage *info, bool nofault, int flags; addr += mem_off; + + /* + * User-only currently always issues with TBI. See the comment + * above useronly_clean_ptr. Usually we clean this top byte away + * during translation, but we can't do that for e.g. vector + imm + * addressing modes. + * + * We currently always enable TBI for user-only, and do not provide + * a way to turn it off. So clean the pointer unconditionally here, + * rather than look it up here, or pass it down from above. + */ + addr = useronly_clean_ptr(addr); + flags = probe_access_flags(env, addr, access_type, mmu_idx, nofault, &info->host, retaddr); info->flags = flags; @@ -4393,15 +4408,89 @@ static void sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env, #endif } +typedef uint64_t mte_check_fn(CPUARMState *, uint32_t, uint64_t, uintptr_t); + +static inline QEMU_ALWAYS_INLINE +void sve_cont_ldst_mte_check_int(SVEContLdSt *info, CPUARMState *env, + uint64_t *vg, target_ulong addr, int esize, + int msize, uint32_t mtedesc, uintptr_t ra, + mte_check_fn *check) +{ + intptr_t mem_off, reg_off, reg_last; + + /* Process the page only if MemAttr == Tagged. 
*/ + if (arm_tlb_mte_tagged(&info->page[0].attrs)) { + mem_off = info->mem_off_first[0]; + reg_off = info->reg_off_first[0]; + reg_last = info->reg_off_split; + if (reg_last < 0) { + reg_last = info->reg_off_last[0]; + } + + do { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + check(env, mtedesc, addr, ra); + } + reg_off += esize; + mem_off += msize; + } while (reg_off <= reg_last && (reg_off & 63)); + } while (reg_off <= reg_last); + } + + mem_off = info->mem_off_first[1]; + if (mem_off >= 0 && arm_tlb_mte_tagged(&info->page[1].attrs)) { + reg_off = info->reg_off_first[1]; + reg_last = info->reg_off_last[1]; + + do { + uint64_t pg = vg[reg_off >> 6]; + do { + if ((pg >> (reg_off & 63)) & 1) { + check(env, mtedesc, addr, ra); + } + reg_off += esize; + mem_off += msize; + } while (reg_off & 63); + } while (reg_off <= reg_last); + } +} + +typedef void sve_cont_ldst_mte_check_fn(SVEContLdSt *info, CPUARMState *env, + uint64_t *vg, target_ulong addr, + int esize, int msize, uint32_t mtedesc, + uintptr_t ra); + +static void sve_cont_ldst_mte_check1(SVEContLdSt *info, CPUARMState *env, + uint64_t *vg, target_ulong addr, + int esize, int msize, uint32_t mtedesc, + uintptr_t ra) +{ + sve_cont_ldst_mte_check_int(info, env, vg, addr, esize, msize, + mtedesc, ra, mte_check1); +} + +static void sve_cont_ldst_mte_checkN(SVEContLdSt *info, CPUARMState *env, + uint64_t *vg, target_ulong addr, + int esize, int msize, uint32_t mtedesc, + uintptr_t ra) +{ + sve_cont_ldst_mte_check_int(info, env, vg, addr, esize, msize, + mtedesc, ra, mte_checkN); +} + + /* * Common helper for all contiguous 1,2,3,4-register predicated stores. */ static inline QEMU_ALWAYS_INLINE void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr, uint32_t desc, const uintptr_t retaddr, - const int esz, const int msz, const int N, + const int esz, const int msz, const int N, uint32_t mtedesc, sve_ldst1_host_fn *host_fn, - sve_ldst1_tlb_fn *tlb_fn) + sve_ldst1_tlb_fn *tlb_fn, + sve_cont_ldst_mte_check_fn *mte_check_fn) { const unsigned rd = simd_data(desc); const intptr_t reg_max = simd_oprsz(desc); @@ -4426,7 +4515,14 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr, sve_cont_ldst_watchpoints(&info, env, vg, addr, 1 << esz, N << msz, BP_MEM_READ, retaddr); - /* TODO: MTE check. */ + /* + * Handle mte checks for all active elements. + * Since TBI must be set for MTE, !mtedesc => !mte_active. + */ + if (mte_check_fn && mtedesc) { + mte_check_fn(&info, env, vg, addr, 1 << esz, N << msz, + mtedesc, retaddr); + } flags = info.page[0].flags | info.page[1].flags; if (unlikely(flags != 0)) { @@ -4532,26 +4628,67 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr, } } -#define DO_LD1_1(NAME, ESZ) \ -void HELPER(sve_##NAME##_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, 1, \ - sve_##NAME##_host, sve_##NAME##_tlb); \ +static inline QEMU_ALWAYS_INLINE +void sve_ldN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, + uint32_t desc, const uintptr_t ra, + const int esz, const int msz, const int N, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) +{ + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + int bit55 = extract64(addr, 55, 1); + + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + + /* Perform gross MTE suppression early. 
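
The _mte entry points above receive one 32-bit descriptor carrying the MTE descriptor above the normal SVE simd data; the "gross suppression" then zeroes mtedesc whenever no element could be Checked, so the per-element loops never reach the mte_check* helpers at all. A sketch of the packing contract (the SIMD_DATA_SHIFT value of 10 is an assumption taken from TCG's simd descriptor layout; treat both functions as illustrative):

#include <stdint.h>

#define SIMD_DATA_SHIFT   10   /* assumed; see tcg-gvec-desc.h */
#define SVE_MTEDESC_SHIFT  5   /* from internals.h in this patch */

static uint32_t pack_desc(uint32_t sve_desc, uint32_t mtedesc)
{
    return sve_desc | (mtedesc << (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT));
}

static uint32_t unpack_mtedesc(uint32_t desc)
{
    return desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
}
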
*/ + if (!tbi_check(desc, bit55) || + tcma_check(desc, bit55, allocation_tag_from_addr(addr))) { + mtedesc = 0; + } + + sve_ldN_r(env, vg, addr, desc, ra, esz, msz, N, mtedesc, host_fn, tlb_fn, + N == 1 ? sve_cont_ldst_mte_check1 : sve_cont_ldst_mte_checkN); } -#define DO_LD1_2(NAME, ESZ, MSZ) \ -void HELPER(sve_##NAME##_le_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \ - sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ -} \ -void HELPER(sve_##NAME##_be_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \ - sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ +#define DO_LD1_1(NAME, ESZ) \ +void HELPER(sve_##NAME##_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, 1, 0, \ + sve_##NAME##_host, sve_##NAME##_tlb, NULL); \ +} \ +void HELPER(sve_##NAME##_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, 1, \ + sve_##NAME##_host, sve_##NAME##_tlb); \ +} + +#define DO_LD1_2(NAME, ESZ, MSZ) \ +void HELPER(sve_##NAME##_le_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, 0, \ + sve_##NAME##_le_host, sve_##NAME##_le_tlb, NULL); \ +} \ +void HELPER(sve_##NAME##_be_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, 0, \ + sve_##NAME##_be_host, sve_##NAME##_be_tlb, NULL); \ +} \ +void HELPER(sve_##NAME##_le_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \ + sve_##NAME##_le_host, sve_##NAME##_le_tlb); \ +} \ +void HELPER(sve_##NAME##_be_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \ + sve_##NAME##_be_host, sve_##NAME##_be_tlb); \ } DO_LD1_1(ld1bb, MO_8) @@ -4577,26 +4714,44 @@ DO_LD1_2(ld1dd, MO_64, MO_64) #undef DO_LD1_1 #undef DO_LD1_2 -#define DO_LDN_1(N) \ -void HELPER(sve_ld##N##bb_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldN_r(env, vg, addr, desc, GETPC(), MO_8, MO_8, N, \ - sve_ld1bb_host, sve_ld1bb_tlb); \ +#define DO_LDN_1(N) \ +void HELPER(sve_ld##N##bb_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldN_r(env, vg, addr, desc, GETPC(), MO_8, MO_8, N, 0, \ + sve_ld1bb_host, sve_ld1bb_tlb, NULL); \ +} \ +void HELPER(sve_ld##N##bb_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), MO_8, MO_8, N, \ + sve_ld1bb_host, sve_ld1bb_tlb); \ } -#define DO_LDN_2(N, SUFF, ESZ) \ -void HELPER(sve_ld##N##SUFF##_le_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \ - sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb); \ -} \ -void HELPER(sve_ld##N##SUFF##_be_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \ - sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb); \ +#define DO_LDN_2(N, SUFF, ESZ) \ +void HELPER(sve_ld##N##SUFF##_le_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, 
ESZ, N, 0, \ + sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb, NULL); \ +} \ +void HELPER(sve_ld##N##SUFF##_be_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, 0, \ + sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb, NULL); \ +} \ +void HELPER(sve_ld##N##SUFF##_le_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \ + sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb); \ +} \ +void HELPER(sve_ld##N##SUFF##_be_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \ + sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb); \ } DO_LDN_1(2) @@ -4654,7 +4809,7 @@ static void record_fault(CPUARMState *env, uintptr_t i, uintptr_t oprsz) */ static inline QEMU_ALWAYS_INLINE void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, - uint32_t desc, const uintptr_t retaddr, + uint32_t desc, const uintptr_t retaddr, uint32_t mtedesc, const int esz, const int msz, const SVEContFault fault, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) @@ -4686,13 +4841,25 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, mem_off = info.mem_off_first[0]; flags = info.page[0].flags; + /* + * Disable MTE checking if the Tagged bit is not set. Since TBI must + * be set within MTEDESC for MTE, !mtedesc => !mte_active. + */ + if (arm_tlb_mte_tagged(&info.page[0].attrs)) { + mtedesc = 0; + } + if (fault == FAULT_FIRST) { + /* Trapping mte check for the first-fault element. */ + if (mtedesc) { + mte_check1(env, mtedesc, addr + mem_off, retaddr); + } + /* * Special handling of the first active element, * if it crosses a page boundary or is MMIO. */ bool is_split = mem_off == info.mem_off_split; - /* TODO: MTE check. */ if (unlikely(flags != 0) || unlikely(is_split)) { /* * Use the slow path for cross-page handling. @@ -4728,7 +4895,9 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, /* Watchpoint hit, see below. */ goto do_fault; } - /* TODO: MTE check. */ + if (mtedesc && !mte_probe1(env, mtedesc, addr + mem_off)) { + goto do_fault; + } /* * Use the slow path for cross-page handling. * This is RAM, without a watchpoint, and will not trap. @@ -4776,7 +4945,9 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, & BP_MEM_READ)) { goto do_fault; } - /* TODO: MTE check. */ + if (mtedesc && !mte_probe1(env, mtedesc, addr + mem_off)) { + goto do_fault; + } host_fn(vd, reg_off, host + mem_off); } reg_off += 1 << esz; @@ -4814,44 +4985,103 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr, record_fault(env, reg_off, reg_max); } -#define DO_LDFF1_LDNF1_1(PART, ESZ) \ +static inline QEMU_ALWAYS_INLINE +void sve_ldnfff1_r_mte(CPUARMState *env, void *vg, target_ulong addr, + uint32_t desc, const uintptr_t retaddr, + const int esz, const int msz, const SVEContFault fault, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) +{ + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + int bit55 = extract64(addr, 55, 1); + + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + + /* Perform gross MTE suppression early. 
*/ + if (!tbi_check(desc, bit55) || + tcma_check(desc, bit55, allocation_tag_from_addr(addr))) { + mtedesc = 0; + } + + sve_ldnfff1_r(env, vg, addr, desc, retaddr, mtedesc, + esz, msz, fault, host_fn, tlb_fn); +} + +#define DO_LDFF1_LDNF1_1(PART, ESZ) \ void HELPER(sve_ldff1##PART##_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ - sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_FIRST, \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MO_8, FAULT_FIRST, \ sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ } \ void HELPER(sve_ldnf1##PART##_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ - sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_NO, \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MO_8, FAULT_NO, \ + sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ +} \ +void HELPER(sve_ldff1##PART##_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_FIRST, \ + sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ +} \ +void HELPER(sve_ldnf1##PART##_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_NO, \ sve_ld1##PART##_host, sve_ld1##PART##_tlb); \ } -#define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \ +#define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \ void HELPER(sve_ldff1##PART##_le_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ - sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_FIRST, \ sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ } \ void HELPER(sve_ldnf1##PART##_le_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ - sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_NO, \ sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ } \ void HELPER(sve_ldff1##PART##_be_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ - sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_FIRST, \ sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ } \ void HELPER(sve_ldnf1##PART##_be_r)(CPUARMState *env, void *vg, \ target_ulong addr, uint32_t desc) \ { \ - sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \ + sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_NO, \ sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ +} \ +void HELPER(sve_ldff1##PART##_le_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \ + sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ +} \ +void HELPER(sve_ldnf1##PART##_le_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \ + sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \ +} \ +void HELPER(sve_ldff1##PART##_be_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \ + sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ +} \ +void HELPER(sve_ldnf1##PART##_be_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \ 
+ sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \ } DO_LDFF1_LDNF1_1(bb, MO_8) @@ -4882,11 +5112,12 @@ DO_LDFF1_LDNF1_2(dd, MO_64, MO_64) */ static inline QEMU_ALWAYS_INLINE -void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, uint32_t desc, - const uintptr_t retaddr, const int esz, - const int msz, const int N, +void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, + uint32_t desc, const uintptr_t retaddr, + const int esz, const int msz, const int N, uint32_t mtedesc, sve_ldst1_host_fn *host_fn, - sve_ldst1_tlb_fn *tlb_fn) + sve_ldst1_tlb_fn *tlb_fn, + sve_cont_ldst_mte_check_fn *mte_check_fn) { const unsigned rd = simd_data(desc); const intptr_t reg_max = simd_oprsz(desc); @@ -4908,7 +5139,14 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, uint32_t desc, sve_cont_ldst_watchpoints(&info, env, vg, addr, 1 << esz, N << msz, BP_MEM_WRITE, retaddr); - /* TODO: MTE check. */ + /* + * Handle mte checks for all active elements. + * Since TBI must be set for MTE, !mtedesc => !mte_active. + */ + if (mte_check_fn && mtedesc) { + mte_check_fn(&info, env, vg, addr, 1 << esz, N << msz, + mtedesc, retaddr); + } flags = info.page[0].flags | info.page[1].flags; if (unlikely(flags != 0)) { @@ -5002,26 +5240,67 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, uint32_t desc, } } -#define DO_STN_1(N, NAME, ESZ) \ -void HELPER(sve_st##N##NAME##_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, N, \ - sve_st1##NAME##_host, sve_st1##NAME##_tlb); \ +static inline QEMU_ALWAYS_INLINE +void sve_stN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr, + uint32_t desc, const uintptr_t ra, + const int esz, const int msz, const int N, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) +{ + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + int bit55 = extract64(addr, 55, 1); + + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + + /* Perform gross MTE suppression early. */ + if (!tbi_check(desc, bit55) || + tcma_check(desc, bit55, allocation_tag_from_addr(addr))) { + mtedesc = 0; + } + + sve_stN_r(env, vg, addr, desc, ra, esz, msz, N, mtedesc, host_fn, tlb_fn, + N == 1 ? 
sve_cont_ldst_mte_check1 : sve_cont_ldst_mte_checkN); } -#define DO_STN_2(N, NAME, ESZ, MSZ) \ -void HELPER(sve_st##N##NAME##_le_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \ - sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb); \ -} \ -void HELPER(sve_st##N##NAME##_be_r)(CPUARMState *env, void *vg, \ - target_ulong addr, uint32_t desc) \ -{ \ - sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \ - sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb); \ +#define DO_STN_1(N, NAME, ESZ) \ +void HELPER(sve_st##N##NAME##_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, N, 0, \ + sve_st1##NAME##_host, sve_st1##NAME##_tlb, NULL); \ +} \ +void HELPER(sve_st##N##NAME##_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, N, \ + sve_st1##NAME##_host, sve_st1##NAME##_tlb); \ +} + +#define DO_STN_2(N, NAME, ESZ, MSZ) \ +void HELPER(sve_st##N##NAME##_le_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, 0, \ + sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb, NULL); \ +} \ +void HELPER(sve_st##N##NAME##_be_r)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, 0, \ + sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb, NULL); \ +} \ +void HELPER(sve_st##N##NAME##_le_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \ + sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb); \ +} \ +void HELPER(sve_st##N##NAME##_be_r_mte)(CPUARMState *env, void *vg, \ + target_ulong addr, uint32_t desc) \ +{ \ + sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \ + sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb); \ } DO_STN_1(1, bb, MO_8) @@ -5090,7 +5369,8 @@ static target_ulong off_zd_d(void *reg, intptr_t reg_ofs) static inline QEMU_ALWAYS_INLINE void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, target_ulong base, uint32_t desc, uintptr_t retaddr, - int esize, int msize, zreg_off_fn *off_fn, + uint32_t mtedesc, int esize, int msize, + zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { @@ -5118,7 +5398,9 @@ void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, cpu_check_watchpoint(env_cpu(env), addr, msize, info.attrs, BP_MEM_READ, retaddr); } - /* TODO: MTE check */ + if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) { + mte_check1(env, mtedesc, addr, retaddr); + } host_fn(&scratch, reg_off, info.host); } else { /* Element crosses the page boundary. 
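 * Fall through to the slower TLB path: the watchpoint check and,
 * for tagged pages, mte_check1() run per element before the access.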
*/ @@ -5129,7 +5411,9 @@ void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, msize, info.attrs, BP_MEM_READ, retaddr); } - /* TODO: MTE check */ + if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) { + mte_check1(env, mtedesc, addr, retaddr); + } tlb_fn(env, &scratch, reg_off, addr, retaddr); } } @@ -5142,20 +5426,53 @@ void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, memcpy(vd, &scratch, reg_max); } +static inline QEMU_ALWAYS_INLINE +void sve_ld1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t retaddr, + int esize, int msize, zreg_off_fn *off_fn, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) +{ + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + + /* + * ??? TODO: For the 32-bit offset extractions, base + ofs cannot + * offset base entirely over the address space hole to change the + * pointer tag, or change the bit55 selector. So we could here + * examine TBI + TCMA like we do for sve_ldN_r_mte(). + */ + sve_ld1_z(env, vd, vg, vm, base, desc, retaddr, mtedesc, + esize, msize, off_fn, host_fn, tlb_fn); +} + #define DO_LD1_ZPZ_S(MEM, OFS, MSZ) \ void HELPER(sve_ld##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ void *vm, target_ulong base, uint32_t desc) \ { \ - sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \ + sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 4, 1 << MSZ, \ off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ +} \ +void HELPER(sve_ld##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint32_t desc) \ +{ \ + sve_ld1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \ + off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ } #define DO_LD1_ZPZ_D(MEM, OFS, MSZ) \ void HELPER(sve_ld##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ void *vm, target_ulong base, uint32_t desc) \ { \ - sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \ + sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 8, 1 << MSZ, \ off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ +} \ +void HELPER(sve_ld##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint32_t desc) \ +{ \ + sve_ld1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \ + off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ } DO_LD1_ZPZ_S(bsu, zsu, MO_8) @@ -5234,7 +5551,8 @@ DO_LD1_ZPZ_D(dd_be, zd, MO_64) static inline QEMU_ALWAYS_INLINE void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, target_ulong base, uint32_t desc, uintptr_t retaddr, - const int esz, const int msz, zreg_off_fn *off_fn, + uint32_t mtedesc, const int esz, const int msz, + zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { @@ -5259,6 +5577,9 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, * Probe the first element, allowing faults. */ addr = base + (off_fn(vm, reg_off) << scale); + if (mtedesc) { + mte_check1(env, mtedesc, addr, retaddr); + } tlb_fn(env, vd, reg_off, addr, retaddr); /* After any fault, zero the other elements. */ @@ -5291,7 +5612,11 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, (env_cpu(env), addr, msize) & BP_MEM_READ)) { goto fault; } - /* TODO: MTE check. 
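 * For the remaining (non-first) elements the check must not trap:
 * mte_probe1() below only reports a mismatch, letting the element be
 * recorded in FFR via record_fault() rather than taking an exception.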
*/ + if (mtedesc && + arm_tlb_mte_tagged(&info.attrs) && + !mte_probe1(env, mtedesc, addr)) { + goto fault; + } host_fn(vd, reg_off, info.host); } @@ -5304,20 +5629,58 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, record_fault(env, reg_off, reg_max); } -#define DO_LDFF1_ZPZ_S(MEM, OFS, MSZ) \ -void HELPER(sve_ldff##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ -{ \ - sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), MO_32, MSZ, \ - off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ +static inline QEMU_ALWAYS_INLINE +void sve_ldff1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t retaddr, + const int esz, const int msz, + zreg_off_fn *off_fn, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) +{ + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + + /* + * ??? TODO: For the 32-bit offset extractions, base + ofs cannot + * offset base entirely over the address space hole to change the + * pointer tag, or change the bit55 selector. So we could here + * examine TBI + TCMA like we do for sve_ldN_r_mte(). + */ + sve_ldff1_z(env, vd, vg, vm, base, desc, retaddr, mtedesc, + esz, msz, off_fn, host_fn, tlb_fn); } -#define DO_LDFF1_ZPZ_D(MEM, OFS, MSZ) \ -void HELPER(sve_ldff##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ - void *vm, target_ulong base, uint32_t desc) \ -{ \ - sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), MO_64, MSZ, \ - off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ +#define DO_LDFF1_ZPZ_S(MEM, OFS, MSZ) \ +void HELPER(sve_ldff##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint32_t desc) \ +{ \ + sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), 0, MO_32, MSZ, \ + off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ +} \ +void HELPER(sve_ldff##MEM##_##OFS##_mte) \ + (CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint32_t desc) \ +{ \ + sve_ldff1_z_mte(env, vd, vg, vm, base, desc, GETPC(), MO_32, MSZ, \ + off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ +} + +#define DO_LDFF1_ZPZ_D(MEM, OFS, MSZ) \ +void HELPER(sve_ldff##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint32_t desc) \ +{ \ + sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), 0, MO_64, MSZ, \ + off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ +} \ +void HELPER(sve_ldff##MEM##_##OFS##_mte) \ + (CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint32_t desc) \ +{ \ + sve_ldff1_z_mte(env, vd, vg, vm, base, desc, GETPC(), MO_64, MSZ, \ + off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \ } DO_LDFF1_ZPZ_S(bsu, zsu, MO_8) @@ -5389,7 +5752,8 @@ DO_LDFF1_ZPZ_D(dd_be, zd, MO_64) static inline QEMU_ALWAYS_INLINE void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, target_ulong base, uint32_t desc, uintptr_t retaddr, - int esize, int msize, zreg_off_fn *off_fn, + uint32_t mtedesc, int esize, int msize, + zreg_off_fn *off_fn, sve_ldst1_host_fn *host_fn, sve_ldst1_tlb_fn *tlb_fn) { @@ -5433,7 +5797,10 @@ void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, cpu_check_watchpoint(env_cpu(env), addr, msize, info.attrs, BP_MEM_WRITE, retaddr); } - /* TODO: MTE check. 
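 * The logical tag under test is simply ptr[59:56];
 * allocation_tag_from_addr() amounts to extract64(ptr, 56, 4).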
*/ + + if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) { + mte_check1(env, mtedesc, addr, retaddr); + } } i += 1; reg_off += esize; @@ -5463,20 +5830,53 @@ void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm, } while (reg_off < reg_max); } -#define DO_ST1_ZPZ_S(MEM, OFS, MSZ) \ -void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ +static inline QEMU_ALWAYS_INLINE +void sve_st1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t retaddr, + int esize, int msize, zreg_off_fn *off_fn, + sve_ldst1_host_fn *host_fn, + sve_ldst1_tlb_fn *tlb_fn) +{ + uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + /* Remove mtedesc from the normal sve descriptor. */ + desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT); + + /* + * ??? TODO: For the 32-bit offset extractions, base + ofs cannot + * offset base entirely over the address space hole to change the + * pointer tag, or change the bit55 selector. So we could here + * examine TBI + TCMA like we do for sve_ldN_r_mte(). + */ + sve_st1_z(env, vd, vg, vm, base, desc, retaddr, mtedesc, + esize, msize, off_fn, host_fn, tlb_fn); +} + +#define DO_ST1_ZPZ_S(MEM, OFS, MSZ) \ +void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ void *vm, target_ulong base, uint32_t desc) \ -{ \ - sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \ - off_##OFS##_s, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ +{ \ + sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 4, 1 << MSZ, \ + off_##OFS##_s, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ +} \ +void HELPER(sve_st##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint32_t desc) \ +{ \ + sve_st1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \ + off_##OFS##_s, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ } -#define DO_ST1_ZPZ_D(MEM, OFS, MSZ) \ -void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ +#define DO_ST1_ZPZ_D(MEM, OFS, MSZ) \ +void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \ void *vm, target_ulong base, uint32_t desc) \ -{ \ - sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \ - off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ +{ \ + sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 8, 1 << MSZ, \ + off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ +} \ +void HELPER(sve_st##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \ + void *vm, target_ulong base, uint32_t desc) \ +{ \ + sve_st1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \ + off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \ } DO_ST1_ZPZ_S(bs, zsu, MO_8) diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c index 7388494..b35dc8a 100644 --- a/target/arm/tlb_helper.c +++ b/target/arm/tlb_helper.c @@ -10,8 +10,6 @@ #include "internals.h" #include "exec/exec-all.h" -#if !defined(CONFIG_USER_ONLY) - static inline uint32_t merge_syn_data_abort(uint32_t template_syn, unsigned int target_el, bool same_el, bool ea, @@ -122,6 +120,8 @@ void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr, arm_deliver_fault(cpu, vaddr, access_type, mmu_idx, &fi); } +#if !defined(CONFIG_USER_ONLY) + /* * arm_cpu_do_transaction_failed: handle a memory system error response * (eg "no device/memory present at address") by raising an external abort @@ -166,6 +166,7 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size, int prot, ret; MemTxAttrs attrs = {}; ARMMMUFaultInfo fi = 
{}; + ARMCacheAttrs cacheattrs = {}; /* * Walk the page table and (if the mapping exists) add the page @@ -175,7 +176,8 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size, */ ret = get_phys_addr(&cpu->env, address, access_type, core_to_arm_mmu_idx(&cpu->env, mmu_idx), - &phys_addr, &attrs, &prot, &page_size, &fi, NULL); + &phys_addr, &attrs, &prot, &page_size, + &fi, &cacheattrs); if (likely(!ret)) { /* * Map a single [sub]page. Regions smaller than our declared @@ -186,6 +188,11 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size, phys_addr &= TARGET_PAGE_MASK; address &= TARGET_PAGE_MASK; } + /* Notice and record tagged memory. */ + if (cpu_isar_feature(aa64_mte, cpu) && cacheattrs.attrs == 0xf0) { + arm_tlb_mte_tagged(&attrs) = true; + } + tlb_set_page_with_attrs(cs, address, phys_addr, attrs, prot, mmu_idx, page_size); return true; diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index 4cef862..73d753f 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -204,20 +204,20 @@ static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src) } /* - * Return a "clean" address for ADDR according to TBID. - * This is always a fresh temporary, as we need to be able to - * increment this independently of a dirty write-back address. + * Handle MTE and/or TBI. + * + * For TBI, ideally, we would do nothing. Proper behaviour on fault is + * for the tag to be present in the FAR_ELx register. But for user-only + * mode we do not have a TLB with which to implement this, so we must + * remove the top byte now. + * + * Always return a fresh temporary that we can increment independently + * of the write-back address. */ -static TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr) + +TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr) { TCGv_i64 clean = new_tmp_a64(s); - /* - * In order to get the correct value in the FAR_ELx register, - * we must present the memory subsystem with the "dirty" address - * including the TBI. In system mode we can make this work via - * the TLB, dropping the TBI during translation. But for user-only - * mode we don't have that option, and must remove the top byte now. - */ #ifdef CONFIG_USER_ONLY gen_top_byte_ignore(s, clean, addr, s->tbid); #else @@ -226,6 +226,92 @@ static TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr) return clean; } +/* Insert a zero tag into src, with the result at dst. */ +static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src) +{ + tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4)); +} + +static void gen_probe_access(DisasContext *s, TCGv_i64 ptr, + MMUAccessType acc, int log2_size) +{ + TCGv_i32 t_acc = tcg_const_i32(acc); + TCGv_i32 t_idx = tcg_const_i32(get_mem_index(s)); + TCGv_i32 t_size = tcg_const_i32(1 << log2_size); + + gen_helper_probe_access(cpu_env, ptr, t_acc, t_idx, t_size); + tcg_temp_free_i32(t_acc); + tcg_temp_free_i32(t_idx); + tcg_temp_free_i32(t_size); +} + +/* + * For MTE, check a single logical or atomic access. This probes a single + * address, the exact one specified. The size and alignment of the access + * is not relevant to MTE, per se, but watchpoints do require the size, + * and we want to recognize those before making any other changes to state. 
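 * The value returned is the address to use for the actual access:
 * the helper's cleaned result when a check is emitted, otherwise
 * the TBI-stripped address from clean_data_tbi().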
+ */ +static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr, + bool is_write, bool tag_checked, + int log2_size, bool is_unpriv, + int core_idx) +{ + if (tag_checked && s->mte_active[is_unpriv]) { + TCGv_i32 tcg_desc; + TCGv_i64 ret; + int desc = 0; + + desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx); + desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); + desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); + desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); + desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << log2_size); + tcg_desc = tcg_const_i32(desc); + + ret = new_tmp_a64(s); + gen_helper_mte_check1(ret, cpu_env, tcg_desc, addr); + tcg_temp_free_i32(tcg_desc); + + return ret; + } + return clean_data_tbi(s, addr); +} + +TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, + bool tag_checked, int log2_size) +{ + return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size, + false, get_mem_index(s)); +} + +/* + * For MTE, check multiple logical sequential accesses. + */ +TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, + bool tag_checked, int log2_esize, int total_size) +{ + if (tag_checked && s->mte_active[0] && total_size != (1 << log2_esize)) { + TCGv_i32 tcg_desc; + TCGv_i64 ret; + int desc = 0; + + desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); + desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); + desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); + desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); + desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << log2_esize); + desc = FIELD_DP32(desc, MTEDESC, TSIZE, total_size); + tcg_desc = tcg_const_i32(desc); + + ret = new_tmp_a64(s); + gen_helper_mte_checkN(ret, cpu_env, tcg_desc, addr); + tcg_temp_free_i32(tcg_desc); + + return ret; + } + return gen_mte_check1(s, addr, is_write, tag_checked, log2_esize); +} + typedef struct DisasCompare64 { TCGCond cond; TCGv_i64 value; @@ -1616,7 +1702,28 @@ static void handle_msr_i(DisasContext *s, uint32_t insn, gen_helper_msr_i_daifclear(cpu_env, t1); tcg_temp_free_i32(t1); /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs. */ - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; + break; + + case 0x1c: /* TCO */ + if (dc_isar_feature(aa64_mte, s)) { + /* Full MTE is enabled -- set the TCO bit as directed. */ + if (crm & 1) { + set_pstate_bits(PSTATE_TCO); + } else { + clear_pstate_bits(PSTATE_TCO); + } + t1 = tcg_const_i32(s->current_el); + gen_helper_rebuild_hflags_a64(cpu_env, t1); + tcg_temp_free_i32(t1); + /* Many factors, including TCO, go into MTE_ACTIVE. */ + s->base.is_jmp = DISAS_UPDATE_NOCHAIN; + } else if (dc_isar_feature(aa64_mte_insn_reg, s)) { + /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */ + s->base.is_jmp = DISAS_NEXT; + } else { + goto do_unallocated; + } break; default: @@ -1750,9 +1857,62 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, return; case ARM_CP_DC_ZVA: /* Writes clear the aligned block of memory which rt points into. 
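 * When MTE is active, helper_mte_check_zva below first validates the
 * allocation tags covering the whole block, returning the cleaned
 * address that is then passed to the dc_zva helper.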
*/ - tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); + if (s->mte_active[0]) { + TCGv_i32 t_desc; + int desc = 0; + + desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); + desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); + desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); + t_desc = tcg_const_i32(desc); + + tcg_rt = new_tmp_a64(s); + gen_helper_mte_check_zva(tcg_rt, cpu_env, t_desc, cpu_reg(s, rt)); + tcg_temp_free_i32(t_desc); + } else { + tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); + } gen_helper_dc_zva(cpu_env, tcg_rt); return; + case ARM_CP_DC_GVA: + { + TCGv_i64 clean_addr, tag; + + /* + * DC_GVA, like DC_ZVA, requires that we supply the original + * pointer for an invalid page. Probe that address first. + */ + tcg_rt = cpu_reg(s, rt); + clean_addr = clean_data_tbi(s, tcg_rt); + gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8); + + if (s->ata) { + /* Extract the tag from the register to match STZGM. */ + tag = tcg_temp_new_i64(); + tcg_gen_shri_i64(tag, tcg_rt, 56); + gen_helper_stzgm_tags(cpu_env, clean_addr, tag); + tcg_temp_free_i64(tag); + } + } + return; + case ARM_CP_DC_GZVA: + { + TCGv_i64 clean_addr, tag; + + /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */ + tcg_rt = cpu_reg(s, rt); + clean_addr = clean_data_tbi(s, tcg_rt); + gen_helper_dc_zva(cpu_env, clean_addr); + + if (s->ata) { + /* Extract the tag from the register to match STZGM. */ + tag = tcg_temp_new_i64(); + tcg_gen_shri_i64(tag, tcg_rt, 56); + gen_helper_stzgm_tags(cpu_env, clean_addr, tag); + tcg_temp_free_i64(tag); + } + } + return; default: break; } @@ -1795,7 +1955,7 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) { /* I/O operations must end the TB here (whether read or write) */ - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) { /* @@ -1810,7 +1970,7 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, * but allow this to be suppressed by the register definition * (usually only necessary to work around guest bugs). */ - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } } @@ -2327,7 +2487,7 @@ static void gen_compare_and_swap(DisasContext *s, int rs, int rt, if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size); tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx, size | MO_ALIGN | s->be_data); } @@ -2345,7 +2505,9 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt, if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + + /* This is a single atomic access, despite the "pair". 
*/ + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size + 1); if (size == 2) { TCGv_i64 cmp = tcg_temp_new_i64(); @@ -2470,7 +2632,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (is_lasr) { tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), + true, rn != 31, size); gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false); return; @@ -2479,7 +2642,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), + false, rn != 31, size); s->is_ldex = true; gen_load_exclusive(s, rt, rt2, clean_addr, size, false); if (is_lasr) { @@ -2499,7 +2663,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) gen_check_sp_alignment(s); } tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), + true, rn != 31, size); do_gpr_st(s, cpu_reg(s, rt), clean_addr, size, true, rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr); return; @@ -2515,7 +2680,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), + false, rn != 31, size); do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, true, rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr); tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ); @@ -2529,7 +2695,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (is_lasr) { tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), + true, rn != 31, size); gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true); return; } @@ -2547,7 +2714,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn) if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), + false, rn != 31, size); s->is_ldex = true; gen_load_exclusive(s, rt, rt2, clean_addr, size, true); if (is_lasr) { @@ -2650,7 +2818,7 @@ static void disas_ld_lit(DisasContext *s, uint32_t insn) * +-----+-------+---+---+-------+---+-------+-------+------+------+ * * opc: LDP/STP/LDNP/STNP 00 -> 32 bit, 10 -> 64 bit - * LDPSW 01 + * LDPSW/STGP 01 * LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit * V: 0 -> GPR, 1 -> Vector * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index, @@ -2675,6 +2843,7 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) bool is_signed = false; bool postindex = false; bool wback = false; + bool set_tag = false; TCGv_i64 clean_addr, dirty_addr; @@ -2687,6 +2856,14 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) if (is_vector) { size = 2 + opc; + } else if (opc == 1 && !is_load) { + /* STGP */ + if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) { + unallocated_encoding(s); + return; + } + size = 3; + set_tag = true; } else { size = 2 + extract32(opc, 1, 1); is_signed = extract32(opc, 0, 1); @@ -2727,7 +2904,7 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) return; } - offset <<= size; + offset <<= (set_tag ? 
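/* STGP: the immediate is scaled by the 16-byte tag granule, not the data size. */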
LOG2_TAG_GRANULE : size); if (rn == 31) { gen_check_sp_alignment(s); @@ -2737,7 +2914,24 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn) if (!postindex) { tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); } - clean_addr = clean_data_tbi(s, dirty_addr); + + if (set_tag) { + if (!s->ata) { + /* + * TODO: We could rely on the stores below, at least for + * system mode, if we arrange to add MO_ALIGN_16. + */ + gen_helper_stg_stub(cpu_env, dirty_addr); + } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { + gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr); + } else { + gen_helper_stg(cpu_env, dirty_addr, dirty_addr); + } + } + + clean_addr = gen_mte_checkN(s, dirty_addr, !is_load, + (wback || rn != 31) && !set_tag, + size, 2 << size); if (is_vector) { if (is_load) { @@ -2818,6 +3012,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, bool iss_valid = !is_vector; bool post_index; bool writeback; + int memidx; TCGv_i64 clean_addr, dirty_addr; @@ -2875,7 +3070,11 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, if (!post_index) { tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9); } - clean_addr = clean_data_tbi(s, dirty_addr); + + memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s); + clean_addr = gen_mte_check1_mmuidx(s, dirty_addr, is_store, + writeback || rn != 31, + size, is_unpriv, memidx); if (is_vector) { if (is_store) { @@ -2885,7 +3084,6 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn, } } else { TCGv_i64 tcg_rt = cpu_reg(s, rt); - int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s); bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc); if (is_store) { @@ -2982,7 +3180,7 @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn, ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0); tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm); - clean_addr = clean_data_tbi(s, dirty_addr); + clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, size); if (is_vector) { if (is_store) { @@ -3067,7 +3265,7 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn, dirty_addr = read_cpu_reg_sp(s, rn, 1); offset = imm12 << size; tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); - clean_addr = clean_data_tbi(s, dirty_addr); + clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, size); if (is_vector) { if (is_store) { @@ -3160,7 +3358,7 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, if (rn == 31) { gen_check_sp_alignment(s); } - clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size); if (o3_opc == 014) { /* @@ -3237,7 +3435,8 @@ static void disas_ldst_pac(DisasContext *s, uint32_t insn, tcg_gen_addi_i64(dirty_addr, dirty_addr, offset); /* Note that "clean" and "dirty" here refer to TBI not PAC. 
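 * Authentication consumes the dirty address; gen_mte_check1 then
 * produces the clean address actually used for the load.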
*/ - clean_addr = clean_data_tbi(s, dirty_addr); + clean_addr = gen_mte_check1(s, dirty_addr, false, + is_wback || rn != 31, size); tcg_rt = cpu_reg(s, rt); do_gpr_ld(s, tcg_rt, clean_addr, size, /* is_signed */ false, @@ -3399,7 +3598,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; MemOp endian = s->be_data; - int ebytes; /* bytes per element */ + int total; /* total bytes */ int elements; /* elements per vector */ int rpt; /* num iterations */ int selem; /* structure elements */ @@ -3469,19 +3668,26 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) endian = MO_LE; } - /* Consecutive little-endian elements from a single register + total = rpt * selem * (is_q ? 16 : 8); + tcg_rn = cpu_reg_sp(s, rn); + + /* + * Issue the MTE check vs the logical repeat count, before we + * promote consecutive little-endian elements below. + */ + clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31, + size, total); + + /* + * Consecutive little-endian elements from a single register * can be promoted to a larger little-endian operation. */ if (selem == 1 && endian == MO_LE) { size = 3; } - ebytes = 1 << size; - elements = (is_q ? 16 : 8) / ebytes; - - tcg_rn = cpu_reg_sp(s, rn); - clean_addr = clean_data_tbi(s, tcg_rn); - tcg_ebytes = tcg_const_i64(ebytes); + elements = (is_q ? 16 : 8) >> size; + tcg_ebytes = tcg_const_i64(1 << size); for (r = 0; r < rpt; r++) { int e; for (e = 0; e < elements; e++) { @@ -3515,7 +3721,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn) if (is_postidx) { if (rm == 31) { - tcg_gen_addi_i64(tcg_rn, tcg_rn, rpt * elements * selem * ebytes); + tcg_gen_addi_i64(tcg_rn, tcg_rn, total); } else { tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); } @@ -3561,7 +3767,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) int selem = (extract32(opc, 0, 1) << 1 | R) + 1; bool replicate = false; int index = is_q << 3 | S << 2 | size; - int ebytes, xs; + int xs, total; TCGv_i64 clean_addr, tcg_rn, tcg_ebytes; if (extract32(insn, 31, 1)) { @@ -3615,16 +3821,17 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) return; } - ebytes = 1 << scale; - if (rn == 31) { gen_check_sp_alignment(s); } + total = selem << scale; tcg_rn = cpu_reg_sp(s, rn); - clean_addr = clean_data_tbi(s, tcg_rn); - tcg_ebytes = tcg_const_i64(ebytes); + clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31, + scale, total); + + tcg_ebytes = tcg_const_i64(1 << scale); for (xs = 0; xs < selem; xs++) { if (replicate) { /* Load and replicate to all elements */ @@ -3651,13 +3858,216 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn) if (is_postidx) { if (rm == 31) { - tcg_gen_addi_i64(tcg_rn, tcg_rn, selem * ebytes); + tcg_gen_addi_i64(tcg_rn, tcg_rn, total); } else { tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm)); } } } +/* + * Load/Store memory tags + * + * 31 30 29 24 22 21 12 10 5 0 + * +-----+-------------+-----+---+------+-----+------+------+ + * | 1 1 | 0 1 1 0 0 1 | op1 | 1 | imm9 | op2 | Rn | Rt | + * +-----+-------------+-----+---+------+-----+------+------+ + */ +static void disas_ldst_tag(DisasContext *s, uint32_t insn) +{ + int rt = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE; + int op2 = extract32(insn, 10, 2); + int op1 = extract32(insn, 22, 2); + bool is_load = false, is_pair = false, is_zero = false, is_mult = false; + int 
index = 0; + TCGv_i64 addr, clean_addr, tcg_rt; + + /* We checked insn bits [29:24,21] in the caller. */ + if (extract32(insn, 30, 2) != 3) { + goto do_unallocated; + } + + /* + * @index is a tri-state variable which has 3 states: + * < 0 : post-index, writeback + * = 0 : signed offset + * > 0 : pre-index, writeback + */ + switch (op1) { + case 0: + if (op2 != 0) { + /* STG */ + index = op2 - 2; + } else { + /* STZGM */ + if (s->current_el == 0 || offset != 0) { + goto do_unallocated; + } + is_mult = is_zero = true; + } + break; + case 1: + if (op2 != 0) { + /* STZG */ + is_zero = true; + index = op2 - 2; + } else { + /* LDG */ + is_load = true; + } + break; + case 2: + if (op2 != 0) { + /* ST2G */ + is_pair = true; + index = op2 - 2; + } else { + /* STGM */ + if (s->current_el == 0 || offset != 0) { + goto do_unallocated; + } + is_mult = true; + } + break; + case 3: + if (op2 != 0) { + /* STZ2G */ + is_pair = is_zero = true; + index = op2 - 2; + } else { + /* LDGM */ + if (s->current_el == 0 || offset != 0) { + goto do_unallocated; + } + is_mult = is_load = true; + } + break; + + default: + do_unallocated: + unallocated_encoding(s); + return; + } + + if (is_mult + ? !dc_isar_feature(aa64_mte, s) + : !dc_isar_feature(aa64_mte_insn_reg, s)) { + goto do_unallocated; + } + + if (rn == 31) { + gen_check_sp_alignment(s); + } + + addr = read_cpu_reg_sp(s, rn, true); + if (index >= 0) { + /* pre-index or signed offset */ + tcg_gen_addi_i64(addr, addr, offset); + } + + if (is_mult) { + tcg_rt = cpu_reg(s, rt); + + if (is_zero) { + int size = 4 << s->dcz_blocksize; + + if (s->ata) { + gen_helper_stzgm_tags(cpu_env, addr, tcg_rt); + } + /* + * The non-tags portion of STZGM is mostly like DC_ZVA, + * except the alignment happens before the access. + */ + clean_addr = clean_data_tbi(s, addr); + tcg_gen_andi_i64(clean_addr, clean_addr, -size); + gen_helper_dc_zva(cpu_env, clean_addr); + } else if (s->ata) { + if (is_load) { + gen_helper_ldgm(tcg_rt, cpu_env, addr); + } else { + gen_helper_stgm(cpu_env, addr, tcg_rt); + } + } else { + MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE; + int size = 4 << GMID_EL1_BS; + + clean_addr = clean_data_tbi(s, addr); + tcg_gen_andi_i64(clean_addr, clean_addr, -size); + gen_probe_access(s, clean_addr, acc, size); + + if (is_load) { + /* The result tags are zeros. */ + tcg_gen_movi_i64(tcg_rt, 0); + } + } + return; + } + + if (is_load) { + tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); + tcg_rt = cpu_reg(s, rt); + if (s->ata) { + gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt); + } else { + clean_addr = clean_data_tbi(s, addr); + gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8); + gen_address_with_allocation_tag0(tcg_rt, addr); + } + } else { + tcg_rt = cpu_reg_sp(s, rt); + if (!s->ata) { + /* + * For STG and ST2G, we need to check alignment and probe memory. + * TODO: For STZG and STZ2G, we could rely on the stores below, + * at least for system mode; user-only won't enforce alignment. 
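 * The stub helpers do exactly that: verify TAG_GRANULE (16-byte)
 * alignment and probe for a writable page, without storing any tags.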
+ */ + if (is_pair) { + gen_helper_st2g_stub(cpu_env, addr); + } else { + gen_helper_stg_stub(cpu_env, addr); + } + } else if (tb_cflags(s->base.tb) & CF_PARALLEL) { + if (is_pair) { + gen_helper_st2g_parallel(cpu_env, addr, tcg_rt); + } else { + gen_helper_stg_parallel(cpu_env, addr, tcg_rt); + } + } else { + if (is_pair) { + gen_helper_st2g(cpu_env, addr, tcg_rt); + } else { + gen_helper_stg(cpu_env, addr, tcg_rt); + } + } + } + + if (is_zero) { + TCGv_i64 clean_addr = clean_data_tbi(s, addr); + TCGv_i64 tcg_zero = tcg_const_i64(0); + int mem_index = get_mem_index(s); + int i, n = (1 + is_pair) << LOG2_TAG_GRANULE; + + tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, + MO_Q | MO_ALIGN_16); + for (i = 8; i < n; i += 8) { + tcg_gen_addi_i64(clean_addr, clean_addr, 8); + tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, MO_Q); + } + tcg_temp_free_i64(tcg_zero); + } + + if (index != 0) { + /* pre-index or post-index */ + if (index < 0) { + /* post-index */ + tcg_gen_addi_i64(addr, addr, offset); + } + tcg_gen_mov_i64(cpu_reg_sp(s, rn), addr); + } +} + /* Loads and stores */ static void disas_ldst(DisasContext *s, uint32_t insn) { @@ -3682,13 +4092,14 @@ static void disas_ldst(DisasContext *s, uint32_t insn) case 0x0d: /* AdvSIMD load/store single structure */ disas_ldst_single_struct(s, insn); break; - case 0x19: /* LDAPR/STLR (unscaled immediate) */ - if (extract32(insn, 10, 2) != 0 || - extract32(insn, 21, 1) != 0) { + case 0x19: + if (extract32(insn, 21, 1) != 0) { + disas_ldst_tag(s, insn); + } else if (extract32(insn, 10, 2) == 0) { + disas_ldst_ldapr_stlr(s, insn); + } else { unallocated_encoding(s); - break; } - disas_ldst_ldapr_stlr(s, insn); break; default: unallocated_encoding(s); @@ -3727,22 +4138,22 @@ static void disas_pc_rel_adr(DisasContext *s, uint32_t insn) /* * Add/subtract (immediate) * - * 31 30 29 28 24 23 22 21 10 9 5 4 0 - * +--+--+--+-----------+-----+-------------+-----+-----+ - * |sf|op| S| 1 0 0 0 1 |shift| imm12 | Rn | Rd | - * +--+--+--+-----------+-----+-------------+-----+-----+ + * 31 30 29 28 23 22 21 10 9 5 4 0 + * +--+--+--+-------------+--+-------------+-----+-----+ + * |sf|op| S| 1 0 0 0 1 0 |sh| imm12 | Rn | Rd | + * +--+--+--+-------------+--+-------------+-----+-----+ * * sf: 0 -> 32bit, 1 -> 64bit * op: 0 -> add , 1 -> sub * S: 1 -> set flags - * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12 + * sh: 1 -> LSL imm by 12 */ static void disas_add_sub_imm(DisasContext *s, uint32_t insn) { int rd = extract32(insn, 0, 5); int rn = extract32(insn, 5, 5); uint64_t imm = extract32(insn, 10, 12); - int shift = extract32(insn, 22, 2); + bool shift = extract32(insn, 22, 1); bool setflags = extract32(insn, 29, 1); bool sub_op = extract32(insn, 30, 1); bool is_64bit = extract32(insn, 31, 1); @@ -3751,15 +4162,8 @@ static void disas_add_sub_imm(DisasContext *s, uint32_t insn) TCGv_i64 tcg_rd = setflags ? 
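/* Flag-setting ADDS/SUBS write the ZR-indexed register; only ADD/SUB can target SP. */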
cpu_reg(s, rd) : cpu_reg_sp(s, rd); TCGv_i64 tcg_result; - switch (shift) { - case 0x0: - break; - case 0x1: + if (shift) { imm <<= 12; - break; - default: - unallocated_encoding(s); - return; } tcg_result = tcg_temp_new_i64(); @@ -3788,6 +4192,54 @@ static void disas_add_sub_imm(DisasContext *s, uint32_t insn) tcg_temp_free_i64(tcg_result); } +/* + * Add/subtract (immediate, with tags) + * + * 31 30 29 28 23 22 21 16 14 10 9 5 4 0 + * +--+--+--+-------------+--+---------+--+-------+-----+-----+ + * |sf|op| S| 1 0 0 0 1 1 |o2| uimm6 |o3| uimm4 | Rn | Rd | + * +--+--+--+-------------+--+---------+--+-------+-----+-----+ + * + * op: 0 -> add, 1 -> sub + */ +static void disas_add_sub_imm_with_tags(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int uimm4 = extract32(insn, 10, 4); + int uimm6 = extract32(insn, 16, 6); + bool sub_op = extract32(insn, 30, 1); + TCGv_i64 tcg_rn, tcg_rd; + int imm; + + /* Test all of sf=1, S=0, o2=0, o3=0. */ + if ((insn & 0xa040c000u) != 0x80000000u || + !dc_isar_feature(aa64_mte_insn_reg, s)) { + unallocated_encoding(s); + return; + } + + imm = uimm6 << LOG2_TAG_GRANULE; + if (sub_op) { + imm = -imm; + } + + tcg_rn = cpu_reg_sp(s, rn); + tcg_rd = cpu_reg_sp(s, rd); + + if (s->ata) { + TCGv_i32 offset = tcg_const_i32(imm); + TCGv_i32 tag_offset = tcg_const_i32(uimm4); + + gen_helper_addsubg(tcg_rd, cpu_env, tcg_rn, offset, tag_offset); + tcg_temp_free_i32(tag_offset); + tcg_temp_free_i32(offset); + } else { + tcg_gen_addi_i64(tcg_rd, tcg_rn, imm); + gen_address_with_allocation_tag0(tcg_rd, tcg_rd); + } +} + /* The input should be a value in the bottom e bits (with higher * bits zero); returns that value replicated into every element * of size e in a 64 bit integer. @@ -4147,9 +4599,12 @@ static void disas_data_proc_imm(DisasContext *s, uint32_t insn) case 0x20: case 0x21: /* PC-rel. 
addressing */ disas_pc_rel_adr(s, insn); break; - case 0x22: case 0x23: /* Add/subtract (immediate) */ + case 0x22: /* Add/subtract (immediate) */ disas_add_sub_imm(s, insn); break; + case 0x23: /* Add/subtract (immediate, with tags) */ + disas_add_sub_imm_with_tags(s, insn); + break; case 0x24: /* Logical (immediate) */ disas_logic_imm(s, insn); break; @@ -5244,25 +5699,72 @@ static void handle_crc32(DisasContext *s, */ static void disas_data_proc_2src(DisasContext *s, uint32_t insn) { - unsigned int sf, rm, opcode, rn, rd; + unsigned int sf, rm, opcode, rn, rd, setflag; sf = extract32(insn, 31, 1); + setflag = extract32(insn, 29, 1); rm = extract32(insn, 16, 5); opcode = extract32(insn, 10, 6); rn = extract32(insn, 5, 5); rd = extract32(insn, 0, 5); - if (extract32(insn, 29, 1)) { + if (setflag && opcode != 0) { unallocated_encoding(s); return; } switch (opcode) { + case 0: /* SUBP(S) */ + if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { + goto do_unallocated; + } else { + TCGv_i64 tcg_n, tcg_m, tcg_d; + + tcg_n = read_cpu_reg_sp(s, rn, true); + tcg_m = read_cpu_reg_sp(s, rm, true); + tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56); + tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56); + tcg_d = cpu_reg(s, rd); + + if (setflag) { + gen_sub_CC(true, tcg_d, tcg_n, tcg_m); + } else { + tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m); + } + } + break; case 2: /* UDIV */ handle_div(s, false, sf, rm, rn, rd); break; case 3: /* SDIV */ handle_div(s, true, sf, rm, rn, rd); break; + case 4: /* IRG */ + if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { + goto do_unallocated; + } + if (s->ata) { + gen_helper_irg(cpu_reg_sp(s, rd), cpu_env, + cpu_reg_sp(s, rn), cpu_reg(s, rm)); + } else { + gen_address_with_allocation_tag0(cpu_reg_sp(s, rd), + cpu_reg_sp(s, rn)); + } + break; + case 5: /* GMI */ + if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { + goto do_unallocated; + } else { + TCGv_i64 t1 = tcg_const_i64(1); + TCGv_i64 t2 = tcg_temp_new_i64(); + + tcg_gen_extract_i64(t2, cpu_reg_sp(s, rn), 56, 4); + tcg_gen_shl_i64(t1, t1, t2); + tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t1); + + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); + } + break; case 8: /* LSLV */ handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd); break; @@ -13971,7 +14473,7 @@ static bool is_guarded_page(CPUARMState *env, DisasContext *s) * table entry even for that case. 
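 * The guarded-page (GP) bit was cached in the TLB attributes when
 * the page table was walked; arm_tlb_bti_gp() reads it back below.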
*/ return (tlb_hit(entry->addr_code, addr) && - env_tlb(env)->d[mmu_idx].iotlb[index].attrs.target_tlb_bit0); + arm_tlb_bti_gp(&env_tlb(env)->d[mmu_idx].iotlb[index].attrs)); #endif } @@ -14150,6 +14652,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx); dc->tbii = FIELD_EX32(tb_flags, TBFLAG_A64, TBII); dc->tbid = FIELD_EX32(tb_flags, TBFLAG_A64, TBID); + dc->tcma = FIELD_EX32(tb_flags, TBFLAG_A64, TCMA); dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); #if !defined(CONFIG_USER_ONLY) dc->user = (dc->current_el == 0); @@ -14161,10 +14664,19 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, dc->bt = FIELD_EX32(tb_flags, TBFLAG_A64, BT); dc->btype = FIELD_EX32(tb_flags, TBFLAG_A64, BTYPE); dc->unpriv = FIELD_EX32(tb_flags, TBFLAG_A64, UNPRIV); + dc->ata = FIELD_EX32(tb_flags, TBFLAG_A64, ATA); + dc->mte_active[0] = FIELD_EX32(tb_flags, TBFLAG_A64, MTE_ACTIVE); + dc->mte_active[1] = FIELD_EX32(tb_flags, TBFLAG_A64, MTE0_ACTIVE); dc->vec_len = 0; dc->vec_stride = 0; dc->cp_regs = arm_cpu->cp_regs; dc->features = env->features; + dc->dcz_blocksize = arm_cpu->dcz_blocksize; + +#ifdef CONFIG_USER_ONLY + /* In sve_probe_page, we assume TBI is enabled. */ + tcg_debug_assert(dc->tbid & 1); +#endif /* Single step state. The code-generation logic here is: * SS_ACTIVE == 0: @@ -14292,12 +14804,15 @@ static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) gen_goto_tb(dc, 1, dc->base.pc_next); break; default: - case DISAS_UPDATE: + case DISAS_UPDATE_EXIT: gen_a64_set_pc_im(dc->base.pc_next); /* fall through */ case DISAS_EXIT: tcg_gen_exit_tb(NULL, 0); break; + case DISAS_UPDATE_NOCHAIN: + gen_a64_set_pc_im(dc->base.pc_next); + /* fall through */ case DISAS_JUMP: tcg_gen_lookup_and_goto_ptr(); break; diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h index da0f59a..49e4865 100644 --- a/target/arm/translate-a64.h +++ b/target/arm/translate-a64.h @@ -40,6 +40,11 @@ TCGv_ptr get_fpstatus_ptr(bool); bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn, unsigned int imms, unsigned int immr); bool sve_access_check(DisasContext *s); +TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr); +TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, + bool tag_checked, int log2_size); +TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, + bool tag_checked, int count, int log2_esize); /* We should have at some point before trying to access an FP register * done the necessary access check, so assert that diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c index ac7b311..f318ca2 100644 --- a/target/arm/translate-sve.c +++ b/target/arm/translate-sve.c @@ -4342,71 +4342,76 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) int len_remain = len % 8; int nparts = len / 8 + ctpop8(len_remain); int midx = get_mem_index(s); - TCGv_i64 addr, t0, t1; + TCGv_i64 dirty_addr, clean_addr, t0, t1; - addr = tcg_temp_new_i64(); - t0 = tcg_temp_new_i64(); + dirty_addr = tcg_temp_new_i64(); + tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm); + clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); + tcg_temp_free_i64(dirty_addr); - /* Note that unpredicated load/store of vector/predicate registers + /* + * Note that unpredicated load/store of vector/predicate registers * are defined as a stream of bytes, which equates to little-endian - * operations on larger quantities. 
There is no nice way to force
- * a little-endian load for aarch64_be-linux-user out of line.
- *
+ * operations on larger quantities.
* Attempt to keep code expansion to a minimum by limiting the
* amount of unrolling done.
*/
if (nparts <= 4) {
int i;
+ t0 = tcg_temp_new_i64();
for (i = 0; i < len_align; i += 8) {
- tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
- tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
+ tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
tcg_gen_st_i64(t0, cpu_env, vofs + i);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 8);
}
+ tcg_temp_free_i64(t0);
} else {
TCGLabel *loop = gen_new_label();
TCGv_ptr tp, i = tcg_const_local_ptr(0);
- gen_set_label(loop);
+ /* Copy the clean address into a local temp, live across the loop. */
+ t0 = clean_addr;
+ clean_addr = tcg_temp_local_new_i64();
+ tcg_gen_mov_i64(clean_addr, t0);
+ tcg_temp_free_i64(t0);
- /* Minimize the number of local temps that must be re-read from
- * the stack each iteration. Instead, re-compute values other
- * than the loop counter.
- */
- tp = tcg_temp_new_ptr();
- tcg_gen_addi_ptr(tp, i, imm);
- tcg_gen_extu_ptr_i64(addr, tp);
- tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
+ gen_set_label(loop);
- tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
+ t0 = tcg_temp_new_i64();
+ tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 8);
+ tp = tcg_temp_new_ptr();
tcg_gen_add_ptr(tp, cpu_env, i);
tcg_gen_addi_ptr(i, i, 8);
tcg_gen_st_i64(t0, tp, vofs);
tcg_temp_free_ptr(tp);
+ tcg_temp_free_i64(t0);
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
tcg_temp_free_ptr(i);
}
- /* Predicate register loads can be any multiple of 2.
+ /*
+ * Predicate register loads can be any multiple of 2.
* Note that we still store the entire 64-bit unit into cpu_env.
*/
if (len_remain) {
- tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
-
+ t0 = tcg_temp_new_i64();
switch (len_remain) {
case 2:
case 4:
case 8:
- tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
+ tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
+ MO_LE | ctz32(len_remain));
break;
case 6:
t1 = tcg_temp_new_i64();
- tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
- tcg_gen_addi_i64(addr, addr, 4);
- tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
+ tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 4);
+ tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
tcg_temp_free_i64(t1);
break;
@@ -4415,9 +4420,9 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
g_assert_not_reached();
}
tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
+ tcg_temp_free_i64(t0);
}
- tcg_temp_free_i64(addr);
- tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(clean_addr);
}
/* Similarly for stores.
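 * The store path below mirrors do_ldr: a single gen_mte_checkN up
 * front covers the whole transfer, after which the clean address is
 * simply incremented between accesses.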
*/ @@ -4427,10 +4432,12 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
int len_remain = len % 8;
int nparts = len / 8 + ctpop8(len_remain);
int midx = get_mem_index(s);
- TCGv_i64 addr, t0;
+ TCGv_i64 dirty_addr, clean_addr, t0;
- addr = tcg_temp_new_i64();
- t0 = tcg_temp_new_i64();
+ dirty_addr = tcg_temp_new_i64();
+ tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
+ clean_addr = gen_mte_checkN(s, dirty_addr, true, rn != 31, len, MO_8);
+ tcg_temp_free_i64(dirty_addr);
/* Note that unpredicated load/store of vector/predicate registers
* are defined as a stream of bytes, which equates to little-endian
@@ -4443,33 +4450,35 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
if (nparts <= 4) {
int i;
+ t0 = tcg_temp_new_i64();
for (i = 0; i < len_align; i += 8) {
tcg_gen_ld_i64(t0, cpu_env, vofs + i);
- tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
- tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
+ tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 8);
}
+ tcg_temp_free_i64(t0);
} else {
TCGLabel *loop = gen_new_label();
- TCGv_ptr t2, i = tcg_const_local_ptr(0);
-
- gen_set_label(loop);
-
- t2 = tcg_temp_new_ptr();
- tcg_gen_add_ptr(t2, cpu_env, i);
- tcg_gen_ld_i64(t0, t2, vofs);
+ TCGv_ptr tp, i = tcg_const_local_ptr(0);
- /* Minimize the number of local temps that must be re-read from
- * the stack each iteration. Instead, re-compute values other
- * than the loop counter.
- */
- tcg_gen_addi_ptr(t2, i, imm);
- tcg_gen_extu_ptr_i64(addr, t2);
- tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
- tcg_temp_free_ptr(t2);
+ /* Copy the clean address into a local temp, live across the loop. */
+ t0 = clean_addr;
+ clean_addr = tcg_temp_local_new_i64();
+ tcg_gen_mov_i64(clean_addr, t0);
+ tcg_temp_free_i64(t0);
- tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
+ gen_set_label(loop);
+ t0 = tcg_temp_new_i64();
+ tp = tcg_temp_new_ptr();
+ tcg_gen_add_ptr(tp, cpu_env, i);
+ tcg_gen_ld_i64(t0, tp, vofs);
+ tcg_gen_addi_ptr(i, i, 8);
+ tcg_temp_free_ptr(tp);
+
+ tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 8);
+ tcg_temp_free_i64(t0);
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
tcg_temp_free_ptr(i);
@@ -4477,29 +4486,30 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
/* Predicate register stores can be any multiple of 2.
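 * The tail is written with a single 2-, 4- or 8-byte access; a
 * 6-byte tail is split into a 4-byte and a 2-byte store.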
*/ if (len_remain) { + t0 = tcg_temp_new_i64(); tcg_gen_ld_i64(t0, cpu_env, vofs + len_align); - tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align); switch (len_remain) { case 2: case 4: case 8: - tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain)); + tcg_gen_qemu_st_i64(t0, clean_addr, midx, + MO_LE | ctz32(len_remain)); break; case 6: - tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL); - tcg_gen_addi_i64(addr, addr, 4); + tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL); + tcg_gen_addi_i64(clean_addr, clean_addr, 4); tcg_gen_shri_i64(t0, t0, 32); - tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW); + tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW); break; default: g_assert_not_reached(); } + tcg_temp_free_i64(t0); } - tcg_temp_free_i64(addr); - tcg_temp_free_i64(t0); + tcg_temp_free_i64(clean_addr); } static bool trans_LDR_zri(DisasContext *s, arg_rri *a) @@ -4565,18 +4575,34 @@ static const uint8_t dtype_esz[16] = { }; static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, - int dtype, gen_helper_gvec_mem *fn) + int dtype, uint32_t mte_n, bool is_write, + gen_helper_gvec_mem *fn) { unsigned vsz = vec_full_reg_size(s); TCGv_ptr t_pg; TCGv_i32 t_desc; - int desc; + int desc = 0; - /* For e.g. LD4, there are not enough arguments to pass all 4 + /* + * For e.g. LD4, there are not enough arguments to pass all 4 * registers as pointers, so encode the regno into the data field. * For consistency, do this even for LD1. */ - desc = simd_desc(vsz, vsz, zt); + if (s->mte_active[0]) { + int msz = dtype_msz(dtype); + + desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); + desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); + desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); + desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); + desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz); + desc = FIELD_DP32(desc, MTEDESC, TSIZE, mte_n << msz); + desc <<= SVE_MTEDESC_SHIFT; + } else { + addr = clean_data_tbi(s, addr); + } + + desc = simd_desc(vsz, vsz, zt | desc); t_desc = tcg_const_i32(desc); t_pg = tcg_temp_new_ptr(); @@ -4590,64 +4616,132 @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, static void do_ld_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, int dtype, int nreg) { - static gen_helper_gvec_mem * const fns[2][16][4] = { - /* Little-endian */ - { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, + static gen_helper_gvec_mem * const fns[2][2][16][4] = { + { /* mte inactive, little-endian */ + { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, - { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r, - gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r }, - { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r, - gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r }, - { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, - gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } }, - - 
/* Big-endian */ - { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, - gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, - { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r, - gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r }, - { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r, - gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r }, - { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL }, - - { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, - { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, - gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } + { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r, + gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r }, + { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r, + gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r }, + { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r, + gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } }, + + /* mte inactive, big-endian */ + { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r, + gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r }, + { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r, + gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r }, + { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r, + gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r }, + { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL }, + + { gen_helper_sve_ld1bds_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bss_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL }, + { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r, + gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } }, + + { /* mte active, little-endian */ + { { gen_helper_sve_ld1bb_r_mte, + gen_helper_sve_ld2bb_r_mte, + gen_helper_sve_ld3bb_r_mte, + gen_helper_sve_ld4bb_r_mte }, + { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL }, + { 
gen_helper_sve_ld1hh_le_r_mte, + gen_helper_sve_ld2hh_le_r_mte, + gen_helper_sve_ld3hh_le_r_mte, + gen_helper_sve_ld4hh_le_r_mte }, + { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1ss_le_r_mte, + gen_helper_sve_ld2ss_le_r_mte, + gen_helper_sve_ld3ss_le_r_mte, + gen_helper_sve_ld4ss_le_r_mte }, + { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1dd_le_r_mte, + gen_helper_sve_ld2dd_le_r_mte, + gen_helper_sve_ld3dd_le_r_mte, + gen_helper_sve_ld4dd_le_r_mte } }, + + /* mte active, big-endian */ + { { gen_helper_sve_ld1bb_r_mte, + gen_helper_sve_ld2bb_r_mte, + gen_helper_sve_ld3bb_r_mte, + gen_helper_sve_ld4bb_r_mte }, + { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hh_be_r_mte, + gen_helper_sve_ld2hh_be_r_mte, + gen_helper_sve_ld3hh_be_r_mte, + gen_helper_sve_ld4hh_be_r_mte }, + { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1ss_be_r_mte, + gen_helper_sve_ld2ss_be_r_mte, + gen_helper_sve_ld3ss_be_r_mte, + gen_helper_sve_ld4ss_be_r_mte }, + { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL }, + + { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL }, + { gen_helper_sve_ld1dd_be_r_mte, + gen_helper_sve_ld2dd_be_r_mte, + gen_helper_sve_ld3dd_be_r_mte, + gen_helper_sve_ld4dd_be_r_mte } } }, }; - gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg]; + gen_helper_gvec_mem *fn + = fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg]; - /* While there are holes in the table, they are not + /* + * While there are holes in the table, they are not * accessible via the instruction encoding. 
*/ assert(fn != NULL); - do_mem_zpa(s, zt, pg, addr, dtype, fn); + do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn); } static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a) @@ -4681,104 +4775,188 @@ static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a) static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a) { - static gen_helper_gvec_mem * const fns[2][16] = { - /* Little-endian */ - { gen_helper_sve_ldff1bb_r, - gen_helper_sve_ldff1bhu_r, - gen_helper_sve_ldff1bsu_r, - gen_helper_sve_ldff1bdu_r, - - gen_helper_sve_ldff1sds_le_r, - gen_helper_sve_ldff1hh_le_r, - gen_helper_sve_ldff1hsu_le_r, - gen_helper_sve_ldff1hdu_le_r, - - gen_helper_sve_ldff1hds_le_r, - gen_helper_sve_ldff1hss_le_r, - gen_helper_sve_ldff1ss_le_r, - gen_helper_sve_ldff1sdu_le_r, - - gen_helper_sve_ldff1bds_r, - gen_helper_sve_ldff1bss_r, - gen_helper_sve_ldff1bhs_r, - gen_helper_sve_ldff1dd_le_r }, - - /* Big-endian */ - { gen_helper_sve_ldff1bb_r, - gen_helper_sve_ldff1bhu_r, - gen_helper_sve_ldff1bsu_r, - gen_helper_sve_ldff1bdu_r, - - gen_helper_sve_ldff1sds_be_r, - gen_helper_sve_ldff1hh_be_r, - gen_helper_sve_ldff1hsu_be_r, - gen_helper_sve_ldff1hdu_be_r, - - gen_helper_sve_ldff1hds_be_r, - gen_helper_sve_ldff1hss_be_r, - gen_helper_sve_ldff1ss_be_r, - gen_helper_sve_ldff1sdu_be_r, - - gen_helper_sve_ldff1bds_r, - gen_helper_sve_ldff1bss_r, - gen_helper_sve_ldff1bhs_r, - gen_helper_sve_ldff1dd_be_r }, + static gen_helper_gvec_mem * const fns[2][2][16] = { + { /* mte inactive, little-endian */ + { gen_helper_sve_ldff1bb_r, + gen_helper_sve_ldff1bhu_r, + gen_helper_sve_ldff1bsu_r, + gen_helper_sve_ldff1bdu_r, + + gen_helper_sve_ldff1sds_le_r, + gen_helper_sve_ldff1hh_le_r, + gen_helper_sve_ldff1hsu_le_r, + gen_helper_sve_ldff1hdu_le_r, + + gen_helper_sve_ldff1hds_le_r, + gen_helper_sve_ldff1hss_le_r, + gen_helper_sve_ldff1ss_le_r, + gen_helper_sve_ldff1sdu_le_r, + + gen_helper_sve_ldff1bds_r, + gen_helper_sve_ldff1bss_r, + gen_helper_sve_ldff1bhs_r, + gen_helper_sve_ldff1dd_le_r }, + + /* mte inactive, big-endian */ + { gen_helper_sve_ldff1bb_r, + gen_helper_sve_ldff1bhu_r, + gen_helper_sve_ldff1bsu_r, + gen_helper_sve_ldff1bdu_r, + + gen_helper_sve_ldff1sds_be_r, + gen_helper_sve_ldff1hh_be_r, + gen_helper_sve_ldff1hsu_be_r, + gen_helper_sve_ldff1hdu_be_r, + + gen_helper_sve_ldff1hds_be_r, + gen_helper_sve_ldff1hss_be_r, + gen_helper_sve_ldff1ss_be_r, + gen_helper_sve_ldff1sdu_be_r, + + gen_helper_sve_ldff1bds_r, + gen_helper_sve_ldff1bss_r, + gen_helper_sve_ldff1bhs_r, + gen_helper_sve_ldff1dd_be_r } }, + + { /* mte active, little-endian */ + { gen_helper_sve_ldff1bb_r_mte, + gen_helper_sve_ldff1bhu_r_mte, + gen_helper_sve_ldff1bsu_r_mte, + gen_helper_sve_ldff1bdu_r_mte, + + gen_helper_sve_ldff1sds_le_r_mte, + gen_helper_sve_ldff1hh_le_r_mte, + gen_helper_sve_ldff1hsu_le_r_mte, + gen_helper_sve_ldff1hdu_le_r_mte, + + gen_helper_sve_ldff1hds_le_r_mte, + gen_helper_sve_ldff1hss_le_r_mte, + gen_helper_sve_ldff1ss_le_r_mte, + gen_helper_sve_ldff1sdu_le_r_mte, + + gen_helper_sve_ldff1bds_r_mte, + gen_helper_sve_ldff1bss_r_mte, + gen_helper_sve_ldff1bhs_r_mte, + gen_helper_sve_ldff1dd_le_r_mte }, + + /* mte active, big-endian */ + { gen_helper_sve_ldff1bb_r_mte, + gen_helper_sve_ldff1bhu_r_mte, + gen_helper_sve_ldff1bsu_r_mte, + gen_helper_sve_ldff1bdu_r_mte, + + gen_helper_sve_ldff1sds_be_r_mte, + gen_helper_sve_ldff1hh_be_r_mte, + gen_helper_sve_ldff1hsu_be_r_mte, + gen_helper_sve_ldff1hdu_be_r_mte, + + gen_helper_sve_ldff1hds_be_r_mte, + gen_helper_sve_ldff1hss_be_r_mte, + 
gen_helper_sve_ldff1ss_be_r_mte,
+            gen_helper_sve_ldff1sdu_be_r_mte,
+
+            gen_helper_sve_ldff1bds_r_mte,
+            gen_helper_sve_ldff1bss_r_mte,
+            gen_helper_sve_ldff1bhs_r_mte,
+            gen_helper_sve_ldff1dd_be_r_mte } },
     };
 
     if (sve_access_check(s)) {
         TCGv_i64 addr = new_tmp_a64(s);
         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
-        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
-                   fns[s->be_data == MO_BE][a->dtype]);
+        do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
+                   fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
    }
    return true;
 }
 
 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
 {
-    static gen_helper_gvec_mem * const fns[2][16] = {
-        /* Little-endian */
-        { gen_helper_sve_ldnf1bb_r,
-          gen_helper_sve_ldnf1bhu_r,
-          gen_helper_sve_ldnf1bsu_r,
-          gen_helper_sve_ldnf1bdu_r,
-
-          gen_helper_sve_ldnf1sds_le_r,
-          gen_helper_sve_ldnf1hh_le_r,
-          gen_helper_sve_ldnf1hsu_le_r,
-          gen_helper_sve_ldnf1hdu_le_r,
-
-          gen_helper_sve_ldnf1hds_le_r,
-          gen_helper_sve_ldnf1hss_le_r,
-          gen_helper_sve_ldnf1ss_le_r,
-          gen_helper_sve_ldnf1sdu_le_r,
-
-          gen_helper_sve_ldnf1bds_r,
-          gen_helper_sve_ldnf1bss_r,
-          gen_helper_sve_ldnf1bhs_r,
-          gen_helper_sve_ldnf1dd_le_r },
-
-        /* Big-endian */
-        { gen_helper_sve_ldnf1bb_r,
-          gen_helper_sve_ldnf1bhu_r,
-          gen_helper_sve_ldnf1bsu_r,
-          gen_helper_sve_ldnf1bdu_r,
-
-          gen_helper_sve_ldnf1sds_be_r,
-          gen_helper_sve_ldnf1hh_be_r,
-          gen_helper_sve_ldnf1hsu_be_r,
-          gen_helper_sve_ldnf1hdu_be_r,
-
-          gen_helper_sve_ldnf1hds_be_r,
-          gen_helper_sve_ldnf1hss_be_r,
-          gen_helper_sve_ldnf1ss_be_r,
-          gen_helper_sve_ldnf1sdu_be_r,
-
-          gen_helper_sve_ldnf1bds_r,
-          gen_helper_sve_ldnf1bss_r,
-          gen_helper_sve_ldnf1bhs_r,
-          gen_helper_sve_ldnf1dd_be_r },
+    static gen_helper_gvec_mem * const fns[2][2][16] = {
+        { /* mte inactive, little-endian */
+          { gen_helper_sve_ldnf1bb_r,
+            gen_helper_sve_ldnf1bhu_r,
+            gen_helper_sve_ldnf1bsu_r,
+            gen_helper_sve_ldnf1bdu_r,
+
+            gen_helper_sve_ldnf1sds_le_r,
+            gen_helper_sve_ldnf1hh_le_r,
+            gen_helper_sve_ldnf1hsu_le_r,
+            gen_helper_sve_ldnf1hdu_le_r,
+
+            gen_helper_sve_ldnf1hds_le_r,
+            gen_helper_sve_ldnf1hss_le_r,
+            gen_helper_sve_ldnf1ss_le_r,
+            gen_helper_sve_ldnf1sdu_le_r,
+
+            gen_helper_sve_ldnf1bds_r,
+            gen_helper_sve_ldnf1bss_r,
+            gen_helper_sve_ldnf1bhs_r,
+            gen_helper_sve_ldnf1dd_le_r },
+
+          /* mte inactive, big-endian */
+          { gen_helper_sve_ldnf1bb_r,
+            gen_helper_sve_ldnf1bhu_r,
+            gen_helper_sve_ldnf1bsu_r,
+            gen_helper_sve_ldnf1bdu_r,
+
+            gen_helper_sve_ldnf1sds_be_r,
+            gen_helper_sve_ldnf1hh_be_r,
+            gen_helper_sve_ldnf1hsu_be_r,
+            gen_helper_sve_ldnf1hdu_be_r,
+
+            gen_helper_sve_ldnf1hds_be_r,
+            gen_helper_sve_ldnf1hss_be_r,
+            gen_helper_sve_ldnf1ss_be_r,
+            gen_helper_sve_ldnf1sdu_be_r,
+
+            gen_helper_sve_ldnf1bds_r,
+            gen_helper_sve_ldnf1bss_r,
+            gen_helper_sve_ldnf1bhs_r,
+            gen_helper_sve_ldnf1dd_be_r } },
+
+        { /* mte active, little-endian */
+          { gen_helper_sve_ldnf1bb_r_mte,
+            gen_helper_sve_ldnf1bhu_r_mte,
+            gen_helper_sve_ldnf1bsu_r_mte,
+            gen_helper_sve_ldnf1bdu_r_mte,
+
+            gen_helper_sve_ldnf1sds_le_r_mte,
+            gen_helper_sve_ldnf1hh_le_r_mte,
+            gen_helper_sve_ldnf1hsu_le_r_mte,
+            gen_helper_sve_ldnf1hdu_le_r_mte,
+
+            gen_helper_sve_ldnf1hds_le_r_mte,
+            gen_helper_sve_ldnf1hss_le_r_mte,
+            gen_helper_sve_ldnf1ss_le_r_mte,
+            gen_helper_sve_ldnf1sdu_le_r_mte,
+
+            gen_helper_sve_ldnf1bds_r_mte,
+            gen_helper_sve_ldnf1bss_r_mte,
+            gen_helper_sve_ldnf1bhs_r_mte,
+            gen_helper_sve_ldnf1dd_le_r_mte },
+
+          /* mte active, big-endian */
+          { gen_helper_sve_ldnf1bb_r_mte,
+
gen_helper_sve_ldnf1bhu_r_mte, + gen_helper_sve_ldnf1bsu_r_mte, + gen_helper_sve_ldnf1bdu_r_mte, + + gen_helper_sve_ldnf1sds_be_r_mte, + gen_helper_sve_ldnf1hh_be_r_mte, + gen_helper_sve_ldnf1hsu_be_r_mte, + gen_helper_sve_ldnf1hdu_be_r_mte, + + gen_helper_sve_ldnf1hds_be_r_mte, + gen_helper_sve_ldnf1hss_be_r_mte, + gen_helper_sve_ldnf1ss_be_r_mte, + gen_helper_sve_ldnf1sdu_be_r_mte, + + gen_helper_sve_ldnf1bds_r_mte, + gen_helper_sve_ldnf1bss_r_mte, + gen_helper_sve_ldnf1bhs_r_mte, + gen_helper_sve_ldnf1dd_be_r_mte } }, }; if (sve_access_check(s)) { @@ -4788,8 +4966,8 @@ static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a) TCGv_i64 addr = new_tmp_a64(s); tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off); - do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, - fns[s->be_data == MO_BE][a->dtype]); + do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false, + fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]); } return true; } @@ -4873,16 +5051,18 @@ static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a) /* Load and broadcast element. */ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) { - if (!sve_access_check(s)) { - return true; - } - unsigned vsz = vec_full_reg_size(s); unsigned psz = pred_full_reg_size(s); unsigned esz = dtype_esz[a->dtype]; unsigned msz = dtype_msz(a->dtype); - TCGLabel *over = gen_new_label(); - TCGv_i64 temp; + TCGLabel *over; + TCGv_i64 temp, clean_addr; + + if (!sve_access_check(s)) { + return true; + } + + over = gen_new_label(); /* If the guarding predicate has no bits set, no load occurs. */ if (psz <= 8) { @@ -4905,7 +5085,9 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) /* Load the data. */ temp = tcg_temp_new_i64(); tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz); - tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s), + clean_addr = gen_mte_check1(s, temp, false, true, msz); + + tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), s->be_data | dtype_mop[a->dtype]); /* Broadcast to *all* elements. 
*/ @@ -4922,73 +5104,125 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a) static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz, int esz, int nreg) { - static gen_helper_gvec_mem * const fn_single[2][4][4] = { - { { gen_helper_sve_st1bb_r, - gen_helper_sve_st1bh_r, - gen_helper_sve_st1bs_r, - gen_helper_sve_st1bd_r }, - { NULL, - gen_helper_sve_st1hh_le_r, - gen_helper_sve_st1hs_le_r, - gen_helper_sve_st1hd_le_r }, - { NULL, NULL, - gen_helper_sve_st1ss_le_r, - gen_helper_sve_st1sd_le_r }, - { NULL, NULL, NULL, - gen_helper_sve_st1dd_le_r } }, - { { gen_helper_sve_st1bb_r, - gen_helper_sve_st1bh_r, - gen_helper_sve_st1bs_r, - gen_helper_sve_st1bd_r }, - { NULL, - gen_helper_sve_st1hh_be_r, - gen_helper_sve_st1hs_be_r, - gen_helper_sve_st1hd_be_r }, - { NULL, NULL, - gen_helper_sve_st1ss_be_r, - gen_helper_sve_st1sd_be_r }, - { NULL, NULL, NULL, - gen_helper_sve_st1dd_be_r } }, + static gen_helper_gvec_mem * const fn_single[2][2][4][4] = { + { { { gen_helper_sve_st1bb_r, + gen_helper_sve_st1bh_r, + gen_helper_sve_st1bs_r, + gen_helper_sve_st1bd_r }, + { NULL, + gen_helper_sve_st1hh_le_r, + gen_helper_sve_st1hs_le_r, + gen_helper_sve_st1hd_le_r }, + { NULL, NULL, + gen_helper_sve_st1ss_le_r, + gen_helper_sve_st1sd_le_r }, + { NULL, NULL, NULL, + gen_helper_sve_st1dd_le_r } }, + { { gen_helper_sve_st1bb_r, + gen_helper_sve_st1bh_r, + gen_helper_sve_st1bs_r, + gen_helper_sve_st1bd_r }, + { NULL, + gen_helper_sve_st1hh_be_r, + gen_helper_sve_st1hs_be_r, + gen_helper_sve_st1hd_be_r }, + { NULL, NULL, + gen_helper_sve_st1ss_be_r, + gen_helper_sve_st1sd_be_r }, + { NULL, NULL, NULL, + gen_helper_sve_st1dd_be_r } } }, + + { { { gen_helper_sve_st1bb_r_mte, + gen_helper_sve_st1bh_r_mte, + gen_helper_sve_st1bs_r_mte, + gen_helper_sve_st1bd_r_mte }, + { NULL, + gen_helper_sve_st1hh_le_r_mte, + gen_helper_sve_st1hs_le_r_mte, + gen_helper_sve_st1hd_le_r_mte }, + { NULL, NULL, + gen_helper_sve_st1ss_le_r_mte, + gen_helper_sve_st1sd_le_r_mte }, + { NULL, NULL, NULL, + gen_helper_sve_st1dd_le_r_mte } }, + { { gen_helper_sve_st1bb_r_mte, + gen_helper_sve_st1bh_r_mte, + gen_helper_sve_st1bs_r_mte, + gen_helper_sve_st1bd_r_mte }, + { NULL, + gen_helper_sve_st1hh_be_r_mte, + gen_helper_sve_st1hs_be_r_mte, + gen_helper_sve_st1hd_be_r_mte }, + { NULL, NULL, + gen_helper_sve_st1ss_be_r_mte, + gen_helper_sve_st1sd_be_r_mte }, + { NULL, NULL, NULL, + gen_helper_sve_st1dd_be_r_mte } } }, }; - static gen_helper_gvec_mem * const fn_multiple[2][3][4] = { - { { gen_helper_sve_st2bb_r, - gen_helper_sve_st2hh_le_r, - gen_helper_sve_st2ss_le_r, - gen_helper_sve_st2dd_le_r }, - { gen_helper_sve_st3bb_r, - gen_helper_sve_st3hh_le_r, - gen_helper_sve_st3ss_le_r, - gen_helper_sve_st3dd_le_r }, - { gen_helper_sve_st4bb_r, - gen_helper_sve_st4hh_le_r, - gen_helper_sve_st4ss_le_r, - gen_helper_sve_st4dd_le_r } }, - { { gen_helper_sve_st2bb_r, - gen_helper_sve_st2hh_be_r, - gen_helper_sve_st2ss_be_r, - gen_helper_sve_st2dd_be_r }, - { gen_helper_sve_st3bb_r, - gen_helper_sve_st3hh_be_r, - gen_helper_sve_st3ss_be_r, - gen_helper_sve_st3dd_be_r }, - { gen_helper_sve_st4bb_r, - gen_helper_sve_st4hh_be_r, - gen_helper_sve_st4ss_be_r, - gen_helper_sve_st4dd_be_r } }, + static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = { + { { { gen_helper_sve_st2bb_r, + gen_helper_sve_st2hh_le_r, + gen_helper_sve_st2ss_le_r, + gen_helper_sve_st2dd_le_r }, + { gen_helper_sve_st3bb_r, + gen_helper_sve_st3hh_le_r, + gen_helper_sve_st3ss_le_r, + gen_helper_sve_st3dd_le_r }, + { gen_helper_sve_st4bb_r, + 
gen_helper_sve_st4hh_le_r, + gen_helper_sve_st4ss_le_r, + gen_helper_sve_st4dd_le_r } }, + { { gen_helper_sve_st2bb_r, + gen_helper_sve_st2hh_be_r, + gen_helper_sve_st2ss_be_r, + gen_helper_sve_st2dd_be_r }, + { gen_helper_sve_st3bb_r, + gen_helper_sve_st3hh_be_r, + gen_helper_sve_st3ss_be_r, + gen_helper_sve_st3dd_be_r }, + { gen_helper_sve_st4bb_r, + gen_helper_sve_st4hh_be_r, + gen_helper_sve_st4ss_be_r, + gen_helper_sve_st4dd_be_r } } }, + { { { gen_helper_sve_st2bb_r_mte, + gen_helper_sve_st2hh_le_r_mte, + gen_helper_sve_st2ss_le_r_mte, + gen_helper_sve_st2dd_le_r_mte }, + { gen_helper_sve_st3bb_r_mte, + gen_helper_sve_st3hh_le_r_mte, + gen_helper_sve_st3ss_le_r_mte, + gen_helper_sve_st3dd_le_r_mte }, + { gen_helper_sve_st4bb_r_mte, + gen_helper_sve_st4hh_le_r_mte, + gen_helper_sve_st4ss_le_r_mte, + gen_helper_sve_st4dd_le_r_mte } }, + { { gen_helper_sve_st2bb_r_mte, + gen_helper_sve_st2hh_be_r_mte, + gen_helper_sve_st2ss_be_r_mte, + gen_helper_sve_st2dd_be_r_mte }, + { gen_helper_sve_st3bb_r_mte, + gen_helper_sve_st3hh_be_r_mte, + gen_helper_sve_st3ss_be_r_mte, + gen_helper_sve_st3dd_be_r_mte }, + { gen_helper_sve_st4bb_r_mte, + gen_helper_sve_st4hh_be_r_mte, + gen_helper_sve_st4ss_be_r_mte, + gen_helper_sve_st4dd_be_r_mte } } }, }; gen_helper_gvec_mem *fn; int be = s->be_data == MO_BE; if (nreg == 0) { /* ST1 */ - fn = fn_single[be][msz][esz]; + fn = fn_single[s->mte_active[0]][be][msz][esz]; + nreg = 1; } else { /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */ assert(msz == esz); - fn = fn_multiple[be][nreg - 1][msz]; + fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz]; } assert(fn != NULL); - do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), fn); + do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn); } static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a) @@ -5027,7 +5261,7 @@ static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a) */ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, - int scale, TCGv_i64 scalar, int msz, + int scale, TCGv_i64 scalar, int msz, bool is_write, gen_helper_gvec_mem_scatter *fn) { unsigned vsz = vec_full_reg_size(s); @@ -5035,8 +5269,16 @@ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, TCGv_ptr t_pg = tcg_temp_new_ptr(); TCGv_ptr t_zt = tcg_temp_new_ptr(); TCGv_i32 t_desc; - int desc; + int desc = 0; + if (s->mte_active[0]) { + desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); + desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); + desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); + desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write); + desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz); + desc <<= SVE_MTEDESC_SHIFT; + } desc = simd_desc(vsz, vsz, scale); t_desc = tcg_const_i32(desc); @@ -5051,176 +5293,339 @@ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, tcg_temp_free_i32(t_desc); } -/* Indexed by [be][ff][xs][u][msz]. 
*/ -static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3] = { - /* Little-endian */ - { { { { gen_helper_sve_ldbss_zsu, - gen_helper_sve_ldhss_le_zsu, - NULL, }, - { gen_helper_sve_ldbsu_zsu, - gen_helper_sve_ldhsu_le_zsu, - gen_helper_sve_ldss_le_zsu, } }, - { { gen_helper_sve_ldbss_zss, - gen_helper_sve_ldhss_le_zss, - NULL, }, - { gen_helper_sve_ldbsu_zss, - gen_helper_sve_ldhsu_le_zss, - gen_helper_sve_ldss_le_zss, } } }, - - /* First-fault */ - { { { gen_helper_sve_ldffbss_zsu, - gen_helper_sve_ldffhss_le_zsu, - NULL, }, - { gen_helper_sve_ldffbsu_zsu, - gen_helper_sve_ldffhsu_le_zsu, - gen_helper_sve_ldffss_le_zsu, } }, - { { gen_helper_sve_ldffbss_zss, - gen_helper_sve_ldffhss_le_zss, - NULL, }, - { gen_helper_sve_ldffbsu_zss, - gen_helper_sve_ldffhsu_le_zss, - gen_helper_sve_ldffss_le_zss, } } } }, - - /* Big-endian */ - { { { { gen_helper_sve_ldbss_zsu, - gen_helper_sve_ldhss_be_zsu, - NULL, }, - { gen_helper_sve_ldbsu_zsu, - gen_helper_sve_ldhsu_be_zsu, - gen_helper_sve_ldss_be_zsu, } }, - { { gen_helper_sve_ldbss_zss, - gen_helper_sve_ldhss_be_zss, - NULL, }, - { gen_helper_sve_ldbsu_zss, - gen_helper_sve_ldhsu_be_zss, - gen_helper_sve_ldss_be_zss, } } }, - - /* First-fault */ - { { { gen_helper_sve_ldffbss_zsu, - gen_helper_sve_ldffhss_be_zsu, - NULL, }, - { gen_helper_sve_ldffbsu_zsu, - gen_helper_sve_ldffhsu_be_zsu, - gen_helper_sve_ldffss_be_zsu, } }, - { { gen_helper_sve_ldffbss_zss, - gen_helper_sve_ldffhss_be_zss, - NULL, }, - { gen_helper_sve_ldffbsu_zss, - gen_helper_sve_ldffhsu_be_zss, - gen_helper_sve_ldffss_be_zss, } } } }, +/* Indexed by [mte][be][ff][xs][u][msz]. */ +static gen_helper_gvec_mem_scatter * const +gather_load_fn32[2][2][2][2][2][3] = { + { /* MTE Inactive */ + { /* Little-endian */ + { { { gen_helper_sve_ldbss_zsu, + gen_helper_sve_ldhss_le_zsu, + NULL, }, + { gen_helper_sve_ldbsu_zsu, + gen_helper_sve_ldhsu_le_zsu, + gen_helper_sve_ldss_le_zsu, } }, + { { gen_helper_sve_ldbss_zss, + gen_helper_sve_ldhss_le_zss, + NULL, }, + { gen_helper_sve_ldbsu_zss, + gen_helper_sve_ldhsu_le_zss, + gen_helper_sve_ldss_le_zss, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu, + gen_helper_sve_ldffhss_le_zsu, + NULL, }, + { gen_helper_sve_ldffbsu_zsu, + gen_helper_sve_ldffhsu_le_zsu, + gen_helper_sve_ldffss_le_zsu, } }, + { { gen_helper_sve_ldffbss_zss, + gen_helper_sve_ldffhss_le_zss, + NULL, }, + { gen_helper_sve_ldffbsu_zss, + gen_helper_sve_ldffhsu_le_zss, + gen_helper_sve_ldffss_le_zss, } } } }, + + { /* Big-endian */ + { { { gen_helper_sve_ldbss_zsu, + gen_helper_sve_ldhss_be_zsu, + NULL, }, + { gen_helper_sve_ldbsu_zsu, + gen_helper_sve_ldhsu_be_zsu, + gen_helper_sve_ldss_be_zsu, } }, + { { gen_helper_sve_ldbss_zss, + gen_helper_sve_ldhss_be_zss, + NULL, }, + { gen_helper_sve_ldbsu_zss, + gen_helper_sve_ldhsu_be_zss, + gen_helper_sve_ldss_be_zss, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu, + gen_helper_sve_ldffhss_be_zsu, + NULL, }, + { gen_helper_sve_ldffbsu_zsu, + gen_helper_sve_ldffhsu_be_zsu, + gen_helper_sve_ldffss_be_zsu, } }, + { { gen_helper_sve_ldffbss_zss, + gen_helper_sve_ldffhss_be_zss, + NULL, }, + { gen_helper_sve_ldffbsu_zss, + gen_helper_sve_ldffhsu_be_zss, + gen_helper_sve_ldffss_be_zss, } } } } }, + { /* MTE Active */ + { /* Little-endian */ + { { { gen_helper_sve_ldbss_zsu_mte, + gen_helper_sve_ldhss_le_zsu_mte, + NULL, }, + { gen_helper_sve_ldbsu_zsu_mte, + gen_helper_sve_ldhsu_le_zsu_mte, + gen_helper_sve_ldss_le_zsu_mte, } }, + { { gen_helper_sve_ldbss_zss_mte, + 
gen_helper_sve_ldhss_le_zss_mte, + NULL, }, + { gen_helper_sve_ldbsu_zss_mte, + gen_helper_sve_ldhsu_le_zss_mte, + gen_helper_sve_ldss_le_zss_mte, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu_mte, + gen_helper_sve_ldffhss_le_zsu_mte, + NULL, }, + { gen_helper_sve_ldffbsu_zsu_mte, + gen_helper_sve_ldffhsu_le_zsu_mte, + gen_helper_sve_ldffss_le_zsu_mte, } }, + { { gen_helper_sve_ldffbss_zss_mte, + gen_helper_sve_ldffhss_le_zss_mte, + NULL, }, + { gen_helper_sve_ldffbsu_zss_mte, + gen_helper_sve_ldffhsu_le_zss_mte, + gen_helper_sve_ldffss_le_zss_mte, } } } }, + + { /* Big-endian */ + { { { gen_helper_sve_ldbss_zsu_mte, + gen_helper_sve_ldhss_be_zsu_mte, + NULL, }, + { gen_helper_sve_ldbsu_zsu_mte, + gen_helper_sve_ldhsu_be_zsu_mte, + gen_helper_sve_ldss_be_zsu_mte, } }, + { { gen_helper_sve_ldbss_zss_mte, + gen_helper_sve_ldhss_be_zss_mte, + NULL, }, + { gen_helper_sve_ldbsu_zss_mte, + gen_helper_sve_ldhsu_be_zss_mte, + gen_helper_sve_ldss_be_zss_mte, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbss_zsu_mte, + gen_helper_sve_ldffhss_be_zsu_mte, + NULL, }, + { gen_helper_sve_ldffbsu_zsu_mte, + gen_helper_sve_ldffhsu_be_zsu_mte, + gen_helper_sve_ldffss_be_zsu_mte, } }, + { { gen_helper_sve_ldffbss_zss_mte, + gen_helper_sve_ldffhss_be_zss_mte, + NULL, }, + { gen_helper_sve_ldffbsu_zss_mte, + gen_helper_sve_ldffhsu_be_zss_mte, + gen_helper_sve_ldffss_be_zss_mte, } } } } }, }; /* Note that we overload xs=2 to indicate 64-bit offset. */ -static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4] = { - /* Little-endian */ - { { { { gen_helper_sve_ldbds_zsu, - gen_helper_sve_ldhds_le_zsu, - gen_helper_sve_ldsds_le_zsu, - NULL, }, - { gen_helper_sve_ldbdu_zsu, - gen_helper_sve_ldhdu_le_zsu, - gen_helper_sve_ldsdu_le_zsu, - gen_helper_sve_lddd_le_zsu, } }, - { { gen_helper_sve_ldbds_zss, - gen_helper_sve_ldhds_le_zss, - gen_helper_sve_ldsds_le_zss, - NULL, }, - { gen_helper_sve_ldbdu_zss, - gen_helper_sve_ldhdu_le_zss, - gen_helper_sve_ldsdu_le_zss, - gen_helper_sve_lddd_le_zss, } }, - { { gen_helper_sve_ldbds_zd, - gen_helper_sve_ldhds_le_zd, - gen_helper_sve_ldsds_le_zd, - NULL, }, - { gen_helper_sve_ldbdu_zd, - gen_helper_sve_ldhdu_le_zd, - gen_helper_sve_ldsdu_le_zd, - gen_helper_sve_lddd_le_zd, } } }, - - /* First-fault */ - { { { gen_helper_sve_ldffbds_zsu, - gen_helper_sve_ldffhds_le_zsu, - gen_helper_sve_ldffsds_le_zsu, - NULL, }, - { gen_helper_sve_ldffbdu_zsu, - gen_helper_sve_ldffhdu_le_zsu, - gen_helper_sve_ldffsdu_le_zsu, - gen_helper_sve_ldffdd_le_zsu, } }, - { { gen_helper_sve_ldffbds_zss, - gen_helper_sve_ldffhds_le_zss, - gen_helper_sve_ldffsds_le_zss, - NULL, }, - { gen_helper_sve_ldffbdu_zss, - gen_helper_sve_ldffhdu_le_zss, - gen_helper_sve_ldffsdu_le_zss, - gen_helper_sve_ldffdd_le_zss, } }, - { { gen_helper_sve_ldffbds_zd, - gen_helper_sve_ldffhds_le_zd, - gen_helper_sve_ldffsds_le_zd, - NULL, }, - { gen_helper_sve_ldffbdu_zd, - gen_helper_sve_ldffhdu_le_zd, - gen_helper_sve_ldffsdu_le_zd, - gen_helper_sve_ldffdd_le_zd, } } } }, - - /* Big-endian */ - { { { { gen_helper_sve_ldbds_zsu, - gen_helper_sve_ldhds_be_zsu, - gen_helper_sve_ldsds_be_zsu, - NULL, }, - { gen_helper_sve_ldbdu_zsu, - gen_helper_sve_ldhdu_be_zsu, - gen_helper_sve_ldsdu_be_zsu, - gen_helper_sve_lddd_be_zsu, } }, - { { gen_helper_sve_ldbds_zss, - gen_helper_sve_ldhds_be_zss, - gen_helper_sve_ldsds_be_zss, - NULL, }, - { gen_helper_sve_ldbdu_zss, - gen_helper_sve_ldhdu_be_zss, - gen_helper_sve_ldsdu_be_zss, - gen_helper_sve_lddd_be_zss, } }, - { { 
gen_helper_sve_ldbds_zd, - gen_helper_sve_ldhds_be_zd, - gen_helper_sve_ldsds_be_zd, - NULL, }, - { gen_helper_sve_ldbdu_zd, - gen_helper_sve_ldhdu_be_zd, - gen_helper_sve_ldsdu_be_zd, - gen_helper_sve_lddd_be_zd, } } }, - - /* First-fault */ - { { { gen_helper_sve_ldffbds_zsu, - gen_helper_sve_ldffhds_be_zsu, - gen_helper_sve_ldffsds_be_zsu, - NULL, }, - { gen_helper_sve_ldffbdu_zsu, - gen_helper_sve_ldffhdu_be_zsu, - gen_helper_sve_ldffsdu_be_zsu, - gen_helper_sve_ldffdd_be_zsu, } }, - { { gen_helper_sve_ldffbds_zss, - gen_helper_sve_ldffhds_be_zss, - gen_helper_sve_ldffsds_be_zss, - NULL, }, - { gen_helper_sve_ldffbdu_zss, - gen_helper_sve_ldffhdu_be_zss, - gen_helper_sve_ldffsdu_be_zss, - gen_helper_sve_ldffdd_be_zss, } }, - { { gen_helper_sve_ldffbds_zd, - gen_helper_sve_ldffhds_be_zd, - gen_helper_sve_ldffsds_be_zd, - NULL, }, - { gen_helper_sve_ldffbdu_zd, - gen_helper_sve_ldffhdu_be_zd, - gen_helper_sve_ldffsdu_be_zd, - gen_helper_sve_ldffdd_be_zd, } } } }, +static gen_helper_gvec_mem_scatter * const +gather_load_fn64[2][2][2][3][2][4] = { + { /* MTE Inactive */ + { /* Little-endian */ + { { { gen_helper_sve_ldbds_zsu, + gen_helper_sve_ldhds_le_zsu, + gen_helper_sve_ldsds_le_zsu, + NULL, }, + { gen_helper_sve_ldbdu_zsu, + gen_helper_sve_ldhdu_le_zsu, + gen_helper_sve_ldsdu_le_zsu, + gen_helper_sve_lddd_le_zsu, } }, + { { gen_helper_sve_ldbds_zss, + gen_helper_sve_ldhds_le_zss, + gen_helper_sve_ldsds_le_zss, + NULL, }, + { gen_helper_sve_ldbdu_zss, + gen_helper_sve_ldhdu_le_zss, + gen_helper_sve_ldsdu_le_zss, + gen_helper_sve_lddd_le_zss, } }, + { { gen_helper_sve_ldbds_zd, + gen_helper_sve_ldhds_le_zd, + gen_helper_sve_ldsds_le_zd, + NULL, }, + { gen_helper_sve_ldbdu_zd, + gen_helper_sve_ldhdu_le_zd, + gen_helper_sve_ldsdu_le_zd, + gen_helper_sve_lddd_le_zd, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu, + gen_helper_sve_ldffhds_le_zsu, + gen_helper_sve_ldffsds_le_zsu, + NULL, }, + { gen_helper_sve_ldffbdu_zsu, + gen_helper_sve_ldffhdu_le_zsu, + gen_helper_sve_ldffsdu_le_zsu, + gen_helper_sve_ldffdd_le_zsu, } }, + { { gen_helper_sve_ldffbds_zss, + gen_helper_sve_ldffhds_le_zss, + gen_helper_sve_ldffsds_le_zss, + NULL, }, + { gen_helper_sve_ldffbdu_zss, + gen_helper_sve_ldffhdu_le_zss, + gen_helper_sve_ldffsdu_le_zss, + gen_helper_sve_ldffdd_le_zss, } }, + { { gen_helper_sve_ldffbds_zd, + gen_helper_sve_ldffhds_le_zd, + gen_helper_sve_ldffsds_le_zd, + NULL, }, + { gen_helper_sve_ldffbdu_zd, + gen_helper_sve_ldffhdu_le_zd, + gen_helper_sve_ldffsdu_le_zd, + gen_helper_sve_ldffdd_le_zd, } } } }, + { /* Big-endian */ + { { { gen_helper_sve_ldbds_zsu, + gen_helper_sve_ldhds_be_zsu, + gen_helper_sve_ldsds_be_zsu, + NULL, }, + { gen_helper_sve_ldbdu_zsu, + gen_helper_sve_ldhdu_be_zsu, + gen_helper_sve_ldsdu_be_zsu, + gen_helper_sve_lddd_be_zsu, } }, + { { gen_helper_sve_ldbds_zss, + gen_helper_sve_ldhds_be_zss, + gen_helper_sve_ldsds_be_zss, + NULL, }, + { gen_helper_sve_ldbdu_zss, + gen_helper_sve_ldhdu_be_zss, + gen_helper_sve_ldsdu_be_zss, + gen_helper_sve_lddd_be_zss, } }, + { { gen_helper_sve_ldbds_zd, + gen_helper_sve_ldhds_be_zd, + gen_helper_sve_ldsds_be_zd, + NULL, }, + { gen_helper_sve_ldbdu_zd, + gen_helper_sve_ldhdu_be_zd, + gen_helper_sve_ldsdu_be_zd, + gen_helper_sve_lddd_be_zd, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu, + gen_helper_sve_ldffhds_be_zsu, + gen_helper_sve_ldffsds_be_zsu, + NULL, }, + { gen_helper_sve_ldffbdu_zsu, + gen_helper_sve_ldffhdu_be_zsu, + gen_helper_sve_ldffsdu_be_zsu, + gen_helper_sve_ldffdd_be_zsu, } }, + { { 
gen_helper_sve_ldffbds_zss, + gen_helper_sve_ldffhds_be_zss, + gen_helper_sve_ldffsds_be_zss, + NULL, }, + { gen_helper_sve_ldffbdu_zss, + gen_helper_sve_ldffhdu_be_zss, + gen_helper_sve_ldffsdu_be_zss, + gen_helper_sve_ldffdd_be_zss, } }, + { { gen_helper_sve_ldffbds_zd, + gen_helper_sve_ldffhds_be_zd, + gen_helper_sve_ldffsds_be_zd, + NULL, }, + { gen_helper_sve_ldffbdu_zd, + gen_helper_sve_ldffhdu_be_zd, + gen_helper_sve_ldffsdu_be_zd, + gen_helper_sve_ldffdd_be_zd, } } } } }, + { /* MTE Active */ + { /* Little-endian */ + { { { gen_helper_sve_ldbds_zsu_mte, + gen_helper_sve_ldhds_le_zsu_mte, + gen_helper_sve_ldsds_le_zsu_mte, + NULL, }, + { gen_helper_sve_ldbdu_zsu_mte, + gen_helper_sve_ldhdu_le_zsu_mte, + gen_helper_sve_ldsdu_le_zsu_mte, + gen_helper_sve_lddd_le_zsu_mte, } }, + { { gen_helper_sve_ldbds_zss_mte, + gen_helper_sve_ldhds_le_zss_mte, + gen_helper_sve_ldsds_le_zss_mte, + NULL, }, + { gen_helper_sve_ldbdu_zss_mte, + gen_helper_sve_ldhdu_le_zss_mte, + gen_helper_sve_ldsdu_le_zss_mte, + gen_helper_sve_lddd_le_zss_mte, } }, + { { gen_helper_sve_ldbds_zd_mte, + gen_helper_sve_ldhds_le_zd_mte, + gen_helper_sve_ldsds_le_zd_mte, + NULL, }, + { gen_helper_sve_ldbdu_zd_mte, + gen_helper_sve_ldhdu_le_zd_mte, + gen_helper_sve_ldsdu_le_zd_mte, + gen_helper_sve_lddd_le_zd_mte, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu_mte, + gen_helper_sve_ldffhds_le_zsu_mte, + gen_helper_sve_ldffsds_le_zsu_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zsu_mte, + gen_helper_sve_ldffhdu_le_zsu_mte, + gen_helper_sve_ldffsdu_le_zsu_mte, + gen_helper_sve_ldffdd_le_zsu_mte, } }, + { { gen_helper_sve_ldffbds_zss_mte, + gen_helper_sve_ldffhds_le_zss_mte, + gen_helper_sve_ldffsds_le_zss_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zss_mte, + gen_helper_sve_ldffhdu_le_zss_mte, + gen_helper_sve_ldffsdu_le_zss_mte, + gen_helper_sve_ldffdd_le_zss_mte, } }, + { { gen_helper_sve_ldffbds_zd_mte, + gen_helper_sve_ldffhds_le_zd_mte, + gen_helper_sve_ldffsds_le_zd_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zd_mte, + gen_helper_sve_ldffhdu_le_zd_mte, + gen_helper_sve_ldffsdu_le_zd_mte, + gen_helper_sve_ldffdd_le_zd_mte, } } } }, + { /* Big-endian */ + { { { gen_helper_sve_ldbds_zsu_mte, + gen_helper_sve_ldhds_be_zsu_mte, + gen_helper_sve_ldsds_be_zsu_mte, + NULL, }, + { gen_helper_sve_ldbdu_zsu_mte, + gen_helper_sve_ldhdu_be_zsu_mte, + gen_helper_sve_ldsdu_be_zsu_mte, + gen_helper_sve_lddd_be_zsu_mte, } }, + { { gen_helper_sve_ldbds_zss_mte, + gen_helper_sve_ldhds_be_zss_mte, + gen_helper_sve_ldsds_be_zss_mte, + NULL, }, + { gen_helper_sve_ldbdu_zss_mte, + gen_helper_sve_ldhdu_be_zss_mte, + gen_helper_sve_ldsdu_be_zss_mte, + gen_helper_sve_lddd_be_zss_mte, } }, + { { gen_helper_sve_ldbds_zd_mte, + gen_helper_sve_ldhds_be_zd_mte, + gen_helper_sve_ldsds_be_zd_mte, + NULL, }, + { gen_helper_sve_ldbdu_zd_mte, + gen_helper_sve_ldhdu_be_zd_mte, + gen_helper_sve_ldsdu_be_zd_mte, + gen_helper_sve_lddd_be_zd_mte, } } }, + + /* First-fault */ + { { { gen_helper_sve_ldffbds_zsu_mte, + gen_helper_sve_ldffhds_be_zsu_mte, + gen_helper_sve_ldffsds_be_zsu_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zsu_mte, + gen_helper_sve_ldffhdu_be_zsu_mte, + gen_helper_sve_ldffsdu_be_zsu_mte, + gen_helper_sve_ldffdd_be_zsu_mte, } }, + { { gen_helper_sve_ldffbds_zss_mte, + gen_helper_sve_ldffhds_be_zss_mte, + gen_helper_sve_ldffsds_be_zss_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zss_mte, + gen_helper_sve_ldffhdu_be_zss_mte, + gen_helper_sve_ldffsdu_be_zss_mte, + gen_helper_sve_ldffdd_be_zss_mte, } }, + { { 
gen_helper_sve_ldffbds_zd_mte, + gen_helper_sve_ldffhds_be_zd_mte, + gen_helper_sve_ldffsds_be_zd_mte, + NULL, }, + { gen_helper_sve_ldffbdu_zd_mte, + gen_helper_sve_ldffhdu_be_zd_mte, + gen_helper_sve_ldffsdu_be_zd_mte, + gen_helper_sve_ldffdd_be_zd_mte, } } } } }, }; static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) { gen_helper_gvec_mem_scatter *fn = NULL; - int be = s->be_data == MO_BE; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; if (!sve_access_check(s)) { return true; @@ -5228,23 +5633,24 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a) switch (a->esz) { case MO_32: - fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz]; + fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz]; break; case MO_64: - fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz]; + fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz]; break; } assert(fn != NULL); do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, - cpu_reg_sp(s, a->rn), a->msz, fn); + cpu_reg_sp(s, a->rn), a->msz, false, fn); return true; } static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) { gen_helper_gvec_mem_scatter *fn = NULL; - int be = s->be_data == MO_BE; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; TCGv_i64 imm; if (a->esz < a->msz || (a->esz == a->msz && !a->u)) { @@ -5256,10 +5662,10 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) switch (a->esz) { case MO_32: - fn = gather_load_fn32[be][a->ff][0][a->u][a->msz]; + fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz]; break; case MO_64: - fn = gather_load_fn64[be][a->ff][2][a->u][a->msz]; + fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz]; break; } assert(fn != NULL); @@ -5268,63 +5674,108 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a) * by loading the immediate into the scalar parameter. */ imm = tcg_const_i64(a->imm << a->msz); - do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn); + do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, false, fn); tcg_temp_free_i64(imm); return true; } -/* Indexed by [be][xs][msz]. */ -static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][3] = { - /* Little-endian */ - { { gen_helper_sve_stbs_zsu, - gen_helper_sve_sths_le_zsu, - gen_helper_sve_stss_le_zsu, }, - { gen_helper_sve_stbs_zss, - gen_helper_sve_sths_le_zss, - gen_helper_sve_stss_le_zss, } }, - /* Big-endian */ - { { gen_helper_sve_stbs_zsu, - gen_helper_sve_sths_be_zsu, - gen_helper_sve_stss_be_zsu, }, - { gen_helper_sve_stbs_zss, - gen_helper_sve_sths_be_zss, - gen_helper_sve_stss_be_zss, } }, +/* Indexed by [mte][be][xs][msz]. 
*/
+static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
+    { /* MTE Inactive */
+        { /* Little-endian */
+            { gen_helper_sve_stbs_zsu,
+              gen_helper_sve_sths_le_zsu,
+              gen_helper_sve_stss_le_zsu, },
+            { gen_helper_sve_stbs_zss,
+              gen_helper_sve_sths_le_zss,
+              gen_helper_sve_stss_le_zss, } },
+        { /* Big-endian */
+            { gen_helper_sve_stbs_zsu,
+              gen_helper_sve_sths_be_zsu,
+              gen_helper_sve_stss_be_zsu, },
+            { gen_helper_sve_stbs_zss,
+              gen_helper_sve_sths_be_zss,
+              gen_helper_sve_stss_be_zss, } } },
+    { /* MTE Active */
+        { /* Little-endian */
+            { gen_helper_sve_stbs_zsu_mte,
+              gen_helper_sve_sths_le_zsu_mte,
+              gen_helper_sve_stss_le_zsu_mte, },
+            { gen_helper_sve_stbs_zss_mte,
+              gen_helper_sve_sths_le_zss_mte,
+              gen_helper_sve_stss_le_zss_mte, } },
+        { /* Big-endian */
+            { gen_helper_sve_stbs_zsu_mte,
+              gen_helper_sve_sths_be_zsu_mte,
+              gen_helper_sve_stss_be_zsu_mte, },
+            { gen_helper_sve_stbs_zss_mte,
+              gen_helper_sve_sths_be_zss_mte,
+              gen_helper_sve_stss_be_zss_mte, } } },
 };
 
 /* Note that we overload xs=2 to indicate 64-bit offset.  */
-static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][3][4] = {
-    /* Little-endian */
-    { { gen_helper_sve_stbd_zsu,
-        gen_helper_sve_sthd_le_zsu,
-        gen_helper_sve_stsd_le_zsu,
-        gen_helper_sve_stdd_le_zsu, },
-      { gen_helper_sve_stbd_zss,
-        gen_helper_sve_sthd_le_zss,
-        gen_helper_sve_stsd_le_zss,
-        gen_helper_sve_stdd_le_zss, },
-      { gen_helper_sve_stbd_zd,
-        gen_helper_sve_sthd_le_zd,
-        gen_helper_sve_stsd_le_zd,
-        gen_helper_sve_stdd_le_zd, } },
-    /* Big-endian */
-    { { gen_helper_sve_stbd_zsu,
-        gen_helper_sve_sthd_be_zsu,
-        gen_helper_sve_stsd_be_zsu,
-        gen_helper_sve_stdd_be_zsu, },
-      { gen_helper_sve_stbd_zss,
-        gen_helper_sve_sthd_be_zss,
-        gen_helper_sve_stsd_be_zss,
-        gen_helper_sve_stdd_be_zss, },
-      { gen_helper_sve_stbd_zd,
-        gen_helper_sve_sthd_be_zd,
-        gen_helper_sve_stsd_be_zd,
-        gen_helper_sve_stdd_be_zd, } },
+static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
+    { /* MTE Inactive */
+        { /* Little-endian */
+            { gen_helper_sve_stbd_zsu,
+              gen_helper_sve_sthd_le_zsu,
+              gen_helper_sve_stsd_le_zsu,
+              gen_helper_sve_stdd_le_zsu, },
+            { gen_helper_sve_stbd_zss,
+              gen_helper_sve_sthd_le_zss,
+              gen_helper_sve_stsd_le_zss,
+              gen_helper_sve_stdd_le_zss, },
+            { gen_helper_sve_stbd_zd,
+              gen_helper_sve_sthd_le_zd,
+              gen_helper_sve_stsd_le_zd,
+              gen_helper_sve_stdd_le_zd, } },
+        { /* Big-endian */
+            { gen_helper_sve_stbd_zsu,
+              gen_helper_sve_sthd_be_zsu,
+              gen_helper_sve_stsd_be_zsu,
+              gen_helper_sve_stdd_be_zsu, },
+            { gen_helper_sve_stbd_zss,
+              gen_helper_sve_sthd_be_zss,
+              gen_helper_sve_stsd_be_zss,
+              gen_helper_sve_stdd_be_zss, },
+            { gen_helper_sve_stbd_zd,
+              gen_helper_sve_sthd_be_zd,
+              gen_helper_sve_stsd_be_zd,
+              gen_helper_sve_stdd_be_zd, } } },
+    { /* MTE Active */
+        { /* Little-endian */
+            { gen_helper_sve_stbd_zsu_mte,
+              gen_helper_sve_sthd_le_zsu_mte,
+              gen_helper_sve_stsd_le_zsu_mte,
+              gen_helper_sve_stdd_le_zsu_mte, },
+            { gen_helper_sve_stbd_zss_mte,
+              gen_helper_sve_sthd_le_zss_mte,
+              gen_helper_sve_stsd_le_zss_mte,
+              gen_helper_sve_stdd_le_zss_mte, },
+            { gen_helper_sve_stbd_zd_mte,
+              gen_helper_sve_sthd_le_zd_mte,
+              gen_helper_sve_stsd_le_zd_mte,
+              gen_helper_sve_stdd_le_zd_mte, } },
+        { /* Big-endian */
+            { gen_helper_sve_stbd_zsu_mte,
+              gen_helper_sve_sthd_be_zsu_mte,
+              gen_helper_sve_stsd_be_zsu_mte,
+              gen_helper_sve_stdd_be_zsu_mte, },
+            { gen_helper_sve_stbd_zss_mte,
+              gen_helper_sve_sthd_be_zss_mte,
+              gen_helper_sve_stsd_be_zss_mte,
+              gen_helper_sve_stdd_be_zss_mte,
}, + { gen_helper_sve_stbd_zd_mte, + gen_helper_sve_sthd_be_zd_mte, + gen_helper_sve_stsd_be_zd_mte, + gen_helper_sve_stdd_be_zd_mte, } } }, }; static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a) { gen_helper_gvec_mem_scatter *fn; - int be = s->be_data == MO_BE; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; if (a->esz < a->msz || (a->msz == 0 && a->scale)) { return false; @@ -5334,23 +5785,24 @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a) } switch (a->esz) { case MO_32: - fn = scatter_store_fn32[be][a->xs][a->msz]; + fn = scatter_store_fn32[mte][be][a->xs][a->msz]; break; case MO_64: - fn = scatter_store_fn64[be][a->xs][a->msz]; + fn = scatter_store_fn64[mte][be][a->xs][a->msz]; break; default: g_assert_not_reached(); } do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz, - cpu_reg_sp(s, a->rn), a->msz, fn); + cpu_reg_sp(s, a->rn), a->msz, true, fn); return true; } static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) { gen_helper_gvec_mem_scatter *fn = NULL; - int be = s->be_data == MO_BE; + bool be = s->be_data == MO_BE; + bool mte = s->mte_active[0]; TCGv_i64 imm; if (a->esz < a->msz) { @@ -5362,10 +5814,10 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) switch (a->esz) { case MO_32: - fn = scatter_store_fn32[be][0][a->msz]; + fn = scatter_store_fn32[mte][be][0][a->msz]; break; case MO_64: - fn = scatter_store_fn64[be][2][a->msz]; + fn = scatter_store_fn64[mte][be][2][a->msz]; break; } assert(fn != NULL); @@ -5374,7 +5826,7 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a) * by loading the immediate into the scalar parameter. */ imm = tcg_const_i64(a->imm << a->msz); - do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn); + do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, true, fn); tcg_temp_free_i64(imm); return true; } diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c index bf31b18..afa8a5f 100644 --- a/target/arm/translate-vfp.inc.c +++ b/target/arm/translate-vfp.inc.c @@ -123,7 +123,7 @@ static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled) * this to be the last insn in the TB). 
*/ if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; gen_io_start(); } gen_helper_v7m_preserve_fp_state(cpu_env); @@ -2860,6 +2860,6 @@ static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a) tcg_temp_free_i32(fptr); /* End the TB, because we have updated FP control bits */ - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; return true; } diff --git a/target/arm/translate.c b/target/arm/translate.c index 795964d..c39a929 100644 --- a/target/arm/translate.c +++ b/target/arm/translate.c @@ -2775,7 +2775,7 @@ static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn) tcg_temp_free_i32(tcg_tgtmode); tcg_temp_free_i32(tcg_regno); tcg_temp_free_i32(tcg_reg); - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn) @@ -2797,7 +2797,7 @@ static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn) tcg_temp_free_i32(tcg_tgtmode); tcg_temp_free_i32(tcg_regno); store_reg(s, rn, tcg_reg); - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } /* Store value to PC as for an exception return (ie don't @@ -5114,7 +5114,7 @@ static void gen_srs(DisasContext *s, tcg_temp_free_i32(tmp); } tcg_temp_free_i32(addr); - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } /* Generate a label used for skipping this instruction */ @@ -8160,7 +8160,7 @@ static bool trans_SETEND(DisasContext *s, arg_SETEND *a) } if (a->E != (s->be_data == MO_BE)) { gen_helper_setend(cpu_env); - s->base.is_jmp = DISAS_UPDATE; + s->base.is_jmp = DISAS_UPDATE_EXIT; } return true; } @@ -8873,7 +8873,8 @@ static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) break; case DISAS_NEXT: case DISAS_TOO_MANY: - case DISAS_UPDATE: + case DISAS_UPDATE_EXIT: + case DISAS_UPDATE_NOCHAIN: gen_set_pc_im(dc, dc->base.pc_next); /* fall through */ default: @@ -8897,10 +8898,13 @@ static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) case DISAS_TOO_MANY: gen_goto_tb(dc, 1, dc->base.pc_next); break; + case DISAS_UPDATE_NOCHAIN: + gen_set_pc_im(dc, dc->base.pc_next); + /* fall through */ case DISAS_JUMP: gen_goto_ptr(); break; - case DISAS_UPDATE: + case DISAS_UPDATE_EXIT: gen_set_pc_im(dc, dc->base.pc_next); /* fall through */ default: diff --git a/target/arm/translate.h b/target/arm/translate.h index 19650a9..16f2699 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -30,6 +30,7 @@ typedef struct DisasContext { ARMMMUIdx mmu_idx; /* MMU index to use for normal loads/stores */ uint8_t tbii; /* TBI1|TBI0 for insns */ uint8_t tbid; /* TBI1|TBI0 for data */ + uint8_t tcma; /* TCMA1|TCMA0 for MTE */ bool ns; /* Use non-secure CPREG bank on access */ int fp_excp_el; /* FP exception EL or 0 if enabled */ int sve_excp_el; /* SVE exception EL or 0 if enabled */ @@ -77,6 +78,10 @@ typedef struct DisasContext { bool unpriv; /* True if v8.3-PAuth is active. */ bool pauth_active; + /* True if v8.5-MTE access to tags is enabled. */ + bool ata; + /* True if v8.5-MTE tag checks affect the PE; index with is_unpriv. */ + bool mte_active[2]; /* True with v8.5-BTI and SCTLR_ELx.BT* set. */ bool bt; /* True if any CP15 access is trapped by HSTR_EL2 */ @@ -86,6 +91,8 @@ typedef struct DisasContext { * < 0, set by the current instruction. */ int8_t btype; + /* A copy of cpu->dcz_blocksize. */ + uint8_t dcz_blocksize; /* True if this page is guarded. 
*/
     bool guarded_page;
 
     /* Bottom two bits of XScale c15_cpar coprocessor access control reg */
@@ -148,7 +155,8 @@ static inline void disas_set_insn_syndrome(DisasContext *s, uint32_t syn)
 
 /* is_jmp field values */
 #define DISAS_JUMP    DISAS_TARGET_0 /* only pc was modified dynamically */
-#define DISAS_UPDATE  DISAS_TARGET_1 /* cpu state was modified dynamically */
+/* CPU state was modified dynamically; exit to main loop for interrupts. */
+#define DISAS_UPDATE_EXIT  DISAS_TARGET_1
 /* These instructions trap after executing, so the A32/T32 decoder must
  * defer them until after the conditional execution state has been updated.
  * WFI also needs special handling when single-stepping.
@@ -164,13 +172,16 @@ static inline void disas_set_insn_syndrome(DisasContext *s, uint32_t syn)
  * custom end-of-TB code)
  */
 #define DISAS_BX_EXCRET DISAS_TARGET_8
-/* For instructions which want an immediate exit to the main loop,
- * as opposed to attempting to use lookup_and_goto_ptr.  Unlike
- * DISAS_UPDATE this doesn't write the PC on exiting the translation
- * loop so you need to ensure something (gen_a64_set_pc_im or runtime
- * helper) has done so before we reach return from cpu_tb_exec.
+/*
+ * For instructions which want an immediate exit to the main loop, as opposed
+ * to attempting to use lookup_and_goto_ptr.  Unlike DISAS_UPDATE_EXIT, this
+ * doesn't write the PC on exiting the translation loop so you need to ensure
+ * something (gen_a64_set_pc_im or runtime helper) has done so before we reach
+ * return from cpu_tb_exec.
  */
 #define DISAS_EXIT      DISAS_TARGET_9
+/* CPU state was modified dynamically; no need to exit, but do not chain. */
+#define DISAS_UPDATE_NOCHAIN  DISAS_TARGET_10
 
 #ifdef TARGET_AARCH64
 void a64_translate_init(void);
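
A note on the descriptor packing used throughout the SVE changes above: when MTE is active, do_mem_zpa and do_mem_zpz deposit the MTE metadata (mmu index, TBI and TCMA bits, the write flag, and the access sizes) into the upper bits of the same 32-bit descriptor that simd_desc() uses for its own payload, shifting them up by SVE_MTEDESC_SHIFT so the two halves cannot collide. The stand-alone sketch below models that round trip. The field offsets, widths, and shift value here are illustrative assumptions only; the real layout comes from the MTEDESC FIELD() definitions and SVE_MTEDESC_SHIFT defined elsewhere in this series, which this page does not show, and the real simd_desc() also encodes the vector lengths, which the sketch omits.

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Minimal stand-ins for QEMU's deposit32()/extract32() bitops. */
static uint32_t deposit32(uint32_t val, int start, int len, uint32_t field)
{
    uint32_t mask = (~0u >> (32 - len)) << start;
    return (val & ~mask) | ((field << start) & mask);
}

static uint32_t extract32(uint32_t val, int start, int len)
{
    return (val >> start) & (~0u >> (32 - len));
}

/* Assumed field layout -- NOT the real MTEDESC definition. */
enum {
    MIDX_OFS = 0,  MIDX_LEN = 4,     /* mmu index for the access */
    TBI_OFS  = 4,  TBI_LEN  = 2,     /* TBI1|TBI0 */
    WRITE_OFS = 6, WRITE_LEN = 1,    /* 1 for stores */
    MTEDESC_SHIFT = 12,              /* bits below this belong to simd_desc */
};

int main(void)
{
    uint32_t desc = 0;

    /* Pack the MTE metadata, as do_mem_zpa does with FIELD_DP32. */
    desc = deposit32(desc, MIDX_OFS, MIDX_LEN, 1);    /* mmu_idx = 1    */
    desc = deposit32(desc, TBI_OFS, TBI_LEN, 3);      /* TBI0 and TBI1  */
    desc = deposit32(desc, WRITE_OFS, WRITE_LEN, 1);  /* a store        */

    /* Lift the MTE half above the bits simd_desc() will use... */
    desc <<= MTEDESC_SHIFT;

    /* ...then OR in the low-bits payload (the register number zt). */
    uint32_t zt = 5;
    desc |= zt;

    /* The helper can recover both halves independently. */
    assert(extract32(desc, 0, MTEDESC_SHIFT) == zt);
    assert(extract32(desc >> MTEDESC_SHIFT, TBI_OFS, TBI_LEN) == 3);
    printf("desc = 0x%08x\n", desc);
    return 0;
}
```

This is also why do_mem_zpa initializes `desc = 0` and only deposits the MTE fields under `s->mte_active[0]`: in the non-MTE case the upper half stays zero and the helper sees a plain simd_desc value.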
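On the "dirty" versus "clean" address distinction that runs through do_str, trans_LD1R_zpri, and the gen_mte_check1/gen_mte_checkN calls above: with TBI (top-byte-ignore), bits 63:56 of a data address may carry a tag that must not take part in translation, so the translator either strips the tag up front (clean_data_tbi, when MTE is inactive) or passes the untouched dirty address together with the TBI/TCMA bits to the MTE helper, which checks the allocation tag before cleaning. A rough model of the cleaning step, assuming TBI is enabled for both halves of the address space, is sketched below; the real logic also honours the per-half TBI bits carried in the descriptor.

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Replace bits 63:56 with the sign extension of bit 55, which is how
 * a TBI "dirty" address becomes a translatable "clean" one.  This is
 * a sketch, not QEMU's clean_data_tbi(), which also consults the
 * per-regime TBI configuration.
 */
static uint64_t clean_address(uint64_t dirty)
{
    return (uint64_t)(((int64_t)(dirty << 8)) >> 8);
}

int main(void)
{
    uint64_t tagged = 0x5a00ffff12345678ull;  /* tag 0x5a in the top byte */
    printf("clean = 0x%016" PRIx64 "\n", clean_address(tagged));
    /* Prints 0x0000ffff12345678: the tag is gone, the VA is unchanged. */
    return 0;
}
```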