author     Peter Maydell <peter.maydell@linaro.org>    2020-06-26 18:22:36 +0100
committer  Peter Maydell <peter.maydell@linaro.org>    2020-06-26 18:22:36 +0100
commit   553cf5d7c47bee05a3dec9461c1f8430316d516b (patch)
tree     c39e6a48d2746e4bf9d79cda6e3fa23a1c9d9b6d /target
parent   3591ddd39987cbdaa0cfa344a262f315abd97582 (diff)
parent   c7459633baa71d1781fde4a245d6ec9ce2f008cf (diff)
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200626' into staging
target-arm queue:
 * hw/arm/aspeed: improve QOM usage
 * hw/misc/pca9552: trace GPIO change events
 * target/arm: Implement ARMv8.5-MemTag for system emulation

# gpg: Signature made Fri 26 Jun 2020 16:13:27 BST
# gpg:                using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg:                issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate]
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>" [ultimate]
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20200626: (57 commits)
  target/arm: Enable MTE
  target/arm: Add allocation tag storage for system mode
  target/arm: Create tagged ram when MTE is enabled
  target/arm: Cache the Tagged bit for a page in MemTxAttrs
  target/arm: Always pass cacheattr to get_phys_addr
  target/arm: Set PSTATE.TCO on exception entry
  target/arm: Implement data cache set allocation tags
  target/arm: Complete TBI clearing for user-only for SVE
  target/arm: Add mte helpers for sve scatter/gather memory ops
  target/arm: Handle TBI for sve scalar + int memory ops
  target/arm: Add mte helpers for sve scalar + int ff/nf loads
  target/arm: Add mte helpers for sve scalar + int stores
  target/arm: Add mte helpers for sve scalar + int loads
  target/arm: Add arm_tlb_bti_gp
  target/arm: Tidy trans_LD1R_zpri
  target/arm: Use mte_check1 for sve LD1R
  target/arm: Use mte_checkN for sve unpredicated stores
  target/arm: Use mte_checkN for sve unpredicated loads
  target/arm: Add helper_mte_check_zva
  target/arm: Implement helper_mte_checkN
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'target')
-rw-r--r--   target/arm/Makefile.objs           1
-rw-r--r--   target/arm/cpu.c                  81
-rw-r--r--   target/arm/cpu.h                  50
-rw-r--r--   target/arm/cpu64.c                 5
-rw-r--r--   target/arm/helper-a64.c           96
-rw-r--r--   target/arm/helper-a64.h           16
-rw-r--r--   target/arm/helper-sve.h          488
-rw-r--r--   target/arm/helper.c              421
-rw-r--r--   target/arm/helper.h                2
-rw-r--r--   target/arm/internals.h           153
-rw-r--r--   target/arm/m_helper.c             11
-rw-r--r--   target/arm/mte_helper.c          906
-rw-r--r--   target/arm/op_helper.c            16
-rw-r--r--   target/arm/sve_helper.c          614
-rw-r--r--   target/arm/translate-a64.c       657
-rw-r--r--   target/arm/translate-a64.h         5
-rw-r--r--   target/arm/translate-sve.c      1420
-rw-r--r--   target/arm/translate-vfp.inc.c     4
-rw-r--r--   target/arm/translate.c            16
-rw-r--r--   target/arm/translate.h            23
-rw-r--r--   target/arm/tlb_helper.c           13
21 files changed, 4141 insertions, 857 deletions
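
Before the diffs, a brief orientation on what MTE adds: each 16-byte granule of memory carries a 4-bit allocation tag, checked against the logical tag in bits [59:56] of every tagged pointer, and two 4-bit tags pack into each byte of tag storage, which is why the tag memory wired up below is 1/32 the size of the RAM it covers. A minimal sketch of that arithmetic, assuming a flat tag region and even-granule-in-low-nibble packing; the helper names are illustrative, not QEMU's:

    #include <stdint.h>

    #define TAG_GRANULE 16   /* data bytes covered by one 4-bit allocation tag */

    /* The logical tag travels in pointer bits [59:56]. */
    static inline unsigned ptr_logical_tag(uint64_t ptr)
    {
        return (ptr >> 56) & 0xf;
    }

    /* Two 4-bit tags per byte of tag storage: one tag byte per 32 data bytes. */
    static inline unsigned allocation_tag(const uint8_t *tag_mem, uint64_t addr)
    {
        uint8_t pair = tag_mem[addr / (2 * TAG_GRANULE)];
        /* Assumed packing: even granule in the low nibble, odd in the high. */
        return (addr & TAG_GRANULE) ? (pair >> 4) : (pair & 0xf);
    }

A load or store faults (or, in asynchronous mode, records a fault) when the two tags disagree.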
diff --git a/target/arm/Makefile.objs b/target/arm/Makefile.objs
index 83febd2..fa39fd7 100644
--- a/target/arm/Makefile.objs
+++ b/target/arm/Makefile.objs
@@ -86,3 +86,4 @@ obj-$(CONFIG_SOFTMMU) += psci.o
obj-$(TARGET_AARCH64) += translate-a64.o helper-a64.o
obj-$(TARGET_AARCH64) += translate-sve.o sve_helper.o
obj-$(TARGET_AARCH64) += pauth_helper.o
+obj-$(TARGET_AARCH64) += mte_helper.o
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index e44e180..5050e18 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -203,6 +203,9 @@ static void arm_cpu_reset(DeviceState *dev)
* Enable TBI0 and TBI1. While the real kernel only enables TBI0,
* turning on both here will produce smaller code and otherwise
* make no difference to the user-level emulation.
+ *
+ * In sve_probe_page, we assume that this is set.
+ * Do not modify this without other changes.
*/
env->cp15.tcr_el[1].raw_tcr = (3ULL << 37);
#else
@@ -1249,6 +1252,25 @@ void arm_cpu_post_init(Object *obj)
if (kvm_enabled()) {
kvm_arm_add_vcpu_properties(obj);
}
+
+#ifndef CONFIG_USER_ONLY
+ if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64) &&
+ cpu_isar_feature(aa64_mte, cpu)) {
+ object_property_add_link(obj, "tag-memory",
+ TYPE_MEMORY_REGION,
+ (Object **)&cpu->tag_memory,
+ qdev_prop_allow_set_link_before_realize,
+ OBJ_PROP_LINK_STRONG);
+
+ if (arm_feature(&cpu->env, ARM_FEATURE_EL3)) {
+ object_property_add_link(obj, "secure-tag-memory",
+ TYPE_MEMORY_REGION,
+ (Object **)&cpu->secure_tag_memory,
+ qdev_prop_allow_set_link_before_realize,
+ OBJ_PROP_LINK_STRONG);
+ }
+ }
+#endif
}
static void arm_cpu_finalizefn(Object *obj)
@@ -1738,18 +1760,43 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
#ifndef CONFIG_USER_ONLY
MachineState *ms = MACHINE(qdev_get_machine());
unsigned int smp_cpus = ms->smp.cpus;
+ bool has_secure = cpu->has_el3 || arm_feature(env, ARM_FEATURE_M_SECURITY);
- if (cpu->has_el3 || arm_feature(env, ARM_FEATURE_M_SECURITY)) {
- cs->num_ases = 2;
+ /*
+ * We must set cs->num_ases to the final value before
+ * the first call to cpu_address_space_init.
+ */
+ if (cpu->tag_memory != NULL) {
+ cs->num_ases = 3 + has_secure;
+ } else {
+ cs->num_ases = 1 + has_secure;
+ }
+ if (has_secure) {
if (!cpu->secure_memory) {
cpu->secure_memory = cs->memory;
}
cpu_address_space_init(cs, ARMASIdx_S, "cpu-secure-memory",
cpu->secure_memory);
- } else {
- cs->num_ases = 1;
}
+
+ if (cpu->tag_memory != NULL) {
+ cpu_address_space_init(cs, ARMASIdx_TagNS, "cpu-tag-memory",
+ cpu->tag_memory);
+ if (has_secure) {
+ cpu_address_space_init(cs, ARMASIdx_TagS, "cpu-tag-memory",
+ cpu->secure_tag_memory);
+ }
+ } else if (cpu_isar_feature(aa64_mte, cpu)) {
+ /*
+ * Since there is no tag memory, we can't meaningfully support MTE
+ * to its fullest. To avoid problems later, when we would come to
+ * use the tag memory, downgrade support to insns only.
+ */
+ cpu->isar.id_aa64pfr1 =
+ FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 1);
+ }
+
cpu_address_space_init(cs, ARMASIdx_NS, "cpu-memory", cs->memory);
/* No core_count specified, default to smp_cpus. */
@@ -1758,6 +1805,30 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
}
#endif
+ if (tcg_enabled()) {
+ int dcz_blocklen = 4 << cpu->dcz_blocksize;
+
+ /*
+ * We only support DCZ blocklen that fits on one page.
+ *
+ * Architecturally this is always true. However TARGET_PAGE_SIZE
+ * is variable and, for compatibility with -machine virt-2.7,
+ * is only 1KiB, as an artifact of legacy ARMv5 subpage support.
+ * But even then, while the largest architectural DCZ blocklen
+ * is 2KiB, no cpu actually uses such a large blocklen.
+ */
+ assert(dcz_blocklen <= TARGET_PAGE_SIZE);
+
+ /*
+ * We only support DCZ blocksize >= 2*TAG_GRANULE, which is to say
+ * both nibbles of each byte storing tag data may be written at once.
+ * Since TAG_GRANULE is 16, this means that blocklen must be >= 32.
+ */
+ if (cpu_isar_feature(aa64_mte, cpu)) {
+ assert(dcz_blocklen >= 2 * TAG_GRANULE);
+ }
+ }
+
qemu_init_vcpu(cs);
cpu_reset(cs);
@@ -2169,8 +2240,8 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data)
cc->tlb_fill = arm_cpu_tlb_fill;
cc->debug_excp_handler = arm_debug_excp_handler;
cc->debug_check_watchpoint = arm_debug_check_watchpoint;
-#if !defined(CONFIG_USER_ONLY)
cc->do_unaligned_access = arm_cpu_do_unaligned_access;
+#if !defined(CONFIG_USER_ONLY)
cc->do_transaction_failed = arm_cpu_do_transaction_failed;
cc->adjust_watchpoint_address = arm_adjust_watchpoint_address;
#endif /* CONFIG_TCG && !CONFIG_USER_ONLY */
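
Two bits of arithmetic in the realize hunk above are worth spelling out. The CPU now needs up to four address spaces, data and tag each in non-secure and secure flavours, matching the ARMASIdx_* indices added to cpu.h below, so num_ases is 1 or 2 without tag memory and 3 or 4 with it. And the DCZ block length is 4 << dcz_blocksize bytes, since DCZID_EL0.BS holds log2 of the block size in 4-byte words; the common value of 4 gives 64 bytes, which satisfies both asserts. A sketch of those checks in isolation, as a hypothetical standalone validator:

    #include <assert.h>
    #include <stdbool.h>

    /* Hypothetical restatement of the realize-time DCZ checks above. */
    static void validate_dcz(unsigned dcz_blocksize, bool have_mte,
                             int target_page_size)
    {
        int dcz_blocklen = 4 << dcz_blocksize;      /* BS=4 -> 64 bytes */

        /* DC ZVA must never straddle a (possibly 1KiB) target page. */
        assert(dcz_blocklen <= target_page_size);

        /* With MTE, a block must cover whole tag bytes: 2 * TAG_GRANULE = 32. */
        if (have_mte) {
            assert(dcz_blocklen >= 2 * 16);
        }
    }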
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index cf66b8c..cf99dcc 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -502,6 +502,9 @@ typedef struct CPUARMState {
uint64_t pmccfiltr_el0; /* Performance Monitor Filter Register */
uint64_t vpidr_el2; /* Virtualization Processor ID Register */
uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */
+ uint64_t tfsr_el[4]; /* tfsre0_el1 is index 0. */
+ uint64_t gcr_el1;
+ uint64_t rgsr_el1;
} cp15;
struct {
@@ -789,6 +792,10 @@ struct ARMCPU {
/* MemoryRegion to use for secure physical accesses */
MemoryRegion *secure_memory;
+ /* MemoryRegion to use for allocation tag accesses */
+ MemoryRegion *tag_memory;
+ MemoryRegion *secure_tag_memory;
+
/* For v8M, pointer to the IDAU interface provided by board/SoC */
Object *idau;
@@ -1282,6 +1289,7 @@ void pmu_init(ARMCPU *cpu);
#define PSTATE_SS (1U << 21)
#define PSTATE_PAN (1U << 22)
#define PSTATE_UAO (1U << 23)
+#define PSTATE_TCO (1U << 25)
#define PSTATE_V (1U << 28)
#define PSTATE_C (1U << 29)
#define PSTATE_Z (1U << 30)
@@ -2356,7 +2364,9 @@ static inline uint64_t cpreg_to_kvm_id(uint32_t cpregid)
#define ARM_CP_NZCV (ARM_CP_SPECIAL | 0x0300)
#define ARM_CP_CURRENTEL (ARM_CP_SPECIAL | 0x0400)
#define ARM_CP_DC_ZVA (ARM_CP_SPECIAL | 0x0500)
-#define ARM_LAST_SPECIAL ARM_CP_DC_ZVA
+#define ARM_CP_DC_GVA (ARM_CP_SPECIAL | 0x0600)
+#define ARM_CP_DC_GZVA (ARM_CP_SPECIAL | 0x0700)
+#define ARM_LAST_SPECIAL ARM_CP_DC_GZVA
#define ARM_CP_FPU 0x1000
#define ARM_CP_SVE 0x2000
#define ARM_CP_NO_GDB 0x4000
@@ -2979,6 +2989,8 @@ typedef enum ARMMMUIdxBit {
typedef enum ARMASIdx {
ARMASIdx_NS = 0,
ARMASIdx_S = 1,
+ ARMASIdx_TagNS = 2,
+ ARMASIdx_TagS = 3,
} ARMASIdx;
/* Return the Exception Level targeted by debug exceptions. */
@@ -3183,10 +3195,10 @@ typedef ARMCPU ArchCPU;
* | | | TBFLAG_A32 | |
* | | +-----+----------+ TBFLAG_AM32 |
* | TBFLAG_ANY | |TBFLAG_M32| |
- * | | +-+----------+--------------|
- * | | | TBFLAG_A64 |
- * +--------------+---------+---------------------------+
- * 31 20 15 0
+ * | +-----------+----------+--------------|
+ * | | TBFLAG_A64 |
+ * +--------------+-------------------------------------+
+ * 31 20 0
*
* Unless otherwise noted, these bits are cached in env->hflags.
*/
@@ -3253,6 +3265,10 @@ FIELD(TBFLAG_A64, BT, 9, 1)
FIELD(TBFLAG_A64, BTYPE, 10, 2) /* Not cached. */
FIELD(TBFLAG_A64, TBID, 12, 2)
FIELD(TBFLAG_A64, UNPRIV, 14, 1)
+FIELD(TBFLAG_A64, ATA, 15, 1)
+FIELD(TBFLAG_A64, TCMA, 16, 2)
+FIELD(TBFLAG_A64, MTE_ACTIVE, 18, 1)
+FIELD(TBFLAG_A64, MTE0_ACTIVE, 19, 1)
/**
* cpu_mmu_index:
@@ -3385,6 +3401,20 @@ static inline uint64_t *aa64_vfp_qreg(CPUARMState *env, unsigned regno)
/* Shared between translate-sve.c and sve_helper.c. */
extern const uint64_t pred_esz_masks[4];
+/* Helper for the macros below, validating the argument type. */
+static inline MemTxAttrs *typecheck_memtxattrs(MemTxAttrs *x)
+{
+ return x;
+}
+
+/*
+ * Lvalue macros for ARM TLB bits that we must cache in the TCG TLB.
+ * Using these should be a bit more self-documenting than using the
+ * generic target bits directly.
+ */
+#define arm_tlb_bti_gp(x) (typecheck_memtxattrs(x)->target_tlb_bit0)
+#define arm_tlb_mte_tagged(x) (typecheck_memtxattrs(x)->target_tlb_bit1)
+
/*
* Naming convention for isar_feature functions:
* Functions which test 32-bit ID registers should have _aa32_ in
@@ -3814,6 +3844,16 @@ static inline bool isar_feature_aa64_bti(const ARMISARegisters *id)
return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, BT) != 0;
}
+static inline bool isar_feature_aa64_mte_insn_reg(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) != 0;
+}
+
+static inline bool isar_feature_aa64_mte(const ARMISARegisters *id)
+{
+ return FIELD_EX64(id->id_aa64pfr1, ID_AA64PFR1, MTE) >= 2;
+}
+
static inline bool isar_feature_aa64_pmu_8_1(const ARMISARegisters *id)
{
return FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) >= 4 &&
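
Two idioms from this header recur throughout the series. The typecheck_memtxattrs() lvalue macros make the generic TLB bits self-documenting: the page-table walker later in this pull sets arm_tlb_bti_gp(txattrs) = true for guarded pages, and the MTE path does the same with arm_tlb_mte_tagged(). And ID_AA64PFR1.MTE is graded: 1 means only the instructions and system registers exist, 2 or more means full tag checking. A sketch of gating on the two predicates, assuming a caller holding an ARMCPU *cpu:

    if (cpu_isar_feature(aa64_mte, cpu)) {
        /* MTE >= 2: full support; realize demands tag memory (see cpu.c). */
    } else if (cpu_isar_feature(aa64_mte_insn_reg, cpu)) {
        /* MTE == 1: insns and registers only; PSTATE.TCO is RAZ/WI. */
    }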
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index a0c1d88..a2f4733 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -654,6 +654,11 @@ static void aarch64_max_initfn(Object *obj)
t = cpu->isar.id_aa64pfr1;
t = FIELD_DP64(t, ID_AA64PFR1, BT, 1);
+ /*
+ * Begin with full support for MTE; will be downgraded to MTE=1
+ * during realize if the board provides no tag memory.
+ */
+ t = FIELD_DP64(t, ID_AA64PFR1, MTE, 2);
cpu->isar.id_aa64pfr1 = t;
t = cpu->isar.id_aa64mmfr1;
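
FIELD_DP64() is QEMU's registerfields deposit helper: it returns its first operand with just the named field replaced, which is why the code copies id_aa64pfr1 into t, deposits into it, and writes the whole value back. For ID_AA64PFR1.MTE, which occupies bits [11:8], the deposit above is equivalent to this sketch (hypothetical helper name):

    #include <stdint.h>

    static inline uint64_t deposit_mte(uint64_t pfr1, uint64_t mte)
    {
        /* FIELD_DP64(pfr1, ID_AA64PFR1, MTE, mte), written out by hand. */
        return (pfr1 & ~(0xfull << 8)) | ((mte & 0xf) << 8);
    }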
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index bc0649a..8682630 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -1119,85 +1119,41 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
* (which matches the usual QEMU behaviour of not implementing either
* alignment faults or any memory attribute handling).
*/
-
- ARMCPU *cpu = env_archcpu(env);
- uint64_t blocklen = 4 << cpu->dcz_blocksize;
+ int blocklen = 4 << env_archcpu(env)->dcz_blocksize;
uint64_t vaddr = vaddr_in & ~(blocklen - 1);
+ int mmu_idx = cpu_mmu_index(env, false);
+ void *mem;
+
+ /*
+ * Trapless lookup. In addition to actual invalid page, may
+ * return NULL for I/O, watchpoints, clean pages, etc.
+ */
+ mem = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
#ifndef CONFIG_USER_ONLY
- {
+ if (unlikely(!mem)) {
+ uintptr_t ra = GETPC();
+
/*
- * Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than
- * the block size so we might have to do more than one TLB lookup.
- * We know that in fact for any v8 CPU the page size is at least 4K
- * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only
- * 1K as an artefact of legacy v5 subpage support being present in the
- * same QEMU executable. So in practice the hostaddr[] array has
- * two entries, given the current setting of TARGET_PAGE_BITS_MIN.
+ * Trap if accessing an invalid page. DC_ZVA requires that we supply
+ * the original pointer for an invalid page. But watchpoints require
+ * that we probe the actual space. So do both.
*/
- int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE);
- void *hostaddr[DIV_ROUND_UP(2 * KiB, 1 << TARGET_PAGE_BITS_MIN)];
- int try, i;
- unsigned mmu_idx = cpu_mmu_index(env, false);
- TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
-
- assert(maxidx <= ARRAY_SIZE(hostaddr));
-
- for (try = 0; try < 2; try++) {
-
- for (i = 0; i < maxidx; i++) {
- hostaddr[i] = tlb_vaddr_to_host(env,
- vaddr + TARGET_PAGE_SIZE * i,
- 1, mmu_idx);
- if (!hostaddr[i]) {
- break;
- }
- }
- if (i == maxidx) {
- /*
- * If it's all in the TLB it's fair game for just writing to;
- * we know we don't need to update dirty status, etc.
- */
- for (i = 0; i < maxidx - 1; i++) {
- memset(hostaddr[i], 0, TARGET_PAGE_SIZE);
- }
- memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE));
- return;
- }
+ (void) probe_write(env, vaddr_in, 1, mmu_idx, ra);
+ mem = probe_write(env, vaddr, blocklen, mmu_idx, ra);
+
+ if (unlikely(!mem)) {
/*
- * OK, try a store and see if we can populate the tlb. This
- * might cause an exception if the memory isn't writable,
- * in which case we will longjmp out of here. We must for
- * this purpose use the actual register value passed to us
- * so that we get the fault address right.
+ * The only remaining reason for mem == NULL is I/O.
+ * Just do a series of byte writes as the architecture demands.
*/
- helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC());
- /* Now we can populate the other TLB entries, if any */
- for (i = 0; i < maxidx; i++) {
- uint64_t va = vaddr + TARGET_PAGE_SIZE * i;
- if (va != (vaddr_in & TARGET_PAGE_MASK)) {
- helper_ret_stb_mmu(env, va, 0, oi, GETPC());
- }
+ for (int i = 0; i < blocklen; i++) {
+ cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra);
}
- }
-
- /*
- * Slow path (probably attempt to do this to an I/O device or
- * similar, or clearing of a block of code we have translations
- * cached for). Just do a series of byte writes as the architecture
- * demands. It's not worth trying to use a cpu_physical_memory_map(),
- * memset(), unmap() sequence here because:
- * + we'd need to account for the blocksize being larger than a page
- * + the direct-RAM access case is almost always going to be dealt
- * with in the fastpath code above, so there's no speed benefit
- * + we would have to deal with the map returning NULL because the
- * bounce buffer was in use
- */
- for (i = 0; i < blocklen; i++) {
- helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETPC());
+ return;
}
}
-#else
- memset(g2h(vaddr), 0, blocklen);
#endif
+
+ memset(mem, 0, blocklen);
}
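
The rewritten dc_zva shows the lookup pattern the MTE series leans on: a trapless tlb_vaddr_to_host() first, then probe_write(), which raises the architectural fault (via longjmp) for a genuinely invalid page and honours watchpoints, and finally byte-at-a-time stores for I/O regions, where no host pointer exists. (The real helper also probes the original unaligned pointer first so the reported fault address is the one the guest used.) A condensed sketch of the shape, using the same QEMU APIs as the diff:

    static void zero_block(CPUARMState *env, uint64_t vaddr, int blocklen,
                           int mmu_idx, uintptr_t ra)
    {
        /* Fast path: direct host pointer; cannot fault. */
        void *mem = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);

        if (unlikely(!mem)) {
            /* Faults here if the page is invalid; honours watchpoints. */
            mem = probe_write(env, vaddr, blocklen, mmu_idx, ra);
            if (unlikely(!mem)) {
                /* Device memory: no host pointer, store bytes one by one. */
                for (int i = 0; i < blocklen; i++) {
                    cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra);
                }
                return;
            }
        }
        memset(mem, 0, blocklen);
    }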
diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index 3df7c18..5b0b699 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -103,3 +103,19 @@ DEF_HELPER_FLAGS_3(autda, TCG_CALL_NO_WG, i64, env, i64, i64)
DEF_HELPER_FLAGS_3(autdb, TCG_CALL_NO_WG, i64, env, i64, i64)
DEF_HELPER_FLAGS_2(xpaci, TCG_CALL_NO_RWG_SE, i64, env, i64)
DEF_HELPER_FLAGS_2(xpacd, TCG_CALL_NO_RWG_SE, i64, env, i64)
+
+DEF_HELPER_FLAGS_3(mte_check1, TCG_CALL_NO_WG, i64, env, i32, i64)
+DEF_HELPER_FLAGS_3(mte_checkN, TCG_CALL_NO_WG, i64, env, i32, i64)
+DEF_HELPER_FLAGS_3(mte_check_zva, TCG_CALL_NO_WG, i64, env, i32, i64)
+DEF_HELPER_FLAGS_3(irg, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_4(addsubg, TCG_CALL_NO_RWG_SE, i64, env, i64, s32, i32)
+DEF_HELPER_FLAGS_3(ldg, TCG_CALL_NO_WG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(stg, TCG_CALL_NO_WG, void, env, i64, i64)
+DEF_HELPER_FLAGS_3(stg_parallel, TCG_CALL_NO_WG, void, env, i64, i64)
+DEF_HELPER_FLAGS_2(stg_stub, TCG_CALL_NO_WG, void, env, i64)
+DEF_HELPER_FLAGS_3(st2g, TCG_CALL_NO_WG, void, env, i64, i64)
+DEF_HELPER_FLAGS_3(st2g_parallel, TCG_CALL_NO_WG, void, env, i64, i64)
+DEF_HELPER_FLAGS_2(st2g_stub, TCG_CALL_NO_WG, void, env, i64)
+DEF_HELPER_FLAGS_2(ldgm, TCG_CALL_NO_WG, i64, env, i64)
+DEF_HELPER_FLAGS_3(stgm, TCG_CALL_NO_WG, void, env, i64, i64)
+DEF_HELPER_FLAGS_3(stzgm_tags, TCG_CALL_NO_WG, void, env, i64, i64)
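
The DEF_HELPER_FLAGS_n macros declare the C function and register its TCG call signature in one go; the type letters map to C types (env -> CPUARMState *, i64 -> uint64_t, i32 -> uint32_t, s32 -> int32_t, tl -> target_ulong). So the first declaration above corresponds to roughly this prototype, with parameter names that are illustrative only:

    uint64_t helper_mte_check1(CPUARMState *env, uint32_t desc, uint64_t ptr);

The TCG_CALL_NO_WG flag promises the helper does not write TCG globals, letting the code generator keep them in registers across the call; TCG_CALL_NO_RWG_SE additionally rules out reads of globals and side effects.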
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 7a20075..63c4a08 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -1196,6 +1196,64 @@ DEF_HELPER_FLAGS_4(sve_ld1sds_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ld1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1hsu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hdu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hds_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1hsu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hdu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1hds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1sdu_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1sds_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ld1sdu_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ld1sds_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
DEF_HELPER_FLAGS_4(sve_ldff1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldff1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldff1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@@ -1227,6 +1285,55 @@ DEF_HELPER_FLAGS_4(sve_ldff1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldff1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldff1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldff1hh_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hsu_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hdu_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hss_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hds_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldff1hh_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hsu_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hdu_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hss_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1hds_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldff1ss_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1sdu_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1sds_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldff1ss_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1sdu_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1sds_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldff1dd_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldff1dd_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+
DEF_HELPER_FLAGS_4(sve_ldnf1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldnf1bsu_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@@ -1258,6 +1365,55 @@ DEF_HELPER_FLAGS_4(sve_ldnf1sds_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldnf1dd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_ldnf1dd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1bhu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1bsu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1bdu_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1bhs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1bss_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1bds_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1hh_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hsu_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hdu_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hss_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hds_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1hh_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hsu_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hdu_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hss_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1hds_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1ss_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1sdu_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1sds_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1ss_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1sdu_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1sds_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_ldnf1dd_le_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_ldnf1dd_be_r_mte, TCG_CALL_NO_WG,
+ void, env, ptr, tl, i32)
+
DEF_HELPER_FLAGS_4(sve_st1bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st2bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st3bb_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
@@ -1305,6 +1461,53 @@ DEF_HELPER_FLAGS_4(sve_st1hd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st1sd_le_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
DEF_HELPER_FLAGS_4(sve_st1sd_be_r, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4bb_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4hh_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4hh_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4ss_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4ss_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4dd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st2dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st3dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st4dd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1bh_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1bs_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1bd_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1hs_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1hd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1hs_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1hd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve_st1sd_le_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+DEF_HELPER_FLAGS_4(sve_st1sd_be_r_mte, TCG_CALL_NO_WG, void, env, ptr, tl, i32)
+
DEF_HELPER_FLAGS_6(sve_ldbsu_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu, TCG_CALL_NO_WG,
@@ -1414,6 +1617,115 @@ DEF_HELPER_FLAGS_6(sve_ldsds_le_zd, TCG_CALL_NO_WG,
DEF_HELPER_FLAGS_6(sve_ldsds_be_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldbsu_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhsu_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhsu_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldss_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldss_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldbss_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhss_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhss_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_ldbsu_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhsu_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhsu_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldss_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldss_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldbss_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhss_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhss_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_ldbdu_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhdu_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhdu_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsdu_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsdu_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldbds_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhds_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhds_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_ldbdu_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhdu_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhdu_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsdu_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsdu_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldbds_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhds_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhds_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_ldbdu_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhdu_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhdu_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsdu_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsdu_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_lddd_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldbds_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhds_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldhds_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldsds_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zsu, TCG_CALL_NO_WG,
@@ -1523,6 +1835,115 @@ DEF_HELPER_FLAGS_6(sve_ldffsds_le_zd, TCG_CALL_NO_WG,
DEF_HELPER_FLAGS_6(sve_ldffsds_be_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffbsu_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffss_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffss_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffbss_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhss_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhss_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_ldffbsu_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhsu_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhsu_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffss_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffss_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffbss_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhss_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhss_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_ldffbdu_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffbds_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhds_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhds_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_ldffbdu_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffbds_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhds_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhds_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_ldffbdu_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhdu_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhdu_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsdu_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsdu_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffdd_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffbds_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhds_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffhds_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_ldffsds_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
DEF_HELPER_FLAGS_6(sve_stbs_zsu, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_6(sve_sths_le_zsu, TCG_CALL_NO_WG,
@@ -1590,4 +2011,71 @@ DEF_HELPER_FLAGS_6(sve_stdd_le_zd, TCG_CALL_NO_WG,
DEF_HELPER_FLAGS_6(sve_stdd_be_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stbs_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sths_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sths_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stss_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stss_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_stbs_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sths_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sths_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stss_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stss_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_stbd_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sthd_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sthd_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stsd_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stsd_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_le_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_be_zsu_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_stbd_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sthd_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sthd_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stsd_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stsd_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_le_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_be_zss_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_6(sve_stbd_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sthd_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_sthd_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stsd_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stsd_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_le_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_6(sve_stdd_be_zd_mte, TCG_CALL_NO_WG,
+ void, env, ptr, ptr, ptr, tl, i32)
+
DEF_HELPER_FLAGS_4(sve2_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
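
The _mte declarations mirror the existing helpers one-for-one; the names encode the shape (ld1bb = one register of byte elements from byte memory; 2/3/4 = register count; le/be = endianness; zsu/zss/zd = 32-bit unsigned, 32-bit signed, or 64-bit vector offsets for gathers). Families this size are normally stamped out with preprocessor macros rather than written by hand; a purely hypothetical sketch of the idea, as the real sve_helper.c machinery differs in detail:

    /* Hypothetical: declare the plain and MTE variants of one contiguous load. */
    #define DECL_SVE_LD1(NAME)                                      \
        DEF_HELPER_FLAGS_4(sve_##NAME##_r, TCG_CALL_NO_WG,          \
                           void, env, ptr, tl, i32)                 \
        DEF_HELPER_FLAGS_4(sve_##NAME##_r_mte, TCG_CALL_NO_WG,      \
                           void, env, ptr, tl, i32)

    DECL_SVE_LD1(ld1bb)
    DECL_SVE_LD1(ld1hh_le)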
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 972a766..dc9c29f 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -44,7 +44,8 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
bool s1_is_el0,
hwaddr *phys_ptr, MemTxAttrs *txattrs, int *prot,
target_ulong *page_size_ptr,
- ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs);
+ ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs)
+ __attribute__((nonnull));
#endif
static void switch_mode(CPUARMState *env, int mode);
@@ -2011,9 +2012,19 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
uint32_t valid_mask = 0x3fff;
ARMCPU *cpu = env_archcpu(env);
- if (arm_el_is_aa64(env, 3)) {
+ if (ri->state == ARM_CP_STATE_AA64) {
value |= SCR_FW | SCR_AW; /* these two bits are RES1. */
valid_mask &= ~SCR_NET;
+
+ if (cpu_isar_feature(aa64_lor, cpu)) {
+ valid_mask |= SCR_TLOR;
+ }
+ if (cpu_isar_feature(aa64_pauth, cpu)) {
+ valid_mask |= SCR_API | SCR_APK;
+ }
+ if (cpu_isar_feature(aa64_mte, cpu)) {
+ valid_mask |= SCR_ATA;
+ }
} else {
valid_mask &= ~(SCR_RW | SCR_ST);
}
@@ -2032,12 +2043,6 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
valid_mask &= ~SCR_SMD;
}
}
- if (cpu_isar_feature(aa64_lor, cpu)) {
- valid_mask |= SCR_TLOR;
- }
- if (cpu_isar_feature(aa64_pauth, cpu)) {
- valid_mask |= SCR_API | SCR_APK;
- }
/* Clear all-context RES0 bits. */
value &= valid_mask;
@@ -4697,6 +4702,22 @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri,
{
ARMCPU *cpu = env_archcpu(env);
+ if (arm_feature(env, ARM_FEATURE_PMSA) && !cpu->has_mpu) {
+ /* M bit is RAZ/WI for PMSA with no MPU implemented */
+ value &= ~SCTLR_M;
+ }
+
+ /* ??? Lots of these bits are not implemented. */
+
+ if (ri->state == ARM_CP_STATE_AA64 && !cpu_isar_feature(aa64_mte, cpu)) {
+ if (ri->opc1 == 6) { /* SCTLR_EL3 */
+ value &= ~(SCTLR_ITFSB | SCTLR_TCF | SCTLR_ATA);
+ } else {
+ value &= ~(SCTLR_ITFSB | SCTLR_TCF0 | SCTLR_TCF |
+ SCTLR_ATA0 | SCTLR_ATA);
+ }
+ }
+
if (raw_read(env, ri) == value) {
/* Skip the TLB flush if nothing actually changed; Linux likes
* to do a lot of pointless SCTLR writes.
@@ -4704,13 +4725,8 @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri,
return;
}
- if (arm_feature(env, ARM_FEATURE_PMSA) && !cpu->has_mpu) {
- /* M bit is RAZ/WI for PMSA with no MPU implemented */
- value &= ~SCTLR_M;
- }
-
raw_write(env, ri, value);
- /* ??? Lots of these bits are not implemented. */
+
/* This may enable/disable the MMU, so do a TLB flush. */
tlb_flush(CPU(cpu));
@@ -5236,17 +5252,22 @@ static void do_hcr_write(CPUARMState *env, uint64_t value, uint64_t valid_mask)
if (cpu_isar_feature(aa64_pauth, cpu)) {
valid_mask |= HCR_API | HCR_APK;
}
+ if (cpu_isar_feature(aa64_mte, cpu)) {
+ valid_mask |= HCR_ATA | HCR_DCT | HCR_TID5;
+ }
}
/* Clear RES0 bits. */
value &= valid_mask;
- /* These bits change the MMU setup:
+ /*
+ * These bits change the MMU setup:
* HCR_VM enables stage 2 translation
* HCR_PTW forbids certain page-table setups
- * HCR_DC Disables stage1 and enables stage2 translation
+ * HCR_DC disables stage1 and enables stage2 translation
+ * HCR_DCT enables tagging on (disabled) stage1 translation
*/
- if ((env->cp15.hcr_el2 ^ value) & (HCR_VM | HCR_PTW | HCR_DC)) {
+ if ((env->cp15.hcr_el2 ^ value) & (HCR_VM | HCR_PTW | HCR_DC | HCR_DCT)) {
tlb_flush(CPU(cpu));
}
env->cp15.hcr_el2 = value;
@@ -5861,6 +5882,9 @@ static void define_arm_vh_e2h_redirects_aliases(ARMCPU *cpu)
{ K(3, 0, 1, 2, 0), K(3, 4, 1, 2, 0), K(3, 5, 1, 2, 0),
"ZCR_EL1", "ZCR_EL2", "ZCR_EL12", isar_feature_aa64_sve },
+ { K(3, 0, 5, 6, 0), K(3, 4, 5, 6, 0), K(3, 5, 5, 6, 0),
+ "TFSR_EL1", "TFSR_EL2", "TFSR_EL12", isar_feature_aa64_mte },
+
/* TODO: ARMv8.2-SPE -- PMSCR_EL2 */
/* TODO: ARMv8.4-Trace -- TRFCR_EL2 */
};
@@ -6835,6 +6859,165 @@ static const ARMCPRegInfo dcpodp_reg[] = {
};
#endif /*CONFIG_USER_ONLY*/
+static CPAccessResult access_aa64_tid5(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ if ((arm_current_el(env) < 2) && (arm_hcr_el2_eff(env) & HCR_TID5)) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+
+ return CP_ACCESS_OK;
+}
+
+static CPAccessResult access_mte(CPUARMState *env, const ARMCPRegInfo *ri,
+ bool isread)
+{
+ int el = arm_current_el(env);
+
+ if (el < 2 &&
+ arm_feature(env, ARM_FEATURE_EL2) &&
+ !(arm_hcr_el2_eff(env) & HCR_ATA)) {
+ return CP_ACCESS_TRAP_EL2;
+ }
+ if (el < 3 &&
+ arm_feature(env, ARM_FEATURE_EL3) &&
+ !(env->cp15.scr_el3 & SCR_ATA)) {
+ return CP_ACCESS_TRAP_EL3;
+ }
+ return CP_ACCESS_OK;
+}
+
+static uint64_t tco_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ return env->pstate & PSTATE_TCO;
+}
+
+static void tco_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t val)
+{
+ env->pstate = (env->pstate & ~PSTATE_TCO) | (val & PSTATE_TCO);
+}
+
+static const ARMCPRegInfo mte_reginfo[] = {
+ { .name = "TFSRE0_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 6, .opc2 = 1,
+ .access = PL1_RW, .accessfn = access_mte,
+ .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[0]) },
+ { .name = "TFSR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 5, .crm = 6, .opc2 = 0,
+ .access = PL1_RW, .accessfn = access_mte,
+ .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[1]) },
+ { .name = "TFSR_EL2", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 6, .opc2 = 0,
+ .access = PL2_RW, .accessfn = access_mte,
+ .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[2]) },
+ { .name = "TFSR_EL3", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 6, .crn = 5, .crm = 6, .opc2 = 0,
+ .access = PL3_RW,
+ .fieldoffset = offsetof(CPUARMState, cp15.tfsr_el[3]) },
+ { .name = "RGSR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 5,
+ .access = PL1_RW, .accessfn = access_mte,
+ .fieldoffset = offsetof(CPUARMState, cp15.rgsr_el1) },
+ { .name = "GCR_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 0, .crn = 1, .crm = 0, .opc2 = 6,
+ .access = PL1_RW, .accessfn = access_mte,
+ .fieldoffset = offsetof(CPUARMState, cp15.gcr_el1) },
+ { .name = "GMID_EL1", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 1, .crn = 0, .crm = 0, .opc2 = 4,
+ .access = PL1_R, .accessfn = access_aa64_tid5,
+ .type = ARM_CP_CONST, .resetvalue = GMID_EL1_BS },
+ { .name = "TCO", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 7,
+ .type = ARM_CP_NO_RAW,
+ .access = PL0_RW, .readfn = tco_read, .writefn = tco_write },
+ { .name = "DC_IGVAC", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 3,
+ .type = ARM_CP_NOP, .access = PL1_W,
+ .accessfn = aa64_cacheop_poc_access },
+ { .name = "DC_IGSW", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 4,
+ .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw },
+ { .name = "DC_IGDVAC", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 5,
+ .type = ARM_CP_NOP, .access = PL1_W,
+ .accessfn = aa64_cacheop_poc_access },
+ { .name = "DC_IGDSW", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 6,
+ .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw },
+ { .name = "DC_CGSW", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 4,
+ .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw },
+ { .name = "DC_CGDSW", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 6,
+ .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw },
+ { .name = "DC_CIGSW", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 4,
+ .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw },
+ { .name = "DC_CIGDSW", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 6,
+ .type = ARM_CP_NOP, .access = PL1_W, .accessfn = access_tsw },
+ REGINFO_SENTINEL
+};
+
+static const ARMCPRegInfo mte_tco_ro_reginfo[] = {
+ { .name = "TCO", .state = ARM_CP_STATE_AA64,
+ .opc0 = 3, .opc1 = 3, .crn = 4, .crm = 2, .opc2 = 7,
+ .type = ARM_CP_CONST, .access = PL0_RW, },
+ REGINFO_SENTINEL
+};
+
+static const ARMCPRegInfo mte_el0_cacheop_reginfo[] = {
+ { .name = "DC_CGVAC", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 3,
+ .type = ARM_CP_NOP, .access = PL0_W,
+ .accessfn = aa64_cacheop_poc_access },
+ { .name = "DC_CGDVAC", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 5,
+ .type = ARM_CP_NOP, .access = PL0_W,
+ .accessfn = aa64_cacheop_poc_access },
+ { .name = "DC_CGVAP", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 3,
+ .type = ARM_CP_NOP, .access = PL0_W,
+ .accessfn = aa64_cacheop_poc_access },
+ { .name = "DC_CGDVAP", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 5,
+ .type = ARM_CP_NOP, .access = PL0_W,
+ .accessfn = aa64_cacheop_poc_access },
+ { .name = "DC_CGVADP", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 3,
+ .type = ARM_CP_NOP, .access = PL0_W,
+ .accessfn = aa64_cacheop_poc_access },
+ { .name = "DC_CGDVADP", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 5,
+ .type = ARM_CP_NOP, .access = PL0_W,
+ .accessfn = aa64_cacheop_poc_access },
+ { .name = "DC_CIGVAC", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 3,
+ .type = ARM_CP_NOP, .access = PL0_W,
+ .accessfn = aa64_cacheop_poc_access },
+ { .name = "DC_CIGDVAC", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 5,
+ .type = ARM_CP_NOP, .access = PL0_W,
+ .accessfn = aa64_cacheop_poc_access },
+ { .name = "DC_GVA", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 3,
+ .access = PL0_W, .type = ARM_CP_DC_GVA,
+#ifndef CONFIG_USER_ONLY
+ /* Avoid overhead of an access check that always passes in user-mode */
+ .accessfn = aa64_zva_access,
+#endif
+ },
+ { .name = "DC_GZVA", .state = ARM_CP_STATE_AA64,
+ .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 4, .opc2 = 4,
+ .access = PL0_W, .type = ARM_CP_DC_GZVA,
+#ifndef CONFIG_USER_ONLY
+ /* Avoid overhead of an access check that always passes in user-mode */
+ .accessfn = aa64_zva_access,
+#endif
+ },
+ REGINFO_SENTINEL
+};
+
#endif
static CPAccessResult access_predinv(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -7960,6 +8143,19 @@ void register_cp_regs_for_features(ARMCPU *cpu)
}
}
#endif /*CONFIG_USER_ONLY*/
+
+ /*
+ * If full MTE is enabled, add all of the system registers.
+ * If only "instructions available at EL0" are enabled,
+ * then define only a RAZ/WI version of PSTATE.TCO.
+ */
+ if (cpu_isar_feature(aa64_mte, cpu)) {
+ define_arm_cp_regs(cpu, mte_reginfo);
+ define_arm_cp_regs(cpu, mte_el0_cacheop_reginfo);
+ } else if (cpu_isar_feature(aa64_mte_insn_reg, cpu)) {
+ define_arm_cp_regs(cpu, mte_tco_ro_reginfo);
+ define_arm_cp_regs(cpu, mte_el0_cacheop_reginfo);
+ }
#endif
if (cpu_isar_feature(any_predinv, cpu)) {
@@ -9509,6 +9705,9 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
break;
}
}
+ if (cpu_isar_feature(aa64_mte, cpu)) {
+ new_mode |= PSTATE_TCO;
+ }
pstate_write(env, PSTATE_DAIF | new_mode);
env->aarch64 = 1;
@@ -9614,42 +9813,6 @@ void arm_cpu_do_interrupt(CPUState *cs)
}
#endif /* !CONFIG_USER_ONLY */
-/* Return the exception level which controls this address translation regime */
-static uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx)
-{
- switch (mmu_idx) {
- case ARMMMUIdx_E20_0:
- case ARMMMUIdx_E20_2:
- case ARMMMUIdx_E20_2_PAN:
- case ARMMMUIdx_Stage2:
- case ARMMMUIdx_E2:
- return 2;
- case ARMMMUIdx_SE3:
- return 3;
- case ARMMMUIdx_SE10_0:
- return arm_el_is_aa64(env, 3) ? 1 : 3;
- case ARMMMUIdx_SE10_1:
- case ARMMMUIdx_SE10_1_PAN:
- case ARMMMUIdx_Stage1_E0:
- case ARMMMUIdx_Stage1_E1:
- case ARMMMUIdx_Stage1_E1_PAN:
- case ARMMMUIdx_E10_0:
- case ARMMMUIdx_E10_1:
- case ARMMMUIdx_E10_1_PAN:
- case ARMMMUIdx_MPrivNegPri:
- case ARMMMUIdx_MUserNegPri:
- case ARMMMUIdx_MPriv:
- case ARMMMUIdx_MUser:
- case ARMMMUIdx_MSPrivNegPri:
- case ARMMMUIdx_MSUserNegPri:
- case ARMMMUIdx_MSPriv:
- case ARMMMUIdx_MSUser:
- return 1;
- default:
- g_assert_not_reached();
- }
-}
-
uint64_t arm_sctlr(CPUARMState *env, int el)
{
/* Only EL0 needs to be adjusted for EL1&0 or EL2&0. */
@@ -9732,15 +9895,6 @@ static inline uint64_t regime_ttbr(CPUARMState *env, ARMMMUIdx mmu_idx,
#endif /* !CONFIG_USER_ONLY */
-/* Return the TCR controlling this translation regime */
-static inline TCR *regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx)
-{
- if (mmu_idx == ARMMMUIdx_Stage2) {
- return &env->cp15.vtcr_el2;
- }
- return &env->cp15.tcr_el[regime_el(env, mmu_idx)];
-}
-
/* Convert a possible stage1+2 MMU index into the appropriate
* stage 1 MMU index
*/
@@ -10541,6 +10695,16 @@ static int aa64_va_parameter_tbid(uint64_t tcr, ARMMMUIdx mmu_idx)
}
}
+static int aa64_va_parameter_tcma(uint64_t tcr, ARMMMUIdx mmu_idx)
+{
+ if (regime_has_2_ranges(mmu_idx)) {
+ return extract64(tcr, 57, 2);
+ } else {
+ /* Replicate the single TCMA bit so we always have 2 bits. */
+ return extract32(tcr, 30, 1) * 3;
+ }
+}
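As a quick orientation (editorial sketch, not part of the patch; the values are invented), the two-bit result is indexed by bit 55 of the pointer, so the replication above makes a single-range regime behave identically for both halves of the address space:

    uint64_t tcr = 1ULL << 58;            /* two-range regime, TCMA1 set */
    int tcma = extract64(tcr, 57, 2);     /* == 2 */
    int bit55 = 1;                        /* pointer in the upper VA range */
    bool suppress = (tcma >> bit55) & 1;  /* true: matched tags unchecked */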
+
ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
ARMMMUIdx mmu_idx, bool data)
{
@@ -10935,22 +11099,19 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
}
/* When in aarch64 mode, and BTI is enabled, remember GP in the IOTLB. */
if (aarch64 && guarded && cpu_isar_feature(aa64_bti, cpu)) {
- txattrs->target_tlb_bit0 = true;
+ arm_tlb_bti_gp(txattrs) = true;
}
- if (cacheattrs != NULL) {
- if (mmu_idx == ARMMMUIdx_Stage2) {
- cacheattrs->attrs = convert_stage2_attrs(env,
- extract32(attrs, 0, 4));
- } else {
- /* Index into MAIR registers for cache attributes */
- uint8_t attrindx = extract32(attrs, 0, 3);
- uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)];
- assert(attrindx <= 7);
- cacheattrs->attrs = extract64(mair, attrindx * 8, 8);
- }
- cacheattrs->shareability = extract32(attrs, 6, 2);
+ if (mmu_idx == ARMMMUIdx_Stage2) {
+ cacheattrs->attrs = convert_stage2_attrs(env, extract32(attrs, 0, 4));
+ } else {
+ /* Index into MAIR registers for cache attributes */
+ uint8_t attrindx = extract32(attrs, 0, 3);
+ uint64_t mair = env->cp15.mair_el[regime_el(env, mmu_idx)];
+ assert(attrindx <= 7);
+ cacheattrs->attrs = extract64(mair, attrindx * 8, 8);
}
+ cacheattrs->shareability = extract32(attrs, 6, 2);
*phys_ptr = descaddr;
*page_size_ptr = page_size;
@@ -11673,9 +11834,19 @@ static uint8_t combine_cacheattr_nibble(uint8_t s1, uint8_t s2)
*/
static ARMCacheAttrs combine_cacheattrs(ARMCacheAttrs s1, ARMCacheAttrs s2)
{
- uint8_t s1lo = extract32(s1.attrs, 0, 4), s2lo = extract32(s2.attrs, 0, 4);
- uint8_t s1hi = extract32(s1.attrs, 4, 4), s2hi = extract32(s2.attrs, 4, 4);
+ uint8_t s1lo, s2lo, s1hi, s2hi;
ARMCacheAttrs ret;
+ bool tagged = false;
+
+ if (s1.attrs == 0xf0) {
+ tagged = true;
+ s1.attrs = 0xff;
+ }
+
+ s1lo = extract32(s1.attrs, 0, 4);
+ s2lo = extract32(s2.attrs, 0, 4);
+ s1hi = extract32(s1.attrs, 4, 4);
+ s2hi = extract32(s2.attrs, 4, 4);
/* Combine shareability attributes (table D4-43) */
if (s1.shareability == 2 || s2.shareability == 2) {
@@ -11723,6 +11894,11 @@ static ARMCacheAttrs combine_cacheattrs(ARMCacheAttrs s1, ARMCacheAttrs s2)
}
}
+ /* TODO: CombineS1S2Desc does not consider transient, only WB, RWA. */
+ if (tagged && ret.attrs == 0xff) {
+ ret.attrs = 0xf0;
+ }
+
return ret;
}
@@ -11785,28 +11961,32 @@ bool get_phys_addr(CPUARMState *env, target_ulong address,
ret = get_phys_addr_lpae(env, ipa, access_type, ARMMMUIdx_Stage2,
mmu_idx == ARMMMUIdx_E10_0,
phys_ptr, attrs, &s2_prot,
- page_size, fi,
- cacheattrs != NULL ? &cacheattrs2 : NULL);
+ page_size, fi, &cacheattrs2);
fi->s2addr = ipa;
/* Combine the S1 and S2 perms. */
*prot &= s2_prot;
- /* Combine the S1 and S2 cache attributes, if needed */
- if (!ret && cacheattrs != NULL) {
- if (env->cp15.hcr_el2 & HCR_DC) {
- /*
- * HCR.DC forces the first stage attributes to
- * Normal Non-Shareable,
- * Inner Write-Back Read-Allocate Write-Allocate,
- * Outer Write-Back Read-Allocate Write-Allocate.
- */
+ /* If S2 fails, return early. */
+ if (ret) {
+ return ret;
+ }
+
+ /* Combine the S1 and S2 cache attributes. */
+ if (env->cp15.hcr_el2 & HCR_DC) {
+ /*
+ * HCR.DC forces the first stage attributes to
+ * Normal Non-Shareable,
+ * Inner Write-Back Read-Allocate Write-Allocate,
+ * Outer Write-Back Read-Allocate Write-Allocate.
+ * Do not overwrite Tagged within attrs.
+ */
+ if (cacheattrs->attrs != 0xf0) {
cacheattrs->attrs = 0xff;
- cacheattrs->shareability = 0;
}
- *cacheattrs = combine_cacheattrs(*cacheattrs, cacheattrs2);
+ cacheattrs->shareability = 0;
}
-
- return ret;
+ *cacheattrs = combine_cacheattrs(*cacheattrs, cacheattrs2);
+ return 0;
} else {
/*
* For non-EL2 CPUs a stage1+stage2 translation is just stage 1.
@@ -11867,6 +12047,9 @@ bool get_phys_addr(CPUARMState *env, target_ulong address,
/* Definitely a real MMU, not an MPU */
if (regime_translation_disabled(env, mmu_idx)) {
+ uint64_t hcr;
+ uint8_t memattr;
+
/*
* MMU disabled. S1 addresses within aa64 translation regimes are
* still checked for bounds -- see AArch64.TranslateAddressS1Off.
@@ -11904,6 +12087,27 @@ bool get_phys_addr(CPUARMState *env, target_ulong address,
*phys_ptr = address;
*prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
*page_size = TARGET_PAGE_SIZE;
+
+ /* Fill in cacheattr a-la AArch64.TranslateAddressS1Off. */
+ hcr = arm_hcr_el2_eff(env);
+ cacheattrs->shareability = 0;
+ if (hcr & HCR_DC) {
+ if (hcr & HCR_DCT) {
+ memattr = 0xf0; /* Tagged, Normal, WB, RWA */
+ } else {
+ memattr = 0xff; /* Normal, WB, RWA */
+ }
+ } else if (access_type == MMU_INST_FETCH) {
+ if (regime_sctlr(env, mmu_idx) & SCTLR_I) {
+ memattr = 0xee; /* Normal, WT, RA, NT */
+ } else {
+ memattr = 0x44; /* Normal, NC, No */
+ }
+            cacheattrs->shareability = 2; /* outer shareable */
+ } else {
+ memattr = 0x00; /* Device, nGnRnE */
+ }
+ cacheattrs->attrs = memattr;
return 0;
}
@@ -11931,11 +12135,12 @@ hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr,
bool ret;
ARMMMUFaultInfo fi = {};
ARMMMUIdx mmu_idx = arm_mmu_idx(env);
+ ARMCacheAttrs cacheattrs = {};
*attrs = (MemTxAttrs) {};
ret = get_phys_addr(env, addr, 0, mmu_idx, &phys_addr,
- attrs, &prot, &page_size, &fi, NULL);
+ attrs, &prot, &page_size, &fi, &cacheattrs);
if (ret) {
return -1;
@@ -12565,6 +12770,36 @@ static uint32_t rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
}
}
+ if (cpu_isar_feature(aa64_mte, env_archcpu(env))) {
+ /*
+ * Set MTE_ACTIVE if any access may be Checked, and leave clear
+ * if all accesses must be Unchecked:
+ * 1) If no TBI, then there are no tags in the address to check,
+ * 2) If Tag Check Override, then all accesses are Unchecked,
+     * 3) If Tag Check Fail == 0, then Checked accesses have no effect,
+ * 4) If no Allocation Tag Access, then all accesses are Unchecked.
+ */
+ if (allocation_tag_access_enabled(env, el, sctlr)) {
+ flags = FIELD_DP32(flags, TBFLAG_A64, ATA, 1);
+ if (tbid
+ && !(env->pstate & PSTATE_TCO)
+ && (sctlr & (el == 0 ? SCTLR_TCF0 : SCTLR_TCF))) {
+ flags = FIELD_DP32(flags, TBFLAG_A64, MTE_ACTIVE, 1);
+ }
+ }
+ /* And again for unprivileged accesses, if required. */
+ if (FIELD_EX32(flags, TBFLAG_A64, UNPRIV)
+ && tbid
+ && !(env->pstate & PSTATE_TCO)
+ && (sctlr & SCTLR_TCF0)
+ && allocation_tag_access_enabled(env, 0, sctlr)) {
+ flags = FIELD_DP32(flags, TBFLAG_A64, MTE0_ACTIVE, 1);
+ }
+ /* Cache TCMA as well as TBI. */
+ flags = FIELD_DP32(flags, TBFLAG_A64, TCMA,
+ aa64_va_parameter_tcma(tcr, mmu_idx));
+ }
+
return rebuild_hflags_common(env, fp_el, mmu_idx, flags);
}
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 2a20c81..759639a 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -96,6 +96,8 @@ DEF_HELPER_FLAGS_1(rebuild_hflags_a32_newel, TCG_CALL_NO_RWG, void, env)
DEF_HELPER_FLAGS_2(rebuild_hflags_a32, TCG_CALL_NO_RWG, void, env, int)
DEF_HELPER_FLAGS_2(rebuild_hflags_a64, TCG_CALL_NO_RWG, void, env, int)
+DEF_HELPER_FLAGS_5(probe_access, TCG_CALL_NO_WG, void, env, tl, i32, i32, i32)
+
DEF_HELPER_1(vfp_get_fpscr, i32, env)
DEF_HELPER_2(vfp_set_fpscr, void, env, i32)
diff --git a/target/arm/internals.h b/target/arm/internals.h
index 4bdbc3a..ae99725 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -913,6 +913,51 @@ static inline bool regime_is_pan(CPUARMState *env, ARMMMUIdx mmu_idx)
}
}
+/* Return the exception level which controls this address translation regime */
+static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx)
+{
+ switch (mmu_idx) {
+ case ARMMMUIdx_E20_0:
+ case ARMMMUIdx_E20_2:
+ case ARMMMUIdx_E20_2_PAN:
+ case ARMMMUIdx_Stage2:
+ case ARMMMUIdx_E2:
+ return 2;
+ case ARMMMUIdx_SE3:
+ return 3;
+ case ARMMMUIdx_SE10_0:
+ return arm_el_is_aa64(env, 3) ? 1 : 3;
+ case ARMMMUIdx_SE10_1:
+ case ARMMMUIdx_SE10_1_PAN:
+ case ARMMMUIdx_Stage1_E0:
+ case ARMMMUIdx_Stage1_E1:
+ case ARMMMUIdx_Stage1_E1_PAN:
+ case ARMMMUIdx_E10_0:
+ case ARMMMUIdx_E10_1:
+ case ARMMMUIdx_E10_1_PAN:
+ case ARMMMUIdx_MPrivNegPri:
+ case ARMMMUIdx_MUserNegPri:
+ case ARMMMUIdx_MPriv:
+ case ARMMMUIdx_MUser:
+ case ARMMMUIdx_MSPrivNegPri:
+ case ARMMMUIdx_MSUserNegPri:
+ case ARMMMUIdx_MSPriv:
+ case ARMMMUIdx_MSUser:
+ return 1;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+/* Return the TCR controlling this translation regime */
+static inline TCR *regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx)
+{
+ if (mmu_idx == ARMMMUIdx_Stage2) {
+ return &env->cp15.vtcr_el2;
+ }
+ return &env->cp15.tcr_el[regime_el(env, mmu_idx)];
+}
+
/* Return the FSR value for a debug exception (watchpoint, hardware
* breakpoint or BKPT insn) targeting the specified exception level.
*/
@@ -1159,6 +1204,9 @@ static inline uint32_t aarch64_pstate_valid_mask(const ARMISARegisters *id)
if (isar_feature_aa64_uao(id)) {
valid |= PSTATE_UAO;
}
+ if (isar_feature_aa64_mte(id)) {
+ valid |= PSTATE_TCO;
+ }
return valid;
}
@@ -1195,6 +1243,24 @@ static inline int exception_target_el(CPUARMState *env)
return target_el;
}
+/* Determine if allocation tags are available. */
+static inline bool allocation_tag_access_enabled(CPUARMState *env, int el,
+ uint64_t sctlr)
+{
+ if (el < 3
+ && arm_feature(env, ARM_FEATURE_EL3)
+ && !(env->cp15.scr_el3 & SCR_ATA)) {
+ return false;
+ }
+ if (el < 2
+ && arm_feature(env, ARM_FEATURE_EL2)
+ && !(arm_hcr_el2_eff(env) & HCR_ATA)) {
+ return false;
+ }
+ sctlr &= (el == 0 ? SCTLR_ATA0 : SCTLR_ATA);
+ return sctlr != 0;
+}
+
#ifndef CONFIG_USER_ONLY
/* Security attributes for an address, as returned by v8m_security_lookup. */
@@ -1228,10 +1294,95 @@ bool get_phys_addr(CPUARMState *env, target_ulong address,
MMUAccessType access_type, ARMMMUIdx mmu_idx,
hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
target_ulong *page_size,
- ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs);
+ ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs)
+ __attribute__((nonnull));
void arm_log_exception(int idx);
#endif /* !CONFIG_USER_ONLY */
+/*
+ * The log2 of the words in the tag block, for GMID_EL1.BS.
+ * This is the maximum, 256 bytes, which manipulates 64 bits of tags.
+ */
+#define GMID_EL1_BS 6
+
+/* We associate one allocation tag per 16 bytes, the minimum. */
+#define LOG2_TAG_GRANULE 4
+#define TAG_GRANULE (1 << LOG2_TAG_GRANULE)
+
+/*
+ * The SVE simd_data field, for memory ops, contains either
+ * rd (5 bits) or a shift count (2 bits).
+ */
+#define SVE_MTEDESC_SHIFT 5
+
+/* Bits within a descriptor passed to the helper_mte_check* functions. */
+FIELD(MTEDESC, MIDX, 0, 4)
+FIELD(MTEDESC, TBI, 4, 2)
+FIELD(MTEDESC, TCMA, 6, 2)
+FIELD(MTEDESC, WRITE, 8, 1)
+FIELD(MTEDESC, ESIZE, 9, 5)
+FIELD(MTEDESC, TSIZE, 14, 10) /* mte_checkN only */
+
+bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr);
+uint64_t mte_check1(CPUARMState *env, uint32_t desc,
+ uint64_t ptr, uintptr_t ra);
+uint64_t mte_checkN(CPUARMState *env, uint32_t desc,
+ uint64_t ptr, uintptr_t ra);
+
+static inline int allocation_tag_from_addr(uint64_t ptr)
+{
+ return extract64(ptr, 56, 4);
+}
+
+static inline uint64_t address_with_allocation_tag(uint64_t ptr, int rtag)
+{
+ return deposit64(ptr, 56, 4, rtag);
+}
+
+/* Return true if tbi bits mean that the access is checked. */
+static inline bool tbi_check(uint32_t desc, int bit55)
+{
+ return (desc >> (R_MTEDESC_TBI_SHIFT + bit55)) & 1;
+}
+
+/* Return true if tcma bits mean that the access is unchecked. */
+static inline bool tcma_check(uint32_t desc, int bit55, int ptr_tag)
+{
+ /*
+ * We had extracted bit55 and ptr_tag for other reasons, so fold
+ * (ptr<59:55> == 00000 || ptr<59:55> == 11111) into a single test.
+ */
+ bool match = ((ptr_tag + bit55) & 0xf) == 0;
+ bool tcma = (desc >> (R_MTEDESC_TCMA_SHIFT + bit55)) & 1;
+ return tcma && match;
+}
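A worked example of the folded test (editorial, not part of the patch):

    /* bit55 == 0 matches only tag 0x0; bit55 == 1 matches only tag 0xf. */
    assert(((0x0 + 0) & 0xf) == 0);   /* low half: tag 0x0 is TCMA-matched */
    assert(((0xf + 1) & 0xf) == 0);   /* high half: tag 0xf is TCMA-matched */
    assert(((0x7 + 1) & 0xf) != 0);   /* any other tag remains checked */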
+
+/*
+ * For TBI, ideally, we would do nothing. Proper behaviour on fault is
+ * for the tag to be present in the FAR_ELx register. But for user-only
+ * mode, we do not have a TLB with which to implement this, so we must
+ * remove the top byte.
+ */
+static inline uint64_t useronly_clean_ptr(uint64_t ptr)
+{
+ /* TBI is known to be enabled. */
+#ifdef CONFIG_USER_ONLY
+ ptr = sextract64(ptr, 0, 56);
+#endif
+ return ptr;
+}
+
+static inline uint64_t useronly_maybe_clean_ptr(uint32_t desc, uint64_t ptr)
+{
+#ifdef CONFIG_USER_ONLY
+ int64_t clean_ptr = sextract64(ptr, 0, 56);
+ if (tbi_check(desc, clean_ptr < 0)) {
+ ptr = clean_ptr;
+ }
+#endif
+ return ptr;
+}
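For reference, a sketch of how a translator might pack one of these descriptors from the FIELD definitions above (editorial; mmu_idx and all field values here are illustrative assumptions, not taken from the patch):

    uint32_t desc = 0;
    desc = FIELD_DP32(desc, MTEDESC, MIDX, mmu_idx); /* addressing regime */
    desc = FIELD_DP32(desc, MTEDESC, TBI, 3);        /* TBI0 and TBI1 set */
    desc = FIELD_DP32(desc, MTEDESC, TCMA, 0);       /* no TCMA suppression */
    desc = FIELD_DP32(desc, MTEDESC, WRITE, 1);      /* a store */
    desc = FIELD_DP32(desc, MTEDESC, ESIZE, 8);      /* one 8-byte element */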
+
#endif
diff --git a/target/arm/m_helper.c b/target/arm/m_helper.c
index 5e8a795..0364542 100644
--- a/target/arm/m_helper.c
+++ b/target/arm/m_helper.c
@@ -187,12 +187,13 @@ static bool v7m_stack_write(ARMCPU *cpu, uint32_t addr, uint32_t value,
hwaddr physaddr;
int prot;
ARMMMUFaultInfo fi = {};
+ ARMCacheAttrs cacheattrs = {};
bool secure = mmu_idx & ARM_MMU_IDX_M_S;
int exc;
bool exc_secure;
if (get_phys_addr(env, addr, MMU_DATA_STORE, mmu_idx, &physaddr,
- &attrs, &prot, &page_size, &fi, NULL)) {
+ &attrs, &prot, &page_size, &fi, &cacheattrs)) {
/* MPU/SAU lookup failed */
if (fi.type == ARMFault_QEMU_SFault) {
if (mode == STACK_LAZYFP) {
@@ -279,13 +280,14 @@ static bool v7m_stack_read(ARMCPU *cpu, uint32_t *dest, uint32_t addr,
hwaddr physaddr;
int prot;
ARMMMUFaultInfo fi = {};
+ ARMCacheAttrs cacheattrs = {};
bool secure = mmu_idx & ARM_MMU_IDX_M_S;
int exc;
bool exc_secure;
uint32_t value;
if (get_phys_addr(env, addr, MMU_DATA_LOAD, mmu_idx, &physaddr,
- &attrs, &prot, &page_size, &fi, NULL)) {
+ &attrs, &prot, &page_size, &fi, &cacheattrs)) {
/* MPU/SAU lookup failed */
if (fi.type == ARMFault_QEMU_SFault) {
qemu_log_mask(CPU_LOG_INT,
@@ -1928,6 +1930,7 @@ static bool v7m_read_half_insn(ARMCPU *cpu, ARMMMUIdx mmu_idx,
V8M_SAttributes sattrs = {};
MemTxAttrs attrs = {};
ARMMMUFaultInfo fi = {};
+ ARMCacheAttrs cacheattrs = {};
MemTxResult txres;
target_ulong page_size;
hwaddr physaddr;
@@ -1945,8 +1948,8 @@ static bool v7m_read_half_insn(ARMCPU *cpu, ARMMMUIdx mmu_idx,
"...really SecureFault with SFSR.INVEP\n");
return false;
}
- if (get_phys_addr(env, addr, MMU_INST_FETCH, mmu_idx,
- &physaddr, &attrs, &prot, &page_size, &fi, NULL)) {
+ if (get_phys_addr(env, addr, MMU_INST_FETCH, mmu_idx, &physaddr,
+ &attrs, &prot, &page_size, &fi, &cacheattrs)) {
/* the MPU lookup failed */
env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_IACCVIOL_MASK;
armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_MEM, env->v7m.secure);
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
new file mode 100644
index 0000000..5ea57d4
--- /dev/null
+++ b/target/arm/mte_helper.c
@@ -0,0 +1,906 @@
+/*
+ * ARM v8.5-MemTag Operations
+ *
+ * Copyright (c) 2020 Linaro, Ltd.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "internals.h"
+#include "exec/exec-all.h"
+#include "exec/ram_addr.h"
+#include "exec/cpu_ldst.h"
+#include "exec/helper-proto.h"
+
+
+static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude)
+{
+ if (exclude == 0xffff) {
+ return 0;
+ }
+ if (offset == 0) {
+ while (exclude & (1 << tag)) {
+ tag = (tag + 1) & 15;
+ }
+ } else {
+ do {
+ do {
+ tag = (tag + 1) & 15;
+ } while (exclude & (1 << tag));
+ } while (--offset > 0);
+ }
+ return tag;
+}
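Worked examples for the loop above (editorial, not part of the patch); with exclude = 0x0006, tags 1 and 2 are excluded:

    assert(choose_nonexcluded_tag(1, 0, 0x0006) == 3); /* next allowed tag */
    assert(choose_nonexcluded_tag(1, 2, 0x0006) == 4); /* skip 2 allowed tags */
    assert(choose_nonexcluded_tag(5, 3, 0xffff) == 0); /* all excluded: tag 0 */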
+
+/**
+ * allocation_tag_mem:
+ * @env: the cpu environment
+ * @ptr_mmu_idx: the addressing regime to use for the virtual address
+ * @ptr: the virtual address for which to look up tag memory
+ * @ptr_access: the access to use for the virtual address
+ * @ptr_size: the number of bytes in the normal memory access
+ * @tag_access: the access to use for the tag memory
+ * @tag_size: the number of bytes in the tag memory access
+ * @ra: the return address for exception handling
+ *
+ * Our tag memory is formatted as a sequence of little-endian nibbles.
+ * That is, the byte at (addr >> (LOG2_TAG_GRANULE + 1)) contains two
+ * tags, with the tag at [3:0] for the lower addr and the tag at [7:4]
+ * for the higher addr.
+ *
+ * Here, resolve the physical address from the virtual address, and return
+ * a pointer to the corresponding tag byte. Exit with exception if the
+ * virtual address is not accessible for @ptr_access.
+ *
+ * The @ptr_size and @tag_size values may not have an obvious relation
+ * due to the alignment of @ptr, and the number of tag checks required.
+ *
+ * If there is no tag storage corresponding to @ptr, return NULL.
+ */
+static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
+ uint64_t ptr, MMUAccessType ptr_access,
+ int ptr_size, MMUAccessType tag_access,
+ int tag_size, uintptr_t ra)
+{
+#ifdef CONFIG_USER_ONLY
+ /* Tag storage not implemented. */
+ return NULL;
+#else
+ uintptr_t index;
+ CPUIOTLBEntry *iotlbentry;
+ int in_page, flags;
+ ram_addr_t ptr_ra;
+ hwaddr ptr_paddr, tag_paddr, xlat;
+ MemoryRegion *mr;
+ ARMASIdx tag_asi;
+ AddressSpace *tag_as;
+ void *host;
+
+ /*
+ * Probe the first byte of the virtual address. This raises an
+ * exception for inaccessible pages, and resolves the virtual address
+ * into the softmmu tlb.
+ *
+ * When RA == 0, this is for mte_probe1. The page is expected to be
+ * valid. Indicate to probe_access_flags no-fault, then assert that
+ * we received a valid page.
+ */
+ flags = probe_access_flags(env, ptr, ptr_access, ptr_mmu_idx,
+ ra == 0, &host, ra);
+ assert(!(flags & TLB_INVALID_MASK));
+
+ /*
+ * Find the iotlbentry for ptr. This *must* be present in the TLB
+ * because we just found the mapping.
+ * TODO: Perhaps there should be a cputlb helper that returns a
+ * matching tlb entry + iotlb entry.
+ */
+ index = tlb_index(env, ptr_mmu_idx, ptr);
+# ifdef CONFIG_DEBUG_TCG
+ {
+ CPUTLBEntry *entry = tlb_entry(env, ptr_mmu_idx, ptr);
+ target_ulong comparator = (ptr_access == MMU_DATA_LOAD
+ ? entry->addr_read
+ : tlb_addr_write(entry));
+ g_assert(tlb_hit(comparator, ptr));
+ }
+# endif
+ iotlbentry = &env_tlb(env)->d[ptr_mmu_idx].iotlb[index];
+
+ /* If the virtual page MemAttr != Tagged, access unchecked. */
+ if (!arm_tlb_mte_tagged(&iotlbentry->attrs)) {
+ return NULL;
+ }
+
+ /*
+ * If not backed by host ram, there is no tag storage: access unchecked.
+ * This is probably a guest os bug though, so log it.
+ */
+ if (unlikely(flags & TLB_MMIO)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "Page @ 0x%" PRIx64 " indicates Tagged Normal memory "
+ "but is not backed by host ram\n", ptr);
+ return NULL;
+ }
+
+ /*
+ * The Normal memory access can extend to the next page. E.g. a single
+ * 8-byte access to the last byte of a page will check only the last
+ * tag on the first page.
+ * Any page access exception has priority over tag check exception.
+ */
+ in_page = -(ptr | TARGET_PAGE_MASK);
+ if (unlikely(ptr_size > in_page)) {
+ void *ignore;
+ flags |= probe_access_flags(env, ptr + in_page, ptr_access,
+ ptr_mmu_idx, ra == 0, &ignore, ra);
+ assert(!(flags & TLB_INVALID_MASK));
+ }
+
+ /* Any debug exception has priority over a tag check exception. */
+ if (unlikely(flags & TLB_WATCHPOINT)) {
+ int wp = ptr_access == MMU_DATA_LOAD ? BP_MEM_READ : BP_MEM_WRITE;
+ assert(ra != 0);
+ cpu_check_watchpoint(env_cpu(env), ptr, ptr_size,
+ iotlbentry->attrs, wp, ra);
+ }
+
+ /*
+ * Find the physical address within the normal mem space.
+ * The memory region lookup must succeed because TLB_MMIO was
+ * not set in the cputlb lookup above.
+ */
+ mr = memory_region_from_host(host, &ptr_ra);
+ tcg_debug_assert(mr != NULL);
+ tcg_debug_assert(memory_region_is_ram(mr));
+ ptr_paddr = ptr_ra;
+ do {
+ ptr_paddr += mr->addr;
+ mr = mr->container;
+ } while (mr);
+
+ /* Convert to the physical address in tag space. */
+ tag_paddr = ptr_paddr >> (LOG2_TAG_GRANULE + 1);
+
+ /* Look up the address in tag space. */
+ tag_asi = iotlbentry->attrs.secure ? ARMASIdx_TagS : ARMASIdx_TagNS;
+ tag_as = cpu_get_address_space(env_cpu(env), tag_asi);
+ mr = address_space_translate(tag_as, tag_paddr, &xlat, NULL,
+ tag_access == MMU_DATA_STORE,
+ iotlbentry->attrs);
+
+ /*
+ * Note that @mr will never be NULL. If there is nothing in the address
+ * space at @tag_paddr, the translation will return the unallocated memory
+ * region. For our purposes, the result must be ram.
+ */
+ if (unlikely(!memory_region_is_ram(mr))) {
+ /* ??? Failure is a board configuration error. */
+ qemu_log_mask(LOG_UNIMP,
+ "Tag Memory @ 0x%" HWADDR_PRIx " not found for "
+ "Normal Memory @ 0x%" HWADDR_PRIx "\n",
+ tag_paddr, ptr_paddr);
+ return NULL;
+ }
+
+ /*
+ * Ensure the tag memory is dirty on write, for migration.
+ * Tag memory can never contain code or display memory (vga).
+ */
+ if (tag_access == MMU_DATA_STORE) {
+ ram_addr_t tag_ra = memory_region_get_ram_addr(mr) + xlat;
+ cpu_physical_memory_set_dirty_flag(tag_ra, DIRTY_MEMORY_MIGRATION);
+ }
+
+ return memory_region_get_ram_ptr(mr) + xlat;
+#endif
+}
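To make the nibble layout concrete, a small editorial example (the offset is within a page; each tag byte covers two 16-byte granules):

    uint64_t off = 0x30;                              /* granule 3 */
    size_t tag_byte = off >> (LOG2_TAG_GRANULE + 1);  /* tag byte 1 */
    int nibble = extract32(off, LOG2_TAG_GRANULE, 1); /* 1: the high nibble */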
+
+uint64_t HELPER(irg)(CPUARMState *env, uint64_t rn, uint64_t rm)
+{
+ int rtag;
+
+ /*
+ * Our IMPDEF choice for GCR_EL1.RRND==1 is to behave as if
+ * GCR_EL1.RRND==0, always producing deterministic results.
+ */
+ uint16_t exclude = extract32(rm | env->cp15.gcr_el1, 0, 16);
+ int start = extract32(env->cp15.rgsr_el1, 0, 4);
+ int seed = extract32(env->cp15.rgsr_el1, 8, 16);
+ int offset, i;
+
+ /* RandomTag */
+ for (i = offset = 0; i < 4; ++i) {
+ /* NextRandomTagBit */
+ int top = (extract32(seed, 5, 1) ^ extract32(seed, 3, 1) ^
+ extract32(seed, 2, 1) ^ extract32(seed, 0, 1));
+ seed = (top << 15) | (seed >> 1);
+ offset |= top << i;
+ }
+ rtag = choose_nonexcluded_tag(start, offset, exclude);
+ env->cp15.rgsr_el1 = rtag | (seed << 8);
+
+ return address_with_allocation_tag(rn, rtag);
+}
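One NextRandomTagBit step of the LFSR above, worked by hand (editorial, not part of the patch):

    int seed = 0x0001;
    int top = (extract32(seed, 5, 1) ^ extract32(seed, 3, 1) ^
               extract32(seed, 2, 1) ^ extract32(seed, 0, 1)); /* == 1 */
    seed = (top << 15) | (seed >> 1);                          /* == 0x8000 */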
+
+uint64_t HELPER(addsubg)(CPUARMState *env, uint64_t ptr,
+ int32_t offset, uint32_t tag_offset)
+{
+ int start_tag = allocation_tag_from_addr(ptr);
+ uint16_t exclude = extract32(env->cp15.gcr_el1, 0, 16);
+ int rtag = choose_nonexcluded_tag(start_tag, tag_offset, exclude);
+
+ return address_with_allocation_tag(ptr + offset, rtag);
+}
+
+static int load_tag1(uint64_t ptr, uint8_t *mem)
+{
+ int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
+ return extract32(*mem, ofs, 4);
+}
+
+uint64_t HELPER(ldg)(CPUARMState *env, uint64_t ptr, uint64_t xt)
+{
+ int mmu_idx = cpu_mmu_index(env, false);
+ uint8_t *mem;
+ int rtag = 0;
+
+ /* Trap if accessing an invalid page. */
+ mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD, 1,
+ MMU_DATA_LOAD, 1, GETPC());
+
+ /* Load if page supports tags. */
+ if (mem) {
+ rtag = load_tag1(ptr, mem);
+ }
+
+ return address_with_allocation_tag(xt, rtag);
+}
+
+static void check_tag_aligned(CPUARMState *env, uint64_t ptr, uintptr_t ra)
+{
+ if (unlikely(!QEMU_IS_ALIGNED(ptr, TAG_GRANULE))) {
+ arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE,
+ cpu_mmu_index(env, false), ra);
+ g_assert_not_reached();
+ }
+}
+
+/* For use in a non-parallel context, store to the given nibble. */
+static void store_tag1(uint64_t ptr, uint8_t *mem, int tag)
+{
+ int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
+ *mem = deposit32(*mem, ofs, 4, tag);
+}
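An editorial round-trip through store_tag1/load_tag1, for a granule whose address has bit 4 set and therefore uses the high nibble:

    uint8_t mem = 0x05;
    store_tag1(0x30, &mem, 0xa);            /* high nibble <- 0xa */
    assert(mem == 0xa5);                    /* neighbouring tag preserved */
    assert(load_tag1(0x30, &mem) == 0xa);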
+
+/* For use in a parallel context, atomically store to the given nibble. */
+static void store_tag1_parallel(uint64_t ptr, uint8_t *mem, int tag)
+{
+ int ofs = extract32(ptr, LOG2_TAG_GRANULE, 1) * 4;
+ uint8_t old = atomic_read(mem);
+
+ while (1) {
+ uint8_t new = deposit32(old, ofs, 4, tag);
+ uint8_t cmp = atomic_cmpxchg(mem, old, new);
+ if (likely(cmp == old)) {
+ return;
+ }
+ old = cmp;
+ }
+}
+
+typedef void stg_store1(uint64_t, uint8_t *, int);
+
+static inline void do_stg(CPUARMState *env, uint64_t ptr, uint64_t xt,
+ uintptr_t ra, stg_store1 store1)
+{
+ int mmu_idx = cpu_mmu_index(env, false);
+ uint8_t *mem;
+
+ check_tag_aligned(env, ptr, ra);
+
+ /* Trap if accessing an invalid page. */
+ mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, TAG_GRANULE,
+ MMU_DATA_STORE, 1, ra);
+
+ /* Store if page supports tags. */
+ if (mem) {
+ store1(ptr, mem, allocation_tag_from_addr(xt));
+ }
+}
+
+void HELPER(stg)(CPUARMState *env, uint64_t ptr, uint64_t xt)
+{
+ do_stg(env, ptr, xt, GETPC(), store_tag1);
+}
+
+void HELPER(stg_parallel)(CPUARMState *env, uint64_t ptr, uint64_t xt)
+{
+ do_stg(env, ptr, xt, GETPC(), store_tag1_parallel);
+}
+
+void HELPER(stg_stub)(CPUARMState *env, uint64_t ptr)
+{
+ int mmu_idx = cpu_mmu_index(env, false);
+ uintptr_t ra = GETPC();
+
+ check_tag_aligned(env, ptr, ra);
+ probe_write(env, ptr, TAG_GRANULE, mmu_idx, ra);
+}
+
+static inline void do_st2g(CPUARMState *env, uint64_t ptr, uint64_t xt,
+ uintptr_t ra, stg_store1 store1)
+{
+ int mmu_idx = cpu_mmu_index(env, false);
+ int tag = allocation_tag_from_addr(xt);
+ uint8_t *mem1, *mem2;
+
+ check_tag_aligned(env, ptr, ra);
+
+ /*
+     * Trap if accessing invalid page(s).
+ * This takes priority over !allocation_tag_access_enabled.
+ */
+ if (ptr & TAG_GRANULE) {
+ /* Two stores unaligned mod TAG_GRANULE*2 -- modify two bytes. */
+ mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE,
+ TAG_GRANULE, MMU_DATA_STORE, 1, ra);
+ mem2 = allocation_tag_mem(env, mmu_idx, ptr + TAG_GRANULE,
+ MMU_DATA_STORE, TAG_GRANULE,
+ MMU_DATA_STORE, 1, ra);
+
+ /* Store if page(s) support tags. */
+ if (mem1) {
+ store1(TAG_GRANULE, mem1, tag);
+ }
+ if (mem2) {
+ store1(0, mem2, tag);
+ }
+ } else {
+ /* Two stores aligned mod TAG_GRANULE*2 -- modify one byte. */
+ mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE,
+ 2 * TAG_GRANULE, MMU_DATA_STORE, 1, ra);
+ if (mem1) {
+ tag |= tag << 4;
+ atomic_set(mem1, tag);
+ }
+ }
+}
+
+void HELPER(st2g)(CPUARMState *env, uint64_t ptr, uint64_t xt)
+{
+ do_st2g(env, ptr, xt, GETPC(), store_tag1);
+}
+
+void HELPER(st2g_parallel)(CPUARMState *env, uint64_t ptr, uint64_t xt)
+{
+ do_st2g(env, ptr, xt, GETPC(), store_tag1_parallel);
+}
+
+void HELPER(st2g_stub)(CPUARMState *env, uint64_t ptr)
+{
+ int mmu_idx = cpu_mmu_index(env, false);
+ uintptr_t ra = GETPC();
+ int in_page = -(ptr | TARGET_PAGE_MASK);
+
+ check_tag_aligned(env, ptr, ra);
+
+ if (likely(in_page >= 2 * TAG_GRANULE)) {
+ probe_write(env, ptr, 2 * TAG_GRANULE, mmu_idx, ra);
+ } else {
+ probe_write(env, ptr, TAG_GRANULE, mmu_idx, ra);
+ probe_write(env, ptr + TAG_GRANULE, TAG_GRANULE, mmu_idx, ra);
+ }
+}
+
+#define LDGM_STGM_SIZE (4 << GMID_EL1_BS)
+
+uint64_t HELPER(ldgm)(CPUARMState *env, uint64_t ptr)
+{
+ int mmu_idx = cpu_mmu_index(env, false);
+ uintptr_t ra = GETPC();
+ void *tag_mem;
+
+ ptr = QEMU_ALIGN_DOWN(ptr, LDGM_STGM_SIZE);
+
+ /* Trap if accessing an invalid page. */
+ tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD,
+ LDGM_STGM_SIZE, MMU_DATA_LOAD,
+ LDGM_STGM_SIZE / (2 * TAG_GRANULE), ra);
+
+ /* The tag is squashed to zero if the page does not support tags. */
+ if (!tag_mem) {
+ return 0;
+ }
+
+ QEMU_BUILD_BUG_ON(GMID_EL1_BS != 6);
+ /*
+ * We are loading 64-bits worth of tags. The ordering of elements
+ * within the word corresponds to a 64-bit little-endian operation.
+ */
+ return ldq_le_p(tag_mem);
+}
+
+void HELPER(stgm)(CPUARMState *env, uint64_t ptr, uint64_t val)
+{
+ int mmu_idx = cpu_mmu_index(env, false);
+ uintptr_t ra = GETPC();
+ void *tag_mem;
+
+ ptr = QEMU_ALIGN_DOWN(ptr, LDGM_STGM_SIZE);
+
+ /* Trap if accessing an invalid page. */
+ tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE,
+ LDGM_STGM_SIZE, MMU_DATA_LOAD,
+ LDGM_STGM_SIZE / (2 * TAG_GRANULE), ra);
+
+ /*
+     * Tag store only happens if the page supports tags,
+ * and if the OS has enabled access to the tags.
+ */
+ if (!tag_mem) {
+ return;
+ }
+
+ QEMU_BUILD_BUG_ON(GMID_EL1_BS != 6);
+ /*
+ * We are storing 64-bits worth of tags. The ordering of elements
+ * within the word corresponds to a 64-bit little-endian operation.
+ */
+ stq_le_p(tag_mem, val);
+}
+
+void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val)
+{
+ uintptr_t ra = GETPC();
+ int mmu_idx = cpu_mmu_index(env, false);
+ int log2_dcz_bytes, log2_tag_bytes;
+ intptr_t dcz_bytes, tag_bytes;
+ uint8_t *mem;
+
+ /*
+ * In arm_cpu_realizefn, we assert that dcz > LOG2_TAG_GRANULE+1,
+ * i.e. 32 bytes, which is an unreasonably small dcz anyway,
+ * to make sure that we can access one complete tag byte here.
+ */
+ log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2;
+ log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1);
+ dcz_bytes = (intptr_t)1 << log2_dcz_bytes;
+ tag_bytes = (intptr_t)1 << log2_tag_bytes;
+ ptr &= -dcz_bytes;
+
+ mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, dcz_bytes,
+ MMU_DATA_STORE, tag_bytes, ra);
+ if (mem) {
+ int tag_pair = (val & 0xf) * 0x11;
+ memset(mem, tag_pair, tag_bytes);
+ }
+}
+
+/* Record a tag check failure. */
+static void mte_check_fail(CPUARMState *env, int mmu_idx,
+ uint64_t dirty_ptr, uintptr_t ra)
+{
+ ARMMMUIdx arm_mmu_idx = core_to_aa64_mmu_idx(mmu_idx);
+ int el, reg_el, tcf, select;
+ uint64_t sctlr;
+
+ reg_el = regime_el(env, arm_mmu_idx);
+ sctlr = env->cp15.sctlr_el[reg_el];
+
+ switch (arm_mmu_idx) {
+ case ARMMMUIdx_E10_0:
+ case ARMMMUIdx_E20_0:
+ el = 0;
+ tcf = extract64(sctlr, 38, 2);
+ break;
+ default:
+ el = reg_el;
+ tcf = extract64(sctlr, 40, 2);
+ }
+
+ switch (tcf) {
+ case 1:
+ /*
+ * Tag check fail causes a synchronous exception.
+ *
+ * In restore_state_to_opc, we set the exception syndrome
+ * for the load or store operation. Unwind first so we
+ * may overwrite that with the syndrome for the tag check.
+ */
+ cpu_restore_state(env_cpu(env), ra, true);
+ env->exception.vaddress = dirty_ptr;
+ raise_exception(env, EXCP_DATA_ABORT,
+ syn_data_abort_no_iss(el != 0, 0, 0, 0, 0, 0, 0x11),
+ exception_target_el(env));
+ /* noreturn, but fall through to the assert anyway */
+
+ case 0:
+ /*
+ * Tag check fail does not affect the PE.
+ * We eliminate this case by not setting MTE_ACTIVE
+ * in tb_flags, so that we never make this runtime call.
+ */
+ g_assert_not_reached();
+
+ case 2:
+ /* Tag check fail causes asynchronous flag set. */
+ mmu_idx = arm_mmu_idx_el(env, el);
+ if (regime_has_2_ranges(mmu_idx)) {
+ select = extract64(dirty_ptr, 55, 1);
+ } else {
+ select = 0;
+ }
+ env->cp15.tfsr_el[el] |= 1 << select;
+ break;
+
+ default:
+ /* Case 3: Reserved. */
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "Tag check failure with SCTLR_EL%d.TCF%s "
+ "set to reserved value %d\n",
+ reg_el, el ? "" : "0", tcf);
+ break;
+ }
+}
+
+/*
+ * Perform an MTE checked access for a single logical or atomic access.
+ */
+static bool mte_probe1_int(CPUARMState *env, uint32_t desc, uint64_t ptr,
+ uintptr_t ra, int bit55)
+{
+ int mem_tag, mmu_idx, ptr_tag, size;
+ MMUAccessType type;
+ uint8_t *mem;
+
+ ptr_tag = allocation_tag_from_addr(ptr);
+
+ if (tcma_check(desc, bit55, ptr_tag)) {
+ return true;
+ }
+
+ mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
+ type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD;
+ size = FIELD_EX32(desc, MTEDESC, ESIZE);
+
+ mem = allocation_tag_mem(env, mmu_idx, ptr, type, size,
+ MMU_DATA_LOAD, 1, ra);
+ if (!mem) {
+ return true;
+ }
+
+ mem_tag = load_tag1(ptr, mem);
+ return ptr_tag == mem_tag;
+}
+
+/*
+ * No-fault version of mte_check1, to be used by SVE for MemSingleNF.
+ * Returns false if the access is Checked and the check failed. This
+ * is only intended to probe the tag -- the validity of the page must
+ * be checked beforehand.
+ */
+bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr)
+{
+ int bit55 = extract64(ptr, 55, 1);
+
+ /* If TBI is disabled, the access is unchecked. */
+ if (unlikely(!tbi_check(desc, bit55))) {
+ return true;
+ }
+
+ return mte_probe1_int(env, desc, ptr, 0, bit55);
+}
+
+uint64_t mte_check1(CPUARMState *env, uint32_t desc,
+ uint64_t ptr, uintptr_t ra)
+{
+ int bit55 = extract64(ptr, 55, 1);
+
+ /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */
+ if (unlikely(!tbi_check(desc, bit55))) {
+ return ptr;
+ }
+
+ if (unlikely(!mte_probe1_int(env, desc, ptr, ra, bit55))) {
+ int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
+ mte_check_fail(env, mmu_idx, ptr, ra);
+ }
+
+ return useronly_clean_ptr(ptr);
+}
+
+uint64_t HELPER(mte_check1)(CPUARMState *env, uint32_t desc, uint64_t ptr)
+{
+ return mte_check1(env, desc, ptr, GETPC());
+}
+
+/*
+ * Perform an MTE checked access for multiple logical accesses.
+ */
+
+/**
+ * checkN:
+ * @tag: tag memory to test
+ * @odd: true to begin testing at the odd tag nibble
+ * @cmp: the tag to compare against
+ * @count: number of tags to test
+ *
+ * Return the number of successful tests.
+ * Thus a return value < @count indicates a failure.
+ *
+ * A note about sizes: count is expected to be small.
+ *
+ * The most common use will be LDP/STP of two integer registers,
+ * which means 16 bytes of memory touching at most 2 tags, but
+ * often the access is aligned and thus just 1 tag.
+ *
+ * Using AdvSIMD LD/ST (multiple), one can access 64 bytes of memory,
+ * touching at most 5 tags. SVE LDR/STR (vector) with the default
+ * vector length is also 64 bytes; the maximum architectural length
+ * is 256 bytes touching at most 9 tags.
+ *
+ * The loop below uses 7 logical operations and 1 memory operation
+ * per tag pair. An implementation that loads an aligned word and
+ * uses masking to ignore adjacent tags requires 18 logical operations
+ * and thus does not begin to pay off until 6 tags, which,
+ * according to the survey above, is unlikely to be common.
+ */
+static int checkN(uint8_t *mem, int odd, int cmp, int count)
+{
+ int n = 0, diff;
+
+ /* Replicate the test tag and compare. */
+ cmp *= 0x11;
+ diff = *mem++ ^ cmp;
+
+ if (odd) {
+ goto start_odd;
+ }
+
+ while (1) {
+ /* Test even tag. */
+ if (unlikely((diff) & 0x0f)) {
+ break;
+ }
+ if (++n == count) {
+ break;
+ }
+
+ start_odd:
+ /* Test odd tag. */
+ if (unlikely((diff) & 0xf0)) {
+ break;
+ }
+ if (++n == count) {
+ break;
+ }
+
+ diff = *mem++ ^ cmp;
+ }
+ return n;
+}
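A worked example (editorial, not part of the patch): the tag bytes { 0x33, 0x13 } encode tags 3,3,3,1 for four consecutive granules, so checking four granules against tag 3 from an even start succeeds three times and reports the failure in granule 3:

    uint8_t mem[2] = { 0x33, 0x13 };
    assert(checkN(mem, 0, 3, 4) == 3);      /* n < count: check failed */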
+
+uint64_t mte_checkN(CPUARMState *env, uint32_t desc,
+ uint64_t ptr, uintptr_t ra)
+{
+ int mmu_idx, ptr_tag, bit55;
+ uint64_t ptr_last, ptr_end, prev_page, next_page;
+ uint64_t tag_first, tag_end;
+ uint64_t tag_byte_first, tag_byte_end;
+ uint32_t esize, total, tag_count, tag_size, n, c;
+ uint8_t *mem1, *mem2;
+ MMUAccessType type;
+
+ bit55 = extract64(ptr, 55, 1);
+
+ /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */
+ if (unlikely(!tbi_check(desc, bit55))) {
+ return ptr;
+ }
+
+ ptr_tag = allocation_tag_from_addr(ptr);
+
+ if (tcma_check(desc, bit55, ptr_tag)) {
+ goto done;
+ }
+
+ mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
+ type = FIELD_EX32(desc, MTEDESC, WRITE) ? MMU_DATA_STORE : MMU_DATA_LOAD;
+ esize = FIELD_EX32(desc, MTEDESC, ESIZE);
+ total = FIELD_EX32(desc, MTEDESC, TSIZE);
+
+ /* Find the addr of the end of the access, and of the last element. */
+ ptr_end = ptr + total;
+ ptr_last = ptr_end - esize;
+
+ /* Round the bounds to the tag granule, and compute the number of tags. */
+ tag_first = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE);
+ tag_end = QEMU_ALIGN_UP(ptr_last, TAG_GRANULE);
+ tag_count = (tag_end - tag_first) / TAG_GRANULE;
+
+ /* Round the bounds to twice the tag granule, and compute the bytes. */
+ tag_byte_first = QEMU_ALIGN_DOWN(ptr, 2 * TAG_GRANULE);
+ tag_byte_end = QEMU_ALIGN_UP(ptr_last, 2 * TAG_GRANULE);
+
+ /* Locate the page boundaries. */
+ prev_page = ptr & TARGET_PAGE_MASK;
+ next_page = prev_page + TARGET_PAGE_SIZE;
+
+ if (likely(tag_end - prev_page <= TARGET_PAGE_SIZE)) {
+ /* Memory access stays on one page. */
+ tag_size = (tag_byte_end - tag_byte_first) / (2 * TAG_GRANULE);
+ mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, total,
+ MMU_DATA_LOAD, tag_size, ra);
+ if (!mem1) {
+ goto done;
+ }
+ /* Perform all of the comparisons. */
+ n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, tag_count);
+ } else {
+ /* Memory access crosses to next page. */
+ tag_size = (next_page - tag_byte_first) / (2 * TAG_GRANULE);
+ mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, next_page - ptr,
+ MMU_DATA_LOAD, tag_size, ra);
+
+ tag_size = (tag_byte_end - next_page) / (2 * TAG_GRANULE);
+ mem2 = allocation_tag_mem(env, mmu_idx, next_page, type,
+ ptr_end - next_page,
+ MMU_DATA_LOAD, tag_size, ra);
+
+ /*
+ * Perform all of the comparisons.
+ * Note the possible but unlikely case of the operation spanning
+ * two pages that do not both have tagging enabled.
+ */
+ n = c = (next_page - tag_first) / TAG_GRANULE;
+ if (mem1) {
+ n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, c);
+ }
+ if (n == c) {
+ if (!mem2) {
+ goto done;
+ }
+ n += checkN(mem2, 0, ptr_tag, tag_count - c);
+ }
+ }
+
+ /*
+ * If we failed, we know which granule. Compute the element that
+ * is first in that granule, and signal failure on that element.
+ */
+ if (unlikely(n < tag_count)) {
+ uint64_t fail_ofs;
+
+ fail_ofs = tag_first + n * TAG_GRANULE - ptr;
+ fail_ofs = ROUND_UP(fail_ofs, esize);
+ mte_check_fail(env, mmu_idx, ptr + fail_ofs, ra);
+ }
+
+ done:
+ return useronly_clean_ptr(ptr);
+}
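The granule arithmetic above, worked for a small case (editorial): an unaligned LDP of two 8-byte registers, i.e. total = 16 and esize = 8, at an address ending in 0x0c:

    uint64_t p = 0x0c, last = p + 16 - 8;                  /* ptr_last */
    assert(QEMU_ALIGN_DOWN(p, TAG_GRANULE) == 0x00);       /* tag_first */
    assert(QEMU_ALIGN_UP(last, TAG_GRANULE) == 0x20);      /* 2 tags */
    assert(QEMU_ALIGN_UP(last, 2 * TAG_GRANULE) == 0x20);  /* 1 tag byte */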
+
+uint64_t HELPER(mte_checkN)(CPUARMState *env, uint32_t desc, uint64_t ptr)
+{
+ return mte_checkN(env, desc, ptr, GETPC());
+}
+
+/*
+ * Perform an MTE checked access for DC_ZVA.
+ */
+uint64_t HELPER(mte_check_zva)(CPUARMState *env, uint32_t desc, uint64_t ptr)
+{
+ uintptr_t ra = GETPC();
+ int log2_dcz_bytes, log2_tag_bytes;
+ int mmu_idx, bit55;
+ intptr_t dcz_bytes, tag_bytes, i;
+ void *mem;
+ uint64_t ptr_tag, mem_tag, align_ptr;
+
+ bit55 = extract64(ptr, 55, 1);
+
+ /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */
+ if (unlikely(!tbi_check(desc, bit55))) {
+ return ptr;
+ }
+
+ ptr_tag = allocation_tag_from_addr(ptr);
+
+ if (tcma_check(desc, bit55, ptr_tag)) {
+ goto done;
+ }
+
+ /*
+ * In arm_cpu_realizefn, we asserted that dcz > LOG2_TAG_GRANULE+1,
+ * i.e. 32 bytes, which is an unreasonably small dcz anyway, to make
+ * sure that we can access one complete tag byte here.
+ */
+ log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2;
+ log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1);
+ dcz_bytes = (intptr_t)1 << log2_dcz_bytes;
+ tag_bytes = (intptr_t)1 << log2_tag_bytes;
+ align_ptr = ptr & -dcz_bytes;
+
+ /*
+ * Trap if accessing an invalid page. DC_ZVA requires that we supply
+ * the original pointer for an invalid page. But watchpoints require
+ * that we probe the actual space. So do both.
+ */
+ mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX);
+ (void) probe_write(env, ptr, 1, mmu_idx, ra);
+ mem = allocation_tag_mem(env, mmu_idx, align_ptr, MMU_DATA_STORE,
+ dcz_bytes, MMU_DATA_LOAD, tag_bytes, ra);
+ if (!mem) {
+ goto done;
+ }
+
+ /*
+ * Unlike the reasoning for checkN, DC_ZVA is always aligned, and thus
+ * it is quite easy to perform all of the comparisons at once without
+ * any extra masking.
+ *
+ * The most common zva block size is 64; some of the thunderx cpus use
+ * a block size of 128. For user-only, aarch64_max_initfn will set the
+ * block size to 512. Fill out the other cases for future-proofing.
+ *
+ * In order to be able to find the first miscompare later, we want the
+ * tag bytes to be in little-endian order.
+ */
+ switch (log2_tag_bytes) {
+ case 0: /* zva_blocksize 32 */
+ mem_tag = *(uint8_t *)mem;
+ ptr_tag *= 0x11u;
+ break;
+ case 1: /* zva_blocksize 64 */
+ mem_tag = cpu_to_le16(*(uint16_t *)mem);
+ ptr_tag *= 0x1111u;
+ break;
+ case 2: /* zva_blocksize 128 */
+ mem_tag = cpu_to_le32(*(uint32_t *)mem);
+ ptr_tag *= 0x11111111u;
+ break;
+ case 3: /* zva_blocksize 256 */
+ mem_tag = cpu_to_le64(*(uint64_t *)mem);
+ ptr_tag *= 0x1111111111111111ull;
+ break;
+
+ default: /* zva_blocksize 512, 1024, 2048 */
+ ptr_tag *= 0x1111111111111111ull;
+ i = 0;
+ do {
+ mem_tag = cpu_to_le64(*(uint64_t *)(mem + i));
+ if (unlikely(mem_tag != ptr_tag)) {
+ goto fail;
+ }
+ i += 8;
+ align_ptr += 16 * TAG_GRANULE;
+ } while (i < tag_bytes);
+ goto done;
+ }
+
+ if (likely(mem_tag == ptr_tag)) {
+ goto done;
+ }
+
+ fail:
+ /* Locate the first nibble that differs. */
+ i = ctz64(mem_tag ^ ptr_tag) >> 4;
+ mte_check_fail(env, mmu_idx, align_ptr + i * TAG_GRANULE, ra);
+
+ done:
+ return useronly_clean_ptr(ptr);
+}
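A note on the replication constants above (editorial): for a 64-byte block, four granules contribute four tag nibbles read as one little-endian uint16_t, so a single compare covers the whole block:

    uint16_t rep = 0x5 * 0x1111;   /* ptr_tag 0x5 across four granules */
    assert(rep == 0x5555);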
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index eb0de08..b106521 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -935,3 +935,19 @@ uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i)
return ((uint32_t)x >> shift) | (x << (32 - shift));
}
}
+
+void HELPER(probe_access)(CPUARMState *env, target_ulong ptr,
+ uint32_t access_type, uint32_t mmu_idx,
+ uint32_t size)
+{
+    uint32_t in_page = -((uint32_t)ptr | TARGET_PAGE_MASK);
+ uintptr_t ra = GETPC();
+
+ if (likely(size <= in_page)) {
+ probe_access(env, ptr, size, access_type, mmu_idx, ra);
+ } else {
+ probe_access(env, ptr, in_page, access_type, mmu_idx, ra);
+ probe_access(env, ptr + in_page, size - in_page,
+ access_type, mmu_idx, ra);
+ }
+}
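The page-split arithmetic, worked for a 4KiB page (editorial, not part of the patch):

    uint32_t p = 0x2ff8, size = 16;
    uint32_t in_page = -(p | TARGET_PAGE_MASK); /* 8 bytes left in page */
    assert(size > in_page);                     /* probe both pages */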
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index e590db6..382fa82 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -3966,14 +3966,16 @@ static void sve_##NAME##_host(void *vd, intptr_t reg_off, void *host) \
static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \
target_ulong addr, uintptr_t ra) \
{ \
- *(TYPEE *)(vd + H(reg_off)) = (TYPEM)TLB(env, addr, ra); \
+ *(TYPEE *)(vd + H(reg_off)) = \
+ (TYPEM)TLB(env, useronly_clean_ptr(addr), ra); \
}
#define DO_ST_TLB(NAME, H, TYPEE, TYPEM, TLB) \
static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \
target_ulong addr, uintptr_t ra) \
{ \
- TLB(env, addr, (TYPEM)*(TYPEE *)(vd + H(reg_off)), ra); \
+ TLB(env, useronly_clean_ptr(addr), \
+ (TYPEM)*(TYPEE *)(vd + H(reg_off)), ra); \
}
#define DO_LD_PRIM_1(NAME, H, TE, TM) \
@@ -4091,6 +4093,19 @@ static bool sve_probe_page(SVEHostPage *info, bool nofault,
int flags;
addr += mem_off;
+
+ /*
+ * User-only currently always issues with TBI. See the comment
+ * above useronly_clean_ptr. Usually we clean this top byte away
+ * during translation, but we can't do that for e.g. vector + imm
+ * addressing modes.
+ *
+ * We currently always enable TBI for user-only, and do not provide
+ * a way to turn it off. So clean the pointer unconditionally here,
+ * rather than look it up here, or pass it down from above.
+ */
+ addr = useronly_clean_ptr(addr);
+
flags = probe_access_flags(env, addr, access_type, mmu_idx, nofault,
&info->host, retaddr);
info->flags = flags;
@@ -4393,15 +4408,89 @@ static void sve_cont_ldst_watchpoints(SVEContLdSt *info, CPUARMState *env,
#endif
}
+typedef uint64_t mte_check_fn(CPUARMState *, uint32_t, uint64_t, uintptr_t);
+
+static inline QEMU_ALWAYS_INLINE
+void sve_cont_ldst_mte_check_int(SVEContLdSt *info, CPUARMState *env,
+ uint64_t *vg, target_ulong addr, int esize,
+ int msize, uint32_t mtedesc, uintptr_t ra,
+ mte_check_fn *check)
+{
+ intptr_t mem_off, reg_off, reg_last;
+
+ /* Process the page only if MemAttr == Tagged. */
+ if (arm_tlb_mte_tagged(&info->page[0].attrs)) {
+ mem_off = info->mem_off_first[0];
+ reg_off = info->reg_off_first[0];
+ reg_last = info->reg_off_split;
+ if (reg_last < 0) {
+ reg_last = info->reg_off_last[0];
+ }
+
+ do {
+ uint64_t pg = vg[reg_off >> 6];
+ do {
+ if ((pg >> (reg_off & 63)) & 1) {
+                    check(env, mtedesc, addr + mem_off, ra);
+ }
+ reg_off += esize;
+ mem_off += msize;
+ } while (reg_off <= reg_last && (reg_off & 63));
+ } while (reg_off <= reg_last);
+ }
+
+ mem_off = info->mem_off_first[1];
+ if (mem_off >= 0 && arm_tlb_mte_tagged(&info->page[1].attrs)) {
+ reg_off = info->reg_off_first[1];
+ reg_last = info->reg_off_last[1];
+
+ do {
+ uint64_t pg = vg[reg_off >> 6];
+ do {
+ if ((pg >> (reg_off & 63)) & 1) {
+                    check(env, mtedesc, addr + mem_off, ra);
+ }
+ reg_off += esize;
+ mem_off += msize;
+ } while (reg_off & 63);
+ } while (reg_off <= reg_last);
+ }
+}
+
+typedef void sve_cont_ldst_mte_check_fn(SVEContLdSt *info, CPUARMState *env,
+ uint64_t *vg, target_ulong addr,
+ int esize, int msize, uint32_t mtedesc,
+ uintptr_t ra);
+
+static void sve_cont_ldst_mte_check1(SVEContLdSt *info, CPUARMState *env,
+ uint64_t *vg, target_ulong addr,
+ int esize, int msize, uint32_t mtedesc,
+ uintptr_t ra)
+{
+ sve_cont_ldst_mte_check_int(info, env, vg, addr, esize, msize,
+ mtedesc, ra, mte_check1);
+}
+
+static void sve_cont_ldst_mte_checkN(SVEContLdSt *info, CPUARMState *env,
+ uint64_t *vg, target_ulong addr,
+ int esize, int msize, uint32_t mtedesc,
+ uintptr_t ra)
+{
+ sve_cont_ldst_mte_check_int(info, env, vg, addr, esize, msize,
+ mtedesc, ra, mte_checkN);
+}
+
+
/*
* Common helper for all contiguous 1,2,3,4-register predicated stores.
*/
static inline QEMU_ALWAYS_INLINE
void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr,
uint32_t desc, const uintptr_t retaddr,
- const int esz, const int msz, const int N,
+ const int esz, const int msz, const int N, uint32_t mtedesc,
sve_ldst1_host_fn *host_fn,
- sve_ldst1_tlb_fn *tlb_fn)
+ sve_ldst1_tlb_fn *tlb_fn,
+ sve_cont_ldst_mte_check_fn *mte_check_fn)
{
const unsigned rd = simd_data(desc);
const intptr_t reg_max = simd_oprsz(desc);
@@ -4426,7 +4515,14 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr,
sve_cont_ldst_watchpoints(&info, env, vg, addr, 1 << esz, N << msz,
BP_MEM_READ, retaddr);
- /* TODO: MTE check. */
+ /*
+ * Handle mte checks for all active elements.
+ * Since TBI must be set for MTE, !mtedesc => !mte_active.
+ */
+ if (mte_check_fn && mtedesc) {
+ mte_check_fn(&info, env, vg, addr, 1 << esz, N << msz,
+ mtedesc, retaddr);
+ }
flags = info.page[0].flags | info.page[1].flags;
if (unlikely(flags != 0)) {
@@ -4532,26 +4628,67 @@ void sve_ldN_r(CPUARMState *env, uint64_t *vg, const target_ulong addr,
}
}
-#define DO_LD1_1(NAME, ESZ) \
-void HELPER(sve_##NAME##_r)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, 1, \
- sve_##NAME##_host, sve_##NAME##_tlb); \
+static inline QEMU_ALWAYS_INLINE
+void sve_ldN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr,
+ uint32_t desc, const uintptr_t ra,
+ const int esz, const int msz, const int N,
+ sve_ldst1_host_fn *host_fn,
+ sve_ldst1_tlb_fn *tlb_fn)
+{
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+ int bit55 = extract64(addr, 55, 1);
+
+ /* Remove mtedesc from the normal sve descriptor. */
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+
+ /* Perform gross MTE suppression early. */
+    if (!tbi_check(mtedesc, bit55) ||
+        tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) {
+ mtedesc = 0;
+ }
+
+ sve_ldN_r(env, vg, addr, desc, ra, esz, msz, N, mtedesc, host_fn, tlb_fn,
+ N == 1 ? sve_cont_ldst_mte_check1 : sve_cont_ldst_mte_checkN);
}
-#define DO_LD1_2(NAME, ESZ, MSZ) \
-void HELPER(sve_##NAME##_le_r)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \
- sve_##NAME##_le_host, sve_##NAME##_le_tlb); \
-} \
-void HELPER(sve_##NAME##_be_r)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \
- sve_##NAME##_be_host, sve_##NAME##_be_tlb); \
+#define DO_LD1_1(NAME, ESZ) \
+void HELPER(sve_##NAME##_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, 1, 0, \
+ sve_##NAME##_host, sve_##NAME##_tlb, NULL); \
+} \
+void HELPER(sve_##NAME##_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, 1, \
+ sve_##NAME##_host, sve_##NAME##_tlb); \
+}
+
+#define DO_LD1_2(NAME, ESZ, MSZ) \
+void HELPER(sve_##NAME##_le_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, 0, \
+ sve_##NAME##_le_host, sve_##NAME##_le_tlb, NULL); \
+} \
+void HELPER(sve_##NAME##_be_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, 0, \
+ sve_##NAME##_be_host, sve_##NAME##_be_tlb, NULL); \
+} \
+void HELPER(sve_##NAME##_le_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \
+ sve_##NAME##_le_host, sve_##NAME##_le_tlb); \
+} \
+void HELPER(sve_##NAME##_be_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, 1, \
+ sve_##NAME##_be_host, sve_##NAME##_be_tlb); \
}
DO_LD1_1(ld1bb, MO_8)
@@ -4577,26 +4714,44 @@ DO_LD1_2(ld1dd, MO_64, MO_64)
#undef DO_LD1_1
#undef DO_LD1_2
-#define DO_LDN_1(N) \
-void HELPER(sve_ld##N##bb_r)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- sve_ldN_r(env, vg, addr, desc, GETPC(), MO_8, MO_8, N, \
- sve_ld1bb_host, sve_ld1bb_tlb); \
+#define DO_LDN_1(N) \
+void HELPER(sve_ld##N##bb_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldN_r(env, vg, addr, desc, GETPC(), MO_8, MO_8, N, 0, \
+ sve_ld1bb_host, sve_ld1bb_tlb, NULL); \
+} \
+void HELPER(sve_ld##N##bb_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), MO_8, MO_8, N, \
+ sve_ld1bb_host, sve_ld1bb_tlb); \
}
-#define DO_LDN_2(N, SUFF, ESZ) \
-void HELPER(sve_ld##N##SUFF##_le_r)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \
- sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb); \
-} \
-void HELPER(sve_ld##N##SUFF##_be_r)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \
- sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb); \
+#define DO_LDN_2(N, SUFF, ESZ) \
+void HELPER(sve_ld##N##SUFF##_le_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, 0, \
+ sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb, NULL); \
+} \
+void HELPER(sve_ld##N##SUFF##_be_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldN_r(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, 0, \
+ sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb, NULL); \
+} \
+void HELPER(sve_ld##N##SUFF##_le_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \
+ sve_ld1##SUFF##_le_host, sve_ld1##SUFF##_le_tlb); \
+} \
+void HELPER(sve_ld##N##SUFF##_be_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldN_r_mte(env, vg, addr, desc, GETPC(), ESZ, ESZ, N, \
+ sve_ld1##SUFF##_be_host, sve_ld1##SUFF##_be_tlb); \
}
DO_LDN_1(2)
@@ -4654,7 +4809,7 @@ static void record_fault(CPUARMState *env, uintptr_t i, uintptr_t oprsz)
*/
static inline QEMU_ALWAYS_INLINE
void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr,
- uint32_t desc, const uintptr_t retaddr,
+ uint32_t desc, const uintptr_t retaddr, uint32_t mtedesc,
const int esz, const int msz, const SVEContFault fault,
sve_ldst1_host_fn *host_fn,
sve_ldst1_tlb_fn *tlb_fn)
@@ -4686,13 +4841,25 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr,
mem_off = info.mem_off_first[0];
flags = info.page[0].flags;
+ /*
+ * Disable MTE checking if the Tagged bit is not set. Since TBI must
+ * be set within MTEDESC for MTE, !mtedesc => !mte_active.
+ */
+    if (!arm_tlb_mte_tagged(&info.page[0].attrs)) {
+ mtedesc = 0;
+ }
+
if (fault == FAULT_FIRST) {
+ /* Trapping mte check for the first-fault element. */
+ if (mtedesc) {
+ mte_check1(env, mtedesc, addr + mem_off, retaddr);
+ }
+
/*
* Special handling of the first active element,
* if it crosses a page boundary or is MMIO.
*/
bool is_split = mem_off == info.mem_off_split;
- /* TODO: MTE check. */
if (unlikely(flags != 0) || unlikely(is_split)) {
/*
* Use the slow path for cross-page handling.
@@ -4728,7 +4895,9 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr,
/* Watchpoint hit, see below. */
goto do_fault;
}
- /* TODO: MTE check. */
+ if (mtedesc && !mte_probe1(env, mtedesc, addr + mem_off)) {
+ goto do_fault;
+ }
/*
* Use the slow path for cross-page handling.
* This is RAM, without a watchpoint, and will not trap.
@@ -4776,7 +4945,9 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr,
& BP_MEM_READ)) {
goto do_fault;
}
- /* TODO: MTE check. */
+ if (mtedesc && !mte_probe1(env, mtedesc, addr + mem_off)) {
+ goto do_fault;
+ }
host_fn(vd, reg_off, host + mem_off);
}
reg_off += 1 << esz;
@@ -4814,44 +4985,103 @@ void sve_ldnfff1_r(CPUARMState *env, void *vg, const target_ulong addr,
record_fault(env, reg_off, reg_max);
}
-#define DO_LDFF1_LDNF1_1(PART, ESZ) \
+static inline QEMU_ALWAYS_INLINE
+void sve_ldnfff1_r_mte(CPUARMState *env, void *vg, target_ulong addr,
+ uint32_t desc, const uintptr_t retaddr,
+ const int esz, const int msz, const SVEContFault fault,
+ sve_ldst1_host_fn *host_fn,
+ sve_ldst1_tlb_fn *tlb_fn)
+{
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+ int bit55 = extract64(addr, 55, 1);
+
+ /* Remove mtedesc from the normal sve descriptor. */
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+
+ /* Perform gross MTE suppression early. */
+    if (!tbi_check(mtedesc, bit55) ||
+        tcma_check(mtedesc, bit55, allocation_tag_from_addr(addr))) {
+ mtedesc = 0;
+ }
+
+ sve_ldnfff1_r(env, vg, addr, desc, retaddr, mtedesc,
+ esz, msz, fault, host_fn, tlb_fn);
+}
+
+#define DO_LDFF1_LDNF1_1(PART, ESZ) \
void HELPER(sve_ldff1##PART##_r)(CPUARMState *env, void *vg, \
target_ulong addr, uint32_t desc) \
{ \
- sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_FIRST, \
+ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MO_8, FAULT_FIRST, \
sve_ld1##PART##_host, sve_ld1##PART##_tlb); \
} \
void HELPER(sve_ldnf1##PART##_r)(CPUARMState *env, void *vg, \
target_ulong addr, uint32_t desc) \
{ \
- sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_NO, \
+ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MO_8, FAULT_NO, \
+ sve_ld1##PART##_host, sve_ld1##PART##_tlb); \
+} \
+void HELPER(sve_ldff1##PART##_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_FIRST, \
+ sve_ld1##PART##_host, sve_ld1##PART##_tlb); \
+} \
+void HELPER(sve_ldnf1##PART##_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, FAULT_NO, \
sve_ld1##PART##_host, sve_ld1##PART##_tlb); \
}
-#define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \
+#define DO_LDFF1_LDNF1_2(PART, ESZ, MSZ) \
void HELPER(sve_ldff1##PART##_le_r)(CPUARMState *env, void *vg, \
target_ulong addr, uint32_t desc) \
{ \
- sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \
+ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_FIRST, \
sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \
} \
void HELPER(sve_ldnf1##PART##_le_r)(CPUARMState *env, void *vg, \
target_ulong addr, uint32_t desc) \
{ \
- sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \
+ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_NO, \
sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \
} \
void HELPER(sve_ldff1##PART##_be_r)(CPUARMState *env, void *vg, \
target_ulong addr, uint32_t desc) \
{ \
- sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \
+ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_FIRST, \
sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \
} \
void HELPER(sve_ldnf1##PART##_be_r)(CPUARMState *env, void *vg, \
target_ulong addr, uint32_t desc) \
{ \
- sve_ldnfff1_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \
+ sve_ldnfff1_r(env, vg, addr, desc, GETPC(), 0, ESZ, MSZ, FAULT_NO, \
sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \
+} \
+void HELPER(sve_ldff1##PART##_le_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \
+ sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \
+} \
+void HELPER(sve_ldnf1##PART##_le_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \
+ sve_ld1##PART##_le_host, sve_ld1##PART##_le_tlb); \
+} \
+void HELPER(sve_ldff1##PART##_be_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_FIRST, \
+ sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \
+} \
+void HELPER(sve_ldnf1##PART##_be_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_ldnfff1_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, FAULT_NO, \
+ sve_ld1##PART##_be_host, sve_ld1##PART##_be_tlb); \
}
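+
+/*
+ * e.g. DO_LDFF1_LDNF1_2(hh, MO_16, MO_16) expands to eight helpers:
+ * {ldff1,ldnf1} x {le,be} x {plain,_mte} for the hh element size.
+ */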
DO_LDFF1_LDNF1_1(bb, MO_8)
@@ -4882,11 +5112,12 @@ DO_LDFF1_LDNF1_2(dd, MO_64, MO_64)
*/
static inline QEMU_ALWAYS_INLINE
-void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, uint32_t desc,
- const uintptr_t retaddr, const int esz,
- const int msz, const int N,
+void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr,
+ uint32_t desc, const uintptr_t retaddr,
+ const int esz, const int msz, const int N, uint32_t mtedesc,
sve_ldst1_host_fn *host_fn,
- sve_ldst1_tlb_fn *tlb_fn)
+ sve_ldst1_tlb_fn *tlb_fn,
+ sve_cont_ldst_mte_check_fn *mte_check_fn)
{
const unsigned rd = simd_data(desc);
const intptr_t reg_max = simd_oprsz(desc);
@@ -4908,7 +5139,14 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, uint32_t desc,
sve_cont_ldst_watchpoints(&info, env, vg, addr, 1 << esz, N << msz,
BP_MEM_WRITE, retaddr);
- /* TODO: MTE check. */
+ /*
+ * Handle mte checks for all active elements.
+ * Since TBI must be set for MTE, !mtedesc => !mte_active.
+ */
+ if (mte_check_fn && mtedesc) {
+ mte_check_fn(&info, env, vg, addr, 1 << esz, N << msz,
+ mtedesc, retaddr);
+ }
flags = info.page[0].flags | info.page[1].flags;
if (unlikely(flags != 0)) {
@@ -5002,26 +5240,67 @@ void sve_stN_r(CPUARMState *env, uint64_t *vg, target_ulong addr, uint32_t desc,
}
}
-#define DO_STN_1(N, NAME, ESZ) \
-void HELPER(sve_st##N##NAME##_r)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, N, \
- sve_st1##NAME##_host, sve_st1##NAME##_tlb); \
+static inline QEMU_ALWAYS_INLINE
+void sve_stN_r_mte(CPUARMState *env, uint64_t *vg, target_ulong addr,
+ uint32_t desc, const uintptr_t ra,
+ const int esz, const int msz, const int N,
+ sve_ldst1_host_fn *host_fn,
+ sve_ldst1_tlb_fn *tlb_fn)
+{
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+ int bit55 = extract64(addr, 55, 1);
+
+ /* Remove mtedesc from the normal sve descriptor. */
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+
+ /* Perform gross MTE suppression early. */
+ if (!tbi_check(desc, bit55) ||
+ tcma_check(desc, bit55, allocation_tag_from_addr(addr))) {
+ mtedesc = 0;
+ }
+
+ sve_stN_r(env, vg, addr, desc, ra, esz, msz, N, mtedesc, host_fn, tlb_fn,
+ N == 1 ? sve_cont_ldst_mte_check1 : sve_cont_ldst_mte_checkN);
}
-#define DO_STN_2(N, NAME, ESZ, MSZ) \
-void HELPER(sve_st##N##NAME##_le_r)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \
- sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb); \
-} \
-void HELPER(sve_st##N##NAME##_be_r)(CPUARMState *env, void *vg, \
- target_ulong addr, uint32_t desc) \
-{ \
- sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \
- sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb); \
+#define DO_STN_1(N, NAME, ESZ) \
+void HELPER(sve_st##N##NAME##_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MO_8, N, 0, \
+ sve_st1##NAME##_host, sve_st1##NAME##_tlb, NULL); \
+} \
+void HELPER(sve_st##N##NAME##_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MO_8, N, \
+ sve_st1##NAME##_host, sve_st1##NAME##_tlb); \
+}
+
+#define DO_STN_2(N, NAME, ESZ, MSZ) \
+void HELPER(sve_st##N##NAME##_le_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, 0, \
+ sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb, NULL); \
+} \
+void HELPER(sve_st##N##NAME##_be_r)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_stN_r(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, 0, \
+ sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb, NULL); \
+} \
+void HELPER(sve_st##N##NAME##_le_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \
+ sve_st1##NAME##_le_host, sve_st1##NAME##_le_tlb); \
+} \
+void HELPER(sve_st##N##NAME##_be_r_mte)(CPUARMState *env, void *vg, \
+ target_ulong addr, uint32_t desc) \
+{ \
+ sve_stN_r_mte(env, vg, addr, desc, GETPC(), ESZ, MSZ, N, \
+ sve_st1##NAME##_be_host, sve_st1##NAME##_be_tlb); \
}
DO_STN_1(1, bb, MO_8)
@@ -5090,7 +5369,8 @@ static target_ulong off_zd_d(void *reg, intptr_t reg_ofs)
static inline QEMU_ALWAYS_INLINE
void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
target_ulong base, uint32_t desc, uintptr_t retaddr,
- int esize, int msize, zreg_off_fn *off_fn,
+ uint32_t mtedesc, int esize, int msize,
+ zreg_off_fn *off_fn,
sve_ldst1_host_fn *host_fn,
sve_ldst1_tlb_fn *tlb_fn)
{
@@ -5118,7 +5398,9 @@ void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
cpu_check_watchpoint(env_cpu(env), addr, msize,
info.attrs, BP_MEM_READ, retaddr);
}
- /* TODO: MTE check */
+ if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) {
+ mte_check1(env, mtedesc, addr, retaddr);
+ }
host_fn(&scratch, reg_off, info.host);
} else {
/* Element crosses the page boundary. */
@@ -5129,7 +5411,9 @@ void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
msize, info.attrs,
BP_MEM_READ, retaddr);
}
- /* TODO: MTE check */
+ if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) {
+ mte_check1(env, mtedesc, addr, retaddr);
+ }
tlb_fn(env, &scratch, reg_off, addr, retaddr);
}
}
@@ -5142,20 +5426,53 @@ void sve_ld1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
memcpy(vd, &scratch, reg_max);
}
+static inline QEMU_ALWAYS_INLINE
+void sve_ld1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
+ target_ulong base, uint32_t desc, uintptr_t retaddr,
+ int esize, int msize, zreg_off_fn *off_fn,
+ sve_ldst1_host_fn *host_fn,
+ sve_ldst1_tlb_fn *tlb_fn)
+{
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+ /* Remove mtedesc from the normal sve descriptor. */
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+
+ /*
+ * ??? TODO: the offsets here are extracted as 32 bits, so base + ofs
+ * cannot move base across the address space hole, and thus cannot
+ * change the pointer tag or the bit55 selector. We could therefore
+ * examine TBI + TCMA here, as we do for sve_ldN_r_mte().
+ */
+ sve_ld1_z(env, vd, vg, vm, base, desc, retaddr, mtedesc,
+ esize, msize, off_fn, host_fn, tlb_fn);
+}
+
#define DO_LD1_ZPZ_S(MEM, OFS, MSZ) \
void HELPER(sve_ld##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \
void *vm, target_ulong base, uint32_t desc) \
{ \
- sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \
+ sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 4, 1 << MSZ, \
off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \
+} \
+void HELPER(sve_ld##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \
+ void *vm, target_ulong base, uint32_t desc) \
+{ \
+ sve_ld1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \
+ off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \
}
#define DO_LD1_ZPZ_D(MEM, OFS, MSZ) \
void HELPER(sve_ld##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \
void *vm, target_ulong base, uint32_t desc) \
{ \
- sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \
+ sve_ld1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 8, 1 << MSZ, \
off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \
+} \
+void HELPER(sve_ld##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \
+ void *vm, target_ulong base, uint32_t desc) \
+{ \
+ sve_ld1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \
+ off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \
}
DO_LD1_ZPZ_S(bsu, zsu, MO_8)
@@ -5234,7 +5551,8 @@ DO_LD1_ZPZ_D(dd_be, zd, MO_64)
static inline QEMU_ALWAYS_INLINE
void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
target_ulong base, uint32_t desc, uintptr_t retaddr,
- const int esz, const int msz, zreg_off_fn *off_fn,
+ uint32_t mtedesc, const int esz, const int msz,
+ zreg_off_fn *off_fn,
sve_ldst1_host_fn *host_fn,
sve_ldst1_tlb_fn *tlb_fn)
{
@@ -5259,6 +5577,9 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
* Probe the first element, allowing faults.
*/
addr = base + (off_fn(vm, reg_off) << scale);
+ if (mtedesc) {
+ mte_check1(env, mtedesc, addr, retaddr);
+ }
tlb_fn(env, vd, reg_off, addr, retaddr);
/* After any fault, zero the other elements. */
@@ -5291,7 +5612,11 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
(env_cpu(env), addr, msize) & BP_MEM_READ)) {
goto fault;
}
- /* TODO: MTE check. */
+ if (mtedesc &&
+ arm_tlb_mte_tagged(&info.attrs) &&
+ !mte_probe1(env, mtedesc, addr)) {
+ goto fault;
+ }
host_fn(vd, reg_off, info.host);
}
@@ -5304,20 +5629,58 @@ void sve_ldff1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
record_fault(env, reg_off, reg_max);
}
-#define DO_LDFF1_ZPZ_S(MEM, OFS, MSZ) \
-void HELPER(sve_ldff##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \
- void *vm, target_ulong base, uint32_t desc) \
-{ \
- sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), MO_32, MSZ, \
- off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \
+static inline QEMU_ALWAYS_INLINE
+void sve_ldff1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
+ target_ulong base, uint32_t desc, uintptr_t retaddr,
+ const int esz, const int msz,
+ zreg_off_fn *off_fn,
+ sve_ldst1_host_fn *host_fn,
+ sve_ldst1_tlb_fn *tlb_fn)
+{
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+ /* Remove mtedesc from the normal sve descriptor. */
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+
+ /*
+ * ??? TODO: the offsets here are extracted as 32 bits, so base + ofs
+ * cannot move base across the address space hole, and thus cannot
+ * change the pointer tag or the bit55 selector. We could therefore
+ * examine TBI + TCMA here, as we do for sve_ldN_r_mte().
+ */
+ sve_ldff1_z(env, vd, vg, vm, base, desc, retaddr, mtedesc,
+ esz, msz, off_fn, host_fn, tlb_fn);
}
-#define DO_LDFF1_ZPZ_D(MEM, OFS, MSZ) \
-void HELPER(sve_ldff##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \
- void *vm, target_ulong base, uint32_t desc) \
-{ \
- sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), MO_64, MSZ, \
- off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \
+#define DO_LDFF1_ZPZ_S(MEM, OFS, MSZ) \
+void HELPER(sve_ldff##MEM##_##OFS) \
+ (CPUARMState *env, void *vd, void *vg, \
+ void *vm, target_ulong base, uint32_t desc) \
+{ \
+ sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), 0, MO_32, MSZ, \
+ off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \
+} \
+void HELPER(sve_ldff##MEM##_##OFS##_mte) \
+ (CPUARMState *env, void *vd, void *vg, \
+ void *vm, target_ulong base, uint32_t desc) \
+{ \
+ sve_ldff1_z_mte(env, vd, vg, vm, base, desc, GETPC(), MO_32, MSZ, \
+ off_##OFS##_s, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \
+}
+
+#define DO_LDFF1_ZPZ_D(MEM, OFS, MSZ) \
+void HELPER(sve_ldff##MEM##_##OFS) \
+ (CPUARMState *env, void *vd, void *vg, \
+ void *vm, target_ulong base, uint32_t desc) \
+{ \
+ sve_ldff1_z(env, vd, vg, vm, base, desc, GETPC(), 0, MO_64, MSZ, \
+ off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \
+} \
+void HELPER(sve_ldff##MEM##_##OFS##_mte) \
+ (CPUARMState *env, void *vd, void *vg, \
+ void *vm, target_ulong base, uint32_t desc) \
+{ \
+ sve_ldff1_z_mte(env, vd, vg, vm, base, desc, GETPC(), MO_64, MSZ, \
+ off_##OFS##_d, sve_ld1##MEM##_host, sve_ld1##MEM##_tlb); \
}
DO_LDFF1_ZPZ_S(bsu, zsu, MO_8)
@@ -5389,7 +5752,8 @@ DO_LDFF1_ZPZ_D(dd_be, zd, MO_64)
static inline QEMU_ALWAYS_INLINE
void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
target_ulong base, uint32_t desc, uintptr_t retaddr,
- int esize, int msize, zreg_off_fn *off_fn,
+ uint32_t mtedesc, int esize, int msize,
+ zreg_off_fn *off_fn,
sve_ldst1_host_fn *host_fn,
sve_ldst1_tlb_fn *tlb_fn)
{
@@ -5433,7 +5797,10 @@ void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
cpu_check_watchpoint(env_cpu(env), addr, msize,
info.attrs, BP_MEM_WRITE, retaddr);
}
- /* TODO: MTE check. */
+
+ if (mtedesc && arm_tlb_mte_tagged(&info.attrs)) {
+ mte_check1(env, mtedesc, addr, retaddr);
+ }
}
i += 1;
reg_off += esize;
@@ -5463,20 +5830,53 @@ void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
} while (reg_off < reg_max);
}
-#define DO_ST1_ZPZ_S(MEM, OFS, MSZ) \
-void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \
+static inline QEMU_ALWAYS_INLINE
+void sve_st1_z_mte(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
+ target_ulong base, uint32_t desc, uintptr_t retaddr,
+ int esize, int msize, zreg_off_fn *off_fn,
+ sve_ldst1_host_fn *host_fn,
+ sve_ldst1_tlb_fn *tlb_fn)
+{
+ uint32_t mtedesc = desc >> (SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+ /* Remove mtedesc from the normal sve descriptor. */
+ desc = extract32(desc, 0, SIMD_DATA_SHIFT + SVE_MTEDESC_SHIFT);
+
+ /*
+ * ??? TODO: the offsets here are extracted as 32 bits, so base + ofs
+ * cannot move base across the address space hole, and thus cannot
+ * change the pointer tag or the bit55 selector. We could therefore
+ * examine TBI + TCMA here, as we do for sve_ldN_r_mte().
+ */
+ sve_st1_z(env, vd, vg, vm, base, desc, retaddr, mtedesc,
+ esize, msize, off_fn, host_fn, tlb_fn);
+}
+
+#define DO_ST1_ZPZ_S(MEM, OFS, MSZ) \
+void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \
void *vm, target_ulong base, uint32_t desc) \
-{ \
- sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \
- off_##OFS##_s, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \
+{ \
+ sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 4, 1 << MSZ, \
+ off_##OFS##_s, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \
+} \
+void HELPER(sve_st##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \
+ void *vm, target_ulong base, uint32_t desc) \
+{ \
+ sve_st1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \
+ off_##OFS##_s, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \
}
-#define DO_ST1_ZPZ_D(MEM, OFS, MSZ) \
-void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \
+#define DO_ST1_ZPZ_D(MEM, OFS, MSZ) \
+void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \
void *vm, target_ulong base, uint32_t desc) \
-{ \
- sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \
- off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \
+{ \
+ sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 0, 8, 1 << MSZ, \
+ off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \
+} \
+void HELPER(sve_st##MEM##_##OFS##_mte)(CPUARMState *env, void *vd, void *vg, \
+ void *vm, target_ulong base, uint32_t desc) \
+{ \
+ sve_st1_z_mte(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \
+ off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \
}
DO_ST1_ZPZ_S(bs, zsu, MO_8)
diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c
index 7388494..b35dc8a 100644
--- a/target/arm/tlb_helper.c
+++ b/target/arm/tlb_helper.c
@@ -10,8 +10,6 @@
#include "internals.h"
#include "exec/exec-all.h"
-#if !defined(CONFIG_USER_ONLY)
-
static inline uint32_t merge_syn_data_abort(uint32_t template_syn,
unsigned int target_el,
bool same_el, bool ea,
@@ -122,6 +120,8 @@ void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
arm_deliver_fault(cpu, vaddr, access_type, mmu_idx, &fi);
}
+#if !defined(CONFIG_USER_ONLY)
+
/*
* arm_cpu_do_transaction_failed: handle a memory system error response
* (eg "no device/memory present at address") by raising an external abort
@@ -166,6 +166,7 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
int prot, ret;
MemTxAttrs attrs = {};
ARMMMUFaultInfo fi = {};
+ ARMCacheAttrs cacheattrs = {};
/*
* Walk the page table and (if the mapping exists) add the page
@@ -175,7 +176,8 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
*/
ret = get_phys_addr(&cpu->env, address, access_type,
core_to_arm_mmu_idx(&cpu->env, mmu_idx),
- &phys_addr, &attrs, &prot, &page_size, &fi, NULL);
+ &phys_addr, &attrs, &prot, &page_size,
+ &fi, &cacheattrs);
if (likely(!ret)) {
/*
* Map a single [sub]page. Regions smaller than our declared
@@ -186,6 +188,11 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
phys_addr &= TARGET_PAGE_MASK;
address &= TARGET_PAGE_MASK;
}
+ /* Notice and record tagged memory. */
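+ /*
+ * 0xf0 is the MAIR attribute encoding for Tagged Normal memory
+ * (inner/outer write-back cacheable); only such pages carry tags.
+ */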
+ if (cpu_isar_feature(aa64_mte, cpu) && cacheattrs.attrs == 0xf0) {
+ arm_tlb_mte_tagged(&attrs) = true;
+ }
+
tlb_set_page_with_attrs(cs, address, phys_addr, attrs,
prot, mmu_idx, page_size);
return true;
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 4cef862..73d753f 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -204,20 +204,20 @@ static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
}
/*
- * Return a "clean" address for ADDR according to TBID.
- * This is always a fresh temporary, as we need to be able to
- * increment this independently of a dirty write-back address.
+ * Handle MTE and/or TBI.
+ *
+ * For TBI, ideally, we would do nothing. Proper behaviour on fault is
+ * for the tag to be present in the FAR_ELx register. But for user-only
+ * mode we do not have a TLB with which to implement this, so we must
+ * remove the top byte now.
+ *
+ * Always return a fresh temporary that we can increment independently
+ * of the write-back address.
*/
-static TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
+TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
{
TCGv_i64 clean = new_tmp_a64(s);
- /*
- * In order to get the correct value in the FAR_ELx register,
- * we must present the memory subsystem with the "dirty" address
- * including the TBI. In system mode we can make this work via
- * the TLB, dropping the TBI during translation. But for user-only
- * mode we don't have that option, and must remove the top byte now.
- */
#ifdef CONFIG_USER_ONLY
gen_top_byte_ignore(s, clean, addr, s->tbid);
#else
@@ -226,6 +226,92 @@ static TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr)
return clean;
}
+/* Insert a zero tag into src, with the result at dst. */
+static void gen_address_with_allocation_tag0(TCGv_i64 dst, TCGv_i64 src)
+{
+ tcg_gen_andi_i64(dst, src, ~MAKE_64BIT_MASK(56, 4));
+}
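+
+/*
+ * MAKE_64BIT_MASK(56, 4) covers the allocation-tag nibble [59:56], so
+ * e.g. 0x0b00_4000_0000_1000 becomes 0x0000_4000_0000_1000: the tag is
+ * forced to 0 while the remaining top-byte bits [63:60] are preserved.
+ */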
+
+static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
+ MMUAccessType acc, int log2_size)
+{
+ TCGv_i32 t_acc = tcg_const_i32(acc);
+ TCGv_i32 t_idx = tcg_const_i32(get_mem_index(s));
+ TCGv_i32 t_size = tcg_const_i32(1 << log2_size);
+
+ gen_helper_probe_access(cpu_env, ptr, t_acc, t_idx, t_size);
+ tcg_temp_free_i32(t_acc);
+ tcg_temp_free_i32(t_idx);
+ tcg_temp_free_i32(t_size);
+}
+
+/*
+ * For MTE, check a single logical or atomic access. This probes a single
+ * address, the exact one specified. The size and alignment of the access
+ * is not relevant to MTE, per se, but watchpoints do require the size,
+ * and we want to recognize those before making any other changes to state.
+ */
+static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
+ bool is_write, bool tag_checked,
+ int log2_size, bool is_unpriv,
+ int core_idx)
+{
+ if (tag_checked && s->mte_active[is_unpriv]) {
+ TCGv_i32 tcg_desc;
+ TCGv_i64 ret;
+ int desc = 0;
+
+ desc = FIELD_DP32(desc, MTEDESC, MIDX, core_idx);
+ desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
+ desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
+ desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
+ desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << log2_size);
+ tcg_desc = tcg_const_i32(desc);
+
+ ret = new_tmp_a64(s);
+ gen_helper_mte_check1(ret, cpu_env, tcg_desc, addr);
+ tcg_temp_free_i32(tcg_desc);
+
+ return ret;
+ }
+ return clean_data_tbi(s, addr);
+}
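+
+/*
+ * The desc built above packs, low to high: MIDX (mmu index), TBI,
+ * TCMA, WRITE and ESIZE (access size in bytes); the exact FIELD widths
+ * are defined elsewhere in the tree. E.g. a tag-checked 8-byte store
+ * would reach helper_mte_check1 with WRITE = 1 and ESIZE = 8.
+ */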
+
+TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
+ bool tag_checked, int log2_size)
+{
+ return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size,
+ false, get_mem_index(s));
+}
+
+/*
+ * For MTE, check multiple logical sequential accesses.
+ */
+TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
+ bool tag_checked, int log2_esize, int total_size)
+{
+ if (tag_checked && s->mte_active[0] && total_size != (1 << log2_esize)) {
+ TCGv_i32 tcg_desc;
+ TCGv_i64 ret;
+ int desc = 0;
+
+ desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
+ desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
+ desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
+ desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
+ desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << log2_esize);
+ desc = FIELD_DP32(desc, MTEDESC, TSIZE, total_size);
+ tcg_desc = tcg_const_i32(desc);
+
+ ret = new_tmp_a64(s);
+ gen_helper_mte_checkN(ret, cpu_env, tcg_desc, addr);
+ tcg_temp_free_i32(tcg_desc);
+
+ return ret;
+ }
+ return gen_mte_check1(s, addr, is_write, tag_checked, log2_esize);
+}
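+
+/*
+ * When total_size == (1 << log2_esize) the "multiple" access is really
+ * a single element, hence the fall-through to gen_mte_check1. E.g. an
+ * LDP of two 8-byte registers arrives with log2_esize = 3 and
+ * total_size = 16, and so takes the checkN path.
+ */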
+
typedef struct DisasCompare64 {
TCGCond cond;
TCGv_i64 value;
@@ -1616,7 +1702,28 @@ static void handle_msr_i(DisasContext *s, uint32_t insn,
gen_helper_msr_i_daifclear(cpu_env, t1);
tcg_temp_free_i32(t1);
/* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs. */
- s->base.is_jmp = DISAS_UPDATE;
+ s->base.is_jmp = DISAS_UPDATE_EXIT;
+ break;
+
+ case 0x1c: /* TCO */
+ if (dc_isar_feature(aa64_mte, s)) {
+ /* Full MTE is enabled -- set the TCO bit as directed. */
+ if (crm & 1) {
+ set_pstate_bits(PSTATE_TCO);
+ } else {
+ clear_pstate_bits(PSTATE_TCO);
+ }
+ t1 = tcg_const_i32(s->current_el);
+ gen_helper_rebuild_hflags_a64(cpu_env, t1);
+ tcg_temp_free_i32(t1);
+ /* Many factors, including TCO, go into MTE_ACTIVE. */
+ s->base.is_jmp = DISAS_UPDATE_NOCHAIN;
+ } else if (dc_isar_feature(aa64_mte_insn_reg, s)) {
+ /* Only "instructions accessible at EL0" -- PSTATE.TCO is WI. */
+ s->base.is_jmp = DISAS_NEXT;
+ } else {
+ goto do_unallocated;
+ }
break;
default:
@@ -1750,9 +1857,62 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
return;
case ARM_CP_DC_ZVA:
/* Writes clear the aligned block of memory which rt points into. */
- tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
+ if (s->mte_active[0]) {
+ TCGv_i32 t_desc;
+ int desc = 0;
+
+ desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
+ desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
+ desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
+ t_desc = tcg_const_i32(desc);
+
+ tcg_rt = new_tmp_a64(s);
+ gen_helper_mte_check_zva(tcg_rt, cpu_env, t_desc, cpu_reg(s, rt));
+ tcg_temp_free_i32(t_desc);
+ } else {
+ tcg_rt = clean_data_tbi(s, cpu_reg(s, rt));
+ }
gen_helper_dc_zva(cpu_env, tcg_rt);
return;
+ case ARM_CP_DC_GVA:
+ {
+ TCGv_i64 clean_addr, tag;
+
+ /*
+ * DC_GVA, like DC_ZVA, requires that we supply the original
+ * pointer for an invalid page. Probe that address first.
+ */
+ tcg_rt = cpu_reg(s, rt);
+ clean_addr = clean_data_tbi(s, tcg_rt);
+ gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8);
+
+ if (s->ata) {
+ /* Extract the tag from the register to match STZGM. */
+ tag = tcg_temp_new_i64();
+ tcg_gen_shri_i64(tag, tcg_rt, 56);
+ gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
+ tcg_temp_free_i64(tag);
+ }
+ }
+ return;
+ case ARM_CP_DC_GZVA:
+ {
+ TCGv_i64 clean_addr, tag;
+
+ /* For DC_GZVA, we can rely on DC_ZVA for the proper fault. */
+ tcg_rt = cpu_reg(s, rt);
+ clean_addr = clean_data_tbi(s, tcg_rt);
+ gen_helper_dc_zva(cpu_env, clean_addr);
+
+ if (s->ata) {
+ /* Extract the tag from the register to match STZGM. */
+ tag = tcg_temp_new_i64();
+ tcg_gen_shri_i64(tag, tcg_rt, 56);
+ gen_helper_stzgm_tags(cpu_env, clean_addr, tag);
+ tcg_temp_free_i64(tag);
+ }
+ }
+ return;
default:
break;
}
@@ -1795,7 +1955,7 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
/* I/O operations must end the TB here (whether read or write) */
- s->base.is_jmp = DISAS_UPDATE;
+ s->base.is_jmp = DISAS_UPDATE_EXIT;
}
if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
/*
@@ -1810,7 +1970,7 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
* but allow this to be suppressed by the register definition
* (usually only necessary to work around guest bugs).
*/
- s->base.is_jmp = DISAS_UPDATE;
+ s->base.is_jmp = DISAS_UPDATE_EXIT;
}
}
@@ -2327,7 +2487,7 @@ static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
if (rn == 31) {
gen_check_sp_alignment(s);
}
- clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+ clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size);
tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx,
size | MO_ALIGN | s->be_data);
}
@@ -2345,7 +2505,9 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
if (rn == 31) {
gen_check_sp_alignment(s);
}
- clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+
+ /* This is a single atomic access, despite the "pair". */
+ clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size + 1);
if (size == 2) {
TCGv_i64 cmp = tcg_temp_new_i64();
@@ -2470,7 +2632,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
if (is_lasr) {
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
}
- clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+ clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
+ true, rn != 31, size);
gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false);
return;
@@ -2479,7 +2642,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
if (rn == 31) {
gen_check_sp_alignment(s);
}
- clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+ clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
+ false, rn != 31, size);
s->is_ldex = true;
gen_load_exclusive(s, rt, rt2, clean_addr, size, false);
if (is_lasr) {
@@ -2499,7 +2663,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
gen_check_sp_alignment(s);
}
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
- clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+ clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
+ true, rn != 31, size);
do_gpr_st(s, cpu_reg(s, rt), clean_addr, size, true, rt,
disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
return;
@@ -2515,7 +2680,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
if (rn == 31) {
gen_check_sp_alignment(s);
}
- clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+ clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
+ false, rn != 31, size);
do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, true, rt,
disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
@@ -2529,7 +2695,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
if (is_lasr) {
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
}
- clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+ clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
+ true, rn != 31, size);
gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true);
return;
}
@@ -2547,7 +2714,8 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
if (rn == 31) {
gen_check_sp_alignment(s);
}
- clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+ clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
+ false, rn != 31, size);
s->is_ldex = true;
gen_load_exclusive(s, rt, rt2, clean_addr, size, true);
if (is_lasr) {
@@ -2650,7 +2818,7 @@ static void disas_ld_lit(DisasContext *s, uint32_t insn)
* +-----+-------+---+---+-------+---+-------+-------+------+------+
*
* opc: LDP/STP/LDNP/STNP 00 -> 32 bit, 10 -> 64 bit
- * LDPSW 01
+ * LDPSW/STGP 01
* LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
* V: 0 -> GPR, 1 -> Vector
* idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
@@ -2675,6 +2843,7 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn)
bool is_signed = false;
bool postindex = false;
bool wback = false;
+ bool set_tag = false;
TCGv_i64 clean_addr, dirty_addr;
@@ -2687,6 +2856,14 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn)
if (is_vector) {
size = 2 + opc;
+ } else if (opc == 1 && !is_load) {
+ /* STGP */
+ if (!dc_isar_feature(aa64_mte_insn_reg, s) || index == 0) {
+ unallocated_encoding(s);
+ return;
+ }
+ size = 3;
+ set_tag = true;
} else {
size = 2 + extract32(opc, 1, 1);
is_signed = extract32(opc, 0, 1);
@@ -2727,7 +2904,7 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn)
return;
}
- offset <<= size;
+ offset <<= (set_tag ? LOG2_TAG_GRANULE : size);
if (rn == 31) {
gen_check_sp_alignment(s);
@@ -2737,7 +2914,24 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn)
if (!postindex) {
tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
}
- clean_addr = clean_data_tbi(s, dirty_addr);
+
+ if (set_tag) {
+ if (!s->ata) {
+ /*
+ * TODO: We could rely on the stores below, at least for
+ * system mode, if we arrange to add MO_ALIGN_16.
+ */
+ gen_helper_stg_stub(cpu_env, dirty_addr);
+ } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+ gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr);
+ } else {
+ gen_helper_stg(cpu_env, dirty_addr, dirty_addr);
+ }
+ }
+
+ clean_addr = gen_mte_checkN(s, dirty_addr, !is_load,
+ (wback || rn != 31) && !set_tag,
+ size, 2 << size);
if (is_vector) {
if (is_load) {
@@ -2818,6 +3012,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
bool iss_valid = !is_vector;
bool post_index;
bool writeback;
+ int memidx;
TCGv_i64 clean_addr, dirty_addr;
@@ -2875,7 +3070,11 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
if (!post_index) {
tcg_gen_addi_i64(dirty_addr, dirty_addr, imm9);
}
- clean_addr = clean_data_tbi(s, dirty_addr);
+
+ memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
+ clean_addr = gen_mte_check1_mmuidx(s, dirty_addr, is_store,
+ writeback || rn != 31,
+ size, is_unpriv, memidx);
if (is_vector) {
if (is_store) {
@@ -2885,7 +3084,6 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
}
} else {
TCGv_i64 tcg_rt = cpu_reg(s, rt);
- int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
if (is_store) {
@@ -2982,7 +3180,7 @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm);
- clean_addr = clean_data_tbi(s, dirty_addr);
+ clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, size);
if (is_vector) {
if (is_store) {
@@ -3067,7 +3265,7 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
dirty_addr = read_cpu_reg_sp(s, rn, 1);
offset = imm12 << size;
tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
- clean_addr = clean_data_tbi(s, dirty_addr);
+ clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, size);
if (is_vector) {
if (is_store) {
@@ -3160,7 +3358,7 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
if (rn == 31) {
gen_check_sp_alignment(s);
}
- clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+ clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size);
if (o3_opc == 014) {
/*
@@ -3237,7 +3435,8 @@ static void disas_ldst_pac(DisasContext *s, uint32_t insn,
tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
/* Note that "clean" and "dirty" here refer to TBI not PAC. */
- clean_addr = clean_data_tbi(s, dirty_addr);
+ clean_addr = gen_mte_check1(s, dirty_addr, false,
+ is_wback || rn != 31, size);
tcg_rt = cpu_reg(s, rt);
do_gpr_ld(s, tcg_rt, clean_addr, size, /* is_signed */ false,
@@ -3399,7 +3598,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
MemOp endian = s->be_data;
- int ebytes; /* bytes per element */
+ int total; /* total bytes */
int elements; /* elements per vector */
int rpt; /* num iterations */
int selem; /* structure elements */
@@ -3469,19 +3668,26 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
endian = MO_LE;
}
- /* Consecutive little-endian elements from a single register
+ total = rpt * selem * (is_q ? 16 : 8);
+ tcg_rn = cpu_reg_sp(s, rn);
+
+ /*
+ * Issue the MTE check vs the logical repeat count, before we
+ * promote consecutive little-endian elements below.
+ */
+ clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31,
+ size, total);
+
+ /*
+ * Consecutive little-endian elements from a single register
* can be promoted to a larger little-endian operation.
*/
if (selem == 1 && endian == MO_LE) {
size = 3;
}
- ebytes = 1 << size;
- elements = (is_q ? 16 : 8) / ebytes;
-
- tcg_rn = cpu_reg_sp(s, rn);
- clean_addr = clean_data_tbi(s, tcg_rn);
- tcg_ebytes = tcg_const_i64(ebytes);
+ elements = (is_q ? 16 : 8) >> size;
+ tcg_ebytes = tcg_const_i64(1 << size);
for (r = 0; r < rpt; r++) {
int e;
for (e = 0; e < elements; e++) {
@@ -3515,7 +3721,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
if (is_postidx) {
if (rm == 31) {
- tcg_gen_addi_i64(tcg_rn, tcg_rn, rpt * elements * selem * ebytes);
+ tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
} else {
tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
}
@@ -3561,7 +3767,7 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
bool replicate = false;
int index = is_q << 3 | S << 2 | size;
- int ebytes, xs;
+ int xs, total;
TCGv_i64 clean_addr, tcg_rn, tcg_ebytes;
if (extract32(insn, 31, 1)) {
@@ -3615,16 +3821,17 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
return;
}
- ebytes = 1 << scale;
-
if (rn == 31) {
gen_check_sp_alignment(s);
}
+ total = selem << scale;
tcg_rn = cpu_reg_sp(s, rn);
- clean_addr = clean_data_tbi(s, tcg_rn);
- tcg_ebytes = tcg_const_i64(ebytes);
+ clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31,
+ scale, total);
+
+ tcg_ebytes = tcg_const_i64(1 << scale);
for (xs = 0; xs < selem; xs++) {
if (replicate) {
/* Load and replicate to all elements */
@@ -3651,13 +3858,216 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
if (is_postidx) {
if (rm == 31) {
- tcg_gen_addi_i64(tcg_rn, tcg_rn, selem * ebytes);
+ tcg_gen_addi_i64(tcg_rn, tcg_rn, total);
} else {
tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
}
}
}
+/*
+ * Load/Store memory tags
+ *
+ *  31 30 29         24     22  21     12    10      5      0
+ * +-----+-------------+-----+---+------+-----+------+------+
+ * | 1 1 | 0 1 1 0 0 1 | op1 | 1 | imm9 | op2 |  Rn  |  Rt  |
+ * +-----+-------------+-----+---+------+-----+------+------+
+ */
+static void disas_ldst_tag(DisasContext *s, uint32_t insn)
+{
+ int rt = extract32(insn, 0, 5);
+ int rn = extract32(insn, 5, 5);
+ uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE;
+ int op2 = extract32(insn, 10, 2);
+ int op1 = extract32(insn, 22, 2);
+ bool is_load = false, is_pair = false, is_zero = false, is_mult = false;
+ int index = 0;
+ TCGv_i64 addr, clean_addr, tcg_rt;
+
+ /* We checked insn bits [29:24,21] in the caller. */
+ if (extract32(insn, 30, 2) != 3) {
+ goto do_unallocated;
+ }
+
+ /*
+ * @index is a tri-state variable which has 3 states:
+ * < 0 : post-index, writeback
+ * = 0 : signed offset
+ * > 0 : pre-index, writeback
+ */
+ switch (op1) {
+ case 0:
+ if (op2 != 0) {
+ /* STG */
+ index = op2 - 2;
+ } else {
+ /* STZGM */
+ if (s->current_el == 0 || offset != 0) {
+ goto do_unallocated;
+ }
+ is_mult = is_zero = true;
+ }
+ break;
+ case 1:
+ if (op2 != 0) {
+ /* STZG */
+ is_zero = true;
+ index = op2 - 2;
+ } else {
+ /* LDG */
+ is_load = true;
+ }
+ break;
+ case 2:
+ if (op2 != 0) {
+ /* ST2G */
+ is_pair = true;
+ index = op2 - 2;
+ } else {
+ /* STGM */
+ if (s->current_el == 0 || offset != 0) {
+ goto do_unallocated;
+ }
+ is_mult = true;
+ }
+ break;
+ case 3:
+ if (op2 != 0) {
+ /* STZ2G */
+ is_pair = is_zero = true;
+ index = op2 - 2;
+ } else {
+ /* LDGM */
+ if (s->current_el == 0 || offset != 0) {
+ goto do_unallocated;
+ }
+ is_mult = is_load = true;
+ }
+ break;
+
+ default:
+ do_unallocated:
+ unallocated_encoding(s);
+ return;
+ }
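+
+ /*
+ * e.g. for STG: op2 = 1 decodes as post-index (index = -1),
+ * op2 = 2 as signed offset (index = 0), and op2 = 3 as
+ * pre-index (index = +1), per the tri-state note above.
+ */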
+
+ if (is_mult
+ ? !dc_isar_feature(aa64_mte, s)
+ : !dc_isar_feature(aa64_mte_insn_reg, s)) {
+ goto do_unallocated;
+ }
+
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+
+ addr = read_cpu_reg_sp(s, rn, true);
+ if (index >= 0) {
+ /* pre-index or signed offset */
+ tcg_gen_addi_i64(addr, addr, offset);
+ }
+
+ if (is_mult) {
+ tcg_rt = cpu_reg(s, rt);
+
+ if (is_zero) {
+ int size = 4 << s->dcz_blocksize;
+
+ if (s->ata) {
+ gen_helper_stzgm_tags(cpu_env, addr, tcg_rt);
+ }
+ /*
+ * The non-tags portion of STZGM is mostly like DC_ZVA,
+ * except the alignment happens before the access.
+ */
+ clean_addr = clean_data_tbi(s, addr);
+ tcg_gen_andi_i64(clean_addr, clean_addr, -size);
+ gen_helper_dc_zva(cpu_env, clean_addr);
+ } else if (s->ata) {
+ if (is_load) {
+ gen_helper_ldgm(tcg_rt, cpu_env, addr);
+ } else {
+ gen_helper_stgm(cpu_env, addr, tcg_rt);
+ }
+ } else {
+ MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE;
+ int size = 4 << GMID_EL1_BS;
+
+ clean_addr = clean_data_tbi(s, addr);
+ tcg_gen_andi_i64(clean_addr, clean_addr, -size);
+ gen_probe_access(s, clean_addr, acc, size);
+
+ if (is_load) {
+ /* The result tags are zeros. */
+ tcg_gen_movi_i64(tcg_rt, 0);
+ }
+ }
+ return;
+ }
+
+ if (is_load) {
+ tcg_gen_andi_i64(addr, addr, -TAG_GRANULE);
+ tcg_rt = cpu_reg(s, rt);
+ if (s->ata) {
+ gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt);
+ } else {
+ clean_addr = clean_data_tbi(s, addr);
+ gen_probe_access(s, clean_addr, MMU_DATA_LOAD, MO_8);
+ gen_address_with_allocation_tag0(tcg_rt, addr);
+ }
+ } else {
+ tcg_rt = cpu_reg_sp(s, rt);
+ if (!s->ata) {
+ /*
+ * For STG and ST2G, we need to check alignment and probe memory.
+ * TODO: For STZG and STZ2G, we could rely on the stores below,
+ * at least for system mode; user-only won't enforce alignment.
+ */
+ if (is_pair) {
+ gen_helper_st2g_stub(cpu_env, addr);
+ } else {
+ gen_helper_stg_stub(cpu_env, addr);
+ }
+ } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+ if (is_pair) {
+ gen_helper_st2g_parallel(cpu_env, addr, tcg_rt);
+ } else {
+ gen_helper_stg_parallel(cpu_env, addr, tcg_rt);
+ }
+ } else {
+ if (is_pair) {
+ gen_helper_st2g(cpu_env, addr, tcg_rt);
+ } else {
+ gen_helper_stg(cpu_env, addr, tcg_rt);
+ }
+ }
+ }
+
+ if (is_zero) {
+ TCGv_i64 clean_addr = clean_data_tbi(s, addr);
+ TCGv_i64 tcg_zero = tcg_const_i64(0);
+ int mem_index = get_mem_index(s);
+ int i, n = (1 + is_pair) << LOG2_TAG_GRANULE;
+
+ tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index,
+ MO_Q | MO_ALIGN_16);
+ for (i = 8; i < n; i += 8) {
+ tcg_gen_addi_i64(clean_addr, clean_addr, 8);
+ tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, MO_Q);
+ }
+ tcg_temp_free_i64(tcg_zero);
+ }
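+
+ /*
+ * i.e. STZG zeroes one 16-byte granule with two 8-byte stores,
+ * and STZ2G zeroes two granules with four.
+ */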
+
+ if (index != 0) {
+ /* pre-index or post-index */
+ if (index < 0) {
+ /* post-index */
+ tcg_gen_addi_i64(addr, addr, offset);
+ }
+ tcg_gen_mov_i64(cpu_reg_sp(s, rn), addr);
+ }
+}
+
/* Loads and stores */
static void disas_ldst(DisasContext *s, uint32_t insn)
{
@@ -3682,13 +4092,14 @@ static void disas_ldst(DisasContext *s, uint32_t insn)
case 0x0d: /* AdvSIMD load/store single structure */
disas_ldst_single_struct(s, insn);
break;
- case 0x19: /* LDAPR/STLR (unscaled immediate) */
- if (extract32(insn, 10, 2) != 0 ||
- extract32(insn, 21, 1) != 0) {
+ case 0x19:
+ if (extract32(insn, 21, 1) != 0) {
+ disas_ldst_tag(s, insn);
+ } else if (extract32(insn, 10, 2) == 0) {
+ disas_ldst_ldapr_stlr(s, insn);
+ } else {
unallocated_encoding(s);
- break;
}
- disas_ldst_ldapr_stlr(s, insn);
break;
default:
unallocated_encoding(s);
@@ -3727,22 +4138,22 @@ static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
/*
* Add/subtract (immediate)
*
- *  31 30 29 28       24 23 22 21         10 9   5 4   0
- * +--+--+--+-----------+-----+-------------+-----+-----+
- * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn |  Rd |
- * +--+--+--+-----------+-----+-------------+-----+-----+
+ *  31 30 29 28         23 22 21         10 9   5 4   0
+ * +--+--+--+-------------+--+-------------+-----+-----+
+ * |sf|op| S| 1 0 0 0 1 0 |sh|    imm12    |  Rn |  Rd |
+ * +--+--+--+-------------+--+-------------+-----+-----+
*
* sf: 0 -> 32bit, 1 -> 64bit
* op: 0 -> add , 1 -> sub
* S: 1 -> set flags
- * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
+ * sh: 1 -> LSL imm by 12
*/
static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
{
int rd = extract32(insn, 0, 5);
int rn = extract32(insn, 5, 5);
uint64_t imm = extract32(insn, 10, 12);
- int shift = extract32(insn, 22, 2);
+ bool shift = extract32(insn, 22, 1);
bool setflags = extract32(insn, 29, 1);
bool sub_op = extract32(insn, 30, 1);
bool is_64bit = extract32(insn, 31, 1);
@@ -3751,15 +4162,8 @@ static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
TCGv_i64 tcg_result;
- switch (shift) {
- case 0x0:
- break;
- case 0x1:
+ if (shift) {
imm <<= 12;
- break;
- default:
- unallocated_encoding(s);
- return;
}
tcg_result = tcg_temp_new_i64();
@@ -3788,6 +4192,54 @@ static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
tcg_temp_free_i64(tcg_result);
}
+/*
+ * Add/subtract (immediate, with tags)
+ *
+ *  31 30 29 28         23 22 21     16 14      10 9  5 4  0
+ * +--+--+--+-------------+--+---------+--+-------+-----+-----+
+ * |sf|op| S| 1 0 0 0 1 1 |o2|  uimm6  |o3| uimm4 |  Rn |  Rd |
+ * +--+--+--+-------------+--+---------+--+-------+-----+-----+
+ *
+ * op: 0 -> add, 1 -> sub
+ */
+static void disas_add_sub_imm_with_tags(DisasContext *s, uint32_t insn)
+{
+ int rd = extract32(insn, 0, 5);
+ int rn = extract32(insn, 5, 5);
+ int uimm4 = extract32(insn, 10, 4);
+ int uimm6 = extract32(insn, 16, 6);
+ bool sub_op = extract32(insn, 30, 1);
+ TCGv_i64 tcg_rn, tcg_rd;
+ int imm;
+
+ /* Test all of sf=1, S=0, o2=0, o3=0. */
+ if ((insn & 0xa040c000u) != 0x80000000u ||
+ !dc_isar_feature(aa64_mte_insn_reg, s)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ imm = uimm6 << LOG2_TAG_GRANULE;
+ if (sub_op) {
+ imm = -imm;
+ }
+
+ tcg_rn = cpu_reg_sp(s, rn);
+ tcg_rd = cpu_reg_sp(s, rd);
+
+ if (s->ata) {
+ TCGv_i32 offset = tcg_const_i32(imm);
+ TCGv_i32 tag_offset = tcg_const_i32(uimm4);
+
+ gen_helper_addsubg(tcg_rd, cpu_env, tcg_rn, offset, tag_offset);
+ tcg_temp_free_i32(tag_offset);
+ tcg_temp_free_i32(offset);
+ } else {
+ tcg_gen_addi_i64(tcg_rd, tcg_rn, imm);
+ gen_address_with_allocation_tag0(tcg_rd, tcg_rd);
+ }
+}
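+
+/*
+ * Worked example, assuming LOG2_TAG_GRANULE == 4 (16-byte granules):
+ * ADDG X0, X1, #48, #2 arrives with uimm6 = 3 and uimm4 = 2, so the
+ * address advances by 3 << 4 = 48 bytes and, with MTE active, the
+ * helper also steps the allocation tag by 2, skipping excluded tags.
+ */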
+
/* The input should be a value in the bottom e bits (with higher
* bits zero); returns that value replicated into every element
* of size e in a 64 bit integer.
@@ -4147,9 +4599,12 @@ static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
case 0x20: case 0x21: /* PC-rel. addressing */
disas_pc_rel_adr(s, insn);
break;
- case 0x22: case 0x23: /* Add/subtract (immediate) */
+ case 0x22: /* Add/subtract (immediate) */
disas_add_sub_imm(s, insn);
break;
+ case 0x23: /* Add/subtract (immediate, with tags) */
+ disas_add_sub_imm_with_tags(s, insn);
+ break;
case 0x24: /* Logical (immediate) */
disas_logic_imm(s, insn);
break;
@@ -5244,25 +5699,72 @@ static void handle_crc32(DisasContext *s,
*/
static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
{
- unsigned int sf, rm, opcode, rn, rd;
+ unsigned int sf, rm, opcode, rn, rd, setflag;
sf = extract32(insn, 31, 1);
+ setflag = extract32(insn, 29, 1);
rm = extract32(insn, 16, 5);
opcode = extract32(insn, 10, 6);
rn = extract32(insn, 5, 5);
rd = extract32(insn, 0, 5);
- if (extract32(insn, 29, 1)) {
+ if (setflag && opcode != 0) {
unallocated_encoding(s);
return;
}
switch (opcode) {
+ case 0: /* SUBP(S) */
+ if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
+ goto do_unallocated;
+ } else {
+ TCGv_i64 tcg_n, tcg_m, tcg_d;
+
+ tcg_n = read_cpu_reg_sp(s, rn, true);
+ tcg_m = read_cpu_reg_sp(s, rm, true);
+ tcg_gen_sextract_i64(tcg_n, tcg_n, 0, 56);
+ tcg_gen_sextract_i64(tcg_m, tcg_m, 0, 56);
+ tcg_d = cpu_reg(s, rd);
+
+ if (setflag) {
+ gen_sub_CC(true, tcg_d, tcg_n, tcg_m);
+ } else {
+ tcg_gen_sub_i64(tcg_d, tcg_n, tcg_m);
+ }
+ }
+ break;
case 2: /* UDIV */
handle_div(s, false, sf, rm, rn, rd);
break;
case 3: /* SDIV */
handle_div(s, true, sf, rm, rn, rd);
break;
+ case 4: /* IRG */
+ if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
+ goto do_unallocated;
+ }
+ if (s->ata) {
+ gen_helper_irg(cpu_reg_sp(s, rd), cpu_env,
+ cpu_reg_sp(s, rn), cpu_reg(s, rm));
+ } else {
+ gen_address_with_allocation_tag0(cpu_reg_sp(s, rd),
+ cpu_reg_sp(s, rn));
+ }
+ break;
+ case 5: /* GMI */
+ if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) {
+ goto do_unallocated;
+ } else {
+ TCGv_i64 t1 = tcg_const_i64(1);
+ TCGv_i64 t2 = tcg_temp_new_i64();
+
+ tcg_gen_extract_i64(t2, cpu_reg_sp(s, rn), 56, 4);
+ tcg_gen_shl_i64(t1, t1, t2);
+ tcg_gen_or_i64(cpu_reg(s, rd), cpu_reg(s, rm), t1);
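+ /*
+ * i.e. with allocation tag 5 in Xn, Xd = Xm | 0x20: the tag
+ * joins the exclude mask that a later IRG consumes.
+ */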
+
+ tcg_temp_free_i64(t1);
+ tcg_temp_free_i64(t2);
+ }
+ break;
case 8: /* LSLV */
handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
break;
@@ -13971,7 +14473,7 @@ static bool is_guarded_page(CPUARMState *env, DisasContext *s)
* table entry even for that case.
*/
return (tlb_hit(entry->addr_code, addr) &&
- env_tlb(env)->d[mmu_idx].iotlb[index].attrs.target_tlb_bit0);
+ arm_tlb_bti_gp(&env_tlb(env)->d[mmu_idx].iotlb[index].attrs));
#endif
}
@@ -14150,6 +14652,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
dc->mmu_idx = core_to_aa64_mmu_idx(core_mmu_idx);
dc->tbii = FIELD_EX32(tb_flags, TBFLAG_A64, TBII);
dc->tbid = FIELD_EX32(tb_flags, TBFLAG_A64, TBID);
+ dc->tcma = FIELD_EX32(tb_flags, TBFLAG_A64, TCMA);
dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
dc->user = (dc->current_el == 0);
@@ -14161,10 +14664,19 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
dc->bt = FIELD_EX32(tb_flags, TBFLAG_A64, BT);
dc->btype = FIELD_EX32(tb_flags, TBFLAG_A64, BTYPE);
dc->unpriv = FIELD_EX32(tb_flags, TBFLAG_A64, UNPRIV);
+ dc->ata = FIELD_EX32(tb_flags, TBFLAG_A64, ATA);
+ dc->mte_active[0] = FIELD_EX32(tb_flags, TBFLAG_A64, MTE_ACTIVE);
+ dc->mte_active[1] = FIELD_EX32(tb_flags, TBFLAG_A64, MTE0_ACTIVE);
dc->vec_len = 0;
dc->vec_stride = 0;
dc->cp_regs = arm_cpu->cp_regs;
dc->features = env->features;
+ dc->dcz_blocksize = arm_cpu->dcz_blocksize;
+
+#ifdef CONFIG_USER_ONLY
+ /* In sve_probe_page, we assume TBI is enabled. */
+ tcg_debug_assert(dc->tbid & 1);
+#endif
/* Single step state. The code-generation logic here is:
* SS_ACTIVE == 0:
@@ -14292,12 +14804,15 @@ static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
gen_goto_tb(dc, 1, dc->base.pc_next);
break;
default:
- case DISAS_UPDATE:
+ case DISAS_UPDATE_EXIT:
gen_a64_set_pc_im(dc->base.pc_next);
/* fall through */
case DISAS_EXIT:
tcg_gen_exit_tb(NULL, 0);
break;
+ case DISAS_UPDATE_NOCHAIN:
+ gen_a64_set_pc_im(dc->base.pc_next);
+ /* fall through */
case DISAS_JUMP:
tcg_gen_lookup_and_goto_ptr();
break;
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
index da0f59a..49e4865 100644
--- a/target/arm/translate-a64.h
+++ b/target/arm/translate-a64.h
@@ -40,6 +40,11 @@ TCGv_ptr get_fpstatus_ptr(bool);
bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
unsigned int imms, unsigned int immr);
bool sve_access_check(DisasContext *s);
+TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr);
+TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
+ bool tag_checked, int log2_size);
+TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
+ bool tag_checked, int log2_esize, int total_size);
/* We should have at some point before trying to access an FP register
* done the necessary access check, so assert that
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index ac7b311..f318ca2 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -4342,71 +4342,76 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
int len_remain = len % 8;
int nparts = len / 8 + ctpop8(len_remain);
int midx = get_mem_index(s);
- TCGv_i64 addr, t0, t1;
+ TCGv_i64 dirty_addr, clean_addr, t0, t1;
- addr = tcg_temp_new_i64();
- t0 = tcg_temp_new_i64();
+ dirty_addr = tcg_temp_new_i64();
+ tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
+ clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, MO_8, len);
+ tcg_temp_free_i64(dirty_addr);
- /* Note that unpredicated load/store of vector/predicate registers
+ /*
+ * Note that unpredicated load/store of vector/predicate registers
* are defined as a stream of bytes, which equates to little-endian
- * operations on larger quantities. There is no nice way to force
- * a little-endian load for aarch64_be-linux-user out of line.
- *
+ * operations on larger quantities.
* Attempt to keep code expansion to a minimum by limiting the
* amount of unrolling done.
*/
if (nparts <= 4) {
int i;
+ t0 = tcg_temp_new_i64();
for (i = 0; i < len_align; i += 8) {
- tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
- tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
+ tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
tcg_gen_st_i64(t0, cpu_env, vofs + i);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 8);
}
+ tcg_temp_free_i64(t0);
} else {
TCGLabel *loop = gen_new_label();
TCGv_ptr tp, i = tcg_const_local_ptr(0);
- gen_set_label(loop);
+ /* Copy the clean address into a local temp, live across the loop. */
+ t0 = clean_addr;
+ clean_addr = tcg_temp_local_new_i64();
+ tcg_gen_mov_i64(clean_addr, t0);
+ tcg_temp_free_i64(t0);
- /* Minimize the number of local temps that must be re-read from
- * the stack each iteration. Instead, re-compute values other
- * than the loop counter.
- */
- tp = tcg_temp_new_ptr();
- tcg_gen_addi_ptr(tp, i, imm);
- tcg_gen_extu_ptr_i64(addr, tp);
- tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
+ gen_set_label(loop);
- tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
+ t0 = tcg_temp_new_i64();
+ tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEQ);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 8);
+ tp = tcg_temp_new_ptr();
tcg_gen_add_ptr(tp, cpu_env, i);
tcg_gen_addi_ptr(i, i, 8);
tcg_gen_st_i64(t0, tp, vofs);
tcg_temp_free_ptr(tp);
+ tcg_temp_free_i64(t0);
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
tcg_temp_free_ptr(i);
}
- /* Predicate register loads can be any multiple of 2.
+ /*
+ * Predicate register loads can be any multiple of 2.
* Note that we still store the entire 64-bit unit into cpu_env.
*/
if (len_remain) {
- tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
-
+ t0 = tcg_temp_new_i64();
switch (len_remain) {
case 2:
case 4:
case 8:
- tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
+ tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
+ MO_LE | ctz32(len_remain));
break;
case 6:
t1 = tcg_temp_new_i64();
- tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
- tcg_gen_addi_i64(addr, addr, 4);
- tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
+ tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 4);
+ tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
tcg_temp_free_i64(t1);
break;
@@ -4415,9 +4420,9 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
g_assert_not_reached();
}
tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
+ tcg_temp_free_i64(t0);
}
- tcg_temp_free_i64(addr);
- tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(clean_addr);
}
/* Similarly for stores. */
@@ -4427,10 +4432,12 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
int len_remain = len % 8;
int nparts = len / 8 + ctpop8(len_remain);
int midx = get_mem_index(s);
- TCGv_i64 addr, t0;
+ TCGv_i64 dirty_addr, clean_addr, t0;
- addr = tcg_temp_new_i64();
- t0 = tcg_temp_new_i64();
+ dirty_addr = tcg_temp_new_i64();
+ tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
+ clean_addr = gen_mte_checkN(s, dirty_addr, true, rn != 31, MO_8, len);
+ tcg_temp_free_i64(dirty_addr);
/* Note that unpredicated load/store of vector/predicate registers
* are defined as a stream of bytes, which equates to little-endian
@@ -4443,33 +4450,35 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
if (nparts <= 4) {
int i;
+ t0 = tcg_temp_new_i64();
for (i = 0; i < len_align; i += 8) {
tcg_gen_ld_i64(t0, cpu_env, vofs + i);
- tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
- tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
+ tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 8);
}
+ tcg_temp_free_i64(t0);
} else {
TCGLabel *loop = gen_new_label();
- TCGv_ptr t2, i = tcg_const_local_ptr(0);
-
- gen_set_label(loop);
-
- t2 = tcg_temp_new_ptr();
- tcg_gen_add_ptr(t2, cpu_env, i);
- tcg_gen_ld_i64(t0, t2, vofs);
+ TCGv_ptr tp, i = tcg_const_local_ptr(0);
- /* Minimize the number of local temps that must be re-read from
- * the stack each iteration. Instead, re-compute values other
- * than the loop counter.
- */
- tcg_gen_addi_ptr(t2, i, imm);
- tcg_gen_extu_ptr_i64(addr, t2);
- tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
- tcg_temp_free_ptr(t2);
+ /* Copy the clean address into a local temp, live across the loop. */
+ t0 = clean_addr;
+ clean_addr = tcg_temp_local_new_i64();
+ tcg_gen_mov_i64(clean_addr, t0);
+ tcg_temp_free_i64(t0);
- tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
+ gen_set_label(loop);
+ t0 = tcg_temp_new_i64();
+ tp = tcg_temp_new_ptr();
+ tcg_gen_add_ptr(tp, cpu_env, i);
+ tcg_gen_ld_i64(t0, tp, vofs);
tcg_gen_addi_ptr(i, i, 8);
+ tcg_temp_free_ptr(tp);
+
+ tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEQ);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 8);
+ tcg_temp_free_i64(t0);
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
tcg_temp_free_ptr(i);
@@ -4477,29 +4486,30 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
/* Predicate register stores can be any multiple of 2. */
if (len_remain) {
+ t0 = tcg_temp_new_i64();
tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
- tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
switch (len_remain) {
case 2:
case 4:
case 8:
- tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
+ tcg_gen_qemu_st_i64(t0, clean_addr, midx,
+ MO_LE | ctz32(len_remain));
break;
case 6:
- tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
- tcg_gen_addi_i64(addr, addr, 4);
+ tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 4);
tcg_gen_shri_i64(t0, t0, 32);
- tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
+ tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
break;
default:
g_assert_not_reached();
}
+ tcg_temp_free_i64(t0);
}
- tcg_temp_free_i64(addr);
- tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(clean_addr);
}
static bool trans_LDR_zri(DisasContext *s, arg_rri *a)
@@ -4565,18 +4575,34 @@ static const uint8_t dtype_esz[16] = {
};
static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
- int dtype, gen_helper_gvec_mem *fn)
+ int dtype, uint32_t mte_n, bool is_write,
+ gen_helper_gvec_mem *fn)
{
unsigned vsz = vec_full_reg_size(s);
TCGv_ptr t_pg;
TCGv_i32 t_desc;
- int desc;
+ int desc = 0;
- /* For e.g. LD4, there are not enough arguments to pass all 4
+ /*
+ * For e.g. LD4, there are not enough arguments to pass all 4
* registers as pointers, so encode the regno into the data field.
* For consistency, do this even for LD1.
*/
- desc = simd_desc(vsz, vsz, zt);
+ if (s->mte_active[0]) {
+ int msz = dtype_msz(dtype);
+
+ desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
+ desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
+ desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
+ desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
+ desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz);
+ desc = FIELD_DP32(desc, MTEDESC, TSIZE, mte_n << msz);
+ desc <<= SVE_MTEDESC_SHIFT;
+ } else {
+ addr = clean_data_tbi(s, addr);
+ }
+
+ desc = simd_desc(vsz, vsz, zt | desc);
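+ /*
+ * The final layout mirrors the decode in sve_helper.c: simd
+ * oprsz/maxsz in the low bits, zt in the data field, and, when
+ * mte is active, the MTE fields above SVE_MTEDESC_SHIFT.
+ */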
t_desc = tcg_const_i32(desc);
t_pg = tcg_temp_new_ptr();
@@ -4590,64 +4616,132 @@ static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
static void do_ld_zpa(DisasContext *s, int zt, int pg,
TCGv_i64 addr, int dtype, int nreg)
{
- static gen_helper_gvec_mem * const fns[2][16][4] = {
- /* Little-endian */
- { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
+ static gen_helper_gvec_mem * const fns[2][2][16][4] = {
+ { /* mte inactive, little-endian */
+ { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
- { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
-
- { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
- gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
- { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
-
- { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
- gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
- { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
-
- { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
- gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
-
- /* Big-endian */
- { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
- gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
- { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
-
- { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
- gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
- { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
-
- { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
- gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
- { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
-
- { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
- { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
- gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } }
+ { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
+ gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
+ { gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
+ gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
+ { gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
+ gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
+
+ /* mte inactive, big-endian */
+ { { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
+ gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
+ { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
+ gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
+ { gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
+ gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
+ { gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
+ { gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
+ gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } } },
+
+ { /* mte active, little-endian */
+ { { gen_helper_sve_ld1bb_r_mte,
+ gen_helper_sve_ld2bb_r_mte,
+ gen_helper_sve_ld3bb_r_mte,
+ gen_helper_sve_ld4bb_r_mte },
+ { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1sds_le_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hh_le_r_mte,
+ gen_helper_sve_ld2hh_le_r_mte,
+ gen_helper_sve_ld3hh_le_r_mte,
+ gen_helper_sve_ld4hh_le_r_mte },
+ { gen_helper_sve_ld1hsu_le_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hdu_le_r_mte, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1hds_le_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hss_le_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1ss_le_r_mte,
+ gen_helper_sve_ld2ss_le_r_mte,
+ gen_helper_sve_ld3ss_le_r_mte,
+ gen_helper_sve_ld4ss_le_r_mte },
+ { gen_helper_sve_ld1sdu_le_r_mte, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1dd_le_r_mte,
+ gen_helper_sve_ld2dd_le_r_mte,
+ gen_helper_sve_ld3dd_le_r_mte,
+ gen_helper_sve_ld4dd_le_r_mte } },
+
+ /* mte active, big-endian */
+ { { gen_helper_sve_ld1bb_r_mte,
+ gen_helper_sve_ld2bb_r_mte,
+ gen_helper_sve_ld3bb_r_mte,
+ gen_helper_sve_ld4bb_r_mte },
+ { gen_helper_sve_ld1bhu_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bsu_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bdu_r_mte, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1sds_be_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hh_be_r_mte,
+ gen_helper_sve_ld2hh_be_r_mte,
+ gen_helper_sve_ld3hh_be_r_mte,
+ gen_helper_sve_ld4hh_be_r_mte },
+ { gen_helper_sve_ld1hsu_be_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hdu_be_r_mte, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1hds_be_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1hss_be_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1ss_be_r_mte,
+ gen_helper_sve_ld2ss_be_r_mte,
+ gen_helper_sve_ld3ss_be_r_mte,
+ gen_helper_sve_ld4ss_be_r_mte },
+ { gen_helper_sve_ld1sdu_be_r_mte, NULL, NULL, NULL },
+
+ { gen_helper_sve_ld1bds_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bss_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1bhs_r_mte, NULL, NULL, NULL },
+ { gen_helper_sve_ld1dd_be_r_mte,
+ gen_helper_sve_ld2dd_be_r_mte,
+ gen_helper_sve_ld3dd_be_r_mte,
+ gen_helper_sve_ld4dd_be_r_mte } } },
};
- gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg];
+ gen_helper_gvec_mem *fn
+ = fns[s->mte_active[0]][s->be_data == MO_BE][dtype][nreg];
- /* While there are holes in the table, they are not
+ /*
+ * While there are holes in the table, they are not
* accessible via the instruction encoding.
*/
assert(fn != NULL);
- do_mem_zpa(s, zt, pg, addr, dtype, fn);
+ do_mem_zpa(s, zt, pg, addr, dtype, nreg, false, fn);
}
static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a)
@@ -4681,104 +4775,188 @@ static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a)
static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a)
{
- static gen_helper_gvec_mem * const fns[2][16] = {
- /* Little-endian */
- { gen_helper_sve_ldff1bb_r,
- gen_helper_sve_ldff1bhu_r,
- gen_helper_sve_ldff1bsu_r,
- gen_helper_sve_ldff1bdu_r,
-
- gen_helper_sve_ldff1sds_le_r,
- gen_helper_sve_ldff1hh_le_r,
- gen_helper_sve_ldff1hsu_le_r,
- gen_helper_sve_ldff1hdu_le_r,
-
- gen_helper_sve_ldff1hds_le_r,
- gen_helper_sve_ldff1hss_le_r,
- gen_helper_sve_ldff1ss_le_r,
- gen_helper_sve_ldff1sdu_le_r,
-
- gen_helper_sve_ldff1bds_r,
- gen_helper_sve_ldff1bss_r,
- gen_helper_sve_ldff1bhs_r,
- gen_helper_sve_ldff1dd_le_r },
-
- /* Big-endian */
- { gen_helper_sve_ldff1bb_r,
- gen_helper_sve_ldff1bhu_r,
- gen_helper_sve_ldff1bsu_r,
- gen_helper_sve_ldff1bdu_r,
-
- gen_helper_sve_ldff1sds_be_r,
- gen_helper_sve_ldff1hh_be_r,
- gen_helper_sve_ldff1hsu_be_r,
- gen_helper_sve_ldff1hdu_be_r,
-
- gen_helper_sve_ldff1hds_be_r,
- gen_helper_sve_ldff1hss_be_r,
- gen_helper_sve_ldff1ss_be_r,
- gen_helper_sve_ldff1sdu_be_r,
-
- gen_helper_sve_ldff1bds_r,
- gen_helper_sve_ldff1bss_r,
- gen_helper_sve_ldff1bhs_r,
- gen_helper_sve_ldff1dd_be_r },
+ static gen_helper_gvec_mem * const fns[2][2][16] = {
+ { /* mte inactive, little-endian */
+ { gen_helper_sve_ldff1bb_r,
+ gen_helper_sve_ldff1bhu_r,
+ gen_helper_sve_ldff1bsu_r,
+ gen_helper_sve_ldff1bdu_r,
+
+ gen_helper_sve_ldff1sds_le_r,
+ gen_helper_sve_ldff1hh_le_r,
+ gen_helper_sve_ldff1hsu_le_r,
+ gen_helper_sve_ldff1hdu_le_r,
+
+ gen_helper_sve_ldff1hds_le_r,
+ gen_helper_sve_ldff1hss_le_r,
+ gen_helper_sve_ldff1ss_le_r,
+ gen_helper_sve_ldff1sdu_le_r,
+
+ gen_helper_sve_ldff1bds_r,
+ gen_helper_sve_ldff1bss_r,
+ gen_helper_sve_ldff1bhs_r,
+ gen_helper_sve_ldff1dd_le_r },
+
+ /* mte inactive, big-endian */
+ { gen_helper_sve_ldff1bb_r,
+ gen_helper_sve_ldff1bhu_r,
+ gen_helper_sve_ldff1bsu_r,
+ gen_helper_sve_ldff1bdu_r,
+
+ gen_helper_sve_ldff1sds_be_r,
+ gen_helper_sve_ldff1hh_be_r,
+ gen_helper_sve_ldff1hsu_be_r,
+ gen_helper_sve_ldff1hdu_be_r,
+
+ gen_helper_sve_ldff1hds_be_r,
+ gen_helper_sve_ldff1hss_be_r,
+ gen_helper_sve_ldff1ss_be_r,
+ gen_helper_sve_ldff1sdu_be_r,
+
+ gen_helper_sve_ldff1bds_r,
+ gen_helper_sve_ldff1bss_r,
+ gen_helper_sve_ldff1bhs_r,
+ gen_helper_sve_ldff1dd_be_r } },
+
+ { /* mte active, little-endian */
+ { gen_helper_sve_ldff1bb_r_mte,
+ gen_helper_sve_ldff1bhu_r_mte,
+ gen_helper_sve_ldff1bsu_r_mte,
+ gen_helper_sve_ldff1bdu_r_mte,
+
+ gen_helper_sve_ldff1sds_le_r_mte,
+ gen_helper_sve_ldff1hh_le_r_mte,
+ gen_helper_sve_ldff1hsu_le_r_mte,
+ gen_helper_sve_ldff1hdu_le_r_mte,
+
+ gen_helper_sve_ldff1hds_le_r_mte,
+ gen_helper_sve_ldff1hss_le_r_mte,
+ gen_helper_sve_ldff1ss_le_r_mte,
+ gen_helper_sve_ldff1sdu_le_r_mte,
+
+ gen_helper_sve_ldff1bds_r_mte,
+ gen_helper_sve_ldff1bss_r_mte,
+ gen_helper_sve_ldff1bhs_r_mte,
+ gen_helper_sve_ldff1dd_le_r_mte },
+
+ /* mte active, big-endian */
+ { gen_helper_sve_ldff1bb_r_mte,
+ gen_helper_sve_ldff1bhu_r_mte,
+ gen_helper_sve_ldff1bsu_r_mte,
+ gen_helper_sve_ldff1bdu_r_mte,
+
+ gen_helper_sve_ldff1sds_be_r_mte,
+ gen_helper_sve_ldff1hh_be_r_mte,
+ gen_helper_sve_ldff1hsu_be_r_mte,
+ gen_helper_sve_ldff1hdu_be_r_mte,
+
+ gen_helper_sve_ldff1hds_be_r_mte,
+ gen_helper_sve_ldff1hss_be_r_mte,
+ gen_helper_sve_ldff1ss_be_r_mte,
+ gen_helper_sve_ldff1sdu_be_r_mte,
+
+ gen_helper_sve_ldff1bds_r_mte,
+ gen_helper_sve_ldff1bss_r_mte,
+ gen_helper_sve_ldff1bhs_r_mte,
+ gen_helper_sve_ldff1dd_be_r_mte } },
};
if (sve_access_check(s)) {
TCGv_i64 addr = new_tmp_a64(s);
tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
- do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
- fns[s->be_data == MO_BE][a->dtype]);
+ do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
+ fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
}
return true;
}
static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
{
- static gen_helper_gvec_mem * const fns[2][16] = {
- /* Little-endian */
- { gen_helper_sve_ldnf1bb_r,
- gen_helper_sve_ldnf1bhu_r,
- gen_helper_sve_ldnf1bsu_r,
- gen_helper_sve_ldnf1bdu_r,
-
- gen_helper_sve_ldnf1sds_le_r,
- gen_helper_sve_ldnf1hh_le_r,
- gen_helper_sve_ldnf1hsu_le_r,
- gen_helper_sve_ldnf1hdu_le_r,
-
- gen_helper_sve_ldnf1hds_le_r,
- gen_helper_sve_ldnf1hss_le_r,
- gen_helper_sve_ldnf1ss_le_r,
- gen_helper_sve_ldnf1sdu_le_r,
-
- gen_helper_sve_ldnf1bds_r,
- gen_helper_sve_ldnf1bss_r,
- gen_helper_sve_ldnf1bhs_r,
- gen_helper_sve_ldnf1dd_le_r },
-
- /* Big-endian */
- { gen_helper_sve_ldnf1bb_r,
- gen_helper_sve_ldnf1bhu_r,
- gen_helper_sve_ldnf1bsu_r,
- gen_helper_sve_ldnf1bdu_r,
-
- gen_helper_sve_ldnf1sds_be_r,
- gen_helper_sve_ldnf1hh_be_r,
- gen_helper_sve_ldnf1hsu_be_r,
- gen_helper_sve_ldnf1hdu_be_r,
-
- gen_helper_sve_ldnf1hds_be_r,
- gen_helper_sve_ldnf1hss_be_r,
- gen_helper_sve_ldnf1ss_be_r,
- gen_helper_sve_ldnf1sdu_be_r,
-
- gen_helper_sve_ldnf1bds_r,
- gen_helper_sve_ldnf1bss_r,
- gen_helper_sve_ldnf1bhs_r,
- gen_helper_sve_ldnf1dd_be_r },
+ static gen_helper_gvec_mem * const fns[2][2][16] = {
+ { /* mte inactive, little-endian */
+ { gen_helper_sve_ldnf1bb_r,
+ gen_helper_sve_ldnf1bhu_r,
+ gen_helper_sve_ldnf1bsu_r,
+ gen_helper_sve_ldnf1bdu_r,
+
+ gen_helper_sve_ldnf1sds_le_r,
+ gen_helper_sve_ldnf1hh_le_r,
+ gen_helper_sve_ldnf1hsu_le_r,
+ gen_helper_sve_ldnf1hdu_le_r,
+
+ gen_helper_sve_ldnf1hds_le_r,
+ gen_helper_sve_ldnf1hss_le_r,
+ gen_helper_sve_ldnf1ss_le_r,
+ gen_helper_sve_ldnf1sdu_le_r,
+
+ gen_helper_sve_ldnf1bds_r,
+ gen_helper_sve_ldnf1bss_r,
+ gen_helper_sve_ldnf1bhs_r,
+ gen_helper_sve_ldnf1dd_le_r },
+
+ /* mte inactive, big-endian */
+ { gen_helper_sve_ldnf1bb_r,
+ gen_helper_sve_ldnf1bhu_r,
+ gen_helper_sve_ldnf1bsu_r,
+ gen_helper_sve_ldnf1bdu_r,
+
+ gen_helper_sve_ldnf1sds_be_r,
+ gen_helper_sve_ldnf1hh_be_r,
+ gen_helper_sve_ldnf1hsu_be_r,
+ gen_helper_sve_ldnf1hdu_be_r,
+
+ gen_helper_sve_ldnf1hds_be_r,
+ gen_helper_sve_ldnf1hss_be_r,
+ gen_helper_sve_ldnf1ss_be_r,
+ gen_helper_sve_ldnf1sdu_be_r,
+
+ gen_helper_sve_ldnf1bds_r,
+ gen_helper_sve_ldnf1bss_r,
+ gen_helper_sve_ldnf1bhs_r,
+ gen_helper_sve_ldnf1dd_be_r } },
+
+      { /* mte active, little-endian */
+ { gen_helper_sve_ldnf1bb_r_mte,
+ gen_helper_sve_ldnf1bhu_r_mte,
+ gen_helper_sve_ldnf1bsu_r_mte,
+ gen_helper_sve_ldnf1bdu_r_mte,
+
+ gen_helper_sve_ldnf1sds_le_r_mte,
+ gen_helper_sve_ldnf1hh_le_r_mte,
+ gen_helper_sve_ldnf1hsu_le_r_mte,
+ gen_helper_sve_ldnf1hdu_le_r_mte,
+
+ gen_helper_sve_ldnf1hds_le_r_mte,
+ gen_helper_sve_ldnf1hss_le_r_mte,
+ gen_helper_sve_ldnf1ss_le_r_mte,
+ gen_helper_sve_ldnf1sdu_le_r_mte,
+
+ gen_helper_sve_ldnf1bds_r_mte,
+ gen_helper_sve_ldnf1bss_r_mte,
+ gen_helper_sve_ldnf1bhs_r_mte,
+ gen_helper_sve_ldnf1dd_le_r_mte },
+
+        /* mte active, big-endian */
+ { gen_helper_sve_ldnf1bb_r_mte,
+ gen_helper_sve_ldnf1bhu_r_mte,
+ gen_helper_sve_ldnf1bsu_r_mte,
+ gen_helper_sve_ldnf1bdu_r_mte,
+
+ gen_helper_sve_ldnf1sds_be_r_mte,
+ gen_helper_sve_ldnf1hh_be_r_mte,
+ gen_helper_sve_ldnf1hsu_be_r_mte,
+ gen_helper_sve_ldnf1hdu_be_r_mte,
+
+ gen_helper_sve_ldnf1hds_be_r_mte,
+ gen_helper_sve_ldnf1hss_be_r_mte,
+ gen_helper_sve_ldnf1ss_be_r_mte,
+ gen_helper_sve_ldnf1sdu_be_r_mte,
+
+ gen_helper_sve_ldnf1bds_r_mte,
+ gen_helper_sve_ldnf1bss_r_mte,
+ gen_helper_sve_ldnf1bhs_r_mte,
+ gen_helper_sve_ldnf1dd_be_r_mte } },
};
if (sve_access_check(s)) {
@@ -4788,8 +4966,8 @@ static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a)
TCGv_i64 addr = new_tmp_a64(s);
tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
- do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
- fns[s->be_data == MO_BE][a->dtype]);
+ do_mem_zpa(s, a->rd, a->pg, addr, a->dtype, 1, false,
+ fns[s->mte_active[0]][s->be_data == MO_BE][a->dtype]);
}
return true;
}
@@ -4873,16 +5051,18 @@ static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a)
/* Load and broadcast element. */
static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
{
- if (!sve_access_check(s)) {
- return true;
- }
-
unsigned vsz = vec_full_reg_size(s);
unsigned psz = pred_full_reg_size(s);
unsigned esz = dtype_esz[a->dtype];
unsigned msz = dtype_msz(a->dtype);
- TCGLabel *over = gen_new_label();
- TCGv_i64 temp;
+ TCGLabel *over;
+ TCGv_i64 temp, clean_addr;
+
+ if (!sve_access_check(s)) {
+ return true;
+ }
+
+ over = gen_new_label();
/* If the guarding predicate has no bits set, no load occurs. */
if (psz <= 8) {
@@ -4905,7 +5085,9 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
/* Load the data. */
temp = tcg_temp_new_i64();
tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
- tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
+ clean_addr = gen_mte_check1(s, temp, false, true, msz);
+
+ tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
s->be_data | dtype_mop[a->dtype]);
/* Broadcast to *all* elements. */
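LD1R now routes its single scalar load through gen_mte_check1() from earlier in this series: the dirty (possibly tagged) address goes in and the returned clean address is what the actual access must use. A sketch of the pattern as it would appear in a translator function; offset, dest and memop are placeholders for instruction-specific values:

    /* gen_mte_check1(s, addr, is_write, tag_checked, log2_size) */
    TCGv_i64 dirty = tcg_temp_new_i64();
    tcg_gen_addi_i64(dirty, cpu_reg_sp(s, rn), offset);

    TCGv_i64 clean = gen_mte_check1(s, dirty, false, true, msz);
    tcg_gen_qemu_ld_i64(dest, clean, get_mem_index(s), memop);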
@@ -4922,73 +5104,125 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
int msz, int esz, int nreg)
{
- static gen_helper_gvec_mem * const fn_single[2][4][4] = {
- { { gen_helper_sve_st1bb_r,
- gen_helper_sve_st1bh_r,
- gen_helper_sve_st1bs_r,
- gen_helper_sve_st1bd_r },
- { NULL,
- gen_helper_sve_st1hh_le_r,
- gen_helper_sve_st1hs_le_r,
- gen_helper_sve_st1hd_le_r },
- { NULL, NULL,
- gen_helper_sve_st1ss_le_r,
- gen_helper_sve_st1sd_le_r },
- { NULL, NULL, NULL,
- gen_helper_sve_st1dd_le_r } },
- { { gen_helper_sve_st1bb_r,
- gen_helper_sve_st1bh_r,
- gen_helper_sve_st1bs_r,
- gen_helper_sve_st1bd_r },
- { NULL,
- gen_helper_sve_st1hh_be_r,
- gen_helper_sve_st1hs_be_r,
- gen_helper_sve_st1hd_be_r },
- { NULL, NULL,
- gen_helper_sve_st1ss_be_r,
- gen_helper_sve_st1sd_be_r },
- { NULL, NULL, NULL,
- gen_helper_sve_st1dd_be_r } },
+ static gen_helper_gvec_mem * const fn_single[2][2][4][4] = {
+ { { { gen_helper_sve_st1bb_r,
+ gen_helper_sve_st1bh_r,
+ gen_helper_sve_st1bs_r,
+ gen_helper_sve_st1bd_r },
+ { NULL,
+ gen_helper_sve_st1hh_le_r,
+ gen_helper_sve_st1hs_le_r,
+ gen_helper_sve_st1hd_le_r },
+ { NULL, NULL,
+ gen_helper_sve_st1ss_le_r,
+ gen_helper_sve_st1sd_le_r },
+ { NULL, NULL, NULL,
+ gen_helper_sve_st1dd_le_r } },
+ { { gen_helper_sve_st1bb_r,
+ gen_helper_sve_st1bh_r,
+ gen_helper_sve_st1bs_r,
+ gen_helper_sve_st1bd_r },
+ { NULL,
+ gen_helper_sve_st1hh_be_r,
+ gen_helper_sve_st1hs_be_r,
+ gen_helper_sve_st1hd_be_r },
+ { NULL, NULL,
+ gen_helper_sve_st1ss_be_r,
+ gen_helper_sve_st1sd_be_r },
+ { NULL, NULL, NULL,
+ gen_helper_sve_st1dd_be_r } } },
+
+ { { { gen_helper_sve_st1bb_r_mte,
+ gen_helper_sve_st1bh_r_mte,
+ gen_helper_sve_st1bs_r_mte,
+ gen_helper_sve_st1bd_r_mte },
+ { NULL,
+ gen_helper_sve_st1hh_le_r_mte,
+ gen_helper_sve_st1hs_le_r_mte,
+ gen_helper_sve_st1hd_le_r_mte },
+ { NULL, NULL,
+ gen_helper_sve_st1ss_le_r_mte,
+ gen_helper_sve_st1sd_le_r_mte },
+ { NULL, NULL, NULL,
+ gen_helper_sve_st1dd_le_r_mte } },
+ { { gen_helper_sve_st1bb_r_mte,
+ gen_helper_sve_st1bh_r_mte,
+ gen_helper_sve_st1bs_r_mte,
+ gen_helper_sve_st1bd_r_mte },
+ { NULL,
+ gen_helper_sve_st1hh_be_r_mte,
+ gen_helper_sve_st1hs_be_r_mte,
+ gen_helper_sve_st1hd_be_r_mte },
+ { NULL, NULL,
+ gen_helper_sve_st1ss_be_r_mte,
+ gen_helper_sve_st1sd_be_r_mte },
+ { NULL, NULL, NULL,
+ gen_helper_sve_st1dd_be_r_mte } } },
};
- static gen_helper_gvec_mem * const fn_multiple[2][3][4] = {
- { { gen_helper_sve_st2bb_r,
- gen_helper_sve_st2hh_le_r,
- gen_helper_sve_st2ss_le_r,
- gen_helper_sve_st2dd_le_r },
- { gen_helper_sve_st3bb_r,
- gen_helper_sve_st3hh_le_r,
- gen_helper_sve_st3ss_le_r,
- gen_helper_sve_st3dd_le_r },
- { gen_helper_sve_st4bb_r,
- gen_helper_sve_st4hh_le_r,
- gen_helper_sve_st4ss_le_r,
- gen_helper_sve_st4dd_le_r } },
- { { gen_helper_sve_st2bb_r,
- gen_helper_sve_st2hh_be_r,
- gen_helper_sve_st2ss_be_r,
- gen_helper_sve_st2dd_be_r },
- { gen_helper_sve_st3bb_r,
- gen_helper_sve_st3hh_be_r,
- gen_helper_sve_st3ss_be_r,
- gen_helper_sve_st3dd_be_r },
- { gen_helper_sve_st4bb_r,
- gen_helper_sve_st4hh_be_r,
- gen_helper_sve_st4ss_be_r,
- gen_helper_sve_st4dd_be_r } },
+ static gen_helper_gvec_mem * const fn_multiple[2][2][3][4] = {
+ { { { gen_helper_sve_st2bb_r,
+ gen_helper_sve_st2hh_le_r,
+ gen_helper_sve_st2ss_le_r,
+ gen_helper_sve_st2dd_le_r },
+ { gen_helper_sve_st3bb_r,
+ gen_helper_sve_st3hh_le_r,
+ gen_helper_sve_st3ss_le_r,
+ gen_helper_sve_st3dd_le_r },
+ { gen_helper_sve_st4bb_r,
+ gen_helper_sve_st4hh_le_r,
+ gen_helper_sve_st4ss_le_r,
+ gen_helper_sve_st4dd_le_r } },
+ { { gen_helper_sve_st2bb_r,
+ gen_helper_sve_st2hh_be_r,
+ gen_helper_sve_st2ss_be_r,
+ gen_helper_sve_st2dd_be_r },
+ { gen_helper_sve_st3bb_r,
+ gen_helper_sve_st3hh_be_r,
+ gen_helper_sve_st3ss_be_r,
+ gen_helper_sve_st3dd_be_r },
+ { gen_helper_sve_st4bb_r,
+ gen_helper_sve_st4hh_be_r,
+ gen_helper_sve_st4ss_be_r,
+ gen_helper_sve_st4dd_be_r } } },
+ { { { gen_helper_sve_st2bb_r_mte,
+ gen_helper_sve_st2hh_le_r_mte,
+ gen_helper_sve_st2ss_le_r_mte,
+ gen_helper_sve_st2dd_le_r_mte },
+ { gen_helper_sve_st3bb_r_mte,
+ gen_helper_sve_st3hh_le_r_mte,
+ gen_helper_sve_st3ss_le_r_mte,
+ gen_helper_sve_st3dd_le_r_mte },
+ { gen_helper_sve_st4bb_r_mte,
+ gen_helper_sve_st4hh_le_r_mte,
+ gen_helper_sve_st4ss_le_r_mte,
+ gen_helper_sve_st4dd_le_r_mte } },
+ { { gen_helper_sve_st2bb_r_mte,
+ gen_helper_sve_st2hh_be_r_mte,
+ gen_helper_sve_st2ss_be_r_mte,
+ gen_helper_sve_st2dd_be_r_mte },
+ { gen_helper_sve_st3bb_r_mte,
+ gen_helper_sve_st3hh_be_r_mte,
+ gen_helper_sve_st3ss_be_r_mte,
+ gen_helper_sve_st3dd_be_r_mte },
+ { gen_helper_sve_st4bb_r_mte,
+ gen_helper_sve_st4hh_be_r_mte,
+ gen_helper_sve_st4ss_be_r_mte,
+ gen_helper_sve_st4dd_be_r_mte } } },
};
gen_helper_gvec_mem *fn;
int be = s->be_data == MO_BE;
if (nreg == 0) {
/* ST1 */
- fn = fn_single[be][msz][esz];
+ fn = fn_single[s->mte_active[0]][be][msz][esz];
+ nreg = 1;
} else {
/* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
assert(msz == esz);
- fn = fn_multiple[be][nreg - 1][msz];
+ fn = fn_multiple[s->mte_active[0]][be][nreg - 1][msz];
}
assert(fn != NULL);
- do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), fn);
+ do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), nreg, true, fn);
}
static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a)
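Normalizing nreg to 1 for ST1 matters because do_mem_zpa computes the MTEDESC TSIZE field as nreg << msz: the bytes touched per active element across all registers of the group. A small worked check of that arithmetic, using ST2W as the example case:

    #include <assert.h>

    int main(void)
    {
        /* ST2W: two registers, 32-bit elements (msz = 2). */
        unsigned nreg = 2, msz = 2;
        unsigned esize = 1u << msz;     /* ESIZE: 4 bytes per element */
        unsigned tsize = nreg << msz;   /* TSIZE: 8 bytes per element,
                                         * across both registers */
        assert(tsize == nreg * esize);
        return 0;
    }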
@@ -5027,7 +5261,7 @@ static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a)
*/
static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
- int scale, TCGv_i64 scalar, int msz,
+ int scale, TCGv_i64 scalar, int msz, bool is_write,
gen_helper_gvec_mem_scatter *fn)
{
unsigned vsz = vec_full_reg_size(s);
@@ -5035,8 +5269,16 @@ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
TCGv_ptr t_pg = tcg_temp_new_ptr();
TCGv_ptr t_zt = tcg_temp_new_ptr();
TCGv_i32 t_desc;
- int desc;
+ int desc = 0;
+ if (s->mte_active[0]) {
+ desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
+ desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
+ desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
+ desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
+ desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz);
+ desc <<= SVE_MTEDESC_SHIFT;
+ }
-    desc = simd_desc(vsz, vsz, scale);
+    desc = simd_desc(vsz, vsz, desc | scale);
t_desc = tcg_const_i32(desc);
@@ -5051,176 +5293,339 @@ static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
tcg_temp_free_i32(t_desc);
}
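Unlike the contiguous path, the gather/scatter descriptor carries no TSIZE: each vector element is checked at its own address, so ESIZE alone describes the access. An abridged sketch of the code above, keeping only what differs from do_mem_zpa:

    if (s->mte_active[0]) {
        /* ... MIDX/TBI/TCMA/WRITE as before, but no TSIZE ... */
        desc = FIELD_DP32(desc, MTEDESC, ESIZE, 1 << msz);
        desc <<= SVE_MTEDESC_SHIFT;
    }
    desc = simd_desc(vsz, vsz, desc | scale);   /* scale in the low bits */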
-/* Indexed by [be][ff][xs][u][msz]. */
-static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][2][3] = {
- /* Little-endian */
- { { { { gen_helper_sve_ldbss_zsu,
- gen_helper_sve_ldhss_le_zsu,
- NULL, },
- { gen_helper_sve_ldbsu_zsu,
- gen_helper_sve_ldhsu_le_zsu,
- gen_helper_sve_ldss_le_zsu, } },
- { { gen_helper_sve_ldbss_zss,
- gen_helper_sve_ldhss_le_zss,
- NULL, },
- { gen_helper_sve_ldbsu_zss,
- gen_helper_sve_ldhsu_le_zss,
- gen_helper_sve_ldss_le_zss, } } },
-
- /* First-fault */
- { { { gen_helper_sve_ldffbss_zsu,
- gen_helper_sve_ldffhss_le_zsu,
- NULL, },
- { gen_helper_sve_ldffbsu_zsu,
- gen_helper_sve_ldffhsu_le_zsu,
- gen_helper_sve_ldffss_le_zsu, } },
- { { gen_helper_sve_ldffbss_zss,
- gen_helper_sve_ldffhss_le_zss,
- NULL, },
- { gen_helper_sve_ldffbsu_zss,
- gen_helper_sve_ldffhsu_le_zss,
- gen_helper_sve_ldffss_le_zss, } } } },
-
- /* Big-endian */
- { { { { gen_helper_sve_ldbss_zsu,
- gen_helper_sve_ldhss_be_zsu,
- NULL, },
- { gen_helper_sve_ldbsu_zsu,
- gen_helper_sve_ldhsu_be_zsu,
- gen_helper_sve_ldss_be_zsu, } },
- { { gen_helper_sve_ldbss_zss,
- gen_helper_sve_ldhss_be_zss,
- NULL, },
- { gen_helper_sve_ldbsu_zss,
- gen_helper_sve_ldhsu_be_zss,
- gen_helper_sve_ldss_be_zss, } } },
-
- /* First-fault */
- { { { gen_helper_sve_ldffbss_zsu,
- gen_helper_sve_ldffhss_be_zsu,
- NULL, },
- { gen_helper_sve_ldffbsu_zsu,
- gen_helper_sve_ldffhsu_be_zsu,
- gen_helper_sve_ldffss_be_zsu, } },
- { { gen_helper_sve_ldffbss_zss,
- gen_helper_sve_ldffhss_be_zss,
- NULL, },
- { gen_helper_sve_ldffbsu_zss,
- gen_helper_sve_ldffhsu_be_zss,
- gen_helper_sve_ldffss_be_zss, } } } },
+/* Indexed by [mte][be][ff][xs][u][msz]. */
+static gen_helper_gvec_mem_scatter * const
+gather_load_fn32[2][2][2][2][2][3] = {
+ { /* MTE Inactive */
+ { /* Little-endian */
+ { { { gen_helper_sve_ldbss_zsu,
+ gen_helper_sve_ldhss_le_zsu,
+ NULL, },
+ { gen_helper_sve_ldbsu_zsu,
+ gen_helper_sve_ldhsu_le_zsu,
+ gen_helper_sve_ldss_le_zsu, } },
+ { { gen_helper_sve_ldbss_zss,
+ gen_helper_sve_ldhss_le_zss,
+ NULL, },
+ { gen_helper_sve_ldbsu_zss,
+ gen_helper_sve_ldhsu_le_zss,
+ gen_helper_sve_ldss_le_zss, } } },
+
+ /* First-fault */
+ { { { gen_helper_sve_ldffbss_zsu,
+ gen_helper_sve_ldffhss_le_zsu,
+ NULL, },
+ { gen_helper_sve_ldffbsu_zsu,
+ gen_helper_sve_ldffhsu_le_zsu,
+ gen_helper_sve_ldffss_le_zsu, } },
+ { { gen_helper_sve_ldffbss_zss,
+ gen_helper_sve_ldffhss_le_zss,
+ NULL, },
+ { gen_helper_sve_ldffbsu_zss,
+ gen_helper_sve_ldffhsu_le_zss,
+ gen_helper_sve_ldffss_le_zss, } } } },
+
+ { /* Big-endian */
+ { { { gen_helper_sve_ldbss_zsu,
+ gen_helper_sve_ldhss_be_zsu,
+ NULL, },
+ { gen_helper_sve_ldbsu_zsu,
+ gen_helper_sve_ldhsu_be_zsu,
+ gen_helper_sve_ldss_be_zsu, } },
+ { { gen_helper_sve_ldbss_zss,
+ gen_helper_sve_ldhss_be_zss,
+ NULL, },
+ { gen_helper_sve_ldbsu_zss,
+ gen_helper_sve_ldhsu_be_zss,
+ gen_helper_sve_ldss_be_zss, } } },
+
+ /* First-fault */
+ { { { gen_helper_sve_ldffbss_zsu,
+ gen_helper_sve_ldffhss_be_zsu,
+ NULL, },
+ { gen_helper_sve_ldffbsu_zsu,
+ gen_helper_sve_ldffhsu_be_zsu,
+ gen_helper_sve_ldffss_be_zsu, } },
+ { { gen_helper_sve_ldffbss_zss,
+ gen_helper_sve_ldffhss_be_zss,
+ NULL, },
+ { gen_helper_sve_ldffbsu_zss,
+ gen_helper_sve_ldffhsu_be_zss,
+ gen_helper_sve_ldffss_be_zss, } } } } },
+ { /* MTE Active */
+ { /* Little-endian */
+ { { { gen_helper_sve_ldbss_zsu_mte,
+ gen_helper_sve_ldhss_le_zsu_mte,
+ NULL, },
+ { gen_helper_sve_ldbsu_zsu_mte,
+ gen_helper_sve_ldhsu_le_zsu_mte,
+ gen_helper_sve_ldss_le_zsu_mte, } },
+ { { gen_helper_sve_ldbss_zss_mte,
+ gen_helper_sve_ldhss_le_zss_mte,
+ NULL, },
+ { gen_helper_sve_ldbsu_zss_mte,
+ gen_helper_sve_ldhsu_le_zss_mte,
+ gen_helper_sve_ldss_le_zss_mte, } } },
+
+ /* First-fault */
+ { { { gen_helper_sve_ldffbss_zsu_mte,
+ gen_helper_sve_ldffhss_le_zsu_mte,
+ NULL, },
+ { gen_helper_sve_ldffbsu_zsu_mte,
+ gen_helper_sve_ldffhsu_le_zsu_mte,
+ gen_helper_sve_ldffss_le_zsu_mte, } },
+ { { gen_helper_sve_ldffbss_zss_mte,
+ gen_helper_sve_ldffhss_le_zss_mte,
+ NULL, },
+ { gen_helper_sve_ldffbsu_zss_mte,
+ gen_helper_sve_ldffhsu_le_zss_mte,
+ gen_helper_sve_ldffss_le_zss_mte, } } } },
+
+ { /* Big-endian */
+ { { { gen_helper_sve_ldbss_zsu_mte,
+ gen_helper_sve_ldhss_be_zsu_mte,
+ NULL, },
+ { gen_helper_sve_ldbsu_zsu_mte,
+ gen_helper_sve_ldhsu_be_zsu_mte,
+ gen_helper_sve_ldss_be_zsu_mte, } },
+ { { gen_helper_sve_ldbss_zss_mte,
+ gen_helper_sve_ldhss_be_zss_mte,
+ NULL, },
+ { gen_helper_sve_ldbsu_zss_mte,
+ gen_helper_sve_ldhsu_be_zss_mte,
+ gen_helper_sve_ldss_be_zss_mte, } } },
+
+ /* First-fault */
+ { { { gen_helper_sve_ldffbss_zsu_mte,
+ gen_helper_sve_ldffhss_be_zsu_mte,
+ NULL, },
+ { gen_helper_sve_ldffbsu_zsu_mte,
+ gen_helper_sve_ldffhsu_be_zsu_mte,
+ gen_helper_sve_ldffss_be_zsu_mte, } },
+ { { gen_helper_sve_ldffbss_zss_mte,
+ gen_helper_sve_ldffhss_be_zss_mte,
+ NULL, },
+ { gen_helper_sve_ldffbsu_zss_mte,
+ gen_helper_sve_ldffhsu_be_zss_mte,
+ gen_helper_sve_ldffss_be_zss_mte, } } } } },
};
/* Note that we overload xs=2 to indicate 64-bit offset. */
-static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][2][3][2][4] = {
- /* Little-endian */
- { { { { gen_helper_sve_ldbds_zsu,
- gen_helper_sve_ldhds_le_zsu,
- gen_helper_sve_ldsds_le_zsu,
- NULL, },
- { gen_helper_sve_ldbdu_zsu,
- gen_helper_sve_ldhdu_le_zsu,
- gen_helper_sve_ldsdu_le_zsu,
- gen_helper_sve_lddd_le_zsu, } },
- { { gen_helper_sve_ldbds_zss,
- gen_helper_sve_ldhds_le_zss,
- gen_helper_sve_ldsds_le_zss,
- NULL, },
- { gen_helper_sve_ldbdu_zss,
- gen_helper_sve_ldhdu_le_zss,
- gen_helper_sve_ldsdu_le_zss,
- gen_helper_sve_lddd_le_zss, } },
- { { gen_helper_sve_ldbds_zd,
- gen_helper_sve_ldhds_le_zd,
- gen_helper_sve_ldsds_le_zd,
- NULL, },
- { gen_helper_sve_ldbdu_zd,
- gen_helper_sve_ldhdu_le_zd,
- gen_helper_sve_ldsdu_le_zd,
- gen_helper_sve_lddd_le_zd, } } },
-
- /* First-fault */
- { { { gen_helper_sve_ldffbds_zsu,
- gen_helper_sve_ldffhds_le_zsu,
- gen_helper_sve_ldffsds_le_zsu,
- NULL, },
- { gen_helper_sve_ldffbdu_zsu,
- gen_helper_sve_ldffhdu_le_zsu,
- gen_helper_sve_ldffsdu_le_zsu,
- gen_helper_sve_ldffdd_le_zsu, } },
- { { gen_helper_sve_ldffbds_zss,
- gen_helper_sve_ldffhds_le_zss,
- gen_helper_sve_ldffsds_le_zss,
- NULL, },
- { gen_helper_sve_ldffbdu_zss,
- gen_helper_sve_ldffhdu_le_zss,
- gen_helper_sve_ldffsdu_le_zss,
- gen_helper_sve_ldffdd_le_zss, } },
- { { gen_helper_sve_ldffbds_zd,
- gen_helper_sve_ldffhds_le_zd,
- gen_helper_sve_ldffsds_le_zd,
- NULL, },
- { gen_helper_sve_ldffbdu_zd,
- gen_helper_sve_ldffhdu_le_zd,
- gen_helper_sve_ldffsdu_le_zd,
- gen_helper_sve_ldffdd_le_zd, } } } },
-
- /* Big-endian */
- { { { { gen_helper_sve_ldbds_zsu,
- gen_helper_sve_ldhds_be_zsu,
- gen_helper_sve_ldsds_be_zsu,
- NULL, },
- { gen_helper_sve_ldbdu_zsu,
- gen_helper_sve_ldhdu_be_zsu,
- gen_helper_sve_ldsdu_be_zsu,
- gen_helper_sve_lddd_be_zsu, } },
- { { gen_helper_sve_ldbds_zss,
- gen_helper_sve_ldhds_be_zss,
- gen_helper_sve_ldsds_be_zss,
- NULL, },
- { gen_helper_sve_ldbdu_zss,
- gen_helper_sve_ldhdu_be_zss,
- gen_helper_sve_ldsdu_be_zss,
- gen_helper_sve_lddd_be_zss, } },
- { { gen_helper_sve_ldbds_zd,
- gen_helper_sve_ldhds_be_zd,
- gen_helper_sve_ldsds_be_zd,
- NULL, },
- { gen_helper_sve_ldbdu_zd,
- gen_helper_sve_ldhdu_be_zd,
- gen_helper_sve_ldsdu_be_zd,
- gen_helper_sve_lddd_be_zd, } } },
-
- /* First-fault */
- { { { gen_helper_sve_ldffbds_zsu,
- gen_helper_sve_ldffhds_be_zsu,
- gen_helper_sve_ldffsds_be_zsu,
- NULL, },
- { gen_helper_sve_ldffbdu_zsu,
- gen_helper_sve_ldffhdu_be_zsu,
- gen_helper_sve_ldffsdu_be_zsu,
- gen_helper_sve_ldffdd_be_zsu, } },
- { { gen_helper_sve_ldffbds_zss,
- gen_helper_sve_ldffhds_be_zss,
- gen_helper_sve_ldffsds_be_zss,
- NULL, },
- { gen_helper_sve_ldffbdu_zss,
- gen_helper_sve_ldffhdu_be_zss,
- gen_helper_sve_ldffsdu_be_zss,
- gen_helper_sve_ldffdd_be_zss, } },
- { { gen_helper_sve_ldffbds_zd,
- gen_helper_sve_ldffhds_be_zd,
- gen_helper_sve_ldffsds_be_zd,
- NULL, },
- { gen_helper_sve_ldffbdu_zd,
- gen_helper_sve_ldffhdu_be_zd,
- gen_helper_sve_ldffsdu_be_zd,
- gen_helper_sve_ldffdd_be_zd, } } } },
+static gen_helper_gvec_mem_scatter * const
+gather_load_fn64[2][2][2][3][2][4] = {
+ { /* MTE Inactive */
+ { /* Little-endian */
+ { { { gen_helper_sve_ldbds_zsu,
+ gen_helper_sve_ldhds_le_zsu,
+ gen_helper_sve_ldsds_le_zsu,
+ NULL, },
+ { gen_helper_sve_ldbdu_zsu,
+ gen_helper_sve_ldhdu_le_zsu,
+ gen_helper_sve_ldsdu_le_zsu,
+ gen_helper_sve_lddd_le_zsu, } },
+ { { gen_helper_sve_ldbds_zss,
+ gen_helper_sve_ldhds_le_zss,
+ gen_helper_sve_ldsds_le_zss,
+ NULL, },
+ { gen_helper_sve_ldbdu_zss,
+ gen_helper_sve_ldhdu_le_zss,
+ gen_helper_sve_ldsdu_le_zss,
+ gen_helper_sve_lddd_le_zss, } },
+ { { gen_helper_sve_ldbds_zd,
+ gen_helper_sve_ldhds_le_zd,
+ gen_helper_sve_ldsds_le_zd,
+ NULL, },
+ { gen_helper_sve_ldbdu_zd,
+ gen_helper_sve_ldhdu_le_zd,
+ gen_helper_sve_ldsdu_le_zd,
+ gen_helper_sve_lddd_le_zd, } } },
+
+ /* First-fault */
+ { { { gen_helper_sve_ldffbds_zsu,
+ gen_helper_sve_ldffhds_le_zsu,
+ gen_helper_sve_ldffsds_le_zsu,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zsu,
+ gen_helper_sve_ldffhdu_le_zsu,
+ gen_helper_sve_ldffsdu_le_zsu,
+ gen_helper_sve_ldffdd_le_zsu, } },
+ { { gen_helper_sve_ldffbds_zss,
+ gen_helper_sve_ldffhds_le_zss,
+ gen_helper_sve_ldffsds_le_zss,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zss,
+ gen_helper_sve_ldffhdu_le_zss,
+ gen_helper_sve_ldffsdu_le_zss,
+ gen_helper_sve_ldffdd_le_zss, } },
+ { { gen_helper_sve_ldffbds_zd,
+ gen_helper_sve_ldffhds_le_zd,
+ gen_helper_sve_ldffsds_le_zd,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zd,
+ gen_helper_sve_ldffhdu_le_zd,
+ gen_helper_sve_ldffsdu_le_zd,
+ gen_helper_sve_ldffdd_le_zd, } } } },
+ { /* Big-endian */
+ { { { gen_helper_sve_ldbds_zsu,
+ gen_helper_sve_ldhds_be_zsu,
+ gen_helper_sve_ldsds_be_zsu,
+ NULL, },
+ { gen_helper_sve_ldbdu_zsu,
+ gen_helper_sve_ldhdu_be_zsu,
+ gen_helper_sve_ldsdu_be_zsu,
+ gen_helper_sve_lddd_be_zsu, } },
+ { { gen_helper_sve_ldbds_zss,
+ gen_helper_sve_ldhds_be_zss,
+ gen_helper_sve_ldsds_be_zss,
+ NULL, },
+ { gen_helper_sve_ldbdu_zss,
+ gen_helper_sve_ldhdu_be_zss,
+ gen_helper_sve_ldsdu_be_zss,
+ gen_helper_sve_lddd_be_zss, } },
+ { { gen_helper_sve_ldbds_zd,
+ gen_helper_sve_ldhds_be_zd,
+ gen_helper_sve_ldsds_be_zd,
+ NULL, },
+ { gen_helper_sve_ldbdu_zd,
+ gen_helper_sve_ldhdu_be_zd,
+ gen_helper_sve_ldsdu_be_zd,
+ gen_helper_sve_lddd_be_zd, } } },
+
+ /* First-fault */
+ { { { gen_helper_sve_ldffbds_zsu,
+ gen_helper_sve_ldffhds_be_zsu,
+ gen_helper_sve_ldffsds_be_zsu,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zsu,
+ gen_helper_sve_ldffhdu_be_zsu,
+ gen_helper_sve_ldffsdu_be_zsu,
+ gen_helper_sve_ldffdd_be_zsu, } },
+ { { gen_helper_sve_ldffbds_zss,
+ gen_helper_sve_ldffhds_be_zss,
+ gen_helper_sve_ldffsds_be_zss,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zss,
+ gen_helper_sve_ldffhdu_be_zss,
+ gen_helper_sve_ldffsdu_be_zss,
+ gen_helper_sve_ldffdd_be_zss, } },
+ { { gen_helper_sve_ldffbds_zd,
+ gen_helper_sve_ldffhds_be_zd,
+ gen_helper_sve_ldffsds_be_zd,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zd,
+ gen_helper_sve_ldffhdu_be_zd,
+ gen_helper_sve_ldffsdu_be_zd,
+ gen_helper_sve_ldffdd_be_zd, } } } } },
+ { /* MTE Active */
+ { /* Little-endian */
+ { { { gen_helper_sve_ldbds_zsu_mte,
+ gen_helper_sve_ldhds_le_zsu_mte,
+ gen_helper_sve_ldsds_le_zsu_mte,
+ NULL, },
+ { gen_helper_sve_ldbdu_zsu_mte,
+ gen_helper_sve_ldhdu_le_zsu_mte,
+ gen_helper_sve_ldsdu_le_zsu_mte,
+ gen_helper_sve_lddd_le_zsu_mte, } },
+ { { gen_helper_sve_ldbds_zss_mte,
+ gen_helper_sve_ldhds_le_zss_mte,
+ gen_helper_sve_ldsds_le_zss_mte,
+ NULL, },
+ { gen_helper_sve_ldbdu_zss_mte,
+ gen_helper_sve_ldhdu_le_zss_mte,
+ gen_helper_sve_ldsdu_le_zss_mte,
+ gen_helper_sve_lddd_le_zss_mte, } },
+ { { gen_helper_sve_ldbds_zd_mte,
+ gen_helper_sve_ldhds_le_zd_mte,
+ gen_helper_sve_ldsds_le_zd_mte,
+ NULL, },
+ { gen_helper_sve_ldbdu_zd_mte,
+ gen_helper_sve_ldhdu_le_zd_mte,
+ gen_helper_sve_ldsdu_le_zd_mte,
+ gen_helper_sve_lddd_le_zd_mte, } } },
+
+ /* First-fault */
+ { { { gen_helper_sve_ldffbds_zsu_mte,
+ gen_helper_sve_ldffhds_le_zsu_mte,
+ gen_helper_sve_ldffsds_le_zsu_mte,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zsu_mte,
+ gen_helper_sve_ldffhdu_le_zsu_mte,
+ gen_helper_sve_ldffsdu_le_zsu_mte,
+ gen_helper_sve_ldffdd_le_zsu_mte, } },
+ { { gen_helper_sve_ldffbds_zss_mte,
+ gen_helper_sve_ldffhds_le_zss_mte,
+ gen_helper_sve_ldffsds_le_zss_mte,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zss_mte,
+ gen_helper_sve_ldffhdu_le_zss_mte,
+ gen_helper_sve_ldffsdu_le_zss_mte,
+ gen_helper_sve_ldffdd_le_zss_mte, } },
+ { { gen_helper_sve_ldffbds_zd_mte,
+ gen_helper_sve_ldffhds_le_zd_mte,
+ gen_helper_sve_ldffsds_le_zd_mte,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zd_mte,
+ gen_helper_sve_ldffhdu_le_zd_mte,
+ gen_helper_sve_ldffsdu_le_zd_mte,
+ gen_helper_sve_ldffdd_le_zd_mte, } } } },
+ { /* Big-endian */
+ { { { gen_helper_sve_ldbds_zsu_mte,
+ gen_helper_sve_ldhds_be_zsu_mte,
+ gen_helper_sve_ldsds_be_zsu_mte,
+ NULL, },
+ { gen_helper_sve_ldbdu_zsu_mte,
+ gen_helper_sve_ldhdu_be_zsu_mte,
+ gen_helper_sve_ldsdu_be_zsu_mte,
+ gen_helper_sve_lddd_be_zsu_mte, } },
+ { { gen_helper_sve_ldbds_zss_mte,
+ gen_helper_sve_ldhds_be_zss_mte,
+ gen_helper_sve_ldsds_be_zss_mte,
+ NULL, },
+ { gen_helper_sve_ldbdu_zss_mte,
+ gen_helper_sve_ldhdu_be_zss_mte,
+ gen_helper_sve_ldsdu_be_zss_mte,
+ gen_helper_sve_lddd_be_zss_mte, } },
+ { { gen_helper_sve_ldbds_zd_mte,
+ gen_helper_sve_ldhds_be_zd_mte,
+ gen_helper_sve_ldsds_be_zd_mte,
+ NULL, },
+ { gen_helper_sve_ldbdu_zd_mte,
+ gen_helper_sve_ldhdu_be_zd_mte,
+ gen_helper_sve_ldsdu_be_zd_mte,
+ gen_helper_sve_lddd_be_zd_mte, } } },
+
+ /* First-fault */
+ { { { gen_helper_sve_ldffbds_zsu_mte,
+ gen_helper_sve_ldffhds_be_zsu_mte,
+ gen_helper_sve_ldffsds_be_zsu_mte,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zsu_mte,
+ gen_helper_sve_ldffhdu_be_zsu_mte,
+ gen_helper_sve_ldffsdu_be_zsu_mte,
+ gen_helper_sve_ldffdd_be_zsu_mte, } },
+ { { gen_helper_sve_ldffbds_zss_mte,
+ gen_helper_sve_ldffhds_be_zss_mte,
+ gen_helper_sve_ldffsds_be_zss_mte,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zss_mte,
+ gen_helper_sve_ldffhdu_be_zss_mte,
+ gen_helper_sve_ldffsdu_be_zss_mte,
+ gen_helper_sve_ldffdd_be_zss_mte, } },
+ { { gen_helper_sve_ldffbds_zd_mte,
+ gen_helper_sve_ldffhds_be_zd_mte,
+ gen_helper_sve_ldffsds_be_zd_mte,
+ NULL, },
+ { gen_helper_sve_ldffbdu_zd_mte,
+ gen_helper_sve_ldffhdu_be_zd_mte,
+ gen_helper_sve_ldffsdu_be_zd_mte,
+ gen_helper_sve_ldffdd_be_zd_mte, } } } } },
};
static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
{
gen_helper_gvec_mem_scatter *fn = NULL;
- int be = s->be_data == MO_BE;
+ bool be = s->be_data == MO_BE;
+ bool mte = s->mte_active[0];
if (!sve_access_check(s)) {
return true;
@@ -5228,23 +5633,24 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a)
switch (a->esz) {
case MO_32:
- fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz];
+ fn = gather_load_fn32[mte][be][a->ff][a->xs][a->u][a->msz];
break;
case MO_64:
- fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz];
+ fn = gather_load_fn64[mte][be][a->ff][a->xs][a->u][a->msz];
break;
}
assert(fn != NULL);
do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
- cpu_reg_sp(s, a->rn), a->msz, fn);
+ cpu_reg_sp(s, a->rn), a->msz, false, fn);
return true;
}
static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
{
gen_helper_gvec_mem_scatter *fn = NULL;
- int be = s->be_data == MO_BE;
+ bool be = s->be_data == MO_BE;
+ bool mte = s->mte_active[0];
TCGv_i64 imm;
if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
@@ -5256,10 +5662,10 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
switch (a->esz) {
case MO_32:
- fn = gather_load_fn32[be][a->ff][0][a->u][a->msz];
+ fn = gather_load_fn32[mte][be][a->ff][0][a->u][a->msz];
break;
case MO_64:
- fn = gather_load_fn64[be][a->ff][2][a->u][a->msz];
+ fn = gather_load_fn64[mte][be][a->ff][2][a->u][a->msz];
break;
}
assert(fn != NULL);
@@ -5268,63 +5674,108 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a)
* by loading the immediate into the scalar parameter.
*/
imm = tcg_const_i64(a->imm << a->msz);
- do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
+ do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, false, fn);
tcg_temp_free_i64(imm);
return true;
}
-/* Indexed by [be][xs][msz]. */
-static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][3] = {
- /* Little-endian */
- { { gen_helper_sve_stbs_zsu,
- gen_helper_sve_sths_le_zsu,
- gen_helper_sve_stss_le_zsu, },
- { gen_helper_sve_stbs_zss,
- gen_helper_sve_sths_le_zss,
- gen_helper_sve_stss_le_zss, } },
- /* Big-endian */
- { { gen_helper_sve_stbs_zsu,
- gen_helper_sve_sths_be_zsu,
- gen_helper_sve_stss_be_zsu, },
- { gen_helper_sve_stbs_zss,
- gen_helper_sve_sths_be_zss,
- gen_helper_sve_stss_be_zss, } },
+/* Indexed by [mte][be][xs][msz]. */
+static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][2][3] = {
+ { /* MTE Inactive */
+ { /* Little-endian */
+ { gen_helper_sve_stbs_zsu,
+ gen_helper_sve_sths_le_zsu,
+ gen_helper_sve_stss_le_zsu, },
+ { gen_helper_sve_stbs_zss,
+ gen_helper_sve_sths_le_zss,
+ gen_helper_sve_stss_le_zss, } },
+ { /* Big-endian */
+ { gen_helper_sve_stbs_zsu,
+ gen_helper_sve_sths_be_zsu,
+ gen_helper_sve_stss_be_zsu, },
+ { gen_helper_sve_stbs_zss,
+ gen_helper_sve_sths_be_zss,
+ gen_helper_sve_stss_be_zss, } } },
+ { /* MTE Active */
+ { /* Little-endian */
+ { gen_helper_sve_stbs_zsu_mte,
+ gen_helper_sve_sths_le_zsu_mte,
+ gen_helper_sve_stss_le_zsu_mte, },
+ { gen_helper_sve_stbs_zss_mte,
+ gen_helper_sve_sths_le_zss_mte,
+ gen_helper_sve_stss_le_zss_mte, } },
+ { /* Big-endian */
+ { gen_helper_sve_stbs_zsu_mte,
+ gen_helper_sve_sths_be_zsu_mte,
+ gen_helper_sve_stss_be_zsu_mte, },
+ { gen_helper_sve_stbs_zss_mte,
+ gen_helper_sve_sths_be_zss_mte,
+ gen_helper_sve_stss_be_zss_mte, } } },
};
/* Note that we overload xs=2 to indicate 64-bit offset. */
-static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][3][4] = {
- /* Little-endian */
- { { gen_helper_sve_stbd_zsu,
- gen_helper_sve_sthd_le_zsu,
- gen_helper_sve_stsd_le_zsu,
- gen_helper_sve_stdd_le_zsu, },
- { gen_helper_sve_stbd_zss,
- gen_helper_sve_sthd_le_zss,
- gen_helper_sve_stsd_le_zss,
- gen_helper_sve_stdd_le_zss, },
- { gen_helper_sve_stbd_zd,
- gen_helper_sve_sthd_le_zd,
- gen_helper_sve_stsd_le_zd,
- gen_helper_sve_stdd_le_zd, } },
- /* Big-endian */
- { { gen_helper_sve_stbd_zsu,
- gen_helper_sve_sthd_be_zsu,
- gen_helper_sve_stsd_be_zsu,
- gen_helper_sve_stdd_be_zsu, },
- { gen_helper_sve_stbd_zss,
- gen_helper_sve_sthd_be_zss,
- gen_helper_sve_stsd_be_zss,
- gen_helper_sve_stdd_be_zss, },
- { gen_helper_sve_stbd_zd,
- gen_helper_sve_sthd_be_zd,
- gen_helper_sve_stsd_be_zd,
- gen_helper_sve_stdd_be_zd, } },
+static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][2][3][4] = {
+ { /* MTE Inactive */
+ { /* Little-endian */
+ { gen_helper_sve_stbd_zsu,
+ gen_helper_sve_sthd_le_zsu,
+ gen_helper_sve_stsd_le_zsu,
+ gen_helper_sve_stdd_le_zsu, },
+ { gen_helper_sve_stbd_zss,
+ gen_helper_sve_sthd_le_zss,
+ gen_helper_sve_stsd_le_zss,
+ gen_helper_sve_stdd_le_zss, },
+ { gen_helper_sve_stbd_zd,
+ gen_helper_sve_sthd_le_zd,
+ gen_helper_sve_stsd_le_zd,
+ gen_helper_sve_stdd_le_zd, } },
+ { /* Big-endian */
+ { gen_helper_sve_stbd_zsu,
+ gen_helper_sve_sthd_be_zsu,
+ gen_helper_sve_stsd_be_zsu,
+ gen_helper_sve_stdd_be_zsu, },
+ { gen_helper_sve_stbd_zss,
+ gen_helper_sve_sthd_be_zss,
+ gen_helper_sve_stsd_be_zss,
+ gen_helper_sve_stdd_be_zss, },
+ { gen_helper_sve_stbd_zd,
+ gen_helper_sve_sthd_be_zd,
+ gen_helper_sve_stsd_be_zd,
+ gen_helper_sve_stdd_be_zd, } } },
+    { /* MTE Active */
+ { /* Little-endian */
+ { gen_helper_sve_stbd_zsu_mte,
+ gen_helper_sve_sthd_le_zsu_mte,
+ gen_helper_sve_stsd_le_zsu_mte,
+ gen_helper_sve_stdd_le_zsu_mte, },
+ { gen_helper_sve_stbd_zss_mte,
+ gen_helper_sve_sthd_le_zss_mte,
+ gen_helper_sve_stsd_le_zss_mte,
+ gen_helper_sve_stdd_le_zss_mte, },
+ { gen_helper_sve_stbd_zd_mte,
+ gen_helper_sve_sthd_le_zd_mte,
+ gen_helper_sve_stsd_le_zd_mte,
+ gen_helper_sve_stdd_le_zd_mte, } },
+ { /* Big-endian */
+ { gen_helper_sve_stbd_zsu_mte,
+ gen_helper_sve_sthd_be_zsu_mte,
+ gen_helper_sve_stsd_be_zsu_mte,
+ gen_helper_sve_stdd_be_zsu_mte, },
+ { gen_helper_sve_stbd_zss_mte,
+ gen_helper_sve_sthd_be_zss_mte,
+ gen_helper_sve_stsd_be_zss_mte,
+ gen_helper_sve_stdd_be_zss_mte, },
+ { gen_helper_sve_stbd_zd_mte,
+ gen_helper_sve_sthd_be_zd_mte,
+ gen_helper_sve_stsd_be_zd_mte,
+ gen_helper_sve_stdd_be_zd_mte, } } },
};
static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
{
gen_helper_gvec_mem_scatter *fn;
- int be = s->be_data == MO_BE;
+ bool be = s->be_data == MO_BE;
+ bool mte = s->mte_active[0];
if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
return false;
@@ -5334,23 +5785,24 @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a)
}
switch (a->esz) {
case MO_32:
- fn = scatter_store_fn32[be][a->xs][a->msz];
+ fn = scatter_store_fn32[mte][be][a->xs][a->msz];
break;
case MO_64:
- fn = scatter_store_fn64[be][a->xs][a->msz];
+ fn = scatter_store_fn64[mte][be][a->xs][a->msz];
break;
default:
g_assert_not_reached();
}
do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
- cpu_reg_sp(s, a->rn), a->msz, fn);
+ cpu_reg_sp(s, a->rn), a->msz, true, fn);
return true;
}
static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
{
gen_helper_gvec_mem_scatter *fn = NULL;
- int be = s->be_data == MO_BE;
+ bool be = s->be_data == MO_BE;
+ bool mte = s->mte_active[0];
TCGv_i64 imm;
if (a->esz < a->msz) {
@@ -5362,10 +5814,10 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
switch (a->esz) {
case MO_32:
- fn = scatter_store_fn32[be][0][a->msz];
+ fn = scatter_store_fn32[mte][be][0][a->msz];
break;
case MO_64:
- fn = scatter_store_fn64[be][2][a->msz];
+ fn = scatter_store_fn64[mte][be][2][a->msz];
break;
}
assert(fn != NULL);
@@ -5374,7 +5826,7 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a)
* by loading the immediate into the scalar parameter.
*/
imm = tcg_const_i64(a->imm << a->msz);
- do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
+ do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, true, fn);
tcg_temp_free_i64(imm);
return true;
}
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index bf31b18..afa8a5f 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -123,7 +123,7 @@ static bool full_vfp_access_check(DisasContext *s, bool ignore_vfp_enabled)
* this to be the last insn in the TB).
*/
if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
- s->base.is_jmp = DISAS_UPDATE;
+ s->base.is_jmp = DISAS_UPDATE_EXIT;
gen_io_start();
}
gen_helper_v7m_preserve_fp_state(cpu_env);
@@ -2860,6 +2860,6 @@ static bool trans_VLLDM_VLSTM(DisasContext *s, arg_VLLDM_VLSTM *a)
tcg_temp_free_i32(fptr);
/* End the TB, because we have updated FP control bits */
- s->base.is_jmp = DISAS_UPDATE;
+ s->base.is_jmp = DISAS_UPDATE_EXIT;
return true;
}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 795964d..c39a929 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -2775,7 +2775,7 @@ static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
tcg_temp_free_i32(tcg_tgtmode);
tcg_temp_free_i32(tcg_regno);
tcg_temp_free_i32(tcg_reg);
- s->base.is_jmp = DISAS_UPDATE;
+ s->base.is_jmp = DISAS_UPDATE_EXIT;
}
static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
@@ -2797,7 +2797,7 @@ static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
tcg_temp_free_i32(tcg_tgtmode);
tcg_temp_free_i32(tcg_regno);
store_reg(s, rn, tcg_reg);
- s->base.is_jmp = DISAS_UPDATE;
+ s->base.is_jmp = DISAS_UPDATE_EXIT;
}
/* Store value to PC as for an exception return (ie don't
@@ -5114,7 +5114,7 @@ static void gen_srs(DisasContext *s,
tcg_temp_free_i32(tmp);
}
tcg_temp_free_i32(addr);
- s->base.is_jmp = DISAS_UPDATE;
+ s->base.is_jmp = DISAS_UPDATE_EXIT;
}
/* Generate a label used for skipping this instruction */
@@ -8160,7 +8160,7 @@ static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
}
if (a->E != (s->be_data == MO_BE)) {
gen_helper_setend(cpu_env);
- s->base.is_jmp = DISAS_UPDATE;
+ s->base.is_jmp = DISAS_UPDATE_EXIT;
}
return true;
}
@@ -8873,7 +8873,8 @@ static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
break;
case DISAS_NEXT:
case DISAS_TOO_MANY:
- case DISAS_UPDATE:
+ case DISAS_UPDATE_EXIT:
+ case DISAS_UPDATE_NOCHAIN:
gen_set_pc_im(dc, dc->base.pc_next);
/* fall through */
default:
@@ -8897,10 +8898,13 @@ static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
case DISAS_TOO_MANY:
gen_goto_tb(dc, 1, dc->base.pc_next);
break;
+ case DISAS_UPDATE_NOCHAIN:
+ gen_set_pc_im(dc, dc->base.pc_next);
+ /* fall through */
case DISAS_JUMP:
gen_goto_ptr();
break;
- case DISAS_UPDATE:
+ case DISAS_UPDATE_EXIT:
gen_set_pc_im(dc, dc->base.pc_next);
/* fall through */
default:
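The net effect of the new state, paraphrasing the second tb_stop switch above: both UPDATE variants write back the PC, but only _EXIT leaves the TCG execution loop (so pending interrupts are noticed immediately), while _NOCHAIN takes the indirect lookup_and_goto_ptr path without direct TB chaining. Sketch:

    switch (dc->base.is_jmp) {
    case DISAS_UPDATE_NOCHAIN:
        gen_set_pc_im(dc, dc->base.pc_next);
        /* fall through */
    case DISAS_JUMP:
        gen_goto_ptr();             /* indirect jump, no direct chaining */
        break;
    case DISAS_UPDATE_EXIT:
        gen_set_pc_im(dc, dc->base.pc_next);
        /* fall through */
    default:
        tcg_gen_exit_tb(NULL, 0);   /* return to the main execution loop */
        break;
    }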
diff --git a/target/arm/translate.h b/target/arm/translate.h
index 19650a9..16f2699 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -30,6 +30,7 @@ typedef struct DisasContext {
ARMMMUIdx mmu_idx; /* MMU index to use for normal loads/stores */
uint8_t tbii; /* TBI1|TBI0 for insns */
uint8_t tbid; /* TBI1|TBI0 for data */
+ uint8_t tcma; /* TCMA1|TCMA0 for MTE */
bool ns; /* Use non-secure CPREG bank on access */
int fp_excp_el; /* FP exception EL or 0 if enabled */
int sve_excp_el; /* SVE exception EL or 0 if enabled */
@@ -77,6 +78,10 @@ typedef struct DisasContext {
bool unpriv;
/* True if v8.3-PAuth is active. */
bool pauth_active;
+ /* True if v8.5-MTE access to tags is enabled. */
+ bool ata;
+ /* True if v8.5-MTE tag checks affect the PE; index with is_unpriv. */
+ bool mte_active[2];
/* True with v8.5-BTI and SCTLR_ELx.BT* set. */
bool bt;
/* True if any CP15 access is trapped by HSTR_EL2 */
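The two-entry mte_active[] exists because unprivileged-access instructions (the LDTR/STTR family) can observe a different checked state than normal accesses. A typical consumer would index it as sketched here, where is_unpriv is whatever flag the instruction form provides:

    bool mte = s->mte_active[is_unpriv];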
@@ -86,6 +91,8 @@ typedef struct DisasContext {
* < 0, set by the current instruction.
*/
int8_t btype;
+ /* A copy of cpu->dcz_blocksize. */
+ uint8_t dcz_blocksize;
/* True if this page is guarded. */
bool guarded_page;
/* Bottom two bits of XScale c15_cpar coprocessor access control reg */
@@ -148,7 +155,8 @@ static inline void disas_set_insn_syndrome(DisasContext *s, uint32_t syn)
/* is_jmp field values */
#define DISAS_JUMP DISAS_TARGET_0 /* only pc was modified dynamically */
-#define DISAS_UPDATE DISAS_TARGET_1 /* cpu state was modified dynamically */
+/* CPU state was modified dynamically; exit to main loop for interrupts. */
+#define DISAS_UPDATE_EXIT DISAS_TARGET_1
/* These instructions trap after executing, so the A32/T32 decoder must
* defer them until after the conditional execution state has been updated.
* WFI also needs special handling when single-stepping.
@@ -164,13 +172,16 @@ static inline void disas_set_insn_syndrome(DisasContext *s, uint32_t syn)
* custom end-of-TB code)
*/
#define DISAS_BX_EXCRET DISAS_TARGET_8
-/* For instructions which want an immediate exit to the main loop,
- * as opposed to attempting to use lookup_and_goto_ptr. Unlike
- * DISAS_UPDATE this doesn't write the PC on exiting the translation
- * loop so you need to ensure something (gen_a64_set_pc_im or runtime
- * helper) has done so before we reach return from cpu_tb_exec.
+/*
+ * For instructions which want an immediate exit to the main loop, as opposed
+ * to attempting to use lookup_and_goto_ptr. Unlike DISAS_UPDATE_EXIT, this
+ * doesn't write the PC on exiting the translation loop so you need to ensure
+ * something (gen_a64_set_pc_im or runtime helper) has done so before we reach
+ * return from cpu_tb_exec.
*/
#define DISAS_EXIT DISAS_TARGET_9
+/* CPU state was modified dynamically; no need to exit, but do not chain. */
+#define DISAS_UPDATE_NOCHAIN DISAS_TARGET_10
#ifdef TARGET_AARCH64
void a64_translate_init(void);