Diffstat (limited to 'target/arm/tcg')
 -rw-r--r--  target/arm/tcg/cpu64.c         |   1
 -rw-r--r--  target/arm/tcg/helper-a64.c    |   7
 -rw-r--r--  target/arm/tcg/helper-a64.h    |   3
 -rw-r--r--  target/arm/tcg/hflags.c        |   6
 -rw-r--r--  target/arm/tcg/mte_helper.c    |  18
 -rw-r--r--  target/arm/tcg/translate-a64.c | 477
 -rw-r--r--  target/arm/tcg/translate-a64.h |   4
 -rw-r--r--  target/arm/tcg/translate-sve.c | 106
 -rw-r--r--  target/arm/tcg/translate.c     |   1
 -rw-r--r--  target/arm/tcg/translate.h     |  65
 10 files changed, 495 insertions, 193 deletions
diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c
index 886674a..2976f94 100644
--- a/target/arm/tcg/cpu64.c
+++ b/target/arm/tcg/cpu64.c
@@ -644,6 +644,7 @@ void aarch64_max_tcg_initfn(Object *obj)
t = FIELD_DP64(t, ID_AA64MMFR2, IESB, 1); /* FEAT_IESB */
t = FIELD_DP64(t, ID_AA64MMFR2, VARANGE, 1); /* FEAT_LVA */
t = FIELD_DP64(t, ID_AA64MMFR2, ST, 1); /* FEAT_TTST */
+ t = FIELD_DP64(t, ID_AA64MMFR2, AT, 1); /* FEAT_LSE2 */
t = FIELD_DP64(t, ID_AA64MMFR2, IDS, 1); /* FEAT_IDST */
t = FIELD_DP64(t, ID_AA64MMFR2, FWB, 1); /* FEAT_S2FWB */
t = FIELD_DP64(t, ID_AA64MMFR2, TTL, 1); /* FEAT_TTL */
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
index c3edf16..1c9370f 100644
--- a/target/arm/tcg/helper-a64.c
+++ b/target/arm/tcg/helper-a64.c
@@ -952,3 +952,10 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
memset(mem, 0, blocklen);
}
+
+void HELPER(unaligned_access)(CPUARMState *env, uint64_t addr,
+ uint32_t access_type, uint32_t mmu_idx)
+{
+ arm_cpu_do_unaligned_access(env_cpu(env), addr, access_type,
+ mmu_idx, GETPC());
+}
diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h
index ff56807..3d5957c 100644
--- a/target/arm/tcg/helper-a64.h
+++ b/target/arm/tcg/helper-a64.h
@@ -110,3 +110,6 @@ DEF_HELPER_FLAGS_2(st2g_stub, TCG_CALL_NO_WG, void, env, i64)
DEF_HELPER_FLAGS_2(ldgm, TCG_CALL_NO_WG, i64, env, i64)
DEF_HELPER_FLAGS_3(stgm, TCG_CALL_NO_WG, void, env, i64, i64)
DEF_HELPER_FLAGS_3(stzgm_tags, TCG_CALL_NO_WG, void, env, i64, i64)
+
+DEF_HELPER_FLAGS_4(unaligned_access, TCG_CALL_NO_WG,
+ noreturn, env, i64, i32, i32)
diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c
index b2ccd77..616c5fa 100644
--- a/target/arm/tcg/hflags.c
+++ b/target/arm/tcg/hflags.c
@@ -248,6 +248,12 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
}
}
+ if (cpu_isar_feature(aa64_lse2, env_archcpu(env))) {
+ if (sctlr & SCTLR_nAA) {
+ DP_TBFLAG_A64(flags, NAA, 1);
+ }
+ }
+
/* Compute the condition for using AccType_UNPRIV for LDTR et al. */
if (!(env->pstate & PSTATE_UAO)) {
switch (mmu_idx) {
diff --git a/target/arm/tcg/mte_helper.c b/target/arm/tcg/mte_helper.c
index a4f3f92..9c64def 100644
--- a/target/arm/tcg/mte_helper.c
+++ b/target/arm/tcg/mte_helper.c
@@ -785,6 +785,24 @@ uint64_t mte_check(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra)
uint64_t HELPER(mte_check)(CPUARMState *env, uint32_t desc, uint64_t ptr)
{
+ /*
+ * R_XCHFJ: Alignment check not caused by memory type is priority 1,
+ * higher than any translation fault. When MTE is disabled, tcg
+ * performs the alignment check during the code generated for the
+ * memory access. With MTE enabled, we must check this here before
+ * raising any translation fault in allocation_tag_mem.
+ */
+ unsigned align = FIELD_EX32(desc, MTEDESC, ALIGN);
+ if (unlikely(align)) {
+ align = (1u << align) - 1;
+ if (unlikely(ptr & align)) {
+ int idx = FIELD_EX32(desc, MTEDESC, MIDX);
+ bool w = FIELD_EX32(desc, MTEDESC, WRITE);
+ MMUAccessType type = w ? MMU_DATA_STORE : MMU_DATA_LOAD;
+ arm_cpu_do_unaligned_access(env_cpu(env), ptr, type, idx, GETPC());
+ }
+ }
+
return mte_check(env, desc, ptr, GETPC());
}
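
The ALIGN field added to MTEDESC above holds log2 of the required alignment, with zero meaning no requirement. A minimal sketch of the test performed in HELPER(mte_check), using a hypothetical helper name that is not part of the patch:

    #include <stdbool.h>
    #include <stdint.h>

    /* Illustrative only: a non-zero align_log2 requires ptr to be a
     * multiple of (1 << align_log2); 'align' in the helper above is the
     * corresponding low-bit mask. */
    static inline bool mte_ptr_misaligned(uint64_t ptr, unsigned align_log2)
    {
        uint64_t mask = ((uint64_t)1 << align_log2) - 1;
        return align_log2 != 0 && (ptr & mask) != 0;
    }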
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index d980033..aa93f37 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -253,7 +253,7 @@ static void gen_probe_access(DisasContext *s, TCGv_i64 ptr,
*/
static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
bool is_write, bool tag_checked,
- int log2_size, bool is_unpriv,
+ MemOp memop, bool is_unpriv,
int core_idx)
{
if (tag_checked && s->mte_active[is_unpriv]) {
@@ -264,7 +264,8 @@ static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
- desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << log2_size) - 1);
+ desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(memop));
+ desc = FIELD_DP32(desc, MTEDESC, SIZEM1, memop_size(memop) - 1);
ret = tcg_temp_new_i64();
gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);
@@ -275,9 +276,9 @@ static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr,
}
TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
- bool tag_checked, int log2_size)
+ bool tag_checked, MemOp memop)
{
- return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size,
+ return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, memop,
false, get_mem_index(s));
}
@@ -285,7 +286,7 @@ TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
* For MTE, check multiple logical sequential accesses.
*/
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
- bool tag_checked, int size)
+ bool tag_checked, int total_size, MemOp single_mop)
{
if (tag_checked && s->mte_active[0]) {
TCGv_i64 ret;
@@ -295,7 +296,8 @@ TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
desc = FIELD_DP32(desc, MTEDESC, WRITE, is_write);
- desc = FIELD_DP32(desc, MTEDESC, SIZEM1, size - 1);
+ desc = FIELD_DP32(desc, MTEDESC, ALIGN, get_alignment_bits(single_mop));
+ desc = FIELD_DP32(desc, MTEDESC, SIZEM1, total_size - 1);
ret = tcg_temp_new_i64();
gen_helper_mte_check(ret, cpu_env, tcg_constant_i32(desc), addr);
@@ -305,6 +307,89 @@ TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
return clean_data_tbi(s, addr);
}
+/*
+ * Generate the special alignment check that applies to AccType_ATOMIC
+ * and AccType_ORDERED insns under FEAT_LSE2: the access need not be
+ * naturally aligned, but it must not cross a 16-byte boundary.
+ * See AArch64.CheckAlignment().
+ */
+static void check_lse2_align(DisasContext *s, int rn, int imm,
+ bool is_write, MemOp mop)
+{
+ TCGv_i32 tmp;
+ TCGv_i64 addr;
+ TCGLabel *over_label;
+ MMUAccessType type;
+ int mmu_idx;
+
+ tmp = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(tmp, cpu_reg_sp(s, rn));
+ tcg_gen_addi_i32(tmp, tmp, imm & 15);
+ tcg_gen_andi_i32(tmp, tmp, 15);
+ tcg_gen_addi_i32(tmp, tmp, memop_size(mop));
+
+ over_label = gen_new_label();
+ tcg_gen_brcondi_i32(TCG_COND_LEU, tmp, 16, over_label);
+
+ addr = tcg_temp_new_i64();
+ tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm);
+
+ type = is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
+ mmu_idx = get_mem_index(s);
+ gen_helper_unaligned_access(cpu_env, addr, tcg_constant_i32(type),
+ tcg_constant_i32(mmu_idx));
+
+ gen_set_label(over_label);
+
+}
+
+/* Handle the alignment check for AccType_ATOMIC instructions. */
+static MemOp check_atomic_align(DisasContext *s, int rn, MemOp mop)
+{
+ MemOp size = mop & MO_SIZE;
+
+ if (size == MO_8) {
+ return mop;
+ }
+
+ /*
+ * If size == MO_128, this is a LDXP, and the operation is single-copy
+ * atomic for each doubleword, not the entire quadword; it still must
+ * be quadword aligned.
+ */
+ if (size == MO_128) {
+ return finalize_memop_atom(s, MO_128 | MO_ALIGN,
+ MO_ATOM_IFALIGN_PAIR);
+ }
+ if (dc_isar_feature(aa64_lse2, s)) {
+ check_lse2_align(s, rn, 0, true, mop);
+ } else {
+ mop |= MO_ALIGN;
+ }
+ return finalize_memop(s, mop);
+}
+
+/* Handle the alignment check for AccType_ORDERED instructions. */
+static MemOp check_ordered_align(DisasContext *s, int rn, int imm,
+ bool is_write, MemOp mop)
+{
+ MemOp size = mop & MO_SIZE;
+
+ if (size == MO_8) {
+ return mop;
+ }
+ if (size == MO_128) {
+ return finalize_memop_atom(s, MO_128 | MO_ALIGN,
+ MO_ATOM_IFALIGN_PAIR);
+ }
+ if (!dc_isar_feature(aa64_lse2, s)) {
+ mop |= MO_ALIGN;
+ } else if (!s->naa) {
+ check_lse2_align(s, rn, imm, is_write, mop);
+ }
+ return finalize_memop(s, mop);
+}
+
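
As a standalone sketch of the condition that the code emitted by check_lse2_align() tests at run time (names here are illustrative, not from the patch), an AccType_ATOMIC or AccType_ORDERED access passes the FEAT_LSE2 check when it does not cross a 16-byte boundary:

    #include <stdbool.h>
    #include <stdint.h>

    /* Illustrative only: mirrors AArch64.CheckAlignment() as used above,
     * for a 'size'-byte access at base register value 'rn' plus 'imm'. */
    static bool lse2_within_16(uint64_t rn, int64_t imm, unsigned size)
    {
        return (((rn + imm) & 15) + size) <= 16;
    }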
typedef struct DisasCompare64 {
TCGCond cond;
TCGv_i64 value;
@@ -838,7 +923,6 @@ static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
unsigned int iss_srt,
bool iss_sf, bool iss_ar)
{
- memop = finalize_memop(s, memop);
tcg_gen_qemu_st_i64(source, tcg_addr, memidx, memop);
if (iss_valid) {
@@ -873,7 +957,6 @@ static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
bool iss_valid, unsigned int iss_srt,
bool iss_sf, bool iss_ar)
{
- memop = finalize_memop(s, memop);
tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
if (extend && (memop & MO_SIGN)) {
@@ -907,59 +990,44 @@ static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
/*
* Store from FP register to memory
*/
-static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
+static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, MemOp mop)
{
/* This writes the bottom N bits of a 128 bit wide vector to memory */
TCGv_i64 tmplo = tcg_temp_new_i64();
- MemOp mop;
tcg_gen_ld_i64(tmplo, cpu_env, fp_reg_offset(s, srcidx, MO_64));
- if (size < 4) {
- mop = finalize_memop(s, size);
+ if ((mop & MO_SIZE) < MO_128) {
tcg_gen_qemu_st_i64(tmplo, tcg_addr, get_mem_index(s), mop);
} else {
- bool be = s->be_data == MO_BE;
- TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
TCGv_i64 tmphi = tcg_temp_new_i64();
+ TCGv_i128 t16 = tcg_temp_new_i128();
tcg_gen_ld_i64(tmphi, cpu_env, fp_reg_hi_offset(s, srcidx));
+ tcg_gen_concat_i64_i128(t16, tmplo, tmphi);
- mop = s->be_data | MO_UQ;
- tcg_gen_qemu_st_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
- mop | (s->align_mem ? MO_ALIGN_16 : 0));
- tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
- tcg_gen_qemu_st_i64(be ? tmplo : tmphi, tcg_hiaddr,
- get_mem_index(s), mop);
+ tcg_gen_qemu_st_i128(t16, tcg_addr, get_mem_index(s), mop);
}
}
/*
* Load from memory to FP register
*/
-static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
+static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, MemOp mop)
{
/* This always zero-extends and writes to a full 128 bit wide vector */
TCGv_i64 tmplo = tcg_temp_new_i64();
TCGv_i64 tmphi = NULL;
- MemOp mop;
- if (size < 4) {
- mop = finalize_memop(s, size);
+ if ((mop & MO_SIZE) < MO_128) {
tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), mop);
} else {
- bool be = s->be_data == MO_BE;
- TCGv_i64 tcg_hiaddr;
+ TCGv_i128 t16 = tcg_temp_new_i128();
- tmphi = tcg_temp_new_i64();
- tcg_hiaddr = tcg_temp_new_i64();
+ tcg_gen_qemu_ld_i128(t16, tcg_addr, get_mem_index(s), mop);
- mop = s->be_data | MO_UQ;
- tcg_gen_qemu_ld_i64(be ? tmphi : tmplo, tcg_addr, get_mem_index(s),
- mop | (s->align_mem ? MO_ALIGN_16 : 0));
- tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
- tcg_gen_qemu_ld_i64(be ? tmplo : tmphi, tcg_hiaddr,
- get_mem_index(s), mop);
+ tmphi = tcg_temp_new_i64();
+ tcg_gen_extr_i128_i64(tmplo, tmphi, t16);
}
tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
@@ -2382,19 +2450,22 @@ static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
* races in multi-threaded linux-user and when MTTCG softmmu is
* enabled.
*/
-static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
- TCGv_i64 addr, int size, bool is_pair)
+static void gen_load_exclusive(DisasContext *s, int rt, int rt2, int rn,
+ int size, bool is_pair)
{
int idx = get_mem_index(s);
- MemOp memop = s->be_data;
+ TCGv_i64 dirty_addr, clean_addr;
+ MemOp memop = check_atomic_align(s, rn, size + is_pair);
+
+ s->is_ldex = true;
+ dirty_addr = cpu_reg_sp(s, rn);
+ clean_addr = gen_mte_check1(s, dirty_addr, false, rn != 31, memop);
g_assert(size <= 3);
if (is_pair) {
g_assert(size >= 2);
if (size == 2) {
- /* The pair must be single-copy atomic for the doubleword. */
- memop |= MO_64 | MO_ALIGN;
- tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
+ tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
if (s->be_data == MO_LE) {
tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
@@ -2403,29 +2474,29 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
}
} else {
- /* The pair must be single-copy atomic for *each* doubleword, not
- the entire quadword, however it must be quadword aligned. */
- memop |= MO_64;
- tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
- memop | MO_ALIGN_16);
+ TCGv_i128 t16 = tcg_temp_new_i128();
- TCGv_i64 addr2 = tcg_temp_new_i64();
- tcg_gen_addi_i64(addr2, addr, 8);
- tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
+ tcg_gen_qemu_ld_i128(t16, clean_addr, idx, memop);
+ if (s->be_data == MO_LE) {
+ tcg_gen_extr_i128_i64(cpu_exclusive_val,
+ cpu_exclusive_high, t16);
+ } else {
+ tcg_gen_extr_i128_i64(cpu_exclusive_high,
+ cpu_exclusive_val, t16);
+ }
tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
}
} else {
- memop |= size | MO_ALIGN;
- tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
+ tcg_gen_qemu_ld_i64(cpu_exclusive_val, clean_addr, idx, memop);
tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
}
- tcg_gen_mov_i64(cpu_exclusive_addr, addr);
+ tcg_gen_mov_i64(cpu_exclusive_addr, clean_addr);
}
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
- TCGv_i64 addr, int size, int is_pair)
+ int rn, int size, int is_pair)
{
/* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
* && (!is_pair || env->exclusive_high == [addr + datasize])) {
@@ -2441,9 +2512,46 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
*/
TCGLabel *fail_label = gen_new_label();
TCGLabel *done_label = gen_new_label();
- TCGv_i64 tmp;
+ TCGv_i64 tmp, clean_addr;
+ MemOp memop;
+
+ /*
+ * FIXME: We are out of spec here. We have recorded only the address
+ * from load_exclusive, not the entire range, and we assume that the
+ * size of the access on both sides match. The architecture allows the
+ * store to be smaller than the load, so long as the stored bytes are
+ * within the range recorded by the load.
+ */
+
+ /* See AArch64.ExclusiveMonitorsPass() and AArch64.IsExclusiveVA(). */
+ clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+ tcg_gen_brcond_i64(TCG_COND_NE, clean_addr, cpu_exclusive_addr, fail_label);
- tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
+ /*
+ * The write, and any associated faults, only happen if the virtual
+ * and physical addresses pass the exclusive monitor check. These
+ * faults are exceedingly unlikely, because normally the guest uses
+ * the exact same address register for the load_exclusive, and we
+ * would have recognized these faults there.
+ *
+ * It is possible to trigger an alignment fault pre-LSE2, e.g. with an
+ * unaligned 4-byte write within the range of an aligned 8-byte load.
+ * With LSE2, the store would need to cross a 16-byte boundary when the
+ * load did not, which would mean the store is outside the range
+ * recorded for the monitor, which would have failed a corrected monitor
+ * check above. For now, we assume no size change and retain the
+ * MO_ALIGN to let tcg know what we checked in the load_exclusive.
+ *
+ * It is possible to trigger an MTE fault, by performing the load with
+ * a virtual address with a valid tag and performing the store with the
+ * same virtual address and a different invalid tag.
+ */
+ memop = size + is_pair;
+ if (memop == MO_128 || !dc_isar_feature(aa64_lse2, s)) {
+ memop |= MO_ALIGN;
+ }
+ memop = finalize_memop(s, memop);
+ gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
tmp = tcg_temp_new_i64();
if (is_pair) {
@@ -2455,8 +2563,7 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
}
tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
cpu_exclusive_val, tmp,
- get_mem_index(s),
- MO_64 | MO_ALIGN | s->be_data);
+ get_mem_index(s), memop);
tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
} else {
TCGv_i128 t16 = tcg_temp_new_i128();
@@ -2474,8 +2581,7 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
}
tcg_gen_atomic_cmpxchg_i128(t16, cpu_exclusive_addr, c16, t16,
- get_mem_index(s),
- MO_128 | MO_ALIGN | s->be_data);
+ get_mem_index(s), memop);
a = tcg_temp_new_i64();
b = tcg_temp_new_i64();
@@ -2493,8 +2599,7 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
}
} else {
tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
- cpu_reg(s, rt), get_mem_index(s),
- size | MO_ALIGN | s->be_data);
+ cpu_reg(s, rt), get_mem_index(s), memop);
tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
}
tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
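
Conceptually, the sequence above models the exclusive store as a compare-and-swap against the value captured by the matching load-exclusive; a rough standalone model in plain C (illustration only, not the QEMU API):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Rough model: the store takes effect only if memory still holds the
     * value observed by the load-exclusive; the emitted setcond then turns
     * this into the 0 (success) / 1 (failure) status written to Rd. */
    static bool stxr_model(_Atomic uint64_t *mem, uint64_t loaded, uint64_t newval)
    {
        uint64_t expected = loaded;
        return atomic_compare_exchange_strong(mem, &expected, newval);
    }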
@@ -2513,13 +2618,15 @@ static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
TCGv_i64 tcg_rt = cpu_reg(s, rt);
int memidx = get_mem_index(s);
TCGv_i64 clean_addr;
+ MemOp memop;
if (rn == 31) {
gen_check_sp_alignment(s);
}
- clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size);
- tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt, memidx,
- size | MO_ALIGN | s->be_data);
+ memop = check_atomic_align(s, rn, size);
+ clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
+ tcg_gen_atomic_cmpxchg_i64(tcg_rs, clean_addr, tcg_rs, tcg_rt,
+ memidx, memop);
}
static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
@@ -2531,13 +2638,15 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
TCGv_i64 t2 = cpu_reg(s, rt + 1);
TCGv_i64 clean_addr;
int memidx = get_mem_index(s);
+ MemOp memop;
if (rn == 31) {
gen_check_sp_alignment(s);
}
/* This is a single atomic access, despite the "pair". */
- clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, size + 1);
+ memop = check_atomic_align(s, rn, size + 1);
+ clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), true, rn != 31, memop);
if (size == 2) {
TCGv_i64 cmp = tcg_temp_new_i64();
@@ -2551,8 +2660,7 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
tcg_gen_concat32_i64(cmp, s2, s1);
}
- tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx,
- MO_64 | MO_ALIGN | s->be_data);
+ tcg_gen_atomic_cmpxchg_i64(cmp, clean_addr, cmp, val, memidx, memop);
if (s->be_data == MO_LE) {
tcg_gen_extr32_i64(s1, s2, cmp);
@@ -2571,8 +2679,7 @@ static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
tcg_gen_concat_i64_i128(cmp, s2, s1);
}
- tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx,
- MO_128 | MO_ALIGN | s->be_data);
+ tcg_gen_atomic_cmpxchg_i128(cmp, clean_addr, cmp, val, memidx, memop);
if (s->be_data == MO_LE) {
tcg_gen_extr_i128_i64(s1, s2, cmp);
@@ -2621,6 +2728,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
int size = extract32(insn, 30, 2);
TCGv_i64 clean_addr;
+ MemOp memop;
switch (o2_L_o1_o0) {
case 0x0: /* STXR */
@@ -2631,9 +2739,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
if (is_lasr) {
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
}
- clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
- true, rn != 31, size);
- gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, false);
+ gen_store_exclusive(s, rs, rt, rt2, rn, size, false);
return;
case 0x4: /* LDXR */
@@ -2641,10 +2747,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
if (rn == 31) {
gen_check_sp_alignment(s);
}
- clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
- false, rn != 31, size);
- s->is_ldex = true;
- gen_load_exclusive(s, rt, rt2, clean_addr, size, false);
+ gen_load_exclusive(s, rt, rt2, rn, size, false);
if (is_lasr) {
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
}
@@ -2662,10 +2765,10 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
gen_check_sp_alignment(s);
}
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+ memop = check_ordered_align(s, rn, 0, true, size);
clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
- true, rn != 31, size);
- /* TODO: ARMv8.4-LSE SCTLR.nAA */
- do_gpr_st(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, true, rt,
+ true, rn != 31, memop);
+ do_gpr_st(s, cpu_reg(s, rt), clean_addr, memop, true, rt,
disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
return;
@@ -2680,10 +2783,10 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
if (rn == 31) {
gen_check_sp_alignment(s);
}
+ memop = check_ordered_align(s, rn, 0, false, size);
clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
- false, rn != 31, size);
- /* TODO: ARMv8.4-LSE SCTLR.nAA */
- do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size | MO_ALIGN, false, true,
+ false, rn != 31, memop);
+ do_gpr_ld(s, cpu_reg(s, rt), clean_addr, memop, false, true,
rt, disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
return;
@@ -2696,9 +2799,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
if (is_lasr) {
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
}
- clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
- true, rn != 31, size);
- gen_store_exclusive(s, rs, rt, rt2, clean_addr, size, true);
+ gen_store_exclusive(s, rs, rt, rt2, rn, size, true);
return;
}
if (rt2 == 31
@@ -2715,10 +2816,7 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
if (rn == 31) {
gen_check_sp_alignment(s);
}
- clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn),
- false, rn != 31, size);
- s->is_ldex = true;
- gen_load_exclusive(s, rt, rt2, clean_addr, size, true);
+ gen_load_exclusive(s, rt, rt2, rn, size, true);
if (is_lasr) {
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
}
@@ -2768,6 +2866,7 @@ static void disas_ld_lit(DisasContext *s, uint32_t insn)
bool is_signed = false;
int size = 2;
TCGv_i64 tcg_rt, clean_addr;
+ MemOp memop;
if (is_vector) {
if (opc == 3) {
@@ -2778,6 +2877,7 @@ static void disas_ld_lit(DisasContext *s, uint32_t insn)
if (!fp_access_check(s)) {
return;
}
+ memop = finalize_memop_asimd(s, size);
} else {
if (opc == 3) {
/* PRFM (literal) : prefetch */
@@ -2785,20 +2885,20 @@ static void disas_ld_lit(DisasContext *s, uint32_t insn)
}
size = 2 + extract32(opc, 0, 1);
is_signed = extract32(opc, 1, 1);
+ memop = finalize_memop(s, size + is_signed * MO_SIGN);
}
tcg_rt = cpu_reg(s, rt);
clean_addr = tcg_temp_new_i64();
gen_pc_plus_diff(s, clean_addr, imm);
+
if (is_vector) {
- do_fp_ld(s, rt, clean_addr, size);
+ do_fp_ld(s, rt, clean_addr, memop);
} else {
/* Only unsigned 32bit loads target 32bit registers. */
bool iss_sf = opc != 0;
-
- do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
- false, true, rt, iss_sf, false);
+ do_gpr_ld(s, tcg_rt, clean_addr, memop, false, true, rt, iss_sf, false);
}
}
@@ -2840,14 +2940,12 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn)
bool is_vector = extract32(insn, 26, 1);
bool is_load = extract32(insn, 22, 1);
int opc = extract32(insn, 30, 2);
-
bool is_signed = false;
bool postindex = false;
bool wback = false;
bool set_tag = false;
-
TCGv_i64 clean_addr, dirty_addr;
-
+ MemOp mop;
int size;
if (opc == 3) {
@@ -2930,44 +3028,94 @@ static void disas_ldst_pair(DisasContext *s, uint32_t insn)
}
}
+ if (is_vector) {
+ mop = finalize_memop_asimd(s, size);
+ } else {
+ mop = finalize_memop(s, size);
+ }
clean_addr = gen_mte_checkN(s, dirty_addr, !is_load,
- (wback || rn != 31) && !set_tag, 2 << size);
+ (wback || rn != 31) && !set_tag,
+ 2 << size, mop);
if (is_vector) {
+ /* LSE2 does not merge FP pairs; leave these as separate operations. */
if (is_load) {
- do_fp_ld(s, rt, clean_addr, size);
+ do_fp_ld(s, rt, clean_addr, mop);
} else {
- do_fp_st(s, rt, clean_addr, size);
+ do_fp_st(s, rt, clean_addr, mop);
}
tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
if (is_load) {
- do_fp_ld(s, rt2, clean_addr, size);
+ do_fp_ld(s, rt2, clean_addr, mop);
} else {
- do_fp_st(s, rt2, clean_addr, size);
+ do_fp_st(s, rt2, clean_addr, mop);
}
} else {
TCGv_i64 tcg_rt = cpu_reg(s, rt);
TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
+ /*
+ * We built mop above for the single logical access -- rebuild it
+ * now for the paired operation.
+ *
+ * With LSE2, non-sign-extending pairs are treated atomically if
+ * aligned, and if unaligned one of the pair will be completely
+ * within a 16-byte block and that element will be atomic.
+ * Otherwise each element is separately atomic.
+ * In all cases, issue one operation with the correct atomicity.
+ *
+ * This treats sign-extending loads like zero-extending loads,
+ * since that reuses the most code below.
+ */
+ mop = size + 1;
+ if (s->align_mem) {
+ mop |= (size == 2 ? MO_ALIGN_4 : MO_ALIGN_8);
+ }
+ mop = finalize_memop_pair(s, mop);
+
if (is_load) {
- TCGv_i64 tmp = tcg_temp_new_i64();
+ if (size == 2) {
+ int o2 = s->be_data == MO_LE ? 32 : 0;
+ int o1 = o2 ^ 32;
- /* Do not modify tcg_rt before recognizing any exception
- * from the second load.
- */
- do_gpr_ld(s, tmp, clean_addr, size + is_signed * MO_SIGN,
- false, false, 0, false, false);
- tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
- do_gpr_ld(s, tcg_rt2, clean_addr, size + is_signed * MO_SIGN,
- false, false, 0, false, false);
+ tcg_gen_qemu_ld_i64(tcg_rt, clean_addr, get_mem_index(s), mop);
+ if (is_signed) {
+ tcg_gen_sextract_i64(tcg_rt2, tcg_rt, o2, 32);
+ tcg_gen_sextract_i64(tcg_rt, tcg_rt, o1, 32);
+ } else {
+ tcg_gen_extract_i64(tcg_rt2, tcg_rt, o2, 32);
+ tcg_gen_extract_i64(tcg_rt, tcg_rt, o1, 32);
+ }
+ } else {
+ TCGv_i128 tmp = tcg_temp_new_i128();
- tcg_gen_mov_i64(tcg_rt, tmp);
+ tcg_gen_qemu_ld_i128(tmp, clean_addr, get_mem_index(s), mop);
+ if (s->be_data == MO_LE) {
+ tcg_gen_extr_i128_i64(tcg_rt, tcg_rt2, tmp);
+ } else {
+ tcg_gen_extr_i128_i64(tcg_rt2, tcg_rt, tmp);
+ }
+ }
} else {
- do_gpr_st(s, tcg_rt, clean_addr, size,
- false, 0, false, false);
- tcg_gen_addi_i64(clean_addr, clean_addr, 1 << size);
- do_gpr_st(s, tcg_rt2, clean_addr, size,
- false, 0, false, false);
+ if (size == 2) {
+ TCGv_i64 tmp = tcg_temp_new_i64();
+
+ if (s->be_data == MO_LE) {
+ tcg_gen_concat32_i64(tmp, tcg_rt, tcg_rt2);
+ } else {
+ tcg_gen_concat32_i64(tmp, tcg_rt2, tcg_rt);
+ }
+ tcg_gen_qemu_st_i64(tmp, clean_addr, get_mem_index(s), mop);
+ } else {
+ TCGv_i128 tmp = tcg_temp_new_i128();
+
+ if (s->be_data == MO_LE) {
+ tcg_gen_concat_i64_i128(tmp, tcg_rt, tcg_rt2);
+ } else {
+ tcg_gen_concat_i64_i128(tmp, tcg_rt2, tcg_rt);
+ }
+ tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop);
+ }
}
}
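
To make the pair handling above concrete, this is how the MemOp for a 64-bit LDP/STP is composed on a little-endian guest (a summary sketch derived from the code above, not additional patch content):

    /*
     * size == 3 (two X registers):
     *   base:                 MO_128                  (mop = size + 1)
     *   if SCTLR alignment:   | MO_ALIGN_8             (s->align_mem)
     *   finalize_memop_pair:  | MO_LE
     *     with FEAT_LSE2:     | MO_ATOM_WITHIN16_PAIR
     *     without:            | MO_ATOM_IFALIGN_PAIR
     * The single ld/st_i128 then carries the atomicity and any alignment
     * requirement for the whole pair.
     */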
@@ -3012,7 +3160,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
bool post_index;
bool writeback;
int memidx;
-
+ MemOp memop;
TCGv_i64 clean_addr, dirty_addr;
if (is_vector) {
@@ -3025,6 +3173,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
if (!fp_access_check(s)) {
return;
}
+ memop = finalize_memop_asimd(s, size);
} else {
if (size == 3 && opc == 2) {
/* PRFM - prefetch */
@@ -3039,8 +3188,9 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
return;
}
is_store = (opc == 0);
- is_signed = extract32(opc, 1, 1);
+ is_signed = !is_store && extract32(opc, 1, 1);
is_extended = (size < 3) && extract32(opc, 0, 1);
+ memop = finalize_memop(s, size + is_signed * MO_SIGN);
}
switch (idx) {
@@ -3073,25 +3223,26 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
}
memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
+
clean_addr = gen_mte_check1_mmuidx(s, dirty_addr, is_store,
writeback || rn != 31,
-                                       size, is_unpriv, memidx);
+                                       memop, is_unpriv, memidx);
if (is_vector) {
if (is_store) {
- do_fp_st(s, rt, clean_addr, size);
+ do_fp_st(s, rt, clean_addr, memop);
} else {
- do_fp_ld(s, rt, clean_addr, size);
+ do_fp_ld(s, rt, clean_addr, memop);
}
} else {
TCGv_i64 tcg_rt = cpu_reg(s, rt);
bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
if (is_store) {
- do_gpr_st_memidx(s, tcg_rt, clean_addr, size, memidx,
+ do_gpr_st_memidx(s, tcg_rt, clean_addr, memop, memidx,
iss_valid, rt, iss_sf, false);
} else {
- do_gpr_ld_memidx(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
+ do_gpr_ld_memidx(s, tcg_rt, clean_addr, memop,
is_extended, memidx,
iss_valid, rt, iss_sf, false);
}
@@ -3140,8 +3291,8 @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
bool is_signed = false;
bool is_store = false;
bool is_extended = false;
-
TCGv_i64 tcg_rm, clean_addr, dirty_addr;
+ MemOp memop;
if (extract32(opt, 1, 1) == 0) {
unallocated_encoding(s);
@@ -3168,7 +3319,7 @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
return;
}
is_store = (opc == 0);
- is_signed = extract32(opc, 1, 1);
+ is_signed = !is_store && extract32(opc, 1, 1);
is_extended = (size < 3) && extract32(opc, 0, 1);
}
@@ -3181,22 +3332,25 @@ static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
tcg_gen_add_i64(dirty_addr, dirty_addr, tcg_rm);
- clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, size);
+
+ memop = finalize_memop(s, size + is_signed * MO_SIGN);
+ clean_addr = gen_mte_check1(s, dirty_addr, is_store, true, memop);
if (is_vector) {
if (is_store) {
- do_fp_st(s, rt, clean_addr, size);
+ do_fp_st(s, rt, clean_addr, memop);
} else {
- do_fp_ld(s, rt, clean_addr, size);
+ do_fp_ld(s, rt, clean_addr, memop);
}
} else {
TCGv_i64 tcg_rt = cpu_reg(s, rt);
bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
+
if (is_store) {
- do_gpr_st(s, tcg_rt, clean_addr, size,
+ do_gpr_st(s, tcg_rt, clean_addr, memop,
true, rt, iss_sf, false);
} else {
- do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
+ do_gpr_ld(s, tcg_rt, clean_addr, memop,
is_extended, true, rt, iss_sf, false);
}
}
@@ -3228,12 +3382,11 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
int rn = extract32(insn, 5, 5);
unsigned int imm12 = extract32(insn, 10, 12);
unsigned int offset;
-
TCGv_i64 clean_addr, dirty_addr;
-
bool is_store;
bool is_signed = false;
bool is_extended = false;
+ MemOp memop;
if (is_vector) {
size |= (opc & 2) << 1;
@@ -3255,7 +3408,7 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
return;
}
is_store = (opc == 0);
- is_signed = extract32(opc, 1, 1);
+ is_signed = !is_store && extract32(opc, 1, 1);
is_extended = (size < 3) && extract32(opc, 0, 1);
}
@@ -3265,22 +3418,23 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
dirty_addr = read_cpu_reg_sp(s, rn, 1);
offset = imm12 << size;
tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
- clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, size);
+
+ memop = finalize_memop(s, size + is_signed * MO_SIGN);
+ clean_addr = gen_mte_check1(s, dirty_addr, is_store, rn != 31, memop);
if (is_vector) {
if (is_store) {
- do_fp_st(s, rt, clean_addr, size);
+ do_fp_st(s, rt, clean_addr, memop);
} else {
- do_fp_ld(s, rt, clean_addr, size);
+ do_fp_ld(s, rt, clean_addr, memop);
}
} else {
TCGv_i64 tcg_rt = cpu_reg(s, rt);
bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
if (is_store) {
- do_gpr_st(s, tcg_rt, clean_addr, size,
- true, rt, iss_sf, false);
+ do_gpr_st(s, tcg_rt, clean_addr, memop, true, rt, iss_sf, false);
} else {
- do_gpr_ld(s, tcg_rt, clean_addr, size + is_signed * MO_SIGN,
+ do_gpr_ld(s, tcg_rt, clean_addr, memop,
is_extended, true, rt, iss_sf, false);
}
}
@@ -3310,7 +3464,7 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
bool a = extract32(insn, 23, 1);
TCGv_i64 tcg_rs, tcg_rt, clean_addr;
AtomicThreeOpFn *fn = NULL;
- MemOp mop = s->be_data | size | MO_ALIGN;
+ MemOp mop = size;
if (is_vector || !dc_isar_feature(aa64_atomics, s)) {
unallocated_encoding(s);
@@ -3361,7 +3515,9 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
if (rn == 31) {
gen_check_sp_alignment(s);
}
- clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, size);
+
+ mop = check_atomic_align(s, rn, mop);
+ clean_addr = gen_mte_check1(s, cpu_reg_sp(s, rn), false, rn != 31, mop);
if (o3_opc == 014) {
/*
@@ -3371,7 +3527,7 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
* full load-acquire (we only need "load-acquire processor consistent"),
* but we choose to implement them as full LDAQ.
*/
- do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false,
+ do_gpr_ld(s, cpu_reg(s, rt), clean_addr, mop, false,
true, rt, disas_ldst_compute_iss_sf(size, false, 0), true);
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
return;
@@ -3417,6 +3573,7 @@ static void disas_ldst_pac(DisasContext *s, uint32_t insn,
bool use_key_a = !extract32(insn, 23, 1);
int offset;
TCGv_i64 clean_addr, dirty_addr, tcg_rt;
+ MemOp memop;
if (size != 3 || is_vector || !dc_isar_feature(aa64_pauth, s)) {
unallocated_encoding(s);
@@ -3443,12 +3600,14 @@ static void disas_ldst_pac(DisasContext *s, uint32_t insn,
offset = sextract32(offset << size, 0, 10 + size);
tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
+ memop = finalize_memop(s, size);
+
/* Note that "clean" and "dirty" here refer to TBI not PAC. */
clean_addr = gen_mte_check1(s, dirty_addr, false,
- is_wback || rn != 31, size);
+ is_wback || rn != 31, memop);
tcg_rt = cpu_reg(s, rt);
- do_gpr_ld(s, tcg_rt, clean_addr, size,
+ do_gpr_ld(s, tcg_rt, clean_addr, memop,
/* extend */ false, /* iss_valid */ !is_wback,
/* iss_srt */ rt, /* iss_sf */ true, /* iss_ar */ false);
@@ -3482,16 +3641,13 @@ static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn)
bool is_store = false;
bool extend = false;
bool iss_sf;
- MemOp mop;
+ MemOp mop = size;
if (!dc_isar_feature(aa64_rcpc_8_4, s)) {
unallocated_encoding(s);
return;
}
- /* TODO: ARMv8.4-LSE SCTLR.nAA */
- mop = size | MO_ALIGN;
-
switch (opc) {
case 0: /* STLURB */
is_store = true;
@@ -3523,6 +3679,8 @@ static void disas_ldst_ldapr_stlr(DisasContext *s, uint32_t insn)
gen_check_sp_alignment(s);
}
+ mop = check_ordered_align(s, rn, offset, is_store, mop);
+
dirty_addr = read_cpu_reg_sp(s, rn, 1);
tcg_gen_addi_i64(dirty_addr, dirty_addr, offset);
clean_addr = clean_data_tbi(s, dirty_addr);
@@ -3689,7 +3847,7 @@ static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
* promote consecutive little-endian elements below.
*/
clean_addr = gen_mte_checkN(s, tcg_rn, is_store, is_postidx || rn != 31,
- total);
+ total, finalize_memop(s, size));
/*
* Consecutive little-endian elements from a single register
@@ -3847,10 +4005,11 @@ static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
total = selem << scale;
tcg_rn = cpu_reg_sp(s, rn);
- clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31,
- total);
mop = finalize_memop(s, scale);
+ clean_addr = gen_mte_checkN(s, tcg_rn, !is_load, is_postidx || rn != 31,
+ total, mop);
+
tcg_ebytes = tcg_constant_i64(1 << scale);
for (xs = 0; xs < selem; xs++) {
if (replicate) {
@@ -4062,15 +4221,18 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn)
if (is_zero) {
TCGv_i64 clean_addr = clean_data_tbi(s, addr);
- TCGv_i64 tcg_zero = tcg_constant_i64(0);
+ TCGv_i64 zero64 = tcg_constant_i64(0);
+ TCGv_i128 zero128 = tcg_temp_new_i128();
int mem_index = get_mem_index(s);
- int i, n = (1 + is_pair) << LOG2_TAG_GRANULE;
+ MemOp mop = finalize_memop(s, MO_128 | MO_ALIGN);
+
+ tcg_gen_concat_i64_i128(zero128, zero64, zero64);
- tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index,
- MO_UQ | MO_ALIGN_16);
- for (i = 8; i < n; i += 8) {
- tcg_gen_addi_i64(clean_addr, clean_addr, 8);
- tcg_gen_qemu_st_i64(tcg_zero, clean_addr, mem_index, MO_UQ);
+ /* This is 1 or 2 atomic 16-byte operations. */
+ tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
+ if (is_pair) {
+ tcg_gen_addi_i64(clean_addr, clean_addr, 16);
+ tcg_gen_qemu_st_i128(zero128, clean_addr, mem_index, mop);
}
}
@@ -14087,6 +14249,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM);
dc->pstate_za = EX_TBFLAG_A64(tb_flags, PSTATE_ZA);
dc->sme_trap_nonstreaming = EX_TBFLAG_A64(tb_flags, SME_TRAP_NONSTREAMING);
+ dc->naa = EX_TBFLAG_A64(tb_flags, NAA);
dc->vec_len = 0;
dc->vec_stride = 0;
dc->cp_regs = arm_cpu->cp_regs;
@@ -14098,6 +14261,8 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
tcg_debug_assert(dc->tbid & 1);
#endif
+ dc->lse2 = dc_isar_feature(aa64_lse2, dc);
+
/* Single step state. The code-generation logic here is:
* SS_ACTIVE == 0:
* generate code with no special handling for single-stepping (except
diff --git a/target/arm/tcg/translate-a64.h b/target/arm/tcg/translate-a64.h
index 0576c4e..b55dc43 100644
--- a/target/arm/tcg/translate-a64.h
+++ b/target/arm/tcg/translate-a64.h
@@ -49,9 +49,9 @@ static inline bool sme_smza_enabled_check(DisasContext *s)
TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr);
TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
- bool tag_checked, int log2_size);
+ bool tag_checked, MemOp memop);
TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write,
- bool tag_checked, int size);
+ bool tag_checked, int total_size, MemOp memop);
/* We should have at some point before trying to access an FP register
* done the necessary access check, so assert that
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index d9d5810..ff05062 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -4167,15 +4167,16 @@ TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
int len, int rn, int imm)
{
- int len_align = QEMU_ALIGN_DOWN(len, 8);
- int len_remain = len % 8;
- int nparts = len / 8 + ctpop8(len_remain);
+ int len_align = QEMU_ALIGN_DOWN(len, 16);
+ int len_remain = len % 16;
+ int nparts = len / 16 + ctpop8(len_remain);
int midx = get_mem_index(s);
TCGv_i64 dirty_addr, clean_addr, t0, t1;
+ TCGv_i128 t16;
dirty_addr = tcg_temp_new_i64();
tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
- clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
+ clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
/*
* Note that unpredicated load/store of vector/predicate registers
@@ -4188,10 +4189,16 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
int i;
t0 = tcg_temp_new_i64();
- for (i = 0; i < len_align; i += 8) {
- tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
+ t1 = tcg_temp_new_i64();
+ t16 = tcg_temp_new_i128();
+
+ for (i = 0; i < len_align; i += 16) {
+ tcg_gen_qemu_ld_i128(t16, clean_addr, midx,
+ MO_LE | MO_128 | MO_ATOM_NONE);
+ tcg_gen_extr_i128_i64(t0, t1, t16);
tcg_gen_st_i64(t0, base, vofs + i);
- tcg_gen_addi_i64(clean_addr, clean_addr, 8);
+ tcg_gen_st_i64(t1, base, vofs + i + 8);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 16);
}
} else {
TCGLabel *loop = gen_new_label();
@@ -4200,14 +4207,21 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
tcg_gen_movi_ptr(i, 0);
gen_set_label(loop);
- t0 = tcg_temp_new_i64();
- tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
- tcg_gen_addi_i64(clean_addr, clean_addr, 8);
+ t16 = tcg_temp_new_i128();
+ tcg_gen_qemu_ld_i128(t16, clean_addr, midx,
+ MO_LE | MO_128 | MO_ATOM_NONE);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 16);
tp = tcg_temp_new_ptr();
tcg_gen_add_ptr(tp, base, i);
- tcg_gen_addi_ptr(i, i, 8);
+ tcg_gen_addi_ptr(i, i, 16);
+
+ t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ tcg_gen_extr_i128_i64(t0, t1, t16);
+
tcg_gen_st_i64(t0, tp, vofs);
+ tcg_gen_st_i64(t1, tp, vofs + 8);
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
}
@@ -4216,6 +4230,16 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
* Predicate register loads can be any multiple of 2.
* Note that we still store the entire 64-bit unit into cpu_env.
*/
+ if (len_remain >= 8) {
+ t0 = tcg_temp_new_i64();
+ tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE);
+ tcg_gen_st_i64(t0, base, vofs + len_align);
+ len_remain -= 8;
+ len_align += 8;
+ if (len_remain) {
+ tcg_gen_addi_i64(clean_addr, clean_addr, 8);
+ }
+ }
if (len_remain) {
t0 = tcg_temp_new_i64();
switch (len_remain) {
@@ -4223,14 +4247,14 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
case 4:
case 8:
tcg_gen_qemu_ld_i64(t0, clean_addr, midx,
- MO_LE | ctz32(len_remain));
+ MO_LE | ctz32(len_remain) | MO_ATOM_NONE);
break;
case 6:
t1 = tcg_temp_new_i64();
- tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL);
+ tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE);
tcg_gen_addi_i64(clean_addr, clean_addr, 4);
- tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW);
+ tcg_gen_qemu_ld_i64(t1, clean_addr, midx, MO_LEUW | MO_ATOM_NONE);
tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
break;
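
A worked example of the new 16-byte chunking in gen_sve_ldr (gen_sve_str below is symmetric); the register sizes are illustrative:

    /*
     * len = 48 (a 384-bit Z register):
     *   len_align = 48, len_remain = 0  -> three 16-byte loads, no tail.
     * len = 10 (the predicate size for a 640-bit vector length):
     *   len_align = 0, len_remain = 10  -> the new "len_remain >= 8" branch
     *   transfers 8 bytes, then the existing switch handles the final 2.
     */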
@@ -4245,15 +4269,16 @@ void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
int len, int rn, int imm)
{
- int len_align = QEMU_ALIGN_DOWN(len, 8);
- int len_remain = len % 8;
- int nparts = len / 8 + ctpop8(len_remain);
+ int len_align = QEMU_ALIGN_DOWN(len, 16);
+ int len_remain = len % 16;
+ int nparts = len / 16 + ctpop8(len_remain);
int midx = get_mem_index(s);
- TCGv_i64 dirty_addr, clean_addr, t0;
+ TCGv_i64 dirty_addr, clean_addr, t0, t1;
+ TCGv_i128 t16;
dirty_addr = tcg_temp_new_i64();
tcg_gen_addi_i64(dirty_addr, cpu_reg_sp(s, rn), imm);
- clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len);
+ clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8);
/* Note that unpredicated load/store of vector/predicate registers
* are defined as a stream of bytes, which equates to little-endian
@@ -4267,10 +4292,15 @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
int i;
t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
+ t16 = tcg_temp_new_i128();
-        for (i = 0; i < len_align; i += 8) {
+        for (i = 0; i < len_align; i += 16) {
tcg_gen_ld_i64(t0, base, vofs + i);
- tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
- tcg_gen_addi_i64(clean_addr, clean_addr, 8);
+ tcg_gen_ld_i64(t1, base, vofs + i + 8);
+ tcg_gen_concat_i64_i128(t16, t0, t1);
+ tcg_gen_qemu_st_i128(t16, clean_addr, midx,
+ MO_LE | MO_128 | MO_ATOM_NONE);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 16);
}
} else {
TCGLabel *loop = gen_new_label();
@@ -4280,18 +4310,33 @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
gen_set_label(loop);
t0 = tcg_temp_new_i64();
+ t1 = tcg_temp_new_i64();
tp = tcg_temp_new_ptr();
tcg_gen_add_ptr(tp, base, i);
tcg_gen_ld_i64(t0, tp, vofs);
- tcg_gen_addi_ptr(i, i, 8);
+ tcg_gen_ld_i64(t1, tp, vofs + 8);
+ tcg_gen_addi_ptr(i, i, 16);
- tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
- tcg_gen_addi_i64(clean_addr, clean_addr, 8);
+ t16 = tcg_temp_new_i128();
+ tcg_gen_concat_i64_i128(t16, t0, t1);
+
+ tcg_gen_qemu_st_i128(t16, clean_addr, midx, MO_LE | MO_128 | MO_ATOM_NONE);
+ tcg_gen_addi_i64(clean_addr, clean_addr, 16);
tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
}
/* Predicate register stores can be any multiple of 2. */
+ if (len_remain >= 8) {
+ t0 = tcg_temp_new_i64();
+ tcg_gen_ld_i64(t0, base, vofs + len_align);
+ tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ | MO_ATOM_NONE);
+ len_remain -= 8;
+ len_align += 8;
+ if (len_remain) {
+ tcg_gen_addi_i64(clean_addr, clean_addr, 8);
+ }
+ }
if (len_remain) {
t0 = tcg_temp_new_i64();
tcg_gen_ld_i64(t0, base, vofs + len_align);
@@ -4301,14 +4346,14 @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
case 4:
case 8:
tcg_gen_qemu_st_i64(t0, clean_addr, midx,
- MO_LE | ctz32(len_remain));
+ MO_LE | ctz32(len_remain) | MO_ATOM_NONE);
break;
case 6:
- tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL);
+ tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUL | MO_ATOM_NONE);
tcg_gen_addi_i64(clean_addr, clean_addr, 4);
tcg_gen_shri_i64(t0, t0, 32);
- tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW);
+ tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUW | MO_ATOM_NONE);
break;
default:
@@ -4964,6 +5009,7 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
unsigned msz = dtype_msz(a->dtype);
TCGLabel *over;
TCGv_i64 temp, clean_addr;
+ MemOp memop;
if (!dc_isar_feature(aa64_sve, s)) {
return false;
@@ -4993,10 +5039,10 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
/* Load the data. */
temp = tcg_temp_new_i64();
tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
- clean_addr = gen_mte_check1(s, temp, false, true, msz);
- tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s),
- finalize_memop(s, dtype_mop[a->dtype]));
+ memop = finalize_memop(s, dtype_mop[a->dtype]);
+ clean_addr = gen_mte_check1(s, temp, false, true, memop);
+ tcg_gen_qemu_ld_i64(temp, clean_addr, get_mem_index(s), memop);
/* Broadcast to *all* elements. */
tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
index a68d3c7..13c88ba 100644
--- a/target/arm/tcg/translate.c
+++ b/target/arm/tcg/translate.c
@@ -9168,6 +9168,7 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
dc->sme_trap_nonstreaming =
EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING);
}
+ dc->lse2 = false; /* applies only to aarch64 */
dc->cp_regs = cpu->cp_regs;
dc->features = env->features;
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index 4d88197..d1cacff 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -90,6 +90,7 @@ typedef struct DisasContext {
uint64_t features; /* CPU features bits */
bool aarch64;
bool thumb;
+ bool lse2;
/* Because unallocated encodings generate different exception syndrome
* information from traps due to FP being disabled, we can't do a single
* "is fp access disabled" check at a high level in the decode tree.
@@ -141,6 +142,8 @@ typedef struct DisasContext {
bool fgt_eret;
/* True if fine-grained trap on SVC is enabled */
bool fgt_svc;
+ /* True if FEAT_LSE2 SCTLR_ELx.nAA is set */
+ bool naa;
/*
* >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI.
* < 0, set by the current instruction.
@@ -557,12 +560,13 @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
}
/**
- * finalize_memop:
+ * finalize_memop_atom:
* @s: DisasContext
* @opc: size+sign+align of the memory operation
+ * @atom: atomicity of the memory operation
*
- * Build the complete MemOp for a memory operation, including alignment
- * and endianness.
+ * Build the complete MemOp for a memory operation, including alignment,
+ * endianness, and atomicity.
*
* If (op & MO_AMASK) then the operation already contains the required
* alignment, e.g. for AccType_ATOMIC. Otherwise, this an optionally
@@ -572,12 +576,63 @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
* and this is applied here. Note that there is no way to indicate that
* no alignment should ever be enforced; this must be handled manually.
*/
-static inline MemOp finalize_memop(DisasContext *s, MemOp opc)
+static inline MemOp finalize_memop_atom(DisasContext *s, MemOp opc, MemOp atom)
{
if (s->align_mem && !(opc & MO_AMASK)) {
opc |= MO_ALIGN;
}
- return opc | s->be_data;
+ return opc | atom | s->be_data;
+}
+
+/**
+ * finalize_memop:
+ * @s: DisasContext
+ * @opc: size+sign+align of the memory operation
+ *
+ * Like finalize_memop_atom, but with default atomicity.
+ */
+static inline MemOp finalize_memop(DisasContext *s, MemOp opc)
+{
+ MemOp atom = s->lse2 ? MO_ATOM_WITHIN16 : MO_ATOM_IFALIGN;
+ return finalize_memop_atom(s, opc, atom);
+}
+
+/**
+ * finalize_memop_pair:
+ * @s: DisasContext
+ * @opc: size+sign+align of the memory operation
+ *
+ * Like finalize_memop_atom, but with atomicity for a pair.
+ * C.f. Pseudocode for Mem[], operand ispair.
+ */
+static inline MemOp finalize_memop_pair(DisasContext *s, MemOp opc)
+{
+ MemOp atom = s->lse2 ? MO_ATOM_WITHIN16_PAIR : MO_ATOM_IFALIGN_PAIR;
+ return finalize_memop_atom(s, opc, atom);
+}
+
+/**
+ * finalize_memop_asimd:
+ * @s: DisasContext
+ * @opc: size+sign+align of the memory operation
+ *
+ * Like finalize_memop_atom, but with atomicity of AccessType_ASIMD.
+ */
+static inline MemOp finalize_memop_asimd(DisasContext *s, MemOp opc)
+{
+ /*
+ * In the pseudocode for Mem[], with AccessType_ASIMD, size == 16,
+ * if IsAligned(8), the first case provides separate atomicity for
+ * the pair of 64-bit accesses. If !IsAligned(8), the middle cases
+ * do not apply, and we're left with the final case of no atomicity.
+ * Thus MO_ATOM_IFALIGN_PAIR.
+ *
+ * For other sizes, normal LSE2 rules apply.
+ */
+ if ((opc & MO_SIZE) == MO_128) {
+ return finalize_memop_atom(s, opc, MO_ATOM_IFALIGN_PAIR);
+ }
+ return finalize_memop(s, opc);
}
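
Summarizing the atomicity each wrapper selects (a sketch derived from the code above):

    /*
     *                        without FEAT_LSE2       with FEAT_LSE2
     *  finalize_memop        MO_ATOM_IFALIGN         MO_ATOM_WITHIN16
     *  finalize_memop_pair   MO_ATOM_IFALIGN_PAIR    MO_ATOM_WITHIN16_PAIR
     *  finalize_memop_asimd  as finalize_memop for sizes below 16 bytes;
     *                        MO_ATOM_IFALIGN_PAIR for 16-byte accesses
     *
     * Each also folds in s->be_data, plus MO_ALIGN when SCTLR alignment
     * checking is enabled (s->align_mem) and the caller did not already
     * request an alignment.
     */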
/**