From 5f716a82388eb09754dd900e7dbb8ffa15897a28 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Jun 2020 20:31:17 -0700 Subject: target/arm: Implement the LDGM, STGM, STZGM instructions Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson Message-id: 20200626033144.790098-20-richard.henderson@linaro.org Signed-off-by: Peter Maydell --- target/arm/helper-a64.h | 3 ++ target/arm/mte_helper.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++ target/arm/translate-a64.c | 72 ++++++++++++++++++++++++++++++++++----- target/arm/translate.h | 2 ++ 4 files changed, 153 insertions(+), 8 deletions(-) (limited to 'target/arm') diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h index 2fa61b8..7b628d1 100644 --- a/target/arm/helper-a64.h +++ b/target/arm/helper-a64.h @@ -113,3 +113,6 @@ DEF_HELPER_FLAGS_2(stg_stub, TCG_CALL_NO_WG, void, env, i64) DEF_HELPER_FLAGS_3(st2g, TCG_CALL_NO_WG, void, env, i64, i64) DEF_HELPER_FLAGS_3(st2g_parallel, TCG_CALL_NO_WG, void, env, i64, i64) DEF_HELPER_FLAGS_2(st2g_stub, TCG_CALL_NO_WG, void, env, i64) +DEF_HELPER_FLAGS_2(ldgm, TCG_CALL_NO_WG, i64, env, i64) +DEF_HELPER_FLAGS_3(stgm, TCG_CALL_NO_WG, void, env, i64, i64) +DEF_HELPER_FLAGS_3(stzgm_tags, TCG_CALL_NO_WG, void, env, i64, i64) diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c index 7ec7930..27d4b45 100644 --- a/target/arm/mte_helper.c +++ b/target/arm/mte_helper.c @@ -274,3 +274,87 @@ void HELPER(st2g_stub)(CPUARMState *env, uint64_t ptr) probe_write(env, ptr + TAG_GRANULE, TAG_GRANULE, mmu_idx, ra); } } + +#define LDGM_STGM_SIZE (4 << GMID_EL1_BS) + +uint64_t HELPER(ldgm)(CPUARMState *env, uint64_t ptr) +{ + int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + void *tag_mem; + + ptr = QEMU_ALIGN_DOWN(ptr, LDGM_STGM_SIZE); + + /* Trap if accessing an invalid page. */ + tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD, + LDGM_STGM_SIZE, MMU_DATA_LOAD, + LDGM_STGM_SIZE / (2 * TAG_GRANULE), ra); + + /* The tag is squashed to zero if the page does not support tags. */ + if (!tag_mem) { + return 0; + } + + QEMU_BUILD_BUG_ON(GMID_EL1_BS != 6); + /* + * We are loading 64-bits worth of tags. The ordering of elements + * within the word corresponds to a 64-bit little-endian operation. + */ + return ldq_le_p(tag_mem); +} + +void HELPER(stgm)(CPUARMState *env, uint64_t ptr, uint64_t val) +{ + int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + void *tag_mem; + + ptr = QEMU_ALIGN_DOWN(ptr, LDGM_STGM_SIZE); + + /* Trap if accessing an invalid page. */ + tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, + LDGM_STGM_SIZE, MMU_DATA_LOAD, + LDGM_STGM_SIZE / (2 * TAG_GRANULE), ra); + + /* + * Tag store only happens if the page support tags, + * and if the OS has enabled access to the tags. + */ + if (!tag_mem) { + return; + } + + QEMU_BUILD_BUG_ON(GMID_EL1_BS != 6); + /* + * We are storing 64-bits worth of tags. The ordering of elements + * within the word corresponds to a 64-bit little-endian operation. + */ + stq_le_p(tag_mem, val); +} + +void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val) +{ + uintptr_t ra = GETPC(); + int mmu_idx = cpu_mmu_index(env, false); + int log2_dcz_bytes, log2_tag_bytes; + intptr_t dcz_bytes, tag_bytes; + uint8_t *mem; + + /* + * In arm_cpu_realizefn, we assert that dcz > LOG2_TAG_GRANULE+1, + * i.e. 32 bytes, which is an unreasonably small dcz anyway, + * to make sure that we can access one complete tag byte here. + */ + log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2; + log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1); + dcz_bytes = (intptr_t)1 << log2_dcz_bytes; + tag_bytes = (intptr_t)1 << log2_tag_bytes; + ptr &= -dcz_bytes; + + mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, dcz_bytes, + MMU_DATA_STORE, tag_bytes, ra); + if (mem) { + int tag_pair = (val & 0xf) * 0x11; + memset(mem, tag_pair, tag_bytes); + } +} diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c index e2295a3..7dc4937 100644 --- a/target/arm/translate-a64.c +++ b/target/arm/translate-a64.c @@ -3736,7 +3736,7 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn) uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE; int op2 = extract32(insn, 10, 2); int op1 = extract32(insn, 22, 2); - bool is_load = false, is_pair = false, is_zero = false; + bool is_load = false, is_pair = false, is_zero = false, is_mult = false; int index = 0; TCGv_i64 addr, clean_addr, tcg_rt; @@ -3756,9 +3756,14 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn) if (op2 != 0) { /* STG */ index = op2 - 2; - break; + } else { + /* STZGM */ + if (s->current_el == 0 || offset != 0) { + goto do_unallocated; + } + is_mult = is_zero = true; } - goto do_unallocated; + break; case 1: if (op2 != 0) { /* STZG */ @@ -3774,17 +3779,27 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn) /* ST2G */ is_pair = true; index = op2 - 2; - break; + } else { + /* STGM */ + if (s->current_el == 0 || offset != 0) { + goto do_unallocated; + } + is_mult = true; } - goto do_unallocated; + break; case 3: if (op2 != 0) { /* STZ2G */ is_pair = is_zero = true; index = op2 - 2; - break; + } else { + /* LDGM */ + if (s->current_el == 0 || offset != 0) { + goto do_unallocated; + } + is_mult = is_load = true; } - goto do_unallocated; + break; default: do_unallocated: @@ -3792,7 +3807,9 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn) return; } - if (!dc_isar_feature(aa64_mte_insn_reg, s)) { + if (is_mult + ? !dc_isar_feature(aa64_mte, s) + : !dc_isar_feature(aa64_mte_insn_reg, s)) { goto do_unallocated; } @@ -3806,6 +3823,44 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn) tcg_gen_addi_i64(addr, addr, offset); } + if (is_mult) { + tcg_rt = cpu_reg(s, rt); + + if (is_zero) { + int size = 4 << s->dcz_blocksize; + + if (s->ata) { + gen_helper_stzgm_tags(cpu_env, addr, tcg_rt); + } + /* + * The non-tags portion of STZGM is mostly like DC_ZVA, + * except the alignment happens before the access. + */ + clean_addr = clean_data_tbi(s, addr); + tcg_gen_andi_i64(clean_addr, clean_addr, -size); + gen_helper_dc_zva(cpu_env, clean_addr); + } else if (s->ata) { + if (is_load) { + gen_helper_ldgm(tcg_rt, cpu_env, addr); + } else { + gen_helper_stgm(cpu_env, addr, tcg_rt); + } + } else { + MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE; + int size = 4 << GMID_EL1_BS; + + clean_addr = clean_data_tbi(s, addr); + tcg_gen_andi_i64(clean_addr, clean_addr, -size); + gen_probe_access(s, clean_addr, acc, size); + + if (is_load) { + /* The result tags are zeros. */ + tcg_gen_movi_i64(tcg_rt, 0); + } + } + return; + } + if (is_load) { tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); tcg_rt = cpu_reg(s, rt); @@ -14472,6 +14527,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, dc->vec_stride = 0; dc->cp_regs = arm_cpu->cp_regs; dc->features = env->features; + dc->dcz_blocksize = arm_cpu->dcz_blocksize; /* Single step state. The code-generation logic here is: * SS_ACTIVE == 0: diff --git a/target/arm/translate.h b/target/arm/translate.h index 98bcc37..16f2699 100644 --- a/target/arm/translate.h +++ b/target/arm/translate.h @@ -91,6 +91,8 @@ typedef struct DisasContext { * < 0, set by the current instruction. */ int8_t btype; + /* A copy of cpu->dcz_blocksize. */ + uint8_t dcz_blocksize; /* True if this page is guarded. */ bool guarded_page; /* Bottom two bits of XScale c15_cpar coprocessor access control reg */ -- cgit v1.1