aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2021-06-14 16:09:19 +0100
committerPeter Maydell <peter.maydell@linaro.org>2021-06-16 14:33:52 +0100
commita454ea1e6d40bbd4632e4e66de90e802ae47a68e (patch)
treee081462ab407845cedfd41a179e28769e865401e
parent40a36f003c0375bb9d347eeb3f60bac7bbeb82c3 (diff)
downloadqemu-a454ea1e6d40bbd4632e4e66de90e802ae47a68e.zip
qemu-a454ea1e6d40bbd4632e4e66de90e802ae47a68e.tar.gz
qemu-a454ea1e6d40bbd4632e4e66de90e802ae47a68e.tar.bz2
target/arm: Implement MVE LETP insn
Implement the MVE LETP insn. This is like the existing LE loop-end insn, but it must perform an FPU-enabled check, and on loop-exit it resets LTPSIZE to 4. To accommodate the requirement to do something on loop-exit, we drop the use of condlabel and instead manage both the TB exits manually, in the same way we already do in trans_WLS(). The other MVE-specific change to the LE insn is that we must raise an INVSTATE UsageFault insn if LTPSIZE is not 4. Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20210614151007.4545-10-peter.maydell@linaro.org
-rw-r--r--target/arm/t32.decode2
-rw-r--r--target/arm/translate.c104
2 files changed, 97 insertions, 9 deletions
diff --git a/target/arm/t32.decode b/target/arm/t32.decode
index 1b75db5..0f9326c 100644
--- a/target/arm/t32.decode
+++ b/target/arm/t32.decode
@@ -674,7 +674,7 @@ BL 1111 0. .......... 11.1 ............ @branch24
DLS 1111 0 0000 100 rn:4 1110 0000 0000 0001 size=4
WLS 1111 0 0000 100 rn:4 1100 . .......... 1 imm=%lob_imm size=4
{
- LE 1111 0 0000 0 f:1 0 1111 1100 . .......... 1 imm=%lob_imm
+ LE 1111 0 0000 0 f:1 tp:1 1111 1100 . .......... 1 imm=%lob_imm
# This is WLSTP
WLS 1111 0 0000 0 size:2 rn:4 1100 . .......... 1 imm=%lob_imm
}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 1ad0e61..a51e882 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -8223,25 +8223,113 @@ static bool trans_LE(DisasContext *s, arg_LE *a)
* any faster.
*/
TCGv_i32 tmp;
+ TCGLabel *loopend;
+ bool fpu_active;
if (!dc_isar_feature(aa32_lob, s)) {
return false;
}
+ if (a->f && a->tp) {
+ return false;
+ }
+ if (s->condexec_mask) {
+ /*
+ * LE in an IT block is CONSTRAINED UNPREDICTABLE;
+ * we choose to UNDEF, because otherwise our use of
+ * gen_goto_tb(1) would clash with the use of TB exit 1
+ * in the dc->condjmp condition-failed codepath in
+ * arm_tr_tb_stop() and we'd get an assertion.
+ */
+ return false;
+ }
+ if (a->tp) {
+ /* LETP */
+ if (!dc_isar_feature(aa32_mve, s)) {
+ return false;
+ }
+ if (!vfp_access_check(s)) {
+ s->eci_handled = true;
+ return true;
+ }
+ }
/* LE/LETP is OK with ECI set and leaves it untouched */
s->eci_handled = true;
- if (!a->f) {
- /* Not loop-forever. If LR <= 1 this is the last loop: do nothing. */
- arm_gen_condlabel(s);
- tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, s->condlabel);
- /* Decrement LR */
- tmp = load_reg(s, 14);
- tcg_gen_addi_i32(tmp, tmp, -1);
- store_reg(s, 14, tmp);
+ /*
+ * With MVE, LTPSIZE might not be 4, and we must emit an INVSTATE
+ * UsageFault exception for the LE insn in that case. Note that we
+ * are not directly checking FPSCR.LTPSIZE but instead check the
+ * pseudocode LTPSIZE() function, which returns 4 if the FPU is
+ * not currently active (ie ActiveFPState() returns false). We
+ * can identify not-active purely from our TB state flags, as the
+ * FPU is active only if:
+ * the FPU is enabled
+ * AND lazy state preservation is not active
+ * AND we do not need a new fp context (this is the ASPEN/FPCA check)
+ *
+ * Usually we don't need to care about this distinction between
+ * LTPSIZE and FPSCR.LTPSIZE, because the code in vfp_access_check()
+ * will either take an exception or clear the conditions that make
+ * the FPU not active. But LE is an unusual case of a non-FP insn
+ * that looks at LTPSIZE.
+ */
+ fpu_active = !s->fp_excp_el && !s->v7m_lspact && !s->v7m_new_fp_ctxt_needed;
+
+ if (!a->tp && dc_isar_feature(aa32_mve, s) && fpu_active) {
+ /* Need to do a runtime check for LTPSIZE != 4 */
+ TCGLabel *skipexc = gen_new_label();
+ tmp = load_cpu_field(v7m.ltpsize);
+ tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 4, skipexc);
+ tcg_temp_free_i32(tmp);
+ gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
+ default_exception_el(s));
+ gen_set_label(skipexc);
+ }
+
+ if (a->f) {
+ /* Loop-forever: just jump back to the loop start */
+ gen_jmp(s, read_pc(s) - a->imm);
+ return true;
+ }
+
+ /*
+ * Not loop-forever. If LR <= loop-decrement-value this is the last loop.
+ * For LE, we know at this point that LTPSIZE must be 4 and the
+ * loop decrement value is 1. For LETP we need to calculate the decrement
+ * value from LTPSIZE.
+ */
+ loopend = gen_new_label();
+ if (!a->tp) {
+ tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, loopend);
+ tcg_gen_addi_i32(cpu_R[14], cpu_R[14], -1);
+ } else {
+ /*
+ * Decrement by 1 << (4 - LTPSIZE). We need to use a TCG local
+ * so that decr stays live after the brcondi.
+ */
+ TCGv_i32 decr = tcg_temp_local_new_i32();
+ TCGv_i32 ltpsize = load_cpu_field(v7m.ltpsize);
+ tcg_gen_sub_i32(decr, tcg_constant_i32(4), ltpsize);
+ tcg_gen_shl_i32(decr, tcg_constant_i32(1), decr);
+ tcg_temp_free_i32(ltpsize);
+
+ tcg_gen_brcond_i32(TCG_COND_LEU, cpu_R[14], decr, loopend);
+
+ tcg_gen_sub_i32(cpu_R[14], cpu_R[14], decr);
+ tcg_temp_free_i32(decr);
}
/* Jump back to the loop start */
gen_jmp(s, read_pc(s) - a->imm);
+
+ gen_set_label(loopend);
+ if (a->tp) {
+ /* Exits from tail-pred loops must reset LTPSIZE to 4 */
+ tmp = tcg_const_i32(4);
+ store_cpu_field(tmp, v7m.ltpsize);
+ }
+ /* End TB, continuing to following insn */
+ gen_jmp_tb(s, s->base.pc_next, 1);
return true;
}