about summary refs log tree commit diff
diff options
context:
space:
mode:
author Peter Maydell <peter.maydell@linaro.org> 2019-06-11 16:39:45 +0100
committer Peter Maydell <peter.maydell@linaro.org> 2019-06-13 15:14:04 +0100
commit fa288de272c5c8a66d5eb683b123706a52bc7ad6 (patch)
tree 1e618f040e9883f78901f24990b451274d120a1e
parent 79b02a3b5231c5b8cd31e50cd549968dd0a05c49 (diff)
download qemu-fa288de272c5c8a66d5eb683b123706a52bc7ad6.zip
qemu-fa288de272c5c8a66d5eb683b123706a52bc7ad6.tar.gz
qemu-fa288de272c5c8a66d5eb683b123706a52bc7ad6.tar.bz2
target/arm: Convert the VFP load/store multiple insns to decodetree
Convert the VFP load/store multiple insns to decodetree. This includes tightening up the UNDEF checking for pre-VFPv3 CPUs which only have D0-D15: they now UNDEF for any access to D16-D31, not merely when the smallest register in the transfer list is in D16-D31.

This conversion does not try to share code between the single precision and the double precision versions; this looks a bit duplicative of code, but it leaves the door open for a future refactoring which gets rid of the use of the "F0" registers by inlining the various functions like gen_vfp_ld() and gen_mov_F0_reg() which are hiding "if (dp) { ... } else { ... }" conditionalisation.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r-- target/arm/translate-vfp.inc.c 162
-rw-r--r-- target/arm/translate.c 97
-rw-r--r-- target/arm/vfp.decode 18
3 files changed, 183 insertions, 94 deletions
diff --git a/target/arm/translate-vfp.inc.c b/target/arm/translate-vfp.inc.c
index 40f2cac..32a1805 100644
--- a/target/arm/translate-vfp.inc.c
+++ b/target/arm/translate-vfp.inc.c
@@ -926,3 +926,165 @@ static bool trans_VLDR_VSTR_dp(DisasContext *s, arg_VLDR_VSTR_sp *a)
return true;
}
+
+static bool trans_VLDM_VSTM_sp(DisasContext *s, arg_VLDM_VSTM_sp *a)
+{
+ uint32_t offset;
+ TCGv_i32 addr;
+ int i, n;
+
+ n = a->imm;
+
+ if (n == 0 || (a->vd + n) > 32) {
+ /*
+ * UNPREDICTABLE cases for bad immediates: we choose to
+ * UNDEF to avoid generating huge numbers of TCG ops.
+ * Here n (the raw imm field) is the number of registers the
+ * loop below transfers, one 4-byte step each, starting at vd;
+ * a list running past register index 31 is rejected.
+ */
+ return false;
+ }
+ if (a->rn == 15 && a->w) {
+ /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ if (s->thumb && a->rn == 15) {
+ /*
+ * This is actually UNPREDICTABLE; we use s->pc with bit 1
+ * cleared as the base address.
+ */
+ addr = tcg_temp_new_i32();
+ tcg_gen_movi_i32(addr, s->pc & ~2);
+ } else {
+ addr = load_reg(s, a->rn);
+ }
+ if (a->p) {
+ /* pre-decrement: move the base down by imm words (imm * 4 bytes) */
+ tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
+ }
+
+ if (s->v8m_stackcheck && a->rn == 13 && a->w) {
+ /*
+ * Here 'addr' is the lowest address we will store to,
+ * and is either the old SP (if post-increment) or
+ * the new SP (if pre-decrement). For post-increment
+ * where the old value is below the limit and the new
+ * value is above, it is UNKNOWN whether the limit check
+ * triggers; we choose to trigger.
+ */
+ gen_helper_v8m_stackcheck(cpu_env, addr);
+ }
+
+ offset = 4;
+ for (i = 0; i < n; i++) {
+ if (a->l) {
+ /* load */
+ gen_vfp_ld(s, false, addr);
+ gen_mov_vreg_F0(false, a->vd + i);
+ } else {
+ /* store */
+ gen_mov_F0_vreg(false, a->vd + i);
+ gen_vfp_st(s, false, addr);
+ }
+ tcg_gen_addi_i32(addr, addr, offset);
+ }
+ if (a->w) {
+ /*
+ * writeback: the loop left addr n * 4 bytes above where it
+ * started. For pre-decrement, step back by that amount so rn
+ * receives the decremented base; otherwise rn receives the
+ * address just past the last transfer.
+ */
+ if (a->p) {
+ offset = -offset * n;
+ tcg_gen_addi_i32(addr, addr, offset);
+ }
+ store_reg(s, a->rn, addr);
+ } else {
+ tcg_temp_free_i32(addr);
+ }
+
+ return true;
+}
+
+static bool trans_VLDM_VSTM_dp(DisasContext *s, arg_VLDM_VSTM_dp *a)
+{
+ uint32_t offset;
+ TCGv_i32 addr;
+ int i, n;
+
+ n = a->imm >> 1;
+
+ if (n == 0 || (a->vd + n) > 32 || n > 16) {
+ /*
+ * UNPREDICTABLE cases for bad immediates: we choose to
+ * UNDEF to avoid generating huge numbers of TCG ops.
+ * Here n is imm / 2 (rounded down), the number of registers
+ * the loop below transfers, one 8-byte step each, starting
+ * at vd; more than 16 registers, or a list running past
+ * register index 31, is rejected.
+ */
+ return false;
+ }
+ if (a->rn == 15 && a->w) {
+ /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
+ return false;
+ }
+
+ /*
+ * UNDEF accesses to D16-D31 if they don't exist: this rejects
+ * any list that reaches index 16 or above, not only lists
+ * whose first register is there.
+ */
+ if (!dc_isar_feature(aa32_fp_d32, s) && (a->vd + n) > 16) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ if (s->thumb && a->rn == 15) {
+ /*
+ * This is actually UNPREDICTABLE; we use s->pc with bit 1
+ * cleared as the base address.
+ */
+ addr = tcg_temp_new_i32();
+ tcg_gen_movi_i32(addr, s->pc & ~2);
+ } else {
+ addr = load_reg(s, a->rn);
+ }
+ if (a->p) {
+ /*
+ * pre-decrement: move the base down by imm words (imm * 4
+ * bytes), using the full imm field rather than n.
+ */
+ tcg_gen_addi_i32(addr, addr, -(a->imm << 2));
+ }
+
+ if (s->v8m_stackcheck && a->rn == 13 && a->w) {
+ /*
+ * Here 'addr' is the lowest address we will store to,
+ * and is either the old SP (if post-increment) or
+ * the new SP (if pre-decrement). For post-increment
+ * where the old value is below the limit and the new
+ * value is above, it is UNKNOWN whether the limit check
+ * triggers; we choose to trigger.
+ */
+ gen_helper_v8m_stackcheck(cpu_env, addr);
+ }
+
+ offset = 8;
+ for (i = 0; i < n; i++) {
+ if (a->l) {
+ /* load */
+ gen_vfp_ld(s, true, addr);
+ gen_mov_vreg_F0(true, a->vd + i);
+ } else {
+ /* store */
+ gen_mov_F0_vreg(true, a->vd + i);
+ gen_vfp_st(s, true, addr);
+ }
+ tcg_gen_addi_i32(addr, addr, offset);
+ }
+ if (a->w) {
+ /*
+ * writeback: the loop left addr n * 8 bytes above where it
+ * started. For pre-decrement, step back by that amount so rn
+ * receives the decremented base. Otherwise, an odd imm leaves
+ * one word not covered by the n 8-byte steps, so add 4 to make
+ * the total advance imm * 4 bytes.
+ */
+ if (a->p) {
+ offset = -offset * n;
+ } else if (a->imm & 1) {
+ offset = 4;
+ } else {
+ offset = 0;
+ }
+
+ if (offset != 0) {
+ tcg_gen_addi_i32(addr, addr, offset);
+ }
+ store_reg(s, a->rn, addr);
+ } else {
+ tcg_temp_free_i32(addr);
+ }
+
+ return true;
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 499ce7d..58c0eb1 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -3092,9 +3092,8 @@ static void gen_neon_dup_high16(TCGv_i32 var)
*/
static int disas_vfp_insn(DisasContext *s, uint32_t insn)
{
- uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask;
+ uint32_t rd, rn, rm, op, i, n, delta_d, delta_m, bank_mask;
int dp, veclen;
- TCGv_i32 addr;
TCGv_i32 tmp;
TCGv_i32 tmp2;
@@ -3702,98 +3701,8 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
break;
case 0xc:
case 0xd:
- if ((insn & 0x03e00000) == 0x00400000) {
- /* Already handled by decodetree */
- return 1;
- } else {
- /* Load/store */
- rn = (insn >> 16) & 0xf;
- if (dp)
- VFP_DREG_D(rd, insn);
- else
- rd = VFP_SREG_D(insn);
- if ((insn & 0x01200000) == 0x01000000) {
- /* Already handled by decodetree */
- return 1;
- } else {
- /* load/store multiple */
- int w = insn & (1 << 21);
- if (dp)
- n = (insn >> 1) & 0x7f;
- else
- n = insn & 0xff;
-
- if (w && !(((insn >> 23) ^ (insn >> 24)) & 1)) {
- /* P == U , W == 1 => UNDEF */
- return 1;
- }
- if (n == 0 || (rd + n) > 32 || (dp && n > 16)) {
- /* UNPREDICTABLE cases for bad immediates: we choose to
- * UNDEF to avoid generating huge numbers of TCG ops
- */
- return 1;
- }
- if (rn == 15 && w) {
- /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
- return 1;
- }
-
- if (s->thumb && rn == 15) {
- /* This is actually UNPREDICTABLE */
- addr = tcg_temp_new_i32();
- tcg_gen_movi_i32(addr, s->pc & ~2);
- } else {
- addr = load_reg(s, rn);
- }
- if (insn & (1 << 24)) /* pre-decrement */
- tcg_gen_addi_i32(addr, addr, -((insn & 0xff) << 2));
-
- if (s->v8m_stackcheck && rn == 13 && w) {
- /*
- * Here 'addr' is the lowest address we will store to,
- * and is either the old SP (if post-increment) or
- * the new SP (if pre-decrement). For post-increment
- * where the old value is below the limit and the new
- * value is above, it is UNKNOWN whether the limit check
- * triggers; we choose to trigger.
- */
- gen_helper_v8m_stackcheck(cpu_env, addr);
- }
-
- if (dp)
- offset = 8;
- else
- offset = 4;
- for (i = 0; i < n; i++) {
- if (insn & ARM_CP_RW_BIT) {
- /* load */
- gen_vfp_ld(s, dp, addr);
- gen_mov_vreg_F0(dp, rd + i);
- } else {
- /* store */
- gen_mov_F0_vreg(dp, rd + i);
- gen_vfp_st(s, dp, addr);
- }
- tcg_gen_addi_i32(addr, addr, offset);
- }
- if (w) {
- /* writeback */
- if (insn & (1 << 24))
- offset = -offset * n;
- else if (dp && (insn & 1))
- offset = 4;
- else
- offset = 0;
-
- if (offset != 0)
- tcg_gen_addi_i32(addr, addr, offset);
- store_reg(s, rn, addr);
- } else {
- tcg_temp_free_i32(addr);
- }
- }
- }
- break;
+ /* Already handled by decodetree */
+ return 1;
default:
/* Should never happen. */
return 1;
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index 8fa7fa0..68c9ffc 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -78,3 +78,21 @@ VLDR_VSTR_sp ---- 1101 u:1 .0 l:1 rn:4 .... 1010 imm:8 \
vd=%vd_sp
VLDR_VSTR_dp ---- 1101 u:1 .0 l:1 rn:4 .... 1011 imm:8 \
vd=%vd_dp
+
+# We split the load/store multiple up into two patterns to avoid
+# overlap with other insns in the "Advanced SIMD load/store and 64-bit move"
+# grouping:
+# P=0 U=0 W=0 is 64-bit VMOV
+# P=1 W=0 is VLDR/VSTR
+# P=U W=1 is UNDEF
+# leaving P=0 U=1 W=x and P=1 U=0 W=1 for load/store multiple.
+# These include FSTM/FLDM.
+VLDM_VSTM_sp ---- 1100 1 . w:1 l:1 rn:4 .... 1010 imm:8 \
+ vd=%vd_sp p=0 u=1
+VLDM_VSTM_dp ---- 1100 1 . w:1 l:1 rn:4 .... 1011 imm:8 \
+ vd=%vd_dp p=0 u=1
+
+VLDM_VSTM_sp ---- 1101 0.1 l:1 rn:4 .... 1010 imm:8 \
+ vd=%vd_sp p=1 u=0 w=1
+VLDM_VSTM_dp ---- 1101 0.1 l:1 rn:4 .... 1011 imm:8 \
+ vd=%vd_dp p=1 u=0 w=1