aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--target/arm/mve.decode4
-rw-r--r--target/arm/translate-a32.h1
-rw-r--r--target/arm/translate-mve.c85
-rw-r--r--target/arm/translate-vfp.c2
4 files changed, 91 insertions, 1 deletions
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index 0955ed0..774ee2a 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -136,6 +136,10 @@ VLDR_VSTR 1110110 1 a:1 . w:1 . .... ... 111101 ....... @vldr_vstr \
VLDR_VSTR 1110110 1 a:1 . w:1 . .... ... 111110 ....... @vldr_vstr \
size=2 p=1
+# Moves between 2 32-bit vector lanes and 2 general purpose registers
+VMOV_to_2gp 1110 1100 0 . 00 rt2:4 ... 0 1111 000 idx:1 rt:4 qd=%qd
+VMOV_from_2gp 1110 1100 0 . 01 rt2:4 ... 0 1111 000 idx:1 rt:4 qd=%qd
+
# Vector 2-op
VAND 1110 1111 0 . 00 ... 0 ... 0 0001 . 1 . 1 ... 0 @2op_nosz
VBIC 1110 1111 0 . 01 ... 0 ... 0 0001 . 1 . 1 ... 0 @2op_nosz
diff --git a/target/arm/translate-a32.h b/target/arm/translate-a32.h
index 6dfcafe..6f4d65d 100644
--- a/target/arm/translate-a32.h
+++ b/target/arm/translate-a32.h
@@ -49,6 +49,7 @@ void gen_rev16(TCGv_i32 dest, TCGv_i32 var);
void clear_eci_state(DisasContext *s);
bool mve_eci_check(DisasContext *s);
void mve_update_and_store_eci(DisasContext *s);
+bool mve_skip_vmov(DisasContext *s, int vn, int index, int size);
static inline TCGv_i32 load_cpu_offset(int offset)
{
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index 02c2698..93707fd 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -1507,3 +1507,88 @@ static bool do_vabav(DisasContext *s, arg_vabav *a, MVEGenVABAVFn *fn)
DO_VABAV(VABAV_S, vabavs)
DO_VABAV(VABAV_U, vabavu)
+
+static bool trans_VMOV_to_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
+{
+ /*
+ * VMOV two 32-bit vector lanes to two general-purpose registers.
+ * This insn is not predicated but it is subject to beat-wise
+ * execution if it is not in an IT block. For us this means
+ * only that if PSR.ECI says we should not be executing the beat
+ * corresponding to the lane of the vector register being accessed
+ * then we should skip performing the move, and that we need to do
+ * the usual check for bad ECI state and advance of ECI state.
+ * (If PSR.ECI is non-zero then we cannot be in an IT block.)
+ *
+ * Data flow: 32-bit lane a->idx of Dreg (2 * a->qd) is copied to
+ * Rt, and 32-bit lane a->idx of Dreg (2 * a->qd + 1) is copied to
+ * Rt2. Each of the two copies is emitted only if mve_skip_vmov()
+ * returns false for that Dreg/lane.
+ *
+ * Returns false if the feature/register checks below reject the
+ * insn; returns true otherwise, including the case where the ECI
+ * check or the FP access check fails and no move code is emitted.
+ * NOTE(review): presumably "false" makes the decoder treat the
+ * encoding as unallocated, and a failing check helper has already
+ * generated the appropriate exception -- confirm against the
+ * helpers, which are not visible here.
+ */
+ TCGv_i32 tmp;
+ int vd;
+
+ if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
+ a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15 ||
+ a->rt == a->rt2) {
+ /* Rt/Rt2 cases are UNPREDICTABLE */
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ /* Convert Qreg index to Dreg for read_neon_element32() etc */
+ vd = a->qd * 2;
+
+ if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
+ tmp = tcg_temp_new_i32();
+ read_neon_element32(tmp, vd, a->idx, MO_32);
+ store_reg(s, a->rt, tmp); /* tmp is not freed here; presumably store_reg() consumes it -- confirm */
+ }
+ if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
+ tmp = tcg_temp_new_i32();
+ read_neon_element32(tmp, vd + 1, a->idx, MO_32);
+ store_reg(s, a->rt2, tmp); /* same ownership assumption as for Rt above */
+ }
+
+ mve_update_and_store_eci(s); /* called whether or not either move was skipped */
+ return true;
+}
+
+static bool trans_VMOV_from_2gp(DisasContext *s, arg_VMOV_to_2gp *a)
+{
+ /*
+ * VMOV two general-purpose registers to two 32-bit vector lanes.
+ * This insn is not predicated but it is subject to beat-wise
+ * execution if it is not in an IT block. For us this means
+ * only that if PSR.ECI says we should not be executing the beat
+ * corresponding to the lane of the vector register being accessed
+ * then we should skip performing the move, and that we need to do
+ * the usual check for bad ECI state and advance of ECI state.
+ * (If PSR.ECI is non-zero then we cannot be in an IT block.)
+ *
+ * Data flow: Rt is written to 32-bit lane a->idx of Dreg
+ * (2 * a->qd), and Rt2 is written to 32-bit lane a->idx of Dreg
+ * (2 * a->qd + 1). Each of the two writes is emitted only if
+ * mve_skip_vmov() returns false for that Dreg/lane.
+ *
+ * Unlike trans_VMOV_to_2gp(), Rt == Rt2 is not rejected here.
+ *
+ * Returns false if the feature/register checks below reject the
+ * insn; returns true otherwise, including the case where the ECI
+ * check or the FP access check fails and no move code is emitted.
+ *
+ * NOTE(review): the argument type is arg_VMOV_to_2gp rather than
+ * arg_VMOV_from_2gp; presumably both decode patterns share one
+ * generated argument struct (they have identical fields in
+ * mve.decode) -- confirm against the generated decoder.
+ */
+ TCGv_i32 tmp;
+ int vd;
+
+ if (!dc_isar_feature(aa32_mve, s) || !mve_check_qreg_bank(s, a->qd) ||
+ a->rt == 13 || a->rt == 15 || a->rt2 == 13 || a->rt2 == 15) {
+ /* Rt/Rt2 cases are UNPREDICTABLE */
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true;
+ }
+
+ /* Convert Qreg idx to Dreg for write_neon_element32() etc */
+ vd = a->qd * 2;
+
+ if (!mve_skip_vmov(s, vd, a->idx, MO_32)) {
+ tmp = load_reg(s, a->rt);
+ write_neon_element32(tmp, vd, a->idx, MO_32);
+ tcg_temp_free_i32(tmp); /* temp returned by load_reg() is released by us after the write */
+ }
+ if (!mve_skip_vmov(s, vd + 1, a->idx, MO_32)) {
+ tmp = load_reg(s, a->rt2);
+ write_neon_element32(tmp, vd + 1, a->idx, MO_32);
+ tcg_temp_free_i32(tmp);
+ }
+
+ mve_update_and_store_eci(s); /* called whether or not either move was skipped */
+ return true;
+}
diff --git a/target/arm/translate-vfp.c b/target/arm/translate-vfp.c
index b2991e2..e2eb797 100644
--- a/target/arm/translate-vfp.c
+++ b/target/arm/translate-vfp.c
@@ -581,7 +581,7 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
return true;
}
-static bool mve_skip_vmov(DisasContext *s, int vn, int index, int size)
+bool mve_skip_vmov(DisasContext *s, int vn, int index, int size)
{
/*
* In a CPU with MVE, the VMOV (vector lane to general-purpose register)