aboutsummaryrefslogtreecommitdiff
path: root/target/arm
diff options
context:
space:
mode:
Diffstat (limited to 'target/arm')
-rw-r--r--target/arm/helper-mve.h4
-rw-r--r--target/arm/mve.decode7
-rw-r--r--target/arm/mve_helper.c22
-rw-r--r--target/arm/translate-mve.c45
4 files changed, 78 insertions, 0 deletions
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index f9d4b24..16b974a 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -145,6 +145,10 @@ DEF_HELPER_FLAGS_4(mve_vmulltub, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
DEF_HELPER_FLAGS_4(mve_vmulltuh, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
DEF_HELPER_FLAGS_4(mve_vmulltuw, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
+DEF_HELPER_FLAGS_4(mve_vadd_scalarb, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) /* uint8_t elements */
+DEF_HELPER_FLAGS_4(mve_vadd_scalarh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) /* uint16_t elements */
+DEF_HELPER_FLAGS_4(mve_vadd_scalarw, TCG_CALL_NO_WG, void, env, ptr, ptr, i32) /* uint32_t elements */
+
DEF_HELPER_FLAGS_4(mve_vmlaldavsh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
DEF_HELPER_FLAGS_4(mve_vmlaldavsw, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
DEF_HELPER_FLAGS_4(mve_vmlaldavxsh, TCG_CALL_NO_WG, i64, env, ptr, ptr, i64)
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index ac68f07..0ee7a72 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -26,6 +26,7 @@
&vldr_vstr rn qd imm p a w size l u
&1op qd qm size
&2op qd qm qn size
+&2scalar qd qn rm size
@vldr_vstr ....... . . . . l:1 rn:4 ... ...... imm:7 &vldr_vstr qd=%qd u=0
# Note that both Rn and Qd are 3 bits only (no D bit)
@@ -36,6 +37,8 @@
@2op .... .... .. size:2 .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn
@2op_nosz .... .... .... .... .... .... .... .... &2op qd=%qd qm=%qm qn=%qn size=0
+@2scalar .... .... .. size:2 .... .... .... .... rm:4 &2scalar qd=%qd qn=%qn
+
# Vector loads and stores
# Widening loads and narrowing stores:
@@ -154,3 +157,7 @@ VRMLALDAVH_S 1110 1110 1 ... ... 0 ... x:1 1111 . 0 a:1 0 ... 0 @vmlaldav_no
VRMLALDAVH_U 1111 1110 1 ... ... 0 ... x:1 1111 . 0 a:1 0 ... 0 @vmlaldav_nosz
VRMLSLDAVH 1111 1110 1 ... ... 0 ... x:1 1110 . 0 a:1 0 ... 1 @vmlaldav_nosz
+
+# Scalar operations
+
+VADD_scalar 1110 1110 0 . .. ... 1 ... 0 1111 . 100 .... @2scalar
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index c1427b0..6b8cead 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -491,6 +491,28 @@ DO_2OP_S(vhsubs, do_vhsub_s)
DO_2OP_U(vhsubu, do_vhsub_u)
+#define DO_2OP_SCALAR(OP, ESIZE, TYPE, FN) /* define helper mve_<OP>: d[e] = FN(n[e], scalar) per element */ \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, \
+ uint32_t rm) \
+ { \
+ TYPE *d = vd, *n = vn; /* view destination and source vectors as arrays of TYPE */ \
+ TYPE m = rm; /* scalar operand converted to the element type */ \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned e; \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { /* 16 / ESIZE elements; mask advances ESIZE bits each */ \
+ mergemask(&d[H##ESIZE(e)], FN(n[H##ESIZE(e)], m), mask); /* presumably writes only where mask allows -- confirm */ \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+/* Instantiate DO_2OP_SCALAR for unsigned 8-, 16- and 32-bit elements (suffixes b, h and w). */
+#define DO_2OP_SCALAR_U(OP, FN) \
+ DO_2OP_SCALAR(OP##b, 1, uint8_t, FN) \
+ DO_2OP_SCALAR(OP##h, 2, uint16_t, FN) \
+ DO_2OP_SCALAR(OP##w, 4, uint32_t, FN)
+
+DO_2OP_SCALAR_U(vadd_scalar, DO_ADD) /* defines helpers mve_vadd_scalarb, mve_vadd_scalarh, mve_vadd_scalarw */
+
/*
* Multiply add long dual accumulate ops.
*/
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index a0c4f10..388848b 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -31,6 +31,7 @@
typedef void MVEGenLdStFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void MVEGenOneOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
typedef void MVEGenTwoOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr);
+typedef void MVEGenTwoOpScalarFn(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32); /* called as fn(env, qd, qn, rm) */
typedef void MVEGenDualAccOpFn(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64);
/* Return the offset of a Qn register (same semantics as aa32_vfp_qreg()) */
@@ -383,6 +384,50 @@ DO_2OP(VMULL_BU, vmullbu)
DO_2OP(VMULL_TS, vmullts)
DO_2OP(VMULL_TU, vmulltu)
+static bool do_2op_scalar(DisasContext *s, arg_2scalar *a,
+ MVEGenTwoOpScalarFn fn)
+{ /* Emit fn(env, Qd, Qn, Rm) for a vector-by-scalar insn; returns false when the encoding is rejected. */
+ TCGv_ptr qd, qn;
+ TCGv_i32 rm;
+
+ if (!dc_isar_feature(aa32_mve, s) ||
+ !mve_check_qreg_bank(s, a->qd | a->qn) ||
+ !fn) { /* a NULL fn means the caller has no helper for this element size */
+ return false;
+ }
+ if (a->rm == 13 || a->rm == 15) {
+ /* UNPREDICTABLE: reject encodings that name r13 or r15 as the scalar register */
+ return false;
+ }
+ if (!mve_eci_check(s) || !vfp_access_check(s)) {
+ return true; /* insn accepted but no helper call emitted; presumably the failed check dealt with it -- confirm */
+ }
+
+ qd = mve_qreg_ptr(a->qd);
+ qn = mve_qreg_ptr(a->qn);
+ rm = load_reg(s, a->rm); /* scalar operand is read from general-purpose register Rm */
+ fn(cpu_env, qd, qn, rm);
+ tcg_temp_free_i32(rm); /* release the three temporaries obtained above */
+ tcg_temp_free_ptr(qd);
+ tcg_temp_free_ptr(qn);
+ mve_update_eci(s);
+ return true;
+}
+
+#define DO_2OP_SCALAR(INSN, FN) /* define trans_<INSN>, which picks a helper by a->size */ \
+ static bool trans_##INSN(DisasContext *s, arg_2scalar *a) \
+ { \
+ static MVEGenTwoOpScalarFn * const fns[] = { /* indexed by a->size */ \
+ gen_helper_mve_##FN##b, \
+ gen_helper_mve_##FN##h, \
+ gen_helper_mve_##FN##w, \
+ NULL, /* no helper for size 3; do_2op_scalar() returns false for a NULL fn */ \
+ }; \
+ return do_2op_scalar(s, a, fns[a->size]); \
+ }
+
+DO_2OP_SCALAR(VADD_scalar, vadd_scalar) /* defines trans_VADD_scalar using gen_helper_mve_vadd_scalar{b,h,w} */
+
static bool do_long_dual_acc(DisasContext *s, arg_vmlaldav *a,
MVEGenDualAccOpFn *fn)
{