aboutsummaryrefslogtreecommitdiff
path: root/target/arm/mve_helper.c
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2021-08-13 17:11:54 +0100
committerPeter Maydell <peter.maydell@linaro.org>2021-08-25 10:48:49 +0100
commitf0ffff5163cb503de236fc766121601592f08744 (patch)
tree80b59e16a5e42dd873029fd82853ed1b5469f50b /target/arm/mve_helper.c
parent640cdf20a25d0021f4e93b6207b648a973df320b (diff)
downloadqemu-f0ffff5163cb503de236fc766121601592f08744.zip
qemu-f0ffff5163cb503de236fc766121601592f08744.tar.gz
qemu-f0ffff5163cb503de236fc766121601592f08744.tar.bz2
target/arm: Implement MVE VMLADAV and VMLSDAV
Implement the MVE VMLADAV and VMLSDAV insns. Like the VMLALDAV and VMLSLDAV insns already implemented, these accumulate multiplied vector elements; but they accumulate a 32-bit result rather than a 64-bit one. Note that these encodings overlap with what would be RdaHi=0b111 for VMLALDAV, VMLSLDAV, VRMLALDAVH and VRMLSLDAVH. Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'target/arm/mve_helper.c')
-rw-r--r--target/arm/mve_helper.c41
1 files changed, 41 insertions, 0 deletions
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index 72c30f3..ea206c9 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -1190,6 +1190,47 @@ DO_LDAV(vmlsldavsw, 4, int32_t, false, +=, -=)
DO_LDAV(vmlsldavxsw, 4, int32_t, true, +=, -=)
/*
+ * Multiply add dual accumulate ops
+ */
+#define DO_DAV(OP, ESIZE, TYPE, XCHG, EVENACC, ODDACC) \
+ uint32_t HELPER(glue(mve_, OP))(CPUARMState *env, void *vn, \
+ void *vm, uint32_t a) \
+ { /* Fold the products of the enabled elements of vn and vm into a; return a. */ \
+ uint16_t mask = mve_element_mask(env); /* consumed ESIZE bits per element below */ \
+ unsigned e; \
+ TYPE *n = vn, *m = vm; /* both operands are accessed as arrays of TYPE */ \
+ for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { /* 16 / ESIZE elements */ \
+ if (mask & 1) { /* element is skipped when its lowest mask bit is clear */ \
+ if (e & 1) { /* odd element: n index is e - XCHG (XCHG is passed as false/true) */ \
+ a ODDACC \
+ n[H##ESIZE(e - 1 * XCHG)] * m[H##ESIZE(e)]; \
+ } else { /* even element: n index is e + XCHG, so XCHG pairs m[e] with the other n of the pair */ \
+ a EVENACC \
+ n[H##ESIZE(e + 1 * XCHG)] * m[H##ESIZE(e)]; /* H##ESIZE pastes to H1/H2/H4, defined elsewhere; presumably host-endian index fixups - TODO confirm */ \
+ } \
+ } \
+ } \
+ mve_advance_vpt(env); /* called once, after every element has been visited */ \
+ return a; /* the updated 32-bit accumulator */ \
+ }
+
+#define DO_DAV_S(INSN, XCHG, EVENACC, ODDACC) /* expands DO_DAV for signed 1-, 2- and 4-byte elements, suffixing b/h/w */ \
+ DO_DAV(INSN##b, 1, int8_t, XCHG, EVENACC, ODDACC) \
+ DO_DAV(INSN##h, 2, int16_t, XCHG, EVENACC, ODDACC) \
+ DO_DAV(INSN##w, 4, int32_t, XCHG, EVENACC, ODDACC)
+
+#define DO_DAV_U(INSN, XCHG, EVENACC, ODDACC) /* expands DO_DAV for unsigned 1-, 2- and 4-byte elements, suffixing b/h/w */ \
+ DO_DAV(INSN##b, 1, uint8_t, XCHG, EVENACC, ODDACC) \
+ DO_DAV(INSN##h, 2, uint16_t, XCHG, EVENACC, ODDACC) \
+ DO_DAV(INSN##w, 4, uint32_t, XCHG, EVENACC, ODDACC)
+
+DO_DAV_S(vmladavs, false, +=, +=) /* signed; no exchange; even and odd products both added */
+DO_DAV_U(vmladavu, false, +=, +=) /* unsigned; no exchange; even and odd products both added */
+DO_DAV_S(vmlsdav, false, +=, -=) /* signed; no exchange; even products added, odd products subtracted */
+DO_DAV_S(vmladavsx, true, +=, +=) /* signed; exchange; even and odd products both added */
+DO_DAV_S(vmlsdavx, true, +=, -=) /* signed; exchange; even products added, odd products subtracted */
+
+/*
* Rounding multiply add long dual accumulate high. In the pseudocode
* this is implemented with a 72-bit internal accumulator value of which
* the top 64 bits are returned. We optimize this to avoid having to