aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2021-08-13 17:11:53 +0100
committerPeter Maydell <peter.maydell@linaro.org>2021-08-25 10:48:49 +0100
commit54dc78a901188d208a3dfedb0f98230043509120 (patch)
tree2942079f2d66df32236419885f8466b374f5fef7
parent7f061c0ab9289cb0ed55eaf09bec1b6cb474e6ee (diff)
downloadqemu-54dc78a901188d208a3dfedb0f98230043509120.zip
qemu-54dc78a901188d208a3dfedb0f98230043509120.tar.gz
qemu-54dc78a901188d208a3dfedb0f98230043509120.tar.bz2
target/arm: Implement MVE narrowing moves
Implement the MVE narrowing move insns VMOVN, VQMOVN and VQMOVUN. These take a double-width input, narrow it (possibly saturating) and store the result to either the top or bottom half of the output element. Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r--target/arm/helper-mve.h20
-rw-r--r--target/arm/mve.decode12
-rw-r--r--target/arm/mve_helper.c78
-rw-r--r--target/arm/translate-mve.c22
4 files changed, 132 insertions, 0 deletions
diff --git a/target/arm/helper-mve.h b/target/arm/helper-mve.h
index c7e7aab..17484f7 100644
--- a/target/arm/helper-mve.h
+++ b/target/arm/helper-mve.h
@@ -76,6 +76,26 @@ DEF_HELPER_FLAGS_3(mve_vnegw, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vfnegh, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vfnegs, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vmovnbb, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vmovnbh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vmovntb, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vmovnth, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vqmovunbb, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vqmovunbh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vqmovuntb, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vqmovunth, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vqmovnbsb, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vqmovnbsh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vqmovntsb, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vqmovntsh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
+DEF_HELPER_FLAGS_3(mve_vqmovnbub, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vqmovnbuh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vqmovntub, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vqmovntuh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
DEF_HELPER_FLAGS_4(mve_vand, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
DEF_HELPER_FLAGS_4(mve_vbic, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
DEF_HELPER_FLAGS_4(mve_vorr, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr)
diff --git a/target/arm/mve.decode b/target/arm/mve.decode
index c8a06ed..d295a69 100644
--- a/target/arm/mve.decode
+++ b/target/arm/mve.decode
@@ -153,6 +153,9 @@ VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op
VSHLL_BS 111 0 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_b
VSHLL_BS 111 0 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_h
+ VQMOVUNB 111 0 1110 0 . 11 .. 01 ... 0 1110 1 0 . 0 ... 1 @1op
+ VQMOVN_BS 111 0 1110 0 . 11 .. 11 ... 0 1110 0 0 . 0 ... 1 @1op
+
VMULH_S 111 0 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op
}
@@ -160,6 +163,9 @@ VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op
VSHLL_BU 111 1 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_b
VSHLL_BU 111 1 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_h
+ VMOVNB 111 1 1110 0 . 11 .. 01 ... 0 1110 1 0 . 0 ... 1 @1op
+ VQMOVN_BU 111 1 1110 0 . 11 .. 11 ... 0 1110 0 0 . 0 ... 1 @1op
+
VMULH_U 111 1 1110 0 . .. ...1 ... 0 1110 . 0 . 0 ... 1 @2op
}
@@ -167,6 +173,9 @@ VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op
VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_b
VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_h
+ VQMOVUNT 111 0 1110 0 . 11 .. 01 ... 1 1110 1 0 . 0 ... 1 @1op
+ VQMOVN_TS 111 0 1110 0 . 11 .. 11 ... 1 1110 0 0 . 0 ... 1 @1op
+
VRMULH_S 111 0 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op
}
@@ -174,6 +183,9 @@ VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op
VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_b
VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_h
+ VMOVNT 111 1 1110 0 . 11 .. 01 ... 1 1110 1 0 . 0 ... 1 @1op
+ VQMOVN_TU 111 1 1110 0 . 11 .. 11 ... 1 1110 0 0 . 0 ... 1 @1op
+
VRMULH_U 111 1 1110 0 . .. ...1 ... 1 1110 . 0 . 0 ... 1 @2op
}
diff --git a/target/arm/mve_helper.c b/target/arm/mve_helper.c
index fed0f3c..72c30f3 100644
--- a/target/arm/mve_helper.c
+++ b/target/arm/mve_helper.c
@@ -1650,6 +1650,84 @@ DO_VSHRN_SAT_UH(vqrshrnb_uh, vqrshrnt_uh, DO_RSHRN_UH)
DO_VSHRN_SAT_SB(vqrshrunbb, vqrshruntb, DO_RSHRUN_B)
DO_VSHRN_SAT_SH(vqrshrunbh, vqrshrunth, DO_RSHRUN_H)
+#define DO_VMOVN(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE) \
+ void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm) \
+ { \
+ LTYPE *m = vm; \
+ TYPE *d = vd; \
+ uint16_t mask = mve_element_mask(env); \
+ unsigned le; \
+ mask >>= ESIZE * TOP; \
+ for (le = 0; le < 16 / LESIZE; le++, mask >>= LESIZE) { \
+ mergemask(&d[H##ESIZE(le * 2 + TOP)], \
+ m[H##LESIZE(le)], mask); \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+DO_VMOVN(vmovnbb, false, 1, uint8_t, 2, uint16_t)
+DO_VMOVN(vmovnbh, false, 2, uint16_t, 4, uint32_t)
+DO_VMOVN(vmovntb, true, 1, uint8_t, 2, uint16_t)
+DO_VMOVN(vmovnth, true, 2, uint16_t, 4, uint32_t)
+
+#define DO_VMOVN_SAT(OP, TOP, ESIZE, TYPE, LESIZE, LTYPE, FN) \
+ void HELPER(mve_##OP)(CPUARMState *env, void *vd, void *vm) \
+ { \
+ LTYPE *m = vm; \
+ TYPE *d = vd; \
+ uint16_t mask = mve_element_mask(env); \
+ bool qc = false; \
+ unsigned le; \
+ mask >>= ESIZE * TOP; \
+ for (le = 0; le < 16 / LESIZE; le++, mask >>= LESIZE) { \
+ bool sat = false; \
+ TYPE r = FN(m[H##LESIZE(le)], &sat); \
+ mergemask(&d[H##ESIZE(le * 2 + TOP)], r, mask); \
+ qc |= sat & mask & 1; \
+ } \
+ if (qc) { \
+ env->vfp.qc[0] = qc; \
+ } \
+ mve_advance_vpt(env); \
+ }
+
+#define DO_VMOVN_SAT_UB(BOP, TOP, FN) \
+ DO_VMOVN_SAT(BOP, false, 1, uint8_t, 2, uint16_t, FN) \
+ DO_VMOVN_SAT(TOP, true, 1, uint8_t, 2, uint16_t, FN)
+
+#define DO_VMOVN_SAT_UH(BOP, TOP, FN) \
+ DO_VMOVN_SAT(BOP, false, 2, uint16_t, 4, uint32_t, FN) \
+ DO_VMOVN_SAT(TOP, true, 2, uint16_t, 4, uint32_t, FN)
+
+#define DO_VMOVN_SAT_SB(BOP, TOP, FN) \
+ DO_VMOVN_SAT(BOP, false, 1, int8_t, 2, int16_t, FN) \
+ DO_VMOVN_SAT(TOP, true, 1, int8_t, 2, int16_t, FN)
+
+#define DO_VMOVN_SAT_SH(BOP, TOP, FN) \
+ DO_VMOVN_SAT(BOP, false, 2, int16_t, 4, int32_t, FN) \
+ DO_VMOVN_SAT(TOP, true, 2, int16_t, 4, int32_t, FN)
+
+#define DO_VQMOVN_SB(N, SATP) \
+ do_sat_bhs((int64_t)(N), INT8_MIN, INT8_MAX, SATP)
+#define DO_VQMOVN_UB(N, SATP) \
+ do_sat_bhs((uint64_t)(N), 0, UINT8_MAX, SATP)
+#define DO_VQMOVUN_B(N, SATP) \
+ do_sat_bhs((int64_t)(N), 0, UINT8_MAX, SATP)
+
+#define DO_VQMOVN_SH(N, SATP) \
+ do_sat_bhs((int64_t)(N), INT16_MIN, INT16_MAX, SATP)
+#define DO_VQMOVN_UH(N, SATP) \
+ do_sat_bhs((uint64_t)(N), 0, UINT16_MAX, SATP)
+#define DO_VQMOVUN_H(N, SATP) \
+ do_sat_bhs((int64_t)(N), 0, UINT16_MAX, SATP)
+
+DO_VMOVN_SAT_SB(vqmovnbsb, vqmovntsb, DO_VQMOVN_SB)
+DO_VMOVN_SAT_SH(vqmovnbsh, vqmovntsh, DO_VQMOVN_SH)
+DO_VMOVN_SAT_UB(vqmovnbub, vqmovntub, DO_VQMOVN_UB)
+DO_VMOVN_SAT_UH(vqmovnbuh, vqmovntuh, DO_VQMOVN_UH)
+DO_VMOVN_SAT_SB(vqmovunbb, vqmovuntb, DO_VQMOVUN_B)
+DO_VMOVN_SAT_SH(vqmovunbh, vqmovunth, DO_VQMOVUN_H)
+
uint32_t HELPER(mve_vshlc)(CPUARMState *env, void *vd, uint32_t rdm,
uint32_t shift)
{
diff --git a/target/arm/translate-mve.c b/target/arm/translate-mve.c
index 247f671..5c3655e 100644
--- a/target/arm/translate-mve.c
+++ b/target/arm/translate-mve.c
@@ -275,6 +275,28 @@ DO_1OP(VCLS, vcls)
DO_1OP(VABS, vabs)
DO_1OP(VNEG, vneg)
+/* Narrowing moves: only size 0 and 1 are valid */
+#define DO_VMOVN(INSN, FN) \
+ static bool trans_##INSN(DisasContext *s, arg_1op *a) \
+ { \
+ static MVEGenOneOpFn * const fns[] = { \
+ gen_helper_mve_##FN##b, \
+ gen_helper_mve_##FN##h, \
+ NULL, \
+ NULL, \
+ }; \
+ return do_1op(s, a, fns[a->size]); \
+ }
+
+DO_VMOVN(VMOVNB, vmovnb)
+DO_VMOVN(VMOVNT, vmovnt)
+DO_VMOVN(VQMOVUNB, vqmovunb)
+DO_VMOVN(VQMOVUNT, vqmovunt)
+DO_VMOVN(VQMOVN_BS, vqmovnbs)
+DO_VMOVN(VQMOVN_TS, vqmovnts)
+DO_VMOVN(VQMOVN_BU, vqmovnbu)
+DO_VMOVN(VQMOVN_TU, vqmovntu)
+
static bool trans_VREV16(DisasContext *s, arg_1op *a)
{
static MVEGenOneOpFn * const fns[] = {