aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Henderson <richard.henderson@linaro.org>2018-06-29 15:11:08 +0100
committerPeter Maydell <peter.maydell@linaro.org>2018-06-29 15:11:08 +0100
commitca40a6e6e390eb1cad7ade881dc7c622793f9324 (patch)
treed76a1f86086e3457820f55efde2a00b2c4b7f983
parentcc48affe83fff4b2886c064265d7103dee5e4a14 (diff)
downloadqemu-ca40a6e6e390eb1cad7ade881dc7c622793f9324.zip
qemu-ca40a6e6e390eb1cad7ade881dc7c622793f9324.tar.gz
qemu-ca40a6e6e390eb1cad7ade881dc7c622793f9324.tar.bz2
target/arm: Implement SVE Floating Point Multiply Indexed Group
Reviewed-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20180627043328.11531-19-richard.henderson@linaro.org Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--target/arm/helper.h14
-rw-r--r--target/arm/sve.decode19
-rw-r--r--target/arm/translate-sve.c50
-rw-r--r--target/arm/vec_helper.c48
4 files changed, 131 insertions, 0 deletions
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 879a722..56439ac 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -620,6 +620,20 @@ DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_5(gvec_ftsmul_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmul_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmul_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_fmul_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_6(gvec_fmla_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, i32)
+
#ifdef TARGET_AARCH64
#include "helper-a64.h"
#include "helper-sve.h"
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 267eb2d..15fa790 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -29,6 +29,7 @@
%imm9_16_10 16:s6 10:3
%size_23 23:2
%dtype_23_13 23:2 13:2
+%index3_22_19 22:1 19:2
# A combination of tsz:imm3 -- extract esize.
%tszimm_esz 22:2 5:5 !function=tszimm_esz
@@ -716,6 +717,24 @@ UMIN_zzi 00100101 .. 101 011 110 ........ ..... @rdn_i8u
# SVE integer multiply immediate (unpredicated)
MUL_zzi 00100101 .. 110 000 110 ........ ..... @rdn_i8s
+### SVE FP Multiply-Add Indexed Group
+
+# SVE floating-point multiply-add (indexed)
+FMLA_zzxz 01100100 0.1 .. rm:3 00000 sub:1 rn:5 rd:5 \
+ ra=%reg_movprfx index=%index3_22_19 esz=1
+FMLA_zzxz 01100100 101 index:2 rm:3 00000 sub:1 rn:5 rd:5 \
+ ra=%reg_movprfx esz=2
+FMLA_zzxz 01100100 111 index:1 rm:4 00000 sub:1 rn:5 rd:5 \
+ ra=%reg_movprfx esz=3
+
+### SVE FP Multiply Indexed Group
+
+# SVE floating-point multiply (indexed)
+FMUL_zzx 01100100 0.1 .. rm:3 001000 rn:5 rd:5 \
+ index=%index3_22_19 esz=1
+FMUL_zzx 01100100 101 index:2 rm:3 001000 rn:5 rd:5 esz=2
+FMUL_zzx 01100100 111 index:1 rm:4 001000 rn:5 rd:5 esz=3
+
### SVE FP Accumulating Reduction Group
# SVE floating-point serial reduction (predicated)
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 1b467a5..e90f593 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -3401,6 +3401,56 @@ DO_ZZI(UMIN, umin)
#undef DO_ZZI
/*
+ *** SVE Floating Point Multiply-Add Indexed Group
+ */
+
+static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a, uint32_t insn)
+{
+ static gen_helper_gvec_4_ptr * const fns[3] = {
+ gen_helper_gvec_fmla_idx_h,
+ gen_helper_gvec_fmla_idx_s,
+ gen_helper_gvec_fmla_idx_d,
+ };
+
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
+ tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vec_full_reg_offset(s, a->rm),
+ vec_full_reg_offset(s, a->ra),
+ status, vsz, vsz, (a->index << 1) | a->sub,
+ fns[a->esz - 1]);
+ tcg_temp_free_ptr(status);
+ }
+ return true;
+}
+
+/*
+ *** SVE Floating Point Multiply Indexed Group
+ */
+
+static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a, uint32_t insn)
+{
+ static gen_helper_gvec_3_ptr * const fns[3] = {
+ gen_helper_gvec_fmul_idx_h,
+ gen_helper_gvec_fmul_idx_s,
+ gen_helper_gvec_fmul_idx_d,
+ };
+
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
+ tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vec_full_reg_offset(s, a->rm),
+ status, vsz, vsz, a->index, fns[a->esz - 1]);
+ tcg_temp_free_ptr(status);
+ }
+ return true;
+}
+
+/*
*** SVE Floating Point Accumulating Reduction Group
*/
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c
index f504dd5..97af75a 100644
--- a/target/arm/vec_helper.c
+++ b/target/arm/vec_helper.c
@@ -495,3 +495,51 @@ DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64)
#endif
#undef DO_3OP
+
+/* For the indexed ops, SVE applies the index per 128-bit vector segment.
+ * For AdvSIMD, there is of course only one such vector segment.
+ */
+
+#define DO_MUL_IDX(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
+{ \
+ intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \
+ intptr_t idx = simd_data(desc); \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
+ TYPE mm = m[H(i + idx)]; \
+ for (j = 0; j < segment; j++) { \
+ d[i + j] = TYPE##_mul(n[i + j], mm, stat); \
+ } \
+ } \
+}
+
+DO_MUL_IDX(gvec_fmul_idx_h, float16, H2)
+DO_MUL_IDX(gvec_fmul_idx_s, float32, H4)
+DO_MUL_IDX(gvec_fmul_idx_d, float64, )
+
+#undef DO_MUL_IDX
+
+#define DO_FMLA_IDX(NAME, TYPE, H) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \
+ void *stat, uint32_t desc) \
+{ \
+ intptr_t i, j, oprsz = simd_oprsz(desc), segment = 16 / sizeof(TYPE); \
+ TYPE op1_neg = extract32(desc, SIMD_DATA_SHIFT, 1); \
+ intptr_t idx = desc >> (SIMD_DATA_SHIFT + 1); \
+ TYPE *d = vd, *n = vn, *m = vm, *a = va; \
+ op1_neg <<= (8 * sizeof(TYPE) - 1); \
+ for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
+ TYPE mm = m[H(i + idx)]; \
+ for (j = 0; j < segment; j++) { \
+ d[i + j] = TYPE##_muladd(n[i + j] ^ op1_neg, \
+ mm, a[i + j], 0, stat); \
+ } \
+ } \
+}
+
+DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2)
+DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4)
+DO_FMLA_IDX(gvec_fmla_idx_d, float64, )
+
+#undef DO_FMLA_IDX