aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2025-07-18 18:30:27 +0100
committerPeter Maydell <peter.maydell@linaro.org>2025-07-21 11:13:55 +0100
commit929bec5581966c43d083588f9be38af48150c4fe (patch)
tree4231e254a2dd32868f4b7266fb866ac7c1d9feaf
parentf71c3f470f48bece6d0601171bfda63a0b746879 (diff)
downloadqemu-929bec5581966c43d083588f9be38af48150c4fe.zip
qemu-929bec5581966c43d083588f9be38af48150c4fe.tar.gz
qemu-929bec5581966c43d083588f9be38af48150c4fe.tar.bz2
target/arm: Add BFMLA, BFMLS (vectors)
FEAT_SVE_B16B16 adds bfloat16 versions of the FMLA and FMLS insns in the "SVE floating-point multiply-accumulate writing addend" group, encoded as sz=0b00. Fixes: 7b1613a1020d2942 ("target/arm: Enable FEAT_SME2p1 on -cpu max") Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20250718173032.2498900-6-peter.maydell@linaro.org
-rw-r--r--target/arm/tcg/helper-sve.h14
-rw-r--r--target/arm/tcg/sve_helper.c69
-rw-r--r--target/arm/tcg/translate-sve.c21
3 files changed, 98 insertions, 6 deletions
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
index cb6c235..5e4b7fd 100644
--- a/target/arm/tcg/helper-sve.h
+++ b/target/arm/tcg/helper-sve.h
@@ -1541,6 +1541,8 @@ DEF_HELPER_FLAGS_6(sve_fcadd_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_6(sve_fcadd_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_b16, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG,
@@ -1548,6 +1550,8 @@ DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_7(sve_fmla_zpzzz_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_b16, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG,
@@ -1555,6 +1559,8 @@ DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_7(sve_fmls_zpzzz_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_b16, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG,
@@ -1562,6 +1568,8 @@ DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_7(sve_fnmla_zpzzz_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_b16, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG,
@@ -1569,6 +1577,8 @@ DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_b16, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG,
@@ -1576,6 +1586,8 @@ DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_b16, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG,
@@ -1583,6 +1595,8 @@ DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_b16, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_s, TCG_CALL_NO_RWG,
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
index 1a56fa8..105cc5d 100644
--- a/target/arm/tcg/sve_helper.c
+++ b/target/arm/tcg/sve_helper.c
@@ -5099,6 +5099,75 @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int)
#undef DO_ZPZ_FP
+static void do_fmla_zpzzz_b16(void *vd, void *vn, void *vm, void *va, void *vg,
+ float_status *status, uint32_t desc,
+ uint16_t neg1, uint16_t neg3, int flags)
+{
+ intptr_t i = simd_oprsz(desc);
+ uint64_t *g = vg;
+
+ do {
+ uint64_t pg = g[(i - 1) >> 6];
+ do {
+ i -= 2;
+ if (likely((pg >> (i & 63)) & 1)) {
+ float16 e1, e2, e3, r;
+
+ e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1;
+ e2 = *(uint16_t *)(vm + H1_2(i));
+ e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3;
+ r = bfloat16_muladd(e1, e2, e3, flags, status);
+ *(uint16_t *)(vd + H1_2(i)) = r;
+ }
+ } while (i & 63);
+ } while (i != 0);
+}
+
+void HELPER(sve_fmla_zpzzz_b16)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
+}
+
+void HELPER(sve_fmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0x8000, 0, 0);
+}
+
+void HELPER(sve_fnmla_zpzzz_b16)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, 0);
+}
+
+void HELPER(sve_fnmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0x8000, 0);
+}
+
+void HELPER(sve_ah_fmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product);
+}
+
+void HELPER(sve_ah_fnmla_zpzzz_b16)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product | float_muladd_negate_c);
+}
+
+void HELPER(sve_ah_fnmls_zpzzz_b16)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_b16(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_c);
+}
+
static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
float_status *status, uint32_t desc,
uint16_t neg1, uint16_t neg3, int flags)
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index 918cf6e..37ecbc2 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -4368,19 +4368,28 @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1),
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
+static bool do_fmla_zpzzz(DisasContext *s, arg_rprrr_esz *a,
+ gen_helper_gvec_5_ptr *fn)
+{
+ /* These insns use MO_8 to encode BFloat16 */
+ if (a->esz == MO_8 && !dc_isar_feature(aa64_sve_b16b16, s)) {
+ return false;
+ }
+ return gen_gvec_fpst_zzzzp(s, fn, a->rd, a->rn, a->rm, a->ra, a->pg, 0,
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
+}
+
#define DO_FMLA(NAME, name, ah_name) \
static gen_helper_gvec_5_ptr * const name##_fns[4] = { \
- NULL, gen_helper_sve_##name##_h, \
+ gen_helper_sve_##name##_b16, gen_helper_sve_##name##_h, \
gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
}; \
static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = { \
- NULL, gen_helper_sve_##ah_name##_h, \
+ gen_helper_sve_##ah_name##_b16, gen_helper_sve_##ah_name##_h, \
gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d \
}; \
- TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \
- s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \
- a->rd, a->rn, a->rm, a->ra, a->pg, 0, \
- a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
+ TRANS_FEAT(NAME, aa64_sve, do_fmla_zpzzz, a, \
+ s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz])
/* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */
DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz)