diff options
author | Richard Henderson <richard.henderson@linaro.org> | 2021-05-25 15:58:15 -0700 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2021-06-03 16:43:26 +0100 |
commit | 458d0ab6830f9bcd76af9df4d1d4db8ab646fcef (patch) | |
tree | ea4e0e9a7403c9a6272b47ebbe3c362a826de362 /target/arm/vec_helper.c | |
parent | 5693887f2e97335362d945c778f2bbddd4e9d1bb (diff) | |
download | qemu-458d0ab6830f9bcd76af9df4d1d4db8ab646fcef.zip qemu-458d0ab6830f9bcd76af9df4d1d4db8ab646fcef.tar.gz qemu-458d0ab6830f9bcd76af9df4d1d4db8ab646fcef.tar.bz2 |
target/arm: Implement bfloat widening fma (indexed)
This is BFMLAL{B,T} for both AArch64 AdvSIMD and SVE,
and VFMA{B,T}.BF16 for AArch32 NEON.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20210525225817.400336-11-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'target/arm/vec_helper.c')
-rw-r--r-- | target/arm/vec_helper.c | 22 |
1 files changed, 22 insertions, 0 deletions
diff --git a/target/arm/vec_helper.c b/target/arm/vec_helper.c index d82736b..5862f18 100644 --- a/target/arm/vec_helper.c +++ b/target/arm/vec_helper.c @@ -2528,3 +2528,25 @@ void HELPER(gvec_bfmlal)(void *vd, void *vn, void *vm, void *va, } clear_tail(d, opr_sz, simd_maxsz(desc)); } + +void HELPER(gvec_bfmlal_idx)(void *vd, void *vn, void *vm, + void *va, void *stat, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + intptr_t sel = extract32(desc, SIMD_DATA_SHIFT, 1); + intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 1, 3); + intptr_t elements = opr_sz / 4; + intptr_t eltspersegment = MIN(16 / 4, elements); + float32 *d = vd, *a = va; + bfloat16 *n = vn, *m = vm; + + for (i = 0; i < elements; i += eltspersegment) { + float32 m_idx = m[H2(2 * i + index)] << 16; + + for (j = i; j < i + eltspersegment; j++) { + float32 n_j = n[H2(2 * j + sel)] << 16; + d[H4(j)] = float32_muladd(n_j, m_idx, a[H4(j)], 0, stat); + } + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} |