diff options
author | Dennis Zhang <denzha01@e124712.cambridge.arm.com> | 2020-11-03 13:00:51 +0000 |
---|---|---|
committer | Dennis Zhang <denzha01@e124712.cambridge.arm.com> | 2020-11-03 13:00:51 +0000 |
commit | f7d6961126a7f06c8089d8a58bd21be43bc16806 (patch) | |
tree | f6ca6a1732decea47982001254f5e9df3572f9ef /gcc | |
parent | 9d1b813d0f7c9a8d80b0aee6eb1418b0afdf0f84 (diff) | |
download | gcc-f7d6961126a7f06c8089d8a58bd21be43bc16806.zip gcc-f7d6961126a7f06c8089d8a58bd21be43bc16806.tar.gz gcc-f7d6961126a7f06c8089d8a58bd21be43bc16806.tar.bz2 |
aarch64: ACLE intrinsics convert BF16 to Float32
This patch enables intrinsics to convert BFloat16 scalar and vector
operands to Float32 modes. The intrinsics are implemented by shifting
each BFloat16 item 16 bits to left using shl/shll/shll2 instructions.
gcc/ChangeLog:
2020-11-03 Dennis Zhang <dennis.zhang@arm.com>
* config/aarch64/aarch64-simd-builtins.def(vbfcvt): New entry.
(vbfcvt_high, bfcvt): Likewise.
* config/aarch64/aarch64-simd.md(aarch64_vbfcvt<mode>): New entry.
(aarch64_vbfcvt_highv8bf, aarch64_bfcvtsf): Likewise.
* config/aarch64/arm_bf16.h (vcvtah_f32_bf16): New intrinsic.
* config/aarch64/arm_neon.h (vcvt_f32_bf16): Likewise.
(vcvtq_low_f32_bf16, vcvtq_high_f32_bf16): Likewise.
gcc/testsuite/ChangeLog
* gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
(test_vcvt_f32_bf16, test_vcvtq_low_f32_bf16): New tests.
(test_vcvtq_high_f32_bf16, test_vcvth_f32_bf16): Likewise.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 10 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd-builtins.def | 5 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 28 | ||||
-rw-r--r-- | gcc/config/aarch64/arm_bf16.h | 7 | ||||
-rw-r--r-- | gcc/config/aarch64/arm_neon.h | 21 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c | 40 |
7 files changed, 117 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9f743ec..2ff5c4e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2020-11-03 Dennis Zhang <dennis.zhang@arm.com> + + * config/aarch64/aarch64-simd-builtins.def(vbfcvt): New entry. + (vbfcvt_high, bfcvt): Likewise. + * config/aarch64/aarch64-simd.md(aarch64_vbfcvt<mode>): New entry. + (aarch64_vbfcvt_highv8bf, aarch64_bfcvtsf): Likewise. + * config/aarch64/arm_bf16.h (vcvtah_f32_bf16): New intrinsic. + * config/aarch64/arm_neon.h (vcvt_f32_bf16): Likewise. + (vcvtq_low_f32_bf16, vcvtq_high_f32_bf16): Likewise. + 2020-11-02 Alan Modra <amodra@gmail.com> PR middle-end/97267 diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index eb8e6f7..f494b53 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -732,3 +732,8 @@ VAR1 (UNOP, bfcvtn_q, 0, FP, v8bf) VAR1 (BINOP, bfcvtn2, 0, FP, v8bf) VAR1 (UNOP, bfcvt, 0, FP, bf) + + /* Implemented by aarch64_{v}bfcvt{_high}<mode>. */ + VAR2 (UNOP, vbfcvt, 0, AUTO_FP, v4bf, v8bf) + VAR1 (UNOP, vbfcvt_high, 0, AUTO_FP, v8bf) + VAR1 (UNOP, bfcvt, 0, AUTO_FP, sf) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 381a702..030a086 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -7238,3 +7238,31 @@ "bfcvt\\t%h0, %s1" [(set_attr "type" "f_cvt")] ) + +;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes. +(define_insn "aarch64_vbfcvt<mode>" + [(set (match_operand:V4SF 0 "register_operand" "=w") + (unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")] + UNSPEC_BFCVTN))] + "TARGET_BF16_SIMD" + "shll\\t%0.4s, %1.4h, #16" + [(set_attr "type" "neon_shift_imm_long")] +) + +(define_insn "aarch64_vbfcvt_highv8bf" + [(set (match_operand:V4SF 0 "register_operand" "=w") + (unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")] + UNSPEC_BFCVTN2))] + "TARGET_BF16_SIMD" + "shll2\\t%0.4s, %1.8h, #16" + [(set_attr "type" "neon_shift_imm_long")] +) + +(define_insn "aarch64_bfcvtsf" + [(set (match_operand:SF 0 "register_operand" "=w") + (unspec:SF [(match_operand:BF 1 "register_operand" "w")] + UNSPEC_BFCVT))] + "TARGET_BF16_FP" + "shl\\t%d0, %d1, #16" + [(set_attr "type" "neon_shift_imm")] +) diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h index 984875d..8816154 100644 --- a/gcc/config/aarch64/arm_bf16.h +++ b/gcc/config/aarch64/arm_bf16.h @@ -40,6 +40,13 @@ vcvth_bf16_f32 (float32_t __a) return __builtin_aarch64_bfcvtbf (__a); } +__extension__ extern __inline float32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvtah_f32_bf16 (bfloat16_t __a) +{ + return __builtin_aarch64_bfcvtsf (__a); +} + #pragma GCC pop_options #endif diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 95bfa5e..69cccd3 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -35680,6 +35680,27 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b, return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index); } +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvt_f32_bf16 (bfloat16x4_t __a) +{ + return __builtin_aarch64_vbfcvtv4bf (__a); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvtq_low_f32_bf16 (bfloat16x8_t __a) +{ + return __builtin_aarch64_vbfcvtv8bf (__a); +} + +__extension__ extern __inline float32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvtq_high_f32_bf16 (bfloat16x8_t __a) +{ + return __builtin_aarch64_vbfcvt_highv8bf (__a); +} + __extension__ extern __inline bfloat16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vcvt_bf16_f32 (float32x4_t __a) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 94bddaa..a7bbb1b 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2020-11-03 Dennis Zhang <dennis.zhang@arm.com> + + * gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c + (test_vcvt_f32_bf16, test_vcvtq_low_f32_bf16): New tests. + (test_vcvtq_high_f32_bf16, test_vcvth_f32_bf16): Likewise. + 2020-11-02 Alan Modra <amodra@gmail.com> PR middle-end/97267 diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c index bbea630..47af7c4 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c @@ -46,3 +46,43 @@ bfloat16_t test_bfcvt (float32_t a) { return vcvth_bf16_f32 (a); } + +/* +**test_vcvt_f32_bf16: +** shll v0.4s, v0.4h, #16 +** ret +*/ +float32x4_t test_vcvt_f32_bf16 (bfloat16x4_t a) +{ + return vcvt_f32_bf16 (a); +} + +/* +**test_vcvtq_low_f32_bf16: +** shll v0.4s, v0.4h, #16 +** ret +*/ +float32x4_t test_vcvtq_low_f32_bf16 (bfloat16x8_t a) +{ + return vcvtq_low_f32_bf16 (a); +} + +/* +**test_vcvtq_high_f32_bf16: +** shll2 v0.4s, v0.8h, #16 +** ret +*/ +float32x4_t test_vcvtq_high_f32_bf16 (bfloat16x8_t a) +{ + return vcvtq_high_f32_bf16 (a); +} + +/* +**test_vcvtah_f32_bf16: +** shl d0, d0, #16 +** ret +*/ +float32_t test_vcvtah_f32_bf16 (bfloat16_t a) +{ + return vcvtah_f32_bf16 (a); +} |