Diffstat (limited to 'gcc')
-rw-r--r--  gcc/ChangeLog                                                                46
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-base.cc                              11
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-base.def                             12
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-base.h                                8
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-shapes.cc                            66
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-shapes.h                              4
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-sve2.cc                               1
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-sve2.h                                1
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins.cc                                    5
-rw-r--r--  gcc/config/aarch64/aarch64-sve.md                                           113
-rw-r--r--  gcc/config/aarch64/aarch64-sve2.md                                           14
-rw-r--r--  gcc/config/aarch64/aarch64.h                                                  1
-rw-r--r--  gcc/config/aarch64/iterators.md                                              19
-rw-r--r--  gcc/testsuite/ChangeLog                                                      21
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_f32.c                    67
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_lane_f32.c               86
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_f32.c                  67
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_lane_f32.c             86
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_f32.c                  67
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_lane_f32.c             86
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c                   46
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_bf16.c                     96
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvtnt_bf16.c                   90
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c     24
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c 30
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c 30
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c 24
-rw-r--r--  gcc/testsuite/lib/target-supports.exp                                         2
28 files changed, 1112 insertions, 11 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d10ae92..234e328 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,51 @@
2020-01-31 Richard Sandiford <richard.sandiford@arm.com>
+ * config/aarch64/aarch64.h (TARGET_SVE_BF16): New macro.
+ * config/aarch64/aarch64-sve-builtins-sve2.h (svcvtnt): Move to
+ aarch64-sve-builtins-base.h.
+ * config/aarch64/aarch64-sve-builtins-sve2.cc (svcvtnt): Move to
+ aarch64-sve-builtins-base.cc.
+ * config/aarch64/aarch64-sve-builtins-base.h (svbfdot, svbfdot_lane)
+ (svbfmlalb, svbfmlalb_lane, svbfmlalt, svbfmlalt_lane, svbfmmla)
+ (svcvtnt): Declare.
+ * config/aarch64/aarch64-sve-builtins-base.cc (svbfdot, svbfdot_lane)
+ (svbfmlalb, svbfmlalb_lane, svbfmlalt, svbfmlalt_lane, svbfmmla)
+ (svcvtnt): New functions.
+ * config/aarch64/aarch64-sve-builtins-base.def (svbfdot, svbfdot_lane)
+ (svbfmlalb, svbfmlalb_lane, svbfmlalt, svbfmlalt_lane, svbfmmla)
+ (svcvtnt): New functions.
+ (svcvt): Add a form that converts f32 to bf16.
+ * config/aarch64/aarch64-sve-builtins-shapes.h (ternary_bfloat)
+ (ternary_bfloat_lane, ternary_bfloat_lanex2, ternary_bfloat_opt_n):
+ Declare.
+ * config/aarch64/aarch64-sve-builtins-shapes.cc (parse_element_type):
+ Treat B as bfloat16_t.
+ (ternary_bfloat_lane_base): New class.
+ (ternary_bfloat_def): Likewise.
+ (ternary_bfloat): New shape.
+ (ternary_bfloat_lane_def): New class.
+ (ternary_bfloat_lane): New shape.
+ (ternary_bfloat_lanex2_def): New class.
+ (ternary_bfloat_lanex2): New shape.
+ (ternary_bfloat_opt_n_def): New class.
+ (ternary_bfloat_opt_n): New shape.
+ * config/aarch64/aarch64-sve-builtins.cc (TYPES_cvt_bfloat): New macro.
+ * config/aarch64/aarch64-sve.md (@aarch64_sve_<sve_fp_op>vnx4sf)
+ (@aarch64_sve_<sve_fp_op>_lanevnx4sf): New patterns.
+ (@aarch64_sve_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>)
+ (@cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>): Likewise.
+ (*cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>): Likewise.
+ (@aarch64_sve_cvtnt<VNx8BF_ONLY:mode>): Likewise.
+ * config/aarch64/aarch64-sve2.md (@aarch64_sve2_cvtnt<mode>): Key
+ the pattern off the narrow mode instead of the wider one.
+ * config/aarch64/iterators.md (VNx8BF_ONLY): New mode iterator.
+ (UNSPEC_BFMLALB, UNSPEC_BFMLALT, UNSPEC_BFMMLA): New unspecs.
+ (sve_fp_op): Handle them.
+ (SVE_BFLOAT_TERNARY_LONG): New int iterator.
+ (SVE_BFLOAT_TERNARY_LONG_LANE): Likewise.
+
+2020-01-31 Richard Sandiford <richard.sandiford@arm.com>
+
* config/aarch64/arm_sve.h: Include arm_bf16.h.
* config/aarch64/aarch64-modes.def (BF): Move definition before
VECTOR_MODES. Remove separate VECTOR_MODES for V4BF and V8BF.
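
As context for the entries above, a minimal usage sketch of the new ACLE intrinsics (hedged: function and variable names are illustrative; the signatures follow the forms exercised by the new asm tests later in this patch, assuming an armv8.2-a+sve+bf16 target):

#include <arm_sve.h>

#pragma GCC target ("arch=armv8.2-a+sve+bf16")

/* Sketch only: one call per new BF16 instruction family.  */
svfloat32_t
bf16_kernels (svfloat32_t acc, svbfloat16_t a, svbfloat16_t b)
{
  acc = svbfdot_f32 (acc, a, b);            /* BFDOT */
  acc = svbfmlalb_f32 (acc, a, b);          /* BFMLALB (even BF16 elements) */
  acc = svbfmlalt_lane_f32 (acc, a, b, 3);  /* BFMLALT, lane index in [0, 7] */
  return svbfmmla_f32 (acc, a, b);          /* BFMMLA */
}
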
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 9ae143c..9b63ea7 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -2544,6 +2544,16 @@ FUNCTION (svandv, reduction, (UNSPEC_ANDV))
FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT))
FUNCTION (svasr_wide, shift_wide, (ASHIFTRT, UNSPEC_ASHIFTRT_WIDE))
FUNCTION (svasrd, unspec_based_function, (UNSPEC_ASRD, -1, -1))
+FUNCTION (svbfdot, fixed_insn_function, (CODE_FOR_aarch64_sve_bfdotvnx4sf))
+FUNCTION (svbfdot_lane, fixed_insn_function,
+ (CODE_FOR_aarch64_sve_bfdot_lanevnx4sf))
+FUNCTION (svbfmlalb, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlalbvnx4sf))
+FUNCTION (svbfmlalb_lane, fixed_insn_function,
+ (CODE_FOR_aarch64_sve_bfmlalb_lanevnx4sf))
+FUNCTION (svbfmlalt, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlaltvnx4sf))
+FUNCTION (svbfmlalt_lane, fixed_insn_function,
+ (CODE_FOR_aarch64_sve_bfmlalt_lanevnx4sf))
+FUNCTION (svbfmmla, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmmlavnx4sf))
FUNCTION (svbic, svbic_impl,)
FUNCTION (svbrka, svbrk_unary_impl, (UNSPEC_BRKA))
FUNCTION (svbrkb, svbrk_unary_impl, (UNSPEC_BRKB))
@@ -2592,6 +2602,7 @@ FUNCTION (svcreate2, svcreate_impl, (2))
FUNCTION (svcreate3, svcreate_impl, (3))
FUNCTION (svcreate4, svcreate_impl, (4))
FUNCTION (svcvt, svcvt_impl,)
+FUNCTION (svcvtnt, CODE_FOR_MODE0 (aarch64_sve_cvtnt),)
FUNCTION (svdiv, rtx_code_function, (DIV, UDIV, UNSPEC_COND_FDIV))
FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV))
FUNCTION (svdot, svdot_impl,)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def
index 332555b..27ab05d 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def
@@ -318,6 +318,18 @@ DEF_SVE_FUNCTION (svzip2, binary, all_data, none)
DEF_SVE_FUNCTION (svzip2, binary_pred, all_pred, none)
#undef REQUIRED_EXTENSIONS
+#define REQUIRED_EXTENSIONS AARCH64_FL_BF16
+DEF_SVE_FUNCTION (svbfdot, ternary_bfloat_opt_n, s_float, none)
+DEF_SVE_FUNCTION (svbfdot_lane, ternary_bfloat_lanex2, s_float, none)
+DEF_SVE_FUNCTION (svbfmlalb, ternary_bfloat_opt_n, s_float, none)
+DEF_SVE_FUNCTION (svbfmlalb_lane, ternary_bfloat_lane, s_float, none)
+DEF_SVE_FUNCTION (svbfmlalt, ternary_bfloat_opt_n, s_float, none)
+DEF_SVE_FUNCTION (svbfmlalt_lane, ternary_bfloat_lane, s_float, none)
+DEF_SVE_FUNCTION (svbfmmla, ternary_bfloat, s_float, none)
+DEF_SVE_FUNCTION (svcvt, unary_convert, cvt_bfloat, mxz)
+DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, mx)
+#undef REQUIRED_EXTENSIONS
+
#define REQUIRED_EXTENSIONS AARCH64_FL_I8MM
DEF_SVE_FUNCTION (svmmla, mmla, s_integer, none)
DEF_SVE_FUNCTION (svusmmla, ternary_uintq_intq, s_signed, none)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.h b/gcc/config/aarch64/aarch64-sve-builtins-base.h
index 5c19b7d..957ace8 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.h
@@ -42,6 +42,13 @@ namespace aarch64_sve
extern const function_base *const svasr;
extern const function_base *const svasr_wide;
extern const function_base *const svasrd;
+ extern const function_base *const svbfdot;
+ extern const function_base *const svbfdot_lane;
+ extern const function_base *const svbfmlalb;
+ extern const function_base *const svbfmlalb_lane;
+ extern const function_base *const svbfmlalt;
+ extern const function_base *const svbfmlalt_lane;
+ extern const function_base *const svbfmmla;
extern const function_base *const svbic;
extern const function_base *const svbrka;
extern const function_base *const svbrkb;
@@ -84,6 +91,7 @@ namespace aarch64_sve
extern const function_base *const svcreate3;
extern const function_base *const svcreate4;
extern const function_base *const svcvt;
+ extern const function_base *const svcvtnt;
extern const function_base *const svdiv;
extern const function_base *const svdivr;
extern const function_base *const svdot;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
index 1ea3caa..5f8c85d 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
@@ -78,6 +78,7 @@ apply_predication (const function_instance &instance, tree return_type,
[01] - the element type in type suffix 0 or 1 of INSTANCE
f<bits> - a floating-point type with the given number of bits
f[01] - a floating-point type with the same width as type suffix 0 or 1
+ B - bfloat16_t
h<elt> - a half-sized version of <elt>
p - a predicate (represented as TYPE_SUFFIX_b)
q<elt> - a quarter-sized version of <elt>
@@ -117,6 +118,9 @@ parse_element_type (const function_instance &instance, const char *&format)
if (ch == 'p')
return TYPE_SUFFIX_b;
+ if (ch == 'B')
+ return TYPE_SUFFIX_bf16;
+
if (ch == 'q')
{
type_suffix_index suffix = parse_element_type (instance, format);
@@ -921,6 +925,26 @@ struct ternary_resize2_lane_base : public overloaded_base<0>
}
};
+/* A specialization of ternary_resize2_lane_base for bfloat16 elements,
+ indexed in groups of N elements. */
+template<unsigned int N>
+struct ternary_bfloat_lane_base
+ : public ternary_resize2_lane_base<16, TYPE_bfloat, TYPE_bfloat>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "v0,v0,vB,vB,su64", group, MODE_none);
+ }
+
+ bool
+ check (function_checker &c) const OVERRIDE
+ {
+ return c.require_immediate_lane_index (3, N);
+ }
+};
+
/* A specialization of ternary_resize2_lane_base for quarter-sized
elements. */
template<type_class_index TYPE_CLASS2 = function_resolver::SAME_TYPE_CLASS,
@@ -2695,6 +2719,48 @@ struct tbl_tuple_def : public overloaded_base<0>
};
SHAPE (tbl_tuple)
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, svbfloat16_t, svbfloat16_t). */
+struct ternary_bfloat_def
+ : public ternary_resize2_base<16, TYPE_bfloat, TYPE_bfloat>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "v0,v0,vB,vB", group, MODE_none);
+ }
+};
+SHAPE (ternary_bfloat)
+
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, svbfloat16_t, svbfloat16_t, uint64_t)
+
+ where the final argument is an integer constant expression in the range
+ [0, 7]. */
+typedef ternary_bfloat_lane_base<1> ternary_bfloat_lane_def;
+SHAPE (ternary_bfloat_lane)
+
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, svbfloat16_t, svbfloat16_t, uint64_t)
+
+ where the final argument is an integer constant expression in the range
+ [0, 3]. */
+typedef ternary_bfloat_lane_base<2> ternary_bfloat_lanex2_def;
+SHAPE (ternary_bfloat_lanex2)
+
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, svbfloat16_t, svbfloat16_t)
+ sv<t0>_t svfoo[_n_t0](sv<t0>_t, svbfloat16_t, bfloat16_t). */
+struct ternary_bfloat_opt_n_def
+ : public ternary_resize2_opt_n_base<16, TYPE_bfloat, TYPE_bfloat>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const OVERRIDE
+ {
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "v0,v0,vB,vB", group, MODE_none);
+ build_all (b, "v0,v0,vB,sB", group, MODE_n);
+ }
+};
+SHAPE (ternary_bfloat_opt_n)
+
/* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:int:quarter>_t, sv<t0:uint:quarter>_t,
uint64_t)
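
A sketch of the overload forms these new shapes generate (hedged: the function names are real intrinsics from this patch, but the pairing with shapes is annotated from base.def above; the _n form broadcasts a scalar bfloat16_t, and lane indices are constant expressions):

/* ternary_bfloat_opt_n, vector form: "v0,v0,vB,vB".  */
svfloat32_t vec_form (svfloat32_t acc, svbfloat16_t a, svbfloat16_t b)
{
  return svbfdot_f32 (acc, a, b);
}

/* ternary_bfloat_opt_n, _n form: "v0,v0,vB,sB".  */
svfloat32_t n_form (svfloat32_t acc, svbfloat16_t a, bfloat16_t b)
{
  return svbfdot_n_f32 (acc, a, b);
}

/* ternary_bfloat_lanex2: index in [0, 3] (pairs of BF16 elements).  */
svfloat32_t lane_form (svfloat32_t acc, svbfloat16_t a, svbfloat16_t b)
{
  return svbfdot_lane_f32 (acc, a, b, 3);
}
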
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
index 1ce0997..3a19982 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
@@ -148,6 +148,10 @@ namespace aarch64_sve
extern const function_shape *const store_scatter_offset;
extern const function_shape *const store_scatter_offset_restricted;
extern const function_shape *const tbl_tuple;
+ extern const function_shape *const ternary_bfloat;
+ extern const function_shape *const ternary_bfloat_lane;
+ extern const function_shape *const ternary_bfloat_lanex2;
+ extern const function_shape *const ternary_bfloat_opt_n;
extern const function_shape *const ternary_intq_uintq_lane;
extern const function_shape *const ternary_intq_uintq_opt_n;
extern const function_shape *const ternary_lane;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
index 53b1651..9e7219c 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -487,7 +487,6 @@ FUNCTION (svbsl2n, CODE_FOR_MODE0 (aarch64_sve2_bsl2n),)
FUNCTION (svcdot, svcdot_impl,)
FUNCTION (svcdot_lane, svcdot_lane_impl,)
FUNCTION (svcvtlt, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTLT))
-FUNCTION (svcvtnt, CODE_FOR_MODE1 (aarch64_sve2_cvtnt),)
FUNCTION (svcvtx, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTX))
FUNCTION (svcvtxnt, CODE_FOR_MODE1 (aarch64_sve2_cvtxnt),)
FUNCTION (sveor3, CODE_FOR_MODE0 (aarch64_sve2_eor3),)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
index 90e29fc..06d4a93 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
@@ -54,7 +54,6 @@ namespace aarch64_sve
extern const function_base *const svcdot;
extern const function_base *const svcdot_lane;
extern const function_base *const svcvtlt;
- extern const function_base *const svcvtnt;
extern const function_base *const svcvtx;
extern const function_base *const svcvtxnt;
extern const function_base *const sveor3;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index d4d201d..2c5543b 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -354,6 +354,10 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
D (u32, f16), D (u32, f32), D (u32, f64), \
D (u64, f16), D (u64, f32), D (u64, f64)
+/* _bf16_f32. */
+#define TYPES_cvt_bfloat(S, D) \
+ D (bf16, f32)
+
/* _f32_f16
_f64_f32. */
#define TYPES_cvt_long(S, D) \
@@ -471,6 +475,7 @@ DEF_SVE_TYPES_ARRAY (d_unsigned);
DEF_SVE_TYPES_ARRAY (d_integer);
DEF_SVE_TYPES_ARRAY (d_data);
DEF_SVE_TYPES_ARRAY (cvt);
+DEF_SVE_TYPES_ARRAY (cvt_bfloat);
DEF_SVE_TYPES_ARRAY (cvt_long);
DEF_SVE_TYPES_ARRAY (cvt_narrow_s);
DEF_SVE_TYPES_ARRAY (cvt_narrow);
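
The single bf16 <- f32 pairing in TYPES_cvt_bfloat gives svcvt three predicated forms, per the "mxz" flag in base.def above. A sketch (signatures as exercised by the new cvt_bf16.c test):

/* Merging: inactive lanes come from INACTIVE.  */
svbfloat16_t cvt_m (svbfloat16_t inactive, svbool_t pg, svfloat32_t x)
{
  return svcvt_bf16_f32_m (inactive, pg, x);
}

/* Zeroing: inactive lanes are cleared.  */
svbfloat16_t cvt_z (svbool_t pg, svfloat32_t x)
{
  return svcvt_bf16_f32_z (pg, x);
}

/* "Don't care": inactive lanes take unspecified values.  */
svbfloat16_t cvt_x (svbool_t pg, svfloat32_t x)
{
  return svcvt_bf16_f32_x (pg, x);
}
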
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index abaac11..fa38529 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -115,6 +115,7 @@
;; ---- [FP] General ternary arithmetic corresponding to unspecs
;; ---- [FP] Complex multiply-add
;; ---- [FP] Trigonometric multiply-add
+;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
;; ---- [FP] Matrix multiply-accumulate
;;
;; == Comparisons and selects
@@ -150,6 +151,7 @@
;; ---- [FP<-INT] Packs
;; ---- [FP<-INT] Unpacks
;; ---- [FP<-FP] Packs
+;; ---- [FP<-FP] Packs (bfloat16)
;; ---- [FP<-FP] Unpacks
;; ---- [PRED<-PRED] Packs
;; ---- [PRED<-PRED] Unpacks
@@ -6548,6 +6550,46 @@
)
;; -------------------------------------------------------------------------
+;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - BFDOT (BF16)
+;; - BFMLALB (BF16)
+;; - BFMLALT (BF16)
+;; - BFMMLA (BF16)
+;; -------------------------------------------------------------------------
+
+(define_insn "@aarch64_sve_<sve_fp_op>vnx4sf"
+ [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w")
+ (unspec:VNx4SF
+ [(match_operand:VNx4SF 1 "register_operand" "0, w")
+ (match_operand:VNx8BF 2 "register_operand" "w, w")
+ (match_operand:VNx8BF 3 "register_operand" "w, w")]
+ SVE_BFLOAT_TERNARY_LONG))]
+ "TARGET_SVE_BF16"
+ "@
+ <sve_fp_op>\t%0.s, %2.h, %3.h
+ movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; The immediate range is enforced before generating the instruction.
+(define_insn "@aarch64_sve_<sve_fp_op>_lanevnx4sf"
+ [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w")
+ (unspec:VNx4SF
+ [(match_operand:VNx4SF 1 "register_operand" "0, w")
+ (match_operand:VNx8BF 2 "register_operand" "w, w")
+ (match_operand:VNx8BF 3 "register_operand" "y, y")
+ (match_operand:SI 4 "const_int_operand")]
+ SVE_BFLOAT_TERNARY_LONG_LANE))]
+ "TARGET_SVE_BF16"
+ "@
+ <sve_fp_op>\t%0.s, %2.h, %3.h[%4]
+ movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h[%4]"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; -------------------------------------------------------------------------
;; ---- [FP] Matrix multiply-accumulate
;; -------------------------------------------------------------------------
;; Includes:
@@ -8110,6 +8152,77 @@
)
;; -------------------------------------------------------------------------
+;; ---- [FP<-FP] Packs (bfloat16)
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - BFCVT (BF16)
+;; - BFCVTNT (BF16)
+;; -------------------------------------------------------------------------
+
+;; Predicated BFCVT.
+(define_insn "@aarch64_sve_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
+ [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
+ (unspec:VNx8BF_ONLY
+ [(match_operand:VNx4BI 1 "register_operand" "Upl")
+ (match_operand:SI 3 "aarch64_sve_gp_strictness")
+ (match_operand:VNx4SF_ONLY 2 "register_operand" "w")]
+ SVE_COND_FCVT))]
+ "TARGET_SVE_BF16"
+ "bfcvt\t%0.h, %1/m, %2.s"
+)
+
+;; Predicated BFCVT with merging.
+(define_expand "@cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
+ [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
+ (unspec:VNx8BF_ONLY
+ [(match_operand:VNx4BI 1 "register_operand")
+ (unspec:VNx8BF_ONLY
+ [(match_dup 1)
+ (const_int SVE_STRICT_GP)
+ (match_operand:VNx4SF_ONLY 2 "register_operand")]
+ SVE_COND_FCVT)
+ (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE_BF16"
+)
+
+(define_insn "*cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
+ [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w, ?&w")
+ (unspec:VNx8BF_ONLY
+ [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl, Upl")
+ (unspec:VNx8BF_ONLY
+ [(match_dup 1)
+ (match_operand:SI 4 "aarch64_sve_gp_strictness")
+ (match_operand:VNx4SF_ONLY 2 "register_operand" "w, w, w")]
+ SVE_COND_FCVT)
+ (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE_BF16"
+ "@
+ bfcvt\t%0.h, %1/m, %2.s
+ movprfx\t%0.s, %1/z, %2.s\;bfcvt\t%0.h, %1/m, %2.s
+ movprfx\t%0, %3\;bfcvt\t%0.h, %1/m, %2.s"
+ [(set_attr "movprfx" "*,yes,yes")]
+)
+
+;; Predicated BFCVTNT. This doesn't give a natural aarch64_pred_*/cond_*
+;; pair because the even elements always have to be supplied for active
+;; elements, even if the inactive elements don't matter.
+;;
+;; This instruction does not take MOVPRFX.
+(define_insn "@aarch64_sve_cvtnt<mode>"
+ [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
+ (unspec:VNx8BF_ONLY
+ [(match_operand:VNx4BI 2 "register_operand" "Upl")
+ (const_int SVE_STRICT_GP)
+ (match_operand:VNx8BF_ONLY 1 "register_operand" "0")
+ (match_operand:VNx4SF 3 "register_operand" "w")]
+ UNSPEC_COND_FCVTNT))]
+ "TARGET_SVE_BF16"
+ "bfcvtnt\t%0.h, %2/m, %3.s"
+)
+
+;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Unpacks
;; -------------------------------------------------------------------------
;; Includes:
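
The two register alternatives in the ternary patterns above let the register allocator either tie the accumulator to the result (first alternative) or prefix the instruction with MOVPRFX (second). A source-level sketch (hedged: the exact register numbers in the comments are illustrative; the new bfdot_f32.c test pins them precisely):

/* If ACC is dead after the call, the first alternative applies:
       bfdot    z0.s, z1.h, z2.h
   If ACC must stay live, the result lands in a fresh register via:
       movprfx  z0, z<acc>
       bfdot    z0.s, z1.h, z2.h  */
svfloat32_t untied (svfloat32_t acc, svbfloat16_t a, svbfloat16_t b,
                    svfloat32_t *save)
{
  *save = acc;                     /* keep ACC live across the bfdot */
  return svbfdot_f32 (acc, a, b);
}
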
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index eaded5d..f82e60e 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -1921,16 +1921,16 @@
;; elements, even if the inactive elements don't matter.
;;
;; These instructions do not take MOVPRFX.
-(define_insn "@aarch64_sve2_cvtnt<mode>"
- [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
- (unspec:<VNARROW>
- [(match_operand:<VPRED> 2 "register_operand" "Upl")
+(define_insn "@aarch64_sve_cvtnt<mode>"
+ [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w")
+ (unspec:SVE_FULL_HSF
+ [(match_operand:<VWIDE_PRED> 2 "register_operand" "Upl")
(const_int SVE_STRICT_GP)
- (match_operand:<VNARROW> 1 "register_operand" "0")
- (match_operand:SVE_FULL_SDF 3 "register_operand" "w")]
+ (match_operand:SVE_FULL_HSF 1 "register_operand" "0")
+ (match_operand:<VWIDE> 3 "register_operand" "w")]
UNSPEC_COND_FCVTNT))]
"TARGET_SVE2"
- "fcvtnt\t%0.<Ventype>, %2/m, %3.<Vetype>"
+ "fcvtnt\t%0.<Vetype>, %2/m, %3.<Vewtype>"
)
;; Predicated FCVTX (equivalent to what would be FCVTXNB, except that
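
With the pattern now keyed off the narrow result mode, the SVE2 FCVTNT forms and the new BFCVTNT pattern in aarch64-sve.md share the @aarch64_sve_cvtnt<mode> name, so the svcvtnt implementation can expand both through CODE_FOR_MODE0. A sketch (assuming a target with both +sve2 and +bf16; the f16 form is the pre-existing SVE2 intrinsic):

/* New in this patch: BFCVTNT writes the odd elements of EVEN.  */
svbfloat16_t narrow_bf16 (svbfloat16_t even, svbool_t pg, svfloat32_t x)
{
  return svcvtnt_bf16_f32_m (even, pg, x);   /* BFCVTNT */
}

/* Pre-existing SVE2 form, now sharing the same pattern name.  */
svfloat16_t narrow_f16 (svfloat16_t even, svbool_t pg, svfloat32_t x)
{
  return svcvtnt_f16_f32_m (even, pg, x);    /* FCVTNT */
}
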
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 043e26a..8f08bad 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -362,6 +362,7 @@ extern unsigned aarch64_architecture_version;
/* BF16 instructions are enabled through +bf16. */
#define TARGET_BF16_FP (AARCH64_ISA_BF16)
#define TARGET_BF16_SIMD (AARCH64_ISA_BF16 && TARGET_SIMD)
+#define TARGET_SVE_BF16 (TARGET_SVE && AARCH64_ISA_BF16)
/* Make sure this is always defined so we don't have to check for ifdefs
but rather use normal ifs. */
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index d5b60e0..3e3fd9d 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -314,6 +314,7 @@
;; Iterators for single modes, for "@" patterns.
(define_mode_iterator VNx16QI_ONLY [VNx16QI])
(define_mode_iterator VNx8HI_ONLY [VNx8HI])
+(define_mode_iterator VNx8BF_ONLY [VNx8BF])
(define_mode_iterator VNx4SI_ONLY [VNx4SI])
(define_mode_iterator VNx4SF_ONLY [VNx4SF])
(define_mode_iterator VNx2DI_ONLY [VNx2DI])
@@ -820,6 +821,9 @@
UNSPEC_USDOT ; Used in aarch64-simd.md.
UNSPEC_SUDOT ; Used in aarch64-simd.md.
UNSPEC_BFDOT ; Used in aarch64-simd.md.
+ UNSPEC_BFMLALB ; Used in aarch64-sve.md.
+ UNSPEC_BFMLALT ; Used in aarch64-sve.md.
+ UNSPEC_BFMMLA ; Used in aarch64-sve.md.
])
;; ------------------------------------------------------------------
@@ -2235,6 +2239,15 @@
(define_int_iterator SVE_FP_BINARY_INT [UNSPEC_FTSMUL UNSPEC_FTSSEL])
+(define_int_iterator SVE_BFLOAT_TERNARY_LONG [UNSPEC_BFDOT
+ UNSPEC_BFMLALB
+ UNSPEC_BFMLALT
+ UNSPEC_BFMMLA])
+
+(define_int_iterator SVE_BFLOAT_TERNARY_LONG_LANE [UNSPEC_BFDOT
+ UNSPEC_BFMLALB
+ UNSPEC_BFMLALT])
+
(define_int_iterator SVE_INT_REDUCTION [UNSPEC_ANDV
UNSPEC_IORV
UNSPEC_SMAXV
@@ -3225,7 +3238,11 @@
(UNSPEC_SQDMULLBT "sqdmlslbt")
(UNSPEC_SQDMULLT "sqdmlslt")])
-(define_int_attr sve_fp_op [(UNSPEC_FRECPE "frecpe")
+(define_int_attr sve_fp_op [(UNSPEC_BFDOT "bfdot")
+ (UNSPEC_BFMLALB "bfmlalb")
+ (UNSPEC_BFMLALT "bfmlalt")
+ (UNSPEC_BFMMLA "bfmmla")
+ (UNSPEC_FRECPE "frecpe")
(UNSPEC_FRECPS "frecps")
(UNSPEC_RSQRTE "frsqrte")
(UNSPEC_RSQRTS "frsqrts")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 5d002d9..b239734 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,26 @@
2020-01-31 Richard Sandiford <richard.sandiford@arm.com>
+ * lib/target-supports.exp (check_effective_target_aarch64_asm_bf16_ok):
+ New proc.
+ * gcc.target/aarch64/sve/acle/asm/bfdot_f32.c: New test.
+ * gcc.target/aarch64/sve/acle/asm/bfdot_lane_f32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/bfmlalb_f32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/bfmlalb_lane_f32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/bfmlalt_f32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/bfmlalt_lane_f32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/cvt_bf16.c: Likewise.
+ * gcc.target/aarch64/sve/acle/asm/cvtnt_bf16.c: Likewise.
+ * gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c: Likewise.
+ * gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c:
+ Likewise.
+ * gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c:
+ Likewise.
+ * gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c:
+ Likewise.
+
+2020-01-31 Richard Sandiford <richard.sandiford@arm.com>
+
* g++.target/aarch64/sve/acle/general-c++/mangle_1.C: Test mangling
of svbfloat16_t.
* g++.target/aarch64/sve/acle/general-c++/mangle_2.C: Likewise for
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_f32.c
new file mode 100644
index 0000000..376622d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_f32.c
@@ -0,0 +1,67 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** bfdot_f32_tied1:
+** bfdot z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (bfdot_f32_tied1, svfloat32_t, svbfloat16_t,
+ z0 = svbfdot_f32 (z0, z4, z5),
+ z0 = svbfdot (z0, z4, z5))
+
+/*
+** bfdot_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfdot z0\.s, \1\.h, z1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (bfdot_f32_tied2, svfloat32_t, svbfloat16_t,
+ z0_res = svbfdot_f32 (z4, z0, z1),
+ z0_res = svbfdot (z4, z0, z1))
+
+/*
+** bfdot_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfdot z0\.s, z1\.h, \1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (bfdot_f32_tied3, svfloat32_t, svbfloat16_t,
+ z0_res = svbfdot_f32 (z4, z1, z0),
+ z0_res = svbfdot (z4, z1, z0))
+
+/*
+** bfdot_f32_untied:
+** movprfx z0, z1
+** bfdot z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (bfdot_f32_untied, svfloat32_t, svbfloat16_t,
+ z0 = svbfdot_f32 (z1, z4, z5),
+ z0 = svbfdot (z1, z4, z5))
+
+/*
+** bfdot_h7_f32_tied1:
+** mov (z[0-9]+\.h), h7
+** bfdot z0\.s, z4\.h, \1
+** ret
+*/
+TEST_DUAL_ZD (bfdot_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t,
+ z0 = svbfdot_n_f32 (z0, z4, d7),
+ z0 = svbfdot (z0, z4, d7))
+
+/*
+** bfdot_h7_f32_untied:
+** mov (z[0-9]+\.h), h7
+** movprfx z0, z1
+** bfdot z0\.s, z4\.h, \1
+** ret
+*/
+TEST_DUAL_ZD (bfdot_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t,
+ z0 = svbfdot_n_f32 (z1, z4, d7),
+ z0 = svbfdot (z1, z4, d7))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_lane_f32.c
new file mode 100644
index 0000000..0f624fe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_lane_f32.c
@@ -0,0 +1,86 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** bfdot_lane_0_f32_tied1:
+** bfdot z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (bfdot_lane_0_f32_tied1, svfloat32_t, svbfloat16_t,
+ z0 = svbfdot_lane_f32 (z0, z4, z5, 0),
+ z0 = svbfdot_lane (z0, z4, z5, 0))
+
+/*
+** bfdot_lane_0_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfdot z0\.s, \1\.h, z1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (bfdot_lane_0_f32_tied2, svfloat32_t, svbfloat16_t,
+ z0_res = svbfdot_lane_f32 (z4, z0, z1, 0),
+ z0_res = svbfdot_lane (z4, z0, z1, 0))
+
+/*
+** bfdot_lane_0_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfdot z0\.s, z1\.h, \1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (bfdot_lane_0_f32_tied3, svfloat32_t, svbfloat16_t,
+ z0_res = svbfdot_lane_f32 (z4, z1, z0, 0),
+ z0_res = svbfdot_lane (z4, z1, z0, 0))
+
+/*
+** bfdot_lane_0_f32_untied:
+** movprfx z0, z1
+** bfdot z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (bfdot_lane_0_f32_untied, svfloat32_t, svbfloat16_t,
+ z0 = svbfdot_lane_f32 (z1, z4, z5, 0),
+ z0 = svbfdot_lane (z1, z4, z5, 0))
+
+/*
+** bfdot_lane_1_f32:
+** bfdot z0\.s, z4\.h, z5\.h\[1\]
+** ret
+*/
+TEST_DUAL_Z (bfdot_lane_1_f32, svfloat32_t, svbfloat16_t,
+ z0 = svbfdot_lane_f32 (z0, z4, z5, 1),
+ z0 = svbfdot_lane (z0, z4, z5, 1))
+
+/*
+** bfdot_lane_3_f32:
+** bfdot z0\.s, z4\.h, z5\.h\[3\]
+** ret
+*/
+TEST_DUAL_Z (bfdot_lane_3_f32, svfloat32_t, svbfloat16_t,
+ z0 = svbfdot_lane_f32 (z0, z4, z5, 3),
+ z0 = svbfdot_lane (z0, z4, z5, 3))
+
+/*
+** bfdot_lane_z8_f32:
+** str d8, \[sp, -16\]!
+** mov (z[0-7])\.d, z8\.d
+** bfdot z0\.s, z1\.h, \1\.h\[1\]
+** ldr d8, \[sp\], 16
+** ret
+*/
+TEST_DUAL_LANE_REG (bfdot_lane_z8_f32, svfloat32_t, svbfloat16_t, z8,
+ z0 = svbfdot_lane_f32 (z0, z1, z8, 1),
+ z0 = svbfdot_lane (z0, z1, z8, 1))
+
+/*
+** bfdot_lane_z16_f32:
+** mov (z[0-7])\.d, z16\.d
+** bfdot z0\.s, z1\.h, \1\.h\[1\]
+** ret
+*/
+TEST_DUAL_LANE_REG (bfdot_lane_z16_f32, svfloat32_t, svbfloat16_t, z16,
+ z0 = svbfdot_lane_f32 (z0, z1, z16, 1),
+ z0 = svbfdot_lane (z0, z1, z16, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_f32.c
new file mode 100644
index 0000000..0f81011
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_f32.c
@@ -0,0 +1,67 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** bfmlalb_f32_tied1:
+** bfmlalb z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (bfmlalb_f32_tied1, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlalb_f32 (z0, z4, z5),
+ z0 = svbfmlalb (z0, z4, z5))
+
+/*
+** bfmlalb_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlalb z0\.s, \1\.h, z1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlalb_f32_tied2, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlalb_f32 (z4, z0, z1),
+ z0_res = svbfmlalb (z4, z0, z1))
+
+/*
+** bfmlalb_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlalb z0\.s, z1\.h, \1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlalb_f32_tied3, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlalb_f32 (z4, z1, z0),
+ z0_res = svbfmlalb (z4, z1, z0))
+
+/*
+** bfmlalb_f32_untied:
+** movprfx z0, z1
+** bfmlalb z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (bfmlalb_f32_untied, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlalb_f32 (z1, z4, z5),
+ z0 = svbfmlalb (z1, z4, z5))
+
+/*
+** bfmlalb_h7_f32_tied1:
+** mov (z[0-9]+\.h), h7
+** bfmlalb z0\.s, z4\.h, \1
+** ret
+*/
+TEST_DUAL_ZD (bfmlalb_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t,
+ z0 = svbfmlalb_n_f32 (z0, z4, d7),
+ z0 = svbfmlalb (z0, z4, d7))
+
+/*
+** bfmlalb_h7_f32_untied:
+** mov (z[0-9]+\.h), h7
+** movprfx z0, z1
+** bfmlalb z0\.s, z4\.h, \1
+** ret
+*/
+TEST_DUAL_ZD (bfmlalb_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t,
+ z0 = svbfmlalb_n_f32 (z1, z4, d7),
+ z0 = svbfmlalb (z1, z4, d7))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_lane_f32.c
new file mode 100644
index 0000000..b0ec088
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_lane_f32.c
@@ -0,0 +1,86 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** bfmlalb_lane_0_f32_tied1:
+** bfmlalb z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (bfmlalb_lane_0_f32_tied1, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlalb_lane_f32 (z0, z4, z5, 0),
+ z0 = svbfmlalb_lane (z0, z4, z5, 0))
+
+/*
+** bfmlalb_lane_0_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlalb z0\.s, \1\.h, z1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlalb_lane_0_f32_tied2, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlalb_lane_f32 (z4, z0, z1, 0),
+ z0_res = svbfmlalb_lane (z4, z0, z1, 0))
+
+/*
+** bfmlalb_lane_0_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlalb z0\.s, z1\.h, \1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlalb_lane_0_f32_tied3, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlalb_lane_f32 (z4, z1, z0, 0),
+ z0_res = svbfmlalb_lane (z4, z1, z0, 0))
+
+/*
+** bfmlalb_lane_0_f32_untied:
+** movprfx z0, z1
+** bfmlalb z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (bfmlalb_lane_0_f32_untied, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlalb_lane_f32 (z1, z4, z5, 0),
+ z0 = svbfmlalb_lane (z1, z4, z5, 0))
+
+/*
+** bfmlalb_lane_1_f32:
+** bfmlalb z0\.s, z4\.h, z5\.h\[1\]
+** ret
+*/
+TEST_DUAL_Z (bfmlalb_lane_1_f32, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlalb_lane_f32 (z0, z4, z5, 1),
+ z0 = svbfmlalb_lane (z0, z4, z5, 1))
+
+/*
+** bfmlalb_lane_7_f32:
+** bfmlalb z0\.s, z4\.h, z5\.h\[7\]
+** ret
+*/
+TEST_DUAL_Z (bfmlalb_lane_7_f32, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlalb_lane_f32 (z0, z4, z5, 7),
+ z0 = svbfmlalb_lane (z0, z4, z5, 7))
+
+/*
+** bfmlalb_lane_z8_f32:
+** str d8, \[sp, -16\]!
+** mov (z[0-7])\.d, z8\.d
+** bfmlalb z0\.s, z1\.h, \1\.h\[1\]
+** ldr d8, \[sp\], 16
+** ret
+*/
+TEST_DUAL_LANE_REG (bfmlalb_lane_z8_f32, svfloat32_t, svbfloat16_t, z8,
+ z0 = svbfmlalb_lane_f32 (z0, z1, z8, 1),
+ z0 = svbfmlalb_lane (z0, z1, z8, 1))
+
+/*
+** bfmlalb_lane_z16_f32:
+** mov (z[0-7])\.d, z16\.d
+** bfmlalb z0\.s, z1\.h, \1\.h\[1\]
+** ret
+*/
+TEST_DUAL_LANE_REG (bfmlalb_lane_z16_f32, svfloat32_t, svbfloat16_t, z16,
+ z0 = svbfmlalb_lane_f32 (z0, z1, z16, 1),
+ z0 = svbfmlalb_lane (z0, z1, z16, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_f32.c
new file mode 100644
index 0000000..2a583fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_f32.c
@@ -0,0 +1,67 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** bfmlalt_f32_tied1:
+** bfmlalt z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (bfmlalt_f32_tied1, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlalt_f32 (z0, z4, z5),
+ z0 = svbfmlalt (z0, z4, z5))
+
+/*
+** bfmlalt_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlalt z0\.s, \1\.h, z1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlalt_f32_tied2, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlalt_f32 (z4, z0, z1),
+ z0_res = svbfmlalt (z4, z0, z1))
+
+/*
+** bfmlalt_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlalt z0\.s, z1\.h, \1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlalt_f32_tied3, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlalt_f32 (z4, z1, z0),
+ z0_res = svbfmlalt (z4, z1, z0))
+
+/*
+** bfmlalt_f32_untied:
+** movprfx z0, z1
+** bfmlalt z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (bfmlalt_f32_untied, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlalt_f32 (z1, z4, z5),
+ z0 = svbfmlalt (z1, z4, z5))
+
+/*
+** bfmlalt_h7_f32_tied1:
+** mov (z[0-9]+\.h), h7
+** bfmlalt z0\.s, z4\.h, \1
+** ret
+*/
+TEST_DUAL_ZD (bfmlalt_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t,
+ z0 = svbfmlalt_n_f32 (z0, z4, d7),
+ z0 = svbfmlalt (z0, z4, d7))
+
+/*
+** bfmlalt_h7_f32_untied:
+** mov (z[0-9]+\.h), h7
+** movprfx z0, z1
+** bfmlalt z0\.s, z4\.h, \1
+** ret
+*/
+TEST_DUAL_ZD (bfmlalt_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t,
+ z0 = svbfmlalt_n_f32 (z1, z4, d7),
+ z0 = svbfmlalt (z1, z4, d7))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_lane_f32.c
new file mode 100644
index 0000000..3af3997
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_lane_f32.c
@@ -0,0 +1,86 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** bfmlalt_lane_0_f32_tied1:
+** bfmlalt z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (bfmlalt_lane_0_f32_tied1, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlalt_lane_f32 (z0, z4, z5, 0),
+ z0 = svbfmlalt_lane (z0, z4, z5, 0))
+
+/*
+** bfmlalt_lane_0_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlalt z0\.s, \1\.h, z1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlalt_lane_0_f32_tied2, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlalt_lane_f32 (z4, z0, z1, 0),
+ z0_res = svbfmlalt_lane (z4, z0, z1, 0))
+
+/*
+** bfmlalt_lane_0_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmlalt z0\.s, z1\.h, \1\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (bfmlalt_lane_0_f32_tied3, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmlalt_lane_f32 (z4, z1, z0, 0),
+ z0_res = svbfmlalt_lane (z4, z1, z0, 0))
+
+/*
+** bfmlalt_lane_0_f32_untied:
+** movprfx z0, z1
+** bfmlalt z0\.s, z4\.h, z5\.h\[0\]
+** ret
+*/
+TEST_DUAL_Z (bfmlalt_lane_0_f32_untied, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlalt_lane_f32 (z1, z4, z5, 0),
+ z0 = svbfmlalt_lane (z1, z4, z5, 0))
+
+/*
+** bfmlalt_lane_1_f32:
+** bfmlalt z0\.s, z4\.h, z5\.h\[1\]
+** ret
+*/
+TEST_DUAL_Z (bfmlalt_lane_1_f32, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlalt_lane_f32 (z0, z4, z5, 1),
+ z0 = svbfmlalt_lane (z0, z4, z5, 1))
+
+/*
+** bfmlalt_lane_7_f32:
+** bfmlalt z0\.s, z4\.h, z5\.h\[7\]
+** ret
+*/
+TEST_DUAL_Z (bfmlalt_lane_7_f32, svfloat32_t, svbfloat16_t,
+ z0 = svbfmlalt_lane_f32 (z0, z4, z5, 7),
+ z0 = svbfmlalt_lane (z0, z4, z5, 7))
+
+/*
+** bfmlalt_lane_z8_f32:
+** str d8, \[sp, -16\]!
+** mov (z[0-7])\.d, z8\.d
+** bfmlalt z0\.s, z1\.h, \1\.h\[1\]
+** ldr d8, \[sp\], 16
+** ret
+*/
+TEST_DUAL_LANE_REG (bfmlalt_lane_z8_f32, svfloat32_t, svbfloat16_t, z8,
+ z0 = svbfmlalt_lane_f32 (z0, z1, z8, 1),
+ z0 = svbfmlalt_lane (z0, z1, z8, 1))
+
+/*
+** bfmlalt_lane_z16_f32:
+** mov (z[0-7])\.d, z16\.d
+** bfmlalt z0\.s, z1\.h, \1\.h\[1\]
+** ret
+*/
+TEST_DUAL_LANE_REG (bfmlalt_lane_z16_f32, svfloat32_t, svbfloat16_t, z16,
+ z0 = svbfmlalt_lane_f32 (z0, z1, z16, 1),
+ z0 = svbfmlalt_lane (z0, z1, z16, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c
new file mode 100644
index 0000000..b1d98fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c
@@ -0,0 +1,46 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** bfmmla_f32_tied1:
+** bfmmla z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (bfmmla_f32_tied1, svfloat32_t, svbfloat16_t,
+ z0 = svbfmmla_f32 (z0, z4, z5),
+ z0 = svbfmmla (z0, z4, z5))
+
+/*
+** bfmmla_f32_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmmla z0\.s, \1\.h, z1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (bfmmla_f32_tied2, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmmla_f32 (z4, z0, z1),
+ z0_res = svbfmmla (z4, z0, z1))
+
+/*
+** bfmmla_f32_tied3:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfmmla z0\.s, z1\.h, \1\.h
+** ret
+*/
+TEST_DUAL_Z_REV (bfmmla_f32_tied3, svfloat32_t, svbfloat16_t,
+ z0_res = svbfmmla_f32 (z4, z1, z0),
+ z0_res = svbfmmla (z4, z1, z0))
+
+/*
+** bfmmla_f32_untied:
+** movprfx z0, z1
+** bfmmla z0\.s, z4\.h, z5\.h
+** ret
+*/
+TEST_DUAL_Z (bfmmla_f32_untied, svfloat32_t, svbfloat16_t,
+ z0 = svbfmmla_f32 (z1, z4, z5),
+ z0 = svbfmmla (z1, z4, z5))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_bf16.c
new file mode 100644
index 0000000..52baa1f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_bf16.c
@@ -0,0 +1,96 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** cvt_bf16_f32_m_tied1:
+** bfcvt z0\.h, p0/m, z4\.s
+** ret
+*/
+TEST_DUAL_Z (cvt_bf16_f32_m_tied1, svbfloat16_t, svfloat32_t,
+ z0 = svcvt_bf16_f32_m (z0, p0, z4),
+ z0 = svcvt_bf16_m (z0, p0, z4))
+
+/*
+** cvt_bf16_f32_m_tied2:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** bfcvt z0\.h, p0/m, \1\.s
+** ret
+*/
+TEST_DUAL_Z_REV (cvt_bf16_f32_m_tied2, svbfloat16_t, svfloat32_t,
+ z0_res = svcvt_bf16_f32_m (z4, p0, z0),
+ z0_res = svcvt_bf16_m (z4, p0, z0))
+
+/*
+** cvt_bf16_f32_m_untied:
+** movprfx z0, z1
+** bfcvt z0\.h, p0/m, z4\.s
+** ret
+*/
+TEST_DUAL_Z (cvt_bf16_f32_m_untied, svbfloat16_t, svfloat32_t,
+ z0 = svcvt_bf16_f32_m (z1, p0, z4),
+ z0 = svcvt_bf16_m (z1, p0, z4))
+
+/*
+** cvt_bf16_f32_z_tied1:
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0\.s, p0/z, \1\.s
+** bfcvt z0\.h, p0/m, \1\.s
+** ret
+*/
+TEST_DUAL_Z_REV (cvt_bf16_f32_z_tied1, svbfloat16_t, svfloat32_t,
+ z0_res = svcvt_bf16_f32_z (p0, z0),
+ z0_res = svcvt_bf16_z (p0, z0))
+
+/*
+** cvt_bf16_f32_z_untied:
+** movprfx z0\.s, p0/z, z4\.s
+** bfcvt z0\.h, p0/m, z4\.s
+** ret
+*/
+TEST_DUAL_Z (cvt_bf16_f32_z_untied, svbfloat16_t, svfloat32_t,
+ z0 = svcvt_bf16_f32_z (p0, z4),
+ z0 = svcvt_bf16_z (p0, z4))
+
+/*
+** cvt_bf16_f32_x_tied1:
+** bfcvt z0\.h, p0/m, z0\.s
+** ret
+*/
+TEST_DUAL_Z_REV (cvt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t,
+ z0_res = svcvt_bf16_f32_x (p0, z0),
+ z0_res = svcvt_bf16_x (p0, z0))
+
+/*
+** cvt_bf16_f32_x_untied:
+** bfcvt z0\.h, p0/m, z4\.s
+** ret
+*/
+TEST_DUAL_Z (cvt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t,
+ z0 = svcvt_bf16_f32_x (p0, z4),
+ z0 = svcvt_bf16_x (p0, z4))
+
+/*
+** ptrue_cvt_bf16_f32_x_tied1:
+** ...
+** ptrue p[0-9]+\.b[^\n]*
+** ...
+** ret
+*/
+TEST_DUAL_Z_REV (ptrue_cvt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t,
+ z0_res = svcvt_bf16_f32_x (svptrue_b32 (), z0),
+ z0_res = svcvt_bf16_x (svptrue_b32 (), z0))
+
+/*
+** ptrue_cvt_bf16_f32_x_untied:
+** ...
+** ptrue p[0-9]+\.b[^\n]*
+** ...
+** ret
+*/
+TEST_DUAL_Z (ptrue_cvt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t,
+ z0 = svcvt_bf16_f32_x (svptrue_b32 (), z4),
+ z0 = svcvt_bf16_x (svptrue_b32 (), z4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvtnt_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvtnt_bf16.c
new file mode 100644
index 0000000..54614c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvtnt_bf16.c
@@ -0,0 +1,90 @@
+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** cvtnt_bf16_f32_m_tied1:
+** bfcvtnt z0\.h, p0/m, z4\.s
+** ret
+*/
+TEST_DUAL_Z (cvtnt_bf16_f32_m_tied1, svbfloat16_t, svfloat32_t,
+ z0 = svcvtnt_bf16_f32_m (z0, p0, z4),
+ z0 = svcvtnt_bf16_m (z0, p0, z4))
+
+/* Bad RA choice: no preferred output sequence. */
+TEST_DUAL_Z_REV (cvtnt_bf16_f32_m_tied2, svbfloat16_t, svfloat32_t,
+ z0_res = svcvtnt_bf16_f32_m (z4, p0, z0),
+ z0_res = svcvtnt_bf16_m (z4, p0, z0))
+
+/*
+** cvtnt_bf16_f32_m_untied:
+** (
+** mov z0\.d, z1\.d
+** bfcvtnt z0\.h, p0/m, z4\.s
+** |
+** bfcvtnt z1\.h, p0/m, z4\.s
+** mov z0\.d, z1\.d
+** )
+** ret
+*/
+TEST_DUAL_Z (cvtnt_bf16_f32_m_untied, svbfloat16_t, svfloat32_t,
+ z0 = svcvtnt_bf16_f32_m (z1, p0, z4),
+ z0 = svcvtnt_bf16_m (z1, p0, z4))
+
+/*
+** cvtnt_bf16_f32_x_tied1:
+** bfcvtnt z0\.h, p0/m, z4\.s
+** ret
+*/
+TEST_DUAL_Z (cvtnt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t,
+ z0 = svcvtnt_bf16_f32_x (z0, p0, z4),
+ z0 = svcvtnt_bf16_x (z0, p0, z4))
+
+/* Bad RA choice: no preferred output sequence. */
+TEST_DUAL_Z_REV (cvtnt_bf16_f32_x_tied2, svbfloat16_t, svfloat32_t,
+ z0_res = svcvtnt_bf16_f32_x (z4, p0, z0),
+ z0_res = svcvtnt_bf16_x (z4, p0, z0))
+
+/*
+** cvtnt_bf16_f32_x_untied:
+** (
+** mov z0\.d, z1\.d
+** bfcvtnt z0\.h, p0/m, z4\.s
+** |
+** bfcvtnt z1\.h, p0/m, z4\.s
+** mov z0\.d, z1\.d
+** )
+** ret
+*/
+TEST_DUAL_Z (cvtnt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t,
+ z0 = svcvtnt_bf16_f32_x (z1, p0, z4),
+ z0 = svcvtnt_bf16_x (z1, p0, z4))
+
+/*
+** ptrue_cvtnt_bf16_f32_x_tied1:
+** ...
+** ptrue p[0-9]+\.b[^\n]*
+** ...
+** ret
+*/
+TEST_DUAL_Z (ptrue_cvtnt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t,
+ z0 = svcvtnt_bf16_f32_x (z0, svptrue_b32 (), z4),
+ z0 = svcvtnt_bf16_x (z0, svptrue_b32 (), z4))
+
+/* Bad RA choice: no preferred output sequence. */
+TEST_DUAL_Z_REV (ptrue_cvtnt_bf16_f32_x_tied2, svbfloat16_t, svfloat32_t,
+ z0_res = svcvtnt_bf16_f32_x (z4, svptrue_b32 (), z0),
+ z0_res = svcvtnt_bf16_x (z4, svptrue_b32 (), z0))
+
+/*
+** ptrue_cvtnt_bf16_f32_x_untied:
+** ...
+** ptrue p[0-9]+\.b[^\n]*
+** ...
+** ret
+*/
+TEST_DUAL_Z (ptrue_cvtnt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t,
+ z0 = svcvtnt_bf16_f32_x (z1, svptrue_b32 (), z4),
+ z0 = svcvtnt_bf16_x (z1, svptrue_b32 (), z4))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c
new file mode 100644
index 0000000..a9233324
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target ("arch=armv8.2-a+sve+bf16")
+
+void
+f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32,
+ svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, bfloat16_t bf)
+{
+ svbfmmla (f32, bf16); /* { dg-error {too few arguments to function 'svbfmmla'} } */
+ svbfmmla (f32, bf16, bf16, 0); /* { dg-error {too many arguments to function 'svbfmmla'} } */
+ svbfmmla (0, bf16, bf16); /* { dg-error {passing 'int' to argument 1 of 'svbfmmla', which expects an SVE vector type} } */
+ svbfmmla (pg, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svbool_t' arguments} } */
+ svbfmmla (u8, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svuint8_t' arguments} } */
+ svbfmmla (u16, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svuint16_t' arguments} } */
+ svbfmmla (f64, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svfloat64_t' arguments} } */
+ svbfmmla (f32, bf16, bf16);
+ svbfmmla (f32, 0, bf16); /* { dg-error {passing 'int' to argument 2 of 'svbfmmla', which expects 'svbfloat16_t'} } */
+ svbfmmla (f32, f32, bf16); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svbfmmla', which expects 'svbfloat16_t'} } */
+ svbfmmla (f32, bf16, 0); /* { dg-error {passing 'int' to argument 3 of 'svbfmmla', which expects 'svbfloat16_t'} } */
+ svbfmmla (f32, bf16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svbfmmla', which expects 'svbfloat16_t'} } */
+ svbfmmla (f32, bf16, bf); /* { dg-error {passing 'bfloat16_t'[^\n]* to argument 3 of 'svbfmmla', which expects 'svbfloat16_t'} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c
new file mode 100644
index 0000000..23f027f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target ("arch=armv8.2-a+sve+bf16")
+
+void
+f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32,
+ svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, int i)
+{
+ svbfmlalb_lane (f32, bf16, bf16); /* { dg-error {too few arguments to function 'svbfmlalb_lane'} } */
+ svbfmlalb_lane (f32, bf16, bf16, 0, 0); /* { dg-error {too many arguments to function 'svbfmlalb_lane'} } */
+ svbfmlalb_lane (0, bf16, bf16, 0); /* { dg-error {passing 'int' to argument 1 of 'svbfmlalb_lane', which expects an SVE vector type} } */
+ svbfmlalb_lane (pg, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svbool_t' arguments} } */
+ svbfmlalb_lane (u8, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svuint8_t' arguments} } */
+ svbfmlalb_lane (u16, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svuint16_t' arguments} } */
+ svbfmlalb_lane (f64, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svfloat64_t' arguments} } */
+ svbfmlalb_lane (f32, bf16, bf16, 0);
+ svbfmlalb_lane (f32, 0, bf16, 0); /* { dg-error {passing 'int' to argument 2 of 'svbfmlalb_lane', which expects 'svbfloat16_t'} } */
+ svbfmlalb_lane (f32, f32, bf16, 0); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svbfmlalb_lane', which expects 'svbfloat16_t'} } */
+ svbfmlalb_lane (f32, bf16, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svbfmlalb_lane', which expects 'svbfloat16_t'} } */
+ svbfmlalb_lane (f32, bf16, f32, 0); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svbfmlalb_lane', which expects 'svbfloat16_t'} } */
+ svbfmlalb_lane (f32, bf16, bf16, s32); /* { dg-error {argument 4 of 'svbfmlalb_lane' must be an integer constant expression} } */
+ svbfmlalb_lane (f32, bf16, bf16, i); /* { dg-error {argument 4 of 'svbfmlalb_lane' must be an integer constant expression} } */
+
+ svbfmlalb_lane (f32, bf16, bf16, 0);
+ svbfmlalb_lane (f32, bf16, bf16, 7);
+ svbfmlalb_lane (f32, bf16, bf16, 8); /* { dg-error {passing 8 to argument 4 of 'svbfmlalb_lane', which expects a value in the range \[0, 7\]} } */
+ svbfmlalb_lane (f32, bf16, bf16, -1); /* { dg-error {passing -1 to argument 4 of 'svbfmlalb_lane', which expects a value in the range \[0, 7\]} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c
new file mode 100644
index 0000000..4755ca7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target ("arch=armv8.2-a+sve+bf16")
+
+void
+f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32,
+ svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, int i)
+{
+ svbfdot_lane (f32, bf16, bf16); /* { dg-error {too few arguments to function 'svbfdot_lane'} } */
+ svbfdot_lane (f32, bf16, bf16, 0, 0); /* { dg-error {too many arguments to function 'svbfdot_lane'} } */
+ svbfdot_lane (0, bf16, bf16, 0); /* { dg-error {passing 'int' to argument 1 of 'svbfdot_lane', which expects an SVE vector type} } */
+ svbfdot_lane (pg, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svbool_t' arguments} } */
+ svbfdot_lane (u8, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svuint8_t' arguments} } */
+ svbfdot_lane (u16, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svuint16_t' arguments} } */
+ svbfdot_lane (f64, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svfloat64_t' arguments} } */
+ svbfdot_lane (f32, bf16, bf16, 0);
+ svbfdot_lane (f32, 0, bf16, 0); /* { dg-error {passing 'int' to argument 2 of 'svbfdot_lane', which expects 'svbfloat16_t'} } */
+ svbfdot_lane (f32, f32, bf16, 0); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svbfdot_lane', which expects 'svbfloat16_t'} } */
+ svbfdot_lane (f32, bf16, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svbfdot_lane', which expects 'svbfloat16_t'} } */
+ svbfdot_lane (f32, bf16, f32, 0); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svbfdot_lane', which expects 'svbfloat16_t'} } */
+ svbfdot_lane (f32, bf16, bf16, s32); /* { dg-error {argument 4 of 'svbfdot_lane' must be an integer constant expression} } */
+ svbfdot_lane (f32, bf16, bf16, i); /* { dg-error {argument 4 of 'svbfdot_lane' must be an integer constant expression} } */
+
+ svbfdot_lane (f32, bf16, bf16, 0);
+ svbfdot_lane (f32, bf16, bf16, 3);
+ svbfdot_lane (f32, bf16, bf16, 4); /* { dg-error {passing 4 to argument 4 of 'svbfdot_lane', which expects a value in the range \[0, 3\]} } */
+ svbfdot_lane (f32, bf16, bf16, -1); /* { dg-error {passing -1 to argument 4 of 'svbfdot_lane', which expects a value in the range \[0, 3\]} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c
new file mode 100644
index 0000000..2d09a8e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target ("arch=armv8.2-a+sve+bf16")
+
+void
+f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32,
+ svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, bfloat16_t bf)
+{
+ svbfdot (f32, bf16); /* { dg-error {too few arguments to function 'svbfdot'} } */
+ svbfdot (f32, bf16, bf16, 0); /* { dg-error {too many arguments to function 'svbfdot'} } */
+ svbfdot (0, bf16, bf16); /* { dg-error {passing 'int' to argument 1 of 'svbfdot', which expects an SVE vector type} } */
+ svbfdot (pg, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svbool_t' arguments} } */
+ svbfdot (u8, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svuint8_t' arguments} } */
+ svbfdot (u16, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svuint16_t' arguments} } */
+ svbfdot (f64, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svfloat64_t' arguments} } */
+ svbfdot (f32, bf16, bf16);
+ svbfdot (f32, 0, bf16); /* { dg-error {passing 'int' to argument 2 of 'svbfdot', which expects 'svbfloat16_t'} } */
+ svbfdot (f32, f32, bf16); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svbfdot', which expects 'svbfloat16_t'} } */
+ svbfdot (f32, bf16, 0); /* { dg-error {invalid conversion to type 'bfloat16_t'} } */
+ svbfdot (f32, bf16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svbfdot', which expects 'svbfloat16_t'} } */
+ svbfdot (f32, bf16, bf);
+}
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 6c06faf..5377d7b 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -8996,7 +8996,7 @@ proc check_effective_target_aarch64_tiny { } {
# various architecture extensions via the .arch_extension pseudo-op.
foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve"
- "i8mm" "f32mm" "f64mm" } {
+ "i8mm" "f32mm" "f64mm" "bf16" } {
eval [string map [list FUNC $aarch64_ext] {
proc check_effective_target_aarch64_asm_FUNC_ok { } {
if { [istarget aarch64*-*-*] } {