Diffstat (limited to 'gcc')
28 files changed, 1112 insertions, 11 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d10ae92..234e328 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,51 @@ 2020-01-31 Richard Sandiford <richard.sandiford@arm.com> + * config/aarch64/aarch64.h (TARGET_SVE_BF16): New macro. + * config/aarch64/aarch64-sve-builtins-sve2.h (svcvtnt): Move to + aarch64-sve-builtins-base.h. + * config/aarch64/aarch64-sve-builtins-sve2.cc (svcvtnt): Move to + aarch64-sve-builtins-base.cc. + * config/aarch64/aarch64-sve-builtins-base.h (svbfdot, svbfdot_lane) + (svbfmlalb, svbfmlalb_lane, svbfmlalt, svbfmlalt_lane, svbfmmla) + (svcvtnt): Declare. + * config/aarch64/aarch64-sve-builtins-base.cc (svbfdot, svbfdot_lane) + (svbfmlalb, svbfmlalb_lane, svbfmlalt, svbfmlalt_lane, svbfmmla) + (svcvtnt): New functions. + * config/aarch64/aarch64-sve-builtins-base.def (svbfdot, svbfdot_lane) + (svbfmlalb, svbfmlalb_lane, svbfmlalt, svbfmlalt_lane, svbfmmla) + (svcvtnt): New functions. + (svcvt): Add a form that converts f32 to bf16. + * config/aarch64/aarch64-sve-builtins-shapes.h (ternary_bfloat) + (ternary_bfloat_lane, ternary_bfloat_lanex2, ternary_bfloat_opt_n): + Declare. + * config/aarch64/aarch64-sve-builtins-shapes.cc (parse_element_type): + Treat B as bfloat16_t. + (ternary_bfloat_lane_base): New class. + (ternary_bfloat_def): Likewise. + (ternary_bfloat): New shape. + (ternary_bfloat_lane_def): New class. + (ternary_bfloat_lane): New shape. + (ternary_bfloat_lanex2_def): New class. + (ternary_bfloat_lanex2): New shape. + (ternary_bfloat_opt_n_def): New class. + (ternary_bfloat_opt_n): New shape. + * config/aarch64/aarch64-sve-builtins.cc (TYPES_cvt_bfloat): New macro. + * config/aarch64/aarch64-sve.md (@aarch64_sve_<sve_fp_op>vnx4sf) + (@aarch64_sve_<sve_fp_op>_lanevnx4sf): New patterns. + (@aarch64_sve_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>) + (@cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>): Likewise. + (*cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>): Likewise. + (@aarch64_sve_cvtnt<VNx8BF_ONLY:mode>): Likewise. + * config/aarch64/aarch64-sve2.md (@aarch64_sve2_cvtnt<mode>): Key + the pattern off the narrow mode instead of the wider one. + * config/aarch64/iterators.md (VNx8BF_ONLY): New mode iterator. + (UNSPEC_BFMLALB, UNSPEC_BFMLALT, UNSPEC_BFMMLA): New unspecs. + (sve_fp_op): Handle them. + (SVE_BFLOAT_TERNARY_LONG): New int iterator. + (SVE_BFLOAT_TERNARY_LONG_LANE): Likewise. + +2020-01-31 Richard Sandiford <richard.sandiford@arm.com> + * config/aarch64/arm_sve.h: Include arm_bf16.h. * config/aarch64/aarch64-modes.def (BF): Move definition before VECTOR_MODES. Remove separate VECTOR_MODES for V4BF and V8BF.
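For orientation before the code changes: a minimal usage sketch of the intrinsics this patch adds. It is illustrative only, not part of the patch; the names and signatures follow the new ACLE tests further down, and it assumes a compiler containing these changes, built with -march=armv8.2-a+sve+bf16.

#include <arm_sve.h>

/* Accumulate bf16 products into f32 lanes, then narrow back to bf16.
   BFDOT sums pairs of bf16 products per 32-bit lane; BFMLALB/BFMLALT
   widen and accumulate the even/odd bf16 elements respectively.  */
svbfloat16_t
bf16_kernel (svbool_t pg, svfloat32_t acc, svbfloat16_t a, svbfloat16_t b)
{
  acc = svbfdot_f32 (acc, a, b);
  acc = svbfmlalb_f32 (acc, a, b);
  acc = svbfmlalt_f32 (acc, a, b);
  /* BFCVT supplies the even bf16 elements of the result and
     BFCVTNT fills in the odd ones.  */
  svbfloat16_t res = svcvt_bf16_f32_x (pg, acc);
  return svcvtnt_bf16_f32_m (res, pg, acc);
}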
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index 9ae143c..9b63ea7 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -2544,6 +2544,16 @@ FUNCTION (svandv, reduction, (UNSPEC_ANDV)) FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT)) FUNCTION (svasr_wide, shift_wide, (ASHIFTRT, UNSPEC_ASHIFTRT_WIDE)) FUNCTION (svasrd, unspec_based_function, (UNSPEC_ASRD, -1, -1)) +FUNCTION (svbfdot, fixed_insn_function, (CODE_FOR_aarch64_sve_bfdotvnx4sf)) +FUNCTION (svbfdot_lane, fixed_insn_function, + (CODE_FOR_aarch64_sve_bfdot_lanevnx4sf)) +FUNCTION (svbfmlalb, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlalbvnx4sf)) +FUNCTION (svbfmlalb_lane, fixed_insn_function, + (CODE_FOR_aarch64_sve_bfmlalb_lanevnx4sf)) +FUNCTION (svbfmlalt, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlaltvnx4sf)) +FUNCTION (svbfmlalt_lane, fixed_insn_function, + (CODE_FOR_aarch64_sve_bfmlalt_lanevnx4sf)) +FUNCTION (svbfmmla, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmmlavnx4sf)) FUNCTION (svbic, svbic_impl,) FUNCTION (svbrka, svbrk_unary_impl, (UNSPEC_BRKA)) FUNCTION (svbrkb, svbrk_unary_impl, (UNSPEC_BRKB)) @@ -2592,6 +2602,7 @@ FUNCTION (svcreate2, svcreate_impl, (2)) FUNCTION (svcreate3, svcreate_impl, (3)) FUNCTION (svcreate4, svcreate_impl, (4)) FUNCTION (svcvt, svcvt_impl,) +FUNCTION (svcvtnt, CODE_FOR_MODE0 (aarch64_sve_cvtnt),) FUNCTION (svdiv, rtx_code_function, (DIV, UDIV, UNSPEC_COND_FDIV)) FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV)) FUNCTION (svdot, svdot_impl,) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def index 332555b..27ab05d 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def @@ -318,6 +318,18 @@ DEF_SVE_FUNCTION (svzip2, binary, all_data, none) DEF_SVE_FUNCTION (svzip2, binary_pred, all_pred, none) #undef REQUIRED_EXTENSIONS +#define REQUIRED_EXTENSIONS AARCH64_FL_BF16 +DEF_SVE_FUNCTION (svbfdot, ternary_bfloat_opt_n, s_float, none) +DEF_SVE_FUNCTION (svbfdot_lane, ternary_bfloat_lanex2, s_float, none) +DEF_SVE_FUNCTION (svbfmlalb, ternary_bfloat_opt_n, s_float, none) +DEF_SVE_FUNCTION (svbfmlalb_lane, ternary_bfloat_lane, s_float, none) +DEF_SVE_FUNCTION (svbfmlalt, ternary_bfloat_opt_n, s_float, none) +DEF_SVE_FUNCTION (svbfmlalt_lane, ternary_bfloat_lane, s_float, none) +DEF_SVE_FUNCTION (svbfmmla, ternary_bfloat, s_float, none) +DEF_SVE_FUNCTION (svcvt, unary_convert, cvt_bfloat, mxz) +DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, mx) +#undef REQUIRED_EXTENSIONS + #define REQUIRED_EXTENSIONS AARCH64_FL_I8MM DEF_SVE_FUNCTION (svmmla, mmla, s_integer, none) DEF_SVE_FUNCTION (svusmmla, ternary_uintq_intq, s_signed, none) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.h b/gcc/config/aarch64/aarch64-sve-builtins-base.h index 5c19b7d..957ace8 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.h @@ -42,6 +42,13 @@ namespace aarch64_sve extern const function_base *const svasr; extern const function_base *const svasr_wide; extern const function_base *const svasrd; + extern const function_base *const svbfdot; + extern const function_base *const svbfdot_lane; + extern const function_base *const svbfmlalb; + extern const function_base *const svbfmlalb_lane; + extern const function_base *const svbfmlalt; + extern const 
function_base *const svbfmlalt_lane; + extern const function_base *const svbfmmla; extern const function_base *const svbic; extern const function_base *const svbrka; extern const function_base *const svbrkb; @@ -84,6 +91,7 @@ namespace aarch64_sve extern const function_base *const svcreate3; extern const function_base *const svcreate4; extern const function_base *const svcvt; + extern const function_base *const svcvtnt; extern const function_base *const svdiv; extern const function_base *const svdivr; extern const function_base *const svdot; diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc index 1ea3caa..5f8c85d 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc @@ -78,6 +78,7 @@ apply_predication (const function_instance &instance, tree return_type, [01] - the element type in type suffix 0 or 1 of INSTANCE f<bits> - a floating-point type with the given number of bits f[01] - a floating-point type with the same width as type suffix 0 or 1 + B - bfloat16_t h<elt> - a half-sized version of <elt> p - a predicate (represented as TYPE_SUFFIX_b) q<elt> - a quarter-sized version of <elt> @@ -117,6 +118,9 @@ parse_element_type (const function_instance &instance, const char *&format) if (ch == 'p') return TYPE_SUFFIX_b; + if (ch == 'B') + return TYPE_SUFFIX_bf16; + if (ch == 'q') { type_suffix_index suffix = parse_element_type (instance, format); @@ -921,6 +925,26 @@ struct ternary_resize2_lane_base : public overloaded_base<0> } }; +/* A specialization of ternary_resize2_lane_base for bfloat16 elements, + indexed in groups of N elements. */ +template<unsigned int N> +struct ternary_bfloat_lane_base + : public ternary_resize2_lane_base<16, TYPE_bfloat, TYPE_bfloat> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vB,vB,su64", group, MODE_none); + } + + bool + check (function_checker &c) const OVERRIDE + { + return c.require_immediate_lane_index (3, N); + } +}; + /* A specialization of ternary_resize2_lane_base for quarter-sized elements. */ template<type_class_index TYPE_CLASS2 = function_resolver::SAME_TYPE_CLASS, @@ -2695,6 +2719,48 @@ struct tbl_tuple_def : public overloaded_base<0> }; SHAPE (tbl_tuple) +/* sv<t0>_t svfoo[_t0](sv<t0>_t, svbfloat16_t, svbfloat16_t). */ +struct ternary_bfloat_def + : public ternary_resize2_base<16, TYPE_bfloat, TYPE_bfloat> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vB,vB", group, MODE_none); + } +}; +SHAPE (ternary_bfloat) + +/* sv<t0>_t svfoo[_t0](sv<t0>_t, svbfloat16_t, svbfloat16_t, uint64_t) + + where the final argument is an integer constant expression in the range + [0, 7]. */ +typedef ternary_bfloat_lane_base<1> ternary_bfloat_lane_def; +SHAPE (ternary_bfloat_lane) + +/* sv<t0>_t svfoo[_t0](sv<t0>_t, svbfloat16_t, svbfloat16_t, uint64_t) + + where the final argument is an integer constant expression in the range + [0, 3]. */ +typedef ternary_bfloat_lane_base<2> ternary_bfloat_lanex2_def; +SHAPE (ternary_bfloat_lanex2) + +/* sv<t0>_t svfoo[_t0](sv<t0>_t, svbfloat16_t, svbfloat16_t) + sv<t0>_t svfoo[_n_t0](sv<t0>_t, svbfloat16_t, bfloat16_t).
*/ +struct ternary_bfloat_opt_n_def + : public ternary_resize2_opt_n_base<16, TYPE_bfloat, TYPE_bfloat> +{ + void + build (function_builder &b, const function_group_info &group) const OVERRIDE + { + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vB,vB", group, MODE_none); + build_all (b, "v0,v0,vB,sB", group, MODE_n); + } +}; +SHAPE (ternary_bfloat_opt_n) + /* sv<t0>_t svfoo[_t0](sv<t0>_t, sv<t0:int:quarter>_t, sv<t0:uint:quarter>_t, uint64_t) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h index 1ce0997..3a19982 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h @@ -148,6 +148,10 @@ namespace aarch64_sve extern const function_shape *const store_scatter_offset; extern const function_shape *const store_scatter_offset_restricted; extern const function_shape *const tbl_tuple; + extern const function_shape *const ternary_bfloat; + extern const function_shape *const ternary_bfloat_lane; + extern const function_shape *const ternary_bfloat_lanex2; + extern const function_shape *const ternary_bfloat_opt_n; extern const function_shape *const ternary_intq_uintq_lane; extern const function_shape *const ternary_intq_uintq_opt_n; extern const function_shape *const ternary_lane; diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc index 53b1651..9e7219c 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc @@ -487,7 +487,6 @@ FUNCTION (svbsl2n, CODE_FOR_MODE0 (aarch64_sve2_bsl2n),) FUNCTION (svcdot, svcdot_impl,) FUNCTION (svcdot_lane, svcdot_lane_impl,) FUNCTION (svcvtlt, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTLT)) -FUNCTION (svcvtnt, CODE_FOR_MODE1 (aarch64_sve2_cvtnt),) FUNCTION (svcvtx, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTX)) FUNCTION (svcvtxnt, CODE_FOR_MODE1 (aarch64_sve2_cvtxnt),) FUNCTION (sveor3, CODE_FOR_MODE0 (aarch64_sve2_eor3),) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h index 90e29fc..06d4a93 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h @@ -54,7 +54,6 @@ namespace aarch64_sve extern const function_base *const svcdot; extern const function_base *const svcdot_lane; extern const function_base *const svcvtlt; - extern const function_base *const svcvtnt; extern const function_base *const svcvtx; extern const function_base *const svcvtxnt; extern const function_base *const sveor3; diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index d4d201d..2c5543b 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -354,6 +354,10 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = { D (u32, f16), D (u32, f32), D (u32, f64), \ D (u64, f16), D (u64, f32), D (u64, f64) +/* _bf16_f32. */ +#define TYPES_cvt_bfloat(S, D) \ + D (bf16, f32) + /* _f32_f16 _f64_f32. 
*/ #define TYPES_cvt_long(S, D) \ @@ -471,6 +475,7 @@ DEF_SVE_TYPES_ARRAY (d_unsigned); DEF_SVE_TYPES_ARRAY (d_integer); DEF_SVE_TYPES_ARRAY (d_data); DEF_SVE_TYPES_ARRAY (cvt); +DEF_SVE_TYPES_ARRAY (cvt_bfloat); DEF_SVE_TYPES_ARRAY (cvt_long); DEF_SVE_TYPES_ARRAY (cvt_narrow_s); DEF_SVE_TYPES_ARRAY (cvt_narrow); diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index abaac11..fa38529 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -115,6 +115,7 @@ ;; ---- [FP] General ternary arithmetic corresponding to unspecs ;; ---- [FP] Complex multiply-add ;; ---- [FP] Trigonometric multiply-add +;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF) ;; ---- [FP] Matrix multiply-accumulate ;; ;; == Comparisons and selects @@ -150,6 +151,7 @@ ;; ---- [FP<-INT] Packs ;; ---- [FP<-INT] Unpacks ;; ---- [FP<-FP] Packs +;; ---- [FP<-FP] Packs (bfloat16) ;; ---- [FP<-FP] Unpacks ;; ---- [PRED<-PRED] Packs ;; ---- [PRED<-PRED] Unpacks @@ -6548,6 +6550,46 @@ ) ;; ------------------------------------------------------------------------- +;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF) +;; ------------------------------------------------------------------------- +;; Includes: +;; - BFDOT (BF16) +;; - BFMLALB (BF16) +;; - BFMLALT (BF16) +;; - BFMMLA (BF16) +;; ------------------------------------------------------------------------- + +(define_insn "@aarch64_sve_<sve_fp_op>vnx4sf" + [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w") + (unspec:VNx4SF + [(match_operand:VNx4SF 1 "register_operand" "0, w") + (match_operand:VNx8BF 2 "register_operand" "w, w") + (match_operand:VNx8BF 3 "register_operand" "w, w")] + SVE_BFLOAT_TERNARY_LONG))] + "TARGET_SVE_BF16" + "@ + <sve_fp_op>\t%0.s, %2.h, %3.h + movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h" + [(set_attr "movprfx" "*,yes")] +) + +;; The immediate range is enforced before generating the instruction. +(define_insn "@aarch64_sve_<sve_fp_op>_lanevnx4sf" + [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w") + (unspec:VNx4SF + [(match_operand:VNx4SF 1 "register_operand" "0, w") + (match_operand:VNx8BF 2 "register_operand" "w, w") + (match_operand:VNx8BF 3 "register_operand" "y, y") + (match_operand:SI 4 "const_int_operand")] + SVE_BFLOAT_TERNARY_LONG_LANE))] + "TARGET_SVE_BF16" + "@ + <sve_fp_op>\t%0.s, %2.h, %3.h[%4] + movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h[%4]" + [(set_attr "movprfx" "*,yes")] +) + +;; ------------------------------------------------------------------------- ;; ---- [FP] Matrix multiply-accumulate ;; ------------------------------------------------------------------------- ;; Includes: @@ -8110,6 +8152,77 @@ ) ;; ------------------------------------------------------------------------- +;; ---- [FP<-FP] Packs (bfloat16) +;; ------------------------------------------------------------------------- +;; Includes: +;; - BFCVT (BF16) +;; - BFCVTNT (BF16) +;; ------------------------------------------------------------------------- + +;; Predicated BFCVT. +(define_insn "@aarch64_sve_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>" + [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w") + (unspec:VNx8BF_ONLY + [(match_operand:VNx4BI 1 "register_operand" "Upl") + (match_operand:SI 3 "aarch64_sve_gp_strictness") + (match_operand:VNx4SF_ONLY 2 "register_operand" "w")] + SVE_COND_FCVT))] + "TARGET_SVE_BF16" + "bfcvt\t%0.h, %1/m, %2.s" +) + +;; Predicated BFCVT with merging. 
+(define_expand "@cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>" + [(set (match_operand:VNx8BF_ONLY 0 "register_operand") + (unspec:VNx8BF_ONLY + [(match_operand:VNx4BI 1 "register_operand") + (unspec:VNx8BF_ONLY + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:VNx4SF_ONLY 2 "register_operand")] + SVE_COND_FCVT) + (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")] + UNSPEC_SEL))] + "TARGET_SVE_BF16" +) + +(define_insn "*cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>" + [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w, ?&w") + (unspec:VNx8BF_ONLY + [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl, Upl") + (unspec:VNx8BF_ONLY + [(match_dup 1) + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (match_operand:VNx4SF_ONLY 2 "register_operand" "w, w, w")] + SVE_COND_FCVT) + (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE_BF16" + "@ + bfcvt\t%0.h, %1/m, %2.s + movprfx\t%0.s, %1/z, %2.s\;bfcvt\t%0.h, %1/m, %2.s + movprfx\t%0, %3\;bfcvt\t%0.h, %1/m, %2.s" + [(set_attr "movprfx" "*,yes,yes")] +) + +;; Predicated BFCVTNT. This doesn't give a natural aarch64_pred_*/cond_* +;; pair because the even elements always have to be supplied for active +;; elements, even if the inactive elements don't matter. +;; +;; This instruction does not take MOVPRFX. +(define_insn "@aarch64_sve_cvtnt<mode>" + [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w") + (unspec:VNx8BF_ONLY + [(match_operand:VNx4BI 2 "register_operand" "Upl") + (const_int SVE_STRICT_GP) + (match_operand:VNx8BF_ONLY 1 "register_operand" "0") + (match_operand:VNx4SF 3 "register_operand" "w")] + UNSPEC_COND_FCVTNT))] + "TARGET_SVE_BF16" + "bfcvtnt\t%0.h, %2/m, %3.s" +) + +;; ------------------------------------------------------------------------- ;; ---- [FP<-FP] Unpacks ;; ------------------------------------------------------------------------- ;; Includes: diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index eaded5d..f82e60e 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -1921,16 +1921,16 @@ ;; elements, even if the inactive elements don't matter. ;; ;; These instructions do not take MOVPRFX. -(define_insn "@aarch64_sve2_cvtnt<mode>" - [(set (match_operand:<VNARROW> 0 "register_operand" "=w") - (unspec:<VNARROW> - [(match_operand:<VPRED> 2 "register_operand" "Upl") +(define_insn "@aarch64_sve_cvtnt<mode>" + [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w") + (unspec:SVE_FULL_HSF + [(match_operand:<VWIDE_PRED> 2 "register_operand" "Upl") (const_int SVE_STRICT_GP) - (match_operand:<VNARROW> 1 "register_operand" "0") - (match_operand:SVE_FULL_SDF 3 "register_operand" "w")] + (match_operand:SVE_FULL_HSF 1 "register_operand" "0") + (match_operand:<VWIDE> 3 "register_operand" "w")] UNSPEC_COND_FCVTNT))] "TARGET_SVE2" - "fcvtnt\t%0.<Ventype>, %2/m, %3.<Vetype>" + "fcvtnt\t%0.<Vetype>, %2/m, %3.<Vewtype>" ) ;; Predicated FCVTX (equivalent to what would be FCVTXNB, except that diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 043e26a..8f08bad 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -362,6 +362,7 @@ extern unsigned aarch64_architecture_version; /* BF16 instructions are enabled through +bf16.
*/ #define TARGET_BF16_FP (AARCH64_ISA_BF16) #define TARGET_BF16_SIMD (AARCH64_ISA_BF16 && TARGET_SIMD) +#define TARGET_SVE_BF16 (TARGET_SVE && AARCH64_ISA_BF16) /* Make sure this is always defined so we don't have to check for ifdefs but rather use normal ifs. */ diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index d5b60e0..3e3fd9d 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -314,6 +314,7 @@ ;; Iterators for single modes, for "@" patterns. (define_mode_iterator VNx16QI_ONLY [VNx16QI]) (define_mode_iterator VNx8HI_ONLY [VNx8HI]) +(define_mode_iterator VNx8BF_ONLY [VNx8BF]) (define_mode_iterator VNx4SI_ONLY [VNx4SI]) (define_mode_iterator VNx4SF_ONLY [VNx4SF]) (define_mode_iterator VNx2DI_ONLY [VNx2DI]) @@ -820,6 +821,9 @@ UNSPEC_USDOT ; Used in aarch64-simd.md. UNSPEC_SUDOT ; Used in aarch64-simd.md. UNSPEC_BFDOT ; Used in aarch64-simd.md. + UNSPEC_BFMLALB ; Used in aarch64-sve.md. + UNSPEC_BFMLALT ; Used in aarch64-sve.md. + UNSPEC_BFMMLA ; Used in aarch64-sve.md. ]) ;; ------------------------------------------------------------------ @@ -2235,6 +2239,15 @@ (define_int_iterator SVE_FP_BINARY_INT [UNSPEC_FTSMUL UNSPEC_FTSSEL]) +(define_int_iterator SVE_BFLOAT_TERNARY_LONG [UNSPEC_BFDOT + UNSPEC_BFMLALB + UNSPEC_BFMLALT + UNSPEC_BFMMLA]) + +(define_int_iterator SVE_BFLOAT_TERNARY_LONG_LANE [UNSPEC_BFDOT + UNSPEC_BFMLALB + UNSPEC_BFMLALT]) + (define_int_iterator SVE_INT_REDUCTION [UNSPEC_ANDV UNSPEC_IORV UNSPEC_SMAXV @@ -3225,7 +3238,11 @@ (UNSPEC_SQDMULLBT "sqdmlslbt") (UNSPEC_SQDMULLT "sqdmlslt")]) -(define_int_attr sve_fp_op [(UNSPEC_FRECPE "frecpe") +(define_int_attr sve_fp_op [(UNSPEC_BFDOT "bfdot") + (UNSPEC_BFMLALB "bfmlalb") + (UNSPEC_BFMLALT "bfmlalt") + (UNSPEC_BFMMLA "bfmmla") + (UNSPEC_FRECPE "frecpe") (UNSPEC_FRECPS "frecps") (UNSPEC_RSQRTE "frsqrte") (UNSPEC_RSQRTS "frsqrts") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 5d002d9..b239734 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,26 @@ 2020-01-31 Richard Sandiford <richard.sandiford@arm.com> + * lib/target-supports.exp (check_effective_target_aarch64_asm_bf16_ok): + New proc. + * gcc.target/aarch64/sve/acle/asm/bfdot_f32.c: New test. + * gcc.target/aarch64/sve/acle/asm/bfdot_lane_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/bfmlalb_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/bfmlalb_lane_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/bfmlalt_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/bfmlalt_lane_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/cvt_bf16.c: Likewise. + * gcc.target/aarch64/sve/acle/asm/cvtnt_bf16.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c: Likewise. + * gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c: + Likewise. + * gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c: + Likewise. + * gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c: + Likewise. + +2020-01-31 Richard Sandiford <richard.sandiford@arm.com> + * g++.target/aarch64/sve/acle/general-c++/mangle_1.C: Test mangling of svbfloat16_t.
* g++.target/aarch64/sve/acle/general-c++/mangle_2.C: Likewise for diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_f32.c new file mode 100644 index 0000000..376622d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_f32.c @@ -0,0 +1,67 @@ +/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ +/* { dg-require-effective-target aarch64_asm_bf16_ok } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** bfdot_f32_tied1: +** bfdot z0\.s, z4\.h, z5\.h +** ret +*/ +TEST_DUAL_Z (bfdot_f32_tied1, svfloat32_t, svbfloat16_t, + z0 = svbfdot_f32 (z0, z4, z5), + z0 = svbfdot (z0, z4, z5)) + +/* +** bfdot_f32_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** bfdot z0\.s, \1\.h, z1\.h +** ret +*/ +TEST_DUAL_Z_REV (bfdot_f32_tied2, svfloat32_t, svbfloat16_t, + z0_res = svbfdot_f32 (z4, z0, z1), + z0_res = svbfdot (z4, z0, z1)) + +/* +** bfdot_f32_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** bfdot z0\.s, z1\.h, \1\.h +** ret +*/ +TEST_DUAL_Z_REV (bfdot_f32_tied3, svfloat32_t, svbfloat16_t, + z0_res = svbfdot_f32 (z4, z1, z0), + z0_res = svbfdot (z4, z1, z0)) + +/* +** bfdot_f32_untied: +** movprfx z0, z1 +** bfdot z0\.s, z4\.h, z5\.h +** ret +*/ +TEST_DUAL_Z (bfdot_f32_untied, svfloat32_t, svbfloat16_t, + z0 = svbfdot_f32 (z1, z4, z5), + z0 = svbfdot (z1, z4, z5)) + +/* +** bfdot_h7_f32_tied1: +** mov (z[0-9]+\.h), h7 +** bfdot z0\.s, z4\.h, \1 +** ret +*/ +TEST_DUAL_ZD (bfdot_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t, + z0 = svbfdot_n_f32 (z0, z4, d7), + z0 = svbfdot (z0, z4, d7)) + +/* +** bfdot_h7_f32_untied: +** mov (z[0-9]+\.h), h7 +** movprfx z0, z1 +** bfdot z0\.s, z4\.h, \1 +** ret +*/ +TEST_DUAL_ZD (bfdot_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t, + z0 = svbfdot_n_f32 (z1, z4, d7), + z0 = svbfdot (z1, z4, d7)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_lane_f32.c new file mode 100644 index 0000000..0f624fe --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_lane_f32.c @@ -0,0 +1,86 @@ +/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ +/* { dg-require-effective-target aarch64_asm_bf16_ok } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** bfdot_lane_0_f32_tied1: +** bfdot z0\.s, z4\.h, z5\.h\[0\] +** ret +*/ +TEST_DUAL_Z (bfdot_lane_0_f32_tied1, svfloat32_t, svbfloat16_t, + z0 = svbfdot_lane_f32 (z0, z4, z5, 0), + z0 = svbfdot_lane (z0, z4, z5, 0)) + +/* +** bfdot_lane_0_f32_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** bfdot z0\.s, \1\.h, z1\.h\[0\] +** ret +*/ +TEST_DUAL_Z_REV (bfdot_lane_0_f32_tied2, svfloat32_t, svbfloat16_t, + z0_res = svbfdot_lane_f32 (z4, z0, z1, 0), + z0_res = svbfdot_lane (z4, z0, z1, 0)) + +/* +** bfdot_lane_0_f32_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** bfdot z0\.s, z1\.h, \1\.h\[0\] +** ret +*/ +TEST_DUAL_Z_REV (bfdot_lane_0_f32_tied3, svfloat32_t, svbfloat16_t, + z0_res = svbfdot_lane_f32 (z4, z1, z0, 0), + z0_res = svbfdot_lane (z4, z1, z0, 0)) + +/* +** bfdot_lane_0_f32_untied: +** movprfx z0, z1 +** bfdot z0\.s, z4\.h, z5\.h\[0\] +** ret +*/ +TEST_DUAL_Z (bfdot_lane_0_f32_untied, svfloat32_t, svbfloat16_t, + z0 = svbfdot_lane_f32 (z1, z4, z5, 0), + z0 = svbfdot_lane (z1, z4, z5, 0)) + +/* +** bfdot_lane_1_f32: +** bfdot z0\.s, z4\.h, z5\.h\[1\] +** ret +*/ 
+TEST_DUAL_Z (bfdot_lane_1_f32, svfloat32_t, svbfloat16_t, + z0 = svbfdot_lane_f32 (z0, z4, z5, 1), + z0 = svbfdot_lane (z0, z4, z5, 1)) + +/* +** bfdot_lane_3_f32: +** bfdot z0\.s, z4\.h, z5\.h\[3\] +** ret +*/ +TEST_DUAL_Z (bfdot_lane_3_f32, svfloat32_t, svbfloat16_t, + z0 = svbfdot_lane_f32 (z0, z4, z5, 3), + z0 = svbfdot_lane (z0, z4, z5, 3)) + +/* +** bfdot_lane_z8_f32: +** str d8, \[sp, -16\]! +** mov (z[0-7])\.d, z8\.d +** bfdot z0\.s, z1\.h, \1\.h\[1\] +** ldr d8, \[sp\], 16 +** ret +*/ +TEST_DUAL_LANE_REG (bfdot_lane_z8_f32, svfloat32_t, svbfloat16_t, z8, + z0 = svbfdot_lane_f32 (z0, z1, z8, 1), + z0 = svbfdot_lane (z0, z1, z8, 1)) + +/* +** bfdot_lane_z16_f32: +** mov (z[0-7])\.d, z16\.d +** bfdot z0\.s, z1\.h, \1\.h\[1\] +** ret +*/ +TEST_DUAL_LANE_REG (bfdot_lane_z16_f32, svfloat32_t, svbfloat16_t, z16, + z0 = svbfdot_lane_f32 (z0, z1, z16, 1), + z0 = svbfdot_lane (z0, z1, z16, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_f32.c new file mode 100644 index 0000000..0f81011 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_f32.c @@ -0,0 +1,67 @@ +/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ +/* { dg-require-effective-target aarch64_asm_bf16_ok } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** bfmlalb_f32_tied1: +** bfmlalb z0\.s, z4\.h, z5\.h +** ret +*/ +TEST_DUAL_Z (bfmlalb_f32_tied1, svfloat32_t, svbfloat16_t, + z0 = svbfmlalb_f32 (z0, z4, z5), + z0 = svbfmlalb (z0, z4, z5)) + +/* +** bfmlalb_f32_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** bfmlalb z0\.s, \1\.h, z1\.h +** ret +*/ +TEST_DUAL_Z_REV (bfmlalb_f32_tied2, svfloat32_t, svbfloat16_t, + z0_res = svbfmlalb_f32 (z4, z0, z1), + z0_res = svbfmlalb (z4, z0, z1)) + +/* +** bfmlalb_f32_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** bfmlalb z0\.s, z1\.h, \1\.h +** ret +*/ +TEST_DUAL_Z_REV (bfmlalb_f32_tied3, svfloat32_t, svbfloat16_t, + z0_res = svbfmlalb_f32 (z4, z1, z0), + z0_res = svbfmlalb (z4, z1, z0)) + +/* +** bfmlalb_f32_untied: +** movprfx z0, z1 +** bfmlalb z0\.s, z4\.h, z5\.h +** ret +*/ +TEST_DUAL_Z (bfmlalb_f32_untied, svfloat32_t, svbfloat16_t, + z0 = svbfmlalb_f32 (z1, z4, z5), + z0 = svbfmlalb (z1, z4, z5)) + +/* +** bfmlalb_h7_f32_tied1: +** mov (z[0-9]+\.h), h7 +** bfmlalb z0\.s, z4\.h, \1 +** ret +*/ +TEST_DUAL_ZD (bfmlalb_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t, + z0 = svbfmlalb_n_f32 (z0, z4, d7), + z0 = svbfmlalb (z0, z4, d7)) + +/* +** bfmlalb_h7_f32_untied: +** mov (z[0-9]+\.h), h7 +** movprfx z0, z1 +** bfmlalb z0\.s, z4\.h, \1 +** ret +*/ +TEST_DUAL_ZD (bfmlalb_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t, + z0 = svbfmlalb_n_f32 (z1, z4, d7), + z0 = svbfmlalb (z1, z4, d7)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_lane_f32.c new file mode 100644 index 0000000..b0ec088 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_lane_f32.c @@ -0,0 +1,86 @@ +/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ +/* { dg-require-effective-target aarch64_asm_bf16_ok } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** bfmlalb_lane_0_f32_tied1: +** bfmlalb z0\.s, z4\.h, z5\.h\[0\] +** ret +*/ +TEST_DUAL_Z (bfmlalb_lane_0_f32_tied1, svfloat32_t, svbfloat16_t, + z0 = svbfmlalb_lane_f32 (z0, z4, z5, 0), + z0 = 
svbfmlalb_lane (z0, z4, z5, 0)) + +/* +** bfmlalb_lane_0_f32_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** bfmlalb z0\.s, \1\.h, z1\.h\[0\] +** ret +*/ +TEST_DUAL_Z_REV (bfmlalb_lane_0_f32_tied2, svfloat32_t, svbfloat16_t, + z0_res = svbfmlalb_lane_f32 (z4, z0, z1, 0), + z0_res = svbfmlalb_lane (z4, z0, z1, 0)) + +/* +** bfmlalb_lane_0_f32_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** bfmlalb z0\.s, z1\.h, \1\.h\[0\] +** ret +*/ +TEST_DUAL_Z_REV (bfmlalb_lane_0_f32_tied3, svfloat32_t, svbfloat16_t, + z0_res = svbfmlalb_lane_f32 (z4, z1, z0, 0), + z0_res = svbfmlalb_lane (z4, z1, z0, 0)) + +/* +** bfmlalb_lane_0_f32_untied: +** movprfx z0, z1 +** bfmlalb z0\.s, z4\.h, z5\.h\[0\] +** ret +*/ +TEST_DUAL_Z (bfmlalb_lane_0_f32_untied, svfloat32_t, svbfloat16_t, + z0 = svbfmlalb_lane_f32 (z1, z4, z5, 0), + z0 = svbfmlalb_lane (z1, z4, z5, 0)) + +/* +** bfmlalb_lane_1_f32: +** bfmlalb z0\.s, z4\.h, z5\.h\[1\] +** ret +*/ +TEST_DUAL_Z (bfmlalb_lane_1_f32, svfloat32_t, svbfloat16_t, + z0 = svbfmlalb_lane_f32 (z0, z4, z5, 1), + z0 = svbfmlalb_lane (z0, z4, z5, 1)) + +/* +** bfmlalb_lane_7_f32: +** bfmlalb z0\.s, z4\.h, z5\.h\[7\] +** ret +*/ +TEST_DUAL_Z (bfmlalb_lane_7_f32, svfloat32_t, svbfloat16_t, + z0 = svbfmlalb_lane_f32 (z0, z4, z5, 7), + z0 = svbfmlalb_lane (z0, z4, z5, 7)) + +/* +** bfmlalb_lane_z8_f32: +** str d8, \[sp, -16\]! +** mov (z[0-7])\.d, z8\.d +** bfmlalb z0\.s, z1\.h, \1\.h\[1\] +** ldr d8, \[sp\], 16 +** ret +*/ +TEST_DUAL_LANE_REG (bfmlalb_lane_z8_f32, svfloat32_t, svbfloat16_t, z8, + z0 = svbfmlalb_lane_f32 (z0, z1, z8, 1), + z0 = svbfmlalb_lane (z0, z1, z8, 1)) + +/* +** bfmlalb_lane_z16_f32: +** mov (z[0-7])\.d, z16\.d +** bfmlalb z0\.s, z1\.h, \1\.h\[1\] +** ret +*/ +TEST_DUAL_LANE_REG (bfmlalb_lane_z16_f32, svfloat32_t, svbfloat16_t, z16, + z0 = svbfmlalb_lane_f32 (z0, z1, z16, 1), + z0 = svbfmlalb_lane (z0, z1, z16, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_f32.c new file mode 100644 index 0000000..2a583fa --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_f32.c @@ -0,0 +1,67 @@ +/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ +/* { dg-require-effective-target aarch64_asm_bf16_ok } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** bfmlalt_f32_tied1: +** bfmlalt z0\.s, z4\.h, z5\.h +** ret +*/ +TEST_DUAL_Z (bfmlalt_f32_tied1, svfloat32_t, svbfloat16_t, + z0 = svbfmlalt_f32 (z0, z4, z5), + z0 = svbfmlalt (z0, z4, z5)) + +/* +** bfmlalt_f32_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** bfmlalt z0\.s, \1\.h, z1\.h +** ret +*/ +TEST_DUAL_Z_REV (bfmlalt_f32_tied2, svfloat32_t, svbfloat16_t, + z0_res = svbfmlalt_f32 (z4, z0, z1), + z0_res = svbfmlalt (z4, z0, z1)) + +/* +** bfmlalt_f32_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** bfmlalt z0\.s, z1\.h, \1\.h +** ret +*/ +TEST_DUAL_Z_REV (bfmlalt_f32_tied3, svfloat32_t, svbfloat16_t, + z0_res = svbfmlalt_f32 (z4, z1, z0), + z0_res = svbfmlalt (z4, z1, z0)) + +/* +** bfmlalt_f32_untied: +** movprfx z0, z1 +** bfmlalt z0\.s, z4\.h, z5\.h +** ret +*/ +TEST_DUAL_Z (bfmlalt_f32_untied, svfloat32_t, svbfloat16_t, + z0 = svbfmlalt_f32 (z1, z4, z5), + z0 = svbfmlalt (z1, z4, z5)) + +/* +** bfmlalt_h7_f32_tied1: +** mov (z[0-9]+\.h), h7 +** bfmlalt z0\.s, z4\.h, \1 +** ret +*/ +TEST_DUAL_ZD (bfmlalt_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t, + z0 = svbfmlalt_n_f32 (z0, z4, d7), + z0 = svbfmlalt 
(z0, z4, d7)) + +/* +** bfmlalt_h7_f32_untied: +** mov (z[0-9]+\.h), h7 +** movprfx z0, z1 +** bfmlalt z0\.s, z4\.h, \1 +** ret +*/ +TEST_DUAL_ZD (bfmlalt_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t, + z0 = svbfmlalt_n_f32 (z1, z4, d7), + z0 = svbfmlalt (z1, z4, d7)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_lane_f32.c new file mode 100644 index 0000000..3af3997 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_lane_f32.c @@ -0,0 +1,86 @@ +/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ +/* { dg-require-effective-target aarch64_asm_bf16_ok } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** bfmlalt_lane_0_f32_tied1: +** bfmlalt z0\.s, z4\.h, z5\.h\[0\] +** ret +*/ +TEST_DUAL_Z (bfmlalt_lane_0_f32_tied1, svfloat32_t, svbfloat16_t, + z0 = svbfmlalt_lane_f32 (z0, z4, z5, 0), + z0 = svbfmlalt_lane (z0, z4, z5, 0)) + +/* +** bfmlalt_lane_0_f32_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** bfmlalt z0\.s, \1\.h, z1\.h\[0\] +** ret +*/ +TEST_DUAL_Z_REV (bfmlalt_lane_0_f32_tied2, svfloat32_t, svbfloat16_t, + z0_res = svbfmlalt_lane_f32 (z4, z0, z1, 0), + z0_res = svbfmlalt_lane (z4, z0, z1, 0)) + +/* +** bfmlalt_lane_0_f32_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** bfmlalt z0\.s, z1\.h, \1\.h\[0\] +** ret +*/ +TEST_DUAL_Z_REV (bfmlalt_lane_0_f32_tied3, svfloat32_t, svbfloat16_t, + z0_res = svbfmlalt_lane_f32 (z4, z1, z0, 0), + z0_res = svbfmlalt_lane (z4, z1, z0, 0)) + +/* +** bfmlalt_lane_0_f32_untied: +** movprfx z0, z1 +** bfmlalt z0\.s, z4\.h, z5\.h\[0\] +** ret +*/ +TEST_DUAL_Z (bfmlalt_lane_0_f32_untied, svfloat32_t, svbfloat16_t, + z0 = svbfmlalt_lane_f32 (z1, z4, z5, 0), + z0 = svbfmlalt_lane (z1, z4, z5, 0)) + +/* +** bfmlalt_lane_1_f32: +** bfmlalt z0\.s, z4\.h, z5\.h\[1\] +** ret +*/ +TEST_DUAL_Z (bfmlalt_lane_1_f32, svfloat32_t, svbfloat16_t, + z0 = svbfmlalt_lane_f32 (z0, z4, z5, 1), + z0 = svbfmlalt_lane (z0, z4, z5, 1)) + +/* +** bfmlalt_lane_7_f32: +** bfmlalt z0\.s, z4\.h, z5\.h\[7\] +** ret +*/ +TEST_DUAL_Z (bfmlalt_lane_7_f32, svfloat32_t, svbfloat16_t, + z0 = svbfmlalt_lane_f32 (z0, z4, z5, 7), + z0 = svbfmlalt_lane (z0, z4, z5, 7)) + +/* +** bfmlalt_lane_z8_f32: +** str d8, \[sp, -16\]! 
+** mov (z[0-7])\.d, z8\.d +** bfmlalt z0\.s, z1\.h, \1\.h\[1\] +** ldr d8, \[sp\], 16 +** ret +*/ +TEST_DUAL_LANE_REG (bfmlalt_lane_z8_f32, svfloat32_t, svbfloat16_t, z8, + z0 = svbfmlalt_lane_f32 (z0, z1, z8, 1), + z0 = svbfmlalt_lane (z0, z1, z8, 1)) + +/* +** bfmlalt_lane_z16_f32: +** mov (z[0-7])\.d, z16\.d +** bfmlalt z0\.s, z1\.h, \1\.h\[1\] +** ret +*/ +TEST_DUAL_LANE_REG (bfmlalt_lane_z16_f32, svfloat32_t, svbfloat16_t, z16, + z0 = svbfmlalt_lane_f32 (z0, z1, z16, 1), + z0 = svbfmlalt_lane (z0, z1, z16, 1)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c new file mode 100644 index 0000000..b1d98fb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c @@ -0,0 +1,46 @@ +/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ +/* { dg-require-effective-target aarch64_asm_bf16_ok } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** bfmmla_f32_tied1: +** bfmmla z0\.s, z4\.h, z5\.h +** ret +*/ +TEST_DUAL_Z (bfmmla_f32_tied1, svfloat32_t, svbfloat16_t, + z0 = svbfmmla_f32 (z0, z4, z5), + z0 = svbfmmla (z0, z4, z5)) + +/* +** bfmmla_f32_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** bfmmla z0\.s, \1\.h, z1\.h +** ret +*/ +TEST_DUAL_Z_REV (bfmmla_f32_tied2, svfloat32_t, svbfloat16_t, + z0_res = svbfmmla_f32 (z4, z0, z1), + z0_res = svbfmmla (z4, z0, z1)) + +/* +** bfmmla_f32_tied3: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** bfmmla z0\.s, z1\.h, \1\.h +** ret +*/ +TEST_DUAL_Z_REV (bfmmla_f32_tied3, svfloat32_t, svbfloat16_t, + z0_res = svbfmmla_f32 (z4, z1, z0), + z0_res = svbfmmla (z4, z1, z0)) + +/* +** bfmmla_f32_untied: +** movprfx z0, z1 +** bfmmla z0\.s, z4\.h, z5\.h +** ret +*/ +TEST_DUAL_Z (bfmmla_f32_untied, svfloat32_t, svbfloat16_t, + z0 = svbfmmla_f32 (z1, z4, z5), + z0 = svbfmmla (z1, z4, z5)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_bf16.c new file mode 100644 index 0000000..52baa1f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_bf16.c @@ -0,0 +1,96 @@ +/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ +/* { dg-require-effective-target aarch64_asm_bf16_ok } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** cvt_bf16_f32_m_tied1: +** bfcvt z0\.h, p0/m, z4\.s +** ret +*/ +TEST_DUAL_Z (cvt_bf16_f32_m_tied1, svbfloat16_t, svfloat32_t, + z0 = svcvt_bf16_f32_m (z0, p0, z4), + z0 = svcvt_bf16_m (z0, p0, z4)) + +/* +** cvt_bf16_f32_m_tied2: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** bfcvt z0\.h, p0/m, \1\.s +** ret +*/ +TEST_DUAL_Z_REV (cvt_bf16_f32_m_tied2, svbfloat16_t, svfloat32_t, + z0_res = svcvt_bf16_f32_m (z4, p0, z0), + z0_res = svcvt_bf16_m (z4, p0, z0)) + +/* +** cvt_bf16_f32_m_untied: +** movprfx z0, z1 +** bfcvt z0\.h, p0/m, z4\.s +** ret +*/ +TEST_DUAL_Z (cvt_bf16_f32_m_untied, svbfloat16_t, svfloat32_t, + z0 = svcvt_bf16_f32_m (z1, p0, z4), + z0 = svcvt_bf16_m (z1, p0, z4)) + +/* +** cvt_bf16_f32_z_tied1: +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0\.s, p0/z, \1\.s +** bfcvt z0\.h, p0/m, \1\.s +** ret +*/ +TEST_DUAL_Z_REV (cvt_bf16_f32_z_tied1, svbfloat16_t, svfloat32_t, + z0_res = svcvt_bf16_f32_z (p0, z0), + z0_res = svcvt_bf16_z (p0, z0)) + +/* +** cvt_bf16_f32_z_untied: +** movprfx z0\.s, p0/z, z4\.s +** bfcvt z0\.h, p0/m, z4\.s +** ret +*/ +TEST_DUAL_Z (cvt_bf16_f32_z_untied, svbfloat16_t, 
svfloat32_t, + z0 = svcvt_bf16_f32_z (p0, z4), + z0 = svcvt_bf16_z (p0, z4)) + +/* +** cvt_bf16_f32_x_tied1: +** bfcvt z0\.h, p0/m, z0\.s +** ret +*/ +TEST_DUAL_Z_REV (cvt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t, + z0_res = svcvt_bf16_f32_x (p0, z0), + z0_res = svcvt_bf16_x (p0, z0)) + +/* +** cvt_bf16_f32_x_untied: +** bfcvt z0\.h, p0/m, z4\.s +** ret +*/ +TEST_DUAL_Z (cvt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t, + z0 = svcvt_bf16_f32_x (p0, z4), + z0 = svcvt_bf16_x (p0, z4)) + +/* +** ptrue_cvt_bf16_f32_x_tied1: +** ... +** ptrue p[0-9]+\.b[^\n]* +** ... +** ret +*/ +TEST_DUAL_Z_REV (ptrue_cvt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t, + z0_res = svcvt_bf16_f32_x (svptrue_b32 (), z0), + z0_res = svcvt_bf16_x (svptrue_b32 (), z0)) + +/* +** ptrue_cvt_bf16_f32_x_untied: +** ... +** ptrue p[0-9]+\.b[^\n]* +** ... +** ret +*/ +TEST_DUAL_Z (ptrue_cvt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t, + z0 = svcvt_bf16_f32_x (svptrue_b32 (), z4), + z0 = svcvt_bf16_x (svptrue_b32 (), z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvtnt_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvtnt_bf16.c new file mode 100644 index 0000000..54614c9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvtnt_bf16.c @@ -0,0 +1,90 @@ +/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ +/* { dg-require-effective-target aarch64_asm_bf16_ok } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +/* +** cvtnt_bf16_f32_m_tied1: +** bfcvtnt z0\.h, p0/m, z4\.s +** ret +*/ +TEST_DUAL_Z (cvtnt_bf16_f32_m_tied1, svbfloat16_t, svfloat32_t, + z0 = svcvtnt_bf16_f32_m (z0, p0, z4), + z0 = svcvtnt_bf16_m (z0, p0, z4)) + +/* Bad RA choice: no preferred output sequence. */ +TEST_DUAL_Z_REV (cvtnt_bf16_f32_m_tied2, svbfloat16_t, svfloat32_t, + z0_res = svcvtnt_bf16_f32_m (z4, p0, z0), + z0_res = svcvtnt_bf16_m (z4, p0, z0)) + +/* +** cvtnt_bf16_f32_m_untied: +** ( +** mov z0\.d, z1\.d +** bfcvtnt z0\.h, p0/m, z4\.s +** | +** bfcvtnt z1\.h, p0/m, z4\.s +** mov z0\.d, z1\.d +** ) +** ret +*/ +TEST_DUAL_Z (cvtnt_bf16_f32_m_untied, svbfloat16_t, svfloat32_t, + z0 = svcvtnt_bf16_f32_m (z1, p0, z4), + z0 = svcvtnt_bf16_m (z1, p0, z4)) + +/* +** cvtnt_bf16_f32_x_tied1: +** bfcvtnt z0\.h, p0/m, z4\.s +** ret +*/ +TEST_DUAL_Z (cvtnt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t, + z0 = svcvtnt_bf16_f32_x (z0, p0, z4), + z0 = svcvtnt_bf16_x (z0, p0, z4)) + +/* Bad RA choice: no preferred output sequence. */ +TEST_DUAL_Z_REV (cvtnt_bf16_f32_x_tied2, svbfloat16_t, svfloat32_t, + z0_res = svcvtnt_bf16_f32_x (z4, p0, z0), + z0_res = svcvtnt_bf16_x (z4, p0, z0)) + +/* +** cvtnt_bf16_f32_x_untied: +** ( +** mov z0\.d, z1\.d +** bfcvtnt z0\.h, p0/m, z4\.s +** | +** bfcvtnt z1\.h, p0/m, z4\.s +** mov z0\.d, z1\.d +** ) +** ret +*/ +TEST_DUAL_Z (cvtnt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t, + z0 = svcvtnt_bf16_f32_x (z1, p0, z4), + z0 = svcvtnt_bf16_x (z1, p0, z4)) + +/* +** ptrue_cvtnt_bf16_f32_x_tied1: +** ... +** ptrue p[0-9]+\.b[^\n]* +** ... +** ret +*/ +TEST_DUAL_Z (ptrue_cvtnt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t, + z0 = svcvtnt_bf16_f32_x (z0, svptrue_b32 (), z4), + z0 = svcvtnt_bf16_x (z0, svptrue_b32 (), z4)) + +/* Bad RA choice: no preferred output sequence. */ +TEST_DUAL_Z_REV (ptrue_cvtnt_bf16_f32_x_tied2, svbfloat16_t, svfloat32_t, + z0_res = svcvtnt_bf16_f32_x (z4, svptrue_b32 (), z0), + z0_res = svcvtnt_bf16_x (z4, svptrue_b32 (), z0)) + +/* +** ptrue_cvtnt_bf16_f32_x_untied: +** ... +** ptrue p[0-9]+\.b[^\n]* +** ... 
+** ret +*/ +TEST_DUAL_Z (ptrue_cvtnt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t, + z0 = svcvtnt_bf16_f32_x (z1, svptrue_b32 (), z4), + z0 = svcvtnt_bf16_x (z1, svptrue_b32 (), z4)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c new file mode 100644 index 0000000..a9233324 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ + +#include <arm_sve.h> + +#pragma GCC target ("arch=armv8.2-a+sve+bf16") + +void +f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32, + svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, bfloat16_t bf) +{ + svbfmmla (f32, bf16); /* { dg-error {too few arguments to function 'svbfmmla'} } */ + svbfmmla (f32, bf16, bf16, 0); /* { dg-error {too many arguments to function 'svbfmmla'} } */ + svbfmmla (0, bf16, bf16); /* { dg-error {passing 'int' to argument 1 of 'svbfmmla', which expects an SVE vector type} } */ + svbfmmla (pg, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svbool_t' arguments} } */ + svbfmmla (u8, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svuint8_t' arguments} } */ + svbfmmla (u16, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svuint16_t' arguments} } */ + svbfmmla (f64, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svfloat64_t' arguments} } */ + svbfmmla (f32, bf16, bf16); + svbfmmla (f32, 0, bf16); /* { dg-error {passing 'int' to argument 2 of 'svbfmmla', which expects 'svbfloat16_t'} } */ + svbfmmla (f32, f32, bf16); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svbfmmla', which expects 'svbfloat16_t'} } */ + svbfmmla (f32, bf16, 0); /* { dg-error {passing 'int' to argument 3 of 'svbfmmla', which expects 'svbfloat16_t'} } */ + svbfmmla (f32, bf16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svbfmmla', which expects 'svbfloat16_t'} } */ + svbfmmla (f32, bf16, bf); /* { dg-error {passing 'bfloat16_t'[^\n]* to argument 3 of 'svbfmmla', which expects 'svbfloat16_t'} } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c new file mode 100644 index 0000000..23f027f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ + +#include <arm_sve.h> + +#pragma GCC target ("arch=armv8.2-a+sve+bf16") + +void +f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32, + svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, int i) +{ + svbfmlalb_lane (f32, bf16, bf16); /* { dg-error {too few arguments to function 'svbfmlalb_lane'} } */ + svbfmlalb_lane (f32, bf16, bf16, 0, 0); /* { dg-error {too many arguments to function 'svbfmlalb_lane'} } */ + svbfmlalb_lane (0, bf16, bf16, 0); /* { dg-error {passing 'int' to argument 1 of 'svbfmlalb_lane', which expects an SVE vector type} } */ + svbfmlalb_lane (pg, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svbool_t' arguments} } */ + svbfmlalb_lane (u8, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svuint8_t' arguments} } */ + svbfmlalb_lane (u16, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svuint16_t' arguments} } */ + svbfmlalb_lane (f64, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svfloat64_t' arguments} } */ + 
svbfmlalb_lane (f32, bf16, bf16, 0); + svbfmlalb_lane (f32, 0, bf16, 0); /* { dg-error {passing 'int' to argument 2 of 'svbfmlalb_lane', which expects 'svbfloat16_t'} } */ + svbfmlalb_lane (f32, f32, bf16, 0); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svbfmlalb_lane', which expects 'svbfloat16_t'} } */ + svbfmlalb_lane (f32, bf16, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svbfmlalb_lane', which expects 'svbfloat16_t'} } */ + svbfmlalb_lane (f32, bf16, f32, 0); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svbfmlalb_lane', which expects 'svbfloat16_t'} } */ + svbfmlalb_lane (f32, bf16, bf16, s32); /* { dg-error {argument 4 of 'svbfmlalb_lane' must be an integer constant expression} } */ + svbfmlalb_lane (f32, bf16, bf16, i); /* { dg-error {argument 4 of 'svbfmlalb_lane' must be an integer constant expression} } */ + + svbfmlalb_lane (f32, bf16, bf16, 0); + svbfmlalb_lane (f32, bf16, bf16, 7); + svbfmlalb_lane (f32, bf16, bf16, 8); /* { dg-error {passing 8 to argument 4 of 'svbfmlalb_lane', which expects a value in the range \[0, 7\]} } */ + svbfmlalb_lane (f32, bf16, bf16, -1); /* { dg-error {passing -1 to argument 4 of 'svbfmlalb_lane', which expects a value in the range \[0, 7\]} } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c new file mode 100644 index 0000000..4755ca7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ + +#include <arm_sve.h> + +#pragma GCC target ("arch=armv8.2-a+sve+bf16") + +void +f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32, + svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, int i) +{ + svbfdot_lane (f32, bf16, bf16); /* { dg-error {too few arguments to function 'svbfdot_lane'} } */ + svbfdot_lane (f32, bf16, bf16, 0, 0); /* { dg-error {too many arguments to function 'svbfdot_lane'} } */ + svbfdot_lane (0, bf16, bf16, 0); /* { dg-error {passing 'int' to argument 1 of 'svbfdot_lane', which expects an SVE vector type} } */ + svbfdot_lane (pg, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svbool_t' arguments} } */ + svbfdot_lane (u8, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svuint8_t' arguments} } */ + svbfdot_lane (u16, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svuint16_t' arguments} } */ + svbfdot_lane (f64, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svfloat64_t' arguments} } */ + svbfdot_lane (f32, bf16, bf16, 0); + svbfdot_lane (f32, 0, bf16, 0); /* { dg-error {passing 'int' to argument 2 of 'svbfdot_lane', which expects 'svbfloat16_t'} } */ + svbfdot_lane (f32, f32, bf16, 0); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svbfdot_lane', which expects 'svbfloat16_t'} } */ + svbfdot_lane (f32, bf16, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svbfdot_lane', which expects 'svbfloat16_t'} } */ + svbfdot_lane (f32, bf16, f32, 0); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svbfdot_lane', which expects 'svbfloat16_t'} } */ + svbfdot_lane (f32, bf16, bf16, s32); /* { dg-error {argument 4 of 'svbfdot_lane' must be an integer constant expression} } */ + svbfdot_lane (f32, bf16, bf16, i); /* { dg-error {argument 4 of 'svbfdot_lane' must be an integer constant expression} } */ + + svbfdot_lane (f32, bf16, bf16, 0); + svbfdot_lane (f32, bf16, bf16, 3); + svbfdot_lane 
(f32, bf16, bf16, 4); /* { dg-error {passing 4 to argument 4 of 'svbfdot_lane', which expects a value in the range \[0, 3\]} } */ + svbfdot_lane (f32, bf16, bf16, -1); /* { dg-error {passing -1 to argument 4 of 'svbfdot_lane', which expects a value in the range \[0, 3\]} } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c new file mode 100644 index 0000000..2d09a8e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ + +#include <arm_sve.h> + +#pragma GCC target ("arch=armv8.2-a+sve+bf16") + +void +f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32, + svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, bfloat16_t bf) +{ + svbfdot (f32, bf16); /* { dg-error {too few arguments to function 'svbfdot'} } */ + svbfdot (f32, bf16, bf16, 0); /* { dg-error {too many arguments to function 'svbfdot'} } */ + svbfdot (0, bf16, bf16); /* { dg-error {passing 'int' to argument 1 of 'svbfdot', which expects an SVE vector type} } */ + svbfdot (pg, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svbool_t' arguments} } */ + svbfdot (u8, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svuint8_t' arguments} } */ + svbfdot (u16, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svuint16_t' arguments} } */ + svbfdot (f64, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svfloat64_t' arguments} } */ + svbfdot (f32, bf16, bf16); + svbfdot (f32, 0, bf16); /* { dg-error {passing 'int' to argument 2 of 'svbfdot', which expects 'svbfloat16_t'} } */ + svbfdot (f32, f32, bf16); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svbfdot', which expects 'svbfloat16_t'} } */ + svbfdot (f32, bf16, 0); /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ + svbfdot (f32, bf16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svbfdot', which expects 'svbfloat16_t'} } */ + svbfdot (f32, bf16, bf); +} diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 6c06faf..5377d7b 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -8996,7 +8996,7 @@ proc check_effective_target_aarch64_tiny { } { # various architecture extensions via the .arch_extension pseudo-op. foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve" - "i8mm" "f32mm" "f64mm" } { + "i8mm" "f32mm" "f64mm" "bf16" } { eval [string map [list FUNC $aarch64_ext] { proc check_effective_target_aarch64_asm_FUNC_ok { } { if { [istarget aarch64*-*-*] } { |
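As a closing sketch (illustrative, not part of the patch): the lane forms take an integer constant expression, with the ranges the new shapes enforce and the general-c tests above check. svbfdot_lane indexes pairs of bf16 elements, so its index is in [0, 3]; svbfmlalb_lane and svbfmlalt_lane index single elements, so [0, 7].

#include <arm_sve.h>

/* Maximum legal lane indices per the range checks tested above.  */
svfloat32_t
bf16_lane_forms (svfloat32_t acc, svbfloat16_t a, svbfloat16_t b)
{
  acc = svbfdot_lane_f32 (acc, a, b, 3);    /* index in [0, 3] */
  acc = svbfmlalb_lane_f32 (acc, a, b, 7);  /* index in [0, 7] */
  return svbfmlalt_lane_f32 (acc, a, b, 7); /* index in [0, 7] */
}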