diff options
author | Claudio Bantaloukas <claudio.bantaloukas@arm.com> | 2024-11-29 12:52:45 +0000 |
---|---|---|
committer | Richard Sandiford <richard.sandiford@arm.com> | 2024-11-29 12:52:45 +0000 |
commit | 441f8d637d77d4e666bb0424af2335b1c8780890 (patch) | |
tree | 57eebf7d08482f1197cbce687b6fef31d8cc8239 /gcc | |
parent | 538204079b2fc9145e0cae61aacda493e1037327 (diff) | |
download | gcc-441f8d637d77d4e666bb0424af2335b1c8780890.zip gcc-441f8d637d77d4e666bb0424af2335b1c8780890.tar.gz gcc-441f8d637d77d4e666bb0424af2335b1c8780890.tar.bz2 |
aarch64: add SVE2 FP8DOT2 and FP8DOT4 intrinsics
This patch adds support for the following intrinsics:
- svdot[_f32_mf8]_fpm
- svdot_lane[_f32_mf8]_fpm
- svdot[_f16_mf8]_fpm
- svdot_lane[_f16_mf8]_fpm
The first two are available under a combination of the FP8DOT4 and SVE2 features.
Alternatively under the SSVE_FP8DOT4 feature under streaming mode.
The final two are available under a combination of the FP8DOT2 and SVE2 features.
Alternatively under the SSVE_FP8DOT2 feature under streaming mode.
gcc/
* config/aarch64/aarch64-option-extensions.def
(fp8dot4, ssve-fp8dot4): Add new extensions.
(fp8dot2, ssve-fp8dot2): Likewise.
* config/aarch64/aarch64-sve-builtins-base.cc (svdot_impl): Support fp8.
(svdotprod_lane_impl): Likewise.
(svdot_lane): Provide an unspec for fp8 types.
* config/aarch64/aarch64-sve-builtins-shapes.cc
(ternary_mfloat8_def): Add new class.
(ternary_mfloat8): Add new shape.
(ternary_mfloat8_lane_group_selection_def): Add new class.
(ternary_mfloat8_lane_group_selection): Add new shape.
* config/aarch64/aarch64-sve-builtins-shapes.h
(ternary_mfloat8, ternary_mfloat8_lane_group_selection): Declare.
* config/aarch64/aarch64-sve-builtins-sve2.def
(svdot, svdot_lane): Add new DEF_SVE_FUNCTION_GS_FPM, twice to deal
with the combination of features providing support for 32 and 16 bit
floating point.
* config/aarch64/aarch64-sve2.md (@aarch64_sve_dot<mode>): Add new.
(@aarch64_sve_dot_lane<mode>): Likewise.
* config/aarch64/aarch64.h:
(TARGET_FP8DOT4, TARGET_SSVE_FP8DOT4): Add new defines.
(TARGET_FP8DOT2, TARGET_SSVE_FP8DOT2): Likewise.
* config/aarch64/iterators.md
(UNSPEC_DOT_FP8, UNSPEC_DOT_LANE_FP8): Add new unspecs.
* doc/invoke.texi: Document fp8dot4, fp8dot2, ssve-fp8dot4, ssve-fp8dot2
extensions.
gcc/testsuite/
* gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_1.c: Add new.
gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_lane_group_selection_1.c:
Likewise.
* gcc.target/aarch64/sve2/acle/asm/dot_lane_mf8.c: Likewise.
* gcc.target/aarch64/sve2/acle/asm/dot_mf8.c: Likewise.
* lib/target-supports.exp: Add dg-require-effective-target support for
aarch64_asm_fp8dot2_ok, aarch64_asm_fp8dot4_ok,
aarch64_asm_ssve-fp8dot2_ok and aarch64_asm_ssve-fp8dot4_ok.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/aarch64/aarch64-option-extensions.def | 8 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-sve-builtins-base.cc | 56 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-sve-builtins-shapes.cc | 48 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-sve-builtins-shapes.h | 8 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-sve-builtins-sve2.def | 14 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-sve2.md | 41 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.h | 18 | ||||
-rw-r--r-- | gcc/config/aarch64/iterators.md | 2 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 12 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_1.c | 33 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_lane_group_selection_1.c | 49 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_mf8.c | 172 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_mf8.c | 101 | ||||
-rw-r--r-- | gcc/testsuite/lib/target-supports.exp | 3 |
14 files changed, 541 insertions, 24 deletions
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index 002d5ab..90abb1c 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -251,6 +251,14 @@ AARCH64_OPT_EXTENSION("ssve-fp8fma", SSVE_FP8FMA, (SME2,FP8), (), (), "ssve-fp8f AARCH64_OPT_EXTENSION("faminmax", FAMINMAX, (SIMD), (), (), "faminmax") +AARCH64_OPT_EXTENSION("fp8dot4", FP8DOT4, (FP8FMA), (), (), "fp8dot4") + +AARCH64_OPT_EXTENSION("ssve-fp8dot4", SSVE_FP8DOT4, (SSVE_FP8FMA), (), (), "ssve-fp8dot4") + +AARCH64_OPT_EXTENSION("fp8dot2", FP8DOT2, (FP8DOT4), (), (), "fp8dot2") + +AARCH64_OPT_EXTENSION("ssve-fp8dot2", SSVE_FP8DOT2, (SSVE_FP8DOT4), (), (), "ssve-fp8dot2") + #undef AARCH64_OPT_FMV_EXTENSION #undef AARCH64_OPT_EXTENSION #undef AARCH64_FMV_FEATURE diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc index 95e66dc..b979419 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc @@ -838,21 +838,26 @@ public: rtx expand (function_expander &e) const override { - /* In the optab, the multiplication operands come before the accumulator - operand. The optab is keyed off the multiplication mode. */ - e.rotate_inputs_left (0, 3); insn_code icode; - if (e.type_suffix_ids[1] == NUM_TYPE_SUFFIXES) - icode = e.convert_optab_handler_for_sign (sdot_prod_optab, - udot_prod_optab, - 0, e.result_mode (), - GET_MODE (e.args[0])); + if (e.fpm_mode == aarch64_sve::FPM_set) + icode = code_for_aarch64_sve_dot (e.result_mode ()); else - icode = (e.type_suffix (0).float_p - ? CODE_FOR_aarch64_sve_fdotvnx4sfvnx8hf - : e.type_suffix (0).unsigned_p - ? CODE_FOR_udot_prodvnx4sivnx8hi - : CODE_FOR_sdot_prodvnx4sivnx8hi); + { + /* In the optab, the multiplication operands come before the accumulator + operand. The optab is keyed off the multiplication mode. */ + e.rotate_inputs_left (0, 3); + if (e.type_suffix_ids[1] == NUM_TYPE_SUFFIXES) + icode = e.convert_optab_handler_for_sign (sdot_prod_optab, + udot_prod_optab, + 0, e.result_mode (), + GET_MODE (e.args[0])); + else + icode = (e.type_suffix (0).float_p + ? CODE_FOR_aarch64_sve_fdotvnx4sfvnx8hf + : e.type_suffix (0).unsigned_p + ? CODE_FOR_udot_prodvnx4sivnx8hi + : CODE_FOR_sdot_prodvnx4sivnx8hi); + } return e.use_unpred_insn (icode); } }; @@ -865,17 +870,24 @@ public: rtx expand (function_expander &e) const override { + insn_code icode; machine_mode mode0 = GET_MODE (e.args[0]); machine_mode mode1 = GET_MODE (e.args[1]); - /* Use the same ordering as the dot_prod_optab, with the - accumulator last. */ - e.rotate_inputs_left (0, 4); - int unspec = unspec_for (e); - insn_code icode; - if (unspec == UNSPEC_FDOT) - icode = CODE_FOR_aarch64_fdot_prod_lanevnx4sfvnx8hf; + if (e.fpm_mode == aarch64_sve::FPM_set) + { + icode = code_for_aarch64_sve_dot_lane (mode0); + } else - icode = code_for_aarch64_dot_prod_lane (unspec, mode0, mode1); + { + /* Use the same ordering as the dot_prod_optab, with the + accumulator last. */ + e.rotate_inputs_left (0, 4); + int unspec = unspec_for (e); + if (unspec == UNSPEC_FDOT) + icode = CODE_FOR_aarch64_fdot_prod_lanevnx4sfvnx8hf; + else + icode = code_for_aarch64_dot_prod_lane (unspec, mode0, mode1); + } return e.use_exact_insn (icode); } }; @@ -3255,7 +3267,7 @@ FUNCTION (svdiv, svdiv_impl,) FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV)) FUNCTION (svdot, svdot_impl,) FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT, - UNSPEC_FDOT)) + UNSPEC_FDOT, UNSPEC_DOT_LANE_FP8)) FUNCTION (svdup, svdup_impl,) FUNCTION (svdup_lane, svdup_lane_impl,) FUNCTION (svdupq, svdupq_impl,) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc index 94f4da8..cf3ddab 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc @@ -4005,6 +4005,34 @@ struct ternary_bfloat_def }; SHAPE (ternary_bfloat) +/* sv<t0>_t svfoo[_t0](sv<t0>_t, svmfloat8_t, svmfloat8_t). */ +struct ternary_mfloat8_def + : public ternary_resize2_base<8, TYPE_mfloat, TYPE_mfloat> +{ + void + build (function_builder &b, const function_group_info &group) const override + { + gcc_assert (group.fpm_mode == FPM_set); + b.add_overloaded_functions (group, MODE_none); + build_all (b, "v0,v0,vM,vM", group, MODE_none); + } + + tree + resolve (function_resolver &r) const override + { + type_suffix_index type; + if (!r.check_num_arguments (4) + || (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES + || !r.require_vector_type (1, VECTOR_TYPE_svmfloat8_t) + || !r.require_vector_type (2, VECTOR_TYPE_svmfloat8_t) + || !r.require_scalar_type (3, "uint64_t")) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type, TYPE_SUFFIX_mf8, GROUP_none); + } +}; +SHAPE (ternary_mfloat8) + /* sv<t0>_t svfoo[_t0](sv<t0>_t, svbfloat16_t, svbfloat16_t, uint64_t) where the final argument is an integer constant expression in the range @@ -4057,6 +4085,26 @@ struct ternary_mfloat8_lane_def }; SHAPE (ternary_mfloat8_lane) +/* sv<t0>_t svfoo[_t0](sv<t0>_t, svmfloat8_t, svmfloat8_t, uint64_t) + + where the final argument is an integer constant expression in the range + [0, 7] or [0, 3]. */ +struct ternary_mfloat8_lane_group_selection_def + : public ternary_mfloat8_lane_def +{ + bool + check (function_checker &c) const override + { + machine_mode mode = c.vector_mode (0); + if (mode == E_VNx8HFmode) + return c.require_immediate_lane_index (3, 2, 2); + else if (mode == E_VNx4SFmode) + return c.require_immediate_lane_index (3, 2, 4); + gcc_unreachable (); + } +}; +SHAPE (ternary_mfloat8_lane_group_selection) + /* sv<t0>_t svfoo[_t0](sv<t0>_t, svbfloatt16_t, svbfloat16_t) sv<t0>_t svfoo[_n_t0](sv<t0>_t, svbfloat16_t, bfloat16_t). */ struct ternary_bfloat_opt_n_def diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h index 1c8937a..c7e448c 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h @@ -71,7 +71,11 @@ namespace aarch64_sve scalar displacement". - "_pred" indicates that the function takes an svbool_t argument - that does not act as a governing predicate.. */ + that does not act as a governing predicate.. + + - "_group_selection" indicates that the function takes an imm integer + argument that selects a specific group of elements that fit a 128 bit + vector. */ namespace shapes { extern const function_shape *const adr_index; @@ -213,7 +217,9 @@ namespace aarch64_sve extern const function_shape *const ternary_lane_rotate; extern const function_shape *const ternary_long_lane; extern const function_shape *const ternary_long_opt_n; + extern const function_shape *const ternary_mfloat8; extern const function_shape *const ternary_mfloat8_lane; + extern const function_shape *const ternary_mfloat8_lane_group_selection; extern const function_shape *const ternary_mfloat8_opt_n; extern const function_shape *const ternary_opt_n; extern const function_shape *const ternary_qq_or_011_lane; diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def index b489e8f..082dec1 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def @@ -396,3 +396,17 @@ DEF_SVE_FUNCTION_GS_FPM (svmlallbb_lane, ternary_mfloat8_lane, s_float_mf8, none DEF_SVE_FUNCTION_GS_FPM (svmlallbt_lane, ternary_mfloat8_lane, s_float_mf8, none, none, set) DEF_SVE_FUNCTION_GS_FPM (svmlalltb_lane, ternary_mfloat8_lane, s_float_mf8, none, none, set) #undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS \ + streaming_compatible (AARCH64_FL_SVE2 | AARCH64_FL_FP8DOT4, \ + AARCH64_FL_SSVE_FP8DOT4) +DEF_SVE_FUNCTION_GS_FPM (svdot, ternary_mfloat8, s_float_mf8, none, none, set) +DEF_SVE_FUNCTION_GS_FPM (svdot_lane, ternary_mfloat8_lane_group_selection, s_float_mf8, none, none, set) +#undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS \ + streaming_compatible (AARCH64_FL_SVE2 | AARCH64_FL_FP8DOT2, \ + AARCH64_FL_SSVE_FP8DOT2) +DEF_SVE_FUNCTION_GS_FPM (svdot, ternary_mfloat8, h_float_mf8, none, none, set) +DEF_SVE_FUNCTION_GS_FPM (svdot_lane, ternary_mfloat8_lane_group_selection, h_float_mf8, none, none, set) +#undef REQUIRED_EXTENSIONS diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index 5498eac..219e9fc 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -68,6 +68,7 @@ ;; ---- [INT] Shift-and-insert operations ;; ---- [INT] Sum of absolute differences ;; ---- [FP] Mfloat8 Multiply-and-accumulate operations +;; ---- [FP] Mfloat8 dot products ;; ;; == Extending arithmetic ;; ---- [INT] Multi-register widening conversions @@ -2074,6 +2075,46 @@ } ) +;; ------------------------------------------------------------------------- +;; ---- [FP] Mfloat8 dot products +;; ------------------------------------------------------------------------- +;; Includes: +;; - FDOT (4-way, vectors) +;; - FDOT (4-way, indexed) +;; - FDOT (2-way, vectors) +;; - FDOT (2-way, indexed) +;; ------------------------------------------------------------------------- +(define_insn "@aarch64_sve_dot<mode>" + [(set (match_operand:SVE_FULL_HSF 0 "register_operand") + (unspec:SVE_FULL_HSF + [(match_operand:SVE_FULL_HSF 1 "register_operand") + (match_operand:VNx16QI 2 "register_operand") + (match_operand:VNx16QI 3 "register_operand") + (reg:DI FPM_REGNUM)] + UNSPEC_DOT_FP8))] + "TARGET_SSVE_FP8DOT4 && !(<MODE>mode == VNx8HFmode && !TARGET_SSVE_FP8DOT2)" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] + [ w , 0 , w , w ; * ] fdot\t%0.<Vetype>, %2.b, %3.b + [ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;fdot\t%0.<Vetype>, %2.b, %3.b + } +) + +(define_insn "@aarch64_sve_dot_lane<mode>" + [(set (match_operand:SVE_FULL_HSF 0 "register_operand") + (unspec:SVE_FULL_HSF + [(match_operand:SVE_FULL_HSF 1 "register_operand") + (match_operand:VNx16QI 2 "register_operand") + (match_operand:VNx16QI 3 "register_operand") + (match_operand:SI 4 "const_int_operand") + (reg:DI FPM_REGNUM)] + UNSPEC_DOT_LANE_FP8))] + "TARGET_SSVE_FP8DOT4 && !(<MODE>mode == VNx8HFmode && !TARGET_SSVE_FP8DOT2)" + {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] + [ w , 0 , w , y ; * ] fdot\t%0.<Vetype>, %2.b, %3.b[%4] + [ ?&w , w , w , y ; yes ] movprfx\t%0, %1\;fdot\t%0.<Vetype>, %2.b, %3.b[%4] + } +) + ;; ========================================================================= ;; == Extending arithmetic ;; ========================================================================= diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 80a1fa4..53b4f88 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -527,6 +527,24 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED (((TARGET_SVE2 && TARGET_FP8FMA) || TARGET_STREAMING) \ && (AARCH64_HAVE_ISA (SSVE_FP8FMA) || TARGET_NON_STREAMING)) +/* fp8 four way dot product enabled through +fp8dot4. */ +#define TARGET_FP8DOT4 AARCH64_HAVE_ISA (FP8DOT4) + +/* Streaming versions of fp8 four way dot product instructions are enabled +through +ssve-fp8dot4. */ +#define TARGET_SSVE_FP8DOT4 ((\ + (TARGET_SVE2 && TARGET_FP8DOT4) || TARGET_STREAMING) \ + && (AARCH64_HAVE_ISA(SSVE_FP8DOT4) || TARGET_NON_STREAMING)) + +/* fp8 two way dot product enabled through +fp8dot2. */ +#define TARGET_FP8DOT2 AARCH64_HAVE_ISA (FP8DOT2) + +/* Streaming versions of fp8 two way dot product instructions are enabled +through +ssve-fp8dot2. */ +#define TARGET_SSVE_FP8DOT2 ((\ + (TARGET_SVE2 && TARGET_FP8DOT2) || TARGET_STREAMING) \ + && (AARCH64_HAVE_ISA(SSVE_FP8DOT2) || TARGET_NON_STREAMING)) + /* Standard register usage. */ /* 31 64-bit general purpose registers R0-R30: diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 4b265a7..4786b02 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -962,6 +962,8 @@ UNSPEC_COND_FCVTX ; Used in aarch64-sve2.md. UNSPEC_COND_FCVTXNT ; Used in aarch64-sve2.md. UNSPEC_COND_FLOGB ; Used in aarch64-sve2.md. + UNSPEC_DOT_FP8 ; Used in aarch64-sve2.md. + UNSPEC_DOT_LANE_FP8 ; Used in aarch64-sve2.md. UNSPEC_EORBT ; Used in aarch64-sve2.md. UNSPEC_EORTB ; Used in aarch64-sve2.md. UNSPEC_F1CVT ; Used in aarch64-sve2.md. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 2a4f016..626f7d2 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -21957,6 +21957,18 @@ Enable the fp8 (8-bit floating point) multiply accumulate extension. @item ssve-fp8fma Enable the fp8 (8-bit floating point) multiply accumulate extension in streaming mode. +@item fp8dot4 +Enable the fp8 (8-bit floating point) to single-precision 4-way dot product +extension. +@item ssve-fp8dot4 +Enable the fp8 (8-bit floating point) to single-precision 4-way dot product +extension in streaming mode. +@item fp8dot2 +Enable the fp8 (8-bit floating point) to half-precision 2-way dot product +extension. +@item ssve-fp8dot2 +Enable the fp8 (8-bit floating point) to half-precision 2-way dot product +extension in streaming mode. @item faminmax Enable the Floating Point Absolute Maximum/Minimum extension. @item sve-b16b16 diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_1.c new file mode 100644 index 0000000..9ad789a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_1.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ + +#include <arm_sve.h> + +#pragma GCC target ("arch=armv8.2-a+sve2+fp8dot2") + +void +test (svfloat16_t f16, svmfloat8_t f8, fpm_t fpm, + svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32, + svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, mfloat8_t f) +{ + svdot_fpm (f16, f8, f8, fpm); + svdot_fpm (f32, f8, f8, fpm); + + svdot_fpm (f16); /* { dg-error {too few arguments to function 'svdot_fpm'} } */ + svdot_fpm (f16, f8); /* { dg-error {too few arguments to function 'svdot_fpm'} } */ + svdot_fpm (f16, f8, f8); /* { dg-error {too few arguments to function 'svdot_fpm'} } */ + svdot_fpm (f8, f8, fpm); /* { dg-error {too few arguments to function 'svdot_fpm'} } */ + svdot_fpm (f16, f8, fpm); /* { dg-error {too few arguments to function 'svdot_fpm'} } */ + svdot_fpm (f16, f8, f8, fpm, 0); /* { dg-error {too many arguments to function 'svdot_fpm'} } */ + + svdot_fpm (0, f8, f8, fpm); /* { dg-error {passing 'int' to argument 1 of 'svdot_fpm', which expects an SVE type rather than a scalar} } */ + svdot_fpm (f16, f8, f, fpm); /* { dg-error {passing 'mfloat8_t' {aka '__mfp8'} to argument 3 of 'svdot_fpm', which expects 'svmfloat8_t'} } */ + svdot_fpm (pg, f8, f8, fpm); /* { dg-error {'svdot_fpm' has no form that takes 'svbool_t' and 'svmfloat8_t' arguments} } */ + svdot_fpm (u8, f8, f8, fpm); /* { dg-error {'svdot_fpm' has no form that takes 'svuint8_t' and 'svmfloat8_t' arguments} } */ + svdot_fpm (u16, f8, f8, fpm); /* { dg-error {'svdot_fpm' has no form that takes 'svuint16_t' and 'svmfloat8_t' arguments} } */ + svdot_fpm (f64, f8, f8, fpm); /* { dg-error {'svdot_fpm' has no form that takes 'svfloat64_t' and 'svmfloat8_t' arguments} } */ + svdot_fpm (f16, 0, f8, fpm); /* { dg-error {passing 'int' to argument 2 of 'svdot_fpm', which expects 'svmfloat8_t'} } */ + svdot_fpm (f16, f16, f8, fpm); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svdot_fpm', which expects 'svmfloat8_t'} } */ + svdot_fpm (f16, f8, 0, fpm); /* { dg-error {passing 'int' to argument 3 of 'svdot_fpm', which expects 'svmfloat8_t'} } */ + svdot_fpm (f16, f8, f16, fpm); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svdot_fpm', which expects 'svmfloat8_t'} } */ + svdot_fpm (f16, f8, f8, f8); /* { dg-error {passing 'svmfloat8_t' to argument 4 of 'svdot_fpm', which expects 'uint64_t'} } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_lane_group_selection_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_lane_group_selection_1.c new file mode 100644 index 0000000..dec00e3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_lane_group_selection_1.c @@ -0,0 +1,49 @@ +/* { dg-do compile } */ + +#include <arm_sve.h> + +#pragma GCC target ("arch=armv8.2-a+ssve-fp8fma+ssve-fp8dot2") + +void +f1 (svfloat16_t f16, svmfloat8_t f8, fpm_t fpm, + svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32, + svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, mfloat8_t f, int i) + __arm_streaming +{ + svdot_lane_fpm (f32, f8, f8, 0, fpm); + svdot_lane_fpm (f32, f8, f8, 3, fpm); + svdot_lane_fpm (f16, f8, f8, 0, fpm); + svdot_lane_fpm (f16, f8, f8, 7, fpm); + + svdot_lane_fpm (f32, f8, f8, -1, fpm); /* { dg-error {passing -1 to argument 4 of 'svdot_lane_fpm', which expects a value in the range \[0, 3\]} } */ + svdot_lane_fpm (f32, f8, f8, 4, fpm); /* { dg-error {passing 4 to argument 4 of 'svdot_lane_fpm', which expects a value in the range \[0, 3\]} } */ + svdot_lane_fpm (f16, f8, f8, -1, fpm); /* { dg-error {passing -1 to argument 4 of 'svdot_lane_fpm', which expects a value in the range \[0, 7\]} } */ + svdot_lane_fpm (f16, f8, f8, 8, fpm); /* { dg-error {passing 8 to argument 4 of 'svdot_lane_fpm', which expects a value in the range \[0, 7\]} } */ + + svdot_lane_fpm (f16); /* { dg-error {too few arguments to function 'svdot_lane_fpm'} } */ + svdot_lane_fpm (f16, f8); /* { dg-error {too few arguments to function 'svdot_lane_fpm'} } */ + svdot_lane_fpm (f16, f8, f8); /* { dg-error {too few arguments to function 'svdot_lane_fpm'} } */ + svdot_lane_fpm (f16, f8, f8, 0); /* { dg-error {too few arguments to function 'svdot_lane_fpm'} } */ + svdot_lane_fpm (f16, f8, f8, fpm); /* { dg-error {too few arguments to function 'svdot_lane_fpm'} } */ + svdot_lane_fpm (f16, f8, 15, fpm); /* { dg-error {too few arguments to function 'svdot_lane_fpm'} } */ + svdot_lane_fpm (f8, f8, 15, fpm); /* { dg-error {too few arguments to function 'svdot_lane_fpm'} } */ + + svdot_lane_fpm (f16, f8, f8, 15, 0, fpm); /* { dg-error {too many arguments to function 'svdot_lane_fpm'} } */ + svdot_lane_fpm (f16, f8, f8, 15, fpm, fpm); /* { dg-error {too many arguments to function 'svdot_lane_fpm'} } */ + svdot_lane_fpm (f16, f8, f8, f8, 15, fpm); /* { dg-error {too many arguments to function 'svdot_lane_fpm'} } */ + svdot_lane_fpm (f16, f16, f8, f8, 15, fpm); /* { dg-error {too many arguments to function 'svdot_lane_fpm'} } */ + + svdot_lane_fpm (f32, bf16, bf16, 0, fpm); /* { dg-error {passing 'svbfloat16_t' to argument 2 of 'svdot_lane_fpm', which expects 'svmfloat8_t'} } */ + svdot_lane_fpm (0, f8, f8, 0, fpm); /* { dg-error {passing 'int' to argument 1 of 'svdot_lane_fpm', which expects an SVE type rather than a scalar} } */ + svdot_lane_fpm (pg, f8, f8, 0, fpm); /* { dg-error {'svdot_lane_fpm' has no form that takes 'svbool_t' and 'svmfloat8_t' arguments} } */ + svdot_lane_fpm (u8, f8, f8, 0, fpm); /* { dg-error {'svdot_lane_fpm' has no form that takes 'svuint8_t' and 'svmfloat8_t' arguments} } */ + svdot_lane_fpm (u16, f8, f8, 0, fpm); /* { dg-error {'svdot_lane_fpm' has no form that takes 'svuint16_t' and 'svmfloat8_t' arguments} } */ + svdot_lane_fpm (f64, f8, f8, 0, fpm); /* { dg-error {'svdot_lane_fpm' has no form that takes 'svfloat64_t' and 'svmfloat8_t' arguments} } */ + svdot_lane_fpm (f16, 0, f8, 0, fpm); /* { dg-error {passing 'int' to argument 2 of 'svdot_lane_fpm', which expects 'svmfloat8_t'} } */ + svdot_lane_fpm (f16, f32, f8, 0, fpm); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svdot_lane_fpm', which expects 'svmfloat8_t'} } */ + svdot_lane_fpm (f16, f8, 0, 0, fpm); /* { dg-error {passing 'int' to argument 3 of 'svdot_lane_fpm', which expects 'svmfloat8_t'} } */ + svdot_lane_fpm (f16, f8, f32, 0, fpm); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svdot_lane_fpm', which expects 'svmfloat8_t'} } */ + + svdot_lane_fpm (f16, f8, f8, s32, fpm); /* { dg-error {argument 4 of 'svdot_lane_fpm' must be an integer constant expression} } */ + svdot_lane_fpm (f16, f8, f8, i, fpm); /* { dg-error {argument 4 of 'svdot_lane_fpm' must be an integer constant expression} } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_mf8.c new file mode 100644 index 0000000..9e54cd1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_mf8.c @@ -0,0 +1,172 @@ +/* { dg-do assemble { target aarch64_asm_fp8dot2_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_fp8dot2_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+fp8dot2" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+ssve-fp8dot2" +#endif + +/* +** dot_lane_0_f16_tied1: +** msr fpmr, x0 +** fdot z0\.h, z4\.b, z5\.b\[0\] +** ret +*/ +TEST_DUAL_Z (dot_lane_0_f16_tied1, svfloat16_t, svmfloat8_t, + z0 = svdot_lane_f16_mf8_fpm (z0, z4, z5, 0, fpm0), + z0 = svdot_lane_fpm (z0, z4, z5, 0, fpm0)) + +/* +** dot_lane_0_f16_tied2: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fdot z0\.h, \1\.b, z1\.b\[0\] +** ret +*/ +TEST_DUAL_Z_REV (dot_lane_0_f16_tied2, svfloat16_t, svmfloat8_t, + z0_res = svdot_lane_f16_mf8_fpm (z4, z0, z1, 0, fpm0), + z0_res = svdot_lane_fpm (z4, z0, z1, 0, fpm0)) + +/* +** dot_lane_0_f16_tied3: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fdot z0\.h, z1\.b, \1\.b\[0\] +** ret +*/ +TEST_DUAL_Z_REV (dot_lane_0_f16_tied3, svfloat16_t, svmfloat8_t, + z0_res = svdot_lane_f16_mf8_fpm (z4, z1, z0, 0, fpm0), + z0_res = svdot_lane_fpm (z4, z1, z0, 0, fpm0)) + +/* +** dot_lane_0_f16_untied: +** msr fpmr, x0 +** movprfx z0, z1 +** fdot z0\.h, z4\.b, z5\.b\[0\] +** ret +*/ +TEST_DUAL_Z (dot_lane_0_f16_untied, svfloat16_t, svmfloat8_t, + z0 = svdot_lane_f16_mf8_fpm (z1, z4, z5, 0, fpm0), + z0 = svdot_lane_fpm (z1, z4, z5, 0, fpm0)) + +/* +** dot_lane_1_f16: +** msr fpmr, x0 +** fdot z0\.h, z4\.b, z5\.b\[1\] +** ret +*/ +TEST_DUAL_Z (dot_lane_1_f16, svfloat16_t, svmfloat8_t, + z0 = svdot_lane_f16_mf8_fpm (z0, z4, z5, 1, fpm0), + z0 = svdot_lane_fpm (z0, z4, z5, 1, fpm0)) + +/* +** dot_lane_z8_f16: +** ... +** msr fpmr, x0 +** mov (z[0-7])\.d, z8\.d +** fdot z0\.h, z1\.b, \1\.b\[1\] +** ldr d8, \[sp\], 32 +** ret +*/ +TEST_DUAL_LANE_REG (dot_lane_z8_f16, svfloat16_t, svmfloat8_t, z8, + z0 = svdot_lane_f16_mf8_fpm (z0, z1, z8, 1, fpm0), + z0 = svdot_lane_fpm (z0, z1, z8, 1, fpm0)) + +/* +** dot_lane_z16_f16: +** ... +** msr fpmr, x0 +** mov (z[0-7])\.d, z16\.d +** fdot z0\.h, z1\.b, \1\.b\[7\] +** ... +** ret +*/ +TEST_DUAL_LANE_REG (dot_lane_z16_f16, svfloat16_t, svmfloat8_t, z16, + z0 = svdot_lane_f16_mf8_fpm (z0, z1, z16, 7, fpm0), + z0 = svdot_lane_fpm (z0, z1, z16, 7, fpm0)) + +/* +** dot_lane_0_f32_tied1: +** msr fpmr, x0 +** fdot z0\.s, z4\.b, z5\.b\[0\] +** ret +*/ +TEST_DUAL_Z (dot_lane_0_f32_tied1, svfloat32_t, svmfloat8_t, + z0 = svdot_lane_f32_mf8_fpm (z0, z4, z5, 0, fpm0), + z0 = svdot_lane_fpm (z0, z4, z5, 0, fpm0)) + +/* +** dot_lane_0_f32_tied2: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fdot z0\.s, \1\.b, z1\.b\[0\] +** ret +*/ +TEST_DUAL_Z_REV (dot_lane_0_f32_tied2, svfloat32_t, svmfloat8_t, + z0_res = svdot_lane_f32_mf8_fpm (z4, z0, z1, 0, fpm0), + z0_res = svdot_lane_fpm (z4, z0, z1, 0, fpm0)) + +/* +** dot_lane_0_f32_tied3: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fdot z0\.s, z1\.b, \1\.b\[0\] +** ret +*/ +TEST_DUAL_Z_REV (dot_lane_0_f32_tied3, svfloat32_t, svmfloat8_t, + z0_res = svdot_lane_f32_mf8_fpm (z4, z1, z0, 0, fpm0), + z0_res = svdot_lane_fpm (z4, z1, z0, 0, fpm0)) + +/* +** dot_lane_0_f32_untied: +** msr fpmr, x0 +** movprfx z0, z1 +** fdot z0\.s, z4\.b, z5\.b\[0\] +** ret +*/ +TEST_DUAL_Z (dot_lane_0_f32_untied, svfloat32_t, svmfloat8_t, + z0 = svdot_lane_f32_mf8_fpm (z1, z4, z5, 0, fpm0), + z0 = svdot_lane_fpm (z1, z4, z5, 0, fpm0)) + +/* +** dot_lane_1_f32: +** msr fpmr, x0 +** fdot z0\.s, z4\.b, z5\.b\[1\] +** ret +*/ +TEST_DUAL_Z (dot_lane_1_f32, svfloat32_t, svmfloat8_t, + z0 = svdot_lane_f32_mf8_fpm (z0, z4, z5, 1, fpm0), + z0 = svdot_lane_fpm (z0, z4, z5, 1, fpm0)) + +/* +** dot_lane_z8_f32: +** ... +** msr fpmr, x0 +** mov (z[0-7])\.d, z8\.d +** fdot z0\.s, z1\.b, \1\.b\[1\] +** ldr d8, \[sp\], 32 +** ret +*/ +TEST_DUAL_LANE_REG (dot_lane_z8_f32, svfloat32_t, svmfloat8_t, z8, + z0 = svdot_lane_f32_mf8_fpm (z0, z1, z8, 1, fpm0), + z0 = svdot_lane_fpm (z0, z1, z8, 1, fpm0)) + +/* +** dot_lane_z32_f32: +** ... +** msr fpmr, x0 +** mov (z[0-7])\.d, z16\.d +** fdot z0\.s, z1\.b, \1\.b\[3\] +** ... +** ret +*/ +TEST_DUAL_LANE_REG (dot_lane_z32_f32, svfloat32_t, svmfloat8_t, z16, + z0 = svdot_lane_f32_mf8_fpm (z0, z1, z16, 3, fpm0), + z0 = svdot_lane_fpm (z0, z1, z16, 3, fpm0)) diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_mf8.c new file mode 100644 index 0000000..12e28e3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_mf8.c @@ -0,0 +1,101 @@ +/* { dg-do assemble { target aarch64_asm_fp8dot2_ok } } */ +/* { dg-do compile { target { ! aarch64_asm_fp8dot2_ok } } } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include "test_sve_acle.h" + +#pragma GCC target "+fp8dot2" +#ifdef STREAMING_COMPATIBLE +#pragma GCC target "+ssve-fp8dot2" +#endif + +/* +** dot_f16_mf8_tied1: +** msr fpmr, x0 +** fdot z0\.h, z4\.b, z5\.b +** ret +*/ +TEST_DUAL_Z (dot_f16_mf8_tied1, svfloat16_t, svmfloat8_t, + z0 = svdot_f16_mf8_fpm (z0, z4, z5, fpm0), + z0 = svdot_fpm (z0, z4, z5, fpm0)) + +/* +** dot_f16_mf8_tied2: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fdot z0\.h, \1\.b, z1\.b +** ret +*/ +TEST_DUAL_Z_REV (dot_f16_mf8_tied2, svfloat16_t, svmfloat8_t, + z0_res = svdot_f16_mf8_fpm (z4, z0, z1, fpm0), + z0_res = svdot_fpm (z4, z0, z1, fpm0)) + +/* +** dot_f16_mf8_tied3: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fdot z0\.h, z1\.b, \1\.b +** ret +*/ +TEST_DUAL_Z_REV (dot_f16_mf8_tied3, svfloat16_t, svmfloat8_t, + z0_res = svdot_f16_mf8_fpm (z4, z1, z0, fpm0), + z0_res = svdot_fpm (z4, z1, z0, fpm0)) + +/* +** dot_f16_mf8_untied: +** msr fpmr, x0 +** movprfx z0, z1 +** fdot z0\.h, z4\.b, z5\.b +** ret +*/ +TEST_DUAL_Z (dot_f16_mf8_untied, svfloat16_t, svmfloat8_t, + z0 = svdot_f16_mf8_fpm (z1, z4, z5, fpm0), + z0 = svdot_fpm (z1, z4, z5, fpm0)) + +/* +** dot_f32_mf8_tied1: +** msr fpmr, x0 +** fdot z0\.s, z4\.b, z5\.b +** ret +*/ +TEST_DUAL_Z (dot_f32_mf8_tied1, svfloat32_t, svmfloat8_t, + z0 = svdot_f32_mf8_fpm (z0, z4, z5, fpm0), + z0 = svdot_fpm (z0, z4, z5, fpm0)) + +/* +** dot_f32_mf8_tied2: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fdot z0\.s, \1\.b, z1\.b +** ret +*/ +TEST_DUAL_Z_REV (dot_f32_mf8_tied2, svfloat32_t, svmfloat8_t, + z0_res = svdot_f32_mf8_fpm (z4, z0, z1, fpm0), + z0_res = svdot_fpm (z4, z0, z1, fpm0)) + +/* +** dot_f32_mf8_tied3: +** msr fpmr, x0 +** mov (z[0-9]+)\.d, z0\.d +** movprfx z0, z4 +** fdot z0\.s, z1\.b, \1\.b +** ret +*/ +TEST_DUAL_Z_REV (dot_f32_mf8_tied3, svfloat32_t, svmfloat8_t, + z0_res = svdot_f32_mf8_fpm (z4, z1, z0, fpm0), + z0_res = svdot_fpm (z4, z1, z0, fpm0)) + +/* +** dot_f32_mf8_untied: +** msr fpmr, x0 +** movprfx z0, z1 +** fdot z0\.s, z4\.b, z5\.b +** ret +*/ +TEST_DUAL_Z (dot_f32_mf8_untied, svfloat32_t, svmfloat8_t, + z0 = svdot_f32_mf8_fpm (z1, z4, z5, fpm0), + z0 = svdot_fpm (z1, z4, z5, fpm0)) + diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index a122178..95acd09 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -12141,7 +12141,8 @@ foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve" "i8mm" "f32mm" "f64mm" "bf16" "sb" "sve2" "ls64" "sme" "sme-i16i64" "sme2" "sve-b16b16" "sme-b16b16" "sme-f16f16" "sme2p1" "fp8" "fp8fma" - "ssve-fp8fma" } { + "ssve-fp8fma" "fp8dot2" "ssve-fp8dot2" "fp8dot4" + "ssve-fp8dot4"} { eval [string map [list FUNC $aarch64_ext] { proc check_effective_target_aarch64_asm_FUNC_ok { } { if { [istarget aarch64*-*-*] } { |