diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/aarch64/aarch64-builtins.cc | 107 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-c.cc | 4 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-option-extensions.def | 4 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd-pragma-builtins.def | 39 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 58 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.h | 6 | ||||
-rw-r--r-- | gcc/config/aarch64/iterators.md | 19 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/simd/vdot2_fpmdot.c | 77 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/simd/vdot4_fpmdot.c | 77 |
10 files changed, 380 insertions, 15 deletions
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index 9b7280a..a71c8c9 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -780,7 +780,7 @@ typedef struct AARCH64_SIMD_BUILTIN_##T##_##N##A, #undef ENTRY -#define ENTRY(N, S, M0, M1, M2, M3, USES_FPMR, U) \ +#define ENTRY(N, S, M0, M1, M2, M3, M4, USES_FPMR, U) \ AARCH64_##N, enum aarch64_builtins @@ -1590,9 +1590,10 @@ aarch64_init_simd_builtin_functions (bool called_from_pragma) enum class aarch64_builtin_signatures { + unary, binary, ternary, - unary, + quaternary, }; namespace { @@ -1617,6 +1618,7 @@ namespace simd_types { constexpr simd_type s16q { V8HImode, qualifier_none }; constexpr simd_type u16q { V8HImode, qualifier_unsigned }; + constexpr simd_type s32_index { SImode, qualifier_lane_index }; constexpr simd_type s32 { V2SImode, qualifier_none }; constexpr simd_type s32q { V4SImode, qualifier_none }; @@ -1642,10 +1644,10 @@ namespace simd_types { } #undef ENTRY -#define ENTRY(N, S, T0, T1, T2, T3, USES_FPMR, U) \ +#define ENTRY(N, S, T0, T1, T2, T3, T4, USES_FPMR, U) \ {#N, aarch64_builtin_signatures::S, simd_types::T0, simd_types::T1, \ - simd_types::T2, simd_types::T3, U, USES_FPMR, \ - aarch64_required_extensions::REQUIRED_EXTENSIONS}, + simd_types::T2, simd_types::T3, simd_types::T4, U, \ + USES_FPMR, aarch64_required_extensions::REQUIRED_EXTENSIONS}, /* Initialize pragma builtins. */ @@ -1653,7 +1655,7 @@ struct aarch64_pragma_builtins_data { const char *name; aarch64_builtin_signatures signature; - simd_type types[4]; + simd_type types[5]; int unspec; bool uses_fpmr; aarch64_required_extensions required_extensions; @@ -1672,6 +1674,8 @@ aarch64_get_number_of_args (const aarch64_pragma_builtins_data &builtin_data) return 2; else if (builtin_data.signature == aarch64_builtin_signatures::ternary) return 3; + else if (builtin_data.signature == aarch64_builtin_signatures::quaternary) + return 4; else // No other signature supported. gcc_unreachable (); @@ -2504,6 +2508,72 @@ aarch64_general_required_extensions (unsigned int code) return ext::streaming_compatible (0); } +namespace function_checker { + +void +require_integer_constant (location_t location, tree arg) +{ + if (TREE_CODE (arg) != INTEGER_CST) + { + error_at (location, "Constant-type integer argument expected"); + return; + } +} + +void +require_immediate_range (location_t location, tree arg, HOST_WIDE_INT min, + HOST_WIDE_INT max) +{ + if (wi::to_widest (arg) < min || wi::to_widest (arg) > max) + { + error_at (location, "lane out of range %wd - %wd", min, max); + return; + } +} + +/* Validates indexing into a vector using the index's size and the instruction, + where instruction is represented by the unspec. + This only works for intrinsics declared using pragmas in + aarch64-simd-pragma-builtins.def. */ + +void +check_simd_lane_bounds (location_t location, const aarch64_pragma_builtins_data + *builtin_data, tree *args) +{ + if (builtin_data == NULL) + // Don't check for functions that are not declared in + // aarch64-simd-pragma-builtins.def. + return; + + auto nargs = aarch64_get_number_of_args (*builtin_data); + switch (builtin_data->unspec) + { + case UNSPEC_VDOT2: + case UNSPEC_VDOT4: + { + if (builtin_data->types[nargs].qualifiers != qualifier_lane_index) + break; + + auto index_arg = args[nargs - 1]; + require_integer_constant (location, index_arg); + + auto vector_to_index_mode = builtin_data->types[nargs - 1].mode; + int vector_to_index_mode_size + = GET_MODE_NUNITS (vector_to_index_mode).to_constant (); + + auto low = 0; + int high + = builtin_data->unspec == UNSPEC_VDOT2 + ? vector_to_index_mode_size / 2 - 1 + : vector_to_index_mode_size / 4 - 1; + require_immediate_range (location, index_arg, low, high); + break; + } + } +} + +}; + bool aarch64_general_check_builtin_call (location_t location, vec<location_t>, unsigned int code, tree fndecl, @@ -2515,6 +2585,9 @@ aarch64_general_check_builtin_call (location_t location, vec<location_t>, if (!aarch64_check_required_extensions (location, decl, required_extensions)) return false; + auto builtin_data = aarch64_get_pragma_builtin (code); + function_checker::check_simd_lane_bounds (location, builtin_data, args); + switch (code) { case AARCH64_RSR: @@ -3477,6 +3550,28 @@ aarch64_expand_pragma_builtin (tree exp, rtx target, expand_insn (icode, nargs + 1, ops); break; + case UNSPEC_VDOT2: + case UNSPEC_VDOT4: + if (builtin_data->signature == aarch64_builtin_signatures::ternary) + icode = code_for_aarch64 (builtin_data->unspec, + builtin_data->types[0].mode, + builtin_data->types[1].mode, + builtin_data->types[2].mode, + builtin_data->types[3].mode); + else if + (builtin_data->signature == aarch64_builtin_signatures::quaternary) + icode = code_for_aarch64 (builtin_data->unspec, + builtin_data->types[0].mode, + builtin_data->types[1].mode, + builtin_data->types[2].mode, + builtin_data->types[3].mode, + builtin_data->types[4].mode); + else + gcc_unreachable (); + + expand_insn (icode, nargs + 1, ops); + break; + default: gcc_unreachable (); } diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc index b13366b..ae1472e 100644 --- a/gcc/config/aarch64/aarch64-c.cc +++ b/gcc/config/aarch64/aarch64-c.cc @@ -260,6 +260,10 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) aarch64_def_or_undef (TARGET_FP8, "__ARM_FEATURE_FP8", pfile); + aarch64_def_or_undef (TARGET_FP8DOT2, "__ARM_FEATURE_FP8DOT2", pfile); + + aarch64_def_or_undef (TARGET_FP8DOT4, "__ARM_FEATURE_FP8DOT4", pfile); + aarch64_def_or_undef (TARGET_LS64, "__ARM_FEATURE_LS64", pfile); aarch64_def_or_undef (TARGET_RCPC, "__ARM_FEATURE_RCPC", pfile); diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index c9d419a..44d2e18 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -236,6 +236,10 @@ AARCH64_OPT_EXTENSION("gcs", GCS, (), (), (), "gcs") AARCH64_OPT_EXTENSION("fp8", FP8, (SIMD), (), (), "fp8") +AARCH64_OPT_EXTENSION("fp8dot2", FP8DOT2, (SIMD), (), (), "fp8dot2") + +AARCH64_OPT_EXTENSION("fp8dot4", FP8DOT4, (SIMD), (), (), "fp8dot4") + AARCH64_OPT_EXTENSION("faminmax", FAMINMAX, (SIMD), (), (), "faminmax") #undef AARCH64_OPT_FMV_EXTENSION diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def index 91897cf..4a94a66 100644 --- a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def @@ -20,20 +20,33 @@ #undef ENTRY_BINARY -#define ENTRY_BINARY(N, T0, T1, T2, U) \ - ENTRY (N, binary, T0, T1, T2, none, false, U) +#define ENTRY_BINARY(N, T0, T1, T2, U) \ + ENTRY (N, binary, T0, T1, T2, none, none, false, U) #undef ENTRY_BINARY_FPM -#define ENTRY_BINARY_FPM(N, T0, T1, T2, U) \ - ENTRY (N, binary, T0, T1, T2, none, true, U) +#define ENTRY_BINARY_FPM(N, T0, T1, T2, U) \ + ENTRY (N, binary, T0, T1, T2, none, none, true, U) #undef ENTRY_TERNARY_FPM -#define ENTRY_TERNARY_FPM(N, T0, T1, T2, T3, U) \ - ENTRY (N, ternary, T0, T1, T2, T3, true, U) +#define ENTRY_TERNARY_FPM(N, T0, T1, T2, T3, U) \ + ENTRY (N, ternary, T0, T1, T2, T3, none, true, U) + +#undef ENTRY_TERNARY_FPM_LANE +#define ENTRY_TERNARY_FPM_LANE(N, T0, T1, T2, T3, U) \ + ENTRY (N, quaternary, T0, T1, T2, T3, s32_index, true, U) #undef ENTRY_UNARY_FPM -#define ENTRY_UNARY_FPM(N, T0, T1, U) \ - ENTRY (N, unary, T0, T1, none, none, true, U) +#define ENTRY_UNARY_FPM(N, T0, T1, U) \ + ENTRY (N, unary, T0, T1, none, none, none, true, U) + +#undef ENTRY_VDOT_FPM +#define ENTRY_VDOT_FPM(T, U) \ + ENTRY_TERNARY_FPM (vdot_##T##_mf8_fpm, T, T, f8, f8, U) \ + ENTRY_TERNARY_FPM (vdotq_##T##_mf8_fpm, T##q, T##q, f8q, f8q, U) \ + ENTRY_TERNARY_FPM_LANE (vdot_lane_##T##_mf8_fpm, T, T, f8, f8, U) \ + ENTRY_TERNARY_FPM_LANE (vdot_laneq_##T##_mf8_fpm, T, T, f8, f8q, U) \ + ENTRY_TERNARY_FPM_LANE (vdotq_lane_##T##_mf8_fpm, T##q, T##q, f8q, f8, U) \ + ENTRY_TERNARY_FPM_LANE (vdotq_laneq_##T##_mf8_fpm, T##q, T##q, f8q, f8q, U) #undef ENTRY_VHSDF #define ENTRY_VHSDF(NAME, UNSPEC) \ @@ -83,3 +96,13 @@ ENTRY_TERNARY_FPM (vcvt_high_mf8_f32_fpm, f8q, f8, f32q, f32q, UNSPEC_VCVT_HIGH) #define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8) ENTRY_VHSDF_VHSDI (vscale, UNSPEC_FSCALE) #undef REQUIRED_EXTENSIONS + +// fpm dot2 product +#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8DOT2) +ENTRY_VDOT_FPM (f16, UNSPEC_VDOT2) +#undef REQUIRED_EXTENSIONS + +// fpm dot4 product +#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8DOT4) +ENTRY_VDOT_FPM (f32, UNSPEC_VDOT4) +#undef REQUIRED_EXTENSIONS diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index f843746..7b97486 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -10097,3 +10097,61 @@ "TARGET_FP8" "<fpm_uns_op>\t%0.<VHSDF:Vtype>, %1.<VHSDF:Vtype>, %2.<VHSDI:Vtype>" ) + +;; fpm vdot2 instructions. +(define_insn + "@aarch64_<fpm_uns_op><VHF:mode><VHF:mode><VB:mode><VB:mode>" + [(set (match_operand:VHF 0 "register_operand" "=w") + (unspec:VHF + [(match_operand:VHF 1 "register_operand" "w") + (match_operand:VB 2 "register_operand" "w") + (match_operand:VB 3 "register_operand" "w") + (reg:DI FPM_REGNUM)] + FPM_VDOT2_UNS))] + "TARGET_FP8DOT2" + "<fpm_uns_op>\t%1.<VHF:Vtype>, %2.<VB:Vtype>, %3.<VB:Vtype>" +) + +;; fpm vdot2 instructions with lane. +(define_insn + "@aarch64_<fpm_uns_op><VHF:mode><VHF:mode><VB:mode><VB2:mode><SI_ONLY:mode>" + [(set (match_operand:VHF 0 "register_operand" "=w") + (unspec:VHF + [(match_operand:VHF 1 "register_operand" "w") + (match_operand:VB 2 "register_operand" "w") + (match_operand:VB2 3 "register_operand" "w") + (match_operand:SI_ONLY 4 "const_int_operand" "n") + (reg:DI FPM_REGNUM)] + FPM_VDOT2_UNS))] + "TARGET_FP8DOT2" + "<fpm_uns_op>\t%1.<VHF:Vtype>, %2.<VB:Vtype>, %3.<VHF:Vdotlanetype>[%4]" +) + +;; fpm vdot4 instructions. +(define_insn + "@aarch64_<fpm_uns_op><VDQSF:mode><VDQSF:mode><VB:mode><VB:mode>" + [(set (match_operand:VDQSF 0 "register_operand" "=w") + (unspec:VDQSF + [(match_operand:VDQSF 1 "register_operand" "w") + (match_operand:VB 2 "register_operand" "w") + (match_operand:VB 3 "register_operand" "w") + (reg:DI FPM_REGNUM)] + FPM_VDOT4_UNS))] + "TARGET_FP8DOT4" + "<fpm_uns_op>\t%1.<VDQSF:Vtype>, %2.<VB:Vtype>, %3.<VB:Vtype>" +) + +;; fpm vdot4 instructions with lane. +(define_insn + "@aarch64_<fpm_uns_op><VDQSF:mode><VDQSF:mode><VB:mode><VB2:mode><SI_ONLY:mode>" + [(set (match_operand:VDQSF 0 "register_operand" "=w") + (unspec:VDQSF + [(match_operand:VDQSF 1 "register_operand" "w") + (match_operand:VB 2 "register_operand" "w") + (match_operand:VB2 3 "register_operand" "w") + (match_operand:SI_ONLY 4 "const_int_operand" "n") + (reg:DI FPM_REGNUM)] + FPM_VDOT4_UNS))] + "TARGET_FP8DOT4" + "<fpm_uns_op>\t%1.<VDQSF:Vtype>, %2.<VB:Vtype>, %3.<VDQSF:Vdotlanetype>[%4]" +) diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index f07b2c4..c50a578 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -494,6 +494,12 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED ((TARGET_SVE2p1 || TARGET_STREAMING) \ && (TARGET_SME2 || TARGET_NON_STREAMING)) +/* fp8 dot product instructions are enabled through +fp8dot2. */ +#define TARGET_FP8DOT2 AARCH64_HAVE_ISA (FP8DOT2) + +/* fp8 dot product instructions are enabled through +fp8dot4. */ +#define TARGET_FP8DOT4 AARCH64_HAVE_ISA (FP8DOT4) + /* Standard register usage. */ /* 31 64-bit general purpose registers R0-R30: diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index bdd276b..8c03dcd 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -163,6 +163,10 @@ ;; Advanced SIMD Float modes. (define_mode_iterator VDQF [V2SF V4SF V2DF]) + +(define_mode_iterator VHF [(V4HF "TARGET_SIMD_F16INST") + (V8HF "TARGET_SIMD_F16INST")]) + (define_mode_iterator VHSDF [(V4HF "TARGET_SIMD_F16INST") (V8HF "TARGET_SIMD_F16INST") V2SF V4SF V2DF]) @@ -321,6 +325,7 @@ ;; All byte modes. (define_mode_iterator VB [V8QI V16QI]) +(define_mode_iterator VB2 [VB]) ;; 1 and 2 lane DI and DF modes. (define_mode_iterator V12DIF [V1DI V1DF V2DI V2DF]) @@ -764,6 +769,8 @@ UNSPEC_VCVT2 ; Used in aarch64-simd.md. UNSPEC_VCVT2_HIGH ; Used in aarch64-simd.md. UNSPEC_VCVT2_LOW ; Used in aarch64-simd.md. + UNSPEC_VDOT2 ; Used in aarch64-simd.md. + UNSPEC_VDOT4 ; Used in aarch64-simd.md. UNSPEC_TBL ; Used in vector permute patterns. UNSPEC_TBLQ ; Used in vector permute patterns. UNSPEC_TBX ; Used in vector permute patterns. @@ -2491,6 +2498,11 @@ (VNx8HF ".h") (VNx16HF "") (VNx32HF "") (VNx8HI ".h") (VNx16HI "") (VNx32HI "")]) + +;; Lane index suffix for fp8 vdot operations depends on the output mode +(define_mode_attr Vdotlanetype [(V4HF "2b") (V8HF "2b") + (V2SF "4b") (V4SF "4b")]) + ;; The number of bytes controlled by a predicate (define_mode_attr data_bytes [(VNx16BI "1") (VNx8BI "2") (VNx4BI "4") (VNx2BI "8")]) @@ -4720,7 +4732,12 @@ (UNSPEC_VCVT2_HIGH "f2cvtl2") (UNSPEC_VCVT2_LOW "f2cvtl")]) +(define_int_iterator FPM_VDOT2_UNS [UNSPEC_VDOT2]) +(define_int_iterator FPM_VDOT4_UNS [UNSPEC_VDOT4]) + (define_int_attr fpm_uns_op [(UNSPEC_FSCALE "fscale") (UNSPEC_VCVT "fcvtn") - (UNSPEC_VCVT_HIGH "fcvtn2")]) + (UNSPEC_VCVT_HIGH "fcvtn2") + (UNSPEC_VDOT2 "fdot") + (UNSPEC_VDOT4 "fdot")]) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 4a494f6..bc3f742 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -21807,6 +21807,10 @@ Enable support for Armv8.9-a/9.4-a translation hardening extension. Enable the RCpc3 (Release Consistency) extension. @item fp8 Enable the fp8 (8-bit floating point) extension. +@item fp8dot2 +Enable the fp8dot2 (8-bit floating point dot product) extension. +@item fp8dot4 +Enable the fp8dot4 (8-bit floating point dot product) extension. @item faminmax Enable the Floating Point Absolute Maximum/Minimum extension. diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vdot2_fpmdot.c b/gcc/testsuite/gcc.target/aarch64/simd/vdot2_fpmdot.c new file mode 100644 index 0000000..3e888a6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vdot2_fpmdot.c @@ -0,0 +1,77 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O3 -march=armv9-a+fp8dot2" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "arm_neon.h" + +/* +** test_vdot_f16_fpm: +** msr fpmr, x0 +** fdot v0.4h, v1.8b, v2.8b +** ret +*/ +float16x4_t +test_vdot_f16_fpm (float16x4_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d) +{ + return vdot_f16_mf8_fpm (a, b, c, d); +} + +/* +** test_vdotq_f16_fpm: +** msr fpmr, x0 +** fdot v0.8h, v1.16b, v2.16b +** ret +*/ +float16x8_t +test_vdotq_f16_fpm (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d) +{ + return vdotq_f16_mf8_fpm (a, b, c, d); +} + +/* +** test_vdot_lane_f16_fpm: +** msr fpmr, x0 +** fdot v0.4h, v1.8b, v2.2b\[1\] +** ret +*/ +float16x4_t +test_vdot_lane_f16_fpm (float16x4_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d) +{ + return vdot_lane_f16_mf8_fpm (a, b, c, 1, d); +} + +/* +** test_vdot_laneq_f16_fpm: +** msr fpmr, x0 +** fdot v0.4h, v1.8b, v2.2b\[1\] +** ret +*/ +float16x4_t +test_vdot_laneq_f16_fpm (float16x4_t a, mfloat8x8_t b, mfloat8x16_t c, fpm_t d) +{ + return vdot_laneq_f16_mf8_fpm (a, b, c, 1, d); +} + +/* +** test_vdotq_lane_f16_fpm: +** msr fpmr, x0 +** fdot v0.8h, v1.16b, v2.2b\[1\] +** ret +*/ +float16x8_t +test_vdotq_lane_f16_fpm (float16x8_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d) +{ + return vdotq_lane_f16_mf8_fpm (a, b, c, 1, d); +} + +/* +** test_vdotq_laneq_f16_fpm: +** msr fpmr, x0 +** fdot v0.8h, v1.16b, v2.2b\[1\] +** ret +*/ +float16x8_t +test_vdotq_laneq_f16_fpm (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d) +{ + return vdotq_laneq_f16_mf8_fpm (a, b, c, 1, d); +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vdot4_fpmdot.c b/gcc/testsuite/gcc.target/aarch64/simd/vdot4_fpmdot.c new file mode 100644 index 0000000..f03dd0a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vdot4_fpmdot.c @@ -0,0 +1,77 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O3 -march=armv9-a+fp8dot4" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "arm_neon.h" + +/* +** test_vdot_f32_fpm: +** msr fpmr, x0 +** fdot v0.2s, v1.8b, v2.8b +** ret +*/ +float32x2_t +test_vdot_f32_fpm (float32x2_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d) +{ + return vdot_f32_mf8_fpm (a, b, c, d); +} + +/* +** test_vdotq_f32_fpm: +** msr fpmr, x0 +** fdot v0.4s, v1.16b, v2.16b +** ret +*/ +float32x4_t +test_vdotq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d) +{ + return vdotq_f32_mf8_fpm (a, b, c, d); +} + +/* +** test_vdot_lane_f32_fpm: +** msr fpmr, x0 +** fdot v0.2s, v1.8b, v2.4b\[1\] +** ret +*/ +float32x2_t +test_vdot_lane_f32_fpm (float32x2_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d) +{ + return vdot_lane_f32_mf8_fpm (a, b, c, 1, d); +} + +/* +** test_vdot_laneq_f32_fpm: +** msr fpmr, x0 +** fdot v0.2s, v1.8b, v2.4b\[1\] +** ret +*/ +float32x2_t +test_vdot_laneq_f32_fpm (float32x2_t a, mfloat8x8_t b, mfloat8x16_t c, fpm_t d) +{ + return vdot_laneq_f32_mf8_fpm (a, b, c, 1, d); +} + +/* +** test_vdotq_lane_f32_fpm: +** msr fpmr, x0 +** fdot v0.4s, v1.16b, v2.4b\[1\] +** ret +*/ +float32x4_t +test_vdotq_lane_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d) +{ + return vdotq_lane_f32_mf8_fpm (a, b, c, 1, d); +} + +/* +** test_vdotq_laneq_f32_fpm: +** msr fpmr, x0 +** fdot v0.4s, v1.16b, v2.4b\[1\] +** ret +*/ +float32x4_t +test_vdotq_laneq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d) +{ + return vdotq_laneq_f32_mf8_fpm (a, b, c, 1, d); +} |