aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/aarch64/aarch64-builtins.cc107
-rw-r--r--gcc/config/aarch64/aarch64-c.cc4
-rw-r--r--gcc/config/aarch64/aarch64-option-extensions.def4
-rw-r--r--gcc/config/aarch64/aarch64-simd-pragma-builtins.def39
-rw-r--r--gcc/config/aarch64/aarch64-simd.md58
-rw-r--r--gcc/config/aarch64/aarch64.h6
-rw-r--r--gcc/config/aarch64/iterators.md19
-rw-r--r--gcc/doc/invoke.texi4
-rw-r--r--gcc/testsuite/gcc.target/aarch64/simd/vdot2_fpmdot.c77
-rw-r--r--gcc/testsuite/gcc.target/aarch64/simd/vdot4_fpmdot.c77
10 files changed, 380 insertions, 15 deletions
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
index 9b7280a..a71c8c9 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -780,7 +780,7 @@ typedef struct
AARCH64_SIMD_BUILTIN_##T##_##N##A,
#undef ENTRY
-#define ENTRY(N, S, M0, M1, M2, M3, USES_FPMR, U) \
+#define ENTRY(N, S, M0, M1, M2, M3, M4, USES_FPMR, U) \
AARCH64_##N,
enum aarch64_builtins
@@ -1590,9 +1590,10 @@ aarch64_init_simd_builtin_functions (bool called_from_pragma)
enum class aarch64_builtin_signatures
{
+ unary,
binary,
ternary,
- unary,
+ quaternary,
};
namespace {
@@ -1617,6 +1618,7 @@ namespace simd_types {
constexpr simd_type s16q { V8HImode, qualifier_none };
constexpr simd_type u16q { V8HImode, qualifier_unsigned };
+ constexpr simd_type s32_index { SImode, qualifier_lane_index };
constexpr simd_type s32 { V2SImode, qualifier_none };
constexpr simd_type s32q { V4SImode, qualifier_none };
@@ -1642,10 +1644,10 @@ namespace simd_types {
}
#undef ENTRY
-#define ENTRY(N, S, T0, T1, T2, T3, USES_FPMR, U) \
+#define ENTRY(N, S, T0, T1, T2, T3, T4, USES_FPMR, U) \
{#N, aarch64_builtin_signatures::S, simd_types::T0, simd_types::T1, \
- simd_types::T2, simd_types::T3, U, USES_FPMR, \
- aarch64_required_extensions::REQUIRED_EXTENSIONS},
+ simd_types::T2, simd_types::T3, simd_types::T4, U, \
+ USES_FPMR, aarch64_required_extensions::REQUIRED_EXTENSIONS},
/* Initialize pragma builtins. */
@@ -1653,7 +1655,7 @@ struct aarch64_pragma_builtins_data
{
const char *name;
aarch64_builtin_signatures signature;
- simd_type types[4];
+ simd_type types[5];
int unspec;
bool uses_fpmr;
aarch64_required_extensions required_extensions;
@@ -1672,6 +1674,8 @@ aarch64_get_number_of_args (const aarch64_pragma_builtins_data &builtin_data)
return 2;
else if (builtin_data.signature == aarch64_builtin_signatures::ternary)
return 3;
+ else if (builtin_data.signature == aarch64_builtin_signatures::quaternary)
+ return 4;
else
// No other signature supported.
gcc_unreachable ();
@@ -2504,6 +2508,72 @@ aarch64_general_required_extensions (unsigned int code)
return ext::streaming_compatible (0);
}
+namespace function_checker {
+
+void
+require_integer_constant (location_t location, tree arg)
+{
+ if (TREE_CODE (arg) != INTEGER_CST)
+ {
+ error_at (location, "Constant-type integer argument expected");
+ return;
+ }
+}
+
+void
+require_immediate_range (location_t location, tree arg, HOST_WIDE_INT min,
+ HOST_WIDE_INT max)
+{
+ if (wi::to_widest (arg) < min || wi::to_widest (arg) > max)
+ {
+ error_at (location, "lane out of range %wd - %wd", min, max);
+ return;
+ }
+}
+
+/* Validates indexing into a vector using the index's size and the instruction,
+ where instruction is represented by the unspec.
+ This only works for intrinsics declared using pragmas in
+ aarch64-simd-pragma-builtins.def. */
+
+void
+check_simd_lane_bounds (location_t location, const aarch64_pragma_builtins_data
+ *builtin_data, tree *args)
+{
+ if (builtin_data == NULL)
+ // Don't check for functions that are not declared in
+ // aarch64-simd-pragma-builtins.def.
+ return;
+
+ auto nargs = aarch64_get_number_of_args (*builtin_data);
+ switch (builtin_data->unspec)
+ {
+ case UNSPEC_VDOT2:
+ case UNSPEC_VDOT4:
+ {
+ if (builtin_data->types[nargs].qualifiers != qualifier_lane_index)
+ break;
+
+ auto index_arg = args[nargs - 1];
+ require_integer_constant (location, index_arg);
+
+ auto vector_to_index_mode = builtin_data->types[nargs - 1].mode;
+ int vector_to_index_mode_size
+ = GET_MODE_NUNITS (vector_to_index_mode).to_constant ();
+
+ auto low = 0;
+ int high
+ = builtin_data->unspec == UNSPEC_VDOT2
+ ? vector_to_index_mode_size / 2 - 1
+ : vector_to_index_mode_size / 4 - 1;
+ require_immediate_range (location, index_arg, low, high);
+ break;
+ }
+ }
+}
+
+};
+
bool
aarch64_general_check_builtin_call (location_t location, vec<location_t>,
unsigned int code, tree fndecl,
@@ -2515,6 +2585,9 @@ aarch64_general_check_builtin_call (location_t location, vec<location_t>,
if (!aarch64_check_required_extensions (location, decl, required_extensions))
return false;
+ auto builtin_data = aarch64_get_pragma_builtin (code);
+ function_checker::check_simd_lane_bounds (location, builtin_data, args);
+
switch (code)
{
case AARCH64_RSR:
@@ -3477,6 +3550,28 @@ aarch64_expand_pragma_builtin (tree exp, rtx target,
expand_insn (icode, nargs + 1, ops);
break;
+ case UNSPEC_VDOT2:
+ case UNSPEC_VDOT4:
+ if (builtin_data->signature == aarch64_builtin_signatures::ternary)
+ icode = code_for_aarch64 (builtin_data->unspec,
+ builtin_data->types[0].mode,
+ builtin_data->types[1].mode,
+ builtin_data->types[2].mode,
+ builtin_data->types[3].mode);
+ else if
+ (builtin_data->signature == aarch64_builtin_signatures::quaternary)
+ icode = code_for_aarch64 (builtin_data->unspec,
+ builtin_data->types[0].mode,
+ builtin_data->types[1].mode,
+ builtin_data->types[2].mode,
+ builtin_data->types[3].mode,
+ builtin_data->types[4].mode);
+ else
+ gcc_unreachable ();
+
+ expand_insn (icode, nargs + 1, ops);
+ break;
+
default:
gcc_unreachable ();
}
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index b13366b..ae1472e 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -260,6 +260,10 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
aarch64_def_or_undef (TARGET_FP8, "__ARM_FEATURE_FP8", pfile);
+ aarch64_def_or_undef (TARGET_FP8DOT2, "__ARM_FEATURE_FP8DOT2", pfile);
+
+ aarch64_def_or_undef (TARGET_FP8DOT4, "__ARM_FEATURE_FP8DOT4", pfile);
+
aarch64_def_or_undef (TARGET_LS64,
"__ARM_FEATURE_LS64", pfile);
aarch64_def_or_undef (TARGET_RCPC, "__ARM_FEATURE_RCPC", pfile);
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index c9d419a..44d2e18 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -236,6 +236,10 @@ AARCH64_OPT_EXTENSION("gcs", GCS, (), (), (), "gcs")
AARCH64_OPT_EXTENSION("fp8", FP8, (SIMD), (), (), "fp8")
+AARCH64_OPT_EXTENSION("fp8dot2", FP8DOT2, (SIMD), (), (), "fp8dot2")
+
+AARCH64_OPT_EXTENSION("fp8dot4", FP8DOT4, (SIMD), (), (), "fp8dot4")
+
AARCH64_OPT_EXTENSION("faminmax", FAMINMAX, (SIMD), (), (), "faminmax")
#undef AARCH64_OPT_FMV_EXTENSION
diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
index 91897cf..4a94a66 100644
--- a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
@@ -20,20 +20,33 @@
#undef ENTRY_BINARY
-#define ENTRY_BINARY(N, T0, T1, T2, U) \
- ENTRY (N, binary, T0, T1, T2, none, false, U)
+#define ENTRY_BINARY(N, T0, T1, T2, U) \
+ ENTRY (N, binary, T0, T1, T2, none, none, false, U)
#undef ENTRY_BINARY_FPM
-#define ENTRY_BINARY_FPM(N, T0, T1, T2, U) \
- ENTRY (N, binary, T0, T1, T2, none, true, U)
+#define ENTRY_BINARY_FPM(N, T0, T1, T2, U) \
+ ENTRY (N, binary, T0, T1, T2, none, none, true, U)
#undef ENTRY_TERNARY_FPM
-#define ENTRY_TERNARY_FPM(N, T0, T1, T2, T3, U) \
- ENTRY (N, ternary, T0, T1, T2, T3, true, U)
+#define ENTRY_TERNARY_FPM(N, T0, T1, T2, T3, U) \
+ ENTRY (N, ternary, T0, T1, T2, T3, none, true, U)
+
+#undef ENTRY_TERNARY_FPM_LANE
+#define ENTRY_TERNARY_FPM_LANE(N, T0, T1, T2, T3, U) \
+ ENTRY (N, quaternary, T0, T1, T2, T3, s32_index, true, U)
#undef ENTRY_UNARY_FPM
-#define ENTRY_UNARY_FPM(N, T0, T1, U) \
- ENTRY (N, unary, T0, T1, none, none, true, U)
+#define ENTRY_UNARY_FPM(N, T0, T1, U) \
+ ENTRY (N, unary, T0, T1, none, none, none, true, U)
+
+#undef ENTRY_VDOT_FPM
+#define ENTRY_VDOT_FPM(T, U) \
+ ENTRY_TERNARY_FPM (vdot_##T##_mf8_fpm, T, T, f8, f8, U) \
+ ENTRY_TERNARY_FPM (vdotq_##T##_mf8_fpm, T##q, T##q, f8q, f8q, U) \
+ ENTRY_TERNARY_FPM_LANE (vdot_lane_##T##_mf8_fpm, T, T, f8, f8, U) \
+ ENTRY_TERNARY_FPM_LANE (vdot_laneq_##T##_mf8_fpm, T, T, f8, f8q, U) \
+ ENTRY_TERNARY_FPM_LANE (vdotq_lane_##T##_mf8_fpm, T##q, T##q, f8q, f8, U) \
+ ENTRY_TERNARY_FPM_LANE (vdotq_laneq_##T##_mf8_fpm, T##q, T##q, f8q, f8q, U)
#undef ENTRY_VHSDF
#define ENTRY_VHSDF(NAME, UNSPEC) \
@@ -83,3 +96,13 @@ ENTRY_TERNARY_FPM (vcvt_high_mf8_f32_fpm, f8q, f8, f32q, f32q, UNSPEC_VCVT_HIGH)
#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8)
ENTRY_VHSDF_VHSDI (vscale, UNSPEC_FSCALE)
#undef REQUIRED_EXTENSIONS
+
+// fpm dot2 product
+#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8DOT2)
+ENTRY_VDOT_FPM (f16, UNSPEC_VDOT2)
+#undef REQUIRED_EXTENSIONS
+
+// fpm dot4 product
+#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8DOT4)
+ENTRY_VDOT_FPM (f32, UNSPEC_VDOT4)
+#undef REQUIRED_EXTENSIONS
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index f843746..7b97486 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -10097,3 +10097,61 @@
"TARGET_FP8"
"<fpm_uns_op>\t%0.<VHSDF:Vtype>, %1.<VHSDF:Vtype>, %2.<VHSDI:Vtype>"
)
+
+;; fpm vdot2 instructions.
+(define_insn
+ "@aarch64_<fpm_uns_op><VHF:mode><VHF:mode><VB:mode><VB:mode>"
+ [(set (match_operand:VHF 0 "register_operand" "=w")
+ (unspec:VHF
+ [(match_operand:VHF 1 "register_operand" "w")
+ (match_operand:VB 2 "register_operand" "w")
+ (match_operand:VB 3 "register_operand" "w")
+ (reg:DI FPM_REGNUM)]
+ FPM_VDOT2_UNS))]
+ "TARGET_FP8DOT2"
+ "<fpm_uns_op>\t%1.<VHF:Vtype>, %2.<VB:Vtype>, %3.<VB:Vtype>"
+)
+
+;; fpm vdot2 instructions with lane.
+(define_insn
+ "@aarch64_<fpm_uns_op><VHF:mode><VHF:mode><VB:mode><VB2:mode><SI_ONLY:mode>"
+ [(set (match_operand:VHF 0 "register_operand" "=w")
+ (unspec:VHF
+ [(match_operand:VHF 1 "register_operand" "w")
+ (match_operand:VB 2 "register_operand" "w")
+ (match_operand:VB2 3 "register_operand" "w")
+ (match_operand:SI_ONLY 4 "const_int_operand" "n")
+ (reg:DI FPM_REGNUM)]
+ FPM_VDOT2_UNS))]
+ "TARGET_FP8DOT2"
+ "<fpm_uns_op>\t%1.<VHF:Vtype>, %2.<VB:Vtype>, %3.<VHF:Vdotlanetype>[%4]"
+)
+
+;; fpm vdot4 instructions.
+(define_insn
+ "@aarch64_<fpm_uns_op><VDQSF:mode><VDQSF:mode><VB:mode><VB:mode>"
+ [(set (match_operand:VDQSF 0 "register_operand" "=w")
+ (unspec:VDQSF
+ [(match_operand:VDQSF 1 "register_operand" "w")
+ (match_operand:VB 2 "register_operand" "w")
+ (match_operand:VB 3 "register_operand" "w")
+ (reg:DI FPM_REGNUM)]
+ FPM_VDOT4_UNS))]
+ "TARGET_FP8DOT4"
+ "<fpm_uns_op>\t%1.<VDQSF:Vtype>, %2.<VB:Vtype>, %3.<VB:Vtype>"
+)
+
+;; fpm vdot4 instructions with lane.
+(define_insn
+ "@aarch64_<fpm_uns_op><VDQSF:mode><VDQSF:mode><VB:mode><VB2:mode><SI_ONLY:mode>"
+ [(set (match_operand:VDQSF 0 "register_operand" "=w")
+ (unspec:VDQSF
+ [(match_operand:VDQSF 1 "register_operand" "w")
+ (match_operand:VB 2 "register_operand" "w")
+ (match_operand:VB2 3 "register_operand" "w")
+ (match_operand:SI_ONLY 4 "const_int_operand" "n")
+ (reg:DI FPM_REGNUM)]
+ FPM_VDOT4_UNS))]
+ "TARGET_FP8DOT4"
+ "<fpm_uns_op>\t%1.<VDQSF:Vtype>, %2.<VB:Vtype>, %3.<VDQSF:Vdotlanetype>[%4]"
+)
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index f07b2c4..c50a578 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -494,6 +494,12 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
((TARGET_SVE2p1 || TARGET_STREAMING) \
&& (TARGET_SME2 || TARGET_NON_STREAMING))
+/* fp8 dot product instructions are enabled through +fp8dot2. */
+#define TARGET_FP8DOT2 AARCH64_HAVE_ISA (FP8DOT2)
+
+/* fp8 dot product instructions are enabled through +fp8dot4. */
+#define TARGET_FP8DOT4 AARCH64_HAVE_ISA (FP8DOT4)
+
/* Standard register usage. */
/* 31 64-bit general purpose registers R0-R30:
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index bdd276b..8c03dcd 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -163,6 +163,10 @@
;; Advanced SIMD Float modes.
(define_mode_iterator VDQF [V2SF V4SF V2DF])
+
+(define_mode_iterator VHF [(V4HF "TARGET_SIMD_F16INST")
+ (V8HF "TARGET_SIMD_F16INST")])
+
(define_mode_iterator VHSDF [(V4HF "TARGET_SIMD_F16INST")
(V8HF "TARGET_SIMD_F16INST")
V2SF V4SF V2DF])
@@ -321,6 +325,7 @@
;; All byte modes.
(define_mode_iterator VB [V8QI V16QI])
+(define_mode_iterator VB2 [VB])
;; 1 and 2 lane DI and DF modes.
(define_mode_iterator V12DIF [V1DI V1DF V2DI V2DF])
@@ -764,6 +769,8 @@
UNSPEC_VCVT2 ; Used in aarch64-simd.md.
UNSPEC_VCVT2_HIGH ; Used in aarch64-simd.md.
UNSPEC_VCVT2_LOW ; Used in aarch64-simd.md.
+ UNSPEC_VDOT2 ; Used in aarch64-simd.md.
+ UNSPEC_VDOT4 ; Used in aarch64-simd.md.
UNSPEC_TBL ; Used in vector permute patterns.
UNSPEC_TBLQ ; Used in vector permute patterns.
UNSPEC_TBX ; Used in vector permute patterns.
@@ -2491,6 +2498,11 @@
(VNx8HF ".h") (VNx16HF "") (VNx32HF "")
(VNx8HI ".h") (VNx16HI "") (VNx32HI "")])
+
+;; Lane index suffix for fp8 vdot operations depends on the output mode
+(define_mode_attr Vdotlanetype [(V4HF "2b") (V8HF "2b")
+ (V2SF "4b") (V4SF "4b")])
+
;; The number of bytes controlled by a predicate
(define_mode_attr data_bytes [(VNx16BI "1") (VNx8BI "2")
(VNx4BI "4") (VNx2BI "8")])
@@ -4720,7 +4732,12 @@
(UNSPEC_VCVT2_HIGH "f2cvtl2")
(UNSPEC_VCVT2_LOW "f2cvtl")])
+(define_int_iterator FPM_VDOT2_UNS [UNSPEC_VDOT2])
+(define_int_iterator FPM_VDOT4_UNS [UNSPEC_VDOT4])
+
(define_int_attr fpm_uns_op
[(UNSPEC_FSCALE "fscale")
(UNSPEC_VCVT "fcvtn")
- (UNSPEC_VCVT_HIGH "fcvtn2")])
+ (UNSPEC_VCVT_HIGH "fcvtn2")
+ (UNSPEC_VDOT2 "fdot")
+ (UNSPEC_VDOT4 "fdot")])
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 4a494f6..bc3f742 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -21807,6 +21807,10 @@ Enable support for Armv8.9-a/9.4-a translation hardening extension.
Enable the RCpc3 (Release Consistency) extension.
@item fp8
Enable the fp8 (8-bit floating point) extension.
+@item fp8dot2
+Enable the fp8dot2 (8-bit floating point dot product) extension.
+@item fp8dot4
+Enable the fp8dot4 (8-bit floating point dot product) extension.
@item faminmax
Enable the Floating Point Absolute Maximum/Minimum extension.
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vdot2_fpmdot.c b/gcc/testsuite/gcc.target/aarch64/simd/vdot2_fpmdot.c
new file mode 100644
index 0000000..3e888a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vdot2_fpmdot.c
@@ -0,0 +1,77 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -march=armv9-a+fp8dot2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon.h"
+
+/*
+** test_vdot_f16_fpm:
+** msr fpmr, x0
+** fdot v0.4h, v1.8b, v2.8b
+** ret
+*/
+float16x4_t
+test_vdot_f16_fpm (float16x4_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d)
+{
+ return vdot_f16_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vdotq_f16_fpm:
+** msr fpmr, x0
+** fdot v0.8h, v1.16b, v2.16b
+** ret
+*/
+float16x8_t
+test_vdotq_f16_fpm (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vdotq_f16_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vdot_lane_f16_fpm:
+** msr fpmr, x0
+** fdot v0.4h, v1.8b, v2.2b\[1\]
+** ret
+*/
+float16x4_t
+test_vdot_lane_f16_fpm (float16x4_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d)
+{
+ return vdot_lane_f16_mf8_fpm (a, b, c, 1, d);
+}
+
+/*
+** test_vdot_laneq_f16_fpm:
+** msr fpmr, x0
+** fdot v0.4h, v1.8b, v2.2b\[1\]
+** ret
+*/
+float16x4_t
+test_vdot_laneq_f16_fpm (float16x4_t a, mfloat8x8_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vdot_laneq_f16_mf8_fpm (a, b, c, 1, d);
+}
+
+/*
+** test_vdotq_lane_f16_fpm:
+** msr fpmr, x0
+** fdot v0.8h, v1.16b, v2.2b\[1\]
+** ret
+*/
+float16x8_t
+test_vdotq_lane_f16_fpm (float16x8_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+ return vdotq_lane_f16_mf8_fpm (a, b, c, 1, d);
+}
+
+/*
+** test_vdotq_laneq_f16_fpm:
+** msr fpmr, x0
+** fdot v0.8h, v1.16b, v2.2b\[1\]
+** ret
+*/
+float16x8_t
+test_vdotq_laneq_f16_fpm (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vdotq_laneq_f16_mf8_fpm (a, b, c, 1, d);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vdot4_fpmdot.c b/gcc/testsuite/gcc.target/aarch64/simd/vdot4_fpmdot.c
new file mode 100644
index 0000000..f03dd0a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vdot4_fpmdot.c
@@ -0,0 +1,77 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -march=armv9-a+fp8dot4" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon.h"
+
+/*
+** test_vdot_f32_fpm:
+** msr fpmr, x0
+** fdot v0.2s, v1.8b, v2.8b
+** ret
+*/
+float32x2_t
+test_vdot_f32_fpm (float32x2_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d)
+{
+ return vdot_f32_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vdotq_f32_fpm:
+** msr fpmr, x0
+** fdot v0.4s, v1.16b, v2.16b
+** ret
+*/
+float32x4_t
+test_vdotq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vdotq_f32_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vdot_lane_f32_fpm:
+** msr fpmr, x0
+** fdot v0.2s, v1.8b, v2.4b\[1\]
+** ret
+*/
+float32x2_t
+test_vdot_lane_f32_fpm (float32x2_t a, mfloat8x8_t b, mfloat8x8_t c, fpm_t d)
+{
+ return vdot_lane_f32_mf8_fpm (a, b, c, 1, d);
+}
+
+/*
+** test_vdot_laneq_f32_fpm:
+** msr fpmr, x0
+** fdot v0.2s, v1.8b, v2.4b\[1\]
+** ret
+*/
+float32x2_t
+test_vdot_laneq_f32_fpm (float32x2_t a, mfloat8x8_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vdot_laneq_f32_mf8_fpm (a, b, c, 1, d);
+}
+
+/*
+** test_vdotq_lane_f32_fpm:
+** msr fpmr, x0
+** fdot v0.4s, v1.16b, v2.4b\[1\]
+** ret
+*/
+float32x4_t
+test_vdotq_lane_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+ return vdotq_lane_f32_mf8_fpm (a, b, c, 1, d);
+}
+
+/*
+** test_vdotq_laneq_f32_fpm:
+** msr fpmr, x0
+** fdot v0.4s, v1.16b, v2.4b\[1\]
+** ret
+*/
+float32x4_t
+test_vdotq_laneq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+ return vdotq_laneq_f32_mf8_fpm (a, b, c, 1, d);
+}