aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Kyrylo Tkachov <kyrylo.tkachov@arm.com> 2023-05-31 17:46:19 +0100
committer Kyrylo Tkachov <kyrylo.tkachov@arm.com> 2023-05-31 17:46:19 +0100
commit d0c064c3eabc75cf83df296ebcd1db19b4a68851 (patch)
tree e355aa63fe6c18875b8474eeade683f8082615bc /gcc
parent 547d3bce0c02dbcbb6f62d9469a71eedf17bd688 (diff)
download gcc-d0c064c3eabc75cf83df296ebcd1db19b4a68851.zip
gcc-d0c064c3eabc75cf83df296ebcd1db19b4a68851.tar.gz
gcc-d0c064c3eabc75cf83df296ebcd1db19b4a68851.tar.bz2
aarch64: PR target/99195 Annotate dot-product patterns for vec-concat-zero
This straightforward patch annotates the dot-product instructions, including the i8mm ones.
Tests included. Nothing unexpected here.

Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.

gcc/ChangeLog:

    PR target/99195
    * config/aarch64/aarch64-simd.md (<sur>dot_prod<vsi2qi>): Rename to...
    (<sur>dot_prod<vsi2qi><vczle><vczbe>): ... This.
    (usdot_prod<vsi2qi>): Rename to...
    (usdot_prod<vsi2qi><vczle><vczbe>): ... This.
    (aarch64_<sur>dot_lane<vsi2qi>): Rename to...
    (aarch64_<sur>dot_lane<vsi2qi><vczle><vczbe>): ... This.
    (aarch64_<sur>dot_laneq<vsi2qi>): Rename to...
    (aarch64_<sur>dot_laneq<vsi2qi><vczle><vczbe>): ... This.
    (aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi>): Rename to...
    (aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi><vczle><vczbe>): ... This.

gcc/testsuite/ChangeLog:

    PR target/99195
    * gcc.target/aarch64/simd/pr99195_11.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/aarch64/aarch64-simd.md 10
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/simd/pr99195_11.c 38
2 files changed, 43 insertions, 5 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 1efae8d..4904a50 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -665,7 +665,7 @@
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
-(define_insn "<sur>dot_prod<vsi2qi>"
+(define_insn "<sur>dot_prod<vsi2qi><vczle><vczbe>"
[(set (match_operand:VS 0 "register_operand" "=w")
(plus:VS
(unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
@@ -679,7 +679,7 @@
;; These instructions map to the __builtins for the Armv8.6-a I8MM usdot
;; (vector) Dot Product operation and the vectorized optab.
-(define_insn "usdot_prod<vsi2qi>"
+(define_insn "usdot_prod<vsi2qi><vczle><vczbe>"
[(set (match_operand:VS 0 "register_operand" "=w")
(plus:VS
(unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
@@ -693,7 +693,7 @@
;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
-(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
+(define_insn "aarch64_<sur>dot_lane<vsi2qi><vczle><vczbe>"
[(set (match_operand:VS 0 "register_operand" "=w")
(plus:VS
(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
@@ -709,7 +709,7 @@
[(set_attr "type" "neon_dot<q>")]
)
-(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
+(define_insn "aarch64_<sur>dot_laneq<vsi2qi><vczle><vczbe>"
[(set (match_operand:VS 0 "register_operand" "=w")
(plus:VS
(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
@@ -727,7 +727,7 @@
;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
;; (by element) Dot Product operations.
-(define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi>"
+(define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi><vczle><vczbe>"
[(set (match_operand:VS 0 "register_operand" "=w")
(plus:VS
(unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_11.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_11.c
new file mode 100644
index 0000000..1ca8c6a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_11.c
@@ -0,0 +1,38 @@
+/* PR target/99195. */
+/* Check that we take advantage of 64-bit Advanced SIMD operations clearing
+ the top half of the vector register and that no explicit zeroing
+ instructions are emitted. */
+/* { dg-do compile } */
+/* { dg-options "-O -march=armv8.2-a+dotprod" } */
+
+#include <arm_neon.h>
+
+#define DOTPROD(OT,AT,IT1,IT2,OP,S) /* Defines foo_OP_S, wrapping vOP_S.  */ \
+OT \
+foo_##OP##_##S (AT a, IT1 b, IT2 c) \
+{ \
+ AT zeros = vcreate_##S (0); /* All-zero AT vector for the upper half.  */ \
+ return vcombine_##S (v##OP##_##S (a, b, c), zeros); \
+}
+
+#define DOTPROD_IDX(OT,AT,IT1,IT2,OP,S) /* Defines foo_OP_S, wrapping vOP_S.  */ \
+OT \
+foo_##OP##_##S (AT a, IT1 b, IT2 c) \
+{ /* The fourth (index) argument of vOP_S is hard-coded to 1.  */ \
+ AT zeros = vcreate_##S (0); /* All-zero AT vector for the upper half.  */ \
+ return vcombine_##S (v##OP##_##S (a, b, c, 1), zeros); \
+}
+
+DOTPROD (int32x4_t, int32x2_t, int8x8_t, int8x8_t, dot, s32)
+DOTPROD (uint32x4_t, uint32x2_t, uint8x8_t, uint8x8_t, dot, u32)
+DOTPROD_IDX (int32x4_t, int32x2_t, int8x8_t, int8x8_t, dot_lane, s32)
+DOTPROD_IDX (uint32x4_t, uint32x2_t, uint8x8_t, uint8x8_t, dot_lane, u32)
+
+#pragma GCC target ("+i8mm")
+DOTPROD (int32x4_t, int32x2_t, uint8x8_t, int8x8_t, usdot, s32)
+DOTPROD_IDX (int32x4_t, int32x2_t, uint8x8_t, int8x8_t, usdot_lane, s32)
+DOTPROD_IDX (int32x4_t, int32x2_t, int8x8_t, uint8x8_t, sudot_lane, s32)
+
+/* { dg-final { scan-assembler-not {\tfmov\t} } } */
+/* { dg-final { scan-assembler-not {\tmov\t} } } */
+