aarch64: PR target/99195 Annotate saturating mult patterns for vec-concat-zero

This patch goes through the various alphabet soup saturating multiplication patterns, including those in TARGET_RDMA, and annotates them with <vczle><vczbe>. Many other patterns are widening and always write the full 128-bit vectors, so this annotation doesn't apply to them. Nothing out of the ordinary in this patch.

Bootstrapped and tested on aarch64-none-linux and aarch64_be-none-elf.

gcc/ChangeLog:

	PR target/99195
	* config/aarch64/aarch64-simd.md (aarch64_sq<r>dmulh<mode>): Rename to...
	(aarch64_sq<r>dmulh<mode><vczle><vczbe>): ... This.
	(aarch64_sq<r>dmulh_n<mode>): Rename to...
	(aarch64_sq<r>dmulh_n<mode><vczle><vczbe>): ... This.
	(aarch64_sq<r>dmulh_lane<mode>): Rename to...
	(aarch64_sq<r>dmulh_lane<mode><vczle><vczbe>): ... This.
	(aarch64_sq<r>dmulh_laneq<mode>): Rename to...
	(aarch64_sq<r>dmulh_laneq<mode><vczle><vczbe>): ... This.
	(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>): Rename to...
	(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode><vczle><vczbe>): ... This.
	(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Rename to...
	(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>): ... This.
	(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>): Rename to...
	(aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>): ... This.

gcc/testsuite/ChangeLog:

	PR target/99195
	* gcc.target/aarch64/simd/pr99195_1.c: Add tests for qdmulh, qrdmulh.
	* gcc.target/aarch64/simd/pr99195_10.c: New test.
author: Kyrylo Tkachov <kyrylo.tkachov@arm.com> 2023-05-31 17:43:20 +0100
committer: Kyrylo Tkachov <kyrylo.tkachov@arm.com> 2023-05-31 17:43:20 +0100
commit: 547d3bce0c02dbcbb6f62d9469a71eedf17bd688 (patch)
tree: 8f43d602d338193a25b829f87381487e2ca6e991 /gcc
parent: 7aae58b04b92303ccda3ead600be98f0d4b7f462 (diff)
download: gcc-547d3bce0c02dbcbb6f62d9469a71eedf17bd688.zip
gcc-547d3bce0c02dbcbb6f62d9469a71eedf17bd688.tar.gz
gcc-547d3bce0c02dbcbb6f62d9469a71eedf17bd688.tar.bz2
3 files changed, 54 insertions, 11 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 2cd8b82..1efae8d 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -5510,7 +5510,7 @@
 
 ;; sq<r>dmulh.
 
-(define_insn "aarch64_sq<r>dmulh<mode>"
+(define_insn "aarch64_sq<r>dmulh<mode><vczle><vczbe>"
   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
 	(unspec:VSDQ_HSI
 	  [(match_operand:VSDQ_HSI 1 "register_operand" "w")
@@ -5521,7 +5521,7 @@
   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
 )
 
-(define_insn "aarch64_sq<r>dmulh_n<mode>"
+(define_insn "aarch64_sq<r>dmulh_n<mode><vczle><vczbe>"
   [(set (match_operand:VDQHS 0 "register_operand" "=w")
 	(unspec:VDQHS
 	  [(match_operand:VDQHS 1 "register_operand" "w")
@@ -5535,7 +5535,7 @@
 
 ;; sq<r>dmulh_lane
 
-(define_insn "aarch64_sq<r>dmulh_lane<mode>"
+(define_insn "aarch64_sq<r>dmulh_lane<mode><vczle><vczbe>"
   [(set (match_operand:VDQHS 0 "register_operand" "=w")
         (unspec:VDQHS
 	  [(match_operand:VDQHS 1 "register_operand" "w")
@@ -5550,7 +5550,7 @@
   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
 )
 
-(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
+(define_insn "aarch64_sq<r>dmulh_laneq<mode><vczle><vczbe>"
   [(set (match_operand:VDQHS 0 "register_operand" "=w")
         (unspec:VDQHS
 	  [(match_operand:VDQHS 1 "register_operand" "w")
@@ -5597,7 +5597,7 @@
 
 ;; sqrdml[as]h.
 
-(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
+(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode><vczle><vczbe>"
   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
 	(unspec:VSDQ_HSI
 	  [(match_operand:VSDQ_HSI 1 "register_operand" "0")
@@ -5611,7 +5611,7 @@
 
 ;; sqrdml[as]h_lane.
 
-(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
+(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>"
   [(set (match_operand:VDQHS 0 "register_operand" "=w")
 	(unspec:VDQHS
 	  [(match_operand:VDQHS 1 "register_operand" "0")
@@ -5629,7 +5629,7 @@
    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
 )
 
-(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
+(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>"
   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
 	(unspec:SD_HSI
 	  [(match_operand:SD_HSI 1 "register_operand" "0")
@@ -5649,7 +5649,7 @@
 
 ;; sqrdml[as]h_laneq.
 
-(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
+(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>"
   [(set (match_operand:VDQHS 0 "register_operand" "=w")
 	(unspec:VDQHS
 	  [(match_operand:VDQHS 1 "register_operand" "0")
@@ -5667,7 +5667,7 @@
    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
 )
 
-(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
+(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>"
   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
 	(unspec:SD_HSI
 	  [(match_operand:SD_HSI 1 "register_operand" "0")
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
index 8b6548a..765cb27 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
@@ -66,8 +66,8 @@ OPNINETEEN (int16, 4, 8, s16, padd, add, qadd, qsub, sub, mul, and, orr, eor, or
 OPNINETEEN (int32, 2, 4, s32, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 
 OPSIX (int8, 8, 16, s8, zip1, zip2, uzp1, uzp2, shl, qshl)
-OPSIX (int16, 4, 8, s16, zip1, zip2, uzp1, uzp2, shl, qshl)
-OPSIX (int32, 2, 4, s32, zip1, zip2, uzp1, uzp2, shl, qshl)
+OPEIGHT (int16, 4, 8, s16, zip1, zip2, uzp1, uzp2, shl, qshl, qdmulh, qrdmulh)
+OPEIGHT (int32, 2, 4, s32, zip1, zip2, uzp1, uzp2, shl, qshl, qdmulh, qrdmulh)
 
 OPNINETEEN (uint8, 8, 16, u8, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
 OPNINETEEN (uint16, 4, 8, u16, padd, add, qadd, qsub, sub, mul, and, orr, eor, orn, bic, max, min, hadd, rhadd, hsub, abd, pmax, pmin)
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_10.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_10.c
new file mode 100644
index 0000000..9db5400
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_10.c
@@ -0,0 +1,43 @@
+/* PR target/99195.  */
+/*  Check that we take advantage of 64-bit Advanced SIMD operations clearing
+    the top half of the vector register and no explicit zeroing instructions
+    are emitted.  */
+/* { dg-do compile } */
+/* { dg-options "-O -march=armv8.1-a+rdma" } */
+
+#include <arm_neon.h>
+
+#define OPTWO(T,IS,OS,S,OP1,OP2)        \
+FUNC (T, IS, OS, OP1, S)                \
+FUNC (T, IS, OS, OP2, S)
+
+#define TERNARY(OT,IT,OP,S)                         \
+OT                                              \
+foo_##OP##_##S (IT a, IT b, IT c)                 \
+{                                               \
+  IT zeros = vcreate_##S (0);                   \
+  return vcombine_##S (v##OP##_##S (a, b, c), zeros);      \
+}
+
+#undef FUNC
+#define FUNC(T,IS,OS,OP,S) TERNARY (T##x##OS##_t, T##x##IS##_t, OP, S)
+
+OPTWO (int16, 4, 8, s16, qrdmlah, qrdmlsh)
+OPTWO (int32, 2, 4, s32, qrdmlah, qrdmlsh)
+
+#define TERNARY_IDX(OT,IT,OP,S)                         \
+OT                                              \
+foo_##OP##_##S (IT a, IT b, IT c)                 \
+{                                               \
+  IT zeros = vcreate_##S (0);                   \
+  return vcombine_##S (v##OP##_##S (a, b, c, 0), zeros);      \
+}
+
+#undef FUNC
+#define FUNC(T,IS,OS,OP,S) TERNARY_IDX (T##x##OS##_t, T##x##IS##_t, OP, S)
+OPTWO (int16, 4, 8, s16, qrdmlah_lane, qrdmlsh_lane)
+OPTWO (int32, 2, 4, s32, qrdmlah_lane, qrdmlsh_lane)
+
+/* { dg-final { scan-assembler-not {\tfmov\t} } }  */
+/* { dg-final { scan-assembler-not {\tmov\t} } }  */
+
author	Kyrylo Tkachov <kyrylo.tkachov@arm.com>	2023-05-31 17:43:20 +0100
committer	Kyrylo Tkachov <kyrylo.tkachov@arm.com>	2023-05-31 17:43:20 +0100
commit	547d3bce0c02dbcbb6f62d9469a71eedf17bd688 (patch)
tree	8f43d602d338193a25b829f87381487e2ca6e991 /gcc
parent	7aae58b04b92303ccda3ead600be98f0d4b7f462 (diff)
download	gcc-547d3bce0c02dbcbb6f62d9469a71eedf17bd688.zip gcc-547d3bce0c02dbcbb6f62d9469a71eedf17bd688.tar.gz gcc-547d3bce0c02dbcbb6f62d9469a71eedf17bd688.tar.bz2