From 1133cfab47258c147fcb2d453465d10e72acbfd9 Mon Sep 17 00:00:00 2001
From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Wed, 3 May 2023 11:15:34 +0100
Subject: aarch64: PR target/99195 annotate simple floating-point patterns for
 vec-concat with zero

Continuing the, almost mechanical, series this patch adds annotation for some of the simple
floating-point patterns we have, and adds testing to ensure that redundant zeroing instructions
are eliminated.

Bootstrapped and tested on aarch64-none-linux-gnu and also aarch64_be-none-elf.

gcc/ChangeLog:

	PR target/99195
	* config/aarch64/aarch64-simd.md (add<mode>3): Rename to...
	(add<mode>3<vczle><vczbe>): ... This.
	(sub<mode>3): Rename to...
	(sub<mode>3<vczle><vczbe>): ... This.
	(mul<mode>3): Rename to...
	(mul<mode>3<vczle><vczbe>): ... This.
	(*div<mode>3): Rename to...
	(*div<mode>3<vczle><vczbe>): ... This.
	(neg<mode>2): Rename to...
	(neg<mode>2<vczle><vczbe>): ... This.
	(abs<mode>2): Rename to...
	(abs<mode>2<vczle><vczbe>): ... This.
	(<frint_pattern><mode>2): Rename to...
	(<frint_pattern><mode>2<vczle><vczbe>): ... This.
	(<fmaxmin><mode>3): Rename to...
	(<fmaxmin><mode>3<vczle><vczbe>): ... This.
	(*sqrt<mode>2): Rename to...
	(*sqrt<mode>2<vczle><vczbe>): ... This.

gcc/testsuite/ChangeLog:

	PR target/99195
	* gcc.target/aarch64/simd/pr99195_1.c: Add testing for some unary
	and binary floating-point ops.
	* gcc.target/aarch64/simd/pr99195_2.c: New test.
---
 gcc/config/aarch64/aarch64-simd.md                | 18 +++---
 gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c | 11 ++++
 gcc/testsuite/gcc.target/aarch64/simd/pr99195_2.c | 72 +++++++++++++++++++++++
 3 files changed, 92 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/pr99195_2.c

(limited to 'gcc')
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 0a1f12c..9ba435f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2846,7 +2846,7 @@
 
 ;; FP arithmetic operations.
 
-(define_insn "add<mode>3"
+(define_insn "add<mode>3<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
 		   (match_operand:VHSDF 2 "register_operand" "w")))]
@@ -2855,7 +2855,7 @@
   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
 )
 
-(define_insn "sub<mode>3"
+(define_insn "sub<mode>3<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
 		    (match_operand:VHSDF 2 "register_operand" "w")))]
@@ -2864,7 +2864,7 @@
   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
 )
 
-(define_insn "mul<mode>3"
+(define_insn "mul<mode>3<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
 		   (match_operand:VHSDF 2 "register_operand" "w")))]
@@ -2885,7 +2885,7 @@
   operands[1] = force_reg (<MODE>mode, operands[1]);
 })
 
-(define_insn "*div<mode>3"
+(define_insn "*div<mode>3<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
 		 (match_operand:VHSDF 2 "register_operand" "w")))]
@@ -2915,7 +2915,7 @@
   }
 )
 
-(define_insn "neg<mode>2"
+(define_insn "neg<mode>2<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
@@ -2923,7 +2923,7 @@
   [(set_attr "type" "neon_fp_neg_<stype><q>")]
 )
 
-(define_insn "abs<mode>2"
+(define_insn "abs<mode>2<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
@@ -3228,7 +3228,7 @@
 
 ;; Vector versions of the floating-point frint patterns.
 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
-(define_insn "<frint_pattern><mode>2"
+(define_insn "<frint_pattern><mode>2<vczle><vczbe>"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
 		       FRINT))]
@@ -3583,7 +3583,7 @@
 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
 ;; which implement the IEEE fmax ()/fmin () functions.
-(define_insn "<fmaxmin><mode>3"
+(define_insn "<fmaxmin><mode>3<vczle><vczbe>"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
 		      (match_operand:VHSDF 2 "register_operand" "w")]
@@ -6960,7 +6960,7 @@
     DONE;
 })
 
-(define_insn "*sqrt<mode>2"
+(define_insn "*sqrt<mode>2<vczle><vczbe>"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
   "TARGET_SIMD"
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
index 9683442..29a2e90 100644
--- a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_1.c
@@ -41,6 +41,14 @@ OPFIVE (T, IS, OS, S, OP2, OP3, OP4, OP5, OP6)
 FUNC (T, IS, OS, OP1, S)                \
 OPSIX (T, IS, OS, S, OP2, OP3, OP4, OP5, OP6, OP7)
 
+#define OPEIGHT(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8)        \
+OPTHREE (T, IS, OS, S, OP1, OP2, OP3)                \
+OPFIVE (T, IS, OS, S, OP4, OP5, OP6, OP7, OP8)
+
+#define OPTEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8,OP9,OP10)        \
+OPFIVE (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5)                \
+OPFIVE (T, IS, OS, S, OP6, OP7, OP8, OP9, OP10)
+
 #define OPELEVEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8,OP9,OP10,OP11)        \
 OPFIVE (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5)                \
 OPSIX (T, IS, OS, S, OP6, OP7, OP8, OP9, OP10, OP11)
@@ -53,6 +61,8 @@ OPELEVEN (uint8, 8, 16, u8, padd, add, sub, mul, and, orr, eor, orn, bic, max, m
 OPELEVEN (uint16, 4, 8, u16, padd, add, sub, mul, and, orr, eor, orn, bic, max, min)
 OPELEVEN (uint32, 2, 4, u32, padd, add, sub, mul, and, orr, eor, orn, bic, max, min)
 
+OPEIGHT (float32, 2, 4, f32, add, sub, mul, div, max, maxnm, min, minnm)
+
 #define UNARY(OT,IT,OP,S)			\
 OT                                              \
 foo_##IT##OP##_##S (IT a)                     \
@@ -71,6 +81,7 @@ OPFIVE (uint8, 8, 16, u8, rbit, clz, cnt, cls, mvn)
 OPTHREE (uint16, 4, 8, u16, clz, cls, mvn)
 OPTHREE (uint32, 2, 4, u32, clz, cls, mvn)
 
+OPTEN (float32, 2, 4, f32, neg, abs, sqrt, rnd, rndi, rndm, rnda, rndn, rndp, rndx)
 /* { dg-final { scan-assembler-not {\tfmov\t} } }  */
 /* { dg-final { scan-assembler-not {\tmov\t} } }  */
 
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_2.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_2.c
new file mode 100644
index 0000000..603c5ab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_2.c
@@ -0,0 +1,72 @@
+/* PR target/99195.  */
+/*  Check that we take advantage of 64-bit Advanced SIMD operations clearing
+    the top half of the vector register and no explicit zeroing instructions
+    are emitted.  */
+/* { dg-do compile } */
+/* { dg-options "-O -march=armv8.2-a+fp16" } */
+
+#include <arm_neon.h>
+
+#define BINARY(OT,IT,OP,S)                         \
+OT                                              \
+foo_##OP##_##S (IT a, IT b)                     \
+{                                               \
+  IT zeros = vcreate_##S (0);                   \
+  return vcombine_##S (v##OP##_##S (a, b), zeros);      \
+}
+
+#define FUNC(T,IS,OS,OP,S) BINARY (T##x##OS##_t, T##x##IS##_t, OP, S)
+
+#define OPTWO(T,IS,OS,S,OP1,OP2)        \
+FUNC (T, IS, OS, OP1, S)                \
+FUNC (T, IS, OS, OP2, S)
+
+#define OPTHREE(T, IS, OS, S, OP1, OP2, OP3)    \
+FUNC (T, IS, OS, OP1, S)        \
+OPTWO (T, IS, OS, S, OP2, OP3)
+
+#define OPFOUR(T,IS,OS,S,OP1,OP2,OP3,OP4)       \
+FUNC (T, IS, OS, OP1, S)                \
+OPTHREE (T, IS, OS, S, OP2, OP3, OP4)
+
+#define OPFIVE(T,IS,OS,S,OP1,OP2,OP3,OP4, OP5)  \
+FUNC (T, IS, OS, OP1, S)                \
+OPFOUR (T, IS, OS, S, OP2, OP3, OP4, OP5)
+
+#define OPSIX(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6)        \
+FUNC (T, IS, OS, OP1, S)                \
+OPFIVE (T, IS, OS, S, OP2, OP3, OP4, OP5, OP6)
+
+#define OPSEVEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7)        \
+FUNC (T, IS, OS, OP1, S)                \
+OPSIX (T, IS, OS, S, OP2, OP3, OP4, OP5, OP6, OP7)
+
+#define OPEIGHT(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8)        \
+OPTHREE (T, IS, OS, S, OP1, OP2, OP3)                \
+OPFIVE (T, IS, OS, S, OP4, OP5, OP6, OP7, OP8)
+
+#define OPTEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8,OP9,OP10)        \
+OPFIVE (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5)                \
+OPFIVE (T, IS, OS, S, OP6, OP7, OP8, OP9, OP10)
+
+#define OPELEVEN(T,IS,OS,S,OP1,OP2,OP3,OP4,OP5,OP6,OP7,OP8,OP9,OP10,OP11)        \
+OPFIVE (T, IS, OS, S, OP1, OP2, OP3, OP4, OP5)                \
+OPSIX (T, IS, OS, S, OP6, OP7, OP8, OP9, OP10, OP11)
+
+OPEIGHT (float16, 4, 8, f16, add, sub, mul, div, max, maxnm, min, minnm)
+
+#define UNARY(OT,IT,OP,S)			\
+OT                                              \
+foo_##IT##OP##_##S (IT a)                     \
+{                                               \
+  IT zeros = vcreate_##S (0);                   \
+  return vcombine_##S ((IT) v##OP##_##S (a), zeros);      \
+}
+
+#undef FUNC
+#define FUNC(T,IS,OS,OP,S) UNARY (T##x##OS##_t, T##x##IS##_t, OP, S)
+
+OPTEN (float16, 4, 8, f16, neg, abs, sqrt, rnd, rndi, rndm, rnda, rndn, rndp, rndx)
+/* { dg-final { scan-assembler-not {\tfmov\t} } }  */
+/* { dg-final { scan-assembler-not {\tmov\t} } }  */
+
-- 
cgit v1.1