[AArch64] Change representation of SABD in RTL

Richard raised a concern about the RTL we use to represent the AdvSIMD SABD (vector signed absolute difference) instruction. We currently represent it as ABS (MINUS op1 op2). This isn't exactly what SABD does. ABS treats its input as a signed value and returns the absolute value of that. For example: (sabd:QI 64 -128) == 192 (unsigned) aka -64 (signed) whereas (minus:QI 64 -128) == 192 (unsigned) aka -64 (signed), (abs ...) of that is 64. A better way to describe the instruction is with MINUS (SMAX (op1 op2) SMIN (op1 op2)). This patch implements that, and also implements similar semantics for the UABD instruction that uses UMAX and UMIN. That way for the example above we'll have: (minus:QI (smax:QI (64 -128)) (smin:QI (64 -128))) == (minus:QI 64 -128) == 192 (or -64 signed) which matches what SABD does. * config/aarch64/iterators.md (max_opp): New code_attr. (USMAX): New code iterator. * config/aarch64/predicates.md (aarch64_smin): New predicate. (aarch64_umin): Likewise. * config/aarch64/aarch64-simd.md (abd<mode>_3): Rename to... (*aarch64_<su>abd<mode>_3): ... Change RTL representation to MINUS (MAX MIN). * gcc.target/aarch64/abd_1.c: New test. * gcc.dg/sabd_1.c: Likewise. From-SVN: r268658
author: Kyrylo Tkachov <kyrylo.tkachov@arm.com> 2019-02-07 18:18:16 +0000
committer: Kyrylo Tkachov <ktkachov@gcc.gnu.org> 2019-02-07 18:18:16 +0000
commit: 8544ed6eea68a80999504c8a4b21b77d29cd86e2 (patch)
tree: e6eed6a6e7e0ba6c0e7e187fac794816343c70f5 /gcc
parent: 2b99b6c0cc2fd1da7c9d1d66c39212d7f3e4bc65 (diff)
download: gcc-8544ed6eea68a80999504c8a4b21b77d29cd86e2.zip
gcc-8544ed6eea68a80999504c8a4b21b77d29cd86e2.tar.gz
gcc-8544ed6eea68a80999504c8a4b21b77d29cd86e2.tar.bz2
7 files changed, 125 insertions, 6 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 1969ff7..0984790 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2019-02-07  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/aarch64/iterators.md (max_opp): New code_attr.
+	(USMAX): New code iterator.
+	* config/aarch64/predicates.md (aarch64_smin): New predicate.
+	(aarch64_umin): Likewise.
+	* config/aarch64/aarch64-simd.md (abd<mode>_3): Rename to...
+	(*aarch64_<su>abd<mode>_3): ... Change RTL representation to
+	MINUS (MAX MIN).
+
 2019-02-07  H.J. Lu  <hongjiu.lu@intel.com>
 
 	PR target/89229
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index cae9a8f..e3852c5 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -705,13 +705,22 @@
   [(set_attr "type" "neon_abs<q>")]
 )
 
-(define_insn "abd<mode>_3"
+;; It's tempting to represent SABD as ABS (MINUS op1 op2).
+;; This isn't accurate as ABS always treats its input as a signed value.
+;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
+;; Whereas SABD would return 192 (-64 signed) on the above example.
+;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
+(define_insn "*aarch64_<su>abd<mode>_3"
   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
-	(abs:VDQ_BHSI (minus:VDQ_BHSI
-		       (match_operand:VDQ_BHSI 1 "register_operand" "w")
-		       (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
-  "TARGET_SIMD"
-  "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+	(minus:VDQ_BHSI
+	  (USMAX:VDQ_BHSI
+	    (match_operand:VDQ_BHSI 1 "register_operand" "w")
+	    (match_operand:VDQ_BHSI 2 "register_operand" "w"))
+	  (match_operator 3 "aarch64_<max_opp>"
+	    [(match_dup 1)
+	     (match_dup 2)])))]
+  "TARGET_SIMD"
+  "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
   [(set_attr "type" "neon_abd<q>")]
 )
 
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 85fa161..6caeeac 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1052,6 +1052,9 @@
 
 (define_code_attr f16mac [(plus "a") (minus "s")])
 
+;; Map smax to smin and umax to umin.
+(define_code_attr max_opp [(smax "smin") (umax "umin")])
+
 ;; The number of subvectors in an SVE_STRUCT.
 (define_mode_attr vector_count [(VNx32QI "2") (VNx16HI "2")
 				(VNx8SI  "2") (VNx4DI  "2")
@@ -1200,6 +1203,9 @@
 
 (define_code_iterator FMAXMIN [smax smin])
 
+;; Signed and unsigned max operations.
+(define_code_iterator USMAX [smax umax])
+
 ;; Code iterator for variants of vector max and min.
 (define_code_iterator ADDSUB [plus minus])
 
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 855cf7b..b8e6d23 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -319,6 +319,12 @@
   (ior (match_operand 0 "register_operand")
        (match_operand 0 "const_scalar_int_operand")))
 
+(define_predicate "aarch64_smin"
+  (match_code "smin"))
+
+(define_predicate "aarch64_umin"
+  (match_code "umin"))
+
 ;; True for integer comparisons and for FP comparisons other than LTGT or UNEQ.
 (define_special_predicate "aarch64_comparison_operator"
   (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,unordered,
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 5004a5e..9951b17 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2019-02-07  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* gcc.target/aarch64/abd_1.c: New test.
+	* gcc.dg/sabd_1.c: Likewise.
+
 2019-02-07  Dominique d'Humieres  <dominiq@gcc.gnu.org>
 
 	PR fortran/52789
diff --git a/gcc/testsuite/gcc.dg/sabd_1.c b/gcc/testsuite/gcc.dg/sabd_1.c
new file mode 100644
index 0000000..587e305
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/sabd_1.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fwrapv" } */
+/* Make sure vectorized absolute difference behaves the same as the scalar version.  */
+
+#define N 16
+signed char a[] = {-100, -100, -100, -100,-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100 };
+signed char b[] = { 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100 };
+
+signed char out[N];
+
+__attribute__ ((noinline,noipa))
+void
+foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      signed char diff = b[i] - a[i];
+      out[i] = diff > 0 ? diff : -diff;
+    }
+}
+
+signed char out2[N];
+
+__attribute__ ((noinline,noipa))
+void
+foo_scalar (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      asm volatile ("");
+      signed char diff = b[i] - a[i];
+      out2[i] = diff > 0 ? diff : -diff;
+    }
+}
+
+int
+main (void)
+{
+  foo ();
+  foo_scalar ();
+  for (int i = 0; i < N; i++)
+    if (out[i] != out2[i])
+      __builtin_abort ();
+
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/abd_1.c b/gcc/testsuite/gcc.target/aarch64/abd_1.c
new file mode 100644
index 0000000..a27cb5e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/abd_1.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+#pragma GCC target "+nosve"
+
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+#define N 1024
+
+#define FUNC(T)								\
+void									\
+sabd_##T (signed T * restrict a, signed T * restrict b,		\
+		signed T * restrict out)				\
+{									\
+  for (int i = 0; i < N; i++)						\
+    out[i] = MAX (a[i], b[i]) - MIN (a[i], b[i]);			\
+}									\
+									\
+void									\
+uabd_##T (unsigned T * restrict a, unsigned T * restrict b,	\
+		  unsigned T * restrict out)				\
+{									\
+  for (int i = 0; i < N; i++)						\
+    out[i] = MAX (a[i], b[i]) - MIN (a[i], b[i]);			\
+}
+
+FUNC(char)
+FUNC(short)
+FUNC(int)
+
+/* { dg-final { scan-assembler-times "sabd\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "uabd\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "sabd\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h, v\[0-9\]+\.8h" 1 } } */
+/* { dg-final { scan-assembler-times "uabd\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h, v\[0-9\]+\.8h" 1 } } */
+/* { dg-final { scan-assembler-times "sabd\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */
+/* { dg-final { scan-assembler-times "uabd\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */
author	Kyrylo Tkachov <kyrylo.tkachov@arm.com>	2019-02-07 18:18:16 +0000
committer	Kyrylo Tkachov <ktkachov@gcc.gnu.org>	2019-02-07 18:18:16 +0000
commit	8544ed6eea68a80999504c8a4b21b77d29cd86e2 (patch)
tree	e6eed6a6e7e0ba6c0e7e187fac794816343c70f5 /gcc
parent	2b99b6c0cc2fd1da7c9d1d66c39212d7f3e4bc65 (diff)
download	gcc-8544ed6eea68a80999504c8a4b21b77d29cd86e2.zip gcc-8544ed6eea68a80999504c8a4b21b77d29cd86e2.tar.gz gcc-8544ed6eea68a80999504c8a4b21b77d29cd86e2.tar.bz2