aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2020-05-08 09:33:55 +0200
committerJakub Jelinek <jakub@redhat.com>2020-05-08 09:33:55 +0200
commitdf569f7da567af4996821dc0a1871eec79957d04 (patch)
treecc4ced8f3fc3149413fca20171fb9bedf899abed /gcc
parentff33680165346cb291667f38dd2e9f25a74cc3c3 (diff)
downloadgcc-df569f7da567af4996821dc0a1871eec79957d04.zip
gcc-df569f7da567af4996821dc0a1871eec79957d04.tar.gz
gcc-df569f7da567af4996821dc0a1871eec79957d04.tar.bz2
match.pd: Optimize ffs of known non-zero arg into ctz + 1 [PR94956]
The ffs expanders on several targets (x86, ia64, aarch64 at least) emit a conditional move or similar code to handle the case when the argument is 0, which makes the code longer. If we know from VRP that the argument will not be zero, we can (if the target also has a ctz expander) just use ctz, which is undefined at zero, so the expander doesn't need to deal with that case. 2020-05-08 Jakub Jelinek <jakub@redhat.com> PR tree-optimization/94956 * match.pd (FFS): Optimize __builtin_ffs* of non-zero argument into __builtin_ctz* + 1 if direct IFN_CTZ is supported. * gcc.target/i386/pr94956.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog4
-rw-r--r--gcc/match.pd10
-rw-r--r--gcc/testsuite/ChangeLog3
-rw-r--r--gcc/testsuite/gcc.target/i386/pr94956.c28
4 files changed, 45 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index eb4924a..5bad3ff 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,9 @@
2020-05-08 Jakub Jelinek <jakub@redhat.com>
+ PR tree-optimization/94956
+ * match.pd (FFS): Optimize __builtin_ffs* of non-zero argument into
+ __builtin_ctz* + 1 if direct IFN_CTZ is supported.
+
PR tree-optimization/94913
* match.pd (A - B + -1 >= A to B >= A): New simplification.
(A - B > A to A < B): Don't test TYPE_OVERFLOW_WRAPS which is always
diff --git a/gcc/match.pd b/gcc/match.pd
index cfe9697..892df1e 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5986,6 +5986,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& direct_internal_fn_supported_p (IFN_POPCOUNT, type,
OPTIMIZE_FOR_BOTH))
(convert (IFN_POPCOUNT:type @0)))))
+
+/* On many targets __builtin_ffs has to handle a possibly zero argument.
+ If the argument is known to be non-zero, use __builtin_ctz + 1 instead,
+ provided IFN_CTZ is directly supported for the argument's type. */
+(simplify
+ (FFS tree_expr_nonzero_p@0)
+ (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (@0),
+ OPTIMIZE_FOR_SPEED))
+ (plus (CTZ:type @0) { build_one_cst (type); }))) /* NOTE(review): @0 is passed to CTZ unconverted -- confirm its signedness is acceptable. */
#endif
/* Simplify:
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 43e226e..e8c54c7 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,8 @@
2020-05-08 Jakub Jelinek <jakub@redhat.com>
+ PR tree-optimization/94956
+ * gcc.target/i386/pr94956.c: New test.
+
PR tree-optimization/94913
* gcc.dg/tree-ssa/pr94913.c: New test.
diff --git a/gcc/testsuite/gcc.target/i386/pr94956.c b/gcc/testsuite/gcc.target/i386/pr94956.c
new file mode 100644
index 0000000..cc27b45
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr94956.c
@@ -0,0 +1,28 @@
+/* PR tree-optimization/94956 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "\tcmovne\t" } } */
+/* { dg-final { scan-assembler-not "\tsete\t" } } */
+
+int
+foo (unsigned x) /* __builtin_ffs variant of the test. */
+{
+ if (x == 0) __builtin_unreachable (); /* Makes x known to be non-zero. */
+ return __builtin_ffs (x) - 1; /* Should need no zero-argument handling. */
+}
+
+int
+bar (unsigned long x) /* __builtin_ffsl variant of the test. */
+{
+ if (x == 0) __builtin_unreachable (); /* Makes x known to be non-zero. */
+ return __builtin_ffsl (x) - 1; /* Should need no zero-argument handling. */
+}
+
+#ifdef __x86_64__ /* The long long variant is only compiled for x86_64. */
+int
+baz (unsigned long long x) /* __builtin_ffsll variant of the test. */
+{
+ if (x == 0) __builtin_unreachable (); /* Makes x known to be non-zero. */
+ return __builtin_ffsll (x) - 1; /* Should need no zero-argument handling. */
+}
+#endif