aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Wilco Dijkstra <wdijkstr@arm.com> 2020-02-12 18:23:21 +0000
committer Wilco Dijkstra <wdijkstr@arm.com> 2020-02-12 18:23:21 +0000
commit 5bfc8303ffe2d86e938d45f13cd99a39469dac4f (patch)
tree 6b2032e18a9ea0f2a430b61e0f7fe5817e7ef7fb
parent 9921bbf9b2e27568d952fe6ee5bc083c93bbf7fd (diff)
download gcc-5bfc8303ffe2d86e938d45f13cd99a39469dac4f.zip
gcc-5bfc8303ffe2d86e938d45f13cd99a39469dac4f.tar.gz
gcc-5bfc8303ffe2d86e938d45f13cd99a39469dac4f.tar.bz2
[AArch64] Set ctz rtx_cost (PR93565)
Combine sometimes behaves oddly and duplicates ctz to remove an unnecessary
sign extension. Avoid this by setting the cost for ctz to be higher than that
of a simple ALU instruction. Deepsjeng performance improves by ~0.6%.

gcc/
    PR rtl-optimization/93565
    * config/aarch64/aarch64.c (aarch64_rtx_costs): Add CTZ costs.

testsuite/
    PR rtl-optimization/93565
    * gcc.target/aarch64/pr93565.c: New test.
-rw-r--r-- gcc/ChangeLog 5
-rw-r--r-- gcc/config/aarch64/aarch64.c 7
-rw-r--r-- gcc/testsuite/ChangeLog 5
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/pr93565.c 34
4 files changed, 51 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 36e75b3..735cc47 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,10 @@
2020-02-12 Wilco Dijkstra <wdijkstr@arm.com>
+ PR rtl-optimization/93565
+ * config/aarch64/aarch64.c (aarch64_rtx_costs): Add CTZ costs.
+
+2020-02-12 Wilco Dijkstra <wdijkstr@arm.com>
+
* config/aarch64/aarch64-simd.md
(aarch64_zero_extend<GPI:mode>_reduc_plus_<VDQV_E:mode>): New pattern.
* config/aarch64/aarch64.md (popcount<mode>2): Use it instead of
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 6a1b409..4a34dce 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -11507,6 +11507,13 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
return false;
+ case CTZ:
+ *cost = COSTS_N_INSNS (2);
+
+ if (speed)
+ *cost += extra_cost->alu.clz + extra_cost->alu.rev;
+ return false;
+
case COMPARE:
op0 = XEXP (x, 0);
op1 = XEXP (x, 1);
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 18d6434..1153c74 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,10 @@
2020-02-12 Wilco Dijkstra <wdijkstr@arm.com>
+ PR rtl-optimization/93565
+ * gcc.target/aarch64/pr93565.c: New test.
+
+2020-02-12 Wilco Dijkstra <wdijkstr@arm.com>
+
* gcc.target/aarch64/popcnt2.c: New test.
2020-02-12 Marek Polacek <polacek@redhat.com>
diff --git a/gcc/testsuite/gcc.target/aarch64/pr93565.c b/gcc/testsuite/gcc.target/aarch64/pr93565.c
new file mode 100644
index 0000000..7200f80
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr93565.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+static const unsigned long long magic = 0x03f08c5392f756cdULL;
+
+static const char table[64] = {
+ 0, 1, 12, 2, 13, 22, 17, 3,
+ 14, 33, 23, 36, 18, 58, 28, 4,
+ 62, 15, 34, 26, 24, 48, 50, 37,
+ 19, 55, 59, 52, 29, 44, 39, 5,
+ 63, 11, 21, 16, 32, 35, 57, 27,
+ 61, 25, 47, 49, 54, 51, 43, 38,
+ 10, 20, 31, 56, 60, 46, 53, 42,
+ 9, 30, 45, 41, 8, 40, 7, 6,
+};
+
+static inline int ctz1 (unsigned long b)
+{
+ unsigned long lsb = b & -b;
+ return table[(lsb * magic) >> 58];
+}
+
+void f (unsigned long x, int *p)
+{
+ if (x != 0)
+ {
+ int a = ctz1 (x);
+ *p = a | p[a];
+ }
+}
+
+/* { dg-final { scan-assembler-times "rbit\t" 1 } } */
+/* { dg-final { scan-assembler-times "clz\t" 1 } } */
+