aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorXionghu Luo <luoxhu@linux.ibm.com>2021-05-18 21:34:18 -0500
committerXionghu Luo <luoxhu@linux.ibm.com>2021-05-18 21:34:18 -0500
commitde56f95afaaa22c67cbeec780921d63e8b34514e (patch)
treec1f3457e45da38b04c2bd5d60e100e94fd974141 /gcc
parent39ed6a88c74db5412652f0167d03f71eca4f02fc (diff)
downloadgcc-de56f95afaaa22c67cbeec780921d63e8b34514e.zip
gcc-de56f95afaaa22c67cbeec780921d63e8b34514e.tar.gz
gcc-de56f95afaaa22c67cbeec780921d63e8b34514e.tar.bz2
Run pass_sink_code once more before store_merging
The gimple sink code pass runs quite early, so later gimple optimization passes may expose new sinking opportunities; this patch runs the sink code pass once more before store_merging. For detailed discussion, please refer to: https://gcc.gnu.org/pipermail/gcc-patches/2020-December/562352.html Tested the SPEC2017 performance on P8LE: 544.nab_r is improved by 2.43%, with no big changes to other cases; the GEOMEAN improves only slightly, by 0.25%. gcc/ChangeLog: 2021-05-18 Xionghu Luo <luoxhu@linux.ibm.com> * passes.def: Add sink_code pass before store_merging. * tree-ssa-sink.c (pass_sink_code::clone): New. gcc/testsuite/ChangeLog: 2021-05-18 Xionghu Luo <luoxhu@linux.ibm.com> * gcc.dg/tree-ssa/ssa-sink-1.c: Adjust. * gcc.dg/tree-ssa/ssa-sink-2.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-3.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-4.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-5.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-6.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-7.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-8.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-9.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-10.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-13.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-14.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-16.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-17.c: Ditto. * gcc.dg/tree-ssa/ssa-sink-18.c: New.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/passes.def1
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-1.c2
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-10.c2
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-13.c4
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-14.c4
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-16.c2
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-17.c2
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-18.c212
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-2.c2
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-3.c15
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-4.c2
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-5.c2
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-6.c2
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-7.c2
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-8.c2
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-9.c2
-rw-r--r--gcc/tree-ssa-sink.c1
17 files changed, 229 insertions, 30 deletions
diff --git a/gcc/passes.def b/gcc/passes.def
index de39fa4..945d2bc 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -348,6 +348,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_phiopt, false /* early_p */);
NEXT_PASS (pass_fold_builtins);
NEXT_PASS (pass_optimize_widening_mul);
+ NEXT_PASS (pass_sink_code);
NEXT_PASS (pass_store_merging);
NEXT_PASS (pass_tail_calls);
/* If DCE is not run before checking for uninitialized uses,
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-1.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-1.c
index 411585a..57b5016 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-1.c
@@ -7,4 +7,4 @@ foo (int a, int b, int c)
return c ? x : a;
}
/* We should sink the x = a * b calculation into the branch that returns x. */
-/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink" } } */
+/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-10.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-10.c
index 37e4d2f..535cb32 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-10.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-10.c
@@ -16,4 +16,4 @@ void foo (void)
}
}
-/* { dg-final { scan-tree-dump-times "Sinking # VUSE" 4 "sink" } } */
+/* { dg-final { scan-tree-dump-times "Sinking # VUSE" 4 "sink1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-13.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-13.c
index a65ba35..584fd91 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-13.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-13.c
@@ -21,5 +21,5 @@ void test ()
/* We should sink/merge all stores and end up with a single BB. */
-/* { dg-final { scan-tree-dump-times "MEM\[^\n\r\]* = 0;" 3 "sink" } } */
-/* { dg-final { scan-tree-dump-times "<bb " 1 "sink" } } */
+/* { dg-final { scan-tree-dump-times "MEM\[^\n\r\]* = 0;" 3 "sink1" } } */
+/* { dg-final { scan-tree-dump-times "<bb " 1 "sink1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-14.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-14.c
index 771cd44..f5418b0 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-14.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-14.c
@@ -13,5 +13,5 @@ void foo (int b)
/* We should have sunk the store and inserted a PHI to merge the
stored values. */
-/* { dg-final { scan-tree-dump-times " = PHI" 1 "sink" } } */
-/* { dg-final { scan-tree-dump-times "x = " 1 "sink" } } */
+/* { dg-final { scan-tree-dump-times " = PHI" 1 "sink1" } } */
+/* { dg-final { scan-tree-dump-times "x = " 1 "sink1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-16.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-16.c
index 610c8d6..012b165 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-16.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-16.c
@@ -10,5 +10,5 @@ int f(int n)
return j;
}
-/* { dg-final { scan-tree-dump "Sinking j_. = __builtin_ffs" "sink" } } */
+/* { dg-final { scan-tree-dump "Sinking j_. = __builtin_ffs" "sink1" } } */
/* { dg-final { scan-tree-dump "return 2;" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-17.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-17.c
index cf2e2a0..d0aeeb3 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-17.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-17.c
@@ -12,4 +12,4 @@ int my_f(int a, int b)
}
/* We should sink the call to pure_f to the if block. */
-/* { dg-final { scan-tree-dump "Sinking # VUSE" "sink" } } */
+/* { dg-final { scan-tree-dump "Sinking # VUSE" "sink1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-18.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-18.c
new file mode 100644
index 0000000..421c78e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-18.c
@@ -0,0 +1,212 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+
+#include <stdint.h>
+
+#define HLOG 16
+#define MAX_LIT (1 << 5)
+typedef const uint8_t *LZF_HSLOT;
+typedef LZF_HSLOT LZF_STATE[1 << (HLOG)];
+
+int
+compute_on_bytes (uint8_t *in_data, int in_len, uint8_t *out_data, int out_len)
+{
+ LZF_STATE htab;
+
+ uint8_t *ip = in_data;
+ uint8_t *op = out_data;
+ uint8_t *in_end = ip + in_len;
+ uint8_t *out_end = op + out_len;
+ uint8_t *ref;
+
+ unsigned long off;
+ unsigned int hval;
+ int lit;
+
+ if (!in_len || !out_len)
+ return 0;
+
+ lit = 0;
+ op++;
+ hval = (((ip[0]) << 8) | ip[1]);
+
+ while (ip < in_end - 2)
+ {
+ uint8_t *hslot;
+
+ hval = (((hval) << 8) | ip[2]);
+ hslot = (uint8_t*)(htab + (((hval >> (3 * 8 - 16)) - hval * 5) & ((1 << (16)) - 1)));
+
+ ref = *hslot + in_data;
+ *hslot = ip - in_data;
+
+ if (1 && (off = ip - ref - 1) < (1 << 13) && ref > in_data
+ && ref[2] == ip[2]
+ && ((ref[1] << 8) | ref[0]) == ((ip[1] << 8) | ip[0]))
+ {
+ unsigned int len = 2;
+ unsigned int maxlen = in_end - ip - len;
+ maxlen
+ = maxlen > ((1 << 8) + (1 << 3)) ? ((1 << 8) + (1 << 3)) : maxlen;
+
+ if ((op + 3 + 1 >= out_end) != 0)
+ if (op - !lit + 3 + 1 >= out_end)
+ return 0;
+
+ op[-lit - 1] = lit - 1;
+ op -= !lit;
+
+ for (;;)
+ {
+ if (maxlen > 16)
+ {
+ len++;
+ if (ref[len] != ip[len])
+ break;
+ len++;
+ if (ref[len] != ip[len])
+ break;
+ len++;
+ if (ref[len] != ip[len])
+ break;
+ len++;
+ if (ref[len] != ip[len])
+ break;
+
+ len++;
+ if (ref[len] != ip[len])
+ break;
+ len++;
+ if (ref[len] != ip[len])
+ break;
+ len++;
+ if (ref[len] != ip[len])
+ break;
+ len++;
+ if (ref[len] != ip[len])
+ break;
+
+ len++;
+ if (ref[len] != ip[len])
+ break;
+ len++;
+ if (ref[len] != ip[len])
+ break;
+ len++;
+ if (ref[len] != ip[len])
+ break;
+ len++;
+ if (ref[len] != ip[len])
+ break;
+
+ len++;
+ if (ref[len] != ip[len])
+ break;
+ len++;
+ if (ref[len] != ip[len])
+ break;
+ len++;
+ if (ref[len] != ip[len])
+ break;
+ len++;
+ if (ref[len] != ip[len])
+ break;
+ }
+
+ do
+ {
+ len++;
+ }
+ while (len < maxlen && ip[len] == ref[len]);
+
+ break;
+ }
+
+ len -= 2;
+ ip++;
+
+ if (len < 7)
+ {
+ *op++ = (off >> 8) + (len << 5);
+ }
+ else
+ {
+ *op++ = (off >> 8) + (7 << 5);
+ *op++ = len - 7;
+ }
+ *op++ = off;
+ lit = 0;
+ op++;
+ ip += len + 1;
+
+ if (ip >= in_end - 2)
+ break;
+
+ --ip;
+ --ip;
+
+ hval = (((ip[0]) << 8) | ip[1]);
+ hval = (((hval) << 8) | ip[2]);
+ htab[(((hval >> (3 * 8 - 16)) - hval * 5) & ((1 << (16)) - 1))]
+ = (LZF_HSLOT)(ip - in_data);
+ ip++;
+
+ hval = (((hval) << 8) | ip[2]);
+ htab[(((hval >> (3 * 8 - 16)) - hval * 5) & ((1 << (16)) - 1))]
+ = (LZF_HSLOT)(ip - in_data);
+ ip++;
+ }
+ else
+ {
+ if (op >= out_end)
+ return 0;
+
+ lit++;
+ *op++ = *ip++;
+
+ if (lit == (1 << 5))
+ {
+ op[-lit - 1] = lit - 1;
+ lit = 0;
+ op++;
+ }
+ }
+ }
+ if (op + 3 > out_end) /* at most 3 bytes can be missing here */
+ return 0;
+
+ while (ip < in_end)
+ {
+ lit++;
+ *op++ = *ip++;
+ if (lit == MAX_LIT)
+ {
+ op[-lit - 1] = lit - 1; /* stop run */
+ lit = 0;
+ op++; /* start run */
+ }
+ }
+
+ op[-lit - 1] = lit - 1; /* end run */
+ op -= !lit; /* undo run if length is zero */
+
+ return op - out_data;
+ }
+
+ /* For this case, pass sink2 sinks statements from hot loop header to loop
+ exits after gimple loop optimizations, which generates instructions executed
+ each iteration in loop, but the results are used outside of loop:
+ With -m64,
+ "Sinking _367 = (uint8_t *) _320;
+ from bb 31 to bb 90
+ Sinking _320 = _321 + ivtmp.25_326;
+ from bb 31 to bb 90
+ Sinking _321 = (unsigned long) ip_229;
+ from bb 31 to bb 90
+ Sinking len_158 = _322 + 4294967295;
+ from bb 31 to bb 33"
+ With -m32, Power and X86 will sink 3 instructions, but arm ilp32 cannot
+ sink because ivopts chooses two IV candidates instead of one, which is
+ expected, so this case is restricted to lp64 only so far. */
+
+ /* { dg-final { scan-tree-dump-times "Sunk statements: 4" 1 "sink2" { target lp64 } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-2.c
index 6aa5a18..a0b4734 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-2.c
@@ -9,4 +9,4 @@ bar (int a, int b, int c)
return y;
}
/* We should sink the x = a * b calculation into the else branch */
-/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink" } } */
+/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-3.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-3.c
index 599997e..e69de29 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-3.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-3.c
@@ -1,15 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-sink-stats" } */
-extern void foo(int a);
-int
-main (int argc)
-{
- int a;
- a = argc + 1;
- if (argc + 3)
- {
- foo (a);
- }
-}
-/* We should sink the a = argc + 1 calculation into the if branch */
-/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-4.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-4.c
index 784edd2..1e3cfa9 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-4.c
@@ -17,4 +17,4 @@ main (int argc)
foo2 (a);
}
/* We should sink the first a = b + c calculation into the else branch */
-/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink" } } */
+/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 1 "sink1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-5.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-5.c
index dbdde39..f04da5d 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-5.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-5.c
@@ -44,4 +44,4 @@ void foo(int16_t runs[], uint8_t alpha[], int x, int count)
}
/* We should not sink the next_runs = runs + x calculation after the loop. */
-/* { dg-final { scan-tree-dump-times "Sunk statements:" 0 "sink" } } */
+/* { dg-final { scan-tree-dump-times "Sunk statements:" 0 "sink1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-6.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-6.c
index 1abae9f..31f5af3 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-6.c
@@ -14,4 +14,4 @@ int foo(int *a, int r)
/* *a = 1 should be sunk to the else block. */
-/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink" } } */
+/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-7.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-7.c
index ec3288f..bd74844 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-7.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-7.c
@@ -15,4 +15,4 @@ int foo(int *a, int r, short *b)
/* *a = 1 should be sunk to the else block. */
-/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink" } } */
+/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-8.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-8.c
index 48af421..4b23b56 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-8.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-8.c
@@ -24,4 +24,4 @@ int foo(int *a, int r, short *b)
/* *a = 1 should be sunk into the default case. */
-/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink" } } */
+/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-9.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-9.c
index 509a763..32bfc81 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-9.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-9.c
@@ -15,4 +15,4 @@ int foo(int *a, int r, int *b)
/* *a = 1 should be sunk to the else block. */
-/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink" } } */
+/* { dg-final { scan-tree-dump-times "Sinking" 1 "sink1" } } */
diff --git a/gcc/tree-ssa-sink.c b/gcc/tree-ssa-sink.c
index d33e56e..d252cbb 100644
--- a/gcc/tree-ssa-sink.c
+++ b/gcc/tree-ssa-sink.c
@@ -819,6 +819,7 @@ public:
/* opt_pass methods: */
virtual bool gate (function *) { return flag_tree_sink != 0; }
virtual unsigned int execute (function *);
+ opt_pass *clone (void) { return new pass_sink_code (m_ctxt); }
}; // class pass_sink_code