aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorRichard Guenther <rguenther@suse.de>2008-08-13 08:57:20 +0000
committerRichard Biener <rguenth@gcc.gnu.org>2008-08-13 08:57:20 +0000
commit25c6036a5bc49ba73bc3f5d2573cf2506b513a1f (patch)
treeb8a064a0f6159ca9ca85a74e1d5e760278c524c7 /gcc
parent92464a8a7a6f5404b71b00acdd04cd2754d6b100 (diff)
downloadgcc-25c6036a5bc49ba73bc3f5d2573cf2506b513a1f.zip
gcc-25c6036a5bc49ba73bc3f5d2573cf2506b513a1f.tar.gz
gcc-25c6036a5bc49ba73bc3f5d2573cf2506b513a1f.tar.bz2
re PR tree-optimization/15255 ([tree-ssa] a * 2 + a * 2 is not converted to a * 4)
2008-08-13 Richard Guenther <rguenther@suse.de> PR tree-optimization/15255 * tree-ssa-reassoc.c (linearize_expr_tree): Declare. (struct oecount_s): New struct and VEC types. (cvec): New global. (oecount_hash): New function. (oecount_eq): Likewise. (oecount_cmp): Likewise. (zero_one_operation): New function. (build_and_add_sum): Likewise. (undistribute_ops_list): Perform un-distribution of multiplication and division on the chain of summands. (should_break_up_subtract): Also break up subtracts for factors. (reassociate_bb): Delete dead visited statements. Call undistribute_ops_list. Re-sort and optimize if it did something. * passes.c (init_optimization_passes): Move DSE before reassociation. * tree-ssa-loop-niter.c (stmt_dominates_stmt_p): Correctly handle PHI nodes. * gcc.dg/tree-ssa/reassoc-14.c: New testcase. * gcc.dg/tree-ssa/reassoc-15.c: Likewise. * gcc.dg/tree-ssa/reassoc-16.c: Likewise. * gcc.dg/torture/reassoc-1.c: Likewise. * gcc.dg/tree-ssa/recip-2.c: Adjust. * gcc.dg/tree-ssa/recip-6.c: Likewise. * gcc.dg/tree-ssa/recip-7.c: Likewise. * gfortran.dg/reassoc_4.f: Likewise. From-SVN: r139048
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog21
-rw-r--r--gcc/passes.c2
-rw-r--r--gcc/testsuite/ChangeLog12
-rw-r--r--gcc/testsuite/gcc.dg/torture/reassoc-1.c24
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/reassoc-14.c23
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/reassoc-15.c19
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/reassoc-16.c16
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/reassoc-17.c16
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/reassoc-18.c12
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/recip-2.c11
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/recip-6.c8
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/recip-7.c8
-rw-r--r--gcc/testsuite/gfortran.dg/reassoc_4.f43
-rw-r--r--gcc/tree-ssa-loop-niter.c6
-rw-r--r--gcc/tree-ssa-reassoc.c450
15 files changed, 658 insertions, 13 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b22b949a..805d73d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,24 @@
+2008-08-13 Richard Guenther <rguenther@suse.de>
+
+ PR tree-optimization/15255
+ * tree-ssa-reassoc.c (linearize_expr_tree): Declare.
+ (struct oecount_s): New struct and VEC types.
+ (cvec): New global.
+ (oecount_hash): New function.
+ (oecount_eq): Likewise.
+ (oecount_cmp): Likewise.
+ (zero_one_operation): New function.
+ (build_and_add_sum): Likewise.
+ (undistribute_ops_list): Perform un-distribution of multiplication
+ and division on the chain of summands.
+ (should_break_up_subtract): Also break up subtracts for factors.
+ (reassociate_bb): Delete dead visited statements.
+ Call undistribute_ops_list. Re-sort and optimize if it did something.
+ * passes.c (init_optimization_passes): Move DSE before
+ reassociation.
+ * tree-ssa-loop-niter.c (stmt_dominates_stmt_p): Correctly handle
+ PHI nodes.
+
2008-08-12 Janis Johnson <janis187@us.ibm.com>
* doc/invoke.texi (-fipa-pta): Say the option is experimental.
diff --git a/gcc/passes.c b/gcc/passes.c
index 64470ba..ee3826b 100644
--- a/gcc/passes.c
+++ b/gcc/passes.c
@@ -633,9 +633,9 @@ init_optimization_passes (void)
only examines PHIs to discover const/copy propagation
opportunities. */
NEXT_PASS (pass_phi_only_cprop);
+ NEXT_PASS (pass_dse);
NEXT_PASS (pass_reassoc);
NEXT_PASS (pass_dce);
- NEXT_PASS (pass_dse);
NEXT_PASS (pass_forwprop);
NEXT_PASS (pass_phiopt);
NEXT_PASS (pass_object_sizes);
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 83d6afe..ca8c8cf 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,15 @@
+2008-08-13 Richard Guenther <rguenther@suse.de>
+
+ PR tree-optimization/15255
+ * gcc.dg/tree-ssa/reassoc-14.c: New testcase.
+ * gcc.dg/tree-ssa/reassoc-15.c: Likewise.
+ * gcc.dg/tree-ssa/reassoc-16.c: Likewise.
+ * gcc.dg/torture/reassoc-1.c: Likewise.
+ * gcc.dg/tree-ssa/recip-2.c: Adjust.
+ * gcc.dg/tree-ssa/recip-6.c: Likewise.
+ * gcc.dg/tree-ssa/recip-7.c: Likewise.
+ * gfortran.dg/reassoc_4.f: Likewise.
+
2008-08-12 Janis Johnson <janis187@us.ibm.com>
* gcc.target/i386/pr32000-2.c: Use dg-skip-if for target expression.
diff --git a/gcc/testsuite/gcc.dg/torture/reassoc-1.c b/gcc/testsuite/gcc.dg/torture/reassoc-1.c
new file mode 100644
index 0000000..f0c9014
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/reassoc-1.c
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+
+int x;
+
+int __attribute__((noinline))
+foo(int a, int b, int w)
+{
+ int tmp1 = a * w;
+ int tmp2 = b * w;
+ x = tmp1;
+ return tmp1 + tmp2;
+}
+
+extern void abort (void);
+
+int main()
+{
+ if (foo(1, 2, 3) != 9)
+ abort ();
+ if (x != 3)
+ abort ();
+ return 0;
+}
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-14.c b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-14.c
new file mode 100644
index 0000000..24141ef
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-14.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-reassoc1" } */
+
+int test1 (int x, int y, int z, int weight)
+{
+ int tmp1 = x * weight;
+ int tmp2 = y * weight;
+ int tmp3 = (x - y) * weight;
+ return tmp1 + (tmp2 + tmp3);
+}
+
+int test2 (int x, int y, int z, int weight)
+{
+ int tmp1 = x * weight;
+ int tmp2 = y * weight * weight;
+ int tmp3 = z * weight * weight * weight;
+ return tmp1 + tmp2 + tmp3;
+}
+
+/* There should be one multiplication left in test1 and three in test2. */
+
+/* { dg-final { scan-tree-dump-times "\\\*" 4 "reassoc1" } } */
+/* { dg-final { cleanup-tree-dump "reassoc1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-15.c b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-15.c
new file mode 100644
index 0000000..d9b74d2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-15.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-reassoc1" } */
+
+int test3 (int x, int y, int z, int weight, int w1, int w2, int w3)
+{
+ int wtmp1 = w1 * weight;
+ int wtmp2 = w2 * weight;
+ int wtmp3 = w3 * weight;
+ int tmp1 = x * wtmp1;
+ int tmp2 = y * wtmp2;
+ int tmp3 = z * wtmp3;
+ return tmp1 + tmp2 + tmp3;
+}
+
+/* The multiplication with weight should be un-distributed.
+ ??? This pattern is not recognized currently. */
+
+/* { dg-final { scan-tree-dump-times "\\\*" 4 "reassoc1" } } */
+/* { dg-final { cleanup-tree-dump "reassoc1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-16.c b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-16.c
new file mode 100644
index 0000000..4dd54a8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-16.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -fdump-tree-reassoc1" } */
+
+double test1 (double x, double y, double z, double weight)
+{
+ double tmp1 = x / weight;
+ double tmp2 = y / weight;
+ double tmp3 = -x / weight;
+ return tmp1 + tmp2 + tmp3;
+}
+
+/* The division should be un-distributed and all references to x should
+ be gone. */
+
+/* { dg-final { scan-tree-dump-times "/" 1 "reassoc1" } } */
+/* { dg-final { cleanup-tree-dump "reassoc1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-17.c b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-17.c
new file mode 100644
index 0000000..255c786
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-17.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -fdump-tree-reassoc1" } */
+
+double test2 (double x, double y, double ddj, int b)
+{
+ double tmp1, tmp2, sum;
+ sum = 0.0;
+ if (b)
+ sum = 1.0;
+ tmp1 = sum/ddj;
+ tmp2 = x/ddj;
+ return tmp1 + y + tmp2;
+}
+
+/* { dg-final { scan-tree-dump-times "/" 1 "reassoc1" } } */
+/* { dg-final { cleanup-tree-dump "reassoc1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-18.c b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-18.c
new file mode 100644
index 0000000..ce52cd0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-18.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-reassoc1" } */
+
+int
+ETree_nFactorEntriesInFront (int b, int m)
+{
+ int nent = b*b + 2*b*m;
+ return nent;
+}
+
+/* { dg-final { scan-tree-dump-times "\\\*" 2 "reassoc1" } } */
+/* { dg-final { cleanup-tree-dump "reassoc1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/recip-2.c b/gcc/testsuite/gcc.dg/tree-ssa/recip-2.c
index af62805..be75414 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/recip-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/recip-2.c
@@ -1,7 +1,9 @@
/* { dg-do compile } */
/* { dg-options "-O1 -funsafe-math-optimizations -fdump-tree-recip" } */
-float e(float a, float b, float c, float d, float e, float f)
+float u, v, w, x, y, z;
+
+void e(float a, float b, float c, float d, float e, float f)
{
if (a < b)
{
@@ -20,7 +22,12 @@ float e(float a, float b, float c, float d, float e, float f)
/* This should not be left as a multiplication. */
c = 1 / c;
- return a + b + c + d + e + f;
+ u = a;
+ v = b;
+ w = c;
+ x = d;
+ y = e;
+ z = f;
}
/* { dg-final { scan-tree-dump-times " / " 2 "recip" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/recip-6.c b/gcc/testsuite/gcc.dg/tree-ssa/recip-6.c
index 60fefd0..b3334fb 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/recip-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/recip-6.c
@@ -5,7 +5,9 @@
extern int f2();
-double f1(double y, double z, double w)
+double m, n, o;
+
+void f1(double y, double z, double w)
{
double b, c, d, e, f;
@@ -18,7 +20,9 @@ double f1(double y, double z, double w)
e = c / y;
f = 1 / y;
- return d + e + f;
+ m = d;
+ n = e;
+ o = f;
}
/* { dg-final { scan-tree-dump-times " / " 1 "recip" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/recip-7.c b/gcc/testsuite/gcc.dg/tree-ssa/recip-7.c
index af1bf3c..98bbdca 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/recip-7.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/recip-7.c
@@ -5,7 +5,9 @@
extern double h();
-double f(int x, double z, double w)
+double m, n, o;
+
+void f(int x, double z, double w)
{
double b, c, d, e, f;
double y = h ();
@@ -19,7 +21,9 @@ double f(int x, double z, double w)
e = c / y;
f = 1 / y;
- return d + e + f;
+ m = d;
+ n = e;
+ o = f;
}
/* { dg-final { scan-tree-dump-times " / " 1 "recip" } } */
diff --git a/gcc/testsuite/gfortran.dg/reassoc_4.f b/gcc/testsuite/gfortran.dg/reassoc_4.f
new file mode 100644
index 0000000..7004365
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/reassoc_4.f
@@ -0,0 +1,43 @@
+! { dg-do compile }
+! { dg-options "-O3 -ffast-math -fdump-tree-reassoc1" }
+ subroutine anisonl(w,vo,anisox,s,ii1,jj1,weight)
+ integer ii1,jj1,i1,iii1,j1,jjj1,k1,l1,m1,n1
+ real*8 w(3,3),vo(3,3),anisox(3,3,3,3),s(60,60),weight
+!
+! This routine replaces the following lines in e_c3d.f for
+! an anisotropic material
+!
+ do i1=1,3
+ iii1=ii1+i1-1
+ do j1=1,3
+ jjj1=jj1+j1-1
+ do k1=1,3
+ do l1=1,3
+ s(iii1,jjj1)=s(iii1,jjj1)
+ & +anisox(i1,k1,j1,l1)*w(k1,l1)*weight
+ do m1=1,3
+ s(iii1,jjj1)=s(iii1,jjj1)
+ & +anisox(i1,k1,m1,l1)*w(k1,l1)
+ & *vo(j1,m1)*weight
+ & +anisox(m1,k1,j1,l1)*w(k1,l1)
+ & *vo(i1,m1)*weight
+ do n1=1,3
+ s(iii1,jjj1)=s(iii1,jjj1)
+ & +anisox(m1,k1,n1,l1)
+ & *w(k1,l1)*vo(i1,m1)*vo(j1,n1)*weight
+ enddo
+ enddo
+ enddo
+ enddo
+ enddo
+ enddo
+
+ return
+ end
+
+! There should be 22 multiplications left after un-distributing
+! weigth, w(k1,l1), vo(i1,m1) and vo(j1,m1) on the innermost two
+! unrolled loops.
+
+! { dg-final { scan-tree-dump-times "\[0-9\] \\\* " 22 "reassoc1" } }
+! { dg-final { cleanup-tree-dump "reassoc1" } }
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index b8247a0..1eedf75 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -2914,6 +2914,12 @@ stmt_dominates_stmt_p (gimple s1, gimple s2)
{
gimple_stmt_iterator bsi;
+ if (gimple_code (s2) == GIMPLE_PHI)
+ return false;
+
+ if (gimple_code (s1) == GIMPLE_PHI)
+ return true;
+
for (bsi = gsi_start_bb (bb1); gsi_stmt (bsi) != s2; gsi_next (&bsi))
if (gsi_stmt (bsi) == s1)
return true;
diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
index a3facd8..be68331 100644
--- a/gcc/tree-ssa-reassoc.c
+++ b/gcc/tree-ssa-reassoc.c
@@ -727,6 +727,415 @@ eliminate_using_constants (enum tree_code opcode,
}
}
+
+static void linearize_expr_tree (VEC(operand_entry_t, heap) **, gimple,
+ bool, bool);
+
+/* Structure for tracking and counting operands. */
+typedef struct oecount_s {
+ int cnt;
+ enum tree_code oecode;
+ tree op;
+} oecount;
+
+DEF_VEC_O(oecount);
+DEF_VEC_ALLOC_O(oecount,heap);
+
+/* The heap for the oecount hashtable and the sorted list of operands. */
+static VEC (oecount, heap) *cvec;
+
+/* Hash function for oecount. */
+
+static hashval_t
+oecount_hash (const void *p)
+{
+ const oecount *c = VEC_index (oecount, cvec, (size_t)p - 42);
+ return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
+}
+
+/* Comparison function for oecount. */
+
+static int
+oecount_eq (const void *p1, const void *p2)
+{
+ const oecount *c1 = VEC_index (oecount, cvec, (size_t)p1 - 42);
+ const oecount *c2 = VEC_index (oecount, cvec, (size_t)p2 - 42);
+ return (c1->oecode == c2->oecode
+ && c1->op == c2->op);
+}
+
+/* Comparison function for qsort sorting oecount elements by count. */
+
+static int
+oecount_cmp (const void *p1, const void *p2)
+{
+ const oecount *c1 = (const oecount *)p1;
+ const oecount *c2 = (const oecount *)p2;
+ return c1->cnt - c2->cnt;
+}
+
+/* Walks the linear chain with result *DEF searching for an operation
+ with operand OP and code OPCODE removing that from the chain. *DEF
+ is updated if there is only one operand but no operation left. */
+
+static void
+zero_one_operation (tree *def, enum tree_code opcode, tree op)
+{
+ gimple stmt = SSA_NAME_DEF_STMT (*def);
+
+ do
+ {
+ tree name = gimple_assign_rhs1 (stmt);
+
+ /* If this is the operation we look for and one of the operands
+ is ours simply propagate the other operand into the stmts
+ single use. */
+ if (gimple_assign_rhs_code (stmt) == opcode
+ && (name == op
+ || gimple_assign_rhs2 (stmt) == op))
+ {
+ gimple use_stmt;
+ use_operand_p use;
+ gimple_stmt_iterator gsi;
+ if (name == op)
+ name = gimple_assign_rhs2 (stmt);
+ gcc_assert (has_single_use (gimple_assign_lhs (stmt)));
+ single_imm_use (gimple_assign_lhs (stmt), &use, &use_stmt);
+ if (gimple_assign_lhs (stmt) == *def)
+ *def = name;
+ SET_USE (use, name);
+ if (TREE_CODE (name) != SSA_NAME)
+ update_stmt (use_stmt);
+ gsi = gsi_for_stmt (stmt);
+ gsi_remove (&gsi, true);
+ release_defs (stmt);
+ return;
+ }
+
+ /* Continue walking the chain. */
+ gcc_assert (name != op
+ && TREE_CODE (name) == SSA_NAME);
+ stmt = SSA_NAME_DEF_STMT (name);
+ }
+ while (1);
+}
+
+/* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
+ the result. Places the statement after the definition of either
+ OP1 or OP2. Returns the new statement. */
+
+static gimple
+build_and_add_sum (tree tmpvar, tree op1, tree op2, enum tree_code opcode)
+{
+ gimple op1def = NULL, op2def = NULL;
+ gimple_stmt_iterator gsi;
+ tree op;
+ gimple sum;
+
+ /* Create the addition statement. */
+ sum = gimple_build_assign_with_ops (opcode, tmpvar, op1, op2);
+ op = make_ssa_name (tmpvar, sum);
+ gimple_assign_set_lhs (sum, op);
+
+ /* Find an insertion place and insert. */
+ if (TREE_CODE (op1) == SSA_NAME)
+ op1def = SSA_NAME_DEF_STMT (op1);
+ if (TREE_CODE (op2) == SSA_NAME)
+ op2def = SSA_NAME_DEF_STMT (op2);
+ if ((!op1def || gimple_nop_p (op1def))
+ && (!op2def || gimple_nop_p (op2def)))
+ {
+ gsi = gsi_start_bb (single_succ (ENTRY_BLOCK_PTR));
+ gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
+ }
+ else if ((!op1def || gimple_nop_p (op1def))
+ || (op2def && !gimple_nop_p (op2def)
+ && stmt_dominates_stmt_p (op1def, op2def)))
+ {
+ if (gimple_code (op2def) == GIMPLE_PHI)
+ {
+ gsi = gsi_start_bb (gimple_bb (op2def));
+ gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
+ }
+ else
+ {
+ gsi = gsi_for_stmt (op2def);
+ gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
+ }
+ }
+ else
+ {
+ if (gimple_code (op1def) == GIMPLE_PHI)
+ {
+ gsi = gsi_start_bb (gimple_bb (op1def));
+ gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
+ }
+ else
+ {
+ gsi = gsi_for_stmt (op1def);
+ gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
+ }
+ }
+ update_stmt (sum);
+
+ return sum;
+}
+
+/* Perform un-distribution of divisions and multiplications.
+ A * X + B * X is transformed into (A + B) * X and A / X + B / X
+ to (A + B) / X for real X.
+
+ The algorithm is organized as follows.
+
+ - First we walk the addition chain *OPS looking for summands that
+ are defined by a multiplication or a real division. This results
+ in the candidates bitmap with relevant indices into *OPS.
+
+ - Second we build the chains of multiplications or divisions for
+ these candidates, counting the number of occurences of (operand, code)
+ pairs in all of the candidates chains.
+
+ - Third we sort the (operand, code) pairs by number of occurence and
+ process them starting with the pair with the most uses.
+
+ * For each such pair we walk the candidates again to build a
+ second candidate bitmap noting all multiplication/division chains
+ that have at least one occurence of (operand, code).
+
+ * We build an alternate addition chain only covering these
+ candidates with one (operand, code) operation removed from their
+ multiplication/division chain.
+
+ * The first candidate gets replaced by the alternate addition chain
+ multiplied/divided by the operand.
+
+ * All candidate chains get disabled for further processing and
+ processing of (operand, code) pairs continues.
+
+ The alternate addition chains built are re-processed by the main
+ reassociation algorithm which allows optimizing a * x * y + b * y * x
+ to (a + b ) * x * y in one invocation of the reassociation pass. */
+
+static bool
+undistribute_ops_list (enum tree_code opcode,
+ VEC (operand_entry_t, heap) **ops, struct loop *loop)
+{
+ unsigned int length = VEC_length (operand_entry_t, *ops);
+ operand_entry_t oe1;
+ unsigned i, j;
+ sbitmap candidates, candidates2;
+ unsigned nr_candidates, nr_candidates2;
+ sbitmap_iterator sbi0;
+ VEC (operand_entry_t, heap) **subops;
+ htab_t ctable;
+ bool changed = false;
+
+ if (length <= 1
+ || opcode != PLUS_EXPR)
+ return false;
+
+ /* Build a list of candidates to process. */
+ candidates = sbitmap_alloc (length);
+ sbitmap_zero (candidates);
+ nr_candidates = 0;
+ for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe1); ++i)
+ {
+ enum tree_code dcode;
+ gimple oe1def;
+
+ if (TREE_CODE (oe1->op) != SSA_NAME)
+ continue;
+ oe1def = SSA_NAME_DEF_STMT (oe1->op);
+ if (!is_gimple_assign (oe1def))
+ continue;
+ dcode = gimple_assign_rhs_code (oe1def);
+ if ((dcode != MULT_EXPR
+ && dcode != RDIV_EXPR)
+ || !is_reassociable_op (oe1def, dcode, loop))
+ continue;
+
+ SET_BIT (candidates, i);
+ nr_candidates++;
+ }
+
+ if (nr_candidates < 2)
+ {
+ sbitmap_free (candidates);
+ return false;
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "searching for un-distribute opportunities ");
+ print_generic_expr (dump_file,
+ VEC_index (operand_entry_t, *ops,
+ sbitmap_first_set_bit (candidates))->op, 0);
+ fprintf (dump_file, " %d\n", nr_candidates);
+ }
+
+ /* Build linearized sub-operand lists and the counting table. */
+ cvec = NULL;
+ ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
+ subops = XCNEWVEC (VEC (operand_entry_t, heap) *,
+ VEC_length (operand_entry_t, *ops));
+ EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
+ {
+ gimple oedef;
+ enum tree_code oecode;
+ unsigned j;
+
+ oedef = SSA_NAME_DEF_STMT (VEC_index (operand_entry_t, *ops, i)->op);
+ oecode = gimple_assign_rhs_code (oedef);
+ linearize_expr_tree (&subops[i], oedef,
+ associative_tree_code (oecode), false);
+
+ for (j = 0; VEC_iterate (operand_entry_t, subops[i], j, oe1); ++j)
+ {
+ oecount c;
+ void **slot;
+ size_t idx;
+ c.oecode = oecode;
+ c.cnt = 1;
+ c.op = oe1->op;
+ VEC_safe_push (oecount, heap, cvec, &c);
+ idx = VEC_length (oecount, cvec) + 41;
+ slot = htab_find_slot (ctable, (void *)idx, INSERT);
+ if (!*slot)
+ {
+ *slot = (void *)idx;
+ }
+ else
+ {
+ VEC_pop (oecount, cvec);
+ VEC_index (oecount, cvec, (size_t)*slot - 42)->cnt++;
+ }
+ }
+ }
+ htab_delete (ctable);
+
+ /* Sort the counting table. */
+ qsort (VEC_address (oecount, cvec), VEC_length (oecount, cvec),
+ sizeof (oecount), oecount_cmp);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ oecount *c;
+ fprintf (dump_file, "Candidates:\n");
+ for (j = 0; VEC_iterate (oecount, cvec, j, c); ++j)
+ {
+ fprintf (dump_file, " %u %s: ", c->cnt,
+ c->oecode == MULT_EXPR
+ ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
+ print_generic_expr (dump_file, c->op, 0);
+ fprintf (dump_file, "\n");
+ }
+ }
+
+ /* Process the (operand, code) pairs in order of most occurence. */
+ candidates2 = sbitmap_alloc (length);
+ while (!VEC_empty (oecount, cvec))
+ {
+ oecount *c = VEC_last (oecount, cvec);
+ if (c->cnt < 2)
+ break;
+
+ /* Now collect the operands in the outer chain that contain
+ the common operand in their inner chain. */
+ sbitmap_zero (candidates2);
+ nr_candidates2 = 0;
+ EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
+ {
+ gimple oedef;
+ enum tree_code oecode;
+ unsigned j;
+ tree op = VEC_index (operand_entry_t, *ops, i)->op;
+
+ /* If we undistributed in this chain already this may be
+ a constant. */
+ if (TREE_CODE (op) != SSA_NAME)
+ continue;
+
+ oedef = SSA_NAME_DEF_STMT (op);
+ oecode = gimple_assign_rhs_code (oedef);
+ if (oecode != c->oecode)
+ continue;
+
+ for (j = 0; VEC_iterate (operand_entry_t, subops[i], j, oe1); ++j)
+ {
+ if (oe1->op == c->op)
+ {
+ SET_BIT (candidates2, i);
+ ++nr_candidates2;
+ break;
+ }
+ }
+ }
+
+ if (nr_candidates2 >= 2)
+ {
+ operand_entry_t oe1, oe2;
+ tree tmpvar;
+ gimple prod;
+ int first = sbitmap_first_set_bit (candidates2);
+
+ /* Build the new addition chain. */
+ oe1 = VEC_index (operand_entry_t, *ops, first);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Building (");
+ print_generic_expr (dump_file, oe1->op, 0);
+ }
+ tmpvar = create_tmp_var (TREE_TYPE (oe1->op), NULL);
+ add_referenced_var (tmpvar);
+ zero_one_operation (&oe1->op, c->oecode, c->op);
+ EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
+ {
+ gimple sum;
+ oe2 = VEC_index (operand_entry_t, *ops, i);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, " + ");
+ print_generic_expr (dump_file, oe2->op, 0);
+ }
+ zero_one_operation (&oe2->op, c->oecode, c->op);
+ sum = build_and_add_sum (tmpvar, oe1->op, oe2->op, opcode);
+ oe2->op = fold_convert (TREE_TYPE (oe2->op), integer_zero_node);
+ oe2->rank = 0;
+ oe1->op = gimple_get_lhs (sum);
+ }
+
+ /* Apply the multiplication/division. */
+ prod = build_and_add_sum (tmpvar, oe1->op, c->op, c->oecode);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
+ print_generic_expr (dump_file, c->op, 0);
+ fprintf (dump_file, "\n");
+ }
+
+ /* Record it in the addition chain and disable further
+ undistribution with this op. */
+ oe1->op = gimple_assign_lhs (prod);
+ oe1->rank = get_rank (oe1->op);
+ VEC_free (operand_entry_t, heap, subops[first]);
+
+ changed = true;
+ }
+
+ VEC_pop (oecount, cvec);
+ }
+
+ for (i = 0; i < VEC_length (operand_entry_t, *ops); ++i)
+ VEC_free (operand_entry_t, heap, subops[i]);
+ free (subops);
+ VEC_free (oecount, heap, cvec);
+ sbitmap_free (candidates);
+ sbitmap_free (candidates2);
+
+ return changed;
+}
+
+
/* Perform various identities and other optimizations on the list of
operand entries, stored in OPS. The tree code for the binary
operation between all the operands is OPCODE. */
@@ -1097,7 +1506,8 @@ should_break_up_subtract (gimple stmt)
if (TREE_CODE (lhs) == SSA_NAME
&& (immusestmt = get_single_immediate_use (lhs))
&& is_gimple_assign (immusestmt)
- && gimple_assign_rhs_code (immusestmt) == PLUS_EXPR)
+ && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
+ || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
return true;
return false;
}
@@ -1125,7 +1535,8 @@ break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
Place the operands of the expression tree in the vector named OPS. */
static void
-linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt)
+linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt,
+ bool is_associative, bool set_visited)
{
gimple_stmt_iterator gsinow, gsilhs;
tree binlhs = gimple_assign_rhs1 (stmt);
@@ -1136,7 +1547,8 @@ linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt)
enum tree_code rhscode = gimple_assign_rhs_code (stmt);
struct loop *loop = loop_containing_stmt (stmt);
- gimple_set_visited (stmt, true);
+ if (set_visited)
+ gimple_set_visited (stmt, true);
if (TREE_CODE (binlhs) == SSA_NAME)
{
@@ -1160,6 +1572,13 @@ linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt)
{
tree temp;
+ /* If this is not a associative operation like division, give up. */
+ if (!is_associative)
+ {
+ add_to_ops_vec (ops, binrhs);
+ return;
+ }
+
if (!binrhsisreassoc)
{
add_to_ops_vec (ops, binrhs);
@@ -1203,7 +1622,8 @@ linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt)
gsinow = gsi_for_stmt (stmt);
gsilhs = gsi_for_stmt (SSA_NAME_DEF_STMT (binlhs));
gsi_move_before (&gsilhs, &gsinow);
- linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs));
+ linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
+ is_associative, set_visited);
add_to_ops_vec (ops, binrhs);
}
@@ -1344,7 +1764,16 @@ reassociate_bb (basic_block bb)
/* If this was part of an already processed statement,
we don't need to touch it again. */
if (gimple_visited_p (stmt))
- continue;
+ {
+ /* This statement might have become dead because of previous
+ reassociations. */
+ if (has_zero_uses (gimple_get_lhs (stmt)))
+ {
+ gsi_remove (&gsi, true);
+ release_defs (stmt);
+ }
+ continue;
+ }
lhs = gimple_assign_lhs (stmt);
rhs1 = gimple_assign_rhs1 (stmt);
@@ -1375,12 +1804,21 @@ reassociate_bb (basic_block bb)
continue;
gimple_set_visited (stmt, true);
- linearize_expr_tree (&ops, stmt);
+ linearize_expr_tree (&ops, stmt, true, true);
qsort (VEC_address (operand_entry_t, ops),
VEC_length (operand_entry_t, ops),
sizeof (operand_entry_t),
sort_by_operand_rank);
optimize_ops_list (rhs_code, &ops);
+ if (undistribute_ops_list (rhs_code, &ops,
+ loop_containing_stmt (stmt)))
+ {
+ qsort (VEC_address (operand_entry_t, ops),
+ VEC_length (operand_entry_t, ops),
+ sizeof (operand_entry_t),
+ sort_by_operand_rank);
+ optimize_ops_list (rhs_code, &ops);
+ }
if (VEC_length (operand_entry_t, ops) == 1)
{