diff options
author | Richard Guenther <rguenther@suse.de> | 2008-08-13 08:57:20 +0000 |
---|---|---|
committer | Richard Biener <rguenth@gcc.gnu.org> | 2008-08-13 08:57:20 +0000 |
commit | 25c6036a5bc49ba73bc3f5d2573cf2506b513a1f (patch) | |
tree | b8a064a0f6159ca9ca85a74e1d5e760278c524c7 /gcc | |
parent | 92464a8a7a6f5404b71b00acdd04cd2754d6b100 (diff) | |
download | gcc-25c6036a5bc49ba73bc3f5d2573cf2506b513a1f.zip gcc-25c6036a5bc49ba73bc3f5d2573cf2506b513a1f.tar.gz gcc-25c6036a5bc49ba73bc3f5d2573cf2506b513a1f.tar.bz2 |
re PR tree-optimization/15255 ([tree-ssa] a * 2 + a * 2 is not converted to a * 4)
2008-08-13 Richard Guenther <rguenther@suse.de>
PR tree-optimization/15255
* tree-ssa-reassoc.c (linearize_expr_tree): Declare.
(struct oecount_s): New struct and VEC types.
(cvec): New global.
(oecount_hash): New function.
(oecount_eq): Likewise.
(oecount_cmp): Likewise.
(zero_one_operation): New function.
(build_and_add_sum): Likewise.
(undistribute_ops_list): Perform un-distribution of multiplication
and division on the chain of summands.
(should_break_up_subtract): Also break up subtracts for factors.
(reassociate_bb): Delete dead visited statements.
Call undistribute_ops_list. Re-sort and optimize if it did something.
* passes.c (init_optimization_passes): Move DSE before
reassociation.
* tree-ssa-loop-niter.c (stmt_dominates_stmt_p): Correctly handle
PHI nodes.
* gcc.dg/tree-ssa/reassoc-14.c: New testcase.
* gcc.dg/tree-ssa/reassoc-15.c: Likewise.
* gcc.dg/tree-ssa/reassoc-16.c: Likewise.
* gcc.dg/torture/reassoc-1.c: Likewise.
* gcc.dg/tree-ssa/recip-2.c: Adjust.
* gcc.dg/tree-ssa/recip-6.c: Likewise.
* gcc.dg/tree-ssa/recip-7.c: Likewise.
* gfortran.dg/reassoc_4.f: Likewise.
From-SVN: r139048
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 21 | ||||
-rw-r--r-- | gcc/passes.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 12 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/torture/reassoc-1.c | 24 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/reassoc-14.c | 23 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/reassoc-15.c | 19 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/reassoc-16.c | 16 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/reassoc-17.c | 16 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/reassoc-18.c | 12 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/recip-2.c | 11 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/recip-6.c | 8 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/recip-7.c | 8 | ||||
-rw-r--r-- | gcc/testsuite/gfortran.dg/reassoc_4.f | 43 | ||||
-rw-r--r-- | gcc/tree-ssa-loop-niter.c | 6 | ||||
-rw-r--r-- | gcc/tree-ssa-reassoc.c | 450 |
15 files changed, 658 insertions, 13 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b22b949a..805d73d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,24 @@ +2008-08-13 Richard Guenther <rguenther@suse.de> + + PR tree-optimization/15255 + * tree-ssa-reassoc.c (linearize_expr_tree): Declare. + (struct oecount_s): New struct and VEC types. + (cvec): New global. + (oecount_hash): New function. + (oecount_eq): Likewise. + (oecount_cmp): Likewise. + (zero_one_operation): New function. + (build_and_add_sum): Likewise. + (undistribute_ops_list): Perform un-distribution of multiplication + and division on the chain of summands. + (should_break_up_subtract): Also break up subtracts for factors. + (reassociate_bb): Delete dead visited statements. + Call undistribute_ops_list. Re-sort and optimize if it did something. + * passes.c (init_optimization_passes): Move DSE before + reassociation. + * tree-ssa-loop-niter.c (stmt_dominates_stmt_p): Correctly handle + PHI nodes. + 2008-08-12 Janis Johnson <janis187@us.ibm.com> * doc/invoke.texi (-fipa-pta): Say the option is experimental. diff --git a/gcc/passes.c b/gcc/passes.c index 64470ba..ee3826b 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -633,9 +633,9 @@ init_optimization_passes (void) only examines PHIs to discover const/copy propagation opportunities. */ NEXT_PASS (pass_phi_only_cprop); + NEXT_PASS (pass_dse); NEXT_PASS (pass_reassoc); NEXT_PASS (pass_dce); - NEXT_PASS (pass_dse); NEXT_PASS (pass_forwprop); NEXT_PASS (pass_phiopt); NEXT_PASS (pass_object_sizes); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 83d6afe..ca8c8cf 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,15 @@ +2008-08-13 Richard Guenther <rguenther@suse.de> + + PR tree-optimization/15255 + * gcc.dg/tree-ssa/reassoc-14.c: New testcase. + * gcc.dg/tree-ssa/reassoc-15.c: Likewise. + * gcc.dg/tree-ssa/reassoc-16.c: Likewise. + * gcc.dg/torture/reassoc-1.c: Likewise. + * gcc.dg/tree-ssa/recip-2.c: Adjust. + * gcc.dg/tree-ssa/recip-6.c: Likewise. + * gcc.dg/tree-ssa/recip-7.c: Likewise. + * gfortran.dg/reassoc_4.f: Likewise. + 2008-08-12 Janis Johnson <janis187@us.ibm.com> * gcc.target/i386/pr32000-2.c: Use dg-skip-if for target expression. diff --git a/gcc/testsuite/gcc.dg/torture/reassoc-1.c b/gcc/testsuite/gcc.dg/torture/reassoc-1.c new file mode 100644 index 0000000..f0c9014 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/reassoc-1.c @@ -0,0 +1,24 @@ +/* { dg-do run } */ + +int x; + +int __attribute__((noinline)) +foo(int a, int b, int w) +{ + int tmp1 = a * w; + int tmp2 = b * w; + x = tmp1; + return tmp1 + tmp2; +} + +extern void abort (void); + +int main() +{ + if (foo(1, 2, 3) != 9) + abort (); + if (x != 3) + abort (); + return 0; +} + diff --git a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-14.c b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-14.c new file mode 100644 index 0000000..24141ef --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-14.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-reassoc1" } */ + +int test1 (int x, int y, int z, int weight) +{ + int tmp1 = x * weight; + int tmp2 = y * weight; + int tmp3 = (x - y) * weight; + return tmp1 + (tmp2 + tmp3); +} + +int test2 (int x, int y, int z, int weight) +{ + int tmp1 = x * weight; + int tmp2 = y * weight * weight; + int tmp3 = z * weight * weight * weight; + return tmp1 + tmp2 + tmp3; +} + +/* There should be one multiplication left in test1 and three in test2. */ + +/* { dg-final { scan-tree-dump-times "\\\*" 4 "reassoc1" } } */ +/* { dg-final { cleanup-tree-dump "reassoc1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-15.c b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-15.c new file mode 100644 index 0000000..d9b74d2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-15.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-reassoc1" } */ + +int test3 (int x, int y, int z, int weight, int w1, int w2, int w3) +{ + int wtmp1 = w1 * weight; + int wtmp2 = w2 * weight; + int wtmp3 = w3 * weight; + int tmp1 = x * wtmp1; + int tmp2 = y * wtmp2; + int tmp3 = z * wtmp3; + return tmp1 + tmp2 + tmp3; +} + +/* The multiplication with weight should be un-distributed. + ??? This pattern is not recognized currently. */ + +/* { dg-final { scan-tree-dump-times "\\\*" 4 "reassoc1" } } */ +/* { dg-final { cleanup-tree-dump "reassoc1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-16.c b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-16.c new file mode 100644 index 0000000..4dd54a8 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-16.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ffast-math -fdump-tree-reassoc1" } */ + +double test1 (double x, double y, double z, double weight) +{ + double tmp1 = x / weight; + double tmp2 = y / weight; + double tmp3 = -x / weight; + return tmp1 + tmp2 + tmp3; +} + +/* The division should be un-distributed and all references to x should + be gone. */ + +/* { dg-final { scan-tree-dump-times "/" 1 "reassoc1" } } */ +/* { dg-final { cleanup-tree-dump "reassoc1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-17.c b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-17.c new file mode 100644 index 0000000..255c786 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-17.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ffast-math -fdump-tree-reassoc1" } */ + +double test2 (double x, double y, double ddj, int b) +{ + double tmp1, tmp2, sum; + sum = 0.0; + if (b) + sum = 1.0; + tmp1 = sum/ddj; + tmp2 = x/ddj; + return tmp1 + y + tmp2; +} + +/* { dg-final { scan-tree-dump-times "/" 1 "reassoc1" } } */ +/* { dg-final { cleanup-tree-dump "reassoc1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/reassoc-18.c b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-18.c new file mode 100644 index 0000000..ce52cd0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-18.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-reassoc1" } */ + +int +ETree_nFactorEntriesInFront (int b, int m) +{ + int nent = b*b + 2*b*m; + return nent; +} + +/* { dg-final { scan-tree-dump-times "\\\*" 2 "reassoc1" } } */ +/* { dg-final { cleanup-tree-dump "reassoc1" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/recip-2.c b/gcc/testsuite/gcc.dg/tree-ssa/recip-2.c index af62805..be75414 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/recip-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/recip-2.c @@ -1,7 +1,9 @@ /* { dg-do compile } */ /* { dg-options "-O1 -funsafe-math-optimizations -fdump-tree-recip" } */ -float e(float a, float b, float c, float d, float e, float f) +float u, v, w, x, y, z; + +void e(float a, float b, float c, float d, float e, float f) { if (a < b) { @@ -20,7 +22,12 @@ float e(float a, float b, float c, float d, float e, float f) /* This should not be left as a multiplication. */ c = 1 / c; - return a + b + c + d + e + f; + u = a; + v = b; + w = c; + x = d; + y = e; + z = f; } /* { dg-final { scan-tree-dump-times " / " 2 "recip" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/recip-6.c b/gcc/testsuite/gcc.dg/tree-ssa/recip-6.c index 60fefd0..b3334fb 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/recip-6.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/recip-6.c @@ -5,7 +5,9 @@ extern int f2(); -double f1(double y, double z, double w) +double m, n, o; + +void f1(double y, double z, double w) { double b, c, d, e, f; @@ -18,7 +20,9 @@ double f1(double y, double z, double w) e = c / y; f = 1 / y; - return d + e + f; + m = d; + n = e; + o = f; } /* { dg-final { scan-tree-dump-times " / " 1 "recip" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/recip-7.c b/gcc/testsuite/gcc.dg/tree-ssa/recip-7.c index af1bf3c..98bbdca 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/recip-7.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/recip-7.c @@ -5,7 +5,9 @@ extern double h(); -double f(int x, double z, double w) +double m, n, o; + +void f(int x, double z, double w) { double b, c, d, e, f; double y = h (); @@ -19,7 +21,9 @@ double f(int x, double z, double w) e = c / y; f = 1 / y; - return d + e + f; + m = d; + n = e; + o = f; } /* { dg-final { scan-tree-dump-times " / " 1 "recip" } } */ diff --git a/gcc/testsuite/gfortran.dg/reassoc_4.f b/gcc/testsuite/gfortran.dg/reassoc_4.f new file mode 100644 index 0000000..7004365 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/reassoc_4.f @@ -0,0 +1,43 @@ +! { dg-do compile } +! { dg-options "-O3 -ffast-math -fdump-tree-reassoc1" } + subroutine anisonl(w,vo,anisox,s,ii1,jj1,weight) + integer ii1,jj1,i1,iii1,j1,jjj1,k1,l1,m1,n1 + real*8 w(3,3),vo(3,3),anisox(3,3,3,3),s(60,60),weight +! +! This routine replaces the following lines in e_c3d.f for +! an anisotropic material +! + do i1=1,3 + iii1=ii1+i1-1 + do j1=1,3 + jjj1=jj1+j1-1 + do k1=1,3 + do l1=1,3 + s(iii1,jjj1)=s(iii1,jjj1) + & +anisox(i1,k1,j1,l1)*w(k1,l1)*weight + do m1=1,3 + s(iii1,jjj1)=s(iii1,jjj1) + & +anisox(i1,k1,m1,l1)*w(k1,l1) + & *vo(j1,m1)*weight + & +anisox(m1,k1,j1,l1)*w(k1,l1) + & *vo(i1,m1)*weight + do n1=1,3 + s(iii1,jjj1)=s(iii1,jjj1) + & +anisox(m1,k1,n1,l1) + & *w(k1,l1)*vo(i1,m1)*vo(j1,n1)*weight + enddo + enddo + enddo + enddo + enddo + enddo + + return + end + +! There should be 22 multiplications left after un-distributing +! weigth, w(k1,l1), vo(i1,m1) and vo(j1,m1) on the innermost two +! unrolled loops. + +! { dg-final { scan-tree-dump-times "\[0-9\] \\\* " 22 "reassoc1" } } +! { dg-final { cleanup-tree-dump "reassoc1" } } diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c index b8247a0..1eedf75 100644 --- a/gcc/tree-ssa-loop-niter.c +++ b/gcc/tree-ssa-loop-niter.c @@ -2914,6 +2914,12 @@ stmt_dominates_stmt_p (gimple s1, gimple s2) { gimple_stmt_iterator bsi; + if (gimple_code (s2) == GIMPLE_PHI) + return false; + + if (gimple_code (s1) == GIMPLE_PHI) + return true; + for (bsi = gsi_start_bb (bb1); gsi_stmt (bsi) != s2; gsi_next (&bsi)) if (gsi_stmt (bsi) == s1) return true; diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c index a3facd8..be68331 100644 --- a/gcc/tree-ssa-reassoc.c +++ b/gcc/tree-ssa-reassoc.c @@ -727,6 +727,415 @@ eliminate_using_constants (enum tree_code opcode, } } + +static void linearize_expr_tree (VEC(operand_entry_t, heap) **, gimple, + bool, bool); + +/* Structure for tracking and counting operands. */ +typedef struct oecount_s { + int cnt; + enum tree_code oecode; + tree op; +} oecount; + +DEF_VEC_O(oecount); +DEF_VEC_ALLOC_O(oecount,heap); + +/* The heap for the oecount hashtable and the sorted list of operands. */ +static VEC (oecount, heap) *cvec; + +/* Hash function for oecount. */ + +static hashval_t +oecount_hash (const void *p) +{ + const oecount *c = VEC_index (oecount, cvec, (size_t)p - 42); + return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode; +} + +/* Comparison function for oecount. */ + +static int +oecount_eq (const void *p1, const void *p2) +{ + const oecount *c1 = VEC_index (oecount, cvec, (size_t)p1 - 42); + const oecount *c2 = VEC_index (oecount, cvec, (size_t)p2 - 42); + return (c1->oecode == c2->oecode + && c1->op == c2->op); +} + +/* Comparison function for qsort sorting oecount elements by count. */ + +static int +oecount_cmp (const void *p1, const void *p2) +{ + const oecount *c1 = (const oecount *)p1; + const oecount *c2 = (const oecount *)p2; + return c1->cnt - c2->cnt; +} + +/* Walks the linear chain with result *DEF searching for an operation + with operand OP and code OPCODE removing that from the chain. *DEF + is updated if there is only one operand but no operation left. */ + +static void +zero_one_operation (tree *def, enum tree_code opcode, tree op) +{ + gimple stmt = SSA_NAME_DEF_STMT (*def); + + do + { + tree name = gimple_assign_rhs1 (stmt); + + /* If this is the operation we look for and one of the operands + is ours simply propagate the other operand into the stmts + single use. */ + if (gimple_assign_rhs_code (stmt) == opcode + && (name == op + || gimple_assign_rhs2 (stmt) == op)) + { + gimple use_stmt; + use_operand_p use; + gimple_stmt_iterator gsi; + if (name == op) + name = gimple_assign_rhs2 (stmt); + gcc_assert (has_single_use (gimple_assign_lhs (stmt))); + single_imm_use (gimple_assign_lhs (stmt), &use, &use_stmt); + if (gimple_assign_lhs (stmt) == *def) + *def = name; + SET_USE (use, name); + if (TREE_CODE (name) != SSA_NAME) + update_stmt (use_stmt); + gsi = gsi_for_stmt (stmt); + gsi_remove (&gsi, true); + release_defs (stmt); + return; + } + + /* Continue walking the chain. */ + gcc_assert (name != op + && TREE_CODE (name) == SSA_NAME); + stmt = SSA_NAME_DEF_STMT (name); + } + while (1); +} + +/* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for + the result. Places the statement after the definition of either + OP1 or OP2. Returns the new statement. */ + +static gimple +build_and_add_sum (tree tmpvar, tree op1, tree op2, enum tree_code opcode) +{ + gimple op1def = NULL, op2def = NULL; + gimple_stmt_iterator gsi; + tree op; + gimple sum; + + /* Create the addition statement. */ + sum = gimple_build_assign_with_ops (opcode, tmpvar, op1, op2); + op = make_ssa_name (tmpvar, sum); + gimple_assign_set_lhs (sum, op); + + /* Find an insertion place and insert. */ + if (TREE_CODE (op1) == SSA_NAME) + op1def = SSA_NAME_DEF_STMT (op1); + if (TREE_CODE (op2) == SSA_NAME) + op2def = SSA_NAME_DEF_STMT (op2); + if ((!op1def || gimple_nop_p (op1def)) + && (!op2def || gimple_nop_p (op2def))) + { + gsi = gsi_start_bb (single_succ (ENTRY_BLOCK_PTR)); + gsi_insert_before (&gsi, sum, GSI_NEW_STMT); + } + else if ((!op1def || gimple_nop_p (op1def)) + || (op2def && !gimple_nop_p (op2def) + && stmt_dominates_stmt_p (op1def, op2def))) + { + if (gimple_code (op2def) == GIMPLE_PHI) + { + gsi = gsi_start_bb (gimple_bb (op2def)); + gsi_insert_before (&gsi, sum, GSI_NEW_STMT); + } + else + { + gsi = gsi_for_stmt (op2def); + gsi_insert_after (&gsi, sum, GSI_NEW_STMT); + } + } + else + { + if (gimple_code (op1def) == GIMPLE_PHI) + { + gsi = gsi_start_bb (gimple_bb (op1def)); + gsi_insert_before (&gsi, sum, GSI_NEW_STMT); + } + else + { + gsi = gsi_for_stmt (op1def); + gsi_insert_after (&gsi, sum, GSI_NEW_STMT); + } + } + update_stmt (sum); + + return sum; +} + +/* Perform un-distribution of divisions and multiplications. + A * X + B * X is transformed into (A + B) * X and A / X + B / X + to (A + B) / X for real X. + + The algorithm is organized as follows. + + - First we walk the addition chain *OPS looking for summands that + are defined by a multiplication or a real division. This results + in the candidates bitmap with relevant indices into *OPS. + + - Second we build the chains of multiplications or divisions for + these candidates, counting the number of occurences of (operand, code) + pairs in all of the candidates chains. + + - Third we sort the (operand, code) pairs by number of occurence and + process them starting with the pair with the most uses. + + * For each such pair we walk the candidates again to build a + second candidate bitmap noting all multiplication/division chains + that have at least one occurence of (operand, code). + + * We build an alternate addition chain only covering these + candidates with one (operand, code) operation removed from their + multiplication/division chain. + + * The first candidate gets replaced by the alternate addition chain + multiplied/divided by the operand. + + * All candidate chains get disabled for further processing and + processing of (operand, code) pairs continues. + + The alternate addition chains built are re-processed by the main + reassociation algorithm which allows optimizing a * x * y + b * y * x + to (a + b ) * x * y in one invocation of the reassociation pass. */ + +static bool +undistribute_ops_list (enum tree_code opcode, + VEC (operand_entry_t, heap) **ops, struct loop *loop) +{ + unsigned int length = VEC_length (operand_entry_t, *ops); + operand_entry_t oe1; + unsigned i, j; + sbitmap candidates, candidates2; + unsigned nr_candidates, nr_candidates2; + sbitmap_iterator sbi0; + VEC (operand_entry_t, heap) **subops; + htab_t ctable; + bool changed = false; + + if (length <= 1 + || opcode != PLUS_EXPR) + return false; + + /* Build a list of candidates to process. */ + candidates = sbitmap_alloc (length); + sbitmap_zero (candidates); + nr_candidates = 0; + for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe1); ++i) + { + enum tree_code dcode; + gimple oe1def; + + if (TREE_CODE (oe1->op) != SSA_NAME) + continue; + oe1def = SSA_NAME_DEF_STMT (oe1->op); + if (!is_gimple_assign (oe1def)) + continue; + dcode = gimple_assign_rhs_code (oe1def); + if ((dcode != MULT_EXPR + && dcode != RDIV_EXPR) + || !is_reassociable_op (oe1def, dcode, loop)) + continue; + + SET_BIT (candidates, i); + nr_candidates++; + } + + if (nr_candidates < 2) + { + sbitmap_free (candidates); + return false; + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "searching for un-distribute opportunities "); + print_generic_expr (dump_file, + VEC_index (operand_entry_t, *ops, + sbitmap_first_set_bit (candidates))->op, 0); + fprintf (dump_file, " %d\n", nr_candidates); + } + + /* Build linearized sub-operand lists and the counting table. */ + cvec = NULL; + ctable = htab_create (15, oecount_hash, oecount_eq, NULL); + subops = XCNEWVEC (VEC (operand_entry_t, heap) *, + VEC_length (operand_entry_t, *ops)); + EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0) + { + gimple oedef; + enum tree_code oecode; + unsigned j; + + oedef = SSA_NAME_DEF_STMT (VEC_index (operand_entry_t, *ops, i)->op); + oecode = gimple_assign_rhs_code (oedef); + linearize_expr_tree (&subops[i], oedef, + associative_tree_code (oecode), false); + + for (j = 0; VEC_iterate (operand_entry_t, subops[i], j, oe1); ++j) + { + oecount c; + void **slot; + size_t idx; + c.oecode = oecode; + c.cnt = 1; + c.op = oe1->op; + VEC_safe_push (oecount, heap, cvec, &c); + idx = VEC_length (oecount, cvec) + 41; + slot = htab_find_slot (ctable, (void *)idx, INSERT); + if (!*slot) + { + *slot = (void *)idx; + } + else + { + VEC_pop (oecount, cvec); + VEC_index (oecount, cvec, (size_t)*slot - 42)->cnt++; + } + } + } + htab_delete (ctable); + + /* Sort the counting table. */ + qsort (VEC_address (oecount, cvec), VEC_length (oecount, cvec), + sizeof (oecount), oecount_cmp); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + oecount *c; + fprintf (dump_file, "Candidates:\n"); + for (j = 0; VEC_iterate (oecount, cvec, j, c); ++j) + { + fprintf (dump_file, " %u %s: ", c->cnt, + c->oecode == MULT_EXPR + ? "*" : c->oecode == RDIV_EXPR ? "/" : "?"); + print_generic_expr (dump_file, c->op, 0); + fprintf (dump_file, "\n"); + } + } + + /* Process the (operand, code) pairs in order of most occurence. */ + candidates2 = sbitmap_alloc (length); + while (!VEC_empty (oecount, cvec)) + { + oecount *c = VEC_last (oecount, cvec); + if (c->cnt < 2) + break; + + /* Now collect the operands in the outer chain that contain + the common operand in their inner chain. */ + sbitmap_zero (candidates2); + nr_candidates2 = 0; + EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0) + { + gimple oedef; + enum tree_code oecode; + unsigned j; + tree op = VEC_index (operand_entry_t, *ops, i)->op; + + /* If we undistributed in this chain already this may be + a constant. */ + if (TREE_CODE (op) != SSA_NAME) + continue; + + oedef = SSA_NAME_DEF_STMT (op); + oecode = gimple_assign_rhs_code (oedef); + if (oecode != c->oecode) + continue; + + for (j = 0; VEC_iterate (operand_entry_t, subops[i], j, oe1); ++j) + { + if (oe1->op == c->op) + { + SET_BIT (candidates2, i); + ++nr_candidates2; + break; + } + } + } + + if (nr_candidates2 >= 2) + { + operand_entry_t oe1, oe2; + tree tmpvar; + gimple prod; + int first = sbitmap_first_set_bit (candidates2); + + /* Build the new addition chain. */ + oe1 = VEC_index (operand_entry_t, *ops, first); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Building ("); + print_generic_expr (dump_file, oe1->op, 0); + } + tmpvar = create_tmp_var (TREE_TYPE (oe1->op), NULL); + add_referenced_var (tmpvar); + zero_one_operation (&oe1->op, c->oecode, c->op); + EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0) + { + gimple sum; + oe2 = VEC_index (operand_entry_t, *ops, i); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, " + "); + print_generic_expr (dump_file, oe2->op, 0); + } + zero_one_operation (&oe2->op, c->oecode, c->op); + sum = build_and_add_sum (tmpvar, oe1->op, oe2->op, opcode); + oe2->op = fold_convert (TREE_TYPE (oe2->op), integer_zero_node); + oe2->rank = 0; + oe1->op = gimple_get_lhs (sum); + } + + /* Apply the multiplication/division. */ + prod = build_and_add_sum (tmpvar, oe1->op, c->op, c->oecode); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/"); + print_generic_expr (dump_file, c->op, 0); + fprintf (dump_file, "\n"); + } + + /* Record it in the addition chain and disable further + undistribution with this op. */ + oe1->op = gimple_assign_lhs (prod); + oe1->rank = get_rank (oe1->op); + VEC_free (operand_entry_t, heap, subops[first]); + + changed = true; + } + + VEC_pop (oecount, cvec); + } + + for (i = 0; i < VEC_length (operand_entry_t, *ops); ++i) + VEC_free (operand_entry_t, heap, subops[i]); + free (subops); + VEC_free (oecount, heap, cvec); + sbitmap_free (candidates); + sbitmap_free (candidates2); + + return changed; +} + + /* Perform various identities and other optimizations on the list of operand entries, stored in OPS. The tree code for the binary operation between all the operands is OPCODE. */ @@ -1097,7 +1506,8 @@ should_break_up_subtract (gimple stmt) if (TREE_CODE (lhs) == SSA_NAME && (immusestmt = get_single_immediate_use (lhs)) && is_gimple_assign (immusestmt) - && gimple_assign_rhs_code (immusestmt) == PLUS_EXPR) + && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR + || gimple_assign_rhs_code (immusestmt) == MULT_EXPR)) return true; return false; } @@ -1125,7 +1535,8 @@ break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip) Place the operands of the expression tree in the vector named OPS. */ static void -linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt) +linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt, + bool is_associative, bool set_visited) { gimple_stmt_iterator gsinow, gsilhs; tree binlhs = gimple_assign_rhs1 (stmt); @@ -1136,7 +1547,8 @@ linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt) enum tree_code rhscode = gimple_assign_rhs_code (stmt); struct loop *loop = loop_containing_stmt (stmt); - gimple_set_visited (stmt, true); + if (set_visited) + gimple_set_visited (stmt, true); if (TREE_CODE (binlhs) == SSA_NAME) { @@ -1160,6 +1572,13 @@ linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt) { tree temp; + /* If this is not a associative operation like division, give up. */ + if (!is_associative) + { + add_to_ops_vec (ops, binrhs); + return; + } + if (!binrhsisreassoc) { add_to_ops_vec (ops, binrhs); @@ -1203,7 +1622,8 @@ linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt) gsinow = gsi_for_stmt (stmt); gsilhs = gsi_for_stmt (SSA_NAME_DEF_STMT (binlhs)); gsi_move_before (&gsilhs, &gsinow); - linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs)); + linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs), + is_associative, set_visited); add_to_ops_vec (ops, binrhs); } @@ -1344,7 +1764,16 @@ reassociate_bb (basic_block bb) /* If this was part of an already processed statement, we don't need to touch it again. */ if (gimple_visited_p (stmt)) - continue; + { + /* This statement might have become dead because of previous + reassociations. */ + if (has_zero_uses (gimple_get_lhs (stmt))) + { + gsi_remove (&gsi, true); + release_defs (stmt); + } + continue; + } lhs = gimple_assign_lhs (stmt); rhs1 = gimple_assign_rhs1 (stmt); @@ -1375,12 +1804,21 @@ reassociate_bb (basic_block bb) continue; gimple_set_visited (stmt, true); - linearize_expr_tree (&ops, stmt); + linearize_expr_tree (&ops, stmt, true, true); qsort (VEC_address (operand_entry_t, ops), VEC_length (operand_entry_t, ops), sizeof (operand_entry_t), sort_by_operand_rank); optimize_ops_list (rhs_code, &ops); + if (undistribute_ops_list (rhs_code, &ops, + loop_containing_stmt (stmt))) + { + qsort (VEC_address (operand_entry_t, ops), + VEC_length (operand_entry_t, ops), + sizeof (operand_entry_t), + sort_by_operand_rank); + optimize_ops_list (rhs_code, &ops); + } if (VEC_length (operand_entry_t, ops) == 1) { |