diff options
author | Jeff Law <law@redhat.com> | 2013-12-13 09:34:39 -0700 |
---|---|---|
committer | Jeff Law <law@gcc.gnu.org> | 2013-12-13 09:34:39 -0700 |
commit | 62c6e26e3326d13b47fba3b4375b261a8431d422 (patch) | |
tree | 73aedaaa5e23c1bfcf30818f587c63183516efb3 /gcc | |
parent | b0d97c338781ef524f1dbc44c4dffbe4b707ac19 (diff) | |
download | gcc-62c6e26e3326d13b47fba3b4375b261a8431d422.zip gcc-62c6e26e3326d13b47fba3b4375b261a8431d422.tar.gz gcc-62c6e26e3326d13b47fba3b4375b261a8431d422.tar.bz2 |
re PR tree-optimization/45685 (missed conditional move opportunity in loop)
PR tree-optimization/45685
* tree-ssa-phiopt.c (neg_replacement): New function.
(tree_ssa_phiopt_worker): Call it.
PR tree-optimization/45685
* gcc.dg/tree-ssa/pr45685.c: New test.
From-SVN: r205963
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 6 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/pr45685.c | 41 | ||||
-rw-r--r-- | gcc/tree-ssa-phiopt.c | 159 |
4 files changed, 211 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 493286b..3ee682b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2013-12-13 Jeff Law <law@redhat.com> + + PR tree-optimization/45685 + * tree-ssa-phiopt.c (neg_replacement): New function. + (tree_ssa_phiopt_worker): Call it. + 2013-12-13 Yuri Rumyantsev <ysrumyan@gmail.com> * config/i386/i386.c (slm_cost): Fix imul cost for HI. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 94e703b..70b1bc4 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2013-12-03 Jeff Law <law@redhat.com> + + PR tree-optimization/45685 + * gcc.dg/tree-ssa/pr45685.c: New test. + 2013-12-13 Bin Cheng <bin.cheng@arm.com> PR tree-optimization/58296 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr45685.c b/gcc/testsuite/gcc.dg/tree-ssa/pr45685.c new file mode 100644 index 0000000..0628943 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr45685.c @@ -0,0 +1,41 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-tree-phiopt1-details" } */ + +typedef unsigned long int uint64_t; +typedef long int int64_t; +int summation_helper_1(int64_t* products, uint64_t count) +{ + int s = 0; + uint64_t i; + for(i=0; i<count; i++) + { + int64_t val = (products[i]>0) ? 1 : -1; + products[i] *= val; + if(products[i] != i) + val = -val; + products[i] = val; + s += val; + } + return s; +} + + +int summation_helper_2(int64_t* products, uint64_t count) +{ + int s = 0; + uint64_t i; + for(i=0; i<count; i++) + { + int val = (products[i]>0) ? 1 : -1; + products[i] *= val; + if(products[i] != i) + val = -val; + products[i] = val; + s += val; + } + return s; +} + +/* { dg-final { scan-tree-dump-times "converted to straightline code" 2 "phiopt1" } } */ +/* { dg-final { cleanup-tree-dump "phiopt1" } } */ + diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c index 4c3fe50..b6bb7e9 100644 --- a/gcc/tree-ssa-phiopt.c +++ b/gcc/tree-ssa-phiopt.c @@ -69,6 +69,8 @@ static bool minmax_replacement (basic_block, basic_block, edge, edge, gimple, tree, tree); static bool abs_replacement (basic_block, basic_block, edge, edge, gimple, tree, tree); +static bool neg_replacement (basic_block, basic_block, + edge, edge, gimple, tree, tree); static bool cond_store_replacement (basic_block, basic_block, edge, edge, struct pointer_set_t *); static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block); @@ -336,6 +338,23 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads) /* Calculate the set of non-trapping memory accesses. */ nontrap = get_non_trapping (); + /* The replacement of conditional negation with a non-branching + sequence is really only a win when optimizing for speed and we + can avoid transformations by gimple if-conversion that result + in poor RTL generation. + + Ideally either gimple if-conversion or the RTL expanders will + be improved and the code to emit branchless conditional negation + can be removed. */ + bool replace_conditional_negation = false; + if (!do_store_elim) + replace_conditional_negation + = ((!optimize_size && optimize >= 2) + || (((flag_tree_loop_vectorize || cfun->has_force_vect_loops) + && flag_tree_loop_if_convert != 0) + || flag_tree_loop_if_convert == 1 + || flag_tree_loop_if_convert_stores == 1)); + /* Search every basic block for COND_EXPR we may be able to optimize. We walk the blocks in order that guarantees that a block with @@ -489,6 +508,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads) cfgchanged = true; else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) cfgchanged = true; + else if (replace_conditional_negation + && neg_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) + cfgchanged = true; else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) cfgchanged = true; } @@ -1285,6 +1307,143 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb, return true; } +/* The function neg_replacement replaces conditional negation with + equivalent straight line code. Returns TRUE if replacement is done, + otherwise returns FALSE. + + COND_BB branches around negation occuring in MIDDLE_BB. + + E0 and E1 are edges out of COND_BB. E0 reaches MIDDLE_BB and + E1 reaches the other successor which should contain PHI with + arguments ARG0 and ARG1. + + Assuming negation is to occur when the condition is true, + then the non-branching sequence is: + + result = (rhs ^ -cond) + cond + + Inverting the condition or its result gives us negation + when the original condition is false. */ + +static bool +neg_replacement (basic_block cond_bb, basic_block middle_bb, + edge e0 ATTRIBUTE_UNUSED, edge e1, + gimple phi, tree arg0, tree arg1) +{ + gimple new_stmt, cond; + gimple_stmt_iterator gsi; + gimple assign; + edge true_edge, false_edge; + tree rhs, lhs; + enum tree_code cond_code; + bool invert = false; + + /* This transformation performs logical operations on the + incoming arguments. So force them to be integral types. */ + if (!INTEGRAL_TYPE_P (TREE_TYPE (arg0))) + return false; + + /* OTHER_BLOCK must have only one executable statement which must have the + form arg0 = -arg1 or arg1 = -arg0. */ + + assign = last_and_only_stmt (middle_bb); + /* If we did not find the proper negation assignment, then we can not + optimize. */ + if (assign == NULL) + return false; + + /* If we got here, then we have found the only executable statement + in OTHER_BLOCK. If it is anything other than arg0 = -arg1 or + arg1 = -arg0, then we can not optimize. */ + if (gimple_code (assign) != GIMPLE_ASSIGN) + return false; + + lhs = gimple_assign_lhs (assign); + + if (gimple_assign_rhs_code (assign) != NEGATE_EXPR) + return false; + + rhs = gimple_assign_rhs1 (assign); + + /* The assignment has to be arg0 = -arg1 or arg1 = -arg0. */ + if (!(lhs == arg0 && rhs == arg1) + && !(lhs == arg1 && rhs == arg0)) + return false; + + /* The basic sequence assumes we negate when the condition is true. + If we need the opposite, then we will either need to invert the + condition or its result. */ + extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge); + invert = false_edge->dest == middle_bb; + + /* Unlike abs_replacement, we can handle arbitrary conditionals here. */ + cond = last_stmt (cond_bb); + cond_code = gimple_cond_code (cond); + + /* If inversion is needed, first try to invert the test since + that's cheapest. */ + if (invert) + { + bool honor_nans + = HONOR_NANS (TYPE_MODE (TREE_TYPE (gimple_cond_lhs (cond)))); + enum tree_code new_code = invert_tree_comparison (cond_code, honor_nans); + + /* If invert_tree_comparison was successful, then use its return + value as the new code and note that inversion is no longer + needed. */ + if (new_code != ERROR_MARK) + { + cond_code = new_code; + invert = false; + } + } + + tree cond_val = make_ssa_name (boolean_type_node, NULL); + new_stmt = gimple_build_assign_with_ops (cond_code, cond_val, + gimple_cond_lhs (cond), + gimple_cond_rhs (cond)); + gsi = gsi_last_bb (cond_bb); + gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT); + + /* If we still need inversion, then invert the result of the + condition. */ + if (invert) + { + tree tmp = make_ssa_name (boolean_type_node, NULL); + new_stmt = gimple_build_assign_with_ops (BIT_XOR_EXPR, tmp, + cond_val, boolean_true_node); + gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT); + cond_val = tmp; + } + + /* Get the condition in the right type so that we can perform + logical and arithmetic operations on it. */ + tree cond_val_converted = make_ssa_name (TREE_TYPE (rhs), NULL); + new_stmt = gimple_build_assign_with_ops (NOP_EXPR, cond_val_converted, + cond_val, NULL_TREE); + gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT); + + tree neg_cond_val_converted = make_ssa_name (TREE_TYPE (rhs), NULL); + new_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, neg_cond_val_converted, + cond_val_converted, NULL_TREE); + gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT); + + tree tmp = make_ssa_name (TREE_TYPE (rhs), NULL); + new_stmt = gimple_build_assign_with_ops (BIT_XOR_EXPR, tmp, + rhs, neg_cond_val_converted); + gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT); + + tree new_lhs = make_ssa_name (TREE_TYPE (rhs), NULL); + new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, new_lhs, + tmp, cond_val_converted); + gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT); + + replace_phi_edge_with_variable (cond_bb, e1, phi, new_lhs); + + /* Note that we optimized this PHI. */ + return true; +} + /* Auxiliary functions to determine the set of memory accesses which can't trap because they are preceded by accesses to the same memory portion. We do that for MEM_REFs, so we only need to track |