author     Jakub Jelinek <jakub@redhat.com>    2016-05-03 13:37:25 +0200
committer  Jakub Jelinek <jakub@gcc.gnu.org>   2016-05-03 13:37:25 +0200
commit     adedd5c173388ae505470df152b9cb3947339566 (patch)
tree       858576e93abb78b087773e895b143546903f49f2 /gcc/tree-ssa-ccp.c
parent     50891606a95368edd688fa9dc73003b1dfd68983 (diff)
re PR target/49244 (__sync or __atomic builtins will not emit 'lock bts/btr/btc')
        PR target/49244
        * tree-ssa-ccp.c: Include stor-layout.h and optabs-query.h.
        (optimize_atomic_bit_test_and): New function.
        (pass_fold_builtins::execute): Use it.
        * optabs.def (atomic_bit_test_and_set_optab,
        atomic_bit_test_and_complement_optab,
        atomic_bit_test_and_reset_optab): New optabs.
        * internal-fn.def (ATOMIC_BIT_TEST_AND_SET,
        ATOMIC_BIT_TEST_AND_COMPLEMENT, ATOMIC_BIT_TEST_AND_RESET): New ifns.
        * builtins.h (expand_ifn_atomic_bit_test_and): New prototype.
        * builtins.c (expand_ifn_atomic_bit_test_and): New function.
        * internal-fn.c (expand_ATOMIC_BIT_TEST_AND_SET,
        expand_ATOMIC_BIT_TEST_AND_COMPLEMENT,
        expand_ATOMIC_BIT_TEST_AND_RESET): New functions.
        * doc/md.texi (atomic_bit_test_and_set@var{mode},
        atomic_bit_test_and_complement@var{mode},
        atomic_bit_test_and_reset@var{mode}): Document.
        * config/i386/sync.md (atomic_bit_test_and_set<mode>,
        atomic_bit_test_and_complement<mode>,
        atomic_bit_test_and_reset<mode>): New expanders.
        (atomic_bit_test_and_set<mode>_1,
        atomic_bit_test_and_complement<mode>_1,
        atomic_bit_test_and_reset<mode>_1): New insns.
        * gcc.target/i386/pr49244-1.c: New test.
        * gcc.target/i386/pr49244-2.c: New test.

From-SVN: r235813
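As a minimal illustrative sketch (not part of the commit itself; the function name and memory order are arbitrary), this is the kind of source-level pattern the new optimization recognizes: the 1 << bit mask, the atomic fetch-or and the & mask test can now be combined into IFN_ATOMIC_BIT_TEST_AND_SET, which the new i386 expanders can emit as a single lock bts:

/* Hypothetical user code; a candidate for the new bit-test-and-set lowering.  */
int
test_and_set_bit (unsigned int *p, int bit)
{
  unsigned int mask = 1u << bit;
  return (__atomic_fetch_or (p, mask, __ATOMIC_SEQ_CST) & mask) != 0;
}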
Diffstat (limited to 'gcc/tree-ssa-ccp.c')
-rw-r--r--  gcc/tree-ssa-ccp.c  292
1 file changed, 292 insertions, 0 deletions
diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
index baae03f..c4e27f1 100644
--- a/gcc/tree-ssa-ccp.c
+++ b/gcc/tree-ssa-ccp.c
@@ -140,6 +140,8 @@ along with GCC; see the file COPYING3. If not see
#include "builtins.h"
#include "tree-chkp.h"
#include "cfgloop.h"
+#include "stor-layout.h"
+#include "optabs-query.h"
/* Possible lattice values. */
@@ -2697,6 +2699,224 @@ optimize_unreachable (gimple_stmt_iterator i)
  return ret;
}
+/* Optimize
+     mask_2 = 1 << cnt_1;
+     _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
+     _5 = _4 & mask_2;
+   to
+     _4 = ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3);
+     _5 = _4;
+   If _5 is only used in _5 != 0 or _5 == 0 comparisons, 1
+   is passed instead of 0, and the builtin just returns a zero
+   or 1 value instead of the actual bit.
+   Similarly for __sync_fetch_and_or_* (without the ", _3" part
+   in there), and/or if mask_2 is a power of 2 constant.
+   Similarly for xor instead of or, use ATOMIC_BIT_TEST_AND_COMPLEMENT
+   in that case.  And similarly for and instead of or, except that
+   the second argument to the builtin needs to be one's complement
+   of the mask instead of mask.  */
+
+static void
+optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
+                              enum internal_fn fn, bool has_model_arg,
+                              bool after)
+{
+  gimple *call = gsi_stmt (*gsip);
+  tree lhs = gimple_call_lhs (call);
+  use_operand_p use_p;
+  gimple *use_stmt;
+  tree mask, bit;
+  optab optab;
+
+  if (!flag_inline_atomics
+      || optimize_debug
+      || !gimple_call_builtin_p (call, BUILT_IN_NORMAL)
+      || !lhs
+      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
+      || !single_imm_use (lhs, &use_p, &use_stmt)
+      || !is_gimple_assign (use_stmt)
+      || gimple_assign_rhs_code (use_stmt) != BIT_AND_EXPR
+      || !gimple_vdef (call))
+    return;
+
+  switch (fn)
+    {
+    case IFN_ATOMIC_BIT_TEST_AND_SET:
+      optab = atomic_bit_test_and_set_optab;
+      break;
+    case IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT:
+      optab = atomic_bit_test_and_complement_optab;
+      break;
+    case IFN_ATOMIC_BIT_TEST_AND_RESET:
+      optab = atomic_bit_test_and_reset_optab;
+      break;
+    default:
+      return;
+    }
+
+  if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs))) == CODE_FOR_nothing)
+    return;
+
+  mask = gimple_call_arg (call, 1);
+  tree use_lhs = gimple_assign_lhs (use_stmt);
+  if (!use_lhs)
+    return;
+
+  if (TREE_CODE (mask) == INTEGER_CST)
+    {
+      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+        mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
+      mask = fold_convert (TREE_TYPE (lhs), mask);
+      int ibit = tree_log2 (mask);
+      if (ibit < 0)
+        return;
+      bit = build_int_cst (TREE_TYPE (lhs), ibit);
+    }
+  else if (TREE_CODE (mask) == SSA_NAME)
+    {
+      gimple *g = SSA_NAME_DEF_STMT (mask);
+      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+        {
+          if (!is_gimple_assign (g)
+              || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
+            return;
+          mask = gimple_assign_rhs1 (g);
+          if (TREE_CODE (mask) != SSA_NAME)
+            return;
+          g = SSA_NAME_DEF_STMT (mask);
+        }
+      if (!is_gimple_assign (g)
+          || gimple_assign_rhs_code (g) != LSHIFT_EXPR
+          || !integer_onep (gimple_assign_rhs1 (g)))
+        return;
+      bit = gimple_assign_rhs2 (g);
+    }
+  else
+    return;
+
+  if (gimple_assign_rhs1 (use_stmt) == lhs)
+    {
+      if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
+        return;
+    }
+  else if (gimple_assign_rhs2 (use_stmt) != lhs
+           || !operand_equal_p (gimple_assign_rhs1 (use_stmt), mask, 0))
+    return;
+
+  bool use_bool = true;
+  bool has_debug_uses = false;
+  imm_use_iterator iter;
+  gimple *g;
+
+  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs))
+    use_bool = false;
+  FOR_EACH_IMM_USE_STMT (g, iter, use_lhs)
+    {
+      enum tree_code code = ERROR_MARK;
+      tree op0, op1;
+      if (is_gimple_debug (g))
+        {
+          has_debug_uses = true;
+          continue;
+        }
+      else if (is_gimple_assign (g))
+        switch (gimple_assign_rhs_code (g))
+          {
+          case COND_EXPR:
+            op1 = gimple_assign_rhs1 (g);
+            code = TREE_CODE (op1);
+            op0 = TREE_OPERAND (op1, 0);
+            op1 = TREE_OPERAND (op1, 1);
+            break;
+          case EQ_EXPR:
+          case NE_EXPR:
+            code = gimple_assign_rhs_code (g);
+            op0 = gimple_assign_rhs1 (g);
+            op1 = gimple_assign_rhs2 (g);
+            break;
+          default:
+            break;
+          }
+      else if (gimple_code (g) == GIMPLE_COND)
+        {
+          code = gimple_cond_code (g);
+          op0 = gimple_cond_lhs (g);
+          op1 = gimple_cond_rhs (g);
+        }
+
+      if ((code == EQ_EXPR || code == NE_EXPR)
+          && op0 == use_lhs
+          && integer_zerop (op1))
+        {
+          use_operand_p use_p;
+          int n = 0;
+          FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+            n++;
+          if (n == 1)
+            continue;
+        }
+
+      use_bool = false;
+      BREAK_FROM_IMM_USE_STMT (iter);
+    }
+
+  tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
+  tree flag = build_int_cst (TREE_TYPE (lhs), use_bool);
+  if (has_model_arg)
+    g = gimple_build_call_internal (fn, 4, gimple_call_arg (call, 0),
+                                    bit, flag, gimple_call_arg (call, 2));
+  else
+    g = gimple_build_call_internal (fn, 3, gimple_call_arg (call, 0),
+                                    bit, flag);
+  gimple_call_set_lhs (g, new_lhs);
+  gimple_set_location (g, gimple_location (call));
+  gimple_set_vuse (g, gimple_vuse (call));
+  gimple_set_vdef (g, gimple_vdef (call));
+  SSA_NAME_DEF_STMT (gimple_vdef (call)) = g;
+  gimple_stmt_iterator gsi = *gsip;
+  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+  if (after)
+    {
+      /* The internal function returns the value of the specified bit
+         before the atomic operation.  If we are interested in the value
+         of the specified bit after the atomic operation (makes only sense
+         for xor, otherwise the bit content is compile time known),
+         we need to invert the bit.  */
+      g = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
+                               BIT_XOR_EXPR, new_lhs,
+                               use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
+                                        : mask);
+      new_lhs = gimple_assign_lhs (g);
+      gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+    }
+  if (use_bool && has_debug_uses)
+    {
+      tree temp = make_node (DEBUG_EXPR_DECL);
+      DECL_ARTIFICIAL (temp) = 1;
+      TREE_TYPE (temp) = TREE_TYPE (lhs);
+      DECL_MODE (temp) = TYPE_MODE (TREE_TYPE (lhs));
+      tree t = build2 (LSHIFT_EXPR, TREE_TYPE (lhs), new_lhs, bit);
+      g = gimple_build_debug_bind (temp, t, g);
+      gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+      FOR_EACH_IMM_USE_STMT (g, iter, use_lhs)
+        if (is_gimple_debug (g))
+          {
+            use_operand_p use_p;
+            FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+              SET_USE (use_p, temp);
+            update_stmt (g);
+          }
+    }
+  SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_lhs)
+    = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs);
+  replace_uses_by (use_lhs, new_lhs);
+  gsi = gsi_for_stmt (use_stmt);
+  gsi_remove (&gsi, true);
+  release_defs (use_stmt);
+  gsi_remove (gsip, true);
+  release_ssa_name (lhs);
+}
+
/* A simple pass that attempts to fold all builtin functions.  This pass
   is run after we've propagated as many constants as we can.  */
@@ -2806,6 +3026,78 @@ pass_fold_builtins::execute (function *fun)
                cfg_changed = true;
              break;

+            case BUILT_IN_ATOMIC_FETCH_OR_1:
+            case BUILT_IN_ATOMIC_FETCH_OR_2:
+            case BUILT_IN_ATOMIC_FETCH_OR_4:
+            case BUILT_IN_ATOMIC_FETCH_OR_8:
+            case BUILT_IN_ATOMIC_FETCH_OR_16:
+              optimize_atomic_bit_test_and (&i,
+                                            IFN_ATOMIC_BIT_TEST_AND_SET,
+                                            true, false);
+              break;
+            case BUILT_IN_SYNC_FETCH_AND_OR_1:
+            case BUILT_IN_SYNC_FETCH_AND_OR_2:
+            case BUILT_IN_SYNC_FETCH_AND_OR_4:
+            case BUILT_IN_SYNC_FETCH_AND_OR_8:
+            case BUILT_IN_SYNC_FETCH_AND_OR_16:
+              optimize_atomic_bit_test_and (&i,
+                                            IFN_ATOMIC_BIT_TEST_AND_SET,
+                                            false, false);
+              break;
+
+            case BUILT_IN_ATOMIC_FETCH_XOR_1:
+            case BUILT_IN_ATOMIC_FETCH_XOR_2:
+            case BUILT_IN_ATOMIC_FETCH_XOR_4:
+            case BUILT_IN_ATOMIC_FETCH_XOR_8:
+            case BUILT_IN_ATOMIC_FETCH_XOR_16:
+              optimize_atomic_bit_test_and
+                (&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, true, false);
+              break;
+            case BUILT_IN_SYNC_FETCH_AND_XOR_1:
+            case BUILT_IN_SYNC_FETCH_AND_XOR_2:
+            case BUILT_IN_SYNC_FETCH_AND_XOR_4:
+            case BUILT_IN_SYNC_FETCH_AND_XOR_8:
+            case BUILT_IN_SYNC_FETCH_AND_XOR_16:
+              optimize_atomic_bit_test_and
+                (&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, false, false);
+              break;
+
+            case BUILT_IN_ATOMIC_XOR_FETCH_1:
+            case BUILT_IN_ATOMIC_XOR_FETCH_2:
+            case BUILT_IN_ATOMIC_XOR_FETCH_4:
+            case BUILT_IN_ATOMIC_XOR_FETCH_8:
+            case BUILT_IN_ATOMIC_XOR_FETCH_16:
+              optimize_atomic_bit_test_and
+                (&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, true, true);
+              break;
+            case BUILT_IN_SYNC_XOR_AND_FETCH_1:
+            case BUILT_IN_SYNC_XOR_AND_FETCH_2:
+            case BUILT_IN_SYNC_XOR_AND_FETCH_4:
+            case BUILT_IN_SYNC_XOR_AND_FETCH_8:
+            case BUILT_IN_SYNC_XOR_AND_FETCH_16:
+              optimize_atomic_bit_test_and
+                (&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, false, true);
+              break;
+
+            case BUILT_IN_ATOMIC_FETCH_AND_1:
+            case BUILT_IN_ATOMIC_FETCH_AND_2:
+            case BUILT_IN_ATOMIC_FETCH_AND_4:
+            case BUILT_IN_ATOMIC_FETCH_AND_8:
+            case BUILT_IN_ATOMIC_FETCH_AND_16:
+              optimize_atomic_bit_test_and (&i,
+                                            IFN_ATOMIC_BIT_TEST_AND_RESET,
+                                            true, false);
+              break;
+            case BUILT_IN_SYNC_FETCH_AND_AND_1:
+            case BUILT_IN_SYNC_FETCH_AND_AND_2:
+            case BUILT_IN_SYNC_FETCH_AND_AND_4:
+            case BUILT_IN_SYNC_FETCH_AND_AND_8:
+            case BUILT_IN_SYNC_FETCH_AND_AND_16:
+              optimize_atomic_bit_test_and (&i,
+                                            IFN_ATOMIC_BIT_TEST_AND_RESET,
+                                            false, false);
+              break;
+
            case BUILT_IN_VA_START:
            case BUILT_IN_VA_END:
            case BUILT_IN_VA_COPY:
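For the remaining builtins wired into pass_fold_builtins::execute above, here is an equally illustrative sketch (not part of the patch; the function names are made up): for the fetch-and-AND forms the second argument must be the one's complement of the single-bit mask, mapping to IFN_ATOMIC_BIT_TEST_AND_RESET (lock btr), while for the xor-*-fetch forms the result reflects the bit after the operation, which is why optimize_atomic_bit_test_and is called with after set and inverts the returned bit (IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, lock btc):

/* Hypothetical user code; candidates for the reset and complement forms.  */
int
test_and_clear_bit (unsigned int *p, int bit)
{
  unsigned int mask = 1u << bit;
  /* Fetch-and-AND with ~mask, recognized as ATOMIC_BIT_TEST_AND_RESET.  */
  return (__sync_fetch_and_and (p, ~mask) & mask) != 0;
}

int
test_and_toggle_bit (unsigned int *p, int bit)
{
  unsigned int mask = 1u << bit;
  /* xor-fetch returns the new value, so the expansion uses after = true
     and inverts the returned bit (ATOMIC_BIT_TEST_AND_COMPLEMENT).  */
  return (__atomic_xor_fetch (p, mask, __ATOMIC_RELAXED) & mask) != 0;
}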