author     Jakub Jelinek <jakub@redhat.com>     2016-05-03 13:37:25 +0200
committer  Jakub Jelinek <jakub@gcc.gnu.org>    2016-05-03 13:37:25 +0200
commit     adedd5c173388ae505470df152b9cb3947339566 (patch)
tree       858576e93abb78b087773e895b143546903f49f2 /gcc/tree-ssa-ccp.c
parent     50891606a95368edd688fa9dc73003b1dfd68983 (diff)
re PR target/49244 (__sync or __atomic builtins will not emit 'lock bts/btr/btc')
	PR target/49244
	* tree-ssa-ccp.c: Include stor-layout.h and optabs-query.h.
	(optimize_atomic_bit_test_and): New function.
	(pass_fold_builtins::execute): Use it.
	* optabs.def (atomic_bit_test_and_set_optab,
	atomic_bit_test_and_complement_optab,
	atomic_bit_test_and_reset_optab): New optabs.
	* internal-fn.def (ATOMIC_BIT_TEST_AND_SET,
	ATOMIC_BIT_TEST_AND_COMPLEMENT, ATOMIC_BIT_TEST_AND_RESET): New ifns.
	* builtins.h (expand_ifn_atomic_bit_test_and): New prototype.
	* builtins.c (expand_ifn_atomic_bit_test_and): New function.
	* internal-fn.c (expand_ATOMIC_BIT_TEST_AND_SET,
	expand_ATOMIC_BIT_TEST_AND_COMPLEMENT,
	expand_ATOMIC_BIT_TEST_AND_RESET): New functions.
	* doc/md.texi (atomic_bit_test_and_set@var{mode},
	atomic_bit_test_and_complement@var{mode},
	atomic_bit_test_and_reset@var{mode}): Document.
	* config/i386/sync.md (atomic_bit_test_and_set<mode>,
	atomic_bit_test_and_complement<mode>,
	atomic_bit_test_and_reset<mode>): New expanders.
	(atomic_bit_test_and_set<mode>_1,
	atomic_bit_test_and_complement<mode>_1,
	atomic_bit_test_and_reset<mode>_1): New insns.
	* gcc.target/i386/pr49244-1.c: New test.
	* gcc.target/i386/pr49244-2.c: New test.
From-SVN: r235813
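
For context: the point of the change is that a fetch-or (or fetch-and/fetch-xor)
whose result is only tested against the same single bit can now be compiled on
x86 to a single lock bts/btr/btc instruction instead of a compare-exchange loop.
A minimal illustration in C (an assumed example, not taken from the commit; the
function name and bit position are arbitrary):

  /* Atomically set bit 3 of *p and return the bit's previous value.
     With this patch, GCC can emit something like
     "lock btsl $3, (%rdi); setc %al" for this function on x86_64.  */
  _Bool
  set_bit_3 (unsigned int *p)
  {
    return (__atomic_fetch_or (p, 1u << 3, __ATOMIC_SEQ_CST)
            & (1u << 3)) != 0;
  }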
Diffstat (limited to 'gcc/tree-ssa-ccp.c')
-rw-r--r--   gcc/tree-ssa-ccp.c   292
1 file changed, 292 insertions, 0 deletions
diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
index baae03f..c4e27f1 100644
--- a/gcc/tree-ssa-ccp.c
+++ b/gcc/tree-ssa-ccp.c
@@ -140,6 +140,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "tree-chkp.h"
 #include "cfgloop.h"
+#include "stor-layout.h"
+#include "optabs-query.h"
 
 /* Possible lattice values.  */
 
@@ -2697,6 +2699,224 @@ optimize_unreachable (gimple_stmt_iterator i)
   return ret;
 }
 
+/* Optimize
+     mask_2 = 1 << cnt_1;
+     _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
+     _5 = _4 & mask_2;
+   to
+     _4 = ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3);
+     _5 = _4;
+   If _5 is only used in _5 != 0 or _5 == 0 comparisons, 1
+   is passed instead of 0, and the builtin just returns a zero
+   or 1 value instead of the actual bit.
+   Similarly for __sync_fetch_and_or_* (without the ", _3" part
+   in there), and/or if mask_2 is a power of 2 constant.
+   Similarly for xor instead of or, use ATOMIC_BIT_TEST_AND_COMPLEMENT
+   in that case.  And similarly for and instead of or, except that
+   the second argument to the builtin needs to be one's complement
+   of the mask instead of mask.  */
+
+static void
+optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
+                              enum internal_fn fn, bool has_model_arg,
+                              bool after)
+{
+  gimple *call = gsi_stmt (*gsip);
+  tree lhs = gimple_call_lhs (call);
+  use_operand_p use_p;
+  gimple *use_stmt;
+  tree mask, bit;
+  optab optab;
+
+  if (!flag_inline_atomics
+      || optimize_debug
+      || !gimple_call_builtin_p (call, BUILT_IN_NORMAL)
+      || !lhs
+      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
+      || !single_imm_use (lhs, &use_p, &use_stmt)
+      || !is_gimple_assign (use_stmt)
+      || gimple_assign_rhs_code (use_stmt) != BIT_AND_EXPR
+      || !gimple_vdef (call))
+    return;
+
+  switch (fn)
+    {
+    case IFN_ATOMIC_BIT_TEST_AND_SET:
+      optab = atomic_bit_test_and_set_optab;
+      break;
+    case IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT:
+      optab = atomic_bit_test_and_complement_optab;
+      break;
+    case IFN_ATOMIC_BIT_TEST_AND_RESET:
+      optab = atomic_bit_test_and_reset_optab;
+      break;
+    default:
+      return;
+    }
+
+  if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs))) == CODE_FOR_nothing)
+    return;
+
+  mask = gimple_call_arg (call, 1);
+  tree use_lhs = gimple_assign_lhs (use_stmt);
+  if (!use_lhs)
+    return;
+
+  if (TREE_CODE (mask) == INTEGER_CST)
+    {
+      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+        mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
+      mask = fold_convert (TREE_TYPE (lhs), mask);
+      int ibit = tree_log2 (mask);
+      if (ibit < 0)
+        return;
+      bit = build_int_cst (TREE_TYPE (lhs), ibit);
+    }
+  else if (TREE_CODE (mask) == SSA_NAME)
+    {
+      gimple *g = SSA_NAME_DEF_STMT (mask);
+      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
+        {
+          if (!is_gimple_assign (g)
+              || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
+            return;
+          mask = gimple_assign_rhs1 (g);
+          if (TREE_CODE (mask) != SSA_NAME)
+            return;
+          g = SSA_NAME_DEF_STMT (mask);
+        }
+      if (!is_gimple_assign (g)
+          || gimple_assign_rhs_code (g) != LSHIFT_EXPR
+          || !integer_onep (gimple_assign_rhs1 (g)))
+        return;
+      bit = gimple_assign_rhs2 (g);
+    }
+  else
+    return;
+
+  if (gimple_assign_rhs1 (use_stmt) == lhs)
+    {
+      if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
+        return;
+    }
+  else if (gimple_assign_rhs2 (use_stmt) != lhs
+           || !operand_equal_p (gimple_assign_rhs1 (use_stmt), mask, 0))
+    return;
+
+  bool use_bool = true;
+  bool has_debug_uses = false;
+  imm_use_iterator iter;
+  gimple *g;
+
+  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs))
+    use_bool = false;
+  FOR_EACH_IMM_USE_STMT (g, iter, use_lhs)
+    {
+      enum tree_code code = ERROR_MARK;
+      tree op0, op1;
+      if (is_gimple_debug (g))
+        {
+          has_debug_uses = true;
+          continue;
+        }
+      else if (is_gimple_assign (g))
+        switch (gimple_assign_rhs_code (g))
+          {
+          case COND_EXPR:
+            op1 = gimple_assign_rhs1 (g);
+            code = TREE_CODE (op1);
+            op0 = TREE_OPERAND (op1, 0);
+            op1 = TREE_OPERAND (op1, 1);
+            break;
+          case EQ_EXPR:
+          case NE_EXPR:
+            code = gimple_assign_rhs_code (g);
+            op0 = gimple_assign_rhs1 (g);
+            op1 = gimple_assign_rhs2 (g);
+            break;
+          default:
+            break;
+          }
+      else if (gimple_code (g) == GIMPLE_COND)
+        {
+          code = gimple_cond_code (g);
+          op0 = gimple_cond_lhs (g);
+          op1 = gimple_cond_rhs (g);
+        }
+
+      if ((code == EQ_EXPR || code == NE_EXPR)
+          && op0 == use_lhs
+          && integer_zerop (op1))
+        {
+          use_operand_p use_p;
+          int n = 0;
+          FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+            n++;
+          if (n == 1)
+            continue;
+        }
+
+      use_bool = false;
+      BREAK_FROM_IMM_USE_STMT (iter);
+    }
+
+  tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
+  tree flag = build_int_cst (TREE_TYPE (lhs), use_bool);
+  if (has_model_arg)
+    g = gimple_build_call_internal (fn, 4, gimple_call_arg (call, 0),
+                                    bit, flag, gimple_call_arg (call, 2));
+  else
+    g = gimple_build_call_internal (fn, 3, gimple_call_arg (call, 0),
+                                    bit, flag);
+  gimple_call_set_lhs (g, new_lhs);
+  gimple_set_location (g, gimple_location (call));
+  gimple_set_vuse (g, gimple_vuse (call));
+  gimple_set_vdef (g, gimple_vdef (call));
+  SSA_NAME_DEF_STMT (gimple_vdef (call)) = g;
+  gimple_stmt_iterator gsi = *gsip;
+  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+  if (after)
+    {
+      /* The internal function returns the value of the specified bit
+         before the atomic operation.  If we are interested in the value
+         of the specified bit after the atomic operation (makes only sense
+         for xor, otherwise the bit content is compile time known),
+         we need to invert the bit.  */
+      g = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
+                               BIT_XOR_EXPR, new_lhs,
+                               use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
+                                        : mask);
+      new_lhs = gimple_assign_lhs (g);
+      gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+    }
+  if (use_bool && has_debug_uses)
+    {
+      tree temp = make_node (DEBUG_EXPR_DECL);
+      DECL_ARTIFICIAL (temp) = 1;
+      TREE_TYPE (temp) = TREE_TYPE (lhs);
+      DECL_MODE (temp) = TYPE_MODE (TREE_TYPE (lhs));
+      tree t = build2 (LSHIFT_EXPR, TREE_TYPE (lhs), new_lhs, bit);
+      g = gimple_build_debug_bind (temp, t, g);
+      gsi_insert_after (&gsi, g, GSI_NEW_STMT);
+      FOR_EACH_IMM_USE_STMT (g, iter, use_lhs)
+        if (is_gimple_debug (g))
+          {
+            use_operand_p use_p;
+            FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
+              SET_USE (use_p, temp);
+            update_stmt (g);
+          }
+    }
+  SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_lhs)
+    = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs);
+  replace_uses_by (use_lhs, new_lhs);
+  gsi = gsi_for_stmt (use_stmt);
+  gsi_remove (&gsi, true);
+  release_defs (use_stmt);
+  gsi_remove (gsip, true);
+  release_ssa_name (lhs);
+}
+
 /* A simple pass that attempts to fold all builtin functions.  This pass
    is run after we've propagated as many constants as we can.  */
 
@@ -2806,6 +3026,78 @@ pass_fold_builtins::execute (function *fun)
                   cfg_changed = true;
                 break;
 
+              case BUILT_IN_ATOMIC_FETCH_OR_1:
+              case BUILT_IN_ATOMIC_FETCH_OR_2:
+              case BUILT_IN_ATOMIC_FETCH_OR_4:
+              case BUILT_IN_ATOMIC_FETCH_OR_8:
+              case BUILT_IN_ATOMIC_FETCH_OR_16:
+                optimize_atomic_bit_test_and (&i,
+                                              IFN_ATOMIC_BIT_TEST_AND_SET,
+                                              true, false);
+                break;
+              case BUILT_IN_SYNC_FETCH_AND_OR_1:
+              case BUILT_IN_SYNC_FETCH_AND_OR_2:
+              case BUILT_IN_SYNC_FETCH_AND_OR_4:
+              case BUILT_IN_SYNC_FETCH_AND_OR_8:
+              case BUILT_IN_SYNC_FETCH_AND_OR_16:
+                optimize_atomic_bit_test_and (&i,
+                                              IFN_ATOMIC_BIT_TEST_AND_SET,
+                                              false, false);
+                break;
+
+              case BUILT_IN_ATOMIC_FETCH_XOR_1:
+              case BUILT_IN_ATOMIC_FETCH_XOR_2:
+              case BUILT_IN_ATOMIC_FETCH_XOR_4:
+              case BUILT_IN_ATOMIC_FETCH_XOR_8:
+              case BUILT_IN_ATOMIC_FETCH_XOR_16:
+                optimize_atomic_bit_test_and
+                  (&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, true, false);
+                break;
+              case BUILT_IN_SYNC_FETCH_AND_XOR_1:
+              case BUILT_IN_SYNC_FETCH_AND_XOR_2:
+              case BUILT_IN_SYNC_FETCH_AND_XOR_4:
+              case BUILT_IN_SYNC_FETCH_AND_XOR_8:
+              case BUILT_IN_SYNC_FETCH_AND_XOR_16:
+                optimize_atomic_bit_test_and
+                  (&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, false, false);
+                break;
+
+              case BUILT_IN_ATOMIC_XOR_FETCH_1:
+              case BUILT_IN_ATOMIC_XOR_FETCH_2:
+              case BUILT_IN_ATOMIC_XOR_FETCH_4:
+              case BUILT_IN_ATOMIC_XOR_FETCH_8:
+              case BUILT_IN_ATOMIC_XOR_FETCH_16:
+                optimize_atomic_bit_test_and
+                  (&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, true, true);
+                break;
+              case BUILT_IN_SYNC_XOR_AND_FETCH_1:
+              case BUILT_IN_SYNC_XOR_AND_FETCH_2:
+              case BUILT_IN_SYNC_XOR_AND_FETCH_4:
+              case BUILT_IN_SYNC_XOR_AND_FETCH_8:
+              case BUILT_IN_SYNC_XOR_AND_FETCH_16:
+                optimize_atomic_bit_test_and
+                  (&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, false, true);
+                break;
+
+              case BUILT_IN_ATOMIC_FETCH_AND_1:
+              case BUILT_IN_ATOMIC_FETCH_AND_2:
+              case BUILT_IN_ATOMIC_FETCH_AND_4:
+              case BUILT_IN_ATOMIC_FETCH_AND_8:
+              case BUILT_IN_ATOMIC_FETCH_AND_16:
+                optimize_atomic_bit_test_and (&i,
+                                              IFN_ATOMIC_BIT_TEST_AND_RESET,
+                                              true, false);
+                break;
+              case BUILT_IN_SYNC_FETCH_AND_AND_1:
+              case BUILT_IN_SYNC_FETCH_AND_AND_2:
+              case BUILT_IN_SYNC_FETCH_AND_AND_4:
+              case BUILT_IN_SYNC_FETCH_AND_AND_8:
+              case BUILT_IN_SYNC_FETCH_AND_AND_16:
+                optimize_atomic_bit_test_and (&i,
+                                              IFN_ATOMIC_BIT_TEST_AND_RESET,
+                                              false, false);
+                break;
+
               case BUILT_IN_VA_START:
               case BUILT_IN_VA_END:
               case BUILT_IN_VA_COPY:
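
A note on the "after" parameter of optimize_atomic_bit_test_and: the new
internal functions return the selected bit as it was before the atomic
operation, while the __atomic_xor_fetch/__sync_xor_and_fetch builtins return
the value after it, so those cases pass after = true and the pass appends a
BIT_XOR_EXPR to invert the returned bit. A sketch of a source pattern that
exercises this path (an assumed example, not taken from the commit's
testcases):

  /* Atomically toggle one bit and return its new value.  This maps to
     IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT with after == true, so the bit
     produced by "lock btc" (the old value, via CF) is inverted afterwards.  */
  _Bool
  toggle_bit (unsigned int *p, int bit)
  {
    return (__atomic_xor_fetch (p, 1u << bit, __ATOMIC_SEQ_CST)
            & (1u << bit)) != 0;
  }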