about summary refs log tree commit diff
path: root/gcc/builtins.c
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2022-01-03 14:02:23 +0100
committerJakub Jelinek <jakub@redhat.com>2022-01-03 14:17:26 +0100
commit6362627b27f395b054f359244fcfcb15ac0ac2ab (patch)
tree9fdac8e071aec28629541896854d11c29c2f8540 /gcc/builtins.c
parent4911609fbe47d3e4d2765cd67031a7e0ee9f5af0 (diff)
downloadgcc-6362627b27f395b054f359244fcfcb15ac0ac2ab.zip
gcc-6362627b27f395b054f359244fcfcb15ac0ac2ab.tar.gz
gcc-6362627b27f395b054f359244fcfcb15ac0ac2ab.tar.bz2
i386, fab: Optimize __atomic_{add,sub,and,or,xor}_fetch (x, y, z) {==,!=,<,<=,>,>=} 0 [PR98737]
On Wed, Jan 27, 2021 at 12:27:13PM +0100, Ulrich Drepper via Gcc-patches wrote: > On 1/27/21 11:37 AM, Jakub Jelinek wrote: > > Would equality comparison against 0 handle the most common cases. > > > > The user can write it as > > __atomic_sub_fetch (x, y, z) == 0 > > or > > __atomic_fetch_sub (x, y, z) - y == 0 > > thouch, so the expansion code would need to be able to cope with both. > > Please also keep !=0, <0, <=0, >0, and >=0 in mind. They all can be > useful and can be handled with the flags. <= 0 and > 0 don't really work well with lock {add,sub,inc,dec}, x86 doesn't have comparisons that would look solely at both SF and ZF and not at other flags (and emitting two separate conditional jumps or two setcc insns and oring them together looks awful). But the rest can work. Here is a patch that adds internal functions and optabs for these, recognizes them at the same spot as e.g. .ATOMIC_BIT_TEST_AND* internal functions (fold all builtins pass) and expands them appropriately (or for the <= 0 and > 0 cases of +/- FAILs and let's middle-end fall back). So far I have handled just the op_fetch builtins, IMHO instead of handling also __atomic_fetch_sub (x, y, z) - y == 0 etc. we should canonicalize __atomic_fetch_sub (x, y, z) - y to __atomic_sub_fetch (x, y, z) (and vice versa). 2022-01-03 Jakub Jelinek <jakub@redhat.com> PR target/98737 * internal-fn.def (ATOMIC_ADD_FETCH_CMP_0, ATOMIC_SUB_FETCH_CMP_0, ATOMIC_AND_FETCH_CMP_0, ATOMIC_OR_FETCH_CMP_0, ATOMIC_XOR_FETCH_CMP_0): New internal fns. * internal-fn.h (ATOMIC_OP_FETCH_CMP_0_EQ, ATOMIC_OP_FETCH_CMP_0_NE, ATOMIC_OP_FETCH_CMP_0_LT, ATOMIC_OP_FETCH_CMP_0_LE, ATOMIC_OP_FETCH_CMP_0_GT, ATOMIC_OP_FETCH_CMP_0_GE): New enumerators. * internal-fn.c (expand_ATOMIC_ADD_FETCH_CMP_0, expand_ATOMIC_SUB_FETCH_CMP_0, expand_ATOMIC_AND_FETCH_CMP_0, expand_ATOMIC_OR_FETCH_CMP_0, expand_ATOMIC_XOR_FETCH_CMP_0): New functions. 
* optabs.def (atomic_add_fetch_cmp_0_optab, atomic_sub_fetch_cmp_0_optab, atomic_and_fetch_cmp_0_optab, atomic_or_fetch_cmp_0_optab, atomic_xor_fetch_cmp_0_optab): New direct optabs. * builtins.h (expand_ifn_atomic_op_fetch_cmp_0): Declare. * builtins.c (expand_ifn_atomic_op_fetch_cmp_0): New function. * tree-ssa-ccp.c: Include internal-fn.h. (optimize_atomic_bit_test_and): Add . before internal fn call in function comment. Change return type from void to bool and return true only if successfully replaced. (optimize_atomic_op_fetch_cmp_0): New function. (pass_fold_builtins::execute): Use optimize_atomic_op_fetch_cmp_0 for BUILT_IN_ATOMIC_{ADD,SUB,AND,OR,XOR}_FETCH_{1,2,4,8,16} and BUILT_IN_SYNC_{ADD,SUB,AND,OR,XOR}_AND_FETCH_{1,2,4,8,16}, for *XOR* ones only if optimize_atomic_bit_test_and failed. * config/i386/sync.md (atomic_<plusminus_mnemonic>_fetch_cmp_0<mode>, atomic_<logic>_fetch_cmp_0<mode>): New define_expand patterns. (atomic_add_fetch_cmp_0<mode>_1, atomic_sub_fetch_cmp_0<mode>_1, atomic_<logic>_fetch_cmp_0<mode>_1): New define_insn patterns. * doc/md.texi (atomic_add_fetch_cmp_0<mode>, atomic_sub_fetch_cmp_0<mode>, atomic_and_fetch_cmp_0<mode>, atomic_or_fetch_cmp_0<mode>, atomic_xor_fetch_cmp_0<mode>): Document new named patterns. * gcc.target/i386/pr98737-1.c: New test. * gcc.target/i386/pr98737-2.c: New test. * gcc.target/i386/pr98737-3.c: New test. * gcc.target/i386/pr98737-4.c: New test. * gcc.target/i386/pr98737-5.c: New test. * gcc.target/i386/pr98737-6.c: New test. * gcc.target/i386/pr98737-7.c: New test.
Diffstat (limited to 'gcc/builtins.c')
-rw-r--r--gcc/builtins.c87
1 files changed, 87 insertions, 0 deletions
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 77c5755..39b08af 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -6276,6 +6276,93 @@ expand_ifn_atomic_bit_test_and (gcall *call)
emit_move_insn (target, result);
}
+/* Expand an IFN_ATOMIC_{ADD,SUB,AND,OR,XOR}_FETCH_CMP_0 internal function:
+   atomically combine an operand with *PTR and set the call's lhs to the
+   boolean result of comparing the new value against zero.  The call's
+   arguments are:
+     arg 0 - which comparison to perform, one of the
+	      ATOMIC_OP_FETCH_CMP_0_{EQ,NE,LT,LE,GT,GE} codes;
+     arg 1 - pointer to the atomic memory location;
+     arg 2 - the value to combine with *arg1;
+     arg 3 - (optional) the memory model; when absent,
+	      MEMMODEL_SYNC_SEQ_CST is used.
+   First try the target's atomic_*_fetch_cmp_0 named pattern; if the
+   target has none (or it FAILs), fall back to a generic atomic
+   op-and-fetch followed by a separate comparison against zero.  */
+
+void
+expand_ifn_atomic_op_fetch_cmp_0 (gcall *call)
+{
+ tree cmp = gimple_call_arg (call, 0);
+ tree ptr = gimple_call_arg (call, 1);
+ tree arg = gimple_call_arg (call, 2);
+ tree lhs = gimple_call_lhs (call);
+ enum memmodel model = MEMMODEL_SYNC_SEQ_CST;
+ machine_mode mode = TYPE_MODE (TREE_TYPE (cmp));
+ optab optab;
+ rtx_code code;
+ class expand_operand ops[5];
+
+ /* This IFN is only emitted when inlining of atomics is enabled.  */
+ gcc_assert (flag_inline_atomics);
+
+ /* A 4th argument, when present, carries an explicit memory model.  */
+ if (gimple_call_num_args (call) == 4)
+ model = get_memmodel (gimple_call_arg (call, 3));
+
+ rtx mem = get_builtin_sync_mem (ptr, mode);
+ rtx op = expand_expr_force_mode (arg, mode);
+
+ /* Pick the rtx arithmetic code and the direct optab that match the
+    internal function being expanded.  */
+ switch (gimple_call_internal_fn (call))
+ {
+ case IFN_ATOMIC_ADD_FETCH_CMP_0:
+ code = PLUS;
+ optab = atomic_add_fetch_cmp_0_optab;
+ break;
+ case IFN_ATOMIC_SUB_FETCH_CMP_0:
+ code = MINUS;
+ optab = atomic_sub_fetch_cmp_0_optab;
+ break;
+ case IFN_ATOMIC_AND_FETCH_CMP_0:
+ code = AND;
+ optab = atomic_and_fetch_cmp_0_optab;
+ break;
+ case IFN_ATOMIC_OR_FETCH_CMP_0:
+ code = IOR;
+ optab = atomic_or_fetch_cmp_0_optab;
+ break;
+ case IFN_ATOMIC_XOR_FETCH_CMP_0:
+ code = XOR;
+ optab = atomic_xor_fetch_cmp_0_optab;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Decode the comparison kind encoded in the first argument.  */
+ enum rtx_code comp = UNKNOWN;
+ switch (tree_to_uhwi (cmp))
+ {
+ case ATOMIC_OP_FETCH_CMP_0_EQ: comp = EQ; break;
+ case ATOMIC_OP_FETCH_CMP_0_NE: comp = NE; break;
+ case ATOMIC_OP_FETCH_CMP_0_GT: comp = GT; break;
+ case ATOMIC_OP_FETCH_CMP_0_GE: comp = GE; break;
+ case ATOMIC_OP_FETCH_CMP_0_LT: comp = LT; break;
+ case ATOMIC_OP_FETCH_CMP_0_LE: comp = LE; break;
+ default: gcc_unreachable ();
+ }
+
+ /* Expand the lhs as the result target, or use a scratch pseudo when
+    the comparison result is unused.  */
+ rtx target;
+ if (lhs == NULL_TREE)
+ target = gen_reg_rtx (TYPE_MODE (boolean_type_node));
+ else
+ target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+ enum insn_code icode = direct_optab_handler (optab, mode);
+ gcc_assert (icode != CODE_FOR_nothing);
+ create_output_operand (&ops[0], target, TYPE_MODE (boolean_type_node));
+ create_fixed_operand (&ops[1], mem);
+ create_convert_operand_to (&ops[2], op, mode, true);
+ create_integer_operand (&ops[3], model);
+ create_integer_operand (&ops[4], comp);
+ /* Try the target's named pattern first; its expander may FAIL for
+    comparisons the target cannot test in one instruction.  */
+ if (maybe_expand_insn (icode, 5, ops))
+ return;
+
+ /* Fallback: emit a plain atomic op-and-fetch — still required for its
+    memory side effect even when LHS is unused — and, when there is an
+    lhs, compare the fetched result against zero separately.
+    NOTE(review): the final TRUE presumably requests the post-operation
+    value (op-fetch rather than fetch-op) — confirm against
+    expand_atomic_fetch_op's contract in optabs.c.  */
+ rtx result = expand_atomic_fetch_op (gen_reg_rtx (mode), mem, op,
+ code, model, true);
+ if (lhs)
+ {
+ result = emit_store_flag_force (target, comp, result, const0_rtx, mode,
+ 0, 1);
+ if (result != target)
+ emit_move_insn (target, result);
+ }
+}
+
/* Expand an atomic clear operation.
void _atomic_clear (BOOL *obj, enum memmodel)
EXP is the call expression. */