aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/i386
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2022-01-03 14:02:23 +0100
committerJakub Jelinek <jakub@redhat.com>2022-01-03 14:17:26 +0100
commit6362627b27f395b054f359244fcfcb15ac0ac2ab (patch)
tree9fdac8e071aec28629541896854d11c29c2f8540 /gcc/config/i386
parent4911609fbe47d3e4d2765cd67031a7e0ee9f5af0 (diff)
downloadgcc-6362627b27f395b054f359244fcfcb15ac0ac2ab.zip
gcc-6362627b27f395b054f359244fcfcb15ac0ac2ab.tar.gz
gcc-6362627b27f395b054f359244fcfcb15ac0ac2ab.tar.bz2
i386, fab: Optimize __atomic_{add,sub,and,or,xor}_fetch (x, y, z) {==,!=,<,<=,>,>=} 0 [PR98737]
On Wed, Jan 27, 2021 at 12:27:13PM +0100, Ulrich Drepper via Gcc-patches wrote: > On 1/27/21 11:37 AM, Jakub Jelinek wrote: > > Would equality comparison against 0 handle the most common cases. > > > > The user can write it as > > __atomic_sub_fetch (x, y, z) == 0 > > or > > __atomic_fetch_sub (x, y, z) - y == 0 > > though, so the expansion code would need to be able to cope with both. > > Please also keep !=0, <0, <=0, >0, and >=0 in mind. They all can be > useful and can be handled with the flags. <= 0 and > 0 don't really work well with lock {add,sub,inc,dec}, x86 doesn't have comparisons that would look solely at both SF and ZF and not at other flags (and emitting two separate conditional jumps or two setcc insns and oring them together looks awful). But the rest can work. Here is a patch that adds internal functions and optabs for these, recognizes them at the same spot as e.g. .ATOMIC_BIT_TEST_AND* internal functions (fold all builtins pass) and expands them appropriately (or for the <= 0 and > 0 cases of +/- FAILs and lets the middle-end fall back). So far I have handled just the op_fetch builtins, IMHO instead of handling also __atomic_fetch_sub (x, y, z) - y == 0 etc. we should canonicalize __atomic_fetch_sub (x, y, z) - y to __atomic_sub_fetch (x, y, z) (and vice versa). 2022-01-03 Jakub Jelinek <jakub@redhat.com> PR target/98737 * internal-fn.def (ATOMIC_ADD_FETCH_CMP_0, ATOMIC_SUB_FETCH_CMP_0, ATOMIC_AND_FETCH_CMP_0, ATOMIC_OR_FETCH_CMP_0, ATOMIC_XOR_FETCH_CMP_0): New internal fns. * internal-fn.h (ATOMIC_OP_FETCH_CMP_0_EQ, ATOMIC_OP_FETCH_CMP_0_NE, ATOMIC_OP_FETCH_CMP_0_LT, ATOMIC_OP_FETCH_CMP_0_LE, ATOMIC_OP_FETCH_CMP_0_GT, ATOMIC_OP_FETCH_CMP_0_GE): New enumerators. * internal-fn.c (expand_ATOMIC_ADD_FETCH_CMP_0, expand_ATOMIC_SUB_FETCH_CMP_0, expand_ATOMIC_AND_FETCH_CMP_0, expand_ATOMIC_OR_FETCH_CMP_0, expand_ATOMIC_XOR_FETCH_CMP_0): New functions. 
* optabs.def (atomic_add_fetch_cmp_0_optab, atomic_sub_fetch_cmp_0_optab, atomic_and_fetch_cmp_0_optab, atomic_or_fetch_cmp_0_optab, atomic_xor_fetch_cmp_0_optab): New direct optabs. * builtins.h (expand_ifn_atomic_op_fetch_cmp_0): Declare. * builtins.c (expand_ifn_atomic_op_fetch_cmp_0): New function. * tree-ssa-ccp.c: Include internal-fn.h. (optimize_atomic_bit_test_and): Add . before internal fn call in function comment. Change return type from void to bool and return true only if successfully replaced. (optimize_atomic_op_fetch_cmp_0): New function. (pass_fold_builtins::execute): Use optimize_atomic_op_fetch_cmp_0 for BUILT_IN_ATOMIC_{ADD,SUB,AND,OR,XOR}_FETCH_{1,2,4,8,16} and BUILT_IN_SYNC_{ADD,SUB,AND,OR,XOR}_AND_FETCH_{1,2,4,8,16}, for *XOR* ones only if optimize_atomic_bit_test_and failed. * config/i386/sync.md (atomic_<plusminus_mnemonic>_fetch_cmp_0<mode>, atomic_<logic>_fetch_cmp_0<mode>): New define_expand patterns. (atomic_add_fetch_cmp_0<mode>_1, atomic_sub_fetch_cmp_0<mode>_1, atomic_<logic>_fetch_cmp_0<mode>_1): New define_insn patterns. * doc/md.texi (atomic_add_fetch_cmp_0<mode>, atomic_sub_fetch_cmp_0<mode>, atomic_and_fetch_cmp_0<mode>, atomic_or_fetch_cmp_0<mode>, atomic_xor_fetch_cmp_0<mode>): Document new named patterns. * gcc.target/i386/pr98737-1.c: New test. * gcc.target/i386/pr98737-2.c: New test. * gcc.target/i386/pr98737-3.c: New test. * gcc.target/i386/pr98737-4.c: New test. * gcc.target/i386/pr98737-5.c: New test. * gcc.target/i386/pr98737-6.c: New test. * gcc.target/i386/pr98737-7.c: New test.
Diffstat (limited to 'gcc/config/i386')
-rw-r--r--gcc/config/i386/sync.md104
1 files changed, 104 insertions, 0 deletions
diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index f87fa5e..36417c5 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -938,3 +938,107 @@
(const_int 0))]
""
"lock{%;} %K2btr{<imodesuffix>}\t{%1, %0|%0, %1}")
+
+(define_expand "atomic_<plusminus_mnemonic>_fetch_cmp_0<mode>"
+ [(match_operand:QI 0 "register_operand")
+ (plusminus:SWI (match_operand:SWI 1 "memory_operand")
+ (match_operand:SWI 2 "nonmemory_operand"))
+ (match_operand:SI 3 "const_int_operand") ;; model
+ (match_operand:SI 4 "const_int_operand")]
+ ""
+{
+ if (INTVAL (operands[4]) == GT || INTVAL (operands[4]) == LE)
+ FAIL;
+ emit_insn (gen_atomic_<plusminus_mnemonic>_fetch_cmp_0<mode>_1 (operands[1],
+ operands[2],
+ operands[3]));
+ ix86_expand_setcc (operands[0], (enum rtx_code) INTVAL (operands[4]),
+ gen_rtx_REG (CCGOCmode, FLAGS_REG), const0_rtx);
+ DONE;
+})
+
+(define_insn "atomic_add_fetch_cmp_0<mode>_1"
+ [(set (reg:CCGOC FLAGS_REG)
+ (compare:CCGOC
+ (plus:SWI
+ (unspec_volatile:SWI
+ [(match_operand:SWI 0 "memory_operand" "+m")
+ (match_operand:SI 2 "const_int_operand")] ;; model
+ UNSPECV_XCHG)
+ (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
+ (const_int 0)))
+ (set (match_dup 0)
+ (plus:SWI (match_dup 0) (match_dup 1)))]
+ ""
+{
+ if (incdec_operand (operands[1], <MODE>mode))
+ {
+ if (operands[1] == const1_rtx)
+ return "lock{%;} %K2inc{<imodesuffix>}\t%0";
+ else
+ return "lock{%;} %K2dec{<imodesuffix>}\t%0";
+ }
+
+ if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
+ return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}";
+
+ return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}";
+})
+
+(define_insn "atomic_sub_fetch_cmp_0<mode>_1"
+ [(set (reg:CCGOC FLAGS_REG)
+ (compare:CCGOC
+ (minus:SWI
+ (unspec_volatile:SWI
+ [(match_operand:SWI 0 "memory_operand" "+m")
+ (match_operand:SI 2 "const_int_operand")] ;; model
+ UNSPECV_XCHG)
+ (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
+ (const_int 0)))
+ (set (match_dup 0)
+ (minus:SWI (match_dup 0) (match_dup 1)))]
+ ""
+{
+ if (incdec_operand (operands[1], <MODE>mode))
+ {
+ if (operands[1] != const1_rtx)
+ return "lock{%;} %K2inc{<imodesuffix>}\t%0";
+ else
+ return "lock{%;} %K2dec{<imodesuffix>}\t%0";
+ }
+
+ if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
+ return "lock{%;} %K2add{<imodesuffix>}\t{%1, %0|%0, %1}";
+
+ return "lock{%;} %K2sub{<imodesuffix>}\t{%1, %0|%0, %1}";
+})
+
+(define_expand "atomic_<logic>_fetch_cmp_0<mode>"
+ [(match_operand:QI 0 "register_operand")
+ (any_logic:SWI (match_operand:SWI 1 "memory_operand")
+ (match_operand:SWI 2 "nonmemory_operand"))
+ (match_operand:SI 3 "const_int_operand") ;; model
+ (match_operand:SI 4 "const_int_operand")]
+ ""
+{
+ emit_insn (gen_atomic_<logic>_fetch_cmp_0<mode>_1 (operands[1], operands[2],
+ operands[3]));
+ ix86_expand_setcc (operands[0], (enum rtx_code) INTVAL (operands[4]),
+ gen_rtx_REG (CCNOmode, FLAGS_REG), const0_rtx);
+ DONE;
+})
+
+(define_insn "atomic_<logic>_fetch_cmp_0<mode>_1"
+ [(set (reg:CCNO FLAGS_REG)
+ (compare:CCNO
+ (any_logic:SWI
+ (unspec_volatile:SWI
+ [(match_operand:SWI 0 "memory_operand" "+m")
+ (match_operand:SI 2 "const_int_operand")] ;; model
+ UNSPECV_XCHG)
+ (match_operand:SWI 1 "nonmemory_operand" "<r><i>"))
+ (const_int 0)))
+ (set (match_dup 0)
+ (any_logic:SWI (match_dup 0) (match_dup 1)))]
+ ""
+ "lock{%;} %K2<logic>{<imodesuffix>}\t{%1, %0|%0, %1}")