diff options
author | Jakub Jelinek <jakub@redhat.com> | 2011-05-17 09:37:59 +0200 |
---|---|---|
committer | Jakub Jelinek <jakub@gcc.gnu.org> | 2011-05-17 09:37:59 +0200 |
commit | 8637e32a7e451f4ee4b09b868546576b56b7a872 (patch) | |
tree | eff22f2c453b9e9e8a47143bbd82009f1d7b9224 /gcc | |
parent | 067e2f069d5b5ad051c46766935a42ba59a20d61 (diff) | |
download | gcc-8637e32a7e451f4ee4b09b868546576b56b7a872.zip gcc-8637e32a7e451f4ee4b09b868546576b56b7a872.tar.gz gcc-8637e32a7e451f4ee4b09b868546576b56b7a872.tar.bz2 |
re PR target/48986 (Missed optimization in atomic decrement on x86/x64)
PR target/48986
* config/i386/sync.md (sync_old_add<mode>): Relax operand 2
predicate to allow CONST_INT.
(*sync_old_add_cmp<mode>): New insn and peephole2 for it.
From-SVN: r173816
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 7 | ||||
-rw-r--r-- | gcc/config/i386/sync.md | 61 |
2 files changed, 66 insertions, 2 deletions
```diff
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 7f140b8..50b0356 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2011-05-17  Jakub Jelinek  <jakub@redhat.com>
+
+	PR target/48986
+	* config/i386/sync.md (sync_old_add<mode>): Relax operand 2
+	predicate to allow CONST_INT.
+	(*sync_old_add_cmp<mode>): New insn and peephole2 for it.
+
 2011-05-16  Joseph Myers  <joseph@codesourcery.com>
 
 	* opts-common.c (opt_enum_arg_to_value): New.
diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index 22f3648..20378d0 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -1,5 +1,5 @@
 ;; GCC machine description for i386 synchronization instructions.
-;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
+;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011
 ;; Free Software Foundation, Inc.
 ;;
 ;; This file is part of GCC.
@@ -164,17 +164,74 @@
   "!TARGET_64BIT && TARGET_CMPXCHG8B && flag_pic"
   "xchg{l}\t%%ebx, %3\;lock{%;} cmpxchg8b\t%1\;xchg{l}\t%%ebx, %3")
 
+;; For operand 2 nonmemory_operand predicate is used instead of
+;; register_operand to allow combiner to better optimize atomic
+;; additions of constants.
 (define_insn "sync_old_add<mode>"
   [(set (match_operand:SWI 0 "register_operand" "=<r>")
         (unspec_volatile:SWI
           [(match_operand:SWI 1 "memory_operand" "+m")] UNSPECV_XCHG))
    (set (match_dup 1)
         (plus:SWI (match_dup 1)
-                  (match_operand:SWI 2 "register_operand" "0")))
+                  (match_operand:SWI 2 "nonmemory_operand" "0")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_XADD"
   "lock{%;} xadd{<imodesuffix>}\t{%0, %1|%1, %0}")
 
+;; This peephole2 and following insn optimize
+;; __sync_fetch_and_add (x, -N) == N into just lock {add,sub,inc,dec}
+;; followed by testing of flags instead of lock xadd and comparisons.
+(define_peephole2
+  [(set (match_operand:SWI 0 "register_operand" "")
+        (match_operand:SWI 2 "const_int_operand" ""))
+   (parallel [(set (match_dup 0)
+                   (unspec_volatile:SWI
+                     [(match_operand:SWI 1 "memory_operand" "")] UNSPECV_XCHG))
+              (set (match_dup 1)
+                   (plus:SWI (match_dup 1)
+                             (match_dup 0)))
+              (clobber (reg:CC FLAGS_REG))])
+   (set (reg:CCZ FLAGS_REG)
+        (compare:CCZ (match_dup 0)
+                     (match_operand:SWI 3 "const_int_operand" "")))]
+  "peep2_reg_dead_p (3, operands[0])
+   && (unsigned HOST_WIDE_INT) INTVAL (operands[2])
+      == -(unsigned HOST_WIDE_INT) INTVAL (operands[3])
+   && !reg_overlap_mentioned_p (operands[0], operands[1])"
+  [(parallel [(set (reg:CCZ FLAGS_REG)
+                   (compare:CCZ (unspec_volatile:SWI [(match_dup 1)]
+                                                     UNSPECV_XCHG)
+                                (match_dup 3)))
+              (set (match_dup 1)
+                   (plus:SWI (match_dup 1)
+                             (match_dup 2)))])])
+
+(define_insn "*sync_old_add_cmp<mode>"
+  [(set (reg:CCZ FLAGS_REG)
+        (compare:CCZ (unspec_volatile:SWI
+                       [(match_operand:SWI 0 "memory_operand" "+m")]
+                       UNSPECV_XCHG)
+                     (match_operand:SWI 2 "const_int_operand" "i")))
+   (set (match_dup 0)
+        (plus:SWI (match_dup 0)
+                  (match_operand:SWI 1 "const_int_operand" "i")))]
+  "(unsigned HOST_WIDE_INT) INTVAL (operands[1])
+   == -(unsigned HOST_WIDE_INT) INTVAL (operands[2])"
+{
+  if (TARGET_USE_INCDEC)
+    {
+      if (operands[1] == const1_rtx)
+        return "lock{%;} inc{<imodesuffix>}\t%0";
+      if (operands[1] == constm1_rtx)
+        return "lock{%;} dec{<imodesuffix>}\t%0";
+    }
+
+  if (x86_maybe_negate_const_int (&operands[1], <MODE>mode))
+    return "lock{%;} sub{<imodesuffix>}\t{%1, %0|%0, %1}";
+
+  return "lock{%;} add{<imodesuffix>}\t{%1, %0|%0, %1}";
+})
+
 ;; Recall that xchg implicitly sets LOCK#, so adding it again wastes space.
 (define_insn "sync_lock_test_and_set<mode>"
   [(set (match_operand:SWI 0 "register_operand" "=<r>")
```