aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Uros Bizjak <ubizjak@gmail.com> 2020-07-16 20:11:43 +0200
committer Uros Bizjak <ubizjak@gmail.com> 2020-07-16 20:13:06 +0200
commit cc1ef413a859433a8313fa9c15aaff41bdc837dc (patch)
tree 8c405f73976c652609af5fabfd2755a57a49fb15
parent f569026aa3088aa895ea39618d2998333b08600b (diff)
download gcc-cc1ef413a859433a8313fa9c15aaff41bdc837dc.zip
gcc-cc1ef413a859433a8313fa9c15aaff41bdc837dc.tar.gz
gcc-cc1ef413a859433a8313fa9c15aaff41bdc837dc.tar.bz2
i386: Additional peephole2 to use flags from CMPXCHG more [PR96189]
The CMPXCHG instruction sets the ZF flag if the values in the destination
operand and the EAX register are equal; otherwise the ZF flag is cleared
and the value from the destination operand is loaded into EAX.

The following assembly:

        xorl    %eax, %eax
        lock cmpxchgl   %edx, (%rdi)
        testl   %eax, %eax
        sete    %al

can be optimized by removing the unneeded comparison, since a set ZF flag
signals that no update to EAX happened.

This patch adds a peephole2 pattern to also handle XOR zeroing and load
of -1 by OR.

2020-07-16  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:
        PR target/96189
        * config/i386/sync.md
        (peephole2 to remove unneeded compare after CMPXCHG):
        New pattern, also handle XOR zeroing and load of -1 by OR.

gcc/testsuite/ChangeLog:
        PR target/96189
        * gcc.target/i386/pr96189-1.c: New test.
-rw-r--r-- gcc/config/i386/sync.md 34
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr96189-1.c 24
2 files changed, 58 insertions, 0 deletions
diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index d203e9d..e221090 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -629,6 +629,40 @@
(set (reg:CCZ FLAGS_REG)
(unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))])])
+(define_peephole2
+ [(parallel [(set (match_operand:SWI48 0 "register_operand")
+ (match_operand:SWI48 1 "const_int_operand"))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_operand:SWI 2 "register_operand")
+ (unspec_volatile:SWI
+ [(match_operand:SWI 3 "memory_operand")
+ (match_dup 2)
+ (match_operand:SWI 4 "register_operand")
+ (match_operand:SI 5 "const_int_operand")]
+ UNSPECV_CMPXCHG))
+ (set (match_dup 3)
+ (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
+ (set (reg:CCZ FLAGS_REG)
+ (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))])
+ (set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (match_dup 2)
+ (match_dup 1)))]
+ "REGNO (operands[0]) == REGNO (operands[2])"
+ [(parallel [(set (match_dup 0)
+ (match_dup 1))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel [(set (match_dup 2)
+ (unspec_volatile:SWI
+ [(match_dup 3)
+ (match_dup 2)
+ (match_dup 4)
+ (match_dup 5)]
+ UNSPECV_CMPXCHG))
+ (set (match_dup 3)
+ (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG))
+ (set (reg:CCZ FLAGS_REG)
+ (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))])])
+
;; For operand 2 nonmemory_operand predicate is used instead of
;; register_operand to allow combiner to better optimize atomic
;; additions of constants.
diff --git a/gcc/testsuite/gcc.target/i386/pr96189-1.c b/gcc/testsuite/gcc.target/i386/pr96189-1.c
new file mode 100644
index 0000000..fd95cb2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr96189-1.c
@@ -0,0 +1,24 @@
+/* PR target/96189 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-Os" } */
+/* { dg-final { scan-assembler-not "\tcmpb\t" } } */
+
+_Bool
+foo (short *x, short z)
+{
+ short y = 0;
+ __atomic_compare_exchange_n (x, &y, z, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+ return y == 0;
+}
+
+/* { dg-final { scan-assembler-not "\ttestw\t" } } */
+
+_Bool
+bar (short *x, short z)
+{
+ short y = -1;
+ __atomic_compare_exchange_n (x, &y, z, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+ return y == -1;
+}
+
+/* { dg-final { scan-assembler-not "\tincw\t" } } */