aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Uros Bizjak <ubizjak@gmail.com> 2021-04-23 17:29:29 +0200
committer: Uros Bizjak <ubizjak@gmail.com> 2021-04-28 15:50:15 +0200
commit: 0d277114b4b2d0cb386c7abe409a81ca29d9d61d (patch)
tree: b1dbecd8de573f8a991bdded23dd753964923ea2
parent: c909bb78449c5d9692b305dd6b8da4b7cf4defb2 (diff)
download: gcc-0d277114b4b2d0cb386c7abe409a81ca29d9d61d.zip
gcc-0d277114b4b2d0cb386c7abe409a81ca29d9d61d.tar.gz
gcc-0d277114b4b2d0cb386c7abe409a81ca29d9d61d.tar.bz2
i386: Fix atomic FP peepholes [PR100182]
64-bit loads to/stores from x87 and SSE registers are atomic also on 32-bit
targets, so there is no need for additional atomic moves to a temporary
register.

Introduced load peephole2 patterns assume that there won't be any additional
loads from the load location outside the peepholed sequence and wrongly
removed the source location initialization.

OTOH, introduced store peephole2 patterns assume there won't be any additional
loads from the stored location outside the peepholed sequence and wrongly
removed the destination location initialization.  Note that we can't use plain
x87 FST instruction to initialize destination location because FST converts
the value to the double-precision format, changing bits during move.

The patch restores removed initializations in load and store patterns.
Additionally, plain x87 FST in store peephole2 patterns is prevented by
limiting the store operand source to SSE registers.

2021-04-23  Uroš Bizjak  <ubizjak@gmail.com>

gcc/
	PR target/100182
	* config/i386/sync.md (FILD_ATOMIC/FIST_ATOMIC FP load peephole2):
	Copy operand 3 to operand 4.  Use sse_reg_operand
	as operand 3 predicate.
	(FILD_ATOMIC/FIST_ATOMIC FP load peephole2 with mem blockage): Ditto.
	(LDX_ATOMIC/STX_ATOMIC FP load peephole2): Ditto.
	(LDX_ATOMIC/STX_ATOMIC FP load peephole2 with mem blockage): Ditto.
	(FILD_ATOMIC/FIST_ATOMIC FP store peephole2):
	Copy operand 1 to operand 0.
	(FILD_ATOMIC/FIST_ATOMIC FP store peephole2 with mem blockage): Ditto.
	(LDX_ATOMIC/STX_ATOMIC FP store peephole2): Ditto.
	(LDX_ATOMIC/STX_ATOMIC FP store peephole2 with mem blockage): Ditto.

gcc/testsuite/
	PR target/100182
	* gcc.target/i386/pr100182.c: New test.
	* gcc.target/i386/pr71245-1.c (dg-final): Xfail scan-assembler-not.
	* gcc.target/i386/pr71245-2.c (dg-final): Ditto.

(cherry picked from commit d2324a5ab3ff097864ae6828cb1db4dd013c70d1)
-rw-r--r-- gcc/config/i386/sync.md 24
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr100182.c 30
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr71245-1.c 2
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr71245-2.c 2
4 files changed, 48 insertions, 10 deletions
diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index 618397c..3e9a5f0 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -219,12 +219,13 @@
(set (match_operand:DI 2 "memory_operand")
(unspec:DI [(match_dup 0)]
UNSPEC_FIST_ATOMIC))
- (set (match_operand:DF 3 "any_fp_register_operand")
+ (set (match_operand:DF 3 "sse_reg_operand")
(match_operand:DF 4 "memory_operand"))]
"!TARGET_64BIT
&& peep2_reg_dead_p (2, operands[0])
&& rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))"
- [(set (match_dup 3) (match_dup 5))]
+ [(set (match_dup 3) (match_dup 5))
+ (set (match_dup 4) (match_dup 3))]
"operands[5] = gen_lowpart (DFmode, operands[1]);")
(define_peephole2
@@ -236,7 +237,7 @@
UNSPEC_FIST_ATOMIC))
(set (mem:BLK (scratch:SI))
(unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE))
- (set (match_operand:DF 3 "any_fp_register_operand")
+ (set (match_operand:DF 3 "sse_reg_operand")
(match_operand:DF 4 "memory_operand"))]
"!TARGET_64BIT
&& peep2_reg_dead_p (2, operands[0])
@@ -244,6 +245,7 @@
[(const_int 0)]
{
emit_move_insn (operands[3], gen_lowpart (DFmode, operands[1]));
+ emit_move_insn (operands[4], operands[3]);
emit_insn (gen_memory_blockage ());
DONE;
})
@@ -255,12 +257,13 @@
(set (match_operand:DI 2 "memory_operand")
(unspec:DI [(match_dup 0)]
UNSPEC_STX_ATOMIC))
- (set (match_operand:DF 3 "any_fp_register_operand")
+ (set (match_operand:DF 3 "sse_reg_operand")
(match_operand:DF 4 "memory_operand"))]
"!TARGET_64BIT
&& peep2_reg_dead_p (2, operands[0])
&& rtx_equal_p (XEXP (operands[4], 0), XEXP (operands[2], 0))"
- [(set (match_dup 3) (match_dup 5))]
+ [(set (match_dup 3) (match_dup 5))
+ (set (match_dup 4) (match_dup 3))]
"operands[5] = gen_lowpart (DFmode, operands[1]);")
(define_peephole2
@@ -272,7 +275,7 @@
UNSPEC_STX_ATOMIC))
(set (mem:BLK (scratch:SI))
(unspec:BLK [(mem:BLK (scratch:SI))] UNSPEC_MEMORY_BLOCKAGE))
- (set (match_operand:DF 3 "any_fp_register_operand")
+ (set (match_operand:DF 3 "sse_reg_operand")
(match_operand:DF 4 "memory_operand"))]
"!TARGET_64BIT
&& peep2_reg_dead_p (2, operands[0])
@@ -280,6 +283,7 @@
[(const_int 0)]
{
emit_move_insn (operands[3], gen_lowpart (DFmode, operands[1]));
+ emit_move_insn (operands[4], operands[3]);
emit_insn (gen_memory_blockage ());
DONE;
})
@@ -383,7 +387,8 @@
"!TARGET_64BIT
&& peep2_reg_dead_p (3, operands[2])
&& rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))"
- [(set (match_dup 5) (match_dup 1))]
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 5) (match_dup 1))]
"operands[5] = gen_lowpart (DFmode, operands[4]);")
(define_peephole2
@@ -402,6 +407,7 @@
&& rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))"
[(const_int 0)]
{
+ emit_move_insn (operands[0], operands[1]);
emit_insn (gen_memory_blockage ());
emit_move_insn (gen_lowpart (DFmode, operands[4]), operands[1]);
DONE;
@@ -419,7 +425,8 @@
"!TARGET_64BIT
&& peep2_reg_dead_p (3, operands[2])
&& rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))"
- [(set (match_dup 5) (match_dup 1))]
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 5) (match_dup 1))]
"operands[5] = gen_lowpart (DFmode, operands[4]);")
(define_peephole2
@@ -438,6 +445,7 @@
&& rtx_equal_p (XEXP (operands[0], 0), XEXP (operands[3], 0))"
[(const_int 0)]
{
+ emit_move_insn (operands[0], operands[1]);
emit_insn (gen_memory_blockage ());
emit_move_insn (gen_lowpart (DFmode, operands[4]), operands[1]);
DONE;
diff --git a/gcc/testsuite/gcc.target/i386/pr100182.c b/gcc/testsuite/gcc.target/i386/pr100182.c
new file mode 100644
index 0000000..2f92a04
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr100182.c
@@ -0,0 +1,30 @@
+/* { dg-do run { target ia32 } } */
+/* { dg-options "-O2 -march=i686" } */
+
+struct S { double _M_fp; };
+union U { double d; unsigned long long int l; };
+
+void
+__attribute__((noipa))
+foo (void)
+{
+ struct S a0, a1;
+ union U u;
+ double d0, d1;
+ a0._M_fp = 0.0;
+ a1._M_fp = 1.0;
+ __atomic_store_8 (&a0._M_fp, __atomic_load_8 (&a1._M_fp, __ATOMIC_SEQ_CST), __ATOMIC_SEQ_CST);
+ u.l = __atomic_load_8 (&a0._M_fp, __ATOMIC_SEQ_CST);
+ d0 = u.d;
+ u.l = __atomic_load_8 (&a1._M_fp, __ATOMIC_SEQ_CST);
+ d1 = u.d;
+ if (d0 != d1)
+ __builtin_abort ();
+}
+
+int
+main ()
+{
+ foo ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr71245-1.c b/gcc/testsuite/gcc.target/i386/pr71245-1.c
index be0b760..02c0dcb 100644
--- a/gcc/testsuite/gcc.target/i386/pr71245-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr71245-1.c
@@ -19,4 +19,4 @@ void foo_d (void)
__atomic_store_n (&d.ll, tmp.ll, __ATOMIC_SEQ_CST);
}
-/* { dg-final { scan-assembler-not "(fistp|fild)" } } */
+/* { dg-final { scan-assembler-not "(fistp|fild)" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr71245-2.c b/gcc/testsuite/gcc.target/i386/pr71245-2.c
index 65c1398..bf37a8c 100644
--- a/gcc/testsuite/gcc.target/i386/pr71245-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr71245-2.c
@@ -19,4 +19,4 @@ void foo_d (void)
__atomic_store_n (&d.ll, tmp.ll, __ATOMIC_SEQ_CST);
}
-/* { dg-final { scan-assembler-not "movlps" } } */
+/* { dg-final { scan-assembler-not "movlps" { xfail *-*-* } } } */