aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorUros Bizjak <ubizjak@gmail.com>2016-05-29 22:50:32 +0200
committerUros Bizjak <uros@gcc.gnu.org>2016-05-29 22:50:32 +0200
commitbeed3701c796842abbfb27d7484b35bd82818740 (patch)
tree56f243f0598adbb1fca56be6d0960c8128f04028 /gcc
parentf0b03e9423c1965ba30cd90ce248475423fd55a9 (diff)
downloadgcc-beed3701c796842abbfb27d7484b35bd82818740.zip
gcc-beed3701c796842abbfb27d7484b35bd82818740.tar.gz
gcc-beed3701c796842abbfb27d7484b35bd82818740.tar.bz2
re PR target/71245 (std::atomic<double> load/store bounces the data to the stack using fild/fistp)
PR target/71245 * config/i386/sync.md (define_peephole2 atomic_storedi_fpu): New peepholes to remove unneeded fild/fistp pairs. (define_peephole2 atomic_loaddi_fpu): Ditto. testsuite/ChangeLog: PR target/71245 * gcc.target/i386/pr71245-1.c: New test. * gcc.target/i386/pr71245-2.c: Ditto. From-SVN: r236863
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog7
-rw-r--r--gcc/config/i386/sync.md56
-rw-r--r--gcc/testsuite/ChangeLog6
-rw-r--r--gcc/testsuite/gcc.target/i386/pr71245-1.c22
-rw-r--r--gcc/testsuite/gcc.target/i386/pr71245-2.c22
5 files changed, 113 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 41b863b..cde2b8d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2016-05-29 Uros Bizjak <ubizjak@gmail.com>
+
+ PR target/71245
+ * config/i386/sync.md (define_peephole2 atomic_storedi_fpu):
+ New peepholes to remove unneeded fild/fistp pairs.
+ (define_peephole2 atomic_loaddi_fpu): Ditto.
+
2016-05-27 Jan Hubicka <hubicka@ucw.cz>
* predict.c (maybe_hot_frequency_p): Avoid division.
diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index 8322676..9acf5ca 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -210,6 +210,34 @@
DONE;
})
+(define_peephole2 ;; Drop an unneeded fild/fistp pair: atomic DI load bounced through memory op2 and then reloaded as DF.
+ [(set (match_operand:DF 0 "fp_register_operand") ;; insn 1: op0 = UNSPEC_FILD_ATOMIC of DImode memory op1
+ (unspec:DF [(match_operand:DI 1 "memory_operand")]
+ UNSPEC_FILD_ATOMIC))
+ (set (match_operand:DI 2 "memory_operand") ;; insn 2: DImode memory op2 = UNSPEC_FIST_ATOMIC of op0
+ (unspec:DI [(match_dup 0)]
+ UNSPEC_FIST_ATOMIC))
+ (set (match_operand:DF 3 "fp_register_operand") ;; insn 3: op3 = DFmode memory op4
+ (match_operand:DF 4 "memory_operand"))]
+ "!TARGET_64BIT
+ && peep2_reg_dead_p (2, operands[0])
+ && rtx_equal_p (operands[4], adjust_address_nv (operands[2], DFmode, 0))" ;; non-64-bit only; op0 must be dead per peep2_reg_dead_p; op4 must equal op2 re-addressed as DFmode
+ [(set (match_dup 3) (match_dup 5))] ;; whole sequence becomes op3 = op5. NOTE(review): the store to op2 is not re-emitted -- confirm nothing else reads op2.
+ "operands[5] = gen_lowpart (DFmode, operands[1]);") ;; op5 is op1 taken as DFmode via gen_lowpart
+
+(define_peephole2 ;; Same simplification when the DI value was copied through an sse_reg_operand instead of fild/fistp.
+ [(set (match_operand:DI 0 "sse_reg_operand") ;; insn 1: op0 = DImode memory op1
+ (match_operand:DI 1 "memory_operand"))
+ (set (match_operand:DI 2 "memory_operand") ;; insn 2: DImode memory op2 = op0
+ (match_dup 0))
+ (set (match_operand:DF 3 "fp_register_operand") ;; insn 3: op3 = DFmode memory op4
+ (match_operand:DF 4 "memory_operand"))]
+ "!TARGET_64BIT
+ && peep2_reg_dead_p (2, operands[0])
+ && rtx_equal_p (operands[4], adjust_address_nv (operands[2], DFmode, 0))" ;; non-64-bit only; op0 must be dead per peep2_reg_dead_p; op4 must equal op2 re-addressed as DFmode
+ [(set (match_dup 3) (match_dup 5))] ;; whole sequence becomes op3 = op5. NOTE(review): the store to op2 is not re-emitted -- confirm nothing else reads op2.
+ "operands[5] = gen_lowpart (DFmode, operands[1]);") ;; op5 is op1 taken as DFmode via gen_lowpart
+
(define_expand "atomic_store<mode>"
[(set (match_operand:ATOMIC 0 "memory_operand")
(unspec:ATOMIC [(match_operand:ATOMIC 1 "nonimmediate_operand")
@@ -298,6 +326,34 @@
DONE;
})
+(define_peephole2 ;; Drop an unneeded fild/fistp pair: DF value stored to memory op0 and then copied atomically as DI to op4.
+ [(set (match_operand:DF 0 "memory_operand") ;; insn 1: DFmode memory op0 = fp register op1
+ (match_operand:DF 1 "fp_register_operand"))
+ (set (match_operand:DF 2 "fp_register_operand") ;; insn 2: op2 = UNSPEC_FILD_ATOMIC of DImode memory op3
+ (unspec:DF [(match_operand:DI 3 "memory_operand")]
+ UNSPEC_FILD_ATOMIC))
+ (set (match_operand:DI 4 "memory_operand") ;; insn 3: DImode memory op4 = UNSPEC_FIST_ATOMIC of op2
+ (unspec:DI [(match_dup 2)]
+ UNSPEC_FIST_ATOMIC))]
+ "!TARGET_64BIT
+ && peep2_reg_dead_p (3, operands[2])
+ && rtx_equal_p (operands[0], adjust_address_nv (operands[3], DFmode, 0))" ;; non-64-bit only; op2 must be dead per peep2_reg_dead_p; op0 must equal op3 re-addressed as DFmode
+ [(set (match_dup 5) (match_dup 1))] ;; whole sequence becomes op5 = op1. NOTE(review): the store to op0 is not re-emitted -- confirm nothing else reads op0.
+ "operands[5] = gen_lowpart (DFmode, operands[4]);") ;; op5 is op4 taken as DFmode via gen_lowpart
+
+(define_peephole2 ;; Same simplification when the DI copy went through an sse_reg_operand instead of fild/fistp.
+ [(set (match_operand:DF 0 "memory_operand") ;; insn 1: DFmode memory op0 = fp register op1
+ (match_operand:DF 1 "fp_register_operand"))
+ (set (match_operand:DI 2 "sse_reg_operand") ;; insn 2: op2 = DImode memory op3
+ (match_operand:DI 3 "memory_operand"))
+ (set (match_operand:DI 4 "memory_operand") ;; insn 3: DImode memory op4 = op2
+ (match_dup 2))]
+ "!TARGET_64BIT
+ && peep2_reg_dead_p (3, operands[2])
+ && rtx_equal_p (operands[0], adjust_address_nv (operands[3], DFmode, 0))" ;; non-64-bit only; op2 must be dead per peep2_reg_dead_p; op0 must equal op3 re-addressed as DFmode
+ [(set (match_dup 5) (match_dup 1))] ;; whole sequence becomes op5 = op1. NOTE(review): the store to op0 is not re-emitted -- confirm nothing else reads op0.
+ "operands[5] = gen_lowpart (DFmode, operands[4]);") ;; op5 is op4 taken as DFmode via gen_lowpart
+
;; ??? You'd think that we'd be able to perform this via FLOAT + FIX_TRUNC
;; operations. But the fix_trunc patterns want way more setup than we want
;; to provide. Note that the scratch is DFmode instead of XFmode in order
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 7857e7f..2313b86 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2016-05-29 Uros Bizjak <ubizjak@gmail.com>
+
+ PR target/71245
+ * gcc.target/i386/pr71245-1.c: New test.
+ * gcc.target/i386/pr71245-2.c: Ditto.
+
2016-05-29 Paolo Carlini <paolo.carlini@oracle.com>
PR c++/71105
diff --git a/gcc/testsuite/gcc.target/i386/pr71245-1.c b/gcc/testsuite/gcc.target/i386/pr71245-1.c
new file mode 100644
index 0000000..be0b760
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr71245-1.c
@@ -0,0 +1,22 @@
+/* PR target/71245 */
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -march=pentium -mno-sse -mfpmath=387" } */
+
+typedef union /* Overlays an unsigned long long and a double on the same storage. */
+{
+ unsigned long long ll; /* integer view; the one passed to the __atomic builtins in foo_d */
+ double d; /* floating-point view; the one used for arithmetic in foo_d */
+} u_t;
+
+u_t d = { .d = 5.0 }; /* global that foo_d reads and writes; initialised through the double member */
+
+void foo_d (void) /* Load d.ll atomically, add 1.0 through the double view, store the result back atomically. */
+{
+ u_t tmp;
+
+ tmp.ll = __atomic_load_n (&d.ll, __ATOMIC_SEQ_CST); /* atomic load of the integer view into a local copy */
+ tmp.d += 1.0; /* arithmetic on the same bits viewed as double */
+ __atomic_store_n (&d.ll, tmp.ll, __ATOMIC_SEQ_CST); /* atomic store of the integer view; load and store are separate operations, not one read-modify-write */
+}
+
+/* { dg-final { scan-assembler-not "(fistp|fild)" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr71245-2.c b/gcc/testsuite/gcc.target/i386/pr71245-2.c
new file mode 100644
index 0000000..65c1398
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr71245-2.c
@@ -0,0 +1,22 @@
+/* PR target/71245 */
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -march=pentium -msse -mno-sse2 -mfpmath=387" } */
+
+typedef union /* Overlays an unsigned long long and a double on the same storage. */
+{
+ unsigned long long ll; /* integer view; the one passed to the __atomic builtins in foo_d */
+ double d; /* floating-point view; the one used for arithmetic in foo_d */
+} u_t;
+
+u_t d = { .d = 5.0 }; /* global that foo_d reads and writes; initialised through the double member */
+
+void foo_d (void) /* Load d.ll atomically, add 1.0 through the double view, store the result back atomically. */
+{
+ u_t tmp;
+
+ tmp.ll = __atomic_load_n (&d.ll, __ATOMIC_SEQ_CST); /* atomic load of the integer view into a local copy */
+ tmp.d += 1.0; /* arithmetic on the same bits viewed as double */
+ __atomic_store_n (&d.ll, tmp.ll, __ATOMIC_SEQ_CST); /* atomic store of the integer view; load and store are separate operations, not one read-modify-write */
+}
+
+/* { dg-final { scan-assembler-not "movlps" } } */