diff options
author | Uros Bizjak <ubizjak@gmail.com> | 2016-05-29 22:50:32 +0200 |
---|---|---|
committer | Uros Bizjak <uros@gcc.gnu.org> | 2016-05-29 22:50:32 +0200 |
commit | beed3701c796842abbfb27d7484b35bd82818740 (patch) | |
tree | 56f243f0598adbb1fca56be6d0960c8128f04028 /gcc | |
parent | f0b03e9423c1965ba30cd90ce248475423fd55a9 (diff) | |
download | gcc-beed3701c796842abbfb27d7484b35bd82818740.zip gcc-beed3701c796842abbfb27d7484b35bd82818740.tar.gz gcc-beed3701c796842abbfb27d7484b35bd82818740.tar.bz2 |
re PR target/71245 (std::atomic<double> load/store bounces the data to the stack using fild/fistp)
PR target/71245
* config/i386/sync.md (define_peephole2 atomic_storedi_fpu):
New peepholes to remove unneeded fild/fistp pairs.
(define_peephole2 atomic_loaddi_fpu): Ditto.
testsuite/ChangeLog:
PR target/71245
* gcc.target/i386/pr71245-1.c: New test.
* gcc.target/i386/pr71245-2.c: Ditto.
From-SVN: r236863
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 7 | ||||
-rw-r--r-- | gcc/config/i386/sync.md | 56 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr71245-1.c | 22 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr71245-2.c | 22 |
5 files changed, 113 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 41b863b..cde2b8d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2016-05-29 Uros Bizjak <ubizjak@gmail.com>
+
+	PR target/71245
+	* config/i386/sync.md (define_peephole2 atomic_storedi_fpu):
+	New peepholes to remove unneeded fild/fistp pairs.
+	(define_peephole2 atomic_loaddi_fpu): Ditto.
+
 2016-05-27 Jan Hubicka <hubicka@ucw.cz>
 
 	* predict.c (maybe_hot_frequency_p): Avoid division.
diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index 8322676..9acf5ca 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -210,6 +210,34 @@
   DONE;
 })
 
+(define_peephole2
+  [(set (match_operand:DF 0 "fp_register_operand")
+        (unspec:DF [(match_operand:DI 1 "memory_operand")]
+                   UNSPEC_FILD_ATOMIC))
+   (set (match_operand:DI 2 "memory_operand")
+        (unspec:DI [(match_dup 0)]
+                   UNSPEC_FIST_ATOMIC))
+   (set (match_operand:DF 3 "fp_register_operand")
+        (match_operand:DF 4 "memory_operand"))]
+  "!TARGET_64BIT
+   && peep2_reg_dead_p (2, operands[0])
+   && rtx_equal_p (operands[4], adjust_address_nv (operands[2], DFmode, 0))"
+  [(set (match_dup 3) (match_dup 5))]
+  "operands[5] = gen_lowpart (DFmode, operands[1]);")
+
+(define_peephole2
+  [(set (match_operand:DI 0 "sse_reg_operand")
+        (match_operand:DI 1 "memory_operand"))
+   (set (match_operand:DI 2 "memory_operand")
+        (match_dup 0))
+   (set (match_operand:DF 3 "fp_register_operand")
+        (match_operand:DF 4 "memory_operand"))]
+  "!TARGET_64BIT
+   && peep2_reg_dead_p (2, operands[0])
+   && rtx_equal_p (operands[4], adjust_address_nv (operands[2], DFmode, 0))"
+  [(set (match_dup 3) (match_dup 5))]
+  "operands[5] = gen_lowpart (DFmode, operands[1]);")
+
 (define_expand "atomic_store<mode>"
   [(set (match_operand:ATOMIC 0 "memory_operand")
         (unspec:ATOMIC [(match_operand:ATOMIC 1 "nonimmediate_operand")
@@ -298,6 +326,34 @@
   DONE;
 })
 
+(define_peephole2
+  [(set (match_operand:DF 0 "memory_operand")
+        (match_operand:DF 1 "fp_register_operand"))
+   (set (match_operand:DF 2 "fp_register_operand")
+        (unspec:DF [(match_operand:DI 3 "memory_operand")]
+                   UNSPEC_FILD_ATOMIC))
+   (set (match_operand:DI 4 "memory_operand")
+        (unspec:DI [(match_dup 2)]
+                   UNSPEC_FIST_ATOMIC))]
+  "!TARGET_64BIT
+   && peep2_reg_dead_p (3, operands[2])
+   && rtx_equal_p (operands[0], adjust_address_nv (operands[3], DFmode, 0))"
+  [(set (match_dup 5) (match_dup 1))]
+  "operands[5] = gen_lowpart (DFmode, operands[4]);")
+
+(define_peephole2
+  [(set (match_operand:DF 0 "memory_operand")
+        (match_operand:DF 1 "fp_register_operand"))
+   (set (match_operand:DI 2 "sse_reg_operand")
+        (match_operand:DI 3 "memory_operand"))
+   (set (match_operand:DI 4 "memory_operand")
+        (match_dup 2))]
+  "!TARGET_64BIT
+   && peep2_reg_dead_p (3, operands[2])
+   && rtx_equal_p (operands[0], adjust_address_nv (operands[3], DFmode, 0))"
+  [(set (match_dup 5) (match_dup 1))]
+  "operands[5] = gen_lowpart (DFmode, operands[4]);")
+
 ;; ??? You'd think that we'd be able to perform this via FLOAT + FIX_TRUNC
 ;; operations. But the fix_trunc patterns want way more setup than we want
 ;; to provide. Note that the scratch is DFmode instead of XFmode in order
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 7857e7f..2313b86 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2016-05-29 Uros Bizjak <ubizjak@gmail.com>
+
+	PR target/71245
+	* gcc.target/i386/pr71245-1.c: New test.
+	* gcc.target/i386/pr71245-2.c: Ditto.
+
 2016-05-29 Paolo Carlini <paolo.carlini@oracle.com>
 
 	PR c++/71105
diff --git a/gcc/testsuite/gcc.target/i386/pr71245-1.c b/gcc/testsuite/gcc.target/i386/pr71245-1.c
new file mode 100644
index 0000000..be0b760
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr71245-1.c
@@ -0,0 +1,22 @@
+/* PR target/71245 */
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -march=pentium -mno-sse -mfpmath=387" } */
+
+typedef union
+{
+  unsigned long long ll;
+  double d;
+} u_t;
+
+u_t d = { .d = 5.0 };
+
+void foo_d (void)
+{
+  u_t tmp;
+
+  tmp.ll = __atomic_load_n (&d.ll, __ATOMIC_SEQ_CST);
+  tmp.d += 1.0;
+  __atomic_store_n (&d.ll, tmp.ll, __ATOMIC_SEQ_CST);
+}
+
+/* { dg-final { scan-assembler-not "(fistp|fild)" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr71245-2.c b/gcc/testsuite/gcc.target/i386/pr71245-2.c
new file mode 100644
index 0000000..65c1398
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr71245-2.c
@@ -0,0 +1,22 @@
+/* PR target/71245 */
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -march=pentium -msse -mno-sse2 -mfpmath=387" } */
+
+typedef union
+{
+  unsigned long long ll;
+  double d;
+} u_t;
+
+u_t d = { .d = 5.0 };
+
+void foo_d (void)
+{
+  u_t tmp;
+
+  tmp.ll = __atomic_load_n (&d.ll, __ATOMIC_SEQ_CST);
+  tmp.d += 1.0;
+  __atomic_store_n (&d.ll, tmp.ll, __ATOMIC_SEQ_CST);
+}
+
+/* { dg-final { scan-assembler-not "movlps" } } */ |