diff options
author | Jakub Jelinek <jakub@redhat.com> | 2013-12-18 17:50:06 +0100 |
---|---|---|
committer | Jakub Jelinek <jakub@gcc.gnu.org> | 2013-12-18 17:50:06 +0100 |
commit | 90be6e465c82f68e872ed9f5ea2388f709fee359 (patch) | |
tree | 635271aaba6d080002960b3467e6dec50a8d6682 | |
parent | 69aeb34f52ac77c4704a08f91156de8ffd9d797b (diff) | |
download | gcc-90be6e465c82f68e872ed9f5ea2388f709fee359.zip gcc-90be6e465c82f68e872ed9f5ea2388f709fee359.tar.gz gcc-90be6e465c82f68e872ed9f5ea2388f709fee359.tar.bz2 |
re PR target/59539 (Missed optimisation: VEX-prefixed operations don't need aligned data)
PR target/59539
* config/i386/sse.md
(<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>,
<sse2_avx_avx512f>_loaddqu<mode><mask_name>): New expanders,
prefix existing define_insn names with *.
* gcc.target/i386/pr59539-1.c: New test.
* gcc.target/i386/pr59539-2.c: New test.
From-SVN: r206090
-rw-r--r-- | gcc/ChangeLog | 8 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 47 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr59539-1.c | 16 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr59539-2.c | 16 |
5 files changed, 91 insertions, 2 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 46f1e9b..83e8321 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2013-12-18 Jakub Jelinek <jakub@redhat.com> + + PR target/59539 + * config/i386/sse.md + (<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>, + <sse2_avx_avx512f>_loaddqu<mode><mask_name>): New expanders, + prefix existing define_insn names with *. + 2013-12-18 Eric Botcazou <ebotcazou@adacore.com> * config/arm/arm.c (arm_expand_epilogue_apcs_frame): Fix thinko. diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index adedf44..2cbbb14 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -912,7 +912,28 @@ DONE; }) -(define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>" +(define_expand "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>" + [(set (match_operand:VF 0 "register_operand") + (unspec:VF [(match_operand:VF 1 "nonimmediate_operand")] + UNSPEC_LOADU))] + "TARGET_SSE && <mask_mode512bit_condition>" +{ + /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads + just fine if misaligned_operand is true, and without the UNSPEC it can + be combined with arithmetic instructions. If misaligned_operand is + false, still emit UNSPEC_LOADU insn to honor user's request for + misaligned load. */ + if (TARGET_AVX + && misaligned_operand (operands[1], <MODE>mode) + /* FIXME: Revisit after AVX512F merge is completed. */ + && !<mask_applied>) + { + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1])); + DONE; + } +}) + +(define_insn "*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>" [(set (match_operand:VF 0 "register_operand" "=v") (unspec:VF [(match_operand:VF 1 "nonimmediate_operand" "vm")] @@ -999,7 +1020,29 @@ (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "<sse2_avx_avx512f>_loaddqu<mode><mask_name>" +(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>" + [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand") + (unspec:VI_UNALIGNED_LOADSTORE + [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand")] + UNSPEC_LOADU))] + "TARGET_SSE2 && <mask_mode512bit_condition>" +{ + /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads + just fine if misaligned_operand is true, and without the UNSPEC it can + be combined with arithmetic instructions. If misaligned_operand is + false, still emit UNSPEC_LOADU insn to honor user's request for + misaligned load. */ + if (TARGET_AVX + && misaligned_operand (operands[1], <MODE>mode) + /* FIXME: Revisit after AVX512F merge is completed. */ + && !<mask_applied>) + { + emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1])); + DONE; + } +}) + +(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>" [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand" "=v") (unspec:VI_UNALIGNED_LOADSTORE [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand" "vm")] diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 730ce06..20a1bc5 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2013-12-18 Jakub Jelinek <jakub@redhat.com> + + PR target/59539 + * gcc.target/i386/pr59539-1.c: New test. + * gcc.target/i386/pr59539-2.c: New test. + 2013-12-18 Nick Clifton <nickc@redhat.com> * gcc.dg/pr32912-2.c: Fix for 16-bit targets. diff --git a/gcc/testsuite/gcc.target/i386/pr59539-1.c b/gcc/testsuite/gcc.target/i386/pr59539-1.c new file mode 100644 index 0000000..9b34053 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr59539-1.c @@ -0,0 +1,16 @@ +/* PR target/59539 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx" } */ + +#include <immintrin.h> + +int +foo (void *p1, void *p2) +{ + __m128i d1 = _mm_loadu_si128 ((__m128i *) p1); + __m128i d2 = _mm_loadu_si128 ((__m128i *) p2); + __m128i result = _mm_cmpeq_epi16 (d1, d2); + return _mm_movemask_epi8 (result); +} + +/* { dg-final { scan-assembler-times "vmovdqu" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr59539-2.c b/gcc/testsuite/gcc.target/i386/pr59539-2.c new file mode 100644 index 0000000..b53b8c4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr59539-2.c @@ -0,0 +1,16 @@ +/* PR target/59539 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx2" } */ + +#include <immintrin.h> + +int +foo (void *p1, void *p2) +{ + __m256i d1 = _mm256_loadu_si256 ((__m256i *) p1); + __m256i d2 = _mm256_loadu_si256 ((__m256i *) p2); + __m256i result = _mm256_cmpeq_epi16 (d1, d2); + return _mm256_movemask_epi8 (result); +} + +/* { dg-final { scan-assembler-times "vmovdqu" 1 } } */ |