diff options
author | Jakub Jelinek <jakub@redhat.com> | 2017-10-24 21:34:06 +0200 |
---|---|---|
committer | Jakub Jelinek <jakub@gcc.gnu.org> | 2017-10-24 21:34:06 +0200 |
commit | 5c0331f62dd2d4025a97538aa6583b787a4a8214 (patch) | |
tree | 4320571507b8f8dba245b2e58c75344695c1fab2 | |
parent | ff1ff960f5039cf1d2970bfe2ba965d1b994caf1 (diff) | |
download | gcc-5c0331f62dd2d4025a97538aa6583b787a4a8214.zip gcc-5c0331f62dd2d4025a97538aa6583b787a4a8214.tar.gz gcc-5c0331f62dd2d4025a97538aa6583b787a4a8214.tar.bz2 |
re PR target/82370 (AVX512 can use a memory operand for immediate-count vpsrlw, but gcc doesn't.)
PR target/82370
* config/i386/sse.md (VIMAX_AVX2): Remove V4TImode.
(VIMAX_AVX2_AVX512BW, VIMAX_AVX512VL): New mode iterators.
(vec_shl_<mode>): Remove unused expander.
(avx512bw_<shift_insn><mode>3): New define_insn.
(<sse2_avx2>_ashl<mode>3, <sse2_avx2>_lshr<mode>3): Replaced by ...
(<sse2_avx2>_<shift_insn><mode>3): ... this. New define_insn.
* gcc.target/i386/pr82370.c: New test.
From-SVN: r254058
-rw-r--r-- | gcc/ChangeLog | 10 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 65 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr82370.c | 18 |
4 files changed, 57 insertions, 41 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a27df40..b2864cf 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2017-10-24 Jakub Jelinek <jakub@redhat.com> + + PR target/82370 + * config/i386/sse.md (VIMAX_AVX2): Remove V4TImode. + (VIMAX_AVX2_AVX512BW, VIMAX_AVX512VL): New mode iterators. + (vec_shl_<mode>): Remove unused expander. + (avx512bw_<shift_insn><mode>3): New define_insn. + (<sse2_avx2>_ashl<mode>3, <sse2_avx2>_lshr<mode>3): Replaced by ... + (<sse2_avx2>_<shift_insn><mode>3): ... this. New define_insn. + 2017-10-24 Paolo Carlini <paolo.carlini@oracle.com> PR c++/82466 diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 35e4bc9..4f9f2bd 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -371,10 +371,17 @@ [V16SF V16SI]) ;; ??? We should probably use TImode instead. -(define_mode_iterator VIMAX_AVX2 +(define_mode_iterator VIMAX_AVX2_AVX512BW [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI]) -;; ??? This should probably be dropped in favor of VIMAX_AVX2. +;; Suppose TARGET_AVX512BW as baseline +(define_mode_iterator VIMAX_AVX512VL + [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")]) + +(define_mode_iterator VIMAX_AVX2 + [(V2TI "TARGET_AVX2") V1TI]) + +;; ??? This should probably be dropped in favor of VIMAX_AVX2_AVX512BW. (define_mode_iterator SSESCALARMODE [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI]) @@ -10778,9 +10785,9 @@ (set_attr "mode" "<sseinsnmode>")]) -(define_expand "vec_shl_<mode>" +(define_expand "vec_shr_<mode>" [(set (match_dup 3) - (ashift:V1TI + (lshiftrt:V1TI (match_operand:VI_128 1 "register_operand") (match_operand:SI 2 "const_0_to_255_mul_8_operand"))) (set (match_operand:VI_128 0 "register_operand") (match_dup 4))] @@ -10791,48 +10798,24 @@ operands[4] = gen_lowpart (<MODE>mode, operands[3]); }) -(define_insn "<sse2_avx2>_ashl<mode>3" - [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v") - (ashift:VIMAX_AVX2 - (match_operand:VIMAX_AVX2 1 "register_operand" "0,v") - (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))] - "TARGET_SSE2" +(define_insn "avx512bw_<shift_insn><mode>3" + [(set (match_operand:VIMAX_AVX512VL 0 "register_operand" "=v") + (any_lshift:VIMAX_AVX512VL + (match_operand:VIMAX_AVX512VL 1 "nonimmediate_operand" "vm") + (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] + "TARGET_AVX512BW" { operands[2] = GEN_INT (INTVAL (operands[2]) / 8); - - switch (which_alternative) - { - case 0: - return "pslldq\t{%2, %0|%0, %2}"; - case 1: - return "vpslldq\t{%2, %1, %0|%0, %1, %2}"; - default: - gcc_unreachable (); - } + return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}"; } - [(set_attr "isa" "noavx,avx") - (set_attr "type" "sseishft") + [(set_attr "type" "sseishft") (set_attr "length_immediate" "1") - (set_attr "prefix_data16" "1,*") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "maybe_evex") (set_attr "mode" "<sseinsnmode>")]) -(define_expand "vec_shr_<mode>" - [(set (match_dup 3) - (lshiftrt:V1TI - (match_operand:VI_128 1 "register_operand") - (match_operand:SI 2 "const_0_to_255_mul_8_operand"))) - (set (match_operand:VI_128 0 "register_operand") (match_dup 4))] - "TARGET_SSE2" -{ - operands[1] = gen_lowpart (V1TImode, operands[1]); - operands[3] = gen_reg_rtx (V1TImode); - operands[4] = gen_lowpart (<MODE>mode, operands[3]); -}) - -(define_insn "<sse2_avx2>_lshr<mode>3" +(define_insn "<sse2_avx2>_<shift_insn><mode>3" [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v") - (lshiftrt:VIMAX_AVX2 + (any_lshift:VIMAX_AVX2 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v") (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))] "TARGET_SSE2" @@ -10842,9 +10825,9 @@ switch (which_alternative) { case 0: - return "psrldq\t{%2, %0|%0, %2}"; + return "p<vshift>dq\t{%2, %0|%0, %2}"; case 1: - return "vpsrldq\t{%2, %1, %0|%0, %1, %2}"; + return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}"; default: gcc_unreachable (); } diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 7801c03..a337118 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2017-10-24 Jakub Jelinek <jakub@redhat.com> + + PR target/82370 + * gcc.target/i386/pr82370.c: New test. + 2017-10-24 Paolo Carlini <paolo.carlini@oracle.com> PR c++/82466 diff --git a/gcc/testsuite/gcc.target/i386/pr82370.c b/gcc/testsuite/gcc.target/i386/pr82370.c new file mode 100644 index 0000000..cc4d9b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr82370.c @@ -0,0 +1,18 @@ +/* PR target/82370 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512vl -mavx512bw -masm=att" } */ +/* { dg-final { scan-assembler-times "vpslldq\[ \t]\+\\\$5, \\(%\[a-z0-9,]*\\), %xmm\[0-9]\+" 1 } } */ +/* { dg-final { scan-assembler-times "vpsrldq\[ \t]\+\\\$5, \\(%\[a-z0-9,]*\\), %xmm\[0-9]\+" 1 } } */ +/* { dg-final { scan-assembler-times "vpslldq\[ \t]\+\\\$5, \\(%\[a-z0-9,]*\\), %ymm\[0-9]\+" 1 } } */ +/* { dg-final { scan-assembler-times "vpsrldq\[ \t]\+\\\$5, \\(%\[a-z0-9,]*\\), %ymm\[0-9]\+" 1 } } */ +/* { dg-final { scan-assembler-times "vpslldq\[ \t]\+\\\$5, \\(%\[a-z0-9,]*\\), %zmm\[0-9]\+" 1 } } */ +/* { dg-final { scan-assembler-times "vpsrldq\[ \t]\+\\\$5, \\(%\[a-z0-9,]*\\), %zmm\[0-9]\+" 1 } } */ + +#include <x86intrin.h> + +__m512i f1 (__m512i *x) { return _mm512_bslli_epi128 (*x, 5); } +__m512i f2 (__m512i *x) { return _mm512_bsrli_epi128 (*x, 5); } +__m256i f3 (__m256i *x) { return _mm256_bslli_epi128 (*x, 5); } +__m256i f4 (__m256i *x) { return _mm256_bsrli_epi128 (*x, 5); } +__m128i f5 (__m128i *x) { return _mm_bslli_si128 (*x, 5); } +__m128i f6 (__m128i *x) { return _mm_bsrli_si128 (*x, 5); } |