aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2017-10-24 21:34:06 +0200
committerJakub Jelinek <jakub@gcc.gnu.org>2017-10-24 21:34:06 +0200
commit5c0331f62dd2d4025a97538aa6583b787a4a8214 (patch)
tree4320571507b8f8dba245b2e58c75344695c1fab2
parentff1ff960f5039cf1d2970bfe2ba965d1b994caf1 (diff)
downloadgcc-5c0331f62dd2d4025a97538aa6583b787a4a8214.zip
gcc-5c0331f62dd2d4025a97538aa6583b787a4a8214.tar.gz
gcc-5c0331f62dd2d4025a97538aa6583b787a4a8214.tar.bz2
re PR target/82370 (AVX512 can use a memory operand for immediate-count vpsrlw, but gcc doesn't.)
PR target/82370 * config/i386/sse.md (VIMAX_AVX2): Remove V4TImode. (VIMAX_AVX2_AVX512BW, VIMAX_AVX512VL): New mode iterators. (vec_shl_<mode>): Remove unused expander. (avx512bw_<shift_insn><mode>3): New define_insn. (<sse2_avx2>_ashl<mode>3, <sse2_avx2>_lshr<mode>3): Replaced by ... (<sse2_avx2>_<shift_insn><mode>3): ... this. New define_insn. * gcc.target/i386/pr82370.c: New test. From-SVN: r254058
-rw-r--r--gcc/ChangeLog10
-rw-r--r--gcc/config/i386/sse.md65
-rw-r--r--gcc/testsuite/ChangeLog5
-rw-r--r--gcc/testsuite/gcc.target/i386/pr82370.c18
4 files changed, 57 insertions, 41 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a27df40..b2864cf 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2017-10-24 Jakub Jelinek <jakub@redhat.com>
+
+ PR target/82370
+ * config/i386/sse.md (VIMAX_AVX2): Remove V4TImode.
+ (VIMAX_AVX2_AVX512BW, VIMAX_AVX512VL): New mode iterators.
+ (vec_shl_<mode>): Remove unused expander.
+ (avx512bw_<shift_insn><mode>3): New define_insn.
+ (<sse2_avx2>_ashl<mode>3, <sse2_avx2>_lshr<mode>3): Replaced by ...
+ (<sse2_avx2>_<shift_insn><mode>3): ... this. New define_insn.
+
2017-10-24 Paolo Carlini <paolo.carlini@oracle.com>
PR c++/82466
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 35e4bc9..4f9f2bd 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -371,10 +371,17 @@
[V16SF V16SI])
;; ??? We should probably use TImode instead.
-(define_mode_iterator VIMAX_AVX2
+(define_mode_iterator VIMAX_AVX2_AVX512BW
[(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
-;; ??? This should probably be dropped in favor of VIMAX_AVX2.
+;; Suppose TARGET_AVX512BW as baseline
+(define_mode_iterator VIMAX_AVX512VL
+ [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
+
+(define_mode_iterator VIMAX_AVX2
+ [(V2TI "TARGET_AVX2") V1TI])
+
+;; ??? This should probably be dropped in favor of VIMAX_AVX2_AVX512BW.
(define_mode_iterator SSESCALARMODE
[(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
@@ -10778,9 +10785,9 @@
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "vec_shl_<mode>"
+(define_expand "vec_shr_<mode>"
[(set (match_dup 3)
- (ashift:V1TI
+ (lshiftrt:V1TI
(match_operand:VI_128 1 "register_operand")
(match_operand:SI 2 "const_0_to_255_mul_8_operand")))
(set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
@@ -10791,48 +10798,24 @@
operands[4] = gen_lowpart (<MODE>mode, operands[3]);
})
-(define_insn "<sse2_avx2>_ashl<mode>3"
- [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
- (ashift:VIMAX_AVX2
- (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
- (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
- "TARGET_SSE2"
+(define_insn "avx512bw_<shift_insn><mode>3"
+ [(set (match_operand:VIMAX_AVX512VL 0 "register_operand" "=v")
+ (any_lshift:VIMAX_AVX512VL
+ (match_operand:VIMAX_AVX512VL 1 "nonimmediate_operand" "vm")
+ (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+ "TARGET_AVX512BW"
{
operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
-
- switch (which_alternative)
- {
- case 0:
- return "pslldq\t{%2, %0|%0, %2}";
- case 1:
- return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
- default:
- gcc_unreachable ();
- }
+ return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
}
- [(set_attr "isa" "noavx,avx")
- (set_attr "type" "sseishft")
+ [(set_attr "type" "sseishft")
(set_attr "length_immediate" "1")
- (set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "maybe_evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "vec_shr_<mode>"
- [(set (match_dup 3)
- (lshiftrt:V1TI
- (match_operand:VI_128 1 "register_operand")
- (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
- (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
- "TARGET_SSE2"
-{
- operands[1] = gen_lowpart (V1TImode, operands[1]);
- operands[3] = gen_reg_rtx (V1TImode);
- operands[4] = gen_lowpart (<MODE>mode, operands[3]);
-})
-
-(define_insn "<sse2_avx2>_lshr<mode>3"
+(define_insn "<sse2_avx2>_<shift_insn><mode>3"
[(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
- (lshiftrt:VIMAX_AVX2
+ (any_lshift:VIMAX_AVX2
(match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
(match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
"TARGET_SSE2"
@@ -10842,9 +10825,9 @@
switch (which_alternative)
{
case 0:
- return "psrldq\t{%2, %0|%0, %2}";
+ return "p<vshift>dq\t{%2, %0|%0, %2}";
case 1:
- return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
+ return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
default:
gcc_unreachable ();
}
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 7801c03..a337118 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2017-10-24 Jakub Jelinek <jakub@redhat.com>
+
+ PR target/82370
+ * gcc.target/i386/pr82370.c: New test.
+
2017-10-24 Paolo Carlini <paolo.carlini@oracle.com>
PR c++/82466
diff --git a/gcc/testsuite/gcc.target/i386/pr82370.c b/gcc/testsuite/gcc.target/i386/pr82370.c
new file mode 100644
index 0000000..cc4d9b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82370.c
@@ -0,0 +1,18 @@
+/* PR target/82370 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl -mavx512bw -masm=att" } */
+/* { dg-final { scan-assembler-times "vpslldq\[ \t]\+\\\$5, \\(%\[a-z0-9,]*\\), %xmm\[0-9]\+" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrldq\[ \t]\+\\\$5, \\(%\[a-z0-9,]*\\), %xmm\[0-9]\+" 1 } } */
+/* { dg-final { scan-assembler-times "vpslldq\[ \t]\+\\\$5, \\(%\[a-z0-9,]*\\), %ymm\[0-9]\+" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrldq\[ \t]\+\\\$5, \\(%\[a-z0-9,]*\\), %ymm\[0-9]\+" 1 } } */
+/* { dg-final { scan-assembler-times "vpslldq\[ \t]\+\\\$5, \\(%\[a-z0-9,]*\\), %zmm\[0-9]\+" 1 } } */
+/* { dg-final { scan-assembler-times "vpsrldq\[ \t]\+\\\$5, \\(%\[a-z0-9,]*\\), %zmm\[0-9]\+" 1 } } */
+
+#include <x86intrin.h>
+
+__m512i f1 (__m512i *x) { return _mm512_bslli_epi128 (*x, 5); }
+__m512i f2 (__m512i *x) { return _mm512_bsrli_epi128 (*x, 5); }
+__m256i f3 (__m256i *x) { return _mm256_bslli_epi128 (*x, 5); }
+__m256i f4 (__m256i *x) { return _mm256_bsrli_epi128 (*x, 5); }
+__m128i f5 (__m128i *x) { return _mm_bslli_si128 (*x, 5); }
+__m128i f6 (__m128i *x) { return _mm_bsrli_si128 (*x, 5); }