aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorliuhongt <hongtao.liu@intel.com>2021-05-24 10:57:52 +0800
committerliuhongt <hongtao.liu@intel.com>2021-06-28 09:17:36 +0800
commit3f1a08d9d731975d4061c306837ab28d52f37c7e (patch)
tree68001ffd1da3a0ded053ace7d5fa4280d902ce92 /gcc
parent28560c6d4043d8f6ac570f35fb84e952e9c719fe (diff)
downloadgcc-3f1a08d9d731975d4061c306837ab28d52f37c7e.zip
gcc-3f1a08d9d731975d4061c306837ab28d52f37c7e.tar.gz
gcc-3f1a08d9d731975d4061c306837ab28d52f37c7e.tar.bz2
For 128/256-bit vec_cond_expr, When mask operands is lt reg const0_rtx, blendv can be used instead of avx512 mask.
gcc/ChangeLog: PR target/100648 * config/i386/sse.md (*avx_cmp<mode>3_lt): New define_insn_and_split. (*avx_cmp<mode>3_ltint): Ditto. (*avx2_pcmp<mode>3_3): Ditto. (*avx2_pcmp<mode>3_4): Ditto. (*avx2_pcmp<mode>3_5): Ditto. gcc/testsuite/ChangeLog: PR target/100648 * g++.target/i386/avx2-pr54700-2.C: Adjust testcase. * g++.target/i386/avx512vl-pr54700-1a.C: New test. * g++.target/i386/avx512vl-pr54700-1b.C: New test. * g++.target/i386/avx512vl-pr54700-2a.C: New test. * g++.target/i386/avx512vl-pr54700-2b.C: New test. * gcc.target/i386/avx512vl-pr100648.c: New test. * gcc.target/i386/avx512vl-blendv-1.c: New test. * gcc.target/i386/avx512vl-blendv-2.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/i386/sse.md152
-rw-r--r--gcc/testsuite/g++.target/i386/avx2-pr54700-2.C8
-rw-r--r--gcc/testsuite/g++.target/i386/avx512vl-pr54700-1a.C9
-rw-r--r--gcc/testsuite/g++.target/i386/avx512vl-pr54700-1b.C9
-rw-r--r--gcc/testsuite/g++.target/i386/avx512vl-pr54700-2a.C17
-rw-r--r--gcc/testsuite/g++.target/i386/avx512vl-pr54700-2b.C17
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-blendv-1.c51
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-blendv-2.c41
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-pr100648.c21
9 files changed, 324 insertions, 1 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 3100635..ffcc0c8 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -3048,6 +3048,68 @@
UNSPEC_PCMP))]
"operands[5] = GEN_INT (INTVAL (operands[5]) ^ 4);")
+(define_insn_and_split "*avx_cmp<mode>3_lt"
+ [(set (match_operand:VF_128_256 0 "register_operand")
+ (vec_merge:VF_128_256
+ (match_operand:VF_128_256 1 "vector_operand")
+ (match_operand:VF_128_256 2 "vector_operand")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:<sseintvecmode> 3 "register_operand")
+ (match_operand:<sseintvecmode> 4 "const0_operand")
+ (match_operand:SI 5 "const_0_to_7_operand")]
+ UNSPEC_PCMP)))]
+ "TARGET_AVX512VL && ix86_pre_reload_split ()
+ /* LT or GE 0 */
+ && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[2]))
+ || (INTVAL (operands[5]) == 5 && !MEM_P (operands[1])))"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:VF_128_256
+ [(match_dup 2)
+ (match_dup 1)
+ (lt:VF_128_256
+ (match_dup 3)
+ (match_dup 4))]
+ UNSPEC_BLENDV))]
+{
+ if (INTVAL (operands[5]) == 5)
+ std::swap (operands[1], operands[2]);
+})
+
+(define_insn_and_split "*avx_cmp<mode>3_ltint"
+ [(set (match_operand:VI48_AVX 0 "register_operand")
+ (vec_merge:VI48_AVX
+ (match_operand:VI48_AVX 1 "vector_operand")
+ (match_operand:VI48_AVX 2 "vector_operand")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI48_AVX 3 "register_operand")
+ (match_operand:VI48_AVX 4 "const0_operand")
+ (match_operand:SI 5 "const_0_to_7_operand")]
+ UNSPEC_PCMP)))]
+ "TARGET_AVX512VL && ix86_pre_reload_split ()
+ /* LT or GE 0 */
+ && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[2]))
+ || (INTVAL (operands[5]) == 5 && !MEM_P (operands[1])))"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:<ssebytemode>
+ [(match_dup 2)
+ (match_dup 1)
+ (subreg:<ssebytemode>
+ (lt:VI48_AVX
+ (match_dup 3)
+ (match_dup 4)) 0)]
+ UNSPEC_BLENDV))]
+{
+ if (INTVAL (operands[5]) == 5)
+ std::swap (operands[1], operands[2]);
+ operands[0] = gen_lowpart (<ssebytemode>mode, operands[0]);
+ operands[1] = gen_lowpart (<ssebytemode>mode, operands[1]);
+ operands[2] = gen_lowpart (<ssebytemode>mode, operands[2]);
+})
+
(define_insn "avx_vmcmp<mode>3"
[(set (match_operand:VF_128 0 "register_operand" "=x")
(vec_merge:VF_128
@@ -13063,6 +13125,96 @@
DONE;
})
+(define_insn_and_split "*avx2_pcmp<mode>3_3"
+ [(set (match_operand:VI1_AVX2 0 "register_operand")
+ (vec_merge:VI1_AVX2
+ (match_operand:VI1_AVX2 1 "vector_operand")
+ (match_operand:VI1_AVX2 2 "vector_operand")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI1_AVX2 3 "register_operand")
+ (match_operand:VI1_AVX2 4 "const0_operand")
+ (match_operand:SI 5 "const_0_to_7_operand")]
+ UNSPEC_PCMP)))]
+ "TARGET_AVX512VL && ix86_pre_reload_split ()
+ /* LT or GE 0 */
+ && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[2]))
+ || (INTVAL (operands[5]) == 5 && !MEM_P (operands[1])))"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:VI1_AVX2
+ [(match_dup 2)
+ (match_dup 1)
+ (lt:VI1_AVX2
+ (match_dup 3)
+ (match_dup 4))]
+ UNSPEC_BLENDV))]
+{
+ if (INTVAL (operands[5]) == 5)
+ std::swap (operands[1], operands[2]);
+})
+
+(define_insn_and_split "*avx2_pcmp<mode>3_4"
+ [(set (match_operand:VI1_AVX2 0 "register_operand")
+ (vec_merge:VI1_AVX2
+ (match_operand:VI1_AVX2 1 "vector_operand")
+ (match_operand:VI1_AVX2 2 "vector_operand")
+ (unspec:<avx512fmaskmode>
+ [(subreg:VI1_AVX2 (not (match_operand 3 "register_operand")) 0)
+ (match_operand:VI1_AVX2 4 "const0_operand")
+ (match_operand:SI 5 "const_0_to_7_operand")]
+ UNSPEC_PCMP)))]
+ "TARGET_AVX512VL && ix86_pre_reload_split ()
+ && GET_MODE_CLASS (GET_MODE (operands[3])) == MODE_VECTOR_INT
+ && GET_MODE_SIZE (GET_MODE (operands[3])) == <MODE_SIZE>
+ /* LT or GE 0 */
+ && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[1]))
+ || (INTVAL (operands[5]) == 5 && !MEM_P (operands[2])))"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:VI1_AVX2
+ [(match_dup 1)
+ (match_dup 2)
+ (lt:VI1_AVX2
+ (match_dup 3)
+ (match_dup 4))]
+ UNSPEC_BLENDV))]
+{
+ if (INTVAL (operands[5]) == 1)
+ std::swap (operands[1], operands[2]);
+ operands[3] = gen_lowpart (<MODE>mode, operands[3]);
+})
+
+(define_insn_and_split "*avx2_pcmp<mode>3_5"
+ [(set (match_operand:VI1_AVX2 0 "register_operand")
+ (vec_merge:VI1_AVX2
+ (match_operand:VI1_AVX2 1 "vector_operand")
+ (match_operand:VI1_AVX2 2 "vector_operand")
+ (unspec:<avx512fmaskmode>
+ [(not:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand"))
+ (match_operand:VI1_AVX2 4 "const0_operand")
+ (match_operand:SI 5 "const_0_to_7_operand")]
+ UNSPEC_PCMP)))]
+ "TARGET_AVX512VL && ix86_pre_reload_split ()
+ /* LT or GE 0 */
+ && ((INTVAL (operands[5]) == 1 && !MEM_P (operands[1]))
+ || (INTVAL (operands[5]) == 5 && !MEM_P (operands[2])))"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:VI1_AVX2
+ [(match_dup 1)
+ (match_dup 2)
+ (lt:VI1_AVX2
+ (match_dup 3)
+ (match_dup 4))]
+ UNSPEC_BLENDV))]
+{
+ if (INTVAL (operands[5]) == 1)
+ std::swap (operands[1], operands[2]);
+})
+
(define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand")
(unspec:<avx512fmaskmode>
diff --git a/gcc/testsuite/g++.target/i386/avx2-pr54700-2.C b/gcc/testsuite/g++.target/i386/avx2-pr54700-2.C
index c9054e5..e7a85c3 100644
--- a/gcc/testsuite/g++.target/i386/avx2-pr54700-2.C
+++ b/gcc/testsuite/g++.target/i386/avx2-pr54700-2.C
@@ -2,9 +2,15 @@
/* { dg-do run { target avx2 } } */
/* { dg-options "-O2 -std=c++14 -mavx2 -mno-xop -mno-avx512f" } */
-#include "avx2-check.h"
+#ifndef CHECK_H
+#define CHECK_H "avx2-check.h"
+#endif
+#ifndef TEST
#define TEST avx2_test
+#endif
+
+#include CHECK_H
#include "avx2-pr54700-1.C"
diff --git a/gcc/testsuite/g++.target/i386/avx512vl-pr54700-1a.C b/gcc/testsuite/g++.target/i386/avx512vl-pr54700-1a.C
new file mode 100644
index 0000000..fedc3aa
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/avx512vl-pr54700-1a.C
@@ -0,0 +1,9 @@
+/* PR target/100648 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -std=c++14 -mavx2 -mno-xop -mavx512vl -mavx512bw" } */
+/* { dg-final { scan-assembler-not "vpcmpgt\[bdq]" } } */
+/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
+/* { dg-final { scan-assembler-times "vblendvps" 4 } } */
+/* { dg-final { scan-assembler-times "vblendvpd" 4 } } */
+
+#include "avx2-pr54700-1.C"
diff --git a/gcc/testsuite/g++.target/i386/avx512vl-pr54700-1b.C b/gcc/testsuite/g++.target/i386/avx512vl-pr54700-1b.C
new file mode 100644
index 0000000..03f9343
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/avx512vl-pr54700-1b.C
@@ -0,0 +1,9 @@
+/* PR target/100648 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -std=c++14 -mavx512vl -mavx512bw -mno-xop" } */
+/* { dg-final { scan-assembler-not "pcmpgt\[bdq]" } } */
+/* { dg-final { scan-assembler-times "pblendvb" 2 } } */
+/* { dg-final { scan-assembler-times "blendvps" 4 } } */
+/* { dg-final { scan-assembler-times "blendvpd" 4 } } */
+
+#include "sse4_1-pr54700-1.C"
diff --git a/gcc/testsuite/g++.target/i386/avx512vl-pr54700-2a.C b/gcc/testsuite/g++.target/i386/avx512vl-pr54700-2a.C
new file mode 100644
index 0000000..687a8c4
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/avx512vl-pr54700-2a.C
@@ -0,0 +1,17 @@
+/* PR target/100648 */
+/* { dg-do run { target avx2 } } */
+/* { dg-options "-O2 -std=c++14 -mavx2 -mavx512vl -mavx512bw" } */
+
+#ifndef CHECK_H
+#define CHECK_H "avx512f-helper.h"
+#endif
+
+#ifndef TEST
+#define TEST_test_256
+#endif
+
+#include CHECK_H
+#include "avx2-pr54700-2.C"
+
+#define AVX512VL
+#define AVX512BW
diff --git a/gcc/testsuite/g++.target/i386/avx512vl-pr54700-2b.C b/gcc/testsuite/g++.target/i386/avx512vl-pr54700-2b.C
new file mode 100644
index 0000000..40450a9
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/avx512vl-pr54700-2b.C
@@ -0,0 +1,17 @@
+/* PR target/pr100648 */
+/* { dg-do run { target sse4 } } */
+/* { dg-options "-O2 -std=c++14 -msse4 -mavx512vl -mavx512bw -mno-xop" } */
+
+#ifndef CHECK_H
+#define CHECK_H "avx512f-helper.h"
+#endif
+
+#ifndef TEST
+#define TEST_test_128
+#endif
+
+#include CHECK_H
+#include "sse4_1-pr54700-2.C"
+
+#define AVX512VL
+#define AVX512BW
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-blendv-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-blendv-1.c
new file mode 100644
index 0000000..6aa004b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-blendv-1.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times {pblendvb[\t ]*%xmm} 1 } } */
+/* { dg-final { scan-assembler-times {pblendvb[\t ]*%ymm} 1 } } */
+/* { dg-final { scan-assembler-times {blendvps[\t ]*%xmm} 1 } } */
+/* { dg-final { scan-assembler-times {blendvps[\t ]*%ymm} 1 } } */
+/* { dg-final { scan-assembler-times {blendvpd[\t ]*%xmm} 1 } } */
+/* { dg-final { scan-assembler-times {blendvpd[\t ]*%ymm} 1 } } */
+
+typedef float v4sf __attribute__ ((vector_size (16)));
+typedef float v8sf __attribute__ ((vector_size (32)));
+typedef double v2df __attribute__ ((vector_size (16)));
+typedef double v4df __attribute__ ((vector_size (32)));
+typedef char v16qi __attribute__ ((vector_size (16)));
+typedef char v32qi __attribute__ ((vector_size (32)));
+
+v4sf
+foo (v4sf a, v4sf b, v4sf c)
+{
+ return __builtin_ia32_blendvps (a, b, c);
+}
+
+v8sf
+foo2 (v8sf a, v8sf b, v8sf c)
+{
+ return __builtin_ia32_blendvps256 (a, b, c);
+}
+
+v2df
+foo3 (v2df a, v2df b, v2df c)
+{
+ return __builtin_ia32_blendvpd (a, b, c);
+}
+
+v4df
+foo4 (v4df a, v4df b, v4df c)
+{
+ return __builtin_ia32_blendvpd256 (a, b, c);
+}
+
+v16qi
+foo5 (v16qi a, v16qi b, v16qi c)
+{
+ return __builtin_ia32_pblendvb128 (a, b, c);
+}
+
+v32qi
+foo6 (v32qi a, v32qi b, v32qi c)
+{
+ return __builtin_ia32_pblendvb256 (a, b, c);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-blendv-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-blendv-2.c
new file mode 100644
index 0000000..daddcd5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-blendv-2.c
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-not {pblendv} } } */
+/* { dg-final { scan-assembler-not {blendvp} } } */
+
+#include <x86intrin.h>
+__m128
+foo (__m128 a, __m128 b)
+{
+ return _mm_blendv_ps (a, b, _mm_setzero_ps ());
+}
+
+__m256
+foo2 (__m256 a, __m256 b)
+{
+ return _mm256_blendv_ps (a, b, _mm256_set1_ps (-1.0));
+}
+
+__m128d
+foo3 (__m128d a, __m128d b, __m128d c)
+{
+ return _mm_blendv_pd (a, b, _mm_set1_pd (1.0));
+}
+
+__m256d
+foo4 (__m256d a, __m256d b, __m256d c)
+{
+ return _mm256_blendv_pd (a, b, _mm256_set1_pd (-134.3));
+}
+
+__m128i
+foo5 (__m128i a, __m128i b, __m128i c)
+{
+ return _mm_blendv_epi8 (a, b, _mm_set1_epi8 (3));
+}
+
+__m256i
+foo6 (__m256i a, __m256i b, __m256i c)
+{
+ return _mm256_blendv_epi8 (a, b, _mm256_set1_epi8 (-22));
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr100648.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr100648.c
new file mode 100644
index 0000000..2ef7c7b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr100648.c
@@ -0,0 +1,21 @@
+/* PR target/100648. */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl -mavx512bw -masm=att" } */
+/* { dg-final { scan-assembler-times "\tvpblendvb\t" 2 } } */
+/* { dg-final { scan-assembler-not "\tvpcmpeq" } } */
+/* { dg-final { scan-assembler-not "\tvpandn" } } */
+#include <x86intrin.h>
+
+__m256i
+f1 (__m256i a, __m256i b, __m256i mask)
+{
+ return _mm256_blendv_epi8(a, b,
+ _mm256_andnot_si256(mask, _mm256_set1_epi8(255)));
+}
+
+__m128i
+f2 (__m128i a, __m128i b, __m128i mask)
+{
+ return _mm_blendv_epi8(a, b,
+ _mm_andnot_si128(mask, _mm_set1_epi8(255)));
+}