re PR target/88152 (optimize SSE & AVX char compares with subsequent movmskb)

PR target/88152 * config/i386/sse.md (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt, *<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt, *<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift, *<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift, *<sse2_avx2>_pmovmskb_lt, *<sse2_avx2>_pmovmskb_zext_lt): New define_insn_and_split patterns. * g++.target/i386/pr88152.C: New test. From-SVN: r266649
author: Jakub Jelinek <jakub@redhat.com> 2018-11-29 23:16:37 +0100
committer: Jakub Jelinek <jakub@gcc.gnu.org> 2018-11-29 23:16:37 +0100
commit: 0b1c4b83e44a89b2736671f968c07ca1aab634ae (patch)
tree: b218b1a7ff49487c2edc78347432a8583c1a19b6 /gcc
parent: fb9e6a4bf8fb2a6aee9fd5c2dd0d0f4c842cf9f1 (diff)
download: gcc-0b1c4b83e44a89b2736671f968c07ca1aab634ae.zip
gcc-0b1c4b83e44a89b2736671f968c07ca1aab634ae.tar.gz
gcc-0b1c4b83e44a89b2736671f968c07ca1aab634ae.tar.bz2
4 files changed, 172 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 93ee3fa..7075f34 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,13 @@
 2018-11-29  Jakub Jelinek  <jakub@redhat.com>
 
+	PR target/88152
+	* config/i386/sse.md (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt,
+	*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt,
+	*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift,
+	*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift,
+	*<sse2_avx2>_pmovmskb_lt, *<sse2_avx2>_pmovmskb_zext_lt): New
+	define_insn_and_split patterns.
+
 	PR target/54700
 	* config/i386/sse.md
 	(*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_lt,
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b4d355e..e2cae71 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -14653,6 +14653,78 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")])
 
+(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI
+	  [(lt:VF_128_256
+	     (match_operand:<sseintvecmode> 1 "register_operand" "x")
+	     (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
+	  UNSPEC_MOVMSK))]
+  "TARGET_SSE"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
+  "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (unspec:SI
+	    [(lt:VF_128_256
+	       (match_operand:<sseintvecmode> 1 "register_operand" "x")
+	       (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
+	    UNSPEC_MOVMSK)))]
+  "TARGET_64BIT && TARGET_SSE"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
+  "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI
+	  [(subreg:VF_128_256
+	     (ashiftrt:<sseintvecmode>
+	       (match_operand:<sseintvecmode> 1 "register_operand" "x")
+	       (match_operand:QI 2 "const_int_operand" "n")) 0)]
+	  UNSPEC_MOVMSK))]
+  "TARGET_SSE"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
+  "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (unspec:SI
+	    [(subreg:VF_128_256
+	       (ashiftrt:<sseintvecmode>
+		 (match_operand:<sseintvecmode> 1 "register_operand" "x")
+	       (match_operand:QI 2 "const_int_operand" "n")) 0)]
+	    UNSPEC_MOVMSK)))]
+  "TARGET_64BIT && TARGET_SSE"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
+  "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "<sse2_avx2>_pmovmskb"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(unspec:SI
@@ -14686,6 +14758,49 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "SI")])
 
+(define_insn_and_split "*<sse2_avx2>_pmovmskb_lt"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI
+	  [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
+			(match_operand:VI1_AVX2 2 "const0_operand" "C"))]
+	  UNSPEC_MOVMSK))]
+  "TARGET_SSE2"
+  "#"
+  ""
+  [(set (match_dup 0)
+	(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
+  ""
+  [(set_attr "type" "ssemov")
+   (set (attr "prefix_data16")
+     (if_then_else
+       (match_test "TARGET_AVX")
+     (const_string "*")
+     (const_string "1")))
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "SI")])
+
+(define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (unspec:SI
+	    [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
+			  (match_operand:VI1_AVX2 2 "const0_operand" "C"))]
+	    UNSPEC_MOVMSK)))]
+  "TARGET_64BIT && TARGET_SSE2"
+  "#"
+  ""
+  [(set (match_dup 0)
+	(zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
+  ""
+  [(set_attr "type" "ssemov")
+   (set (attr "prefix_data16")
+     (if_then_else
+       (match_test "TARGET_AVX")
+     (const_string "*")
+     (const_string "1")))
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "SI")])
+
 (define_expand "sse2_maskmovdqu"
   [(set (match_operand:V16QI 0 "memory_operand")
 	(unspec:V16QI [(match_operand:V16QI 1 "register_operand")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c69e7a3..aae3c61 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2018-11-29  Jakub Jelinek  <jakub@redhat.com>
+
+	PR target/88152
+	* g++.target/i386/pr88152.C: New test.
+
 2018-11-29  Vladimir Makarov  <vmakarov@redhat.com>
 
 	* gcc.target/i386/pr34256.c: Adjust the number of expected moves.
diff --git a/gcc/testsuite/g++.target/i386/pr88152.C b/gcc/testsuite/g++.target/i386/pr88152.C
new file mode 100644
index 0000000..9f1659b
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr88152.C
@@ -0,0 +1,44 @@
+// PR target/88152
+// { dg-do compile }
+// { dg-options "-O2 -mavx2 -std=c++11" }
+// { dg-final { scan-assembler-times "vpmovmskb\[^\n\r]*xmm" 6 } }
+// { dg-final { scan-assembler-times "vpmovmskb\[^\n\r]*ymm" 6 } }
+// { dg-final { scan-assembler-times "vmovmskps\[^\n\r]*xmm" 4 } }
+// { dg-final { scan-assembler-times "vmovmskps\[^\n\r]*ymm" 4 } }
+// { dg-final { scan-assembler-times "vmovmskpd\[^\n\r]*xmm" 4 } }
+// { dg-final { scan-assembler-times "vmovmskpd\[^\n\r]*ymm" 4 } }
+// { dg-final { scan-assembler-not "vpcmpgt|vpcmpeq|vpsra" } }
+
+#include <x86intrin.h>
+
+template <typename T, size_t N>
+using V [[gnu::vector_size(N)]] = T;
+
+int f0 (V<unsigned char, 16> a) { return _mm_movemask_epi8 (reinterpret_cast<__m128i> (a > 0x7f)); }
+long int f1 (V<unsigned char, 16> a) { return (unsigned) _mm_movemask_epi8 (reinterpret_cast<__m128i> (a >= 0x80)); }
+long int f2 (V<signed char, 16> a) { return (unsigned) _mm_movemask_epi8 (reinterpret_cast<__m128i> (a < 0)); }
+int f3 (V<signed char, 16> a) { return _mm_movemask_epi8 (reinterpret_cast<__m128i> (a <= -1)); }
+int f4 (V<char, 16> a) { return _mm_movemask_epi8 (reinterpret_cast<__m128i> (a < 0)); }
+long int f5 (V<char, 16> a) { return (unsigned) _mm_movemask_epi8 (reinterpret_cast<__m128i> (a <= -1)); }
+int f6 (V<unsigned int, 16> a) { return _mm_movemask_ps (reinterpret_cast<__m128> (a > __INT_MAX__)); }
+int f7 (V<unsigned int, 16> a) { return _mm_movemask_ps (reinterpret_cast<__m128> (a >= 1U + __INT_MAX__)); }
+int f8 (V<int, 16> a) { return _mm_movemask_ps (reinterpret_cast<__m128> (a < 0)); }
+int f9 (V<int, 16> a) { return _mm_movemask_ps (reinterpret_cast<__m128> (a <= -1)); }
+int f10 (V<unsigned long long, 16> a) { return _mm_movemask_pd (reinterpret_cast<__m128d> (a > __LONG_LONG_MAX__)); }
+int f11 (V<unsigned long long, 16> a) { return _mm_movemask_pd (reinterpret_cast<__m128d> (a >= 1ULL + __LONG_LONG_MAX__)); }
+long int f12 (V<long long, 16> a) { return (unsigned) _mm_movemask_pd (reinterpret_cast<__m128d> (a < 0)); }
+int f13 (V<long long, 16> a) { return _mm_movemask_pd (reinterpret_cast<__m128d> (a <= -1)); }
+int f14 (V<unsigned char, 32> a) { return _mm256_movemask_epi8 (reinterpret_cast<__m256i> (a > 0x7f)); }
+int f15 (V<unsigned char, 32> a) { return _mm256_movemask_epi8 (reinterpret_cast<__m256i> (a >= 0x80)); }
+long int f16 (V<signed char, 32> a) { return (unsigned) _mm256_movemask_epi8 (reinterpret_cast<__m256i> (a < 0)); }
+int f17 (V<signed char, 32> a) { return _mm256_movemask_epi8 (reinterpret_cast<__m256i> (a <= -1)); }
+int f18 (V<char, 32> a) { return _mm256_movemask_epi8 (reinterpret_cast<__m256i> (a < 0)); }
+int f19 (V<char, 32> a) { return _mm256_movemask_epi8 (reinterpret_cast<__m256i> (a <= -1)); }
+long int f20 (V<unsigned int, 32> a) { return (unsigned) _mm256_movemask_ps (reinterpret_cast<__m256> (a > __INT_MAX__)); }
+int f21 (V<unsigned int, 32> a) { return _mm256_movemask_ps (reinterpret_cast<__m256> (a >= 1U + __INT_MAX__)); }
+int f22 (V<int, 32> a) { return _mm256_movemask_ps (reinterpret_cast<__m256> (a < 0)); }
+int f23 (V<int, 32> a) { return _mm256_movemask_ps (reinterpret_cast<__m256> (a <= -1)); }
+int f24 (V<unsigned long long, 32> a) { return _mm256_movemask_pd (reinterpret_cast<__m256d> (a > __LONG_LONG_MAX__)); }
+int f25 (V<unsigned long long, 32> a) { return _mm256_movemask_pd (reinterpret_cast<__m256d> (a >= 1ULL + __LONG_LONG_MAX__)); }
+int f26 (V<long long, 32> a) { return _mm256_movemask_pd (reinterpret_cast<__m256d> (a < 0)); }
+long int f27 (V<long long, 32> a) { return (unsigned) _mm256_movemask_pd (reinterpret_cast<__m256d> (a <= -1)); }
author	Jakub Jelinek <jakub@redhat.com>	2018-11-29 23:16:37 +0100
committer	Jakub Jelinek <jakub@gcc.gnu.org>	2018-11-29 23:16:37 +0100
commit	0b1c4b83e44a89b2736671f968c07ca1aab634ae (patch)
tree	b218b1a7ff49487c2edc78347432a8583c1a19b6 /gcc
parent	fb9e6a4bf8fb2a6aee9fd5c2dd0d0f4c842cf9f1 (diff)
download	gcc-0b1c4b83e44a89b2736671f968c07ca1aab634ae.zip gcc-0b1c4b83e44a89b2736671f968c07ca1aab634ae.tar.gz gcc-0b1c4b83e44a89b2736671f968c07ca1aab634ae.tar.bz2