vect: Support v4hi -> v4qi.

gcc/ChangeLog:

	PR target/107432
	* config/i386/mmx.md (VI2_32_64): New mode iterator.
	(mmxhalfmode): New mode attribute.
	(mmxhalfmodelower): Ditto.
	(truncv2hiv2qi2): Extend to V4HI mode and rename from
	truncv2hiv2qi2 to trunc<mode><mmxhalfmodelower>2.

gcc/testsuite/ChangeLog:

	PR target/107432
	* gcc.target/i386/pr107432-1.c: Modify test.
	* gcc.target/i386/pr107432-6.c: Add test.
	* gcc.target/i386/pr108938-3.c: Add -mno-avx for now, because the
	new truncv4hiv4qi support affects the bswap optimization; a
	Bugzilla report was opened for this.
author: Hu, Lin1 <lin1.hu@intel.com> 2024-02-28 18:11:55 +0800
committer: Hu, Lin1 <lin1.hu@intel.com> 2024-06-27 16:02:04 +0800
commit: e5f8a39941f6f0f25dac88bd71fd368fb284a10f (patch)
tree: 24793d27041f806807ef3311f0f37579c41b3396 /gcc
parent: c320a7efcd35ba6c6be70dc9b2fe562a9673e363 (diff)
download: gcc-e5f8a39941f6f0f25dac88bd71fd368fb284a10f.zip
gcc-e5f8a39941f6f0f25dac88bd71fd368fb284a10f.tar.gz
gcc-e5f8a39941f6f0f25dac88bd71fd368fb284a10f.tar.bz2
4 files changed, 44 insertions, 17 deletions
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index ea53f51..24c0516 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -67,6 +67,9 @@
 ;; 4-byte integer vector modes
 (define_mode_iterator VI_32 [V4QI V2HI])
 
+;; 8-byte and 4-byte HImode vector modes
+(define_mode_iterator VI2_32_64 [(V4HI "TARGET_MMX_WITH_SSE") V2HI])
+
 ;; 4-byte and 2-byte integer vector modes
 (define_mode_iterator VI_16_32 [V4QI V2QI V2HI])
 
@@ -106,6 +109,12 @@
 (define_mode_attr mmxdoublemode
   [(V8QI "V8HI") (V4HI "V4SI")])
 
+(define_mode_attr mmxhalfmode
+  [(V4HI "V4QI") (V2HI "V2QI")])
+
+(define_mode_attr mmxhalfmodelower
+  [(V4HI "v4qi") (V2HI "v2qi")])
+
 ;; Mapping of vector float modes to an integer mode of the same size
 (define_mode_attr mmxintvecmode
   [(V2SF "V2SI") (V2SI "V2SI") (V4HI "V4HI") (V8QI "V8QI")
@@ -4880,10 +4889,10 @@
   DONE;
 })
 
-(define_insn "truncv2hiv2qi2"
-  [(set (match_operand:V2QI 0 "register_operand" "=v")
-	(truncate:V2QI
-	  (match_operand:V2HI 1 "register_operand" "v")))]
+(define_insn "trunc<mode><mmxhalfmodelower>2"
+  [(set (match_operand:<mmxhalfmode> 0 "register_operand" "=v")
+	(truncate:<mmxhalfmode>
+	  (match_operand:VI2_32_64 1 "register_operand" "v")))]
   "TARGET_AVX512VL && TARGET_AVX512BW"
   "vpmovwb\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-1.c b/gcc/testsuite/gcc.target/i386/pr107432-1.c
index a4f3744..afdf367 100644
--- a/gcc/testsuite/gcc.target/i386/pr107432-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr107432-1.c
@@ -7,7 +7,8 @@
 /* { dg-final { scan-assembler-times "vpmovdw" 8 { target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpmovdb" 6 { target { ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpmovdb" 8 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
+/* { dg-final { scan-assembler-times "vpmovwb" 8 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovwb" 10 { target { ! ia32 } } } } */
 
 #include <x86intrin.h>
 
@@ -113,6 +114,11 @@ __v2qi	mm32_cvtepi16_epi8_builtin_convertvector(__v2hi a)
   return __builtin_convertvector((__v2hi)a, __v2qi);
 }
 
+__v4qi	mm64_cvtepi16_epi8_builtin_convertvector(__v4hi a)
+{
+  return __builtin_convertvector((__v4hi)a, __v4qi);
+}
+
 __v8qi	mm_cvtepi16_epi8_builtin_convertvector(__m128i a)
 {
   return __builtin_convertvector((__v8hi)a, __v8qi);
@@ -218,6 +224,11 @@ __v2qu	mm32_cvtepu16_epu8_builtin_convertvector(__v2hu a)
   return __builtin_convertvector((__v2hu)a, __v2qu);
 }
 
+__v4qu	mm64_cvtepu16_epu8_builtin_convertvector(__v4hu a)
+{
+  return __builtin_convertvector((__v4hu)a, __v4qu);
+}
+
 __v8qu	mm_cvtepu16_epu8_builtin_convertvector(__m128i a)
 {
   return __builtin_convertvector((__v8hu)a, __v8qu);
diff --git a/gcc/testsuite/gcc.target/i386/pr107432-6.c b/gcc/testsuite/gcc.target/i386/pr107432-6.c
index 4a68a10..dd585b2 100644
--- a/gcc/testsuite/gcc.target/i386/pr107432-6.c
+++ b/gcc/testsuite/gcc.target/i386/pr107432-6.c
@@ -1,18 +1,15 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512fp16 -mavx512vl -mavx512bw -O2 -mavx512dq -fno-trapping-math" } */
-/* { dg-final { scan-assembler-times "vcvttpd2dq" 2 { target { ia32 } } } } */
-/* { dg-final { scan-assembler-times "vcvttpd2dq" 3 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "vcvttpd2udq" 2 { target { ia32 } } } } */
-/* { dg-final { scan-assembler-times "vcvttpd2udq" 3 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "vcvttps2dq" 3 { target { ia32 } } } } */
-/* { dg-final { scan-assembler-times "vcvttps2dq" 4 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "vcvttps2udq" 3 { target { ia32 } } } } */
-/* { dg-final { scan-assembler-times "vcvttps2udq" 4 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "vcvttph2w" 4 } } */
-/* { dg-final { scan-assembler-times "vcvttph2uw" 4 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dq" 4 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dq" 6 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttps2dq" 6 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttps2dq" 8 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttph2w" 8 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttph2w" 10 { target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpmovdb" 10 { target { ia32 } } } } */
 /* { dg-final { scan-assembler-times "vpmovdb" 14 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
+/* { dg-final { scan-assembler-times "vpmovwb" 8 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovwb" 10 { target { ! ia32 } } } } */
 
 #include <x86intrin.h>
 
@@ -103,6 +100,11 @@ __v2qi	mm32_cvtph_epi8_builtin_convertvector(__v2hf a)
   return __builtin_convertvector((__v2hf)a, __v2qi);
 }
 
+__v4qi	mm64_cvtph_epi8_builtin_convertvector(__v4hf a)
+{
+  return __builtin_convertvector((__v4hf)a, __v4qi);
+}
+
 __v8qi	mm128_cvtph_epi8_builtin_convertvector(__v8hf a)
 {
   return __builtin_convertvector((__v8hf)a, __v8qi);
@@ -123,6 +125,11 @@ __v2qu	mm32_cvtph_epu8_builtin_convertvector(__v2hf a)
   return __builtin_convertvector((__v2hf)a, __v2qu);
 }
 
+__v4qu	mm64_cvtph_epu8_builtin_convertvector(__v4hf a)
+{
+  return __builtin_convertvector((__v4hf)a, __v4qu);
+}
+
 __v8qu	mm128_cvtph_epu8_builtin_convertvector(__v8hf a)
 {
   return __builtin_convertvector((__v8hf)a, __v8qu);
diff --git a/gcc/testsuite/gcc.target/i386/pr108938-3.c b/gcc/testsuite/gcc.target/i386/pr108938-3.c
index 32ac544..757a0c4 100644
--- a/gcc/testsuite/gcc.target/i386/pr108938-3.c
+++ b/gcc/testsuite/gcc.target/i386/pr108938-3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -mno-movbe" } */
+/* { dg-options "-O2 -ftree-vectorize -mno-movbe -mno-avx" } */
 /* { dg-final { scan-assembler-times "bswap\[\t ]+" 2 { target { ! ia32 } } } } */
 /* { dg-final { scan-assembler-times "bswap\[\t ]+" 3 { target ia32 } } } */
author	Hu, Lin1 <lin1.hu@intel.com>	2024-02-28 18:11:55 +0800
committer	Hu, Lin1 <lin1.hu@intel.com>	2024-06-27 16:02:04 +0800
commit	e5f8a39941f6f0f25dac88bd71fd368fb284a10f (patch)
tree	24793d27041f806807ef3311f0f37579c41b3396 /gcc
parent	c320a7efcd35ba6c6be70dc9b2fe562a9673e363 (diff)
download	gcc-e5f8a39941f6f0f25dac88bd71fd368fb284a10f.zip gcc-e5f8a39941f6f0f25dac88bd71fd368fb284a10f.tar.gz gcc-e5f8a39941f6f0f25dac88bd71fd368fb284a10f.tar.bz2