aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorJulia Koval <julia.koval@intel.com>2017-06-08 15:04:43 +0200
committerKirill Yukhin <kyukhin@gcc.gnu.org>2017-06-08 13:04:43 +0000
commitc46f9051654ff52ed083084c109e9247b7fce9aa (patch)
treeab80f354dbded12efd451d0cefc525b221220c3d /gcc
parent5ed418891b23588989460bc85fb53c60fbabbcbc (diff)
downloadgcc-c46f9051654ff52ed083084c109e9247b7fce9aa.zip
gcc-c46f9051654ff52ed083084c109e9247b7fce9aa.tar.gz
gcc-c46f9051654ff52ed083084c109e9247b7fce9aa.tar.bz2
Add mov[us]wb store intrinsics.
gcc/ * config/i386/avx512bwintrin.h (_mm512_mask_cvtepi16_storeu_epi8, _mm512_mask_cvtsepi16_storeu_epi8, _mm512_mask_cvtusepi16_storeu_epi8): New intrinsics. * config/i386/avx512vlbwintrin.h (_mm256_mask_cvtepi16_storeu_epi8, _mm_mask_cvtsepi16_storeu_epi8, _mm256_mask_cvtsepi16_storeu_epi8, _mm_mask_cvtusepi16_storeu_epi8, _mm256_mask_cvtusepi16_storeu_epi8, _mm_mask_cvtepi16_storeu_epi8): New intrinsics. * config/i386/i386-builtin-types.def (PV8Q, V8QI): New pointer type. (VOID_FTYPE_PV32QI_V32HI_USI, VOID_FTYPE_PV8QI_V8HI_UQI, VOID_FTYPE_PV16QI_V16HI_UHI): New function types. * config/i386/i386-builtin.def (__builtin_ia32_pmovwb128mem_mask, __builtin_ia32_pmovwb256mem_mask, __builtin_ia32_pmovswb128mem_mask, __builtin_ia32_pmovswb256mem_mask, __builtin_ia32_pmovuswb128mem_mask, __builtin_ia32_pmovuswb256mem_mask, __builtin_ia32_pmovuswb512mem_mask, __builtin_ia32_pmovswb512mem_mask) __builtin_ia32_pmovwb512mem_mask): New builtins. gcc/testsuite/ * gcc.target/i386/avx512bw-vpmovswb-1.c: Add new intrinsics to test. * gcc.target/i386/avx512bw-vpmovswb-2.c: Ditto. * gcc.target/i386/avx512bw-vpmovuswb-1.c: Ditto. * gcc.target/i386/avx512bw-vpmovuswb-2.c: Ditto. * gcc.target/i386/avx512bw-vpmovwb-1.c: Ditto. * gcc.target/i386/avx512bw-vpmovwb-2.c: Ditto. From-SVN: r249012
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog19
-rw-r--r--gcc/config/i386/avx512bwintrin.h21
-rw-r--r--gcc/config/i386/avx512vlbwintrin.h42
-rw-r--r--gcc/config/i386/i386-builtin-types.def4
-rw-r--r--gcc/config/i386/i386-builtin.def9
-rw-r--r--gcc/config/i386/i386.c3
-rw-r--r--gcc/testsuite/ChangeLog9
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-1.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-1.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-2.c10
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-1.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-2.c10
13 files changed, 164 insertions, 9 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 461c06b..d91f384 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,24 @@
2017-08-08 Julia Koval <julia.koval@intel.com>
+ * config/i386/avx512bwintrin.h (_mm512_mask_cvtepi16_storeu_epi8,
+ _mm512_mask_cvtsepi16_storeu_epi8,
+ _mm512_mask_cvtusepi16_storeu_epi8): New intrinsics.
+ * config/i386/avx512vlbwintrin.h (_mm256_mask_cvtepi16_storeu_epi8,
+ _mm_mask_cvtsepi16_storeu_epi8, _mm256_mask_cvtsepi16_storeu_epi8,
+ _mm_mask_cvtusepi16_storeu_epi8, _mm256_mask_cvtusepi16_storeu_epi8,
+ _mm_mask_cvtepi16_storeu_epi8): New intrinsics.
+ * config/i386/i386-builtin-types.def (PV8Q, V8QI): New pointer type.
+ (VOID_FTYPE_PV32QI_V32HI_USI, VOID_FTYPE_PV8QI_V8HI_UQI,
+ VOID_FTYPE_PV16QI_V16HI_UHI): New function types.
+ * config/i386/i386-builtin.def (__builtin_ia32_pmovwb128mem_mask,
+ __builtin_ia32_pmovwb256mem_mask, __builtin_ia32_pmovswb128mem_mask,
+ __builtin_ia32_pmovswb256mem_mask, __builtin_ia32_pmovuswb128mem_mask,
+ __builtin_ia32_pmovuswb256mem_mask,
+ __builtin_ia32_pmovuswb512mem_mask, __builtin_ia32_pmovswb512mem_mask)
+ __builtin_ia32_pmovwb512mem_mask): New builtins.
+
+2017-08-08 Julia Koval <julia.koval@intel.com>
+
PR target/73350,80862
* config/i386/subst.md (round): Fix round pattern.
* config/i386/i386.c (ix86_erase_embedded_rounding):
diff --git a/gcc/config/i386/avx512bwintrin.h b/gcc/config/i386/avx512bwintrin.h
index 2b0fb6b..71a75ee 100644
--- a/gcc/config/i386/avx512bwintrin.h
+++ b/gcc/config/i386/avx512bwintrin.h
@@ -425,6 +425,13 @@ _mm512_cvtepi16_epi8 (__m512i __A)
(__mmask32) -1);
}
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
+{
+ __builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
+}
+
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A)
@@ -452,6 +459,13 @@ _mm512_cvtsepi16_epi8 (__m512i __A)
(__mmask32) -1);
}
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
+{
+ __builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
+}
+
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A)
@@ -489,6 +503,13 @@ _mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A)
__M);
}
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
+{
+ __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
+}
+
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A)
diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h
index 0dc7d7a..a811801 100644
--- a/gcc/config/i386/avx512vlbwintrin.h
+++ b/gcc/config/i386/avx512vlbwintrin.h
@@ -216,6 +216,13 @@ _mm256_cvtepi16_epi8 (__m256i __A)
(__mmask16) -1);
}
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtepi16_storeu_epi8 (void * __P, __mmask16 __M,__m256i __A)
+{
+ __builtin_ia32_pmovwb256mem_mask ((__v16qi *) __P , (__v16hi) __A, __M);
+}
+
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
@@ -244,6 +251,13 @@ _mm_cvtsepi16_epi8 (__m128i __A)
(__mmask8) -1);
}
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask8 __M,__m128i __A)
+{
+ __builtin_ia32_pmovswb128mem_mask ((__v8qi *) __P , (__v8hi) __A, __M);
+}
+
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtsepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
@@ -272,6 +286,13 @@ _mm256_cvtsepi16_epi8 (__m256i __A)
(__mmask16) -1);
}
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M,__m256i __A)
+{
+ __builtin_ia32_pmovswb256mem_mask ((__v16qi *) __P , (__v16hi) __A, __M);
+}
+
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtsepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
@@ -300,6 +321,13 @@ _mm_cvtusepi16_epi8 (__m128i __A)
(__mmask8) -1);
}
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M,__m128i __A)
+{
+ __builtin_ia32_pmovuswb128mem_mask ((__v8qi *) __P , (__v8hi) __A, __M);
+}
+
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtusepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
@@ -329,6 +357,13 @@ _mm256_cvtusepi16_epi8 (__m256i __A)
(__mmask16) -1);
}
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask16 __M,__m256i __A)
+{
+ __builtin_ia32_pmovuswb256mem_mask ((__v16qi *) __P , (__v16hi) __A, __M);
+}
+
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtusepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
@@ -4009,6 +4044,13 @@ _mm_cvtepi16_epi8 (__m128i __A)
(__mmask8) -1);
}
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M,__m128i __A)
+{
+ __builtin_ia32_pmovwb128mem_mask ((__v8qi *) __P , (__v8hi) __A, __M);
+}
+
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 8de3086..19d876d 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -155,6 +155,7 @@ DEF_POINTER_TYPE (PV4SF, V4SF)
DEF_POINTER_TYPE (PV8DF, V8DF)
DEF_POINTER_TYPE (PV8SF, V8SF)
DEF_POINTER_TYPE (PV4SI, V4SI)
+DEF_POINTER_TYPE (PV8QI, V8QI)
DEF_POINTER_TYPE (PV8HI, V8HI)
DEF_POINTER_TYPE (PV8SI, V8SI)
DEF_POINTER_TYPE (PV8DI, V8DI)
@@ -964,6 +965,7 @@ DEF_FUNCTION_TYPE (QI, V2DF, INT, UQI)
DEF_FUNCTION_TYPE (HI, V16SF, INT, UHI)
DEF_FUNCTION_TYPE (QI, V8SF, INT, UQI)
DEF_FUNCTION_TYPE (QI, V4SF, INT, UQI)
+DEF_FUNCTION_TYPE (VOID, PV32QI, V32HI, USI)
DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, UINT, UINT)
DEF_FUNCTION_TYPE (V4HI, HI, HI, HI, HI)
@@ -1106,6 +1108,8 @@ DEF_FUNCTION_TYPE (VOID, PVOID, QI, V2DI, V2DI, INT)
DEF_FUNCTION_TYPE (VOID, QI, V8SI, PCVOID, INT, INT)
DEF_FUNCTION_TYPE (VOID, HI, V16SI, PCVOID, INT, INT)
DEF_FUNCTION_TYPE (VOID, QI, V8DI, PCVOID, INT, INT)
+DEF_FUNCTION_TYPE (VOID, PV8QI, V8HI, UQI)
+DEF_FUNCTION_TYPE (VOID, PV16QI, V16HI, UHI)
DEF_FUNCTION_TYPE_ALIAS (V2DF_FTYPE_V2DF, ROUND)
DEF_FUNCTION_TYPE_ALIAS (V4DF_FTYPE_V4DF, ROUND)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 2663cb9..23e8883 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -378,6 +378,15 @@ BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_sto
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask_store, "__builtin_ia32_pmovwb128mem_mask", IX86_BUILTIN_PMOVWB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8QI_V8HI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask_store, "__builtin_ia32_pmovwb256mem_mask", IX86_BUILTIN_PMOVWB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16HI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask_store, "__builtin_ia32_pmovswb128mem_mask", IX86_BUILTIN_PMOVSWB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8QI_V8HI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask_store, "__builtin_ia32_pmovswb256mem_mask", IX86_BUILTIN_PMOVSWB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16HI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask_store, "__builtin_ia32_pmovuswb128mem_mask", IX86_BUILTIN_PMOVUSWB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8QI_V8HI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask_store, "__builtin_ia32_pmovuswb256mem_mask", IX86_BUILTIN_PMOVUSWB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16HI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovuswb512mem_mask", IX86_BUILTIN_PMOVUSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovswb512mem_mask", IX86_BUILTIN_PMOVSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovwb512mem_mask", IX86_BUILTIN_PMOVWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
/* RDPKRU and WRPKRU. */
BDESC (OPTION_MASK_ISA_PKU, CODE_FOR_rdpkru, "__builtin_ia32_rdpkru", IX86_BUILTIN_RDPKRU, UNKNOWN, (int) UNSIGNED_FTYPE_VOID)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 94265b0..d5c2d46 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -37033,6 +37033,9 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
case VOID_FTYPE_PFLOAT_V16SF_UHI:
case VOID_FTYPE_PFLOAT_V8SF_UQI:
case VOID_FTYPE_PFLOAT_V4SF_UQI:
+ case VOID_FTYPE_PV32QI_V32HI_USI:
+ case VOID_FTYPE_PV16QI_V16HI_UHI:
+ case VOID_FTYPE_PV8QI_V8HI_UQI:
nargs = 2;
klass = store;
/* Reserve memory operand for target. */
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 59b2574..64acda4 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,12 @@
+2017-08-08 Julia Koval <julia.koval@intel.com>
+
+ * gcc.target/i386/avx512bw-vpmovswb-1.c: Add new intrinsics to test.
+ * gcc.target/i386/avx512bw-vpmovswb-2.c: Ditto.
+ * gcc.target/i386/avx512bw-vpmovuswb-1.c: Ditto.
+ * gcc.target/i386/avx512bw-vpmovuswb-2.c: Ditto.
+ * gcc.target/i386/avx512bw-vpmovwb-1.c: Ditto.
+ * gcc.target/i386/avx512bw-vpmovwb-2.c: Ditto.
+
2017-06-08 Marek Polacek <polacek@redhat.com>
PR sanitize/80932
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-1.c
index b5b6ef7..6bb87d2 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-1.c
@@ -3,18 +3,21 @@
/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*res1\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*res2\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*res3\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
-volatile __m128i x, z;
-volatile __m256i y;
-volatile __m512i u;
+volatile __m128i x, z, res1;
+volatile __m256i y, res2;
+volatile __m512i u, res3;
volatile __mmask8 m1;
volatile __mmask16 m2;
volatile __mmask32 m3;
@@ -25,10 +28,13 @@ avx512bw_test (void)
z = _mm_cvtsepi16_epi8 (x);
z = _mm_mask_cvtsepi16_epi8 (z, m1, x);
z = _mm_maskz_cvtsepi16_epi8 (m1, x);
+ _mm_mask_cvtsepi16_storeu_epi8 ((void *) &res1, m1, x);
z = _mm256_cvtsepi16_epi8 (y);
z = _mm256_mask_cvtsepi16_epi8 (z, m2, y);
z = _mm256_maskz_cvtsepi16_epi8 (m2, y);
+ _mm256_mask_cvtsepi16_storeu_epi8 ((void *) &res2, m2, y);
y = _mm512_cvtsepi16_epi8 (u);
y = _mm512_mask_cvtsepi16_epi8 (y, m3, u);
y = _mm512_maskz_cvtsepi16_epi8 (m3, u);
+ _mm512_mask_cvtsepi16_storeu_epi8 ((void *) &res3, m3, u);
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-2.c
index 69e25a2..fd9cb4c 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovswb-2.c
@@ -31,9 +31,11 @@ TEST (void)
{
int i, sign;
UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3;
+ char res4[SIZE];
UNION_TYPE (AVX512F_LEN, i_w) src;
MASK_TYPE mask = MASK_VALUE;
char res_ref[32];
+ char res_ref2[SIZE_HALF];
sign = -1;
for (i = 0; i < SIZE; i++)
@@ -41,6 +43,7 @@ TEST (void)
src.a[i] = 1 + 34 * i * sign;
sign = sign * -1;
res2.a[i] = DEFAULT_VALUE;
+ res4[i] = DEFAULT_VALUE;
}
res1.x = INTRINSIC (_cvtsepi16_epi8) (src.x);
@@ -59,4 +62,11 @@ TEST (void)
MASK_ZERO (i_b) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref))
abort ();
+
+ INTRINSIC (_mask_cvtsepi16_storeu_epi8) (res4, mask, src.x);
+ CALC (res_ref2, src.a);
+ MASK_MERGE (i_b) (res_ref2, mask, SIZE);
+
+ if (checkVc (res4, res_ref2, SIZE))
+ abort ();
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-1.c
index e1c62bf..7825e46 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-1.c
@@ -3,18 +3,21 @@
/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*res1\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*res2\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*res3\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
-volatile __m128i x, z;
-volatile __m256i y;
-volatile __m512i u;
+volatile __m128i x, z, res1;
+volatile __m256i y, res2;
+volatile __m512i u, res3;
volatile __mmask8 m1;
volatile __mmask16 m2;
volatile __mmask32 m3;
@@ -25,10 +28,13 @@ avx512bw_test (void)
z = _mm_cvtusepi16_epi8 (x);
z = _mm_mask_cvtusepi16_epi8 (z, m1, x);
z = _mm_maskz_cvtusepi16_epi8 (m1, x);
+ _mm_mask_cvtusepi16_storeu_epi8 ((void *) &res1, m1, x);
z = _mm256_cvtusepi16_epi8 (y);
z = _mm256_mask_cvtusepi16_epi8 (z, m2, y);
z = _mm256_maskz_cvtusepi16_epi8 (m2, y);
+ _mm256_mask_cvtusepi16_storeu_epi8 ((void *) &res2, m2, y);
y = _mm512_cvtusepi16_epi8 (u);
y = _mm512_mask_cvtusepi16_epi8 (y, m3, u);
y = _mm512_maskz_cvtusepi16_epi8 (m3, u);
+ _mm512_mask_cvtusepi16_storeu_epi8 ((void *) &res3, m3, u);
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-2.c
index 094d432..d5198f3 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovuswb-2.c
@@ -23,14 +23,17 @@ TEST (void)
{
int i;
UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3;
+ unsigned char res4[SIZE];
UNION_TYPE (AVX512F_LEN, i_w) src;
MASK_TYPE mask = MASK_VALUE;
unsigned char res_ref[32];
+ unsigned char res_ref2[SIZE];
for (i = 0; i < SIZE; i++)
{
src.a[i] = 1 + 34 * i;
res2.a[i] = DEFAULT_VALUE;
+ res4[i] = DEFAULT_VALUE;
}
res1.x = INTRINSIC (_cvtusepi16_epi8) (src.x);
@@ -49,4 +52,11 @@ TEST (void)
MASK_ZERO (i_b) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref))
abort ();
+
+ INTRINSIC (_mask_cvtusepi16_storeu_epi8) (res4, mask, src.x);
+ CALC (res_ref2, src.a);
+
+ MASK_MERGE (i_b) (res_ref2, mask, SIZE);
+ if (checkVc (res4, res_ref2, SIZE))
+ abort ();
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-1.c
index e7adbb5..a1ad551 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-1.c
@@ -3,18 +3,21 @@
/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*res1\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*res2\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*res3\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
-volatile __m128i x, z;
-volatile __m256i y;
-volatile __m512i u;
+volatile __m128i x, z, res1;
+volatile __m256i y, res2;
+volatile __m512i u, res3;
volatile __mmask8 m1;
volatile __mmask16 m2;
volatile __mmask32 m3;
@@ -25,10 +28,13 @@ avx512bw_test (void)
z = _mm_cvtepi16_epi8 (x);
z = _mm_mask_cvtepi16_epi8 (z, m1, x);
z = _mm_maskz_cvtepi16_epi8 (m1, x);
+ _mm_mask_cvtepi16_storeu_epi8 ((void *) &res1, m1, x);
z = _mm256_cvtepi16_epi8 (y);
z = _mm256_mask_cvtepi16_epi8 (z, m2, y);
z = _mm256_maskz_cvtepi16_epi8 (m2, y);
+ _mm256_mask_cvtepi16_storeu_epi8 ((void *) &res2, m2, y);
y = _mm512_cvtepi16_epi8 (u);
y = _mm512_mask_cvtepi16_epi8 (y, m3, u);
y = _mm512_maskz_cvtepi16_epi8 (m3, u);
+ _mm512_mask_cvtepi16_storeu_epi8 ((void *) &res3, m3, u);
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-2.c
index 32857da..6b0f86f 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-vpmovwb-2.c
@@ -24,9 +24,11 @@ TEST (void)
{
int i, sign;
UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3;
+ char res4[SIZE];
UNION_TYPE (AVX512F_LEN, i_w) src;
MASK_TYPE mask = MASK_VALUE;
char res_ref[32];
+ char res_ref2[SIZE];
sign = -1;
for (i = 0; i < SIZE; i++)
@@ -34,6 +36,7 @@ TEST (void)
src.a[i] = 1 + 34 * i * sign;
sign = sign * -1;
res2.a[i] = DEFAULT_VALUE;
+ res4[i] = DEFAULT_VALUE;
}
res1.x = INTRINSIC (_cvtepi16_epi8) (src.x);
@@ -52,4 +55,11 @@ TEST (void)
MASK_ZERO (i_b) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref))
abort ();
+
+ INTRINSIC (_mask_cvtepi16_storeu_epi8) (res4, mask, src.x);
+ CALC (res_ref2, src.a);
+
+ MASK_MERGE (i_b) (res_ref2, mask, SIZE);
+ if (checkVc (res4, res_ref2, SIZE))
+ abort ();
}