aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2019-03-07 20:10:21 +0100
committerJakub Jelinek <jakub@gcc.gnu.org>2019-03-07 20:10:21 +0100
commit459d21c6cffb2c74d2644e21db372ba8a0c27eb6 (patch)
treecf908f801639022dc63d87abf63371f6f03b936d /gcc/config
parent8f5439bea30bd2370638261ec7613628c8918d7d (diff)
downloadgcc-459d21c6cffb2c74d2644e21db372ba8a0c27eb6.zip
gcc-459d21c6cffb2c74d2644e21db372ba8a0c27eb6.tar.gz
gcc-459d21c6cffb2c74d2644e21db372ba8a0c27eb6.tar.bz2
re PR target/89602 (Missing AVX512 intrinsics)
PR target/89602 * config/i386/sse.md (avx512f_mov<ssescalarmodelower>_mask, *avx512f_load<mode>_mask, avx512f_store<mode>_mask): New define_insns. (avx512f_load<mode>_mask): New define_expand. * config/i386/i386-builtin.def (__builtin_ia32_loadsd_mask, __builtin_ia32_loadss_mask, __builtin_ia32_storesd_mask, __builtin_ia32_storess_mask, __builtin_ia32_movesd_mask, __builtin_ia32_movess_mask): New builtins. * config/i386/avx512fintrin.h (_mm_mask_load_ss, _mm_maskz_load_ss, _mm_mask_load_sd, _mm_maskz_load_sd, _mm_mask_move_ss, _mm_maskz_move_ss, _mm_mask_move_sd, _mm_maskz_move_sd, _mm_mask_store_ss, _mm_mask_store_sd): New intrinsics. * gcc.target/i386/avx512f-vmovss-1.c: New test. * gcc.target/i386/avx512f-vmovss-2.c: New test. * gcc.target/i386/avx512f-vmovss-3.c: New test. * gcc.target/i386/avx512f-vmovsd-1.c: New test. * gcc.target/i386/avx512f-vmovsd-2.c: New test. * gcc.target/i386/avx512f-vmovsd-3.c: New test. From-SVN: r269467
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/i386/avx512fintrin.h77
-rw-r--r--gcc/config/i386/i386-builtin.def6
-rw-r--r--gcc/config/i386/sse.md61
3 files changed, 144 insertions, 0 deletions
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 68320c2..3a4e0ad 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -6273,6 +6273,83 @@ _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
(__mmask16) __U);
}
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float *__P)
+{
+ return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) __W, __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_load_ss (__mmask8 __U, const float *__P)
+{
+ return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) _mm_setzero_ps (),
+ __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double *__P)
+{
+ return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) __W, __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_load_sd (__mmask8 __U, const double *__P)
+{
+ return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) _mm_setzero_pd (),
+ __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B,
+ (__v4sf) __W, __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B,
+ (__v4sf) _mm_setzero_ps (), __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
+ (__v2df) __W, __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
+ (__v2df) _mm_setzero_pd (),
+ __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_store_ss (float *__P, __mmask8 __U, __m128 __A)
+{
+ __builtin_ia32_storess_mask (__P, (__v4sf) __A, (__mmask8) __U);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_store_sd (double *__P, __mmask8 __U, __m128d __A)
+{
+ __builtin_ia32_storesd_mask (__P, (__v2df) __A, (__mmask8) __U);
+}
+
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 88005f4..eeef341 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -255,6 +255,10 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev16sf_mask, "__builtin_
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loaddf_mask, "__builtin_ia32_loadsd_mask", IX86_BUILTIN_LOADSD_MASK, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE_V2DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadsf_mask, "__builtin_ia32_loadss_mask", IX86_BUILTIN_LOADSS_MASK, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT_V4SF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storedf_mask, "__builtin_ia32_storesd_mask", IX86_BUILTIN_STORESD_MASK, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storesf_mask, "__builtin_ia32_storess_mask", IX86_BUILTIN_STORESS_MASK, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF_UQI)
BDESC (OPTION_MASK_ISA_LWP, 0, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID)
BDESC (OPTION_MASK_ISA_LWP, 0, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID)
@@ -1470,6 +1474,8 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vternlogv16si_mask, "__built
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movdf_mask, "__builtin_ia32_movesd_mask", IX86_BUILTIN_MOVSD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movsf_mask, "__builtin_ia32_movess_mask", IX86_BUILTIN_MOVSS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ac29949..259063f 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1151,6 +1151,67 @@
(set_attr "memory" "none,load")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn "avx512f_mov<ssescalarmodelower>_mask"
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128
+ (vec_merge:VF_128
+ (match_operand:VF_128 2 "register_operand" "v")
+ (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
+ (match_operand:QI 4 "register_operand" "Yk"))
+ (match_operand:VF_128 1 "register_operand" "v")
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "vmov<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_expand "avx512f_load<mode>_mask"
+ [(set (match_operand:<ssevecmode> 0 "register_operand")
+ (vec_merge:<ssevecmode>
+ (vec_merge:<ssevecmode>
+ (vec_duplicate:<ssevecmode>
+ (match_operand:MODEF 1 "memory_operand"))
+ (match_operand:<ssevecmode> 2 "nonimm_or_0_operand")
+ (match_operand:QI 3 "register_operand"))
+ (match_dup 4)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "operands[4] = CONST0_RTX (<ssevecmode>mode);")
+
+(define_insn "*avx512f_load<mode>_mask"
+ [(set (match_operand:<ssevecmode> 0 "register_operand" "=v")
+ (vec_merge:<ssevecmode>
+ (vec_merge:<ssevecmode>
+ (vec_duplicate:<ssevecmode>
+ (match_operand:MODEF 1 "memory_operand" "m"))
+ (match_operand:<ssevecmode> 2 "nonimm_or_0_operand" "0C")
+ (match_operand:QI 3 "register_operand" "Yk"))
+ (match_operand:<ssevecmode> 4 "const0_operand" "C")
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "vmov<ssescalarmodesuffix>\t{%1, %0%{%3%}%N2|%0%{3%}%N2, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "memory" "load")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_store<mode>_mask"
+ [(set (match_operand:MODEF 0 "memory_operand" "=m")
+ (if_then_else:MODEF
+ (and:QI (match_operand:QI 2 "register_operand" "Yk")
+ (const_int 1))
+ (vec_select:MODEF
+ (match_operand:<ssevecmode> 1 "register_operand" "v")
+ (parallel [(const_int 0)]))
+ (match_dup 0)))]
+ "TARGET_AVX512F"
+ "vmov<ssescalarmodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "memory" "store")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "<avx512>_blendm<mode>"
[(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
(vec_merge:V48_AVX512VL