diff options
Diffstat (limited to 'gcc/config/i386')
-rw-r--r-- | gcc/config/i386/avx512fintrin.h | 209 | ||||
-rw-r--r-- | gcc/config/i386/i386-builtin-types.def | 2 | ||||
-rw-r--r-- | gcc/config/i386/i386-builtin.def | 4 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 2 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 4 |
5 files changed, 219 insertions, 2 deletions
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index afc80d6..15573c5 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -1415,6 +1415,29 @@ _mm_add_round_sd (__m128d __A, __m128d __B, const int __R) __R); } +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_add_round_sd (__m128d __W, __mmask8 __U, __m128d __A, + __m128d __B, const int __R) +{ + return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B, + const int __R) +{ + return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U, __R); +} + extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_add_round_ss (__m128 __A, __m128 __B, const int __R) @@ -1424,6 +1447,29 @@ _mm_add_round_ss (__m128 __A, __m128 __B, const int __R) __R); } +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_add_round_ss (__m128 __W, __mmask8 __U, __m128 __A, + __m128 __B, const int __R) +{ + return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B, + const int __R) +{ + return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U, __R); +} + extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_round_sd (__m128d __A, __m128d __B, const int __R) @@ -1433,6 +1479,29 @@ _mm_sub_round_sd (__m128d __A, __m128d __B, const int __R) __R); } +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_sub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, + __m128d __B, const int __R) +{ + return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B, + const int __R) +{ + return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U, __R); +} + extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_sub_round_ss (__m128 __A, __m128 __B, const int __R) @@ -1442,18 +1511,66 @@ _mm_sub_round_ss (__m128 __A, __m128 __B, const int __R) __R); } +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_sub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, + __m128 __B, const int __R) +{ + return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B, + const int __R) +{ + return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U, __R); +} + #else #define _mm_add_round_sd(A, B, C) \ (__m128d)__builtin_ia32_addsd_round(A, B, C) +#define _mm_mask_add_round_sd(W, U, A, B, C) \ + (__m128d)__builtin_ia32_addsd_mask_round(A, B, W, U, C) + +#define _mm_maskz_add_round_sd(U, A, B, C) \ + (__m128d)__builtin_ia32_addsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C) + #define _mm_add_round_ss(A, B, C) \ (__m128)__builtin_ia32_addss_round(A, B, C) +#define _mm_mask_add_round_ss(W, U, A, B, C) \ + (__m128)__builtin_ia32_addss_mask_round(A, B, W, U, C) + +#define _mm_maskz_add_round_ss(U, A, B, C) \ + (__m128)__builtin_ia32_addss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C) + #define _mm_sub_round_sd(A, B, C) \ (__m128d)__builtin_ia32_subsd_round(A, B, C) +#define _mm_mask_sub_round_sd(W, U, A, B, C) \ + (__m128d)__builtin_ia32_subsd_mask_round(A, B, W, U, C) + +#define _mm_maskz_sub_round_sd(U, A, B, C) \ + (__m128d)__builtin_ia32_subsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C) + #define _mm_sub_round_ss(A, B, C) \ (__m128)__builtin_ia32_subss_round(A, B, C) + +#define _mm_mask_sub_round_ss(W, U, A, B, C) \ + (__m128)__builtin_ia32_subss_mask_round(A, B, W, U, C) + +#define _mm_maskz_sub_round_ss(U, A, B, C) \ + (__m128)__builtin_ia32_subss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C) + #endif #ifdef __OPTIMIZE__ @@ -10868,6 +10985,52 @@ _mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B) _MM_FROUND_CUR_DIRECTION); } +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_add_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_add_sd (__mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_add_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_add_ss (__mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_sub_pd (__m512d __A, __m512d __B) @@ -10928,6 +11091,52 @@ _mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B) _MM_FROUND_CUR_DIRECTION); } +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_sub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_sub_sd (__mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_sub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_sub_ss (__mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mul_pd (__m512d __A, __m512d __B) diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index 1334633..b3620ed 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -1004,6 +1004,8 @@ DEF_FUNCTION_TYPE (V8DF, V8DI, V8DF, QI, INT) DEF_FUNCTION_TYPE (V8SF, V8DI, V8SF, QI, INT) DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SF, HI, INT) DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, V8DF, UQI, INT) +DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF, UQI, INT) +DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, UQI, INT) DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, QI, INT) DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF, V4SF, QI, INT) DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF, QI, INT) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 43b5b62..5556f59 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -2387,7 +2387,9 @@ BDESC_FIRST (round_args, ROUND_ARGS, OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_mask_round, "__builtin_ia32_addsd_mask_round", IX86_BUILTIN_ADDSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_mask_round, "__builtin_ia32_addss_mask_round", IX86_BUILTIN_ADDSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) UQI_FTYPE_V8DF_V8DF_INT_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) UHI_FTYPE_V16SF_V16SF_INT_UHI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI_INT) @@ -2462,7 +2464,9 @@ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_mask_round, "__builtin_ia32_subsd_mask_round", IX86_BUILTIN_SUBSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_mask_round, "__builtin_ia32_subss_mask_round", IX86_BUILTIN_SUBSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT) BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 7547539..9c429de 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -36183,6 +36183,8 @@ ix86_expand_round_builtin (const struct builtin_description *d, case INT_FTYPE_V2DF_V2DF_INT_INT: return ix86_expand_sse_comi_round (d, exp, target); case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT: + case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT: + case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT: case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT: case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT: case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT: diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index beb155d0..98104e4 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1568,7 +1568,7 @@ (set_attr "prefix" "<mask_prefix3>") (set_attr "mode" "<MODE>")]) -(define_insn "<sse>_vm<plusminus_insn><mode>3<round_name>" +(define_insn "<sse>_vm<plusminus_insn><mode>3<mask_name><round_name>" [(set (match_operand:VF_128 0 "register_operand" "=x,v") (vec_merge:VF_128 (plusminus:VF_128 @@ -1579,7 +1579,7 @@ "TARGET_SSE" "@ <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2} - v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}" + v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseadd") (set_attr "prefix" "<round_prefix>") |