diff options
author | Hongtao Liu <liuhongt@gcc.gnu.org> | 2019-10-15 07:44:15 +0000 |
---|---|---|
committer | Hongtao Liu <liuhongt@gcc.gnu.org> | 2019-10-15 07:44:15 +0000 |
commit | a7c4d6d1c29b62d97014d30848ee542e309dbe9c (patch) | |
tree | f77ae6bbe7715b5dc4dbdbf7440e13775e951c96 /gcc/config | |
parent | e622a32db78300821fc1327637ec6413febc2c66 (diff) | |
download | gcc-a7c4d6d1c29b62d97014d30848ee542e309dbe9c.zip gcc-a7c4d6d1c29b62d97014d30848ee542e309dbe9c.tar.gz gcc-a7c4d6d1c29b62d97014d30848ee542e309dbe9c.tar.bz2 |
Add missing mask[z]_roundscale_[round]_s[d,s] intrinsics
gcc/
* config/i386/avx512fintrin.h (_mm_mask_roundscale_ss,
_mm_maskz_roundscale_ss, _mm_mask_roundscale_round_ss,
_mm_maskz_roundscale_round_ss, _mm_mask_roundscale_sd,
_mm_maskz_roundscale_sd, _mm_mask_roundscale_round_sd,
_mm_maskz_roundscale_round_sd): New intrinsics.
(_mm_roundscale_ss, _mm_roundscale_round_ss, _mm_roundscale_sd,
_mm_roundscale_round_sd): Use __builtin_ia32_rndscales?_mask_round
builtins instead of __builtin_ia32_rndscales?_round.
* config/i386/i386-builtin.def (__builtin_ia32_rndscaless_round,
__builtin_ia32_rndscalesd_round): Remove.
(__builtin_ia32_rndscaless_mask_round,
__builtin_ia32_rndscalesd_mask_round): New builtins.
* config/i386/sse.md
(avx512f_rndscale<mode><round_saeonly_name>): Renamed to ...
(avx512f_rndscale<mode><mask_scalar_name><round_saeonly_scalar_name>):
... this, adjust and add subst attributes to make it maskable.
gcc/testsuite/
* gcc.target/i386/avx512f-vrndscaless-1.c: Add scan-assembler-times
directives for newly expected instructions.
* gcc.target/i386/avx512f-vrndscalesd-1.c: Likewise.
* gcc.target/i386/avx512f-vrndscaless-2.c
(avx512f_test): Add tests for new intrinsics.
* gcc.target/i386/avx512f-vrndscalesd-2.c: Likewise.
* gcc.target/i386/avx-1.c (__builtin_ia32_rndscaless_round,
__builtin_ia32_rndscalesd_round): Remove.
(__builtin_ia32_rndscaless_mask_round,
__builtin_ia32_rndscalesd_mask_round): Define.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
From-SVN: r276986
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/i386/avx512fintrin.h | 258 | ||||
-rw-r--r-- | gcc/config/i386/i386-builtin.def | 4 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 9 |
3 files changed, 241 insertions, 30 deletions
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index c2ca4e1..1d08f01 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -9169,10 +9169,43 @@ _mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B, extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R) +_mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, + const int __R) +{ + return (__m128) + __builtin_ia32_rndscaless_mask_round ((__v4sf) __A, + (__v4sf) __B, __imm, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) -1, + __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_roundscale_round_ss (__m128 __A, __mmask8 __B, __m128 __C, + __m128 __D, const int __imm, const int __R) { - return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A, - (__v4sf) __B, __imm, __R); + return (__m128) + __builtin_ia32_rndscaless_mask_round ((__v4sf) __C, + (__v4sf) __D, __imm, + (__v4sf) __A, + (__mmask8) __B, + __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_roundscale_round_ss (__mmask8 __A, __m128 __B, __m128 __C, + const int __imm, const int __R) +{ + return (__m128) + __builtin_ia32_rndscaless_mask_round ((__v4sf) __B, + (__v4sf) __C, __imm, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __A, + __R); } extern __inline __m128d @@ -9180,8 +9213,40 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm, const int __R) { - return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A, - (__v2df) __B, __imm, __R); + return (__m128d) + __builtin_ia32_rndscalesd_mask_round ((__v2df) __A, + (__v2df) __B, __imm, + (__v2df) + _mm_setzero_pd (), + (__mmask8) -1, + __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) +_mm_mask_roundscale_round_sd (__m128d __A, __mmask8 __B, __m128d __C, + __m128d __D, const int __imm, const int __R) +{ + return (__m128d) + __builtin_ia32_rndscalesd_mask_round ((__v2df) __C, + (__v2df) __D, __imm, + (__v2df) __A, + (__mmask8) __B, + __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_roundscale_round_sd (__mmask8 __A, __m128d __B, __m128d __C, + const int __imm, const int __R) +{ + return (__m128d) + __builtin_ia32_rndscalesd_mask_round ((__v2df) __B, + (__v2df) __C, __imm, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __A, + __R); } #else @@ -9211,12 +9276,54 @@ _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm, (int)(C), \ (__v8df)_mm512_setzero_pd(),\ (__mmask8)(A), R)) -#define _mm_roundscale_round_ss(A, B, C, R) \ - ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), (int)(C), R)) -#define _mm_roundscale_round_sd(A, B, C, R) \ - ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (int)(C), R)) +#define _mm_roundscale_round_ss(A, B, I, R) \ + ((__m128) \ + __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \ + (__v4sf) (__m128) (B), \ + (int) (I), \ + (__v4sf) _mm_setzero_ps (), \ + (__mmask8) (-1), \ + (int) (R))) +#define _mm_mask_roundscale_round_ss(A, U, B, C, I, R) \ + ((__m128) \ + __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (B), \ + (__v4sf) (__m128) (C), \ + (int) (I), \ + (__v4sf) (__m128) (A), \ + (__mmask8) (U), \ + (int) (R))) +#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \ + ((__m128) \ + __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \ + (__v4sf) (__m128) (B), \ + (int) (I), \ + (__v4sf) _mm_setzero_ps (), \ + (__mmask8) (U), \ + (int) (R))) +#define _mm_roundscale_round_sd(A, B, I, R) \ + ((__m128d) \ + __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \ + (__v2df) (__m128d) (B), \ 
+ (int) (I), \ + (__v2df) _mm_setzero_pd (), \ + (__mmask8) (-1), \ + (int) (R))) +#define _mm_mask_roundscale_round_sd(A, U, B, C, I, R) \ + ((__m128d) \ + __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (B), \ + (__v2df) (__m128d) (C), \ + (int) (I), \ + (__v2df) (__m128d) (A), \ + (__mmask8) (U), \ + (int) (R))) +#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \ + ((__m128d) \ + __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \ + (__v2df) (__m128d) (B), \ + (int) (I), \ + (__v2df) _mm_setzero_pd (), \ + (__mmask8) (U), \ + (int) (R))) #endif extern __inline __m512 @@ -14812,18 +14919,81 @@ extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm) { - return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A, - (__v4sf) __B, __imm, - _MM_FROUND_CUR_DIRECTION); + return (__m128) + __builtin_ia32_rndscaless_mask_round ((__v4sf) __A, + (__v4sf) __B, __imm, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_roundscale_ss (__m128 __A, __mmask8 __B, __m128 __C, __m128 __D, + const int __imm) +{ + return (__m128) + __builtin_ia32_rndscaless_mask_round ((__v4sf) __C, + (__v4sf) __D, __imm, + (__v4sf) __A, + (__mmask8) __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_roundscale_ss (__mmask8 __A, __m128 __B, __m128 __C, + const int __imm) +{ + return (__m128) + __builtin_ia32_rndscaless_mask_round ((__v4sf) __B, + (__v4sf) __C, __imm, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __A, + _MM_FROUND_CUR_DIRECTION); } extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm) { - return (__m128d) __builtin_ia32_rndscalesd_round 
((__v2df) __A, - (__v2df) __B, __imm, - _MM_FROUND_CUR_DIRECTION); + return (__m128d) + __builtin_ia32_rndscalesd_mask_round ((__v2df) __A, + (__v2df) __B, __imm, + (__v2df) + _mm_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_roundscale_sd (__m128d __A, __mmask8 __B, __m128d __C, __m128d __D, + const int __imm) +{ + return (__m128d) + __builtin_ia32_rndscalesd_mask_round ((__v2df) __C, + (__v2df) __D, __imm, + (__v2df) __A, + (__mmask8) __B, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_roundscale_sd (__mmask8 __A, __m128d __B, __m128d __C, + const int __imm) +{ + return (__m128d) + __builtin_ia32_rndscalesd_mask_round ((__v2df) __B, + (__v2df) __C, __imm, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __A, + _MM_FROUND_CUR_DIRECTION); } #else @@ -14853,12 +15023,54 @@ _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm) (int)(C), \ (__v8df)_mm512_setzero_pd(),\ (__mmask8)(A), _MM_FROUND_CUR_DIRECTION)) -#define _mm_roundscale_ss(A, B, C) \ - ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \ - (__v4sf)(__m128)(B), (int)(C), _MM_FROUND_CUR_DIRECTION)) -#define _mm_roundscale_sd(A, B, C) \ - ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (int)(C), _MM_FROUND_CUR_DIRECTION)) +#define _mm_roundscale_ss(A, B, I) \ + ((__m128) \ + __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \ + (__v4sf) (__m128) (B), \ + (int) (I), \ + (__v4sf) _mm_setzero_ps (), \ + (__mmask8) (-1), \ + _MM_FROUND_CUR_DIRECTION)) +#define _mm_mask_roundscale_ss(A, U, B, C, I) \ + ((__m128) \ + __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (B), \ + (__v4sf) (__m128) (C), \ + (int) (I), \ + (__v4sf) (__m128) (A), \ + (__mmask8) (U), \ + _MM_FROUND_CUR_DIRECTION)) +#define _mm_maskz_roundscale_ss(U, 
A, B, I) \ + ((__m128) \ + __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \ + (__v4sf) (__m128) (B), \ + (int) (I), \ + (__v4sf) _mm_setzero_ps (), \ + (__mmask8) (U), \ + _MM_FROUND_CUR_DIRECTION)) +#define _mm_roundscale_sd(A, B, I) \ + ((__m128d) \ + __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \ + (__v2df) (__m128d) (B), \ + (int) (I), \ + (__v2df) _mm_setzero_pd (), \ + (__mmask8) (-1), \ + _MM_FROUND_CUR_DIRECTION)) +#define _mm_mask_roundscale_sd(A, U, B, C, I) \ + ((__m128d) \ + __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (B), \ + (__v2df) (__m128d) (C), \ + (int) (I), \ + (__v2df) (__m128d) (A), \ + (__mmask8) (U), \ + _MM_FROUND_CUR_DIRECTION)) +#define _mm_maskz_roundscale_sd(U, A, B, I) \ + ((__m128d) \ + __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \ + (__v2df) (__m128d) (B), \ + (int) (I), \ + (__v2df) _mm_setzero_pd (), \ + (__mmask8) (U), \ + _MM_FROUND_CUR_DIRECTION)) #endif #ifdef __OPTIMIZE__ diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 6ac820e..1102833 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -2828,8 +2828,8 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia3 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmmulv4sf3_mask_round, "__builtin_ia32_mulss_mask_round", IX86_BUILTIN_MULSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) 
V2DF_FTYPE_V2DF_V2DF_INT_INT) -BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev2df_mask_round, "__builtin_ia32_rndscalesd_mask_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev4sf_mask_round, "__builtin_ia32_rndscaless_mask_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmscalefv2df_mask_round, "__builtin_ia32_scalefsd_mask_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 07922a1..f474eed 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -9694,18 +9694,17 @@ (set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) -(define_insn "avx512f_rndscale<mode><round_saeonly_name>" +(define_insn "avx512f_rndscale<mode><mask_scalar_name><round_saeonly_scalar_name>" [(set (match_operand:VF_128 0 "register_operand" "=v") (vec_merge:VF_128 (unspec:VF_128 - [(match_operand:VF_128 1 "register_operand" "v") - (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") + [(match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>") (match_operand:SI 3 "const_0_to_255_operand")] UNSPEC_ROUND) - (match_dup 1) + (match_operand:VF_128 1 "register_operand" "v") 
(const_int 1)))] "TARGET_AVX512F" - "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}" + "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}" [(set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) |