diff options
18 files changed, 370 insertions, 68 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1148b15..0d11aa8 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,6 +1,67 @@ 2018-10-21 H.J. Lu <hongjiu.lu@intel.com> PR target/72782 + * config/i386/avx512fintrin.h (_mm512_fnmsub_round_pd): Use + __builtin_ia32_vfnmsubpd512_mask. + (_mm512_mask_fnmsub_round_pd): Likewise. + (_mm512_fnmsub_pd): Likewise. + (_mm512_mask_fnmsub_pd): Likewise. + (_mm512_maskz_fnmsub_round_pd): Use + __builtin_ia32_vfnmsubpd512_maskz. + (_mm512_maskz_fnmsub_pd): Likewise. + (_mm512_fnmsub_round_ps): Use __builtin_ia32_vfnmsubps512_mask. + (_mm512_mask_fnmsub_round_ps): Likewise. + (_mm512_fnmsub_ps): Likewise. + (_mm512_mask_fnmsub_ps): Likewise. + (_mm512_maskz_fnmsub_round_ps): Use + __builtin_ia32_vfnmsubps512_maskz. + (_mm512_maskz_fnmsub_ps): Likewise. + * config/i386/avx512vlintrin.h (_mm256_mask_fnmsub_pd): Use + __builtin_ia32_vfnmsubpd256_mask. + (_mm256_maskz_fnmsub_pd): Use __builtin_ia32_vfnmsubpd256_maskz. + (_mm_mask_fnmsub_pd): Use __builtin_ia32_vfmaddpd128_mask + (_mm_maskz_fnmsub_pd): Use __builtin_ia32_vfnmsubpd128_maskz. + (_mm256_mask_fnmsub_ps): Use __builtin_ia32_vfnmsubps256_mask. + (_mm256_mask_fnmsub_ps): Use __builtin_ia32_vfnmsubps256_mask. + (_mm256_maskz_fnmsub_ps): Use __builtin_ia32_vfnmsubps256_maskz. + (_mm_mask_fnmsub_ps): Use __builtin_ia32_vfnmsubps128_mask. + (_mm_maskz_fnmsub_ps): Use __builtin_ia32_vfnmsubps128_maskz. + * config/i386/fmaintrin.h (_mm_fnmsub_pd): Use + __builtin_ia32_vfnmsubpd. + (_mm256_fnmsub_pd): Use __builtin_ia32_vfnmsubpd256. + (_mm_fnmsub_ps): Use __builtin_ia32_vfnmsubps. + (_mm256_fnmsub_ps): Use __builtin_ia32_vfnmsubps256. + (_mm_fnmsub_sd): Use __builtin_ia32_vfnmsubsd3. + (_mm_fnmsub_ss): Use __builtin_ia32_vfnmsubss3. + * config/i386/i386-builtin.def: Add + __builtin_ia32_vfnmsubpd256_mask, + __builtin_ia32_vfnmsubpd256_maskz, + __builtin_ia32_vfnmsubpd128_mask, + __builtin_ia32_vfnmsubpd128_maskz, + __builtin_ia32_vfnmsubps256_mask, + __builtin_ia32_vfnmsubps256_maskz, + __builtin_ia32_vfnmsubps128_mask, + __builtin_ia32_vfnmsubps128_maskz, + __builtin_ia32_vfnmsubpd512_mask, + __builtin_ia32_vfnmsubpd512_maskz, + __builtin_ia32_vfnmsubps512_mask, + __builtin_ia32_vfnmsubps512_maskz, __builtin_ia32_vfnmsubss3, + __builtin_ia32_vfnmsubsd3, __builtin_ia32_vfnmsubps, + __builtin_ia32_vfnmsubpd, __builtin_ia32_vfnmsubps256 and. + __builtin_ia32_vfnmsubpd256. + * config/i386/sse.md (fma4i_fnmsub_<mode>): New. + (<avx512>_fnmsub_<mode>_maskz<round_expand_name>): Likewise. + (*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_1): + Likewise. + (*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_2): + Likewise. + (*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_3): + Likewise. + (fmai_vmfnmsub_<mode><round_name>): Likewise. + +2018-10-21 H.J. Lu <hongjiu.lu@intel.com> + + PR target/72782 * config/i386/avx512fintrin.h (_mm512_fnmadd_round_pd): Use __builtin_ia32_vfnmaddpd512_mask. (_mm512_mask_fnmadd_round_pd): Likewise. diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index 1445e9e..001d610 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -3699,10 +3699,10 @@ extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) { - return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, - (__v8df) __B, - -(__v8df) __C, - (__mmask8) -1, __R); + return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) -1, __R); } extern __inline __m512d @@ -3732,20 +3732,20 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C, const int __R) { - return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, - (__v8df) __B, - -(__v8df) __C, - (__mmask8) __U, __R); + return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, __R); } extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) { - return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, - (__v16sf) __B, - -(__v16sf) __C, - (__mmask16) -1, __R); + return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) -1, __R); } extern __inline __m512 @@ -3775,10 +3775,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C, const int __R) { - return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, - (__v16sf) __B, - -(__v16sf) __C, - (__mmask16) __U, __R); + return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, __R); } #else #define _mm512_fmadd_round_pd(A, B, C, R) \ @@ -3902,7 +3902,7 @@ _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, (__m512)__builtin_ia32_vfnmaddps512_maskz(A, B, C, U, R) #define _mm512_fnmsub_round_pd(A, B, C, R) \ - (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, -(C), -1, R) + (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, -1, R) #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \ (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R) @@ -3911,10 +3911,10 @@ _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, (__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R) #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \ - (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, -(C), U, R) + (__m512d)__builtin_ia32_vfnmsubpd512_maskz(A, B, C, U, R) #define _mm512_fnmsub_round_ps(A, B, C, R) \ - (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, -(C), -1, R) + (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, -1, R) #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \ (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R) @@ -3923,7 +3923,7 @@ _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, (__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R) #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \ - (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, -(C), U, R) + (__m512)__builtin_ia32_vfnmsubps512_maskz(A, B, C, U, R) #endif extern __inline __m512i @@ -12768,11 +12768,11 @@ extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C) { - return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, - (__v8df) __B, - -(__v8df) __C, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } extern __inline __m512d @@ -12801,22 +12801,22 @@ extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { - return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, - (__v8df) __B, - -(__v8df) __C, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A, + (__v8df) __B, + (__v8df) __C, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); } extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C) { - return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, - (__v16sf) __B, - -(__v16sf) __C, - (__mmask16) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); } extern __inline __m512 @@ -12845,11 +12845,11 @@ extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { - return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, - (__v16sf) __B, - -(__v16sf) __C, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __C, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); } extern __inline __m256i diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h index b46c38e..7ff7801 100644 --- a/gcc/config/i386/avx512vlintrin.h +++ b/gcc/config/i386/avx512vlintrin.h @@ -4665,10 +4665,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fnmsub_pd (__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { - return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A, - (__v4df) __B, - -(__v4df) __C, - (__mmask8) __U); + return (__m256d) __builtin_ia32_vfnmsubpd256_maskz ((__v4df) __A, + (__v4df) __B, + (__v4df) __C, + (__mmask8) __U); } extern __inline __m128d @@ -4698,10 +4698,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_fnmsub_pd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { - return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A, - (__v2df) __B, - -(__v2df) __C, - (__mmask8) __U); + return (__m128d) __builtin_ia32_vfnmsubpd128_maskz ((__v2df) __A, + (__v2df) __B, + (__v2df) __C, + (__mmask8) __U); } extern __inline __m256 @@ -4731,10 +4731,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_maskz_fnmsub_ps (__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { - return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A, - (__v8sf) __B, - -(__v8sf) __C, - (__mmask8) __U); + return (__m256) __builtin_ia32_vfnmsubps256_maskz ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __C, + (__mmask8) __U); } extern __inline __m128 @@ -4761,10 +4761,10 @@ extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_fnmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { - return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A, - (__v4sf) __B, - -(__v4sf) __C, - (__mmask8) __U); + return (__m128) __builtin_ia32_vfnmsubps128_maskz ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __C, + (__mmask8) __U); } extern __inline __m128i diff --git a/gcc/config/i386/fmaintrin.h b/gcc/config/i386/fmaintrin.h index 0a2f4a7..4f13e81c 100644 --- a/gcc/config/i386/fmaintrin.h +++ b/gcc/config/i386/fmaintrin.h @@ -182,48 +182,48 @@ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, - -(__v2df)__C); + return (__m128d)__builtin_ia32_vfnmsubpd ((__v2df)__A, (__v2df)__B, + (__v2df)__C); } extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C) { - return (__m256d)__builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, - -(__v4df)__C); + return (__m256d)__builtin_ia32_vfnmsubpd256 ((__v4df)__A, (__v4df)__B, + (__v4df)__C); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, - -(__v4sf)__C); + return (__m128)__builtin_ia32_vfnmsubps ((__v4sf)__A, (__v4sf)__B, + (__v4sf)__C); } extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C) { - return (__m256)__builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, - -(__v8sf)__C); + return (__m256)__builtin_ia32_vfnmsubps256 ((__v8sf)__A, (__v8sf)__B, + (__v8sf)__C); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_fnmsub_sd (__m128d __A, __m128d __B, __m128d __C) { - return (__m128d)__builtin_ia32_vfmaddsd3 ((__v2df)__A, -(__v2df)__B, - -(__v2df)__C); + return (__m128d)__builtin_ia32_vfnmsubsd3 ((__v2df)__A, (__v2df)__B, + (__v2df)__C); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_fnmsub_ss (__m128 __A, __m128 __B, __m128 __C) { - return (__m128)__builtin_ia32_vfmaddss3 ((__v4sf)__A, -(__v4sf)__B, - -(__v4sf)__C); + return (__m128)__builtin_ia32_vfnmsubss3 ((__v4sf)__A, (__v4sf)__B, + (__v4sf)__C); } extern __inline __m128d diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 74343db..df0f7e9 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -1931,12 +1931,16 @@ BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask3, "__builtin BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_maskz, "__builtin_ia32_vfnmaddps128_maskz", IX86_BUILTIN_VFNMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_maskz, "__builtin_ia32_vfnmsubpd256_maskz", IX86_BUILTIN_VFNMSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_maskz, "__builtin_ia32_vfnmsubpd128_maskz", IX86_BUILTIN_VFNMSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_maskz, "__builtin_ia32_vfnmsubps256_maskz", IX86_BUILTIN_VFNMSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI) +BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_maskz, "__builtin_ia32_vfnmsubps128_maskz", IX86_BUILTIN_VFNMSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI) BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI) @@ -2800,8 +2804,10 @@ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask3_round, "__bu BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_maskz_round, "__builtin_ia32_vfnmaddps512_maskz", IX86_BUILTIN_VFNMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_maskz_round, "__builtin_ia32_vfnmsubpd512_maskz", IX86_BUILTIN_VFNMSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_maskz_round, "__builtin_ia32_vfnmsubps512_maskz", IX86_BUILTIN_VFNMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) /* AVX512ER */ BDESC (OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT) @@ -2885,6 +2891,8 @@ BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmsub_v4sf, "__builtin_ia32_vfmsubss BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmsub_v2df, "__builtin_ia32_vfmsubsd3", IX86_BUILTIN_VFMSUBSD3, UNKNOWN, (int)MULTI_ARG_3_DF) BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfnmadd_v4sf, "__builtin_ia32_vfnmaddss3", IX86_BUILTIN_VFNMADDSS3, UNKNOWN, (int)MULTI_ARG_3_SF) BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfnmadd_v2df, "__builtin_ia32_vfnmaddsd3", IX86_BUILTIN_VFNMADDSD3, UNKNOWN, (int)MULTI_ARG_3_DF) +BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfnmsub_v4sf, "__builtin_ia32_vfnmsubss3", IX86_BUILTIN_VFNMSUBSS3, UNKNOWN, (int)MULTI_ARG_3_SF) +BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfnmsub_v2df, "__builtin_ia32_vfnmsubsd3", IX86_BUILTIN_VFNMSUBSD3, UNKNOWN, (int)MULTI_ARG_3_DF) BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF) BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF) @@ -2898,6 +2906,10 @@ BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmadd_v4sf, " BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmadd_v2df, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF) BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmadd_v8sf, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2) BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmadd_v4df, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2) +BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsub_v4sf, "__builtin_ia32_vfnmsubps", IX86_BUILTIN_VFNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF) +BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsub_v2df, "__builtin_ia32_vfnmsubpd", IX86_BUILTIN_VFNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF) +BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsub_v8sf, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2) +BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsub_v4df, "__builtin_ia32_vfnmsubpd256", IX86_BUILTIN_VFNMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2) BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF) BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 0426da4..28cecbf 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -3764,6 +3764,15 @@ (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand") (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))]) +(define_expand "fma4i_fnmsub_<mode>" + [(set (match_operand:FMAMODE_AVX512 0 "register_operand") + (fma:FMAMODE_AVX512 + (neg:FMAMODE_AVX512 + (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")) + (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand") + (neg:FMAMODE_AVX512 + (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))]) + (define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>" [(match_operand:VF_AVX512VL 0 "register_operand") (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>") @@ -4147,6 +4156,20 @@ (set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) +(define_expand "<avx512>_fnmsub_<mode>_maskz<round_expand_name>" + [(match_operand:VF_AVX512VL 0 "register_operand") + (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>") + (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>") + (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>") + (match_operand:<avx512fmaskmode> 4 "register_operand")] + "TARGET_AVX512F && <round_mode512bit_condition>" +{ + emit_insn (gen_fma_fnmsub_<mode>_maskz_1<round_expand_name> ( + operands[0], operands[1], operands[2], operands[3], + CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>)); + DONE; +}) + (define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>" [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") (fma:VF_SF_AVX512VL @@ -4163,6 +4186,52 @@ [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) +(define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_1" + [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v") + (fma:VF_AVX512 + (neg:VF_AVX512 + (match_operand:VF_AVX512 1 "register_operand" "0,v")) + (match_operand:VF_AVX512 2 "register_operand" "v,0") + (neg:VF_AVX512 + (vec_duplicate:VF_AVX512 + (match_operand:<ssescalarmode> 3 "memory_operand" "m,m")))))] + "TARGET_AVX512F && <sd_mask_mode512bit_condition>" + "vfnmsub213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_2" + [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v") + (fma:VF_AVX512 + (neg:VF_AVX512 + (vec_duplicate:VF_AVX512 + (match_operand:<ssescalarmode> 1 "memory_operand" "m,m"))) + (match_operand:VF_AVX512 2 "register_operand" "0,v") + (neg:VF_AVX512 + (match_operand:VF_AVX512 3 "register_operand" "v,0"))))] + "TARGET_AVX512F && <sd_mask_mode512bit_condition>" + "@ + vfnmsub132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>} + vfnmsub231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_3" + [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v") + (fma:VF_AVX512 + (neg:VF_AVX512 + (match_operand:VF_AVX512 1 "register_operand" "0,v")) + (vec_duplicate:VF_AVX512 + (match_operand:<ssescalarmode> 2 "memory_operand" "m,m")) + (neg:VF_AVX512 + (match_operand:VF_AVX512 3 "register_operand" "v,0"))))] + "TARGET_AVX512F && <sd_mask_mode512bit_condition>" + "@ + vfnmsub132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>} + vfnmsub231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>" [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") (vec_merge:VF_AVX512VL @@ -4403,6 +4472,19 @@ (const_int 1)))] "TARGET_FMA") +(define_expand "fmai_vmfnmsub_<mode><round_name>" + [(set (match_operand:VF_128 0 "register_operand") + (vec_merge:VF_128 + (fma:VF_128 + (neg:VF_128 + (match_operand:VF_128 2 "<round_nimm_predicate>")) + (match_operand:VF_128 1 "<round_nimm_predicate>") + (neg:VF_128 + (match_operand:VF_128 3 "<round_nimm_predicate>"))) + (match_dup 1) + (const_int 1)))] + "TARGET_FMA") + (define_insn "*fmai_fmadd_<mode>" [(set (match_operand:VF_128 0 "register_operand" "=v,v") (vec_merge:VF_128 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 649a8384c..f594bea 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,6 +1,21 @@ 2018-10-21 H.J. Lu <hongjiu.lu@intel.com> PR target/72782 + * gcc.target/i386/avx512f-fnmsub-df-zmm-1.c: New test. + * gcc.target/i386/avx512f-fnmsub-sf-zmm-1.c: Likewise. + * gcc.target/i386/avx512f-fnmsub-sf-zmm-2.c: Likewise. + * gcc.target/i386/avx512f-fnmsub-sf-zmm-3.c: Likewise. + * gcc.target/i386/avx512f-fnmsub-sf-zmm-4.c: Likewise. + * gcc.target/i386/avx512f-fnmsub-sf-zmm-5.c: Likewise. + * gcc.target/i386/avx512f-fnmsub-sf-zmm-6.c: Likewise. + * gcc.target/i386/avx512f-fnmsub-sf-zmm-7.c: Likewise. + * gcc.target/i386/avx512f-fnmsub-sf-zmm-8.c: Likewise. + * gcc.target/i386/avx512vl-fnmsub-sf-xmm-1.c: Likewise. + * gcc.target/i386/avx512vl-fnmsub-sf-ymm-1.c: Likewise. + +2018-10-21 H.J. Lu <hongjiu.lu@intel.com> + + PR target/72782 * gcc.target/i386/avx512f-fnmadd-df-zmm-1.c: New test. * gcc.target/i386/avx512f-fnmadd-sf-zmm-1.c: Likewise. * gcc.target/i386/avx512f-fnmadd-sf-zmm-2.c: Likewise. diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-df-zmm-1.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-df-zmm-1.c new file mode 100644 index 0000000..4d4d31e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-df-zmm-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfnmsub...pd\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastsd\[^\n\]*%zmm\[0-9\]+" } } */ + +#define type __m512d +#define vec 512 +#define op fnmsub +#define suffix pd +#define SCALAR double + +#include "avx512-fma-1.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-1.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-1.c new file mode 100644 index 0000000..ff9632a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */ + +#define type __m512 +#define vec 512 +#define op fnmsub +#define suffix ps +#define SCALAR float + +#include "avx512-fma-1.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-2.c new file mode 100644 index 0000000..4256350 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-2.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */ + +#define type __m512 +#define vec 512 +#define op fnmsub +#define suffix ps +#define SCALAR float + +#include "avx512-fma-2.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-3.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-3.c new file mode 100644 index 0000000..6966cea --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-3.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */ + +#define type __m512 +#define vec 512 +#define op fnmsub +#define suffix ps +#define SCALAR float + +#include "avx512-fma-3.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-4.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-4.c new file mode 100644 index 0000000..0748c12 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-4.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */ + +#define type __m512 +#define vec 512 +#define op fnmsub +#define suffix ps +#define SCALAR float + +#include "avx512-fma-4.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-5.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-5.c new file mode 100644 index 0000000..d24a80f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-5.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */ + +#define type __m512 +#define vec 512 +#define op fnmsub +#define suffix ps +#define SCALAR float + +#include "avx512-fma-5.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-6.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-6.c new file mode 100644 index 0000000..ee36dc9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-6.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */ + +#define type __m512 +#define vec 512 +#define op fnmsub +#define suffix ps +#define SCALAR float + +#include "avx512-fma-6.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-7.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-7.c new file mode 100644 index 0000000..7815251 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-7.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\]*%zmm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub...ps\[^\n\]*%zmm\[0-9\]+" 1 } } */ + +#define type __m512 +#define vec 512 +#define op fnmsub +#define suffix ps +#define SCALAR float + +#include "avx512-fma-7.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-8.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-8.c new file mode 100644 index 0000000..8bd669e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-8.c @@ -0,0 +1,12 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\]*%zmm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+%zmm\[0-9\]+, %zmm\[0-9\]+, %zmm0" 1 } } */ + +#define type __m512 +#define vec 512 +#define op fnmsub +#define suffix ps +#define SCALAR float + +#include "avx512-fma-8.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-fnmsub-sf-xmm-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-fnmsub-sf-xmm-1.c new file mode 100644 index 0000000..0a63df8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-fnmsub-sf-xmm-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mfma -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %xmm\[0-9\]+, %xmm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%xmm\[0-9\]+" } } */ + +#define type __m128 +#define vec +#define op fnmsub +#define suffix ps +#define SCALAR float + +#include "avx512-fma-1.h" diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-fnmsub-sf-ymm-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-fnmsub-sf-ymm-1.c new file mode 100644 index 0000000..d8d48d6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-fnmsub-sf-ymm-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mfma -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %ymm\[0-9\]+, %ymm0" 1 } } */ +/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%ymm\[0-9\]+" } } */ + +#define type __m256 +#define vec 256 +#define op fnmsub +#define suffix ps +#define SCALAR float + +#include "avx512-fma-1.h" |