aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorH.J. Lu <hongjiu.lu@intel.com>2018-10-21 20:28:56 +0000
committerH.J. Lu <hjl@gcc.gnu.org>2018-10-21 13:28:56 -0700
commit38ef6fb19d81a9da321e95f35940121e3cf858ff (patch)
tree6905a839cf987be79e39384dba02f4862f012dd6
parent5ca9497788cb44abe81529844ebc0077c0fc2b91 (diff)
downloadgcc-38ef6fb19d81a9da321e95f35940121e3cf858ff.zip
gcc-38ef6fb19d81a9da321e95f35940121e3cf858ff.tar.gz
gcc-38ef6fb19d81a9da321e95f35940121e3cf858ff.tar.bz2
i386: Enable AVX512 memory broadcast for FNMSUB
Many AVX512 vector operations can broadcast from a scalar memory source. This patch enables memory broadcast for FNMSUB operations. In order to support AVX512 memory broadcast for FNMSUB, FNMSUB builtin functions are also added, instead of passing the negated value to FMA builtin functions. gcc/ PR target/72782 * config/i386/avx512fintrin.h (_mm512_fnmsub_round_pd): Use __builtin_ia32_vfnmsubpd512_mask. (_mm512_mask_fnmsub_round_pd): Likewise. (_mm512_fnmsub_pd): Likewise. (_mm512_mask_fnmsub_pd): Likewise. (_mm512_maskz_fnmsub_round_pd): Use __builtin_ia32_vfnmsubpd512_maskz. (_mm512_maskz_fnmsub_pd): Likewise. (_mm512_fnmsub_round_ps): Use __builtin_ia32_vfnmsubps512_mask. (_mm512_mask_fnmsub_round_ps): Likewise. (_mm512_fnmsub_ps): Likewise. (_mm512_mask_fnmsub_ps): Likewise. (_mm512_maskz_fnmsub_round_ps): Use __builtin_ia32_vfnmsubps512_maskz. (_mm512_maskz_fnmsub_ps): Likewise. * config/i386/avx512vlintrin.h (_mm256_mask_fnmsub_pd): Use __builtin_ia32_vfnmsubpd256_mask. (_mm256_maskz_fnmsub_pd): Use __builtin_ia32_vfnmsubpd256_maskz. (_mm_mask_fnmsub_pd): Use __builtin_ia32_vfmaddpd128_mask (_mm_maskz_fnmsub_pd): Use __builtin_ia32_vfnmsubpd128_maskz. (_mm256_mask_fnmsub_ps): Use __builtin_ia32_vfnmsubps256_mask. (_mm256_mask_fnmsub_ps): Use __builtin_ia32_vfnmsubps256_mask. (_mm256_maskz_fnmsub_ps): Use __builtin_ia32_vfnmsubps256_maskz. (_mm_mask_fnmsub_ps): Use __builtin_ia32_vfnmsubps128_mask. (_mm_maskz_fnmsub_ps): Use __builtin_ia32_vfnmsubps128_maskz. * config/i386/fmaintrin.h (_mm_fnmsub_pd): Use __builtin_ia32_vfnmsubpd. (_mm256_fnmsub_pd): Use __builtin_ia32_vfnmsubpd256. (_mm_fnmsub_ps): Use __builtin_ia32_vfnmsubps. (_mm256_fnmsub_ps): Use __builtin_ia32_vfnmsubps256. (_mm_fnmsub_sd): Use __builtin_ia32_vfnmsubsd3. (_mm_fnmsub_ss): Use __builtin_ia32_vfnmsubss3. * config/i386/i386-builtin.def: Add __builtin_ia32_vfnmsubpd256_mask, __builtin_ia32_vfnmsubpd256_maskz, __builtin_ia32_vfnmsubpd128_mask, __builtin_ia32_vfnmsubpd128_maskz, __builtin_ia32_vfnmsubps256_mask, __builtin_ia32_vfnmsubps256_maskz, __builtin_ia32_vfnmsubps128_mask, __builtin_ia32_vfnmsubps128_maskz, __builtin_ia32_vfnmsubpd512_mask, __builtin_ia32_vfnmsubpd512_maskz, __builtin_ia32_vfnmsubps512_mask, __builtin_ia32_vfnmsubps512_maskz, __builtin_ia32_vfnmsubss3, __builtin_ia32_vfnmsubsd3, __builtin_ia32_vfnmsubps, __builtin_ia32_vfnmsubpd, __builtin_ia32_vfnmsubps256 and. __builtin_ia32_vfnmsubpd256. * config/i386/sse.md (fma4i_fnmsub_<mode>): New. (<avx512>_fnmsub_<mode>_maskz<round_expand_name>): Likewise. (*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_1): Likewise. (*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_2): Likewise. (*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_3): Likewise. (fmai_vmfnmsub_<mode><round_name>): Likewise. gcc/testsuite/ PR target/72782 * gcc.target/i386/avx512f-fnmsub-df-zmm-1.c: New test. * gcc.target/i386/avx512f-fnmsub-sf-zmm-1.c: Likewise. * gcc.target/i386/avx512f-fnmsub-sf-zmm-2.c: Likewise. * gcc.target/i386/avx512f-fnmsub-sf-zmm-3.c: Likewise. * gcc.target/i386/avx512f-fnmsub-sf-zmm-4.c: Likewise. * gcc.target/i386/avx512f-fnmsub-sf-zmm-5.c: Likewise. * gcc.target/i386/avx512f-fnmsub-sf-zmm-6.c: Likewise. * gcc.target/i386/avx512f-fnmsub-sf-zmm-7.c: Likewise. * gcc.target/i386/avx512f-fnmsub-sf-zmm-8.c: Likewise. * gcc.target/i386/avx512vl-fnmsub-sf-xmm-1.c: Likewise. * gcc.target/i386/avx512vl-fnmsub-sf-ymm-1.c: Likewise. From-SVN: r265358
-rw-r--r--gcc/ChangeLog61
-rw-r--r--gcc/config/i386/avx512fintrin.h80
-rw-r--r--gcc/config/i386/avx512vlintrin.h32
-rw-r--r--gcc/config/i386/fmaintrin.h24
-rw-r--r--gcc/config/i386/i386-builtin.def12
-rw-r--r--gcc/config/i386/sse.md82
-rw-r--r--gcc/testsuite/ChangeLog15
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-fnmsub-df-zmm-1.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-1.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-2.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-3.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-4.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-5.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-6.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-7.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-8.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-fnmsub-sf-xmm-1.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-fnmsub-sf-ymm-1.c12
18 files changed, 370 insertions, 68 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 1148b15..0d11aa8 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,6 +1,67 @@
2018-10-21 H.J. Lu <hongjiu.lu@intel.com>
PR target/72782
+ * config/i386/avx512fintrin.h (_mm512_fnmsub_round_pd): Use
+ __builtin_ia32_vfnmsubpd512_mask.
+ (_mm512_mask_fnmsub_round_pd): Likewise.
+ (_mm512_fnmsub_pd): Likewise.
+ (_mm512_mask_fnmsub_pd): Likewise.
+ (_mm512_maskz_fnmsub_round_pd): Use
+ __builtin_ia32_vfnmsubpd512_maskz.
+ (_mm512_maskz_fnmsub_pd): Likewise.
+ (_mm512_fnmsub_round_ps): Use __builtin_ia32_vfnmsubps512_mask.
+ (_mm512_mask_fnmsub_round_ps): Likewise.
+ (_mm512_fnmsub_ps): Likewise.
+ (_mm512_mask_fnmsub_ps): Likewise.
+ (_mm512_maskz_fnmsub_round_ps): Use
+ __builtin_ia32_vfnmsubps512_maskz.
+ (_mm512_maskz_fnmsub_ps): Likewise.
+ * config/i386/avx512vlintrin.h (_mm256_mask_fnmsub_pd): Use
+ __builtin_ia32_vfnmsubpd256_mask.
+ (_mm256_maskz_fnmsub_pd): Use __builtin_ia32_vfnmsubpd256_maskz.
+ (_mm_mask_fnmsub_pd): Use __builtin_ia32_vfmaddpd128_mask
+ (_mm_maskz_fnmsub_pd): Use __builtin_ia32_vfnmsubpd128_maskz.
+ (_mm256_mask_fnmsub_ps): Use __builtin_ia32_vfnmsubps256_mask.
+ (_mm256_mask_fnmsub_ps): Use __builtin_ia32_vfnmsubps256_mask.
+ (_mm256_maskz_fnmsub_ps): Use __builtin_ia32_vfnmsubps256_maskz.
+ (_mm_mask_fnmsub_ps): Use __builtin_ia32_vfnmsubps128_mask.
+ (_mm_maskz_fnmsub_ps): Use __builtin_ia32_vfnmsubps128_maskz.
+ * config/i386/fmaintrin.h (_mm_fnmsub_pd): Use
+ __builtin_ia32_vfnmsubpd.
+ (_mm256_fnmsub_pd): Use __builtin_ia32_vfnmsubpd256.
+ (_mm_fnmsub_ps): Use __builtin_ia32_vfnmsubps.
+ (_mm256_fnmsub_ps): Use __builtin_ia32_vfnmsubps256.
+ (_mm_fnmsub_sd): Use __builtin_ia32_vfnmsubsd3.
+ (_mm_fnmsub_ss): Use __builtin_ia32_vfnmsubss3.
+ * config/i386/i386-builtin.def: Add
+ __builtin_ia32_vfnmsubpd256_mask,
+ __builtin_ia32_vfnmsubpd256_maskz,
+ __builtin_ia32_vfnmsubpd128_mask,
+ __builtin_ia32_vfnmsubpd128_maskz,
+ __builtin_ia32_vfnmsubps256_mask,
+ __builtin_ia32_vfnmsubps256_maskz,
+ __builtin_ia32_vfnmsubps128_mask,
+ __builtin_ia32_vfnmsubps128_maskz,
+ __builtin_ia32_vfnmsubpd512_mask,
+ __builtin_ia32_vfnmsubpd512_maskz,
+ __builtin_ia32_vfnmsubps512_mask,
+ __builtin_ia32_vfnmsubps512_maskz, __builtin_ia32_vfnmsubss3,
+ __builtin_ia32_vfnmsubsd3, __builtin_ia32_vfnmsubps,
+ __builtin_ia32_vfnmsubpd, __builtin_ia32_vfnmsubps256 and.
+ __builtin_ia32_vfnmsubpd256.
+ * config/i386/sse.md (fma4i_fnmsub_<mode>): New.
+ (<avx512>_fnmsub_<mode>_maskz<round_expand_name>): Likewise.
+ (*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_1):
+ Likewise.
+ (*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_2):
+ Likewise.
+ (*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_3):
+ Likewise.
+ (fmai_vmfnmsub_<mode><round_name>): Likewise.
+
+2018-10-21 H.J. Lu <hongjiu.lu@intel.com>
+
+ PR target/72782
* config/i386/avx512fintrin.h (_mm512_fnmadd_round_pd): Use
__builtin_ia32_vfnmaddpd512_mask.
(_mm512_mask_fnmadd_round_pd): Likewise.
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 1445e9e..001d610 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -3699,10 +3699,10 @@ extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) -1, __R);
+ return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) -1, __R);
}
extern __inline __m512d
@@ -3732,20 +3732,20 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
__m512d __C, const int __R)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) __U, __R);
+ return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) -1, __R);
+ return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) -1, __R);
}
extern __inline __m512
@@ -3775,10 +3775,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
__m512 __C, const int __R)
{
- return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) __U, __R);
+ return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U, __R);
}
#else
#define _mm512_fmadd_round_pd(A, B, C, R) \
@@ -3902,7 +3902,7 @@ _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
(__m512)__builtin_ia32_vfnmaddps512_maskz(A, B, C, U, R)
#define _mm512_fnmsub_round_pd(A, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, -(C), -1, R)
+ (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, -1, R)
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
(__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R)
@@ -3911,10 +3911,10 @@ _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
(__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R)
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, -(C), U, R)
+ (__m512d)__builtin_ia32_vfnmsubpd512_maskz(A, B, C, U, R)
#define _mm512_fnmsub_round_ps(A, B, C, R) \
- (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, -(C), -1, R)
+ (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, -1, R)
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
(__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R)
@@ -3923,7 +3923,7 @@ _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
(__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R)
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
- (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, -(C), U, R)
+ (__m512)__builtin_ia32_vfnmsubps512_maskz(A, B, C, U, R)
#endif
extern __inline __m512i
@@ -12768,11 +12768,11 @@ extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512d
@@ -12801,22 +12801,22 @@ extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
- (__v8df) __B,
- -(__v8df) __C,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A,
+ (__v8df) __B,
+ (__v8df) __C,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m512
@@ -12845,11 +12845,11 @@ extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
- (__v16sf) __B,
- -(__v16sf) __C,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v16sf) __C,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
}
extern __inline __m256i
diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h
index b46c38e..7ff7801 100644
--- a/gcc/config/i386/avx512vlintrin.h
+++ b/gcc/config/i386/avx512vlintrin.h
@@ -4665,10 +4665,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fnmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
__m256d __C)
{
- return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
- (__v4df) __B,
- -(__v4df) __C,
- (__mmask8) __U);
+ return (__m256d) __builtin_ia32_vfnmsubpd256_maskz ((__v4df) __A,
+ (__v4df) __B,
+ (__v4df) __C,
+ (__mmask8) __U);
}
extern __inline __m128d
@@ -4698,10 +4698,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fnmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
__m128d __C)
{
- return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
- (__v2df) __B,
- -(__v2df) __C,
- (__mmask8) __U);
+ return (__m128d) __builtin_ia32_vfnmsubpd128_maskz ((__v2df) __A,
+ (__v2df) __B,
+ (__v2df) __C,
+ (__mmask8) __U);
}
extern __inline __m256
@@ -4731,10 +4731,10 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fnmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
__m256 __C)
{
- return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
- (__v8sf) __B,
- -(__v8sf) __C,
- (__mmask8) __U);
+ return (__m256) __builtin_ia32_vfnmsubps256_maskz ((__v8sf) __A,
+ (__v8sf) __B,
+ (__v8sf) __C,
+ (__mmask8) __U);
}
extern __inline __m128
@@ -4761,10 +4761,10 @@ extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fnmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
- return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
- (__v4sf) __B,
- -(__v4sf) __C,
- (__mmask8) __U);
+ return (__m128) __builtin_ia32_vfnmsubps128_maskz ((__v4sf) __A,
+ (__v4sf) __B,
+ (__v4sf) __C,
+ (__mmask8) __U);
}
extern __inline __m128i
diff --git a/gcc/config/i386/fmaintrin.h b/gcc/config/i386/fmaintrin.h
index 0a2f4a7..4f13e81c 100644
--- a/gcc/config/i386/fmaintrin.h
+++ b/gcc/config/i386/fmaintrin.h
@@ -182,48 +182,48 @@ extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B,
- -(__v2df)__C);
+ return (__m128d)__builtin_ia32_vfnmsubpd ((__v2df)__A, (__v2df)__B,
+ (__v2df)__C);
}
extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d)__builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B,
- -(__v4df)__C);
+ return (__m256d)__builtin_ia32_vfnmsubpd256 ((__v4df)__A, (__v4df)__B,
+ (__v4df)__C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B,
- -(__v4sf)__C);
+ return (__m128)__builtin_ia32_vfnmsubps ((__v4sf)__A, (__v4sf)__B,
+ (__v4sf)__C);
}
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C)
{
- return (__m256)__builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B,
- -(__v8sf)__C);
+ return (__m256)__builtin_ia32_vfnmsubps256 ((__v8sf)__A, (__v8sf)__B,
+ (__v8sf)__C);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_sd (__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmaddsd3 ((__v2df)__A, -(__v2df)__B,
- -(__v2df)__C);
+ return (__m128d)__builtin_ia32_vfnmsubsd3 ((__v2df)__A, (__v2df)__B,
+ (__v2df)__C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fnmsub_ss (__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmaddss3 ((__v4sf)__A, -(__v4sf)__B,
- -(__v4sf)__C);
+ return (__m128)__builtin_ia32_vfnmsubss3 ((__v4sf)__A, (__v4sf)__B,
+ (__v4sf)__C);
}
extern __inline __m128d
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 74343db..df0f7e9 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -1931,12 +1931,16 @@ BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask3, "__builtin
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_maskz, "__builtin_ia32_vfnmaddps128_maskz", IX86_BUILTIN_VFNMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_maskz, "__builtin_ia32_vfnmsubpd256_maskz", IX86_BUILTIN_VFNMSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_maskz, "__builtin_ia32_vfnmsubpd128_maskz", IX86_BUILTIN_VFNMSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_maskz, "__builtin_ia32_vfnmsubps256_maskz", IX86_BUILTIN_VFNMSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_maskz, "__builtin_ia32_vfnmsubps128_maskz", IX86_BUILTIN_VFNMSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
@@ -2800,8 +2804,10 @@ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask3_round, "__bu
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_maskz_round, "__builtin_ia32_vfnmaddps512_maskz", IX86_BUILTIN_VFNMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_maskz_round, "__builtin_ia32_vfnmsubpd512_maskz", IX86_BUILTIN_VFNMSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_maskz_round, "__builtin_ia32_vfnmsubps512_maskz", IX86_BUILTIN_VFNMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
/* AVX512ER */
BDESC (OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT)
@@ -2885,6 +2891,8 @@ BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmsub_v4sf, "__builtin_ia32_vfmsubss
BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmsub_v2df, "__builtin_ia32_vfmsubsd3", IX86_BUILTIN_VFMSUBSD3, UNKNOWN, (int)MULTI_ARG_3_DF)
BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfnmadd_v4sf, "__builtin_ia32_vfnmaddss3", IX86_BUILTIN_VFNMADDSS3, UNKNOWN, (int)MULTI_ARG_3_SF)
BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfnmadd_v2df, "__builtin_ia32_vfnmaddsd3", IX86_BUILTIN_VFNMADDSD3, UNKNOWN, (int)MULTI_ARG_3_DF)
+BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfnmsub_v4sf, "__builtin_ia32_vfnmsubss3", IX86_BUILTIN_VFNMSUBSS3, UNKNOWN, (int)MULTI_ARG_3_SF)
+BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfnmsub_v2df, "__builtin_ia32_vfnmsubsd3", IX86_BUILTIN_VFNMSUBSD3, UNKNOWN, (int)MULTI_ARG_3_DF)
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF)
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF)
@@ -2898,6 +2906,10 @@ BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmadd_v4sf, "
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmadd_v2df, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF)
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmadd_v8sf, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2)
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmadd_v4df, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2)
+BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsub_v4sf, "__builtin_ia32_vfnmsubps", IX86_BUILTIN_VFNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF)
+BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsub_v2df, "__builtin_ia32_vfnmsubpd", IX86_BUILTIN_VFNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF)
+BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsub_v8sf, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2)
+BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsub_v4df, "__builtin_ia32_vfnmsubpd256", IX86_BUILTIN_VFNMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2)
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF)
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 0426da4..28cecbf 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -3764,6 +3764,15 @@
(match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
(match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
+(define_expand "fma4i_fnmsub_<mode>"
+ [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
+ (fma:FMAMODE_AVX512
+ (neg:FMAMODE_AVX512
+ (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
+ (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
+ (neg:FMAMODE_AVX512
+ (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
+
(define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
[(match_operand:VF_AVX512VL 0 "register_operand")
(match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
@@ -4147,6 +4156,20 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
+(define_expand "<avx512>_fnmsub_<mode>_maskz<round_expand_name>"
+ [(match_operand:VF_AVX512VL 0 "register_operand")
+ (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
+ (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
+ (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
+ (match_operand:<avx512fmaskmode> 4 "register_operand")]
+ "TARGET_AVX512F && <round_mode512bit_condition>"
+{
+ emit_insn (gen_fma_fnmsub_<mode>_maskz_1<round_expand_name> (
+ operands[0], operands[1], operands[2], operands[3],
+ CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
+ DONE;
+})
+
(define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
[(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
(fma:VF_SF_AVX512VL
@@ -4163,6 +4186,52 @@
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
+(define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_1"
+ [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
+ (fma:VF_AVX512
+ (neg:VF_AVX512
+ (match_operand:VF_AVX512 1 "register_operand" "0,v"))
+ (match_operand:VF_AVX512 2 "register_operand" "v,0")
+ (neg:VF_AVX512
+ (vec_duplicate:VF_AVX512
+ (match_operand:<ssescalarmode> 3 "memory_operand" "m,m")))))]
+ "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
+ "vfnmsub213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_2"
+ [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
+ (fma:VF_AVX512
+ (neg:VF_AVX512
+ (vec_duplicate:VF_AVX512
+ (match_operand:<ssescalarmode> 1 "memory_operand" "m,m")))
+ (match_operand:VF_AVX512 2 "register_operand" "0,v")
+ (neg:VF_AVX512
+ (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
+ "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
+ "@
+ vfnmsub132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
+ vfnmsub231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_3"
+ [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
+ (fma:VF_AVX512
+ (neg:VF_AVX512
+ (match_operand:VF_AVX512 1 "register_operand" "0,v"))
+ (vec_duplicate:VF_AVX512
+ (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
+ (neg:VF_AVX512
+ (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
+ "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
+ "@
+ vfnmsub132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
+ vfnmsub231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
[(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
(vec_merge:VF_AVX512VL
@@ -4403,6 +4472,19 @@
(const_int 1)))]
"TARGET_FMA")
+(define_expand "fmai_vmfnmsub_<mode><round_name>"
+ [(set (match_operand:VF_128 0 "register_operand")
+ (vec_merge:VF_128
+ (fma:VF_128
+ (neg:VF_128
+ (match_operand:VF_128 2 "<round_nimm_predicate>"))
+ (match_operand:VF_128 1 "<round_nimm_predicate>")
+ (neg:VF_128
+ (match_operand:VF_128 3 "<round_nimm_predicate>")))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_FMA")
+
(define_insn "*fmai_fmadd_<mode>"
[(set (match_operand:VF_128 0 "register_operand" "=v,v")
(vec_merge:VF_128
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 649a8384c..f594bea 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,6 +1,21 @@
2018-10-21 H.J. Lu <hongjiu.lu@intel.com>
PR target/72782
+ * gcc.target/i386/avx512f-fnmsub-df-zmm-1.c: New test.
+ * gcc.target/i386/avx512f-fnmsub-sf-zmm-1.c: Likewise.
+ * gcc.target/i386/avx512f-fnmsub-sf-zmm-2.c: Likewise.
+ * gcc.target/i386/avx512f-fnmsub-sf-zmm-3.c: Likewise.
+ * gcc.target/i386/avx512f-fnmsub-sf-zmm-4.c: Likewise.
+ * gcc.target/i386/avx512f-fnmsub-sf-zmm-5.c: Likewise.
+ * gcc.target/i386/avx512f-fnmsub-sf-zmm-6.c: Likewise.
+ * gcc.target/i386/avx512f-fnmsub-sf-zmm-7.c: Likewise.
+ * gcc.target/i386/avx512f-fnmsub-sf-zmm-8.c: Likewise.
+ * gcc.target/i386/avx512vl-fnmsub-sf-xmm-1.c: Likewise.
+ * gcc.target/i386/avx512vl-fnmsub-sf-ymm-1.c: Likewise.
+
+2018-10-21 H.J. Lu <hongjiu.lu@intel.com>
+
+ PR target/72782
* gcc.target/i386/avx512f-fnmadd-df-zmm-1.c: New test.
* gcc.target/i386/avx512f-fnmadd-sf-zmm-1.c: Likewise.
* gcc.target/i386/avx512f-fnmadd-sf-zmm-2.c: Likewise.
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-df-zmm-1.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-df-zmm-1.c
new file mode 100644
index 0000000..4d4d31e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-df-zmm-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...pd\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastsd\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512d
+#define vec 512
+#define op fnmsub
+#define suffix pd
+#define SCALAR double
+
+#include "avx512-fma-1.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-1.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-1.c
new file mode 100644
index 0000000..ff9632a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fnmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-1.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-2.c
new file mode 100644
index 0000000..4256350
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fnmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-2.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-3.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-3.c
new file mode 100644
index 0000000..6966cea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-3.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fnmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-3.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-4.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-4.c
new file mode 100644
index 0000000..0748c12
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-4.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fnmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-4.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-5.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-5.c
new file mode 100644
index 0000000..d24a80f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-5.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fnmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-5.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-6.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-6.c
new file mode 100644
index 0000000..ee36dc9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-6.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fnmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-6.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-7.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-7.c
new file mode 100644
index 0000000..7815251
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-7.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\]*%zmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[^\n\]*%zmm\[0-9\]+" 1 } } */
+
+#define type __m512
+#define vec 512
+#define op fnmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-7.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-8.c b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-8.c
new file mode 100644
index 0000000..8bd669e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-fnmsub-sf-zmm-8.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\]*%zmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+%zmm\[0-9\]+, %zmm\[0-9\]+, %zmm0" 1 } } */
+
+#define type __m512
+#define vec 512
+#define op fnmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-8.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-fnmsub-sf-xmm-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-fnmsub-sf-xmm-1.c
new file mode 100644
index 0000000..0a63df8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-fnmsub-sf-xmm-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mfma -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %xmm\[0-9\]+, %xmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%xmm\[0-9\]+" } } */
+
+#define type __m128
+#define vec
+#define op fnmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-1.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-fnmsub-sf-ymm-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-fnmsub-sf-ymm-1.c
new file mode 100644
index 0000000..d8d48d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-fnmsub-sf-ymm-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mfma -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %ymm\[0-9\]+, %ymm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%ymm\[0-9\]+" } } */
+
+#define type __m256
+#define vec 256
+#define op fnmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-1.h"