aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorH.J. Lu <hongjiu.lu@intel.com>2018-10-21 20:24:50 +0000
committerH.J. Lu <hjl@gcc.gnu.org>2018-10-21 13:24:50 -0700
commitfe7f972d6ecc1f1df34f15615b7e3dea6f39e564 (patch)
treee1eb0e9ceeeaffb0ac1c74cebe434dcc0f271985
parent88c08ac43c47bb5d21734be744df913dd568d108 (diff)
downloadgcc-fe7f972d6ecc1f1df34f15615b7e3dea6f39e564.zip
gcc-fe7f972d6ecc1f1df34f15615b7e3dea6f39e564.tar.gz
gcc-fe7f972d6ecc1f1df34f15615b7e3dea6f39e564.tar.bz2
Enable AVX512 memory broadcast for FMSUB
Many AVX512 vector operations can broadcast from a scalar memory source. This patch enables memory broadcast for FMSUB operations. In order to support AVX512 memory broadcast for FMSUB, FMSUB builtin functions are also added, instead of passing the negated value to FMA builtin functions. gcc/ PR target/72782 * config/i386/avx512fintrin.h (_mm512_fmsub_round_pd): Use __builtin_ia32_vfmsubpd512_mask. (_mm512_mask_fmsub_round_pd): Likewise. (_mm512_fmsub_pd): Likewise. (_mm512_mask_fmsub_pd): Likewise. (_mm512_maskz_fmsub_round_pd): Use __builtin_ia32_vfmsubpd512_maskz. (_mm512_maskz_fmsub_pd): Likewise. (_mm512_fmsub_round_ps): Use __builtin_ia32_vfmsubps512_mask. (_mm512_mask_fmsub_round_ps): Likewise. (_mm512_fmsub_ps): Likewise. (_mm512_mask_fmsub_ps): Likewise. (_mm512_maskz_fmsub_round_ps): Use __builtin_ia32_vfmsubps512_maskz. (_mm512_maskz_fmsub_ps): Likewise. * config/i386/avx512vlintrin.h (_mm256_mask_fmsub_pd): Use __builtin_ia32_vfmsubpd256_mask. (_mm256_maskz_fmsub_pd): Use __builtin_ia32_vfmsubpd256_maskz. (_mm_mask_fmsub_pd): Use __builtin_ia32_vfmaddpd128_mask (_mm_maskz_fmsub_pd): Use __builtin_ia32_vfmsubpd128_maskz. (_mm256_mask_fmsub_ps): Use __builtin_ia32_vfmsubps256_mask. (_mm256_mask_fmsub_ps): Use __builtin_ia32_vfmsubps256_mask. (_mm256_maskz_fmsub_ps): Use __builtin_ia32_vfmsubps256_maskz. (_mm_mask_fmsub_ps): Use __builtin_ia32_vfmsubps128_mask. (_mm_maskz_fmsub_ps): Use __builtin_ia32_vfmsubps128_maskz. * config/i386/fmaintrin.h (_mm_fmsub_pd): Use __builtin_ia32_vfmsubpd. (_mm256_fmsub_pd): Use __builtin_ia32_vfmsubpd256. (_mm_fmsub_ps): Use __builtin_ia32_vfmsubps. (_mm256_fmsub_ps): Use __builtin_ia32_vfmsubps256. (_mm_fmsub_sd): Use __builtin_ia32_vfmsubsd3. (_mm_fmsub_ss): Use __builtin_ia32_vfmsubss3. * config/i386/i386-builtin.def: Add __builtin_ia32_vfmsubpd256_mask, __builtin_ia32_vfmsubpd256_maskz, __builtin_ia32_vfmsubpd128_mask, __builtin_ia32_vfmsubpd128_maskz, __builtin_ia32_vfmsubps256_mask, __builtin_ia32_vfmsubps256_maskz, __builtin_ia32_vfmsubps128_mask, __builtin_ia32_vfmsubps128_maskz, __builtin_ia32_vfmsubpd512_mask, __builtin_ia32_vfmsubpd512_maskz, __builtin_ia32_vfmsubps512_mask, __builtin_ia32_vfmsubps512_maskz, __builtin_ia32_vfmsubss3, __builtin_ia32_vfmsubsd3, __builtin_ia32_vfmsubps, __builtin_ia32_vfmsubpd, __builtin_ia32_vfmsubps256 and. __builtin_ia32_vfmsubpd256. * config/i386/sse.md (fma4i_fmsub_<mode>): New. (<avx512>_fmsub_<mode>_maskz<round_expand_name>): Likewise. (*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_1): Likewise. (*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_2): Likewise. (*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_3): Likewise. (fmai_vmfmsub_<mode><round_name>): Likewise. gcc/testsuite/ PR target/72782 * gcc.target/i386/avx512f-fmsub-df-zmm-1.c: New test. * gcc.target/i386/avx512f-fmsub-sf-zmm-1.c: Likewise. * gcc.target/i386/avx512f-fmsub-sf-zmm-2.c: Likewise. * gcc.target/i386/avx512f-fmsub-sf-zmm-3.c: Likewise. * gcc.target/i386/avx512f-fmsub-sf-zmm-4.c: Likewise. * gcc.target/i386/avx512f-fmsub-sf-zmm-5.c: Likewise. * gcc.target/i386/avx512f-fmsub-sf-zmm-6.c: Likewise. * gcc.target/i386/avx512f-fmsub-sf-zmm-7.c: Likewise. * gcc.target/i386/avx512f-fmsub-sf-zmm-8.c: Likewise. * gcc.target/i386/avx512vl-fmsub-sf-xmm-1.c: Likewise. * gcc.target/i386/avx512vl-fmsub-sf-ymm-1.c: Likewise. From-SVN: r265356
-rw-r--r--gcc/ChangeLog61
-rw-r--r--gcc/config/i386/avx512fintrin.h60
-rw-r--r--gcc/config/i386/avx512vlintrin.h32
-rw-r--r--gcc/config/i386/fmaintrin.h24
-rw-r--r--gcc/config/i386/i386-builtin.def18
-rw-r--r--gcc/config/i386/sse.md77
-rw-r--r--gcc/testsuite/ChangeLog15
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-fmsub-df-zmm-1.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-1.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-2.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-3.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-4.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-5.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-6.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-7.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-8.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-fmsub-sf-xmm-1.c12
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512vl-fmsub-sf-ymm-1.c12
18 files changed, 361 insertions, 58 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 505c949..11d05f7 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,66 @@
2018-10-21 H.J. Lu <hongjiu.lu@intel.com>
+ PR target/72782
+ * config/i386/avx512fintrin.h (_mm512_fmsub_round_pd): Use
+ __builtin_ia32_vfmsubpd512_mask.
+ (_mm512_mask_fmsub_round_pd): Likewise.
+ (_mm512_fmsub_pd): Likewise.
+ (_mm512_mask_fmsub_pd): Likewise.
+ (_mm512_maskz_fmsub_round_pd): Use
+ __builtin_ia32_vfmsubpd512_maskz.
+ (_mm512_maskz_fmsub_pd): Likewise.
+ (_mm512_fmsub_round_ps): Use __builtin_ia32_vfmsubps512_mask.
+ (_mm512_mask_fmsub_round_ps): Likewise.
+ (_mm512_fmsub_ps): Likewise.
+ (_mm512_mask_fmsub_ps): Likewise.
+ (_mm512_maskz_fmsub_round_ps): Use
+ __builtin_ia32_vfmsubps512_maskz.
+ (_mm512_maskz_fmsub_ps): Likewise.
+ * config/i386/avx512vlintrin.h (_mm256_mask_fmsub_pd): Use
+ __builtin_ia32_vfmsubpd256_mask.
+ (_mm256_maskz_fmsub_pd): Use __builtin_ia32_vfmsubpd256_maskz.
+ (_mm_mask_fmsub_pd): Use __builtin_ia32_vfmaddpd128_mask
+ (_mm_maskz_fmsub_pd): Use __builtin_ia32_vfmsubpd128_maskz.
+ (_mm256_mask_fmsub_ps): Use __builtin_ia32_vfmsubps256_mask.
+ (_mm256_mask_fmsub_ps): Use __builtin_ia32_vfmsubps256_mask.
+ (_mm256_maskz_fmsub_ps): Use __builtin_ia32_vfmsubps256_maskz.
+ (_mm_mask_fmsub_ps): Use __builtin_ia32_vfmsubps128_mask.
+ (_mm_maskz_fmsub_ps): Use __builtin_ia32_vfmsubps128_maskz.
+ * config/i386/fmaintrin.h (_mm_fmsub_pd): Use
+ __builtin_ia32_vfmsubpd.
+ (_mm256_fmsub_pd): Use __builtin_ia32_vfmsubpd256.
+ (_mm_fmsub_ps): Use __builtin_ia32_vfmsubps.
+ (_mm256_fmsub_ps): Use __builtin_ia32_vfmsubps256.
+ (_mm_fmsub_sd): Use __builtin_ia32_vfmsubsd3.
+ (_mm_fmsub_ss): Use __builtin_ia32_vfmsubss3.
+ * config/i386/i386-builtin.def: Add
+ __builtin_ia32_vfmsubpd256_mask,
+ __builtin_ia32_vfmsubpd256_maskz,
+ __builtin_ia32_vfmsubpd128_mask,
+ __builtin_ia32_vfmsubpd128_maskz,
+ __builtin_ia32_vfmsubps256_mask,
+ __builtin_ia32_vfmsubps256_maskz,
+ __builtin_ia32_vfmsubps128_mask,
+ __builtin_ia32_vfmsubps128_maskz,
+ __builtin_ia32_vfmsubpd512_mask,
+ __builtin_ia32_vfmsubpd512_maskz,
+ __builtin_ia32_vfmsubps512_mask,
+ __builtin_ia32_vfmsubps512_maskz, __builtin_ia32_vfmsubss3,
+ __builtin_ia32_vfmsubsd3, __builtin_ia32_vfmsubps,
+ __builtin_ia32_vfmsubpd, __builtin_ia32_vfmsubps256 and.
+ __builtin_ia32_vfmsubpd256.
+ * config/i386/sse.md (fma4i_fmsub_<mode>): New.
+ (<avx512>_fmsub_<mode>_maskz<round_expand_name>): Likewise.
+ (*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_1):
+ Likewise.
+ (*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_2):
+ Likewise.
+ (*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_3):
+ Likewise.
+ (fmai_vmfmsub_<mode><round_name>): Likewise.
+
+2018-10-21 H.J. Lu <hongjiu.lu@intel.com>
+
* config/i386/sse.md (*<plusminus_insn><mode>3<mask_name>_bcst_1):
Remove plus. Renamed to ...
(*sub<mode>3<mask_name>_bcst): This.
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 8473cd0..c0c8fa1 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -3355,9 +3355,9 @@ extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+ return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
(__v8df) __B,
- -(__v8df) __C,
+ (__v8df) __C,
(__mmask8) -1, __R);
}
@@ -3366,9 +3366,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
__m512d __C, const int __R)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+ return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
(__v8df) __B,
- -(__v8df) __C,
+ (__v8df) __C,
(__mmask8) __U, __R);
}
@@ -3388,9 +3388,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
__m512d __C, const int __R)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
+ return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A,
(__v8df) __B,
- -(__v8df) __C,
+ (__v8df) __C,
(__mmask8) __U, __R);
}
@@ -3398,9 +3398,9 @@ extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+ return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
(__v16sf) __B,
- -(__v16sf) __C,
+ (__v16sf) __C,
(__mmask16) -1, __R);
}
@@ -3409,9 +3409,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
__m512 __C, const int __R)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+ return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
(__v16sf) __B,
- -(__v16sf) __C,
+ (__v16sf) __C,
(__mmask16) __U, __R);
}
@@ -3431,9 +3431,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
__m512 __C, const int __R)
{
- return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
+ return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A,
(__v16sf) __B,
- -(__v16sf) __C,
+ (__v16sf) __C,
(__mmask16) __U, __R);
}
@@ -3806,28 +3806,28 @@ _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
(__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R)
#define _mm512_fmsub_round_pd(A, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), -1, R)
+ (__m512d)__builtin_ia32_vfmsubpd512_mask(A, B, C, -1, R)
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, -(C), U, R)
+ (__m512d)__builtin_ia32_vfmsubpd512_mask(A, B, C, U, R)
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
(__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R)
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
- (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, -(C), U, R)
+ (__m512d)__builtin_ia32_vfmsubpd512_maskz(A, B, C, U, R)
#define _mm512_fmsub_round_ps(A, B, C, R) \
- (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), -1, R)
+ (__m512)__builtin_ia32_vfmsubps512_mask(A, B, C, -1, R)
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
- (__m512)__builtin_ia32_vfmaddps512_mask(A, B, -(C), U, R)
+ (__m512)__builtin_ia32_vfmsubps512_mask(A, B, C, U, R)
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
(__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R)
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
- (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, -(C), U, R)
+ (__m512)__builtin_ia32_vfmsubps512_maskz(A, B, C, U, R)
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
(__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R)
@@ -12416,9 +12416,9 @@ extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+ return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
(__v8df) __B,
- -(__v8df) __C,
+ (__v8df) __C,
(__mmask8) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -12427,9 +12427,9 @@ extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
+ return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
(__v8df) __B,
- -(__v8df) __C,
+ (__v8df) __C,
(__mmask8) __U,
_MM_FROUND_CUR_DIRECTION);
}
@@ -12449,9 +12449,9 @@ extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
- return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
+ return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A,
(__v8df) __B,
- -(__v8df) __C,
+ (__v8df) __C,
(__mmask8) __U,
_MM_FROUND_CUR_DIRECTION);
}
@@ -12460,9 +12460,9 @@ extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+ return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
(__v16sf) __B,
- -(__v16sf) __C,
+ (__v16sf) __C,
(__mmask16) -1,
_MM_FROUND_CUR_DIRECTION);
}
@@ -12471,9 +12471,9 @@ extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
+ return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
(__v16sf) __B,
- -(__v16sf) __C,
+ (__v16sf) __C,
(__mmask16) __U,
_MM_FROUND_CUR_DIRECTION);
}
@@ -12493,9 +12493,9 @@ extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
- return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
+ return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A,
(__v16sf) __B,
- -(__v16sf) __C,
+ (__v16sf) __C,
(__mmask16) __U,
_MM_FROUND_CUR_DIRECTION);
}
diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h
index a4fb0b0..fcc35c3 100644
--- a/gcc/config/i386/avx512vlintrin.h
+++ b/gcc/config/i386/avx512vlintrin.h
@@ -4117,9 +4117,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fmsub_pd (__m256d __A, __mmask8 __U, __m256d __B,
__m256d __C)
{
- return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
+ return (__m256d) __builtin_ia32_vfmsubpd256_mask ((__v4df) __A,
(__v4df) __B,
- -(__v4df) __C,
+ (__v4df) __C,
(__mmask8) __U);
}
@@ -4139,9 +4139,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fmsub_pd (__mmask8 __U, __m256d __A, __m256d __B,
__m256d __C)
{
- return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
+ return (__m256d) __builtin_ia32_vfmsubpd256_maskz ((__v4df) __A,
(__v4df) __B,
- -(__v4df) __C,
+ (__v4df) __C,
(__mmask8) __U);
}
@@ -4149,9 +4149,9 @@ extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmsub_pd (__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
{
- return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
+ return (__m128d) __builtin_ia32_vfmsubpd128_mask ((__v2df) __A,
(__v2df) __B,
- -(__v2df) __C,
+ (__v2df) __C,
(__mmask8) __U);
}
@@ -4171,9 +4171,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmsub_pd (__mmask8 __U, __m128d __A, __m128d __B,
__m128d __C)
{
- return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
+ return (__m128d) __builtin_ia32_vfmsubpd128_maskz ((__v2df) __A,
(__v2df) __B,
- -(__v2df) __C,
+ (__v2df) __C,
(__mmask8) __U);
}
@@ -4181,9 +4181,9 @@ extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fmsub_ps (__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
{
- return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
+ return (__m256) __builtin_ia32_vfmsubps256_mask ((__v8sf) __A,
(__v8sf) __B,
- -(__v8sf) __C,
+ (__v8sf) __C,
(__mmask8) __U);
}
@@ -4203,9 +4203,9 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_fmsub_ps (__mmask8 __U, __m256 __A, __m256 __B,
__m256 __C)
{
- return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
+ return (__m256) __builtin_ia32_vfmsubps256_maskz ((__v8sf) __A,
(__v8sf) __B,
- -(__v8sf) __C,
+ (__v8sf) __C,
(__mmask8) __U);
}
@@ -4213,9 +4213,9 @@ extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fmsub_ps (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
{
- return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
+ return (__m128) __builtin_ia32_vfmsubps128_mask ((__v4sf) __A,
(__v4sf) __B,
- -(__v4sf) __C,
+ (__v4sf) __C,
(__mmask8) __U);
}
@@ -4233,9 +4233,9 @@ extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fmsub_ps (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
- return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
+ return (__m128) __builtin_ia32_vfmsubps128_maskz ((__v4sf) __A,
(__v4sf) __B,
- -(__v4sf) __C,
+ (__v4sf) __C,
(__mmask8) __U);
}
diff --git a/gcc/config/i386/fmaintrin.h b/gcc/config/i386/fmaintrin.h
index 660d345..2eddd89 100644
--- a/gcc/config/i386/fmaintrin.h
+++ b/gcc/config/i386/fmaintrin.h
@@ -86,48 +86,48 @@ extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_pd (__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B,
- -(__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmsubpd ((__v2df)__A, (__v2df)__B,
+ (__v2df)__C);
}
extern __inline __m256d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsub_pd (__m256d __A, __m256d __B, __m256d __C)
{
- return (__m256d)__builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B,
- -(__v4df)__C);
+ return (__m256d)__builtin_ia32_vfmsubpd256 ((__v4df)__A, (__v4df)__B,
+ (__v4df)__C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_ps (__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B,
- -(__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmsubps ((__v4sf)__A, (__v4sf)__B,
+ (__v4sf)__C);
}
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsub_ps (__m256 __A, __m256 __B, __m256 __C)
{
- return (__m256)__builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B,
- -(__v8sf)__C);
+ return (__m256)__builtin_ia32_vfmsubps256 ((__v8sf)__A, (__v8sf)__B,
+ (__v8sf)__C);
}
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_sd (__m128d __A, __m128d __B, __m128d __C)
{
- return (__m128d)__builtin_ia32_vfmaddsd3 ((__v2df)__A, (__v2df)__B,
- -(__v2df)__C);
+ return (__m128d)__builtin_ia32_vfmsubsd3 ((__v2df)__A, (__v2df)__B,
+ (__v2df)__C);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_fmsub_ss (__m128 __A, __m128 __B, __m128 __C)
{
- return (__m128)__builtin_ia32_vfmaddss3 ((__v4sf)__A, (__v4sf)__B,
- -(__v4sf)__C);
+ return (__m128)__builtin_ia32_vfmsubss3 ((__v4sf)__A, (__v4sf)__B,
+ (__v4sf)__C);
}
extern __inline __m128d
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index dc4c70c..f5b5e56 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -1903,10 +1903,18 @@ BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask, "__builtin_ia32_vfmsubpd256_mask", IX86_BUILTIN_VFMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_maskz, "__builtin_ia32_vfmsubpd256_maskz", IX86_BUILTIN_VFMSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask, "__builtin_ia32_vfmsubpd128_mask", IX86_BUILTIN_VFMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_maskz, "__builtin_ia32_vfmsubpd128_maskz", IX86_BUILTIN_VFMSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask, "__builtin_ia32_vfmsubps256_mask", IX86_BUILTIN_VFMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_maskz, "__builtin_ia32_vfmsubps256_maskz", IX86_BUILTIN_VFMSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask, "__builtin_ia32_vfmsubps128_mask", IX86_BUILTIN_VFMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_maskz, "__builtin_ia32_vfmsubps128_maskz", IX86_BUILTIN_VFMSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI)
@@ -2768,8 +2776,12 @@ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask_round, "__builtin_ia32_vfmsubpd512_mask", IX86_BUILTIN_VFMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_maskz_round, "__builtin_ia32_vfmsubpd512_maskz", IX86_BUILTIN_VFMSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask_round, "__builtin_ia32_vfmsubps512_mask", IX86_BUILTIN_VFMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_maskz_round, "__builtin_ia32_vfmsubps512_maskz", IX86_BUILTIN_VFMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
@@ -2855,11 +2867,17 @@ BDESC_FIRST (multi_arg, MULTI_ARG,
BDESC (OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF)
BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf, "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3, UNKNOWN, (int)MULTI_ARG_3_SF)
BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df, "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3, UNKNOWN, (int)MULTI_ARG_3_DF)
+BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmsub_v4sf, "__builtin_ia32_vfmsubss3", IX86_BUILTIN_VFMSUBSS3, UNKNOWN, (int)MULTI_ARG_3_SF)
+BDESC (OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmsub_v2df, "__builtin_ia32_vfmsubsd3", IX86_BUILTIN_VFMSUBSD3, UNKNOWN, (int)MULTI_ARG_3_DF)
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF)
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF)
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2)
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2)
+BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsub_v4sf, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF)
+BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsub_v2df, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF)
+BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsub_v8sf, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2)
+BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsub_v4df, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2)
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF)
BDESC (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 520afc5..0fdaaed 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -3748,6 +3748,14 @@
(match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
(match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
+(define_expand "fma4i_fmsub_<mode>"
+ [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
+ (fma:FMAMODE_AVX512
+ (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
+ (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
+ (neg:FMAMODE_AVX512
+ (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
+
(define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
[(match_operand:VF_AVX512VL 0 "register_operand")
(match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
@@ -3886,6 +3894,20 @@
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
+(define_expand "<avx512>_fmsub_<mode>_maskz<round_expand_name>"
+ [(match_operand:VF_AVX512VL 0 "register_operand")
+ (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
+ (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
+ (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
+ (match_operand:<avx512fmaskmode> 4 "register_operand")]
+ "TARGET_AVX512F && <round_mode512bit_condition>"
+{
+ emit_insn (gen_fma_fmsub_<mode>_maskz_1<round_expand_name> (
+ operands[0], operands[1], operands[2], operands[3],
+ CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
+ DONE;
+})
+
(define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
[(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
(fma:VF_SF_AVX512VL
@@ -3901,6 +3923,49 @@
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
+(define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_1"
+ [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
+ (fma:VF_AVX512
+ (match_operand:VF_AVX512 1 "register_operand" "0,v")
+ (match_operand:VF_AVX512 2 "register_operand" "v,0")
+ (neg:VF_AVX512
+ (vec_duplicate:VF_AVX512
+ (match_operand:<ssescalarmode> 3 "memory_operand" "m,m")))))]
+ "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
+ "vfmsub213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_2"
+ [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
+ (fma:VF_AVX512
+ (vec_duplicate:VF_AVX512
+ (match_operand:<ssescalarmode> 1 "memory_operand" "m,m"))
+ (match_operand:VF_AVX512 2 "register_operand" "0,v")
+ (neg:VF_AVX512
+ (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
+ "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
+ "@
+ vfmsub132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
+ vfmsub231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_3"
+ [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
+ (fma:VF_AVX512
+ (match_operand:VF_AVX512 1 "register_operand" "0,v")
+ (vec_duplicate:VF_AVX512
+ (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
+ (neg:VF_AVX512
+ (match_operand:VF_AVX512 3 "nonimmediate_operand" "v,0"))))]
+ "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
+ "@
+ vfmsub132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
+ vfmsub231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
[(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
(vec_merge:VF_AVX512VL
@@ -4249,6 +4314,18 @@
(const_int 1)))]
"TARGET_FMA")
+(define_expand "fmai_vmfmsub_<mode><round_name>"
+ [(set (match_operand:VF_128 0 "register_operand")
+ (vec_merge:VF_128
+ (fma:VF_128
+ (match_operand:VF_128 1 "<round_nimm_predicate>")
+ (match_operand:VF_128 2 "<round_nimm_predicate>")
+ (neg:VF_128
+ (match_operand:VF_128 3 "<round_nimm_predicate>")))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_FMA")
+
(define_insn "*fmai_fmadd_<mode>"
[(set (match_operand:VF_128 0 "register_operand" "=v,v")
(vec_merge:VF_128
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c3ea50c..a1d2240 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,18 @@
+2018-10-21 H.J. Lu <hongjiu.lu@intel.com>
+
+ PR target/72782
+ * gcc.target/i386/avx512f-fmsub-df-zmm-1.c: New test.
+ * gcc.target/i386/avx512f-fmsub-sf-zmm-1.c: Likewise.
+ * gcc.target/i386/avx512f-fmsub-sf-zmm-2.c: Likewise.
+ * gcc.target/i386/avx512f-fmsub-sf-zmm-3.c: Likewise.
+ * gcc.target/i386/avx512f-fmsub-sf-zmm-4.c: Likewise.
+ * gcc.target/i386/avx512f-fmsub-sf-zmm-5.c: Likewise.
+ * gcc.target/i386/avx512f-fmsub-sf-zmm-6.c: Likewise.
+ * gcc.target/i386/avx512f-fmsub-sf-zmm-7.c: Likewise.
+ * gcc.target/i386/avx512f-fmsub-sf-zmm-8.c: Likewise.
+ * gcc.target/i386/avx512vl-fmsub-sf-xmm-1.c: Likewise.
+ * gcc.target/i386/avx512vl-fmsub-sf-ymm-1.c: Likewise.
+
2018-10-21 Paul Thomas <pault@gcc.gnu.org>
PR fortran/71880
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-df-zmm-1.c b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-df-zmm-1.c
new file mode 100644
index 0000000..840888a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-df-zmm-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...pd\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastsd\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512d
+#define vec 512
+#define op fmsub
+#define suffix pd
+#define SCALAR double
+
+#include "avx512-fma-1.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-1.c b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-1.c
new file mode 100644
index 0000000..0cb675b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-1.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-2.c b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-2.c
new file mode 100644
index 0000000..10212d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-2.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-3.c b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-3.c
new file mode 100644
index 0000000..feb3407
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-3.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-3.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-4.c b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-4.c
new file mode 100644
index 0000000..4305fff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-4.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-4.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-5.c b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-5.c
new file mode 100644
index 0000000..d57251f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-5.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-5.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-6.c b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-6.c
new file mode 100644
index 0000000..b26a9ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-6.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%zmm\[0-9\]+" } } */
+
+#define type __m512
+#define vec 512
+#define op fmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-6.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-7.c b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-7.c
new file mode 100644
index 0000000..cc705af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-7.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\]*%zmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[^\n\]*%zmm\[0-9\]+" 1 } } */
+
+#define type __m512
+#define vec 512
+#define op fmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-7.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-8.c b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-8.c
new file mode 100644
index 0000000..2b929fa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-fmsub-sf-zmm-8.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\]*%zmm\[0-9\]+" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+%zmm\[0-9\]+, %zmm\[0-9\]+, %zmm0" 1 } } */
+
+#define type __m512
+#define vec 512
+#define op fmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-8.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-fmsub-sf-xmm-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-fmsub-sf-xmm-1.c
new file mode 100644
index 0000000..70efbcc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-fmsub-sf-xmm-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mfma -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %xmm\[0-9\]+, %xmm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%xmm\[0-9\]+" } } */
+
+#define type __m128
+#define vec
+#define op fmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-1.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-fmsub-sf-ymm-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-fmsub-sf-ymm-1.c
new file mode 100644
index 0000000..a7c1b37
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-fmsub-sf-ymm-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mfma -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vfmsub...ps\[ \\t\]+\\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %ymm\[0-9\]+, %ymm0" 1 } } */
+/* { dg-final { scan-assembler-not "vbroadcastss\[^\n\]*%ymm\[0-9\]+" } } */
+
+#define type __m256
+#define vec 256
+#define op fmsub
+#define suffix ps
+#define SCALAR float
+
+#include "avx512-fma-1.h"