Add FMA intrinsics and testcases.

gcc/

2011-08-30  Ilya Tocar  <ilya.tocar@intel.com>

	* config/i386/fmaintrin.h: New.
	* config.gcc: Add fmaintrin.h.
	* config/i386/i386.c
	(enum ix86_builtins) <IX86_BUILTIN_VFMADDSS3>: New.
	<IX86_BUILTIN_VFMADDSD3>: Likewise.
	* config/i386/sse.md (fmai_vmfmadd_<mode>): New.
	(*fmai_fmadd_<mode>): Likewise.
	(*fmai_fmsub_<mode>): Likewise.
	(*fmai_fnmadd_<mode>): Likewise.
	(*fmai_fnmsub_<mode>): Likewise.
	* config/i386/immintrin.h: Add fmaintrin.h.

gcc/testsuite/

2011-08-30  Ilya Tocar  <ilya.tocar@intel.com>

	* gcc.target/i386/fma-check.h: New.
	* gcc.target/i386/fma-256-fmaddXX.c: New testcase.
	* gcc.target/i386/fma-256-fmaddsubXX.c: Likewise.
	* gcc.target/i386/fma-256-fmsubXX.c: Likewise.
	* gcc.target/i386/fma-256-fmsubaddXX.c: Likewise.
	* gcc.target/i386/fma-256-fnmaddXX.c: Likewise.
	* gcc.target/i386/fma-256-fnmsubXX.c: Likewise.
	* gcc.target/i386/fma-fmaddXX.c: Likewise.
	* gcc.target/i386/fma-fmaddsubXX.c: Likewise.
	* gcc.target/i386/fma-fmsubXX.c: Likewise.
	* gcc.target/i386/fma-fmsubaddXX.c: Likewise.
	* gcc.target/i386/fma-fnmaddXX.c: Likewise.
	* gcc.target/i386/fma-fnmsubXX.c: Likewise.
	* gcc.target/i386/fma-compile.c: Likewise.
	* gcc.target/i386/i386.exp (check_effective_target_fma): New.
	* gcc.target/i386/sse-12.c: Add -mfma.
	* gcc.target/i386/sse-13.c: Likewise.
	* gcc.target/i386/sse-14.c: Likewise.
	* gcc.target/i386/sse-22.c: Likewise.
	* gcc.target/i386/sse-23.c: Likewise.
	* g++.dg/other/i386-2.C: Likewise.
	* g++.dg/other/i386-3.C: Likewise.

From-SVN: r178311
author: Ilya Tocar <ilya.tocar@intel.com> 2011-08-30 14:02:53 +0000
committer: H.J. Lu <hjl@gcc.gnu.org> 2011-08-30 07:02:53 -0700
commit: 2ddd46d69b09a88fb82832285b69090fa08bddc2 (patch)
tree: 175e06db22b216944eb8ef409716322c278cd349 /gcc
parent: c199ccf75867fa7287570ff1ec19ef76cc9d5ea6 (diff)
download: gcc-2ddd46d69b09a88fb82832285b69090fa08bddc2.zip
gcc-2ddd46d69b09a88fb82832285b69090fa08bddc2.tar.gz
gcc-2ddd46d69b09a88fb82832285b69090fa08bddc2.tar.bz2
29 files changed, 1614 insertions, 21 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b89303d..03db902 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,17 @@
+2011-08-30  Ilya Tocar  <ilya.tocar@intel.com>
+
+	* config/i386/fmaintrin.h: New.
+	* config.gcc: Add fmaintrin.h.
+	* config/i386/i386.c
+	(enum ix86_builtins) <IX86_BUILTIN_VFMADDSS3>: New.
+	<IX86_BUILTIN_VFMADDSD3>: Likewise.
+	* config/i386/sse.md (fmai_vmfmadd_<mode>): New.
+	(*fmai_fmadd_<mode>): Likewise.
+	(*fmai_fmsub_<mode>): Likewise.
+	(*fmai_fnmadd_<mode>): Likewise.
+	(*fmai_fnmsub_<mode>): Likewise.
+	* config/i386/immintrin.h: Add fmaintrin.h.
+
 2011-08-30  Bernd Schmidt  <bernds@codesourcery.com>
 
 	* genautomata.c (NO_COMB_OPTION): New macro.
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 67aae86..81b542c 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -353,7 +353,7 @@ i[34567]86-*-*)
 		       immintrin.h x86intrin.h avxintrin.h xopintrin.h
 		       ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
 		       lzcntintrin.h bmiintrin.h bmi2intrin.h tbmintrin.h
-		       avx2intrin.h"
+		       avx2intrin.h fmaintrin.h"
 	;;
 x86_64-*-*)
 	cpu_type=i386
@@ -366,7 +366,7 @@ x86_64-*-*)
 		       immintrin.h x86intrin.h avxintrin.h xopintrin.h
 		       ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
 		       lzcntintrin.h bmiintrin.h tbmintrin.h bmi2intrin.h
-		       avx2intrin.h"
+		       avx2intrin.h fmaintrin.h"
 	need_64bit_hwint=yes
 	;;
 ia64-*-*)
diff --git a/gcc/config/i386/fmaintrin.h b/gcc/config/i386/fmaintrin.h
new file mode 100644
index 0000000..9ec9d17
--- /dev/null
+++ b/gcc/config/i386/fmaintrin.h
@@ -0,0 +1,297 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+# error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _FMAINTRIN_H_INCLUDED
+#define _FMAINTRIN_H_INCLUDED
+
+#ifndef __FMA__
+# error "FMA instruction set not enabled"
+#else
+
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d)__builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B,
+                                           (__v2df)__C);
+}
+
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmadd_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+  return (__m256d)__builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B,
+                                              (__v4df)__C);
+}
+
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128)__builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B,
+                                          (__v4sf)__C);
+}
+
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+  return (__m256)__builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B,
+                                             (__v8sf)__C);
+}
+
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_sd (__m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d) __builtin_ia32_vfmaddsd3 ((__v2df)__A, (__v2df)__B,
+                                             (__v2df)__C);
+}
+
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmadd_ss (__m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128) __builtin_ia32_vfmaddss3 ((__v4sf)__A, (__v4sf)__B,
+                                            (__v4sf)__C);
+}
+
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d)__builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B,
+                                           -(__v2df)__C);
+}
+
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmsub_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+  return (__m256d)__builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B,
+                                              -(__v4df)__C);
+}
+
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128)__builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B,
+                                          -(__v4sf)__C);
+}
+
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmsub_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+  return (__m256)__builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B,
+                                             -(__v8sf)__C);
+}
+
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_sd (__m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d)__builtin_ia32_vfmaddsd3 ((__v2df)__A, (__v2df)__B,
+                                            -(__v2df)__C);
+}
+
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsub_ss (__m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128)__builtin_ia32_vfmaddss3 ((__v4sf)__A, (__v4sf)__B,
+                                           -(__v4sf)__C);
+}
+
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d)__builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B,
+                                           (__v2df)__C);
+}
+
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+  return (__m256d)__builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B,
+                                              (__v4df)__C);
+}
+
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128)__builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B,
+                                          (__v4sf)__C);
+}
+
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+  return (__m256)__builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B,
+                                             (__v8sf)__C);
+}
+
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_sd (__m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d)__builtin_ia32_vfmaddsd3 (-(__v2df)__A, (__v2df)__B,
+                                            (__v2df)__C);
+}
+
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmadd_ss (__m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128)__builtin_ia32_vfmaddss3 (-(__v4sf)__A, (__v4sf)__B,
+                                           (__v4sf)__C);
+}
+
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d)__builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B,
+                                           -(__v2df)__C);
+}
+
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+  return (__m256d)__builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B,
+                                              -(__v4df)__C);
+}
+
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128)__builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B,
+                                          -(__v4sf)__C);
+}
+
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+  return (__m256)__builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B,
+                                             -(__v8sf)__C);
+}
+
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_sd (__m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d)__builtin_ia32_vfmaddsd3 (-(__v2df)__A, (__v2df)__B,
+                                            -(__v2df)__C);
+}
+
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fnmsub_ss (__m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128)__builtin_ia32_vfmaddss3 (-(__v4sf)__A, (__v4sf)__B,
+                                           -(__v4sf)__C);
+}
+
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B,
+                                              (__v2df)__C);
+}
+
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+  return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A,
+                                                 (__v4df)__B,
+                                                 (__v4df)__C);
+}
+
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B,
+                                             (__v4sf)__C);
+}
+
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+  return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A,
+                                                (__v8sf)__B,
+                                                (__v8sf)__C);
+}
+
+extern __inline __m128d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C)
+{
+  return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B,
+                                              -(__v2df)__C);
+}
+
+extern __inline __m256d
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+  return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A,
+                                                 (__v4df)__B,
+                                                 -(__v4df)__C);
+}
+
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C)
+{
+  return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B,
+                                             -(__v4sf)__C);
+}
+
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+  return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A,
+                                                (__v8sf)__B,
+                                                -(__v8sf)__C);
+}
+
+#endif
+
+#endif
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 32495ee..504f013 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -24055,7 +24055,7 @@ enum ix86_builtins
   IX86_BUILTIN_VEC_PERM_V4DF,
   IX86_BUILTIN_VEC_PERM_V8SF,
 
-  /* FMA4 and XOP instructions.  */
+  /* FMA4 instructions.  */
   IX86_BUILTIN_VFMADDSS,
   IX86_BUILTIN_VFMADDSD,
   IX86_BUILTIN_VFMADDPS,
@@ -24067,6 +24067,11 @@ enum ix86_builtins
   IX86_BUILTIN_VFMADDSUBPS256,
   IX86_BUILTIN_VFMADDSUBPD256,
 
+  /* FMA3 instructions.  */
+  IX86_BUILTIN_VFMADDSS3,
+  IX86_BUILTIN_VFMADDSD3,
+
+  /* XOP instructions.  */
   IX86_BUILTIN_VPCMOV,
   IX86_BUILTIN_VPCMOV_V2DI,
   IX86_BUILTIN_VPCMOV_V4SI,
@@ -25450,6 +25455,13 @@ static const struct builtin_description bdesc_multi_arg[] =
     "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
     UNKNOWN, (int)MULTI_ARG_3_DF },
 
+  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
+    "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
+    UNKNOWN, (int)MULTI_ARG_3_SF },
+  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
+    "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
+    UNKNOWN, (int)MULTI_ARG_3_DF },
+
   { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
     "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
     UNKNOWN, (int)MULTI_ARG_3_SF },
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index d2e715f..102814e 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -72,6 +72,10 @@
 #include <bmi2intrin.h>
 #endif
 
+#ifdef __FMA__
+#include <fmaintrin.h>
+#endif
+
 #ifdef __RDRND__
 extern __inline int
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index fa22e9a..8ce3e3a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1719,6 +1719,89 @@
   operands[4] = CONST0_RTX (<MODE>mode);
 })
 
+(define_expand "fmai_vmfmadd_<mode>"
+  [(set (match_operand:VF_128 0 "register_operand")
+	(vec_merge:VF_128
+	  (fma:VF_128
+	    (match_operand:VF_128 1 "nonimmediate_operand")
+	    (match_operand:VF_128 2 "nonimmediate_operand")
+	    (match_operand:VF_128 3 "nonimmediate_operand"))
+	  (match_dup 0)
+	  (const_int 1)))]
+  "TARGET_FMA")
+
+(define_insn "*fmai_fmadd_<mode>"
+  [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
+        (vec_merge:VF_128
+	  (fma:VF_128
+	    (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
+	    (match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
+	    (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
+	  (match_dup 0)
+	  (const_int 1)))]
+  "TARGET_FMA"
+  "@
+   vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
+   vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
+   vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fmsub_<mode>"
+  [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
+        (vec_merge:VF_128
+	  (fma:VF_128
+	    (match_operand:VF_128   1 "nonimmediate_operand" "%0, 0,x")
+	    (match_operand:VF_128   2 "nonimmediate_operand" "xm, x,xm")
+	    (neg:VF_128
+	      (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
+	  (match_dup 0)
+	  (const_int 1)))]
+  "TARGET_FMA"
+  "@
+   vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
+   vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
+   vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fnmadd_<mode>"
+  [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
+        (vec_merge:VF_128
+	  (fma:VF_128
+	    (neg:VF_128
+	      (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
+	    (match_operand:VF_128   2 "nonimmediate_operand" "xm, x,xm")
+	    (match_operand:VF_128   3 "nonimmediate_operand" " x,xm,0"))
+	  (match_dup 0)
+	  (const_int 1)))]
+  "TARGET_FMA"
+  "@
+   vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
+   vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
+   vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fnmsub_<mode>"
+  [(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
+        (vec_merge:VF_128
+	  (fma:VF_128
+	    (neg:VF_128
+	      (match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
+	    (match_operand:VF_128   2 "nonimmediate_operand" "xm, x,xm")
+	    (neg:VF_128
+	      (match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
+	  (match_dup 0)
+	  (const_int 1)))]
+  "TARGET_FMA"
+  "@
+   vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
+   vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
+   vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*fma4i_vmfmadd_<mode>"
   [(set (match_operand:VF_128 0 "register_operand" "=x,x")
 	(vec_merge:VF_128
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 86b8019..55d74a9 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,28 @@
+2011-08-30  Ilya Tocar  <ilya.tocar@intel.com>
+
+	* gcc.target/i386/fma-check.h: New.
+	* gcc.target/i386/fma-256-fmaddXX.c: New testcase.
+	* gcc.target/i386/fma-256-fmaddsubXX.c: Likewise.
+	* gcc.target/i386/fma-256-fmsubXX.c: Likewise.
+	* gcc.target/i386/fma-256-fmsubaddXX.c: Likewise.
+	* gcc.target/i386/fma-256-fnmaddXX.c: Likewise.
+	* gcc.target/i386/fma-256-fnmsubXX.c: Likewise.
+	* gcc.target/i386/fma-fmaddXX.c: Likewise.
+	* gcc.target/i386/fma-fmaddsubXX.c: Likewise.
+	* gcc.target/i386/fma-fmsubXX.c: Likewise.
+	* gcc.target/i386/fma-fmsubaddXX.c: Likewise.
+	* gcc.target/i386/fma-fnmaddXX.c: Likewise.
+	* gcc.target/i386/fma-fnmsubXX.c: Likewise.
+	* gcc.target/i386/fma-compile.c: Likewise.
+	* gcc.target/i386/i386.exp (check_effective_target_fma): New.
+	* gcc.target/i386/sse-12.c: Add -mfma.
+	* gcc.target/i386/sse-13.c: Likewise.
+	* gcc.target/i386/sse-14.c: Likewise.
+	* gcc.target/i386/sse-22.c: Likewise.
+	* gcc.target/i386/sse-23.c: Likewise.
+	* g++.dg/other/i386-2.C: Likewise.
+	* g++.dg/other/i386-3.C: Likewise.
+
 2011-08-30  Kirill Yukhin  <kirill.yukhin@intel.com>
 
 	PR testsuite/50185
diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C
index 8c9c911..e8237a4 100644
--- a/gcc/testsuite/g++.dg/other/i386-2.C
+++ b/gcc/testsuite/g++.dg/other/i386-2.C
@@ -1,9 +1,10 @@
 /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma" } */
 
 /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
    xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
-   popcntintrin.h and mm_malloc.h.h are usable with -O -pedantic-errors.  */
+   popcntintrin.h, fmaintrin.h and mm_malloc.h are usable with
+   -O -pedantic-errors.  */
 
 #include <x86intrin.h>
 
diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C
index d8c6f8d..9abbd32 100644
--- a/gcc/testsuite/g++.dg/other/i386-3.C
+++ b/gcc/testsuite/g++.dg/other/i386-3.C
@@ -1,9 +1,10 @@
 /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma" } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma" } */
 
 /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
    xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
-   popcntintrin.h and mm_malloc.h are usable with
+   popcntintrin.h, fmaintrin.h and mm_malloc.h are usable with
    -O -fkeep-inline-functions.  */
 
 #include <x86intrin.h>
diff --git a/gcc/testsuite/gcc.target/i386/fma-256-fmaddXX.c b/gcc/testsuite/gcc.target/i386/fma-256-fmaddXX.c
new file mode 100644
index 0000000..7e73402
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-256-fmaddXX.c
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-require-effective-target fma } */
+/* { dg-options "-O2 -mfma" } */
+
+#include "fma-check.h"
+
+#include <x86intrin.h>
+#include "m256-check.h"
+
+void
+check_mm256_fmadd_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+  union256d a, b, c, e;
+  a.x = __A;
+  b.x = __B;
+  c.x = __C;
+  double d[4];
+  int i;
+  e.x = _mm256_fmadd_pd (__A, __B, __C);
+  for (i = 0; i < 4; i++)
+    {
+      d[i] = a.a[i] * b.a[i] + c.a[i];
+    }
+  if (check_union256d (e, d))
+    abort ();
+}
+
+void
+check_mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+  union256 a, b, c, e;
+  a.x = __A;
+  b.x = __B;
+  c.x = __C;
+  float d[8];
+  int i;
+  e.x = _mm256_fmadd_ps (__A, __B, __C);
+  for (i = 0; i < 8; i++)
+    {
+      d[i] = a.a[i] * b.a[i] + c.a[i];
+    }
+  if (check_union256 (e, d))
+    abort ();
+}
+
+static void
+fma_test (void)
+{
+  union256 c[3];
+  union256d d[3];
+  int i, j;
+  for (i = 0; i < 3; i++)
+    {
+      for (j = 0; j < 8; j++)
+	c[i].a[j] = i * j + 3.5;
+      for (j = 0; j < 4; j++)
+	d[i].a[j] = i * j + 3.5;
+    }
+  check_mm256_fmadd_pd (d[0].x, d[1].x, d[2].x);
+  check_mm256_fmadd_ps (c[0].x, c[1].x, c[2].x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/fma-256-fmaddsubXX.c b/gcc/testsuite/gcc.target/i386/fma-256-fmaddsubXX.c
new file mode 100644
index 0000000..4b61ad5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-256-fmaddsubXX.c
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-require-effective-target fma } */
+/* { dg-options "-O2 -mfma" } */
+
+#include "fma-check.h"
+
+#include <x86intrin.h>
+#include "m256-check.h"
+
+void
+check_mm256_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+  union256 a, b, c, e;
+  a.x = __A;
+  b.x = __B;
+  c.x = __C;
+  float d[8];
+  int i;
+  e.x = _mm256_fmaddsub_ps (__A, __B, __C);
+  for (i = 0; i < 8; i++)
+    {
+      d[i] = a.a[i] * b.a[i] + (i % 2 == 1 ? c.a[i] : -c.a[i]);
+    }
+  if (check_union256 (e, d))
+    abort ();
+}
+
+void
+check_mm256_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+  union256d a, b, c, e;
+  a.x = __A;
+  b.x = __B;
+  c.x = __C;
+  double d[4];
+  int i;
+  e.x = _mm256_fmaddsub_pd (__A, __B, __C);
+  for (i = 0; i < 4; i++)
+    {
+      d[i] = a.a[i] * b.a[i] + (i % 2 == 1 ? c.a[i] : -c.a[i]);
+    }
+  if (check_union256d (e, d))
+    abort ();
+}
+
+static void
+fma_test (void)
+{
+  union256 c[3];
+  union256d d[3];
+  int i, j;
+  for (i = 0; i < 3; i++)
+    {
+      for (j = 0; j < 8; j++)
+	c[i].a[j] = i * j + 3.5;
+      for (j = 0; j < 4; j++)
+	d[i].a[j] = i * j + 3.5;
+    }
+  check_mm256_fmaddsub_pd (d[0].x, d[1].x, d[2].x);
+  check_mm256_fmaddsub_ps (c[0].x, c[1].x, c[2].x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/fma-256-fmsubXX.c b/gcc/testsuite/gcc.target/i386/fma-256-fmsubXX.c
new file mode 100644
index 0000000..d92aec0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-256-fmsubXX.c
@@ -0,0 +1,62 @@
+/* { dg-do run } */
+/* { dg-require-effective-target fma } */
+/* { dg-options "-O2 -mfma" } */
+
+#include "fma-check.h"
+
+#include <x86intrin.h>
+#include "m256-check.h"
+
+
+void
+check_mm256_fmsub_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+  union256d a, b, c, e;
+  a.x = __A;
+  b.x = __B;
+  c.x = __C;
+  double d[4];
+  int i;
+  e.x = _mm256_fmsub_pd (__A, __B, __C);
+  for (i = 0; i < 4; i++)
+    {
+      d[i] = a.a[i] * b.a[i] - c.a[i];
+    }
+  if (check_union256d (e, d))
+    abort ();
+}
+
+void
+check_mm256_fmsub_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+  union256 a, b, c, e;
+  a.x = __A;
+  b.x = __B;
+  c.x = __C;
+  float d[8];
+  int i;
+  e.x = _mm256_fmsub_ps (__A, __B, __C);
+  for (i = 0; i < 8; i++)
+    {
+      d[i] = a.a[i] * b.a[i] - c.a[i];
+    }
+  if (check_union256 (e, d))
+    abort ();
+}
+
+static void
+fma_test (void)
+{
+  union256 c[3];
+  union256d d[3];
+  int i, j;
+  for (i = 0; i < 3; i++)
+    {
+      for (j = 0; j < 8; j++)
+	c[i].a[j] = i * j + 3.5;
+      for (j = 0; j < 4; j++)
+	d[i].a[j] = i * j + 3.5;
+    }
+  check_mm256_fmsub_pd (d[0].x, d[1].x, d[2].x);
+  check_mm256_fmsub_ps (c[0].x, c[1].x, c[2].x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/fma-256-fmsubaddXX.c b/gcc/testsuite/gcc.target/i386/fma-256-fmsubaddXX.c
new file mode 100644
index 0000000..84a41c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-256-fmsubaddXX.c
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-require-effective-target fma } */
+/* { dg-options "-O2 -mfma" } */
+
+#include "fma-check.h"
+
+#include <x86intrin.h>
+#include "m256-check.h"
+
+void
+check_mm256_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+  union256 a, b, c, e;
+  a.x = __A;
+  b.x = __B;
+  c.x = __C;
+  float d[8];
+  int i;
+  e.x = _mm256_fmsubadd_ps (__A, __B, __C);
+  for (i = 0; i < 8; i++)
+    {
+      d[i] = a.a[i] * b.a[i] + (i % 2 == 1 ? -c.a[i] : c.a[i]);
+    }
+  if (check_union256 (e, d))
+    abort ();
+}
+
+void
+check_mm256_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+  union256d a, b, c, e;
+  a.x = __A;
+  b.x = __B;
+  c.x = __C;
+  double d[4];
+  int i;
+  e.x = _mm256_fmsubadd_pd (__A, __B, __C);
+  for (i = 0; i < 4; i++)
+    {
+      d[i] = a.a[i] * b.a[i] + (i % 2 == 1 ? -c.a[i] : c.a[i]);
+    }
+  if (check_union256d (e, d))
+    abort ();
+}
+
+static void
+fma_test (void)
+{
+  union256 c[3];
+  union256d d[3];
+  int i, j;
+  for (i = 0; i < 3; i++)
+    {
+      for (j = 0; j < 8; j++)
+	c[i].a[j] = i * j + 3.5;
+      for (j = 0; j < 4; j++)
+	d[i].a[j] = i * j + 3.5;
+    }
+  check_mm256_fmsubadd_pd (d[0].x, d[1].x, d[2].x);
+  check_mm256_fmsubadd_ps (c[0].x, c[1].x, c[2].x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/fma-256-fnmaddXX.c b/gcc/testsuite/gcc.target/i386/fma-256-fnmaddXX.c
new file mode 100644
index 0000000..c0dfa69
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-256-fnmaddXX.c
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-require-effective-target fma } */
+/* { dg-options "-O2 -mfma" } */
+
+#include "fma-check.h"
+
+#include <x86intrin.h>
+#include "m256-check.h"
+
+void
+check_mm256_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+  union256d a, b, c, e;
+  a.x = __A;
+  b.x = __B;
+  c.x = __C;
+  double d[4];
+  int i;
+  e.x = _mm256_fnmadd_pd (__A, __B, __C);
+  for (i = 0; i < 4; i++)
+    {
+      d[i] = -a.a[i] * b.a[i] + c.a[i];
+    }
+  if (check_union256d (e, d))
+    abort ();
+}
+
+void
+check_mm256_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+  union256 a, b, c, e;
+  a.x = __A;
+  b.x = __B;
+  c.x = __C;
+  float d[8];
+  int i;
+  e.x = _mm256_fnmadd_ps (__A, __B, __C);
+  for (i = 0; i < 8; i++)
+    {
+      d[i] = -a.a[i] * b.a[i] + c.a[i];
+    }
+  if (check_union256 (e, d))
+    abort ();
+}
+
+static void
+fma_test (void)
+{
+  union256 c[3];
+  union256d d[3];
+  int i, j;
+  for (i = 0; i < 3; i++)
+    {
+      for (j = 0; j < 8; j++)
+	c[i].a[j] = i * j + 3.5;
+      for (j = 0; j < 4; j++)
+	d[i].a[j] = i * j + 3.5;
+    }
+  check_mm256_fnmadd_pd (d[0].x, d[1].x, d[2].x);
+  check_mm256_fnmadd_ps (c[0].x, c[1].x, c[2].x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/fma-256-fnmsubXX.c b/gcc/testsuite/gcc.target/i386/fma-256-fnmsubXX.c
new file mode 100644
index 0000000..ac4705e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-256-fnmsubXX.c
@@ -0,0 +1,62 @@
+/* { dg-do run } */
+/* { dg-require-effective-target fma } */
+/* { dg-options "-O2 -mfma" } */
+
+#include "fma-check.h"
+
+#include <x86intrin.h>
+#include "m256-check.h"
+
+
+void
+check_mm256_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C)
+{
+  union256d a, b, c, e;
+  a.x = __A;
+  b.x = __B;
+  c.x = __C;
+  double d[4];
+  int i;
+  e.x = _mm256_fnmsub_pd (__A, __B, __C);
+  for (i = 0; i < 4; i++)
+    {
+      d[i] = -a.a[i] * b.a[i] - c.a[i];
+    }
+  if (check_union256d (e, d))
+    abort ();
+}
+
+void
+check_mm256_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C)
+{
+  union256 a, b, c, e;
+  a.x = __A;
+  b.x = __B;
+  c.x = __C;
+  float d[8];
+  int i;
+  e.x = _mm256_fnmsub_ps (__A, __B, __C);
+  for (i = 0; i < 8; i++)
+    {
+      d[i] = -a.a[i] * b.a[i] - c.a[i];
+    }
+  if (check_union256 (e, d))
+    abort ();
+}
+
+static void
+fma_test (void)
+{
+  union256 c[3];
+  union256d d[3];
+  int i, j;
+  for (i = 0; i < 3; i++)
+    {
+      for (j = 0; j < 8; j++)
+	c[i].a[j] = i * j + 3.5;
+      for (j = 0; j < 4; j++)
+	d[i].a[j] = i * j + 3.5;
+    }
+  check_mm256_fnmsub_pd (d[0].x, d[1].x, d[2].x);
+  check_mm256_fnmsub_ps (c[0].x, c[1].x, c[2].x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/fma-check.h b/gcc/testsuite/gcc.target/i386/fma-check.h
new file mode 100644
index 0000000..696c4a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-check.h
@@ -0,0 +1,25 @@
+#include <stdlib.h>
+
+#include "cpuid.h"
+
+static void fma_test (void);
+
+static void __attribute__ ((noinline)) do_test (void)
+{ /* Out-of-line (noinline) entry through which main reaches fma_test.  */
+  fma_test ();
+}
+
+int
+main ()
+{ /* Run fma_test when the host can execute FMA; always report success.  */
+  unsigned int eax, ebx, ecx, edx;
+
+  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
+    return 0; /* __get_cpuid reported failure for leaf 1: skip the test.  */
+
+  /* Run FMA test only if host has FMA support and the OS enabled XSAVE.  */
+  if ((ecx & (bit_FMA | bit_OSXSAVE)) == (bit_FMA | bit_OSXSAVE))
+    do_test ();
+
+  exit (0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/fma-compile.c b/gcc/testsuite/gcc.target/i386/fma-compile.c
new file mode 100644
index 0000000..6d5daa5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-compile.c
@@ -0,0 +1,221 @@
+/* Test that the compiler properly generates floating point multiply
+   and add instructions on FMA systems.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -mfma" } */
+
+#include <x86intrin.h>
+
+__m128d
+check_mm_fmadd_pd (__m128d a, __m128d b, __m128d c)
+{ /* Expected to yield one of the two "vfmadd[^s]..pd" matches scanned below.  */
+  return _mm_fmadd_pd (a, b, c);
+}
+
+__m256d
+check_mm256_fmadd_pd (__m256d a, __m256d b, __m256d c)
+{ /* Expected to yield one of the two "vfmadd[^s]..pd" matches scanned below.  */
+  return _mm256_fmadd_pd (a, b, c);
+}
+
+__m128
+check_mm_fmadd_ps (__m128 a, __m128 b, __m128 c)
+{ /* Expected to yield one of the two "vfmadd[^s]..ps" matches scanned below.  */
+  return _mm_fmadd_ps (a, b, c);
+}
+
+__m256
+check_mm256_fmadd_ps (__m256 a, __m256 b, __m256 c)
+{ /* Expected to yield one of the two "vfmadd[^s]..ps" matches scanned below.  */
+  return _mm256_fmadd_ps (a, b, c);
+}
+
+__m128d
+check_mm_fmadd_sd (__m128d a, __m128d b, __m128d c)
+{ /* Expected to yield the single "vfmadd[^s]..sd" match scanned below.  */
+  return _mm_fmadd_sd (a, b, c);
+}
+
+__m128
+check_mm_fmadd_ss (__m128 a, __m128 b, __m128 c)
+{ /* Expected to yield the single "vfmadd[^s]..ss" match scanned below.  */
+  return _mm_fmadd_ss (a, b, c);
+}
+
+__m128d
+check_mm_fmsub_pd (__m128d a, __m128d b, __m128d c)
+{ /* Expected to yield one of the two "vfmsub[^s]..pd" matches scanned below.  */
+  return _mm_fmsub_pd (a, b, c);
+}
+
+__m256d
+check_mm256_fmsub_pd (__m256d a, __m256d b, __m256d c)
+{ /* Expected to yield one of the two "vfmsub[^s]..pd" matches scanned below.  */
+  return _mm256_fmsub_pd (a, b, c);
+}
+
+__m128
+check_mm_fmsub_ps (__m128 a, __m128 b, __m128 c)
+{ /* Expected to yield one of the two "vfmsub[^s]..ps" matches scanned below.  */
+  return _mm_fmsub_ps (a, b, c);
+}
+
+__m256
+check_mm256_fmsub_ps (__m256 a, __m256 b, __m256 c)
+{ /* Expected to yield one of the two "vfmsub[^s]..ps" matches scanned below.  */
+  return _mm256_fmsub_ps (a, b, c);
+}
+
+__m128d
+check_mm_fmsub_sd (__m128d a, __m128d b, __m128d c)
+{ /* Expected to yield the single "vfmsub[^s]..sd" match scanned below.  */
+  return _mm_fmsub_sd (a, b, c);
+}
+
+__m128
+check_mm_fmsub_ss (__m128 a, __m128 b, __m128 c)
+{ /* Expected to yield the single "vfmsub[^s]..ss" match scanned below.  */
+  return _mm_fmsub_ss (a, b, c);
+}
+
+__m128d
+check_mm_fnmadd_pd (__m128d a, __m128d b, __m128d c)
+{ /* Expected to yield one of the two "vfnmadd...pd" matches scanned below.  */
+  return _mm_fnmadd_pd (a, b, c);
+}
+
+__m256d
+check_mm256_fnmadd_pd (__m256d a, __m256d b, __m256d c)
+{ /* Expected to yield one of the two "vfnmadd...pd" matches scanned below.  */
+  return _mm256_fnmadd_pd (a, b, c);
+}
+
+__m128
+check_mm_fnmadd_ps (__m128 a, __m128 b, __m128 c)
+{ /* Expected to yield one of the two "vfnmadd...ps" matches scanned below.  */
+  return _mm_fnmadd_ps (a, b, c);
+}
+
+__m256
+check_mm256_fnmadd_ps (__m256 a, __m256 b, __m256 c)
+{ /* Expected to yield one of the two "vfnmadd...ps" matches scanned below.  */
+  return _mm256_fnmadd_ps (a, b, c);
+}
+
+__m128d
+check_mm_fnmadd_sd (__m128d a, __m128d b, __m128d c)
+{ /* Expected to yield the single "vfnmadd...sd" match scanned below.  */
+  return _mm_fnmadd_sd (a, b, c);
+}
+
+__m128
+check_mm_fnmadd_ss (__m128 a, __m128 b, __m128 c)
+{ /* Expected to yield the single "vfnmadd...ss" match scanned below.  */
+  return _mm_fnmadd_ss (a, b, c);
+}
+
+__m128d
+check_mm_fnmsub_pd (__m128d a, __m128d b, __m128d c)
+{ /* Expected to yield one of the two "vfnmsub...pd" matches scanned below.  */
+  return _mm_fnmsub_pd (a, b, c);
+}
+
+__m256d
+check_mm256_fnmsub_pd (__m256d a, __m256d b, __m256d c)
+{ /* Expected to yield one of the two "vfnmsub...pd" matches scanned below.  */
+  return _mm256_fnmsub_pd (a, b, c);
+}
+
+__m128
+check_mm_fnmsub_ps (__m128 a, __m128 b, __m128 c)
+{ /* Expected to yield one of the two "vfnmsub...ps" matches scanned below.  */
+  return _mm_fnmsub_ps (a, b, c);
+}
+
+__m256
+check_mm256_fnmsub_ps (__m256 a, __m256 b, __m256 c)
+{ /* Expected to yield one of the two "vfnmsub...ps" matches scanned below.  */
+  return _mm256_fnmsub_ps (a, b, c);
+}
+
+__m128d
+check_mm_fnmsub_sd (__m128d a, __m128d b, __m128d c)
+{ /* Expected to yield the single "vfnmsub...sd" match scanned below.  */
+  return _mm_fnmsub_sd (a, b, c);
+}
+
+__m128
+check_mm_fnmsub_ss (__m128 a, __m128 b, __m128 c)
+{ /* Expected to yield the single "vfnmsub...ss" match scanned below.  */
+  return _mm_fnmsub_ss (a, b, c);
+}
+
+__m128d
+check_mm_fmaddsub_pd (__m128d a, __m128d b, __m128d c)
+{ /* Expected to yield one of the two "vfmaddsub...pd" matches scanned below.  */
+  return _mm_fmaddsub_pd (a, b, c);
+}
+
+__m256d
+check_mm256_fmaddsub_pd (__m256d a, __m256d b, __m256d c)
+{ /* Expected to yield one of the two "vfmaddsub...pd" matches scanned below.  */
+  return _mm256_fmaddsub_pd (a, b, c);
+}
+
+__m128
+check_mm_fmaddsub_ps (__m128 a, __m128 b, __m128 c)
+{ /* Expected to yield one of the two "vfmaddsub...ps" matches scanned below.  */
+  return _mm_fmaddsub_ps (a, b, c);
+}
+
+__m256
+check_mm256_fmaddsub_ps (__m256 a, __m256 b, __m256 c)
+{ /* Expected to yield one of the two "vfmaddsub...ps" matches scanned below.  */
+  return _mm256_fmaddsub_ps (a, b, c);
+}
+
+__m128d
+check_mm_fmsubadd_pd (__m128d a, __m128d b, __m128d c)
+{ /* Expected to yield one of the two "vfmsubadd...pd" matches scanned below.  */
+  return _mm_fmsubadd_pd (a, b, c);
+}
+
+__m256d
+check_mm256_fmsubadd_pd (__m256d a, __m256d b, __m256d c)
+{ /* Expected to yield one of the two "vfmsubadd...pd" matches scanned below.  */
+  return _mm256_fmsubadd_pd (a, b, c);
+}
+
+__m128
+check_mm_fmsubadd_ps (__m128 a, __m128 b, __m128 c)
+{ /* Expected to yield one of the two "vfmsubadd...ps" matches scanned below.  */
+  return _mm_fmsubadd_ps (a, b, c);
+}
+
+__m256
+check_mm256_fmsubadd_ps (__m256 a, __m256 b, __m256 c)
+{ /* Expected to yield one of the two "vfmsubadd...ps" matches scanned below.  */
+  return _mm256_fmsubadd_ps (a, b, c);
+}
+
+
+/* { dg-final { scan-assembler-times "vfmadd[^s]..ps" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsub[^s]..ps" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...ps" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...ps" 2 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub...ps" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd...ps" 2 } } */
+/* { dg-final { scan-assembler-times "vfmadd[^s]..pd" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsub[^s]..pd" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...pd" 2 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...pd" 2 } } */
+/* { dg-final { scan-assembler-times "vfmaddsub...pd" 2 } } */
+/* { dg-final { scan-assembler-times "vfmsubadd...pd" 2 } } */
+/* { dg-final { scan-assembler-times "vfmadd[^s]..ss" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub[^s]..ss" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...ss" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...ss" 1 } } */
+/* { dg-final { scan-assembler-times "vfmadd[^s]..sd" 1 } } */
+/* { dg-final { scan-assembler-times "vfmsub[^s]..sd" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmadd...sd" 1 } } */
+/* { dg-final { scan-assembler-times "vfnmsub...sd" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/fma-fmaddXX.c b/gcc/testsuite/gcc.target/i386/fma-fmaddXX.c
new file mode 100644
index 0000000..43ef9e8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-fmaddXX.c
@@ -0,0 +1,102 @@
+/* { dg-do run } */
+/* { dg-require-effective-target fma } */
+/* { dg-options "-O2 -mfma" } */
+
+#include "fma-check.h"
+
+#include <x86intrin.h>
+#include "m256-check.h"
+
+void
+check_mm_fmadd_pd (__m128d __A, __m128d __B, __m128d __C)
+{ /* Abort unless _mm_fmadd_pd equals a*b + c computed per element.  */
+  union128d a, b, c, e;
+  a.x = __A; /* The unions expose each vector operand as an array.  */
+  b.x = __B;
+  c.x = __C;
+  double d[2]; /* Expected result, one entry per element.  */
+  int i;
+  e.x = _mm_fmadd_pd (__A, __B, __C); /* Value under test.  */
+  for (i = 0; i < 2; i++)
+    {
+      d[i] = a.a[i] * b.a[i] + c.a[i];
+    }
+
+  if (check_union128d (e, d)) /* A nonzero result aborts the test.  */
+    abort ();
+}
+
+void
+check_mm_fmadd_ps (__m128 __A, __m128 __B, __m128 __C)
+{ /* Abort unless _mm_fmadd_ps equals a*b + c computed per element.  */
+  union128 a, b, c, e;
+  a.x = __A; /* The unions expose each vector operand as an array.  */
+  b.x = __B;
+  c.x = __C;
+  float d[4]; /* Expected result, one entry per element.  */
+  int i;
+  e.x = _mm_fmadd_ps (__A, __B, __C); /* Value under test.  */
+  for (i = 0; i < 4; i++)
+    {
+      d[i] = a.a[i] * b.a[i] + c.a[i];
+    }
+  if (check_union128 (e, d)) /* A nonzero result aborts the test.  */
+    abort ();
+}
+
+void
+check_mm_fmadd_sd (__m128d __A, __m128d __B, __m128d __C)
+{ /* Abort unless _mm_fmadd_sd gives a*b + c in element 0 and __A's element 1.  */
+  union128d a, b, c, e;
+  a.x = __A; /* The unions expose each vector operand as an array.  */
+  b.x = __B;
+  c.x = __C;
+  double d[2]; /* Expected result, one entry per element.  */
+  int i;
+  e.x = _mm_fmadd_sd (__A, __B, __C); /* Value under test.  */
+  for (i = 1; i < 2; i++)
+    {
+      d[i] = a.a[i]; /* Upper element is expected to equal __A's.  */
+    }
+  d[0] = a.a[0] * b.a[0] + c.a[0]; /* Only element 0 is computed.  */
+  if (check_union128d (e, d)) /* A nonzero result aborts the test.  */
+    abort ();
+}
+
+void
+check_mm_fmadd_ss (__m128 __A, __m128 __B, __m128 __C)
+{ /* Abort unless _mm_fmadd_ss gives a*b + c in element 0 and __A's elements 1-3.  */
+  union128 a, b, c, e;
+  a.x = __A; /* The unions expose each vector operand as an array.  */
+  b.x = __B;
+  c.x = __C;
+  float d[4]; /* Expected result, one entry per element.  */
+  int i;
+  e.x = _mm_fmadd_ss (__A, __B, __C); /* Value under test.  */
+  for (i = 1; i < 4; i++)
+    {
+      d[i] = a.a[i]; /* Upper elements are expected to equal __A's.  */
+    }
+  d[0] = a.a[0] * b.a[0] + c.a[0]; /* Only element 0 is computed.  */
+  if (check_union128 (e, d)) /* A nonzero result aborts the test.  */
+    abort ();
+}
+
+static void
+fma_test (void)
+{ /* Build three operands per vector type and run the four _mm fmadd checks.  */
+  union128 a[3];
+  union128d b[3];
+  int i, j;
+  for (i = 0; i < 3; i++)
+    {
+      for (j = 0; j < 4; j++)
+	a[i].a[j] = i * j + 3.5; /* union128 operand i, element j.  */
+      for (j = 0; j < 2; j++)
+	b[i].a[j] = i * j + 3.5; /* union128d operand i, element j.  */
+    }
+  check_mm_fmadd_pd (b[0].x, b[1].x, b[2].x);
+  check_mm_fmadd_sd (b[0].x, b[1].x, b[2].x);
+  check_mm_fmadd_ps (a[0].x, a[1].x, a[2].x);
+  check_mm_fmadd_ss (a[0].x, a[1].x, a[2].x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/fma-fmaddsubXX.c b/gcc/testsuite/gcc.target/i386/fma-fmaddsubXX.c
new file mode 100644
index 0000000..89c8163
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-fmaddsubXX.c
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-require-effective-target fma } */
+/* { dg-options "-O2 -mfma" } */
+
+#include "fma-check.h"
+
+#include <x86intrin.h>
+#include "m256-check.h"
+
+void
+check_mm_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C)
+{ /* Abort unless _mm_fmaddsub_ps equals a*b + c (odd i) or a*b - c (even i).  */
+  union128 a, b, c, e;
+  a.x = __A; /* The unions expose each vector operand as an array.  */
+  b.x = __B;
+  c.x = __C;
+  float d[4]; /* Expected result, one entry per element.  */
+  int i;
+  e.x = _mm_fmaddsub_ps (__A, __B, __C); /* Value under test.  */
+  for (i = 0; i < 4; i++)
+    {
+      d[i] = a.a[i] * b.a[i] + (i % 2 == 1 ? c.a[i] : -c.a[i]);
+    }
+  if (check_union128 (e, d)) /* A nonzero result aborts the test.  */
+    abort ();
+}
+
+void
+check_mm_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C)
+{ /* Abort unless _mm_fmaddsub_pd equals a*b + c (odd i) or a*b - c (even i).  */
+  union128d a, b, c, e;
+  a.x = __A; /* The unions expose each vector operand as an array.  */
+  b.x = __B;
+  c.x = __C;
+  double d[2]; /* Expected result, one entry per element.  */
+  int i;
+  e.x = _mm_fmaddsub_pd (__A, __B, __C); /* Value under test.  */
+  for (i = 0; i < 2; i++)
+    {
+      d[i] = a.a[i] * b.a[i] + (i % 2 == 1 ? c.a[i] : -c.a[i]);
+    }
+  if (check_union128d (e, d)) /* A nonzero result aborts the test.  */
+    abort ();
+}
+
+static void
+fma_test (void)
+{ /* Build three operands per vector type and run both _mm fmaddsub checks.  */
+  union128 a[3];
+  union128d b[3];
+  int i, j;
+  for (i = 0; i < 3; i++)
+    {
+      for (j = 0; j < 4; j++)
+	a[i].a[j] = i * j + 3.5; /* union128 operand i, element j.  */
+      for (j = 0; j < 2; j++)
+	b[i].a[j] = i * j + 3.5; /* union128d operand i, element j.  */
+    }
+  check_mm_fmaddsub_pd (b[0].x, b[1].x, b[2].x);
+  check_mm_fmaddsub_ps (a[0].x, a[1].x, a[2].x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/fma-fmsubXX.c b/gcc/testsuite/gcc.target/i386/fma-fmsubXX.c
new file mode 100644
index 0000000..3d92d4b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-fmsubXX.c
@@ -0,0 +1,101 @@
+/* { dg-do run } */
+/* { dg-require-effective-target fma } */
+/* { dg-options "-O2 -mfma" } */
+
+#include "fma-check.h"
+
+#include <x86intrin.h>
+#include "m256-check.h"
+
+void
+check_mm_fmsub_pd (__m128d __A, __m128d __B, __m128d __C)
+{ /* Abort unless _mm_fmsub_pd equals a*b - c computed per element.  */
+  union128d a, b, c, e;
+  a.x = __A; /* The unions expose each vector operand as an array.  */
+  b.x = __B;
+  c.x = __C;
+  double d[2]; /* Expected result, one entry per element.  */
+  int i;
+  e.x = _mm_fmsub_pd (__A, __B, __C); /* Value under test.  */
+  for (i = 0; i < 2; i++)
+    {
+      d[i] = a.a[i] * b.a[i] - c.a[i];
+    }
+  if (check_union128d (e, d)) /* A nonzero result aborts the test.  */
+    abort ();
+}
+
+void
+check_mm_fmsub_ps (__m128 __A, __m128 __B, __m128 __C)
+{ /* Abort unless _mm_fmsub_ps equals a*b - c computed per element.  */
+  union128 a, b, c, e;
+  a.x = __A; /* The unions expose each vector operand as an array.  */
+  b.x = __B;
+  c.x = __C;
+  float d[4]; /* Expected result, one entry per element.  */
+  int i;
+  e.x = _mm_fmsub_ps (__A, __B, __C); /* Value under test.  */
+  for (i = 0; i < 4; i++)
+    {
+      d[i] = a.a[i] * b.a[i] - c.a[i];
+    }
+  if (check_union128 (e, d)) /* A nonzero result aborts the test.  */
+    abort ();
+}
+
+void
+check_mm_fmsub_sd (__m128d __A, __m128d __B, __m128d __C)
+{ /* Abort unless _mm_fmsub_sd gives a*b - c in element 0 and __A's element 1.  */
+  union128d a, b, c, e;
+  a.x = __A; /* The unions expose each vector operand as an array.  */
+  b.x = __B;
+  c.x = __C;
+  double d[2]; /* Expected result, one entry per element.  */
+  int i;
+  e.x = _mm_fmsub_sd (__A, __B, __C); /* Value under test.  */
+  for (i = 1; i < 2; i++)
+    {
+      d[i] = a.a[i]; /* Upper element is expected to equal __A's.  */
+    }
+  d[0] = a.a[0] * b.a[0] - c.a[0]; /* Only element 0 is computed.  */
+  if (check_union128d (e, d)) /* A nonzero result aborts the test.  */
+    abort ();
+}
+
+void
+check_mm_fmsub_ss (__m128 __A, __m128 __B, __m128 __C)
+{ /* Abort unless _mm_fmsub_ss gives a*b - c in element 0 and __A's elements 1-3.  */
+  union128 a, b, c, e;
+  a.x = __A; /* The unions expose each vector operand as an array.  */
+  b.x = __B;
+  c.x = __C;
+  float d[4]; /* Expected result, one entry per element.  */
+  int i;
+  e.x = _mm_fmsub_ss (__A, __B, __C); /* Value under test.  */
+  for (i = 1; i < 4; i++)
+    {
+      d[i] = a.a[i]; /* Upper elements are expected to equal __A's.  */
+    }
+  d[0] = a.a[0] * b.a[0] - c.a[0]; /* Only element 0 is computed.  */
+  if (check_union128 (e, d)) /* A nonzero result aborts the test.  */
+    abort ();
+}
+
+static void
+fma_test (void)
+{ /* Build three operands per vector type and run the four _mm fmsub checks.  */
+  union128 a[3];
+  union128d b[3];
+  int i, j;
+  for (i = 0; i < 3; i++)
+    {
+      for (j = 0; j < 4; j++)
+	a[i].a[j] = i * j + 3.5; /* union128 operand i, element j.  */
+      for (j = 0; j < 2; j++)
+	b[i].a[j] = i * j + 3.5; /* union128d operand i, element j.  */
+    }
+  check_mm_fmsub_pd (b[0].x, b[1].x, b[2].x);
+  check_mm_fmsub_sd (b[0].x, b[1].x, b[2].x);
+  check_mm_fmsub_ps (a[0].x, a[1].x, a[2].x);
+  check_mm_fmsub_ss (a[0].x, a[1].x, a[2].x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/fma-fmsubaddXX.c b/gcc/testsuite/gcc.target/i386/fma-fmsubaddXX.c
new file mode 100644
index 0000000..b03f875
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-fmsubaddXX.c
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-require-effective-target fma } */
+/* { dg-options "-O2 -mfma" } */
+
+#include "fma-check.h"
+
+#include <x86intrin.h>
+#include "m256-check.h"
+
+void
+check_mm_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C)
+{ /* Abort unless _mm_fmsubadd_ps equals a*b - c (odd i) or a*b + c (even i).  */
+  union128 a, b, c, e;
+  a.x = __A; /* The unions expose each vector operand as an array.  */
+  b.x = __B;
+  c.x = __C;
+  float d[4]; /* Expected result, one entry per element.  */
+  int i;
+  e.x = _mm_fmsubadd_ps (__A, __B, __C); /* Value under test.  */
+  for (i = 0; i < 4; i++)
+    {
+      d[i] = a.a[i] * b.a[i] + (i % 2 == 1 ? -c.a[i] : c.a[i]);
+    }
+  if (check_union128 (e, d)) /* A nonzero result aborts the test.  */
+    abort ();
+}
+
+void
+check_mm_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C)
+{ /* Abort unless _mm_fmsubadd_pd equals a*b - c (odd i) or a*b + c (even i).  */
+  union128d a, b, c, e;
+  a.x = __A; /* The unions expose each vector operand as an array.  */
+  b.x = __B;
+  c.x = __C;
+  double d[2]; /* Expected result, one entry per element.  */
+  int i;
+  e.x = _mm_fmsubadd_pd (__A, __B, __C); /* Value under test.  */
+  for (i = 0; i < 2; i++)
+    {
+      d[i] = a.a[i] * b.a[i] + (i % 2 == 1 ? -c.a[i] : c.a[i]);
+    }
+  if (check_union128d (e, d)) /* A nonzero result aborts the test.  */
+    abort ();
+}
+
+static void
+fma_test (void)
+{ /* Build three operands per vector type and run both _mm fmsubadd checks.  */
+  union128 a[3];
+  union128d b[3];
+  int i, j;
+  for (i = 0; i < 3; i++)
+    {
+      for (j = 0; j < 4; j++)
+	a[i].a[j] = i * j + 3.5; /* union128 operand i, element j.  */
+      for (j = 0; j < 2; j++)
+	b[i].a[j] = i * j + 3.5; /* union128d operand i, element j.  */
+    }
+  check_mm_fmsubadd_pd (b[0].x, b[1].x, b[2].x);
+  check_mm_fmsubadd_ps (a[0].x, a[1].x, a[2].x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/fma-fnmaddXX.c b/gcc/testsuite/gcc.target/i386/fma-fnmaddXX.c
new file mode 100644
index 0000000..f23a6c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-fnmaddXX.c
@@ -0,0 +1,101 @@
+/* { dg-do run } */
+/* { dg-require-effective-target fma } */
+/* { dg-options "-O2 -mfma" } */
+
+#include "fma-check.h"
+
+#include <x86intrin.h>
+#include "m256-check.h"
+
+void
+check_mm_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C)
+{ /* Abort unless _mm_fnmadd_ps equals -a*b + c computed per element.  */
+  union128 a, b, c, e;
+  a.x = __A; /* The unions expose each vector operand as an array.  */
+  b.x = __B;
+  c.x = __C;
+  float d[4]; /* Expected result, one entry per element.  */
+  int i;
+  e.x = _mm_fnmadd_ps (__A, __B, __C); /* Value under test.  */
+  for (i = 0; i < 4; i++)
+    {
+      d[i] = -a.a[i] * b.a[i] + c.a[i];
+    }
+  if (check_union128 (e, d)) /* A nonzero result aborts the test.  */
+    abort ();
+}
+
+void
+check_mm_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C)
+{ /* Abort unless _mm_fnmadd_pd equals -a*b + c computed per element.  */
+  union128d a, b, c, e;
+  a.x = __A; /* The unions expose each vector operand as an array.  */
+  b.x = __B;
+  c.x = __C;
+  double d[2]; /* Expected result, one entry per element.  */
+  int i;
+  e.x = _mm_fnmadd_pd (__A, __B, __C); /* Value under test.  */
+  for (i = 0; i < 2; i++)
+    {
+      d[i] = -a.a[i] * b.a[i] + c.a[i];
+    }
+  if (check_union128d (e, d)) /* A nonzero result aborts the test.  */
+    abort ();
+}
+
+void
+check_mm_fnmadd_sd (__m128d __A, __m128d __B, __m128d __C)
+{ /* Abort unless _mm_fnmadd_sd gives -a*b + c in element 0 and __A's element 1.  */
+  union128d a, b, c, e;
+  a.x = __A; /* The unions expose each vector operand as an array.  */
+  b.x = __B;
+  c.x = __C;
+  double d[2]; /* Expected result, one entry per element.  */
+  int i;
+  e.x = _mm_fnmadd_sd (__A, __B, __C); /* Value under test.  */
+  for (i = 1; i < 2; i++)
+    {
+      d[i] = a.a[i]; /* Upper element is expected to equal __A's.  */
+    }
+  d[0] = -a.a[0] * b.a[0] + c.a[0]; /* Only element 0 is computed.  */
+  if (check_union128d (e, d)) /* A nonzero result aborts the test.  */
+    abort ();
+}
+
+void
+check_mm_fnmadd_ss (__m128 __A, __m128 __B, __m128 __C)
+{ /* Abort unless _mm_fnmadd_ss gives -a*b + c in element 0 and __A's elements 1-3.  */
+  union128 a, b, c, e;
+  a.x = __A; /* The unions expose each vector operand as an array.  */
+  b.x = __B;
+  c.x = __C;
+  float d[4]; /* Expected result, one entry per element.  */
+  int i;
+  e.x = _mm_fnmadd_ss (__A, __B, __C); /* Value under test.  */
+  for (i = 1; i < 4; i++)
+    {
+      d[i] = a.a[i]; /* Upper elements are expected to equal __A's.  */
+    }
+  d[0] = -a.a[0] * b.a[0] + c.a[0]; /* Only element 0 is computed.  */
+  if (check_union128 (e, d)) /* A nonzero result aborts the test.  */
+    abort ();
+}
+
+static void
+fma_test (void)
+{ /* Build three operands per vector type and run the four _mm fnmadd checks.  */
+  union128 a[3];
+  union128d b[3];
+  int i, j;
+  for (i = 0; i < 3; i++)
+    {
+      for (j = 0; j < 4; j++)
+	a[i].a[j] = i * j + 3.5; /* union128 operand i, element j.  */
+      for (j = 0; j < 2; j++)
+	b[i].a[j] = i * j + 3.5; /* union128d operand i, element j.  */
+    }
+  check_mm_fnmadd_pd (b[0].x, b[1].x, b[2].x);
+  check_mm_fnmadd_sd (b[0].x, b[1].x, b[2].x);
+  check_mm_fnmadd_ps (a[0].x, a[1].x, a[2].x);
+  check_mm_fnmadd_ss (a[0].x, a[1].x, a[2].x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/fma-fnmsubXX.c b/gcc/testsuite/gcc.target/i386/fma-fnmsubXX.c
new file mode 100644
index 0000000..d17c7f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/fma-fnmsubXX.c
@@ -0,0 +1,101 @@
+/* { dg-do run } */
+/* { dg-require-effective-target fma } */
+/* { dg-options "-O2 -mfma" } */
+
+#include "fma-check.h"
+
+#include <x86intrin.h>
+#include "m256-check.h"
+
+void
+check_mm_fnmsub_sd (__m128d __A, __m128d __B, __m128d __C)
+{ /* Abort unless _mm_fnmsub_sd gives -a*b - c in element 0 and __A's element 1.  */
+  union128d a, b, c, e;
+  a.x = __A; /* The unions expose each vector operand as an array.  */
+  b.x = __B;
+  c.x = __C;
+  double d[2]; /* Expected result, one entry per element.  */
+  int i;
+  e.x = _mm_fnmsub_sd (__A, __B, __C); /* Value under test.  */
+  for (i = 1; i < 2; i++)
+    {
+      d[i] = a.a[i]; /* Upper element is expected to equal __A's.  */
+    }
+  d[0] = -a.a[0] * b.a[0] - c.a[0]; /* Only element 0 is computed.  */
+  if (check_union128d (e, d)) /* A nonzero result aborts the test.  */
+    abort ();
+}
+
+void
+check_mm_fnmsub_ss (__m128 __A, __m128 __B, __m128 __C)
+{ /* Abort unless _mm_fnmsub_ss gives -a*b - c in element 0 and __A's elements 1-3.  */
+  union128 a, b, c, e;
+  a.x = __A; /* The unions expose each vector operand as an array.  */
+  b.x = __B;
+  c.x = __C;
+  float d[4]; /* Expected result, one entry per element.  */
+  int i;
+  e.x = _mm_fnmsub_ss (__A, __B, __C); /* Value under test.  */
+  for (i = 1; i < 4; i++)
+    {
+      d[i] = a.a[i]; /* Upper elements are expected to equal __A's.  */
+    }
+  d[0] = -a.a[0] * b.a[0] - c.a[0]; /* Only element 0 is computed.  */
+  if (check_union128 (e, d)) /* A nonzero result aborts the test.  */
+    abort ();
+}
+
+void
+check_mm_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C)
+{ /* Abort unless _mm_fnmsub_ps equals -a*b - c computed per element.  */
+  union128 a, b, c, e;
+  a.x = __A; /* The unions expose each vector operand as an array.  */
+  b.x = __B;
+  c.x = __C;
+  float d[4]; /* Expected result, one entry per element.  */
+  int i;
+  e.x = _mm_fnmsub_ps (__A, __B, __C); /* Value under test.  */
+  for (i = 0; i < 4; i++)
+    {
+      d[i] = -a.a[i] * b.a[i] - c.a[i];
+    }
+  if (check_union128 (e, d)) /* A nonzero result aborts the test.  */
+    abort ();
+}
+
+void
+check_mm_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C)
+{ /* Abort unless _mm_fnmsub_pd equals -a*b - c computed per element.  */
+  union128d a, b, c, e;
+  a.x = __A; /* The unions expose each vector operand as an array.  */
+  b.x = __B;
+  c.x = __C;
+  double d[2]; /* Expected result, one entry per element.  */
+  int i;
+  e.x = _mm_fnmsub_pd (__A, __B, __C); /* Value under test.  */
+  for (i = 0; i < 2; i++)
+    {
+      d[i] = -a.a[i] * b.a[i] - c.a[i];
+    }
+  if (check_union128d (e, d)) /* A nonzero result aborts the test.  */
+    abort ();
+}
+
+static void
+fma_test (void)
+{ /* Build three operands per vector type and run the four _mm fnmsub checks.  */
+  union128 a[3];
+  union128d b[3];
+  int i, j;
+  for (i = 0; i < 3; i++)
+    {
+      for (j = 0; j < 4; j++)
+	a[i].a[j] = i * j + 3.5; /* union128 operand i, element j.  */
+      for (j = 0; j < 2; j++)
+	b[i].a[j] = i * j + 3.5; /* union128d operand i, element j.  */
+    }
+  check_mm_fnmsub_pd (b[0].x, b[1].x, b[2].x);
+  check_mm_fnmsub_sd (b[0].x, b[1].x, b[2].x);
+  check_mm_fnmsub_ps (a[0].x, a[1].x, a[2].x);
+  check_mm_fnmsub_ss (a[0].x, a[1].x, a[2].x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/i386.exp b/gcc/testsuite/gcc.target/i386/i386.exp
index 6517d45..75bea9b 100644
--- a/gcc/testsuite/gcc.target/i386/i386.exp
+++ b/gcc/testsuite/gcc.target/i386/i386.exp
@@ -172,6 +172,20 @@ proc check_effective_target_fma4 { } {
     } "-O2 -mfma4" ]
 }
 
+# Return 1 if fma instructions can be compiled (probe is built with -O2 -mfma).
+proc check_effective_target_fma { } {
+    return [check_no_compiler_messages fma object {
+        typedef float __m128 __attribute__ ((__vector_size__ (16)));
+	typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+	__m128 _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
+	{
+	    return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A,
+						     (__v4sf)__B,
+						     (__v4sf)__C);
+	}
+    } "-O2 -mfma" ]
+}
+
 # Return 1 if xop instructions can be compiled.
 proc check_effective_target_xop { } {
     return [check_no_compiler_messages xop object {
diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c
index 9f3713c..66a36c6 100644
--- a/gcc/testsuite/gcc.target/i386/sse-12.c
+++ b/gcc/testsuite/gcc.target/i386/sse-12.c
@@ -3,7 +3,7 @@
    popcntintrin.h and mm_malloc.h are usable
    with -O -std=c89 -pedantic-errors.  */
 /* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma" } */
 
 #include <x86intrin.h>
 
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 134905d..4bc0a2e 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -1,13 +1,13 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma" } */
 
 #include <mm_malloc.h>
 
 /* Test that the intrinsics compile with optimization.  All of them
    are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h,
    mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h,
-   tbmintrin.h, lwpintrin.h, popcntintrin.h and mm_malloc.h that
-   reference the proper builtin functions.
+   tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h and mm_malloc.h
+   that reference the proper builtin functions.
 
    Defining away "extern" and "__inline" results in all of them being
    compiled as proper functions.  */
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index c1f10f1..6451166 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -1,12 +1,13 @@
 /* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma" } */
 
 #include <mm_malloc.h>
 
 /* Test that the intrinsics compile without optimization.  All of them are
    defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h,
-   fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h,
-   lwpintrin.h and mm_malloc.h that reference the proper builtin functions.
+   fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h,
+   lwpintrin.h, fmaintrin.h and mm_malloc.h that reference the proper
+   builtin functions.
 
    Defining away "extern" and "__inline" results in all of them being compiled
    as proper functions.  */
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
index 89ea7b3..9ccb92d 100644
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
@@ -7,8 +7,8 @@
 /* Test that the intrinsics compile with optimization.  All of them
    are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h,
    mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h,
-   tbmintrin.h, lwpintrin.h, popcntintrin.h and mm_malloc.h that
-   reference the proper builtin functions.
+   tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h and mm_malloc.h
+   that reference the proper builtin functions.
 
    Defining away "extern" and "__inline" results in all of them being
    compiled as proper functions.  */
@@ -255,9 +255,9 @@ test_2 (_mm_clmulepi64_si128, __m128i, __m128i, __m128i, 1)
 #endif
 #include <popcntintrin.h>
 
-/* x86intrin.h (FMA4/XOP/LWP/BMI/BMI2/TBM/LZCNT). */
+/* x86intrin.h (FMA4/XOP/LWP/BMI/BMI2/TBM/LZCNT/FMA). */
 #ifdef DIFFERENT_PRAGMAS
-#pragma GCC target ("fma4,xop,lwp,bmi,bmi2,tbm,lzcnt")
+#pragma GCC target ("fma4,xop,lwp,bmi,bmi2,tbm,lzcnt,fma")
 #endif
 #include <x86intrin.h>
 /* xopintrin.h */
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index ef2471c..462f8c9 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -6,8 +6,8 @@
 /* Test that the intrinsics compile with optimization.  All of them
    are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h,
    mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h,
-   tbmintrin.h, lwpintrin.h, popcntintrin.h and mm_malloc.h that
-   reference the proper builtin functions.
+   tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h and mm_malloc.h
+   that reference the proper builtin functions.
 
    Defining away "extern" and "__inline" results in all of them being
    compiled as proper functions.  */
@@ -180,7 +180,7 @@
 #define __builtin_ia32_gatherdiv4si(X, Y, Z, K, M) __builtin_ia32_gatherdiv4si(X, Y, Z, K, 1)
 #define __builtin_ia32_gatherdiv4si256(X, Y, Z, K, M) __builtin_ia32_gatherdiv4si256(X, Y, Z, K, 1)
 
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma")
 #include <wmmintrin.h>
 #include <smmintrin.h>
 #include <mm3dnow.h>
author	Ilya Tocar <ilya.tocar@intel.com>	2011-08-30 14:02:53 +0000
committer	H.J. Lu <hjl@gcc.gnu.org>	2011-08-30 07:02:53 -0700
commit	2ddd46d69b09a88fb82832285b69090fa08bddc2 (patch)
tree	175e06db22b216944eb8ef409716322c278cd349 /gcc
parent	c199ccf75867fa7287570ff1ec19ef76cc9d5ea6 (diff)
download	gcc-2ddd46d69b09a88fb82832285b69090fa08bddc2.zip gcc-2ddd46d69b09a88fb82832285b69090fa08bddc2.tar.gz gcc-2ddd46d69b09a88fb82832285b69090fa08bddc2.tar.bz2