24 files changed, 1086 insertions, 154 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 989a5f2..6bcaec9 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,74 @@
+2010-10-14  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	* doc/md.texi (Standard Names): Add fma@var{m}4 documentation.
+
+	* doc/rtl.texi (RTX_TERNARY): Document FMA is ternary.  Add
+	SIGN_EXTRACT and ZERO_EXTRACT which were missing.
+	(Standard names): Document fma.
+
+	* doc/cpp.texi (Common Predefined Macros): Document __FP_FAST_FMA,
+	__FP_FAST_FMAF, __FP_FAST_FMAL.
+
+	* builitns.c (expand_builtin_mathfn_ternary): New function for
+	expanding ternary math functions, like fma.
+	(expand_builtin): Call it for the fma builtins.
+
+	* simplify-rtx.c (simplify_ternary_operation): Don't simplify FMA
+	ops at present.
+
+	* tree-vect-stmts.c (vectorizable_call): Allow 3 argument
+	vectorizable functions to support vectorizing fma.
+
+	* config/rs6000/rs6000.c (rs6000_builtin_vectorized_function):
+	Handle fma builtins.
+
+	* config/rs6000/vsx.md (UNSPEC_VSX_MADD): Delete.
+	(UNSPEC_VSX_MSUB): Ditto.
+	(UNSPEC_VSX_NMADD): Ditto.
+	(UNSPEC_VSX_NMSUB): Ditto.
+	(vsx_fmadd<mode>4*): Rewrite to use FMA rtl in some cases instead
+	of UNSPEC. Renumber combiner patterns.
+	(vsx_fmsub<mode>4*): Ditto.
+	(vsx_fnmadd<mode>4*): Ditto.
+	(vsx_fnmsub<mode>4*): Ditto.
+
+	* config/rs6000/altivec.md (UNSPEC_VNMSUBFP): Delete.
+	(altivec_vmaddfp): Rewrite to use FMA rtl if no fused
+	multiply/add.  Rename combiner pattern, and add TARGET_FUSED_MADD
+	test.
+	(altivec_vmaddfp_1): Ditto.
+	(altivec_vmaddfp_2): Ditto.
+	(atlivec_mulv4sf3): Ditto.
+	(altivec_vnmsubfp): Ditto.
+	(altivec_vnmsubfp_1): Ditto.
+	(altivec_vnmsubfp_2): Ditto.
+	(altivec_vnmsubfp_3): Delete.
+
+	* config/rs6000/rs6000.md (UNSPEC_FMA): Delete.
+	(fmasf4): Rewrite to always use FMA rtl.  Add combiners to
+	generate the four fused multiply/add ops.  Combine power, powerpc
+	ops.
+	(fmasf4_fpr): Ditto.
+	(fmssf4_fpr): Ditto.
+	(fnmasf4_fpr): Ditto.
+	(fnmssf4_fpr): Ditto.
+	(fmadf4): Ditto.
+	(fmadf4_fpr): Ditto.
+	(fmsdf4_fpr): Ditto.
+	(fnmadf4_fpr): Ditto.
+	(fnmsdf4_fpr): Ditto.
+
+	* optabs.h (OTI_fma): Add fma optab.
+	(fma_optab): Ditto.
+
+	* genopinit.c (optabs): Set fma optab.
+
+	* rtl.def (FMA): Add FMA rtl.
+
+	* tree.h (mode_has_fma): New function to return if MODE supports a
+	fast multiply and add instruction.
+	* builtins.c (mode_has_fma): Ditto.
+
 2010-10-15  Jan Hubicka  <jh@suse.cz>
 
 	* lto-streamer-out.c (write_symbol): Use pointer set of seen
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 6fd2d35..370d521 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -106,6 +106,7 @@ static void expand_errno_check (tree, rtx);
 static rtx expand_builtin_mathfn (tree, rtx, rtx);
 static rtx expand_builtin_mathfn_2 (tree, rtx, rtx);
 static rtx expand_builtin_mathfn_3 (tree, rtx, rtx);
+static rtx expand_builtin_mathfn_ternary (tree, rtx, rtx);
 static rtx expand_builtin_interclass_mathfn (tree, rtx);
 static rtx expand_builtin_sincos (tree);
 static rtx expand_builtin_cexpi (tree, rtx);
@@ -2185,6 +2186,79 @@ expand_builtin_mathfn_2 (tree exp, rtx target, rtx subtarget)
   return target;
 }
 
+/* Expand a call to the builtin trinary math functions (fma).
+   Return NULL_RTX if a normal call should be emitted rather than expanding the
+   function in-line.  EXP is the expression that is a call to the builtin
+   function; if convenient, the result should be placed in TARGET.
+   SUBTARGET may be used as the target for computing one of EXP's
+   operands.  */
+
+static rtx
+expand_builtin_mathfn_ternary (tree exp, rtx target, rtx subtarget)
+{
+  optab builtin_optab;
+  rtx op0, op1, op2, insns;
+  tree fndecl = get_callee_fndecl (exp);
+  tree arg0, arg1, arg2;
+  enum machine_mode mode;
+
+  if (!validate_arglist (exp, REAL_TYPE, REAL_TYPE, REAL_TYPE, VOID_TYPE))
+    return NULL_RTX;
+
+  arg0 = CALL_EXPR_ARG (exp, 0);
+  arg1 = CALL_EXPR_ARG (exp, 1);
+  arg2 = CALL_EXPR_ARG (exp, 2);
+
+  switch (DECL_FUNCTION_CODE (fndecl))
+    {
+    CASE_FLT_FN (BUILT_IN_FMA):
+      builtin_optab = fma_optab; break;
+    default:
+      gcc_unreachable ();
+    }
+
+  /* Make a suitable register to place result in.  */
+  mode = TYPE_MODE (TREE_TYPE (exp));
+
+  /* Before working hard, check whether the instruction is available.  */
+  if (optab_handler (builtin_optab, mode) == CODE_FOR_nothing)
+    return NULL_RTX;
+
+  target = gen_reg_rtx (mode);
+
+  /* Always stabilize the argument list.  */
+  CALL_EXPR_ARG (exp, 0) = arg0 = builtin_save_expr (arg0);
+  CALL_EXPR_ARG (exp, 1) = arg1 = builtin_save_expr (arg1);
+  CALL_EXPR_ARG (exp, 2) = arg2 = builtin_save_expr (arg2);
+
+  op0 = expand_expr (arg0, subtarget, VOIDmode, EXPAND_NORMAL);
+  op1 = expand_normal (arg1);
+  op2 = expand_normal (arg2);
+
+  start_sequence ();
+
+  /* Compute into TARGET.
+     Set TARGET to wherever the result comes back.  */
+  target = expand_ternary_op (mode, builtin_optab, op0, op1, op2,
+			      target, 0);
+
+  /* If we were unable to expand via the builtin, stop the sequence
+     (without outputting the insns) and call to the library function
+     with the stabilized argument list.  */
+  if (target == 0)
+    {
+      end_sequence ();
+      return expand_call (exp, target, target == const0_rtx);
+    }
+
+  /* Output the entire sequence.  */
+  insns = get_insns ();
+  end_sequence ();
+  emit_insn (insns);
+
+  return target;
+}
+
 /* Expand a call to the builtin sin and cos math functions.
    Return NULL_RTX if a normal call should be emitted rather than expanding the
    function in-line.  EXP is the expression that is a call to the builtin
@@ -5828,6 +5902,12 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode,
 	return target;
       break;
 
+    CASE_FLT_FN (BUILT_IN_FMA):
+      target = expand_builtin_mathfn_ternary (exp, target, subtarget);
+      if (target)
+	return target;
+      break;
+
     CASE_FLT_FN (BUILT_IN_ILOGB):
       if (! flag_unsafe_math_optimizations)
 	break;
@@ -13830,3 +13910,10 @@ is_inexpensive_builtin (tree decl)
   return false;
 }
 
+/* Return true if MODE provides a fast multiply/add (FMA) builtin function.  */
+
+bool
+mode_has_fma (enum machine_mode mode)
+{
+  return optab_handler (fma_optab, mode) != CODE_FOR_nothing;
+}
diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog
index 5ba4659..29fc3a1 100644
--- a/gcc/c-family/ChangeLog
+++ b/gcc/c-family/ChangeLog
@@ -1,8 +1,15 @@
+2010-10-14  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	* c-cppbuiltin.c (builtin_define_float_constants): Emit
+	__FP_FAST_FMA, __FP_FAST_FMAF, and __FP_FAST_FMAL if the machine
+	has the appropriate fma builtins.
+	(c_cpp_builtins): Adjust call to builtin_define_float_constants.
+
 2010-10-14  Iain Sandoe  <iains@gcc.gnu.org>
 
-	merge from FSF apple 'trunk' branch. 
+	merge from FSF apple 'trunk' branch.
 	2006 Fariborz Jahanian <fjahanian@apple.com>
-	
+
 	Radars 4436866, 4505126, 4506903, 4517826
 	* c-common.c (c_common_resword): Define @property and its attributes.
 	* c-common.h: Define property attribute enum entries.
@@ -14,17 +21,17 @@
 	(objc_add_property_variable): Likewise.
 	(objc_build_getter_call): Likewise.
 	(objc_build_setter_call) Likewise.
-	
+
 2010-10-13  Iain Sandoe  <iains@gcc.gnu.org>
 
-	merge from FSF apple 'trunk' branch. 
+	merge from FSF apple 'trunk' branch.
 	2006-04-26 Fariborz Jahanian <fjahanian@apple.com>
 
 	Radar 3803157 (method attributes)
 	* c-common.c (handle_deprecated_attribute): Recognize
 	objc methods as valid declarations.
 	* c-common.h: Declare objc_method_decl ().
-	* stub-objc.c (objc_method_decl): New stub. 
+	* stub-objc.c (objc_method_decl): New stub.
 
 2010-10-08  Joseph Myers  <joseph@codesourcery.com>
 
diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c
index f946dc2..0ad77c7 100644
--- a/gcc/c-family/c-cppbuiltin.c
+++ b/gcc/c-family/c-cppbuiltin.c
@@ -62,6 +62,7 @@ static void builtin_define_type_sizeof (const char *, tree);
 static void builtin_define_float_constants (const char *,
 					    const char *,
 					    const char *,
+					    const char *,
 					    tree);
 
 /* Define NAME with value TYPE size_unit.  */
@@ -78,6 +79,7 @@ static void
 builtin_define_float_constants (const char *name_prefix,
 		                const char *fp_suffix,
 				const char *fp_cast,
+				const char *fma_suffix,
 				tree type)
 {
   /* Used to convert radix-based values to base 10 values in several cases.
@@ -260,6 +262,13 @@ builtin_define_float_constants (const char *name_prefix,
      NaN has quiet NaNs.  */
   sprintf (name, "__%s_HAS_QUIET_NAN__", name_prefix);
   builtin_define_with_int_value (name, MODE_HAS_NANS (TYPE_MODE (type)));
+
+  /* Note whether we have fast FMA.  */
+  if (mode_has_fma (TYPE_MODE (type)))
+    {
+      sprintf (name, "__FP_FAST_FMA%s", fma_suffix);
+      builtin_define_with_int_value (name, 1);
+    }
 }
 
 /* Define __DECx__ constants for TYPE using NAME_PREFIX and SUFFIX. */
@@ -607,13 +616,15 @@ c_cpp_builtins (cpp_reader *pfile)
   builtin_define_with_int_value ("__DEC_EVAL_METHOD__",
                                  TARGET_DEC_EVAL_METHOD);
 
-  builtin_define_float_constants ("FLT", "F", "%s", float_type_node);
+  builtin_define_float_constants ("FLT", "F", "%s", "F", float_type_node);
   /* Cast the double precision constants.  This is needed when single
      precision constants are specified or when pragma FLOAT_CONST_DECIMAL64
      is used.  The correct result is computed by the compiler when using
      macros that include a cast.  */
-  builtin_define_float_constants ("DBL", "L", "((double)%s)", double_type_node);
-  builtin_define_float_constants ("LDBL", "L", "%s", long_double_type_node);
+  builtin_define_float_constants ("DBL", "L", "((double)%s)", "",
+				  double_type_node);
+  builtin_define_float_constants ("LDBL", "L", "%s", "L",
+				  long_double_type_node);
 
   /* For decfloat.h.  */
   builtin_define_decimal_float_constants ("DEC32", "DF", dfloat32_type_node);
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 7bf3c66..9f1b3fe 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -143,7 +143,6 @@
    (UNSPEC_VUPKLS_V4SF  325)
    (UNSPEC_VUPKHU_V4SF  326)
    (UNSPEC_VUPKLU_V4SF  327)
-   (UNSPEC_VNMSUBFP	328)
 ])
 
 (define_constants
@@ -513,12 +512,39 @@
   "vsel %0,%3,%2,%1"
   [(set_attr "type" "vecperm")])
 
-;; Fused multiply add
-(define_insn "altivec_vmaddfp"
+;; Fused multiply add.  By default expand the FMA into (plus (mult)) to help
+;; loop unrolling.  Don't do negate multiply ops, because of complications with
+;; honoring signed zero and fused-madd.
+
+(define_expand "altivec_vmaddfp"
+  [(set (match_operand:V4SF 0 "register_operand" "")
+	(plus:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "")
+			      (match_operand:V4SF 2 "register_operand" ""))
+	  	   (match_operand:V4SF 3 "register_operand" "")))]
+  "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
+{
+  if (!TARGET_FUSED_MADD)
+    {
+      emit_insn (gen_altivec_vmaddfp_2 (operands[0], operands[1], operands[2],
+					operands[3]));
+      DONE;
+    }
+})
+
+(define_insn "*altivec_vmaddfp_1"
   [(set (match_operand:V4SF 0 "register_operand" "=v")
 	(plus:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "v")
 			      (match_operand:V4SF 2 "register_operand" "v"))
 	  	   (match_operand:V4SF 3 "register_operand" "v")))]
+  "VECTOR_UNIT_ALTIVEC_P (V4SFmode) && TARGET_FUSED_MADD"
+  "vmaddfp %0,%1,%2,%3"
+  [(set_attr "type" "vecfloat")])
+
+(define_insn "altivec_vmaddfp_2"
+  [(set (match_operand:V4SF 0 "register_operand" "=v")
+	(fma:V4SF (match_operand:V4SF 1 "register_operand" "v")
+		  (match_operand:V4SF 2 "register_operand" "v")
+		  (match_operand:V4SF 3 "register_operand" "v")))]
   "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
   "vmaddfp %0,%1,%2,%3"
   [(set_attr "type" "vecfloat")])
@@ -529,7 +555,7 @@
   [(use (match_operand:V4SF 0 "register_operand" ""))
    (use (match_operand:V4SF 1 "register_operand" ""))
    (use (match_operand:V4SF 2 "register_operand" ""))]
-  "VECTOR_UNIT_ALTIVEC_P (V4SFmode) && TARGET_FUSED_MADD"
+  "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
   "
 {
   rtx neg0;
@@ -627,34 +653,18 @@
 }")
 
 ;; Fused multiply subtract 
-(define_expand "altivec_vnmsubfp"
-  [(match_operand:V4SF 0 "register_operand" "")
-   (match_operand:V4SF 1 "register_operand" "")
-   (match_operand:V4SF 2 "register_operand" "")
-   (match_operand:V4SF 3 "register_operand" "")]
+(define_insn "altivec_vnmsubfp"
+  [(set (match_operand:V4SF 0 "register_operand" "=v")
+	(neg:V4SF
+	 (fma:V4SF (match_operand:V4SF 1 "register_operand" "v")
+		   (match_operand:V4SF 2 "register_operand" "v")
+		   (neg:V4SF
+		    (match_operand:V4SF 3 "register_operand" "v")))))]
   "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
-{
-  if (TARGET_FUSED_MADD && HONOR_SIGNED_ZEROS (SFmode))
-    {
-       emit_insn (gen_altivec_vnmsubfp_1 (operands[0], operands[1],
-					  operands[2], operands[3]));
-       DONE;
-    }
-  else if (TARGET_FUSED_MADD && !HONOR_SIGNED_ZEROS (DFmode))
-    {
-       emit_insn (gen_altivec_vnmsubfp_2 (operands[0], operands[1],
-					  operands[2], operands[3]));
-       DONE;
-    }
-  else
-    {
-       emit_insn (gen_altivec_vnmsubfp_3 (operands[0], operands[1],
-					  operands[2], operands[3]));
-       DONE;
-    }
-})
+  "vnmsubfp %0,%1,%2,%3"
+  [(set_attr "type" "vecfloat")])
 
-(define_insn "altivec_vnmsubfp_1"
+(define_insn "*altivec_vnmsubfp_1"
   [(set (match_operand:V4SF 0 "register_operand" "=v")
 	(neg:V4SF
 	 (minus:V4SF
@@ -667,7 +677,7 @@
   "vnmsubfp %0,%1,%2,%3"
   [(set_attr "type" "vecfloat")])
 
-(define_insn "altivec_vnmsubfp_2"
+(define_insn "*altivec_vnmsubfp_2"
   [(set (match_operand:V4SF 0 "register_operand" "=v")
 	(minus:V4SF
 	 (match_operand:V4SF 3 "register_operand" "v")
@@ -679,16 +689,6 @@
   "vnmsubfp %0,%1,%2,%3"
   [(set_attr "type" "vecfloat")])
 
-(define_insn "altivec_vnmsubfp_3"
-  [(set (match_operand:V4SF 0 "register_operand" "=v")
-	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")
-		       (match_operand:V4SF 2 "register_operand" "v")
-		       (match_operand:V4SF 3 "register_operand" "v")]
-		      UNSPEC_VNMSUBFP))]
-  "VECTOR_UNIT_ALTIVEC_P (V4SFmode)"
-  "vnmsubfp %0,%1,%2,%3"
-  [(set_attr "type" "vecfloat")])
-
 (define_insn "altivec_vmsumu<VI_char>m"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
         (unspec:V4SI [(match_operand:VIshort 1 "register_operand" "v")
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 3afedd2..f977301 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -3938,6 +3938,22 @@ rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
 	  if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
 	    return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
 	  break;
+	case BUILT_IN_FMA:
+	  if (VECTOR_UNIT_VSX_P (V2DFmode)
+	      && out_mode == DFmode && out_n == 2
+	      && in_mode == DFmode && in_n == 2)
+	    return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
+	  break;
+	case BUILT_IN_FMAF:
+	  if (VECTOR_UNIT_VSX_P (V4SFmode)
+	      && out_mode == SFmode && out_n == 4
+	      && in_mode == SFmode && in_n == 4)
+	    return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
+	  else if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
+	      && out_mode == SFmode && out_n == 4
+	      && in_mode == SFmode && in_n == 4)
+	    return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
+	  break;
 	case BUILT_IN_TRUNC:
 	  if (VECTOR_UNIT_VSX_P (V2DFmode)
 	      && out_mode == DFmode && out_n == 2
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 9fa2ff1..f1e63cc 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -5844,6 +5844,78 @@
   "fres %0,%1"
   [(set_attr "type" "fp")])
 
+; builtin fmaf support
+; If the user explicitly uses the fma builtin, don't convert this to
+; (plus (mult op1 op2) op3)
+(define_expand "fmasf4"
+  [(set (match_operand:SF 0 "gpc_reg_operand" "")
+	(fma:SF (match_operand:SF 1 "gpc_reg_operand" "")
+		(match_operand:SF 2 "gpc_reg_operand" "")
+		(match_operand:SF 3 "gpc_reg_operand" "")))]
+  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+  "")
+
+(define_insn "fmasf4_fpr"
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+	(fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
+		(match_operand:SF 2 "gpc_reg_operand" "f")
+		(match_operand:SF 3 "gpc_reg_operand" "f")))]
+  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+  "*
+{
+  return ((TARGET_POWERPC)
+	  ? \"fmadds %0,%1,%2,%3\"
+	  : \"{fma|fmadd} %0,%1,%2,%3\");
+}"
+  [(set_attr "type" "fp")
+   (set_attr "fp_type" "fp_maddsub_s")])
+
+(define_insn "*fmssf4_fpr"
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+	(fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
+		(match_operand:SF 2 "gpc_reg_operand" "f")
+		(neg:SF (match_operand:SF 3 "gpc_reg_operand" "f"))))]
+  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+  "*
+{
+  return ((TARGET_POWERPC)
+	  ? \"fmsubs %0,%1,%2,%3\"
+	  : \"{fms|fmsub} %0,%1,%2,%3\");
+}"
+  [(set_attr "type" "fp")
+   (set_attr "fp_type" "fp_maddsub_s")])
+
+(define_insn "*fnmasf4_fpr"
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+	(neg:SF (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
+			(match_operand:SF 2 "gpc_reg_operand" "f")
+			(match_operand:SF 3 "gpc_reg_operand" "f"))))]
+  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+  "*
+{
+  return ((TARGET_POWERPC)
+	  ? \"fnmadds %0,%1,%2,%3\"
+	  : \"{fnma|fnmadd} %0,%1,%2,%3\");
+}"
+  [(set_attr "type" "fp")
+   (set_attr "fp_type" "fp_maddsub_s")])
+
+(define_insn "*fnmssf4_fpr"
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+	(neg:SF (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
+			(match_operand:SF 2 "gpc_reg_operand" "f")
+			(neg:SF (match_operand:SF 3 "gpc_reg_operand" "f")))))]
+  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT"
+  "*
+{
+  return ((TARGET_POWERPC)
+	  ? \"fnmsubs %0,%1,%2,%3\"
+	  : \"{fnms|fnmsub} %0,%1,%2,%3\");
+}"
+  [(set_attr "type" "fp")
+   (set_attr "fp_type" "fp_maddsub_s")])
+
+; Fused multiply/add ops created by the combiner
 (define_insn "*fmaddsf4_powerpc"
   [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
 	(plus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
@@ -5854,7 +5926,7 @@
   "fmadds %0,%1,%2,%3"
   [(set_attr "type" "fp")
    (set_attr "fp_type" "fp_maddsub_s")])
-
+ 
 (define_insn "*fmaddsf4_power"
   [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
 	(plus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
@@ -6312,6 +6384,62 @@
   "frsqrte %0,%1"
   [(set_attr "type" "fp")])
 
+; builtin fma support
+; If the user explicitly uses the fma builtin, don't convert this to
+; (plus (mult op1 op2) op3)
+(define_expand "fmadf4"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "")
+	(fma:DF (match_operand:DF 1 "gpc_reg_operand" "")
+		(match_operand:DF 2 "gpc_reg_operand" "")
+		(match_operand:DF 3 "gpc_reg_operand" "")))]
+  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
+  "")
+
+(define_insn "fmadf4_fpr"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+	(fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
+		(match_operand:DF 2 "gpc_reg_operand" "f")
+		(match_operand:DF 3 "gpc_reg_operand" "f")))]
+  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+   && VECTOR_UNIT_NONE_P (DFmode)"
+  "{fma|fmadd} %0,%1,%2,%3"
+  [(set_attr "type" "fp")
+   (set_attr "fp_type" "fp_maddsub_s")])
+
+(define_insn "*fmsdf4_fpr"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+	(fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
+		(match_operand:DF 2 "gpc_reg_operand" "f")
+		(neg:DF (match_operand:DF 3 "gpc_reg_operand" "f"))))]
+  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+   && VECTOR_UNIT_NONE_P (DFmode)"
+  "{fms|fmsub} %0,%1,%2,%3"
+  [(set_attr "type" "fp")
+   (set_attr "fp_type" "fp_maddsub_s")])
+
+(define_insn "*fnmadf4_fpr"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+	(neg:DF (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
+			(match_operand:DF 2 "gpc_reg_operand" "f")
+			(match_operand:DF 3 "gpc_reg_operand" "f"))))]
+  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+   && VECTOR_UNIT_NONE_P (DFmode)"
+  "{fnma|fnmadd} %0,%1,%2,%3"
+  [(set_attr "type" "fp")
+   (set_attr "fp_type" "fp_maddsub_s")])
+
+(define_insn "*fnmsdf4_fpr"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+	(neg:DF (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
+			(match_operand:DF 2 "gpc_reg_operand" "f")
+			(neg:DF (match_operand:DF 3 "gpc_reg_operand" "f")))))]
+  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+   && VECTOR_UNIT_NONE_P (DFmode)"
+  "{fnms|fnmsub} %0,%1,%2,%3"
+  [(set_attr "type" "fp")
+   (set_attr "fp_type" "fp_maddsub_s")])
+
+; Fused multiply/add ops created by the combiner
 (define_insn "*fmadddf4_fpr"
   [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
 	(plus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%d")
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 4b395e3..a861cc0 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -194,11 +194,7 @@
    (UNSPEC_VSX_CVUXDSP		507)
    (UNSPEC_VSX_CVSPSXDS		508)
    (UNSPEC_VSX_CVSPUXDS		509)
-   (UNSPEC_VSX_MADD		510)
-   (UNSPEC_VSX_MSUB		511)
-   (UNSPEC_VSX_NMADD		512)
-   (UNSPEC_VSX_NMSUB		513)
-   ;; 514 deleted
+   ;; 510-514 deleted
    (UNSPEC_VSX_TDIV		515)
    (UNSPEC_VSX_TSQRT		516)
    (UNSPEC_VSX_XXPERMDI		517)
@@ -499,19 +495,22 @@
 ;; does not check -mfused-madd to allow users to use these ops when they know
 ;; they want the fused multiply/add.
 
+;; Fused multiply add.  By default expand the FMA into (plus (mult)) to help
+;; loop unrolling.  Don't do negate multiply ops, because of complications with
+;; honoring signed zero and fused-madd.
+
 (define_expand "vsx_fmadd<mode>4"
   [(set (match_operand:VSX_B 0 "vsx_register_operand" "")
 	(plus:VSX_B
-	 (mult:VSX_B
-	  (match_operand:VSX_B 1 "vsx_register_operand" "")
-	  (match_operand:VSX_B 2 "vsx_register_operand" ""))
+	 (mult:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "")
+		     (match_operand:VSX_B 2 "vsx_register_operand" ""))
 	 (match_operand:VSX_B 3 "vsx_register_operand" "")))]
   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 {
   if (!TARGET_FUSED_MADD)
     {
-      emit_insn (gen_vsx_fmadd<mode>4_2 (operands[0], operands[1], operands[2],
-					 operands[3]));
+      emit_insn (gen_vsx_fmadd<mode>4_2 (operands[0], operands[1],
+					 operands[2], operands[3]));
       DONE;
     }
 })
@@ -534,10 +533,9 @@
 
 (define_insn "vsx_fmadd<mode>4_2"
   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
-	(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
-		       (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
-		       (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")]
-		      UNSPEC_VSX_MADD))]
+	(fma:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
+		   (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
+		   (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")))]
   "VECTOR_UNIT_VSX_P (<MODE>mode)"
   "@
    x<VSv>madda<VSs> %x0,%x1,%x2
@@ -550,16 +548,15 @@
 (define_expand "vsx_fmsub<mode>4"
   [(set (match_operand:VSX_B 0 "vsx_register_operand" "")
 	(minus:VSX_B
-	 (mult:VSX_B
-	  (match_operand:VSX_B 1 "vsx_register_operand" "")
-	  (match_operand:VSX_B 2 "vsx_register_operand" ""))
+	 (mult:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "")
+		     (match_operand:VSX_B 2 "vsx_register_operand" ""))
 	 (match_operand:VSX_B 3 "vsx_register_operand" "")))]
   "VECTOR_UNIT_VSX_P (<MODE>mode)"
 {
   if (!TARGET_FUSED_MADD)
     {
-      emit_insn (gen_vsx_fmsub<mode>4_2 (operands[0], operands[1], operands[2],
-					 operands[3]));
+      emit_insn (gen_vsx_fmsub<mode>4_2 (operands[0], operands[1],
+					 operands[2], operands[3]));
       DONE;
     }
 })
@@ -582,10 +579,10 @@
 
 (define_insn "vsx_fmsub<mode>4_2"
   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
-	(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
-		       (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
-		       (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")]
-		      UNSPEC_VSX_MSUB))]
+	(fma:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
+		   (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
+		   (neg:VSX_B
+		    (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
   "VECTOR_UNIT_VSX_P (<MODE>mode)"
   "@
    x<VSv>msuba<VSs> %x0,%x1,%x2
@@ -595,32 +592,21 @@
   [(set_attr "type" "<VStype_mul>")
    (set_attr "fp_type" "<VSfptype_mul>")])
 
-(define_expand "vsx_fnmadd<mode>4"
-  [(match_operand:VSX_B 0 "vsx_register_operand" "")
-   (match_operand:VSX_B 1 "vsx_register_operand" "")
-   (match_operand:VSX_B 2 "vsx_register_operand" "")
-   (match_operand:VSX_B 3 "vsx_register_operand" "")]
+(define_insn "vsx_fnmadd<mode>4"
+  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
+	(neg:VSX_B
+	 (fma:VSX_B
+	  (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSr>,wa,wa")
+	  (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
+	  (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
   "VECTOR_UNIT_VSX_P (<MODE>mode)"
-{
-  if (TARGET_FUSED_MADD && HONOR_SIGNED_ZEROS (DFmode))
-    {
-       emit_insn (gen_vsx_fnmadd<mode>4_1 (operands[0], operands[1],
-					   operands[2], operands[3]));
-       DONE;
-    }
-  else if (TARGET_FUSED_MADD && !HONOR_SIGNED_ZEROS (DFmode))
-    {
-       emit_insn (gen_vsx_fnmadd<mode>4_2 (operands[0], operands[1],
-					   operands[2], operands[3]));
-       DONE;
-    }
-  else
-    {
-       emit_insn (gen_vsx_fnmadd<mode>4_3 (operands[0], operands[1],
-					   operands[2], operands[3]));
-       DONE;
-    }
-})
+  "@
+   x<VSv>nmadda<VSs> %x0,%x1,%x2
+   x<VSv>nmaddm<VSs> %x0,%x1,%x3
+   x<VSv>nmadda<VSs> %x0,%x1,%x2
+   x<VSv>nmaddm<VSs> %x0,%x1,%x3"
+  [(set_attr "type" "<VStype_mul>")
+   (set_attr "fp_type" "<VSfptype_mul>")])
 
 (define_insn "vsx_fnmadd<mode>4_1"
   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
@@ -658,48 +644,22 @@
   [(set_attr "type" "<VStype_mul>")
    (set_attr "fp_type" "<VSfptype_mul>")])
 
-(define_insn "vsx_fnmadd<mode>4_3"
+(define_insn "vsx_fnmsub<mode>4"
   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
-	(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSr>,wa,wa")
-		       (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
-		       (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")]
-		      UNSPEC_VSX_NMADD))]
+	(neg:VSX_B
+	 (fma:VSX_B (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
+		    (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
+		    (neg:VSX_B
+		     (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")))))]
   "VECTOR_UNIT_VSX_P (<MODE>mode)"
   "@
-   x<VSv>nmadda<VSs> %x0,%x1,%x2
-   x<VSv>nmaddm<VSs> %x0,%x1,%x3
-   x<VSv>nmadda<VSs> %x0,%x1,%x2
-   x<VSv>nmaddm<VSs> %x0,%x1,%x3"
+   x<VSv>nmsuba<VSs> %x0,%x1,%x2
+   x<VSv>nmsubm<VSs> %x0,%x1,%x3
+   x<VSv>nmsuba<VSs> %x0,%x1,%x2
+   x<VSv>nmsubm<VSs> %x0,%x1,%x3"
   [(set_attr "type" "<VStype_mul>")
    (set_attr "fp_type" "<VSfptype_mul>")])
 
-(define_expand "vsx_fnmsub<mode>4"
-  [(match_operand:VSX_B 0 "vsx_register_operand" "")
-   (match_operand:VSX_B 1 "vsx_register_operand" "")
-   (match_operand:VSX_B 2 "vsx_register_operand" "")
-   (match_operand:VSX_B 3 "vsx_register_operand" "")]
-  "VECTOR_UNIT_VSX_P (<MODE>mode)"
-{
-  if (TARGET_FUSED_MADD && HONOR_SIGNED_ZEROS (DFmode))
-    {
-       emit_insn (gen_vsx_fnmsub<mode>4_1 (operands[0], operands[1],
-					   operands[2], operands[3]));
-       DONE;
-    }
-  else if (TARGET_FUSED_MADD && !HONOR_SIGNED_ZEROS (DFmode))
-    {
-       emit_insn (gen_vsx_fnmsub<mode>4_2 (operands[0], operands[1],
-					   operands[2], operands[3]));
-       DONE;
-    }
-  else
-    {
-       emit_insn (gen_vsx_fnmsub<mode>4_3 (operands[0], operands[1],
-					   operands[2], operands[3]));
-       DONE;
-    }
-})
-
 (define_insn "vsx_fnmsub<mode>4_1"
   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
 	(neg:VSX_B
@@ -735,21 +695,6 @@
   [(set_attr "type" "<VStype_mul>")
    (set_attr "fp_type" "<VSfptype_mul>")])
 
-(define_insn "vsx_fnmsub<mode>4_3"
-  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
-	(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa")
-		       (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
-		       (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")]
-		      UNSPEC_VSX_NMSUB))]
-  "VECTOR_UNIT_VSX_P (<MODE>mode)"
-  "@
-   x<VSv>nmsuba<VSs> %x0,%x1,%x2
-   x<VSv>nmsubm<VSs> %x0,%x1,%x3
-   x<VSv>nmsuba<VSs> %x0,%x1,%x2
-   x<VSv>nmsubm<VSs> %x0,%x1,%x3"
-  [(set_attr "type" "<VStype_mul>")
-   (set_attr "fp_type" "<VSfptype_mul>")])
-
 ;; Vector conditional expressions (no scalar version for these instructions)
 (define_insn "vsx_eq<mode>"
   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
diff --git a/gcc/doc/cpp.texi b/gcc/doc/cpp.texi
index 7276b61..a580e61 100644
--- a/gcc/doc/cpp.texi
+++ b/gcc/doc/cpp.texi
@@ -2345,6 +2345,15 @@ and swap operations on operands 1, 2, 4, 8 or 16 bytes in length, respectively.
 This macro is defined when the compiler is emitting Dwarf2 CFI directives
 to the assembler.  When this is defined, it is possible to emit those same
 directives in inline assembly.
+
+@item __FP_FAST_FMA
+@itemx __FP_FAST_FMAF
+@itemx __FP_FAST_FMAL
+These macros are defined with value 1 if the backend supports the
+@code{fma}, @code{fmaf}, and @code{fmal} builtin functions, so that
+the include file @file{math.h} can define the macros
+@code{FP_FAST_FMA}, @code{FP_FAST_FMAF}, and @code{FP_FAST_FMAL}
+for compatibility with the 1999 C standard.
 @end table
 
 @node System-specific Predefined Macros
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 1bbdcd0..6de4f36 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3948,6 +3948,16 @@ means of constraints requiring operands 1 and 0 to be the same location.
 @itemx @samp{and@var{m}3}, @samp{ior@var{m}3}, @samp{xor@var{m}3}
 Similar, for other arithmetic operations.
 
+@cindex @code{fma@var{m}4} instruction pattern
+@item @samp{fma@var{m}4}
+Multiply operand 2 and operand 1, then add operand 3, storing the
+result in operand 0.  All operands must have mode @var{m}.  This
+pattern is used to implement the @code{fma}, @code{fmaf}, and
+@code{fmal} builtin functions from the ISO C99 standard.  The
+@code{fma} operation may produce different results than doing the
+multiply followed by the add if the machine does not perform a
+rounding step between the operations.
+
 @cindex @code{min@var{m}3} instruction pattern
 @cindex @code{max@var{m}3} instruction pattern
 @item @samp{smin@var{m}3}, @samp{smax@var{m}3}
diff --git a/gcc/doc/rtl.texi b/gcc/doc/rtl.texi
index 4c15e8f..6fae670 100644
--- a/gcc/doc/rtl.texi
+++ b/gcc/doc/rtl.texi
@@ -182,7 +182,8 @@ and are lvalues (so they can be used for insertion as well).
 
 @item RTX_TERNARY
 An RTX code for other three input operations.  Currently only
-@code{IF_THEN_ELSE} and @code{VEC_MERGE}.
+@code{IF_THEN_ELSE},  @code{VEC_MERGE}, @code{SIGN_EXTRACT},
+@code{ZERO_EXTRACT}, and @code{FMA}.
 
 @item RTX_INSN
 An RTX code for an entire instruction:  @code{INSN}, @code{JUMP_INSN}, and
@@ -2234,6 +2235,12 @@ not be the same.
 For unsigned widening multiplication, use the same idiom, but with
 @code{zero_extend} instead of @code{sign_extend}.
 
+@findex fma
+@item (fma:@var{m} @var{x} @var{y} @var{z})
+Represents the @code{fma}, @code{fmaf}, and @code{fmal} builtin
+functions that do a combined multiply of @var{x} and @var{y} and then
+adding to@var{z} without doing an intermediate rounding step.
+
 @findex div
 @findex ss_div
 @cindex division
diff --git a/gcc/genopinit.c b/gcc/genopinit.c
index 6bc3c35..6e0a714 100644
--- a/gcc/genopinit.c
+++ b/gcc/genopinit.c
@@ -159,6 +159,7 @@ static const char * const optabs[] =
   "set_optab_handler (sqrt_optab, $A, CODE_FOR_$(sqrt$a2$))",
   "set_optab_handler (floor_optab, $A, CODE_FOR_$(floor$a2$))",
   "set_convert_optab_handler (lfloor_optab, $B, $A, CODE_FOR_$(lfloor$F$a$I$b2$))",
+  "set_optab_handler (fma_optab, $A, CODE_FOR_$(fma$a4$))",
   "set_optab_handler (ceil_optab, $A, CODE_FOR_$(ceil$a2$))",
   "set_convert_optab_handler (lceil_optab, $B, $A, CODE_FOR_$(lceil$F$a$I$b2$))",
   "set_optab_handler (round_optab, $A, CODE_FOR_$(round$a2$))",
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 547339b..8b9c9a7 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -190,6 +190,8 @@ enum optab_index
   OTI_pow,
   /* Arc tangent of y/x */
   OTI_atan2,
+  /* Floating multiply/add */
+  OTI_fma,
 
   /* Move instruction.  */
   OTI_mov,
@@ -432,6 +434,7 @@ enum optab_index
 #define umax_optab (&optab_table[OTI_umax])
 #define pow_optab (&optab_table[OTI_pow])
 #define atan2_optab (&optab_table[OTI_atan2])
+#define fma_optab (&optab_table[OTI_fma])
 
 #define mov_optab (&optab_table[OTI_mov])
 #define movstrict_optab (&optab_table[OTI_movstrict])
diff --git a/gcc/rtl.def b/gcc/rtl.def
index 282cca6..6e2aa8b 100644
--- a/gcc/rtl.def
+++ b/gcc/rtl.def
@@ -706,6 +706,9 @@ DEF_RTL_EXPR(SS_TRUNCATE, "ss_truncate", "e", RTX_UNARY)
 /* Unsigned saturating truncate.  */
 DEF_RTL_EXPR(US_TRUNCATE, "us_truncate", "e", RTX_UNARY)
 
+/* Floating point multiply/add combined instruction.  */
+DEF_RTL_EXPR(FMA, "fma", "eee", RTX_TERNARY)
+
 /* Information about the variable and its location.  */
 /* Changed 'te' to 'tei'; the 'i' field is for recording
    initialization status of variables.  */
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 97ff266..f700958 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -4712,6 +4712,12 @@ simplify_ternary_operation (enum rtx_code code, enum machine_mode mode,
 
   switch (code)
     {
+      /* At present, don't simplify fused multiply and add ops, because we need
+	 to make sure there are no intermediate rounding steps used, and that
+	 we get the right sign if negative 0 would be returned.  */
+    case FMA:
+      return NULL_RTX;
+
     case SIGN_EXTRACT:
     case ZERO_EXTRACT:
       if (CONST_INT_P (op0)
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 2804670..f1469a2 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,13 @@
+2010-10-14  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	* gcc.target/powerpc/ppc-fma-1.c: New tests for powerpc FMA
+	builtin combiner patterns.
+	* gcc.target/powerpc/ppc-fma-2.c: Ditto.
+	* gcc.target/powerpc/ppc-fma-3.c: Ditto.
+	* gcc.target/powerpc/ppc-fma-4.c: Ditto.
+	* gcc.target/powerpc/ppc-fma-5.c: Ditto.
+	* gcc.target/powerpc/ppc-fma-6.c: Ditto.
+
 2010-10-15  Richard Guenther  <rguenther@suse.de>
 
 	* g++.dg/lto/20101015-1_0.C: New testcase.
diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fma-1.c b/gcc/testsuite/gcc.target/powerpc/ppc-fma-1.c
new file mode 100644
index 0000000..674115a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-fma-1.c
@@ -0,0 +1,183 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O3 -ftree-vectorize -mcpu=power7 -ffast-math" } */
+/* { dg-final { scan-assembler-times "xvmadd" 4 } } */
+/* { dg-final { scan-assembler-times "xsmadd" 2 } } */
+/* { dg-final { scan-assembler-times "fmadds" 2 } } */
+/* { dg-final { scan-assembler-times "xvmsub" 2 } } */
+/* { dg-final { scan-assembler-times "xsmsub" 1 } } */
+/* { dg-final { scan-assembler-times "fmsubs" 1 } } */
+/* { dg-final { scan-assembler-times "xvnmadd" 2 } } */
+/* { dg-final { scan-assembler-times "xsnmadd" 1 } } */
+/* { dg-final { scan-assembler-times "fnmadds" 1 } } */
+/* { dg-final { scan-assembler-times "xvnmsub" 2 } } */
+/* { dg-final { scan-assembler-times "xsnmsub" 1 } } */
+/* { dg-final { scan-assembler-times "fnmsubs" 1 } } */
+
+/* All functions should generate an appropriate (a * b) + c instruction
+   since -mfused-madd is on by default.  */
+
+double
+builtin_fma (double b, double c, double d)
+{
+  return __builtin_fma (b, c, d);			/* xsmadd{a,m}dp */
+}
+
+double
+builtin_fms (double b, double c, double d)
+{
+  return __builtin_fma (b, c, -d);			/* xsmsub{a,b}dp */
+}
+
+double
+builtin_fnma (double b, double c, double d)
+{
+  return - __builtin_fma (b, c, d);			/* xsnmadd{a,b}dp */
+}
+
+double
+builtin_fnms (double b, double c, double d)
+{
+  return - __builtin_fma (b, c, -d);			/* xsnmsub{a,b}dp */
+}
+
+float
+builtin_fmaf (float b, float c, float d)
+{
+  return __builtin_fmaf (b, c, d);			/* fmadds */
+}
+
+float
+builtin_fmsf (float b, float c, float d)
+{
+  return __builtin_fmaf (b, c, -d);			/* fmsubs */
+}
+
+float
+builtin_fnmaf (float b, float c, float d)
+{
+  return - __builtin_fmaf (b, c, d);			/* fnmadds */
+}
+
+float
+builtin_fnmsf (float b, float c, float d)
+{
+  return - __builtin_fmaf (b, c, -d);			/* fnmsubs */
+}
+
+double
+normal_fma (double b, double c, double d)
+{
+  return (b * c) + d;					/* xsmadd{a,m}dp */
+}
+
+float
+normal_fmaf (float b, float c, float d)
+{
+  return (b * c) + d;					/* fmadds */
+}
+
+#ifndef SIZE
+#define SIZE 1024
+#endif
+
+double vda[SIZE] __attribute__((__aligned__(32)));
+double vdb[SIZE] __attribute__((__aligned__(32)));
+double vdc[SIZE] __attribute__((__aligned__(32)));
+double vdd[SIZE] __attribute__((__aligned__(32)));
+
+float vfa[SIZE] __attribute__((__aligned__(32)));
+float vfb[SIZE] __attribute__((__aligned__(32)));
+float vfc[SIZE] __attribute__((__aligned__(32)));
+float vfd[SIZE] __attribute__((__aligned__(32)));
+
+void
+vector_fma (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vda[i] = __builtin_fma (vdb[i], vdc[i], vdd[i]);	/* xvmadd{a,m}dp */
+}
+
+void
+vector_fms (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vda[i] = __builtin_fma (vdb[i], vdc[i], -vdd[i]);	/* xvmsub{a,m}dp */
+}
+
+void
+vector_fnma (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vda[i] = - __builtin_fma (vdb[i], vdc[i], vdd[i]);	/* xvnmadd{a,m}dp */
+}
+
+void
+vector_fnms (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vda[i] = - __builtin_fma (vdb[i], vdc[i], -vdd[i]);	/* xvnmsub{a,m}dp */
+}
+
+void
+vector_fmaf (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vfa[i] = __builtin_fmaf (vfb[i], vfc[i], vfd[i]);	/* xvmadd{a,m}sp */
+}
+
+void
+vector_fmsf (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vfa[i] = __builtin_fmaf (vfb[i], vfc[i], -vfd[i]);	/* xvmsub{a,m}sp */
+}
+
+void
+vector_fnmaf (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vfa[i] = - __builtin_fmaf (vfb[i], vfc[i], vfd[i]);	/* xvnmadd{a,m}sp */
+}
+
+void
+vector_fnmsf (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vfa[i] = - __builtin_fmaf (vfb[i], vfc[i], -vfd[i]); /* xvnmsub{a,m}sp */
+}
+
+void
+vnormal_fma (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vda[i] = (vdb[i] * vdc[i]) + vdd[i];		/* xvmadd{a,m}dp */
+}
+
+void
+vnormal_fmaf (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vfa[i] = (vfb[i] * vfc[i]) + vfd[i];		/* xvmadd{a,m}sp */
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fma-2.c b/gcc/testsuite/gcc.target/powerpc/ppc-fma-2.c
new file mode 100644
index 0000000..a17565b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-fma-2.c
@@ -0,0 +1,183 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O3 -ftree-vectorize -mcpu=power7 -ffast-math -mno-fused-madd" } */
+/* { dg-final { scan-assembler-times "xvmadd" 2 } } */
+/* { dg-final { scan-assembler-times "xsmadd" 1 } } */
+/* { dg-final { scan-assembler-times "fmadds" 1 } } */
+/* { dg-final { scan-assembler-times "xvmsub" 2 } } */
+/* { dg-final { scan-assembler-times "xsmsub" 1 } } */
+/* { dg-final { scan-assembler-times "fmsubs" 1 } } */
+/* { dg-final { scan-assembler-times "xvnmadd" 2 } } */
+/* { dg-final { scan-assembler-times "xsnmadd" 1 } } */
+/* { dg-final { scan-assembler-times "fnmadds" 1 } } */
+/* { dg-final { scan-assembler-times "xvnmsub" 2 } } */
+/* { dg-final { scan-assembler-times "xsnmsub" 1 } } */
+/* { dg-final { scan-assembler-times "fnmsubs" 1 } } */
+
+/* Only the functions calling the bulitin should generate an appropriate (a *
+   b) + c instruction.  */
+
+double
+builtin_fma (double b, double c, double d)
+{
+  return __builtin_fma (b, c, d);			/* xsmadd{a,m}dp */
+}
+
+double
+builtin_fms (double b, double c, double d)
+{
+  return __builtin_fma (b, c, -d);			/* xsmsub{a,b}dp */
+}
+
+double
+builtin_fnma (double b, double c, double d)
+{
+  return - __builtin_fma (b, c, d);			/* xsnmadd{a,b}dp */
+}
+
+double
+builtin_fnms (double b, double c, double d)
+{
+  return - __builtin_fma (b, c, -d);			/* xsnmsub{a,b}dp */
+}
+
+float
+builtin_fmaf (float b, float c, float d)
+{
+  return __builtin_fmaf (b, c, d);			/* fmadds */
+}
+
+float
+builtin_fmsf (float b, float c, float d)
+{
+  return __builtin_fmaf (b, c, -d);			/* fmsubs */
+}
+
+float
+builtin_fnmaf (float b, float c, float d)
+{
+  return - __builtin_fmaf (b, c, d);			/* fnmadds */
+}
+
+float
+builtin_fnmsf (float b, float c, float d)
+{
+  return - __builtin_fmaf (b, c, -d);			/* fnmsubs */
+}
+
+double
+normal_fma (double b, double c, double d)
+{
+  return (b * c) + d;					/* fmul/fadd */
+}
+
+float
+normal_fmaf (float b, float c, float d)
+{
+  return (b * c) + d;					/* fmuls/fadds */
+}
+
+#ifndef SIZE
+#define SIZE 1024
+#endif
+
+double vda[SIZE] __attribute__((__aligned__(32)));
+double vdb[SIZE] __attribute__((__aligned__(32)));
+double vdc[SIZE] __attribute__((__aligned__(32)));
+double vdd[SIZE] __attribute__((__aligned__(32)));
+
+float vfa[SIZE] __attribute__((__aligned__(32)));
+float vfb[SIZE] __attribute__((__aligned__(32)));
+float vfc[SIZE] __attribute__((__aligned__(32)));
+float vfd[SIZE] __attribute__((__aligned__(32)));
+
+void
+vector_fma (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vda[i] = __builtin_fma (vdb[i], vdc[i], vdd[i]);	/* xvmadd{a,m}dp */
+}
+
+void
+vector_fms (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vda[i] = __builtin_fma (vdb[i], vdc[i], -vdd[i]);	/* xvmsub{a,m}dp */
+}
+
+void
+vector_fnma (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vda[i] = - __builtin_fma (vdb[i], vdc[i], vdd[i]);	/* xvnmadd{a,m}dp */
+}
+
+void
+vector_fnms (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vda[i] = - __builtin_fma (vdb[i], vdc[i], -vdd[i]);	/* xvnmsub{a,m}dp */
+}
+
+void
+vector_fmaf (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vfa[i] = __builtin_fmaf (vfb[i], vfc[i], vfd[i]);	/* xvmadd{a,m}sp */
+}
+
+void
+vector_fmsf (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vfa[i] = __builtin_fmaf (vfb[i], vfc[i], -vfd[i]);	/* xvmsub{a,m}sp */
+}
+
+void
+vector_fnmaf (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vfa[i] = - __builtin_fmaf (vfb[i], vfc[i], vfd[i]);	/* xvnmadd{a,m}sp */
+}
+
+void
+vector_fnmsf (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vfa[i] = - __builtin_fmaf (vfb[i], vfc[i], -vfd[i]); /* xvnmsub{a,m}sp */
+}
+
+void
+vnormal_fma (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vda[i] = (vdb[i] * vdc[i]) + vdd[i];		/* xvmadd{a,m}dp */
+}
+
+void
+vnormal_fmaf (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vfa[i] = (vfb[i] * vfc[i]) + vfd[i];		/* xvmadd{a,m}sp */
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fma-3.c b/gcc/testsuite/gcc.target/powerpc/ppc-fma-3.c
new file mode 100644
index 0000000..c83c582
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-fma-3.c
@@ -0,0 +1,103 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-options "-O3 -ftree-vectorize -mcpu=power6 -maltivec -ffast-math" } */
+/* { dg-final { scan-assembler-times "vmaddfp" 2 } } */
+/* { dg-final { scan-assembler-times "fmadd " 2 } } */
+/* { dg-final { scan-assembler-times "fmadds" 2 } } */
+/* { dg-final { scan-assembler-times "fmsub " 1 } } */
+/* { dg-final { scan-assembler-times "fmsubs" 1 } } */
+/* { dg-final { scan-assembler-times "fnmadd " 1 } } */
+/* { dg-final { scan-assembler-times "fnmadds" 1 } } */
+/* { dg-final { scan-assembler-times "fnmsub " 1 } } */
+/* { dg-final { scan-assembler-times "fnmsubs" 1 } } */
+
+/* All functions should generate an appropriate (a * b) + c instruction
+   since -mfused-madd is on by default.  */
+
+double
+builtin_fma (double b, double c, double d)
+{
+  return __builtin_fma (b, c, d);			/* fmadd */
+}
+
+double
+builtin_fms (double b, double c, double d)
+{
+  return __builtin_fma (b, c, -d);			/* fmsub */
+}
+
+double
+builtin_fnma (double b, double c, double d)
+{
+  return - __builtin_fma (b, c, d);			/* fnmadd */
+}
+
+double
+builtin_fnms (double b, double c, double d)
+{
+  return - __builtin_fma (b, c, -d);			/* fnmsub */
+}
+
+float
+builtin_fmaf (float b, float c, float d)
+{
+  return __builtin_fmaf (b, c, d);			/* fmadds */
+}
+
+float
+builtin_fmsf (float b, float c, float d)
+{
+  return __builtin_fmaf (b, c, -d);			/* fmsubs */
+}
+
+float
+builtin_fnmaf (float b, float c, float d)
+{
+  return - __builtin_fmaf (b, c, d);			/* fnmadds */
+}
+
+float
+builtin_fnmsf (float b, float c, float d)
+{
+  return - __builtin_fmaf (b, c, -d);			/* fnmsubs */
+}
+
+double
+normal_fma (double b, double c, double d)
+{
+  return (b * c) + d;					/* fmadd */
+}
+
+float
+normal_fmaf (float b, float c, float d)
+{
+  return (b * c) + d;					/* fmadds */
+}
+
+#ifndef SIZE
+#define SIZE 1024
+#endif
+
+float vfa[SIZE] __attribute__((__aligned__(32)));
+float vfb[SIZE] __attribute__((__aligned__(32)));
+float vfc[SIZE] __attribute__((__aligned__(32)));
+float vfd[SIZE] __attribute__((__aligned__(32)));
+
+void
+vector_fmaf (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vfa[i] = __builtin_fmaf (vfb[i], vfc[i], vfd[i]);	/* vaddfp */
+}
+
+void
+vnormal_fmaf (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vfa[i] = (vfb[i] * vfc[i]) + vfd[i];		/* vaddfp */
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fma-4.c b/gcc/testsuite/gcc.target/powerpc/ppc-fma-4.c
new file mode 100644
index 0000000..50e4317
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-fma-4.c
@@ -0,0 +1,94 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-options "-O3 -ftree-vectorize -mcpu=power6 -maltivec -ffast-math -mno-fused-madd" } */
+/* { dg-final { scan-assembler-times "vmaddfp" 1 } } */
+/* { dg-final { scan-assembler-times "fmadd " 1 } } */
+/* { dg-final { scan-assembler-times "fmadds" 1 } } */
+/* { dg-final { scan-assembler-times "fmsub " 1 } } */
+/* { dg-final { scan-assembler-times "fmsubs" 1 } } */
+/* { dg-final { scan-assembler-times "fnmadd " 1 } } */
+/* { dg-final { scan-assembler-times "fnmadds" 1 } } */
+/* { dg-final { scan-assembler-times "fnmsub " 1 } } */
+/* { dg-final { scan-assembler-times "fnmsubs" 1 } } */
+
+/* Only the functions calling the builtin should generate an appropriate
+   (a * b) + c instruction.  */
+
+double
+builtin_fma (double b, double c, double d)
+{
+  return __builtin_fma (b, c, d);			/* fmadd */
+}
+
+double
+builtin_fms (double b, double c, double d)
+{
+  return __builtin_fma (b, c, -d);			/* fmsub */
+}
+
+double
+builtin_fnma (double b, double c, double d)
+{
+  return - __builtin_fma (b, c, d);			/* fnmadd */
+}
+
+double
+builtin_fnms (double b, double c, double d)
+{
+  return - __builtin_fma (b, c, -d);			/* fnmsub */
+}
+
+float
+builtin_fmaf (float b, float c, float d)
+{
+  return __builtin_fmaf (b, c, d);			/* fmadds */
+}
+
+float
+builtin_fmsf (float b, float c, float d)
+{
+  return __builtin_fmaf (b, c, -d);			/* fmsubs */
+}
+
+float
+builtin_fnmaf (float b, float c, float d)
+{
+  return - __builtin_fmaf (b, c, d);			/* fnmadds */
+}
+
+float
+builtin_fnmsf (float b, float c, float d)
+{
+  return - __builtin_fmaf (b, c, -d);			/* fnmsubs */
+}
+
+double
+normal_fma (double b, double c, double d)
+{
+  return (b * c) + d;					/* fmul/fadd */
+}
+
+float
+normal_fmaf (float b, float c, float d)
+{
+  return (b * c) + d;					/* fmuls/fadds */
+}
+
+#ifndef SIZE
+#define SIZE 1024
+#endif
+
+float vfa[SIZE] __attribute__((__aligned__(32)));
+float vfb[SIZE] __attribute__((__aligned__(32)));
+float vfc[SIZE] __attribute__((__aligned__(32)));
+float vfd[SIZE] __attribute__((__aligned__(32)));
+
+void
+vector_fmaf (void)
+{
+  int i;
+
+  for (i = 0; i < SIZE; i++)
+    vfa[i] = __builtin_fmaf (vfb[i], vfc[i], vfd[i]);	/* vaddfp */
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fma-5.c b/gcc/testsuite/gcc.target/powerpc/ppc-fma-5.c
new file mode 100644
index 0000000..97243af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-fma-5.c
@@ -0,0 +1,26 @@
+/* { dg-do run { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-options "-O2 -mcpu=power5 -std=c99" } */
+
+#ifndef __FP_FAST_FMA
+#error "__FP_FAST_FMA should be defined"
+#endif
+
+#ifndef __FP_FAST_FMAF
+#error "__FP_FAST_FMAF should be defined"
+#endif
+
+double d_a = 2.0,  d_b = 3.0,  d_c = 4.0;
+float  f_a = 2.0f, f_b = 3.0f, f_c = 4.0f;
+
+int
+main (void)
+{
+  if (__builtin_fma (d_a, d_b, d_c) != (2.0 * 3.0) + 4.0)
+    __builtin_abort ();
+
+  if (__builtin_fmaf (f_a, f_b, f_c) != (2.0f * 3.0f) + 4.0f)
+    __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fma-6.c b/gcc/testsuite/gcc.target/powerpc/ppc-fma-6.c
new file mode 100644
index 0000000..c9132bb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/ppc-fma-6.c
@@ -0,0 +1,28 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target ilp32 } */
+/* { dg-options "-O2 -mcpu=power5 -std=c99 -msoft-float" } */
+/* { dg-final { scan-assembler-not "fmadd" } } */
+/* { dg-final { scan-assembler-not "xsfmadd" } } */
+
+/* Test whether -msoft-float turns off the macros math.h uses for
+   FP_FAST_FMA{,F,L}.  */
+#ifdef __FP_FAST_FMA
+#error "__FP_FAST_FMA should not be defined"
+#endif
+
+#ifdef __FP_FAST_FMAF
+#error "__FP_FAST_FMAF should not be defined"
+#endif
+
+double
+builtin_fma (double b, double c, double d)
+{
+  return __builtin_fma (b, c, d);			/* bl fma  */
+}
+
+float
+builtin_fmaf (float b, float c, float d)
+{
+  return __builtin_fmaf (b, c, -d);			/* bl fmaf */
+}
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 6d0c112..63af34e 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1357,10 +1357,10 @@ vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
   vectype_in = NULL_TREE;
   nargs = gimple_call_num_args (stmt);
 
-  /* Bail out if the function has more than two arguments, we
-     do not have interesting builtin functions to vectorize with
-     more than two arguments.  No arguments is also not good.  */
-  if (nargs == 0 || nargs > 2)
+  /* Bail out if the function has more than three arguments, we do not have
+     interesting builtin functions to vectorize with more than two arguments
+     except for fma.  No arguments is also not good.  */
+  if (nargs == 0 || nargs > 3)
     return false;
 
   for (i = 0; i < nargs; i++)
diff --git a/gcc/tree.h b/gcc/tree.h
index 8aedf1a..af14b8b 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -5068,6 +5068,7 @@ extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int,
 extern void set_builtin_user_assembler_name (tree decl, const char *asmspec);
 extern bool is_simple_builtin (tree);
 extern bool is_inexpensive_builtin (tree);
+extern bool mode_has_fma (enum machine_mode mode);
 
 /* In convert.c */
 extern tree strip_float_extensions (tree);