6 files changed, 704 insertions, 30 deletions
diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c
index 9f993c4..42b7604 100644
--- a/gcc/c-family/c-cppbuiltin.c
+++ b/gcc/c-family/c-cppbuiltin.c
@@ -1277,29 +1277,39 @@ c_cpp_builtins (cpp_reader *pfile)
 	{
 	  scalar_float_mode mode = mode_iter.require ();
 	  const char *name = GET_MODE_NAME (mode);
+	  const size_t name_len = strlen (name);
+	  char float_h_prefix[16] = "";
 	  char *macro_name
-	    = (char *) alloca (strlen (name)
-			       + sizeof ("__LIBGCC__MANT_DIG__"));
+	    = XALLOCAVEC (char, name_len + sizeof ("__LIBGCC__MANT_DIG__"));
 	  sprintf (macro_name, "__LIBGCC_%s_MANT_DIG__", name);
 	  builtin_define_with_int_value (macro_name,
 					 REAL_MODE_FORMAT (mode)->p);
 	  if (!targetm.scalar_mode_supported_p (mode)
 	      || !targetm.libgcc_floating_mode_supported_p (mode))
 	    continue;
-	  macro_name = (char *) alloca (strlen (name)
-					+ sizeof ("__LIBGCC_HAS__MODE__"));
+	  macro_name = XALLOCAVEC (char, name_len
+				   + sizeof ("__LIBGCC_HAS__MODE__"));
 	  sprintf (macro_name, "__LIBGCC_HAS_%s_MODE__", name);
 	  cpp_define (pfile, macro_name);
-	  macro_name = (char *) alloca (strlen (name)
-					+ sizeof ("__LIBGCC__FUNC_EXT__"));
+	  macro_name = XALLOCAVEC (char, name_len
+				   + sizeof ("__LIBGCC__FUNC_EXT__"));
 	  sprintf (macro_name, "__LIBGCC_%s_FUNC_EXT__", name);
 	  char suffix[20] = "";
 	  if (mode == TYPE_MODE (double_type_node))
-	    ; /* Empty suffix correct.  */
+	    {
+	      /* Empty suffix correct.  */
+	      memcpy (float_h_prefix, "DBL", 4);
+	    }
 	  else if (mode == TYPE_MODE (float_type_node))
-	    suffix[0] = 'f';
+	    {
+	      suffix[0] = 'f';
+	      memcpy (float_h_prefix, "FLT", 4);
+	    }
 	  else if (mode == TYPE_MODE (long_double_type_node))
-	    suffix[0] = 'l';
+	    {
+	      suffix[0] = 'l';
+	      memcpy (float_h_prefix, "LDBL", 5);
+	    }
 	  else
 	    {
 	      bool found_suffix = false;
@@ -1310,6 +1320,8 @@ c_cpp_builtins (cpp_reader *pfile)
 		    sprintf (suffix, "f%d%s", floatn_nx_types[i].n,
 			     floatn_nx_types[i].extended ? "x" : "");
 		    found_suffix = true;
+		    sprintf (float_h_prefix, "FLT%d%s", floatn_nx_types[i].n,
+			     floatn_nx_types[i].extended ? "X" : "");
 		    break;
 		  }
 	      gcc_assert (found_suffix);
@@ -1347,11 +1359,33 @@ c_cpp_builtins (cpp_reader *pfile)
 	    default:
 	      gcc_unreachable ();
 	    }
-	  macro_name = (char *) alloca (strlen (name)
-					+ sizeof ("__LIBGCC__EXCESS_"
-						  "PRECISION__"));
+	  macro_name = XALLOCAVEC (char, name_len
+				   + sizeof ("__LIBGCC__EXCESS_PRECISION__"));
 	  sprintf (macro_name, "__LIBGCC_%s_EXCESS_PRECISION__", name);
 	  builtin_define_with_int_value (macro_name, excess_precision);
+
+	  char val_name[64];
+
+	  macro_name = XALLOCAVEC (char, name_len
+				   + sizeof ("__LIBGCC__EPSILON__"));
+	  sprintf (macro_name, "__LIBGCC_%s_EPSILON__", name);
+	  sprintf (val_name, "__%s_EPSILON__", float_h_prefix);
+	  builtin_define_with_value (macro_name, val_name, 0);
+
+	  macro_name = XALLOCAVEC (char, name_len + sizeof ("__LIBGCC__MAX__"));
+	  sprintf (macro_name, "__LIBGCC_%s_MAX__", name);
+	  sprintf (val_name, "__%s_MAX__", float_h_prefix);
+	  builtin_define_with_value (macro_name, val_name, 0);
+
+	  macro_name = XALLOCAVEC (char, name_len + sizeof ("__LIBGCC__MIN__"));
+	  sprintf (macro_name, "__LIBGCC_%s_MIN__", name);
+	  sprintf (val_name, "__%s_MIN__", float_h_prefix);
+	  builtin_define_with_value (macro_name, val_name, 0);
+
+#ifdef HAVE_adddf3
+	  builtin_define_with_int_value ("__LIBGCC_HAVE_HWDBL__",
+					 HAVE_adddf3);
+#endif
 	}
 
       /* For libgcc crtstuff.c and libgcc2.c.  */
diff --git a/gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkd.c b/gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkd.c
new file mode 100644
index 0000000..3ef5fad
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkd.c
@@ -0,0 +1,126 @@
+/*
+  Program to test complex divide for correct results on selected values.
+  Checking known failure points.
+*/
+
+#include <float.h>
+
+extern void abort (void);
+extern void exit (int);
+
+extern int ilogb (double);
+int match (double _Complex, double _Complex);
+
+#define SMALL DBL_MIN
+#define MAXBIT DBL_MANT_DIG
+#define ERRLIM 6
+
+/*
+  Compare c (computed value) with z (expected value).
+  Return 0 if within allowed range.  Return 1 if not.
+*/
+int match (double _Complex c, double _Complex z)
+{
+  double rz, iz, rc, ic;
+  double rerr, ierr, rmax;
+  int biterr;
+  rz = __real__ z;
+  iz = __imag__ z;
+  rc = __real__ c;
+  ic = __imag__ c;
+
+  if (__builtin_fabs (rz) > SMALL)
+    {
+      rerr = __builtin_fabs (rz - rc) / __builtin_fabs (rz);
+    }
+  else if (__builtin_fabs (rz) == 0.0)
+    {
+      rerr = __builtin_fabs (rc);
+    }
+  else
+    {
+      rerr = __builtin_fabs (rz - rc) / SMALL;
+    }
+
+  if (__builtin_fabs (iz) > SMALL)
+    {
+      ierr = __builtin_fabs (iz - ic) / __builtin_fabs (iz);
+    }
+  else if (__builtin_fabs (iz) == 0.0)
+    {
+      ierr = __builtin_fabs (ic);
+    }
+  else
+    {
+      ierr = __builtin_fabs (iz - ic) / SMALL;
+    }
+  rmax = __builtin_fmax(rerr, ierr);
+  biterr = 0;
+  if ( rmax != 0.0)      
+    {
+      biterr = ilogb (rmax) + MAXBIT + 1;
+    }
+
+  if (biterr >= ERRLIM)
+    return 0;
+  else
+    return 1;
+}
+
+
+int main (int argc, char** argv)
+{
+  double _Complex a,b,c,z;
+  double xr[4], xi[4], yr[4], yi[4], zr[4], zi[4];
+  double cr, ci;
+  int i;
+  int ok = 1;
+  xr[0] = -0x1.16e7fad79e45ep+651;
+  xi[0] = -0x1.f7f75b94c6c6ap-860;
+  yr[0] = -0x1.2f40d8ff7e55ep+245;
+  yi[0] = -0x0.0000000004ebcp-1022;
+  zr[0] = 0x1.d6e4b0e282869p+405;
+  zi[0] = -0x1.e9095e311e706p-900;
+
+  xr[1] = -0x1.21ff587f953d3p-310;
+  xi[1] = -0x1.5a526dcc59960p+837;
+  yr[1] = 0x1.b88b8b552eaadp+735;
+  yi[1] = -0x1.873e2d6544d92p-327;
+  zr[1] = 0x1.65734a88b2de0p-961;
+  zi[1] =  -0x1.927e85b8b5770p+101;
+
+  xr[2] = 0x1.4612e41aa8080p-846;
+  xi[2] = -0x0.0000000613e07p-1022;
+  yr[2] = 0x1.df9cd0d58caafp-820;
+  yi[2] = -0x1.e47051a9036dbp-584;
+  zr[2] = 0x1.9b194f3fffa32p-469;
+  zi[2] = 0x1.58a00ab740a6bp-263;
+
+  xr[3] = 0x1.cb27eece7c585p-355;
+  xi[3] = 0x0.000000223b8a8p-1022;
+  yr[3] = -0x1.74e7ed2b9189fp-22;
+  yi[3] = 0x1.3d80439e9a119p-731;
+  zr[3] = -0x1.3b35ed806ae5ap-333;
+  zi[3] = -0x0.05e01bcbfd9f6p-1022;
+
+
+  for (i = 0; i < 4; i++)
+    {
+      __real__ a = xr[i];
+      __imag__ a = xi[i];
+      __real__ b = yr[i];
+      __imag__ b = yi[i];
+      __real__ z = zr[i];
+      __imag__ z = zi[i];
+      c = a / b;
+      cr = __real__ c;
+      ci = __imag__ c;
+
+      if (!match (c,z)){
+	ok = 0;
+      }
+    }
+  if (!ok)
+    abort ();
+  exit (0);
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkf.c b/gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkf.c
new file mode 100644
index 0000000..adf1ed9
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkf.c
@@ -0,0 +1,125 @@
+/*
+  Program to test complex divide for correct results on selected values.
+  Checking known failure points.
+*/
+
+#include <float.h>
+
+extern void abort (void);
+extern void exit (int);
+
+extern int ilogbf (float);
+int match (float _Complex, float _Complex);
+
+#define SMALL FLT_MIN
+#define MAXBIT FLT_MANT_DIG
+#define ERRLIM 6
+
+/*
+  Compare c (computed value) with z (expected value).
+  Return 0 if within allowed range.  Return 1 if not.
+*/
+int match (float _Complex c, float _Complex z)
+{
+  float rz, iz, rc, ic;
+  float rerr, ierr, rmax;
+  int biterr;
+  rz = __real__ z;
+  iz = __imag__ z;
+  rc = __real__ c;
+  ic = __imag__ c;
+
+  if (__builtin_fabsf (rz) > SMALL)
+    {
+      rerr = __builtin_fabsf (rz - rc) / __builtin_fabsf (rz);
+    }
+  else if (__builtin_fabsf (rz) == 0.0)
+    {
+      rerr = __builtin_fabsf (rc);
+    }
+  else
+    {
+      rerr = __builtin_fabsf (rz - rc) / SMALL;
+    }
+
+  if (__builtin_fabsf (iz) > SMALL)
+    {
+      ierr = __builtin_fabsf (iz - ic) / __builtin_fabsf (iz);
+    }
+  else if (__builtin_fabsf (iz) == 0.0)
+    {
+      ierr = __builtin_fabsf (ic);
+    }
+  else
+    {
+      ierr = __builtin_fabsf (iz - ic) / SMALL;
+    }
+  rmax = __builtin_fmaxf(rerr, ierr);
+  biterr = 0;
+  if ( rmax != 0.0)      
+    {
+      biterr = ilogbf (rmax) + MAXBIT + 1;
+    }
+
+  if (biterr >= ERRLIM)
+    return 0;
+  else
+    return 1;
+}
+
+
+int main(int argc, char** argv)
+{
+  float _Complex a,b,c,z;
+  float xr[4], xi[4], yr[4], yi[4], zr[4], zi[4];
+  float cr, ci;
+  int i;
+  int ok = 1;
+  xr[0] = 0x1.0b1600p-133;
+  xi[0] = 0x1.5e1c28p+54;
+  yr[0] = -0x1.cdec8cp-119;
+  yi[0] = 0x1.1e72ccp+32;
+  zr[0] = 0x1.38e502p+22;
+  zi[0] = -0x1.f89220p-129;
+
+  xr[1] = -0x1.b1bee2p+121;
+  xi[1] = -0x1.cb403ep-59;
+  yr[1] = 0x1.480000p-144;
+  yi[1] = -0x1.c66fc4p+5;
+  zr[1] = -0x1.60b8cap-34;
+  zi[1] = -0x1.e8b02ap+115;
+
+  xr[2] = -0x1.3f6e00p-97;
+  xi[2] = -0x1.c00000p-146;
+  yr[2] = 0x1.000000p-148;
+  yi[2] = -0x1.0c4e70p-91;
+  zr[2] = 0x1.aa50d0p-55;
+  zi[2] = -0x1.30c746p-6;
+
+  xr[3] = 0x1.000000p-148;
+  xi[3] = 0x1.f4bc04p-84;
+  yr[3] = 0x1.00ad74p-20;
+  yi[3] = 0x1.2ad02ep-85;
+  zr[3] = 0x1.1102ccp-127;
+  zi[3] = 0x1.f369a4p-64;
+
+  for (i = 0; i < 4; i++)
+    {
+      __real__ a = xr[i];
+      __imag__ a = xi[i];
+      __real__ b = yr[i];
+      __imag__ b = yi[i];
+      __real__ z = zr[i];
+      __imag__ z = zi[i];
+      c = a / b;
+      cr = __real__ c;
+      ci = __imag__ c;
+
+      if (!match (c,z)){
+	ok = 0;
+      }
+    }
+  if (!ok)
+    abort ();
+  exit (0);
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkld.c b/gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkld.c
new file mode 100644
index 0000000..ffe9c34
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkld.c
@@ -0,0 +1,168 @@
+/*
+  Program to test complex divide for correct results on selected values.
+  Checking known failure points.
+*/
+
+#include <float.h>
+
+extern void abort (void);
+extern void exit (int);
+
+extern int ilogbl (long double);
+int match (long double _Complex,long double _Complex);
+
+#define SMALL LDBL_MIN
+#define MAXBIT LDBL_MANT_DIG
+#define ERRLIM 6
+
+/*
+  Compare c (computed value) with z (expected value).
+  Return 0 if within allowed range.  Return 1 if not.
+*/
+int match (long double _Complex c,long double _Complex z)
+{
+  long double rz, iz, rc, ic;
+  long double rerr, ierr, rmax;
+  int biterr;
+  rz = __real__ z;
+  iz = __imag__ z;
+  rc = __real__ c;
+  ic = __imag__ c;
+
+  if (__builtin_fabsl (rz) > SMALL)
+    {
+      rerr = __builtin_fabsl (rz - rc) / __builtin_fabsl(rz);
+    }
+  else if (__builtin_fabsl (rz) == 0.0)
+    {
+      rerr = __builtin_fabsl (rc);
+    }
+  else
+    {
+      rerr = __builtin_fabsl (rz - rc) / SMALL;
+    }
+
+  if (__builtin_fabsl (iz) > SMALL)
+    {
+      ierr = __builtin_fabsl (iz - ic) / __builtin_fabsl(iz);
+    }
+  else if (__builtin_fabsl (iz) == 0.0)
+    {
+      ierr = __builtin_fabsl (ic);
+    }
+  else
+    {
+      ierr = __builtin_fabsl (iz - ic) / SMALL;
+    }
+  rmax = __builtin_fmaxl (rerr, ierr);
+  biterr = 0;
+  if ( rmax != 0.0)      
+    {
+      biterr = ilogbl (rmax) + MAXBIT + 1;
+    }
+
+  if (biterr >= ERRLIM)
+    return 0;
+  else
+    return 1;
+}
+
+
+int main (int argc, char** argv)
+{
+  long double _Complex a,b,c,z;
+  long double xr[4], xi[4], yr[4], yi[4], zr[4], zi[4];
+  long double cr, ci;
+  int i;
+  int ok = 1;
+
+#if (LDBL_MAX_EXP < 2048)
+  /*
+    Test values when mantissa is 11 or fewer bits.  Either LDBL is
+    using DBL on this platform or we are using IBM extended double
+    precision. Test values will be automatically truncated when
+    the available precision is smaller than the explicit precision.
+  */
+  xr[0] = -0x1.16e7fad79e45ep+651;
+  xi[0] = -0x1.f7f75b94c6c6ap-860;
+  yr[0] = -0x1.2f40d8ff7e55ep+245;
+  yi[0] = -0x0.0000000004ebcp-968;
+  zr[0] = 0x1.d6e4b0e2828694570ba839070beep+405L;
+  zi[0] = -0x1.e9095e311e70498db810196259b7p-846L;
+
+  xr[1] = -0x1.21ff587f953d3p-310;
+  xi[1] = -0x1.5a526dcc59960p+837;
+  yr[1] = 0x1.b88b8b552eaadp+735;
+  yi[1] = -0x1.873e2d6544d92p-327;
+  zr[1] = 0x1.65734a88b2ddff699c482ee8eef6p-961L;
+  zi[1] = -0x1.927e85b8b576f94a797a1bcb733dp+101L;
+
+  xr[2] = 0x1.4612e41aa8080p-846;
+  xi[2] = -0x0.0000000613e07p-968;
+  yr[2] = 0x1.df9cd0d58caafp-820;
+  yi[2] = -0x1.e47051a9036dbp-584;
+  zr[2] = 0x1.9b194f3aaadea545174c5372d8p-415L;
+  zi[2] = 0x1.58a00ab740a6ad3249002f2b79p-263L;
+
+  xr[3] = 0x1.cb27eece7c585p-355;
+  xi[3] = 0x0.000000223b8a8p-968;
+  yr[3] = -0x1.74e7ed2b9189fp-22;
+  yi[3] = 0x1.3d80439e9a119p-731;
+  zr[3] = -0x1.3b35ed806ae5a2a8cc1c9a96931dp-333L;
+  zi[3] = -0x1.7802c17c774895bd541adeb200p-974L;
+#else
+  /*
+    Test values intended for either IEEE128 or Intel80 formats.  In
+    either case, 15 bits of exponent are available.  Test values will
+    be automatically truncated when the available precision is smaller
+    than the explicit precision.
+  */
+  xr[0] = -0x9.c793985b7d029d90p-8480L;
+  xi[0] = 0x8.018745ffa61a8fe0p+16329L;
+  yr[0] = -0xe.d5bee9c523a35ad0p-15599L;
+  yi[0] = -0xa.8c93c5a4f94128f0p+869L;
+  zr[0] = -0x1.849178451c035b95d16311d0efdap+15459L;
+  zi[0] = -0x1.11375ed2c1f58b9d047ab64aed97p-1008L;
+
+  xr[1] = 0xb.68e44bc6d0b91a30p+16026L;
+  xi[1] = 0xb.ab10f5453e972f30p-14239L;
+  yr[1] = 0x8.8cbd470705428ff0p-16350L;
+  yi[1] = -0xa.0c1cbeae4e4b69f0p+347L;
+  zr[1] = 0x1.eec40848785e500d9f0945ab58d3p-1019L;
+  zi[1] = 0x1.22b6b579927a3f238b772bb6dc95p+15679L;
+
+  xr[2] = -0x9.e8c093a43b546a90p+15983L;
+  xi[2] = 0xc.95b18274208311e0p-2840L;
+  yr[2] = -0x8.dedb729b5c1b2ec0p+8L;
+  yi[2] = 0xa.a49fb81b24738370p-16385L;
+  zr[2] = 0x1.1df99ee89bb118f3201369e06576p+15975L;
+  zi[2] = 0x1.571e7ef904d6b6eee7acb0dcf098p-418L;
+
+  xr[3] = 0xc.4687f251c0f48bd0p-3940L;
+  xi[3] = -0xe.a3f2138992d85fa0p+15598L;
+  yr[3] = 0xe.4b0c25c3d5ebb830p-16344L;
+  yi[3] = -0xa.6cbf1ba80f7b97a0p+78L;
+  zr[3] = 0x1.6785ba23bfb744cee97b4142348bp+15520L;
+  zi[3] = -0x1.ecee7b8c7bdd36237eb538324289p-902L;
+#endif
+
+  for (i = 0; i < 4; i++)
+    {
+      __real__ a = xr[i];
+      __imag__ a = xi[i];
+      __real__ b = yr[i];
+      __imag__ b = yi[i];
+      __real__ z = zr[i];
+      __imag__ z = zi[i];
+      c = a / b;
+      cr = __real__ c;
+      ci = __imag__ c;
+
+      if (!match (c,z)){
+	ok = 0;
+      }
+    }
+  if (!ok)
+    abort ();
+  exit (0);
+}
diff --git a/libgcc/config/rs6000/_divkc3.c b/libgcc/config/rs6000/_divkc3.c
index d261f40..a1d29d2 100644
--- a/libgcc/config/rs6000/_divkc3.c
+++ b/libgcc/config/rs6000/_divkc3.c
@@ -37,29 +37,122 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define __divkc3 __divkc3_sw
 #endif
 
+#ifndef __LONG_DOUBLE_IEEE128__
+#define RBIG   (__LIBGCC_KF_MAX__ / 2)
+#define RMIN   (__LIBGCC_KF_MIN__)
+#define RMIN2  (__LIBGCC_KF_EPSILON__)
+#define RMINSCAL (1 / __LIBGCC_KF_EPSILON__)
+#define RMAX2  (RBIG * RMIN2)
+#else
+#define RBIG   (__LIBGCC_TF_MAX__ / 2)
+#define RMIN   (__LIBGCC_TF_MIN__)
+#define RMIN2  (__LIBGCC_TF_EPSILON__)
+#define RMINSCAL (1 / __LIBGCC_TF_EPSILON__)
+#define RMAX2  (RBIG * RMIN2)
+#endif
+
 TCtype
 __divkc3 (TFtype a, TFtype b, TFtype c, TFtype d)
 {
   TFtype denom, ratio, x, y;
   TCtype res;
 
-  /* ??? We can get better behavior from logarithmic scaling instead of
-     the division.  But that would mean starting to link libgcc against
-     libm.  We could implement something akin to ldexp/frexp as gcc builtins
-     fairly easily...  */
+  /* long double has significant potential underflow/overflow errors that
+     can be greatly reduced with a limited number of tests and adjustments.
+  */
+
+  /* Scale by max(c,d) to reduce chances of denominator overflowing.  */
   if (FABS (c) < FABS (d))
     {
+      /* Prevent underflow when denominator is near max representable.  */
+      if (FABS (d) >= RBIG)
+	{
+	  a = a / 2;
+	  b = b / 2;
+	  c = c / 2;
+	  d = d / 2;
+	}
+      /* Avoid overflow/underflow issues when c and d are small.
+	 Scaling up helps avoid some underflows.
+	 No new overflow possible since c&d < RMIN2.  */
+      if (FABS (d) < RMIN2)
+	{
+	  a = a * RMINSCAL;
+	  b = b * RMINSCAL;
+	  c = c * RMINSCAL;
+	  d = d * RMINSCAL;
+	}
+      else
+	{
+	  if (((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (d) < RMAX2))
+	      || ((FABS (b) < RMIN) && (FABS (a) < RMAX2)
+		  && (FABS (d) < RMAX2)))
+	    {
+	      a = a * RMINSCAL;
+	      b = b * RMINSCAL;
+	      c = c * RMINSCAL;
+	      d = d * RMINSCAL;
+	    }
+	}
       ratio = c / d;
       denom = (c * ratio) + d;
-      x = ((a * ratio) + b) / denom;
-      y = ((b * ratio) - a) / denom;
+      /* Choose alternate order of computation if ratio is subnormal.  */
+      if (FABS (ratio) > RMIN)
+	{
+	  x = ((a * ratio) + b) / denom;
+	  y = ((b * ratio) - a) / denom;
+	}
+      else
+	{
+	  x = ((c * (a / d)) + b) / denom;
+	  y = ((c * (b / d)) - a) / denom;
+	}
     }
   else
     {
+      /* Prevent underflow when denominator is near max representable.  */
+      if (FABS (c) >= RBIG)
+	{
+	  a = a / 2;
+	  b = b / 2;
+	  c = c / 2;
+	  d = d / 2;
+	}
+      /* Avoid overflow/underflow issues when both c and d are small.
+	 Scaling up helps avoid some underflows.
+	 No new overflow possible since both c&d are less than RMIN2.  */
+      if (FABS (c) < RMIN2)
+	{
+	  a = a * RMINSCAL;
+	  b = b * RMINSCAL;
+	  c = c * RMINSCAL;
+	  d = d * RMINSCAL;
+	}
+      else
+	{
+	  if (((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (c) < RMAX2))
+	      || ((FABS (b) < RMIN) && (FABS (a) < RMAX2)
+		  && (FABS (c) < RMAX2)))
+	    {
+	      a = a * RMINSCAL;
+	      b = b * RMINSCAL;
+	      c = c * RMINSCAL;
+	      d = d * RMINSCAL;
+	    }
+	}
       ratio = d / c;
       denom = (d * ratio) + c;
-      x = ((b * ratio) + a) / denom;
-      y = (b - (a * ratio)) / denom;
+      /* Choose alternate order of computation if ratio is subnormal.  */
+      if (FABS (ratio) > RMIN)
+	{
+	  x = ((b * ratio) + a) / denom;
+	  y = (b - (a * ratio)) / denom;
+	}
+      else
+	{
+	  x = (a + (d * (b / c))) / denom;
+	  y = (b - (d * (a / c))) / denom;
+	}
     }
 
   /* Recover infinities and zeros that computed as NaN+iNaN; the only cases
diff --git a/libgcc/libgcc2.c b/libgcc/libgcc2.c
index 17de0a7..38f935e 100644
--- a/libgcc/libgcc2.c
+++ b/libgcc/libgcc2.c
@@ -1860,33 +1860,55 @@ NAME (TYPE x, int m)
 #if defined(L_mulhc3) || defined(L_divhc3)
 # define MTYPE	HFtype
 # define CTYPE	HCtype
+# define AMTYPE SFtype
 # define MODE	hc
 # define CEXT	__LIBGCC_HF_FUNC_EXT__
 # define NOTRUNC (!__LIBGCC_HF_EXCESS_PRECISION__)
 #elif defined(L_mulsc3) || defined(L_divsc3)
 # define MTYPE	SFtype
 # define CTYPE	SCtype
+# define AMTYPE DFtype
 # define MODE	sc
 # define CEXT	__LIBGCC_SF_FUNC_EXT__
 # define NOTRUNC (!__LIBGCC_SF_EXCESS_PRECISION__)
+# define RBIG	(__LIBGCC_SF_MAX__ / 2)
+# define RMIN	(__LIBGCC_SF_MIN__)
+# define RMIN2	(__LIBGCC_SF_EPSILON__)
+# define RMINSCAL (1 / __LIBGCC_SF_EPSILON__)
+# define RMAX2	(RBIG * RMIN2)
 #elif defined(L_muldc3) || defined(L_divdc3)
 # define MTYPE	DFtype
 # define CTYPE	DCtype
 # define MODE	dc
 # define CEXT	__LIBGCC_DF_FUNC_EXT__
 # define NOTRUNC (!__LIBGCC_DF_EXCESS_PRECISION__)
+# define RBIG	(__LIBGCC_DF_MAX__ / 2)
+# define RMIN	(__LIBGCC_DF_MIN__)
+# define RMIN2	(__LIBGCC_DF_EPSILON__)
+# define RMINSCAL (1 / __LIBGCC_DF_EPSILON__)
+# define RMAX2  (RBIG * RMIN2)
 #elif defined(L_mulxc3) || defined(L_divxc3)
 # define MTYPE	XFtype
 # define CTYPE	XCtype
 # define MODE	xc
 # define CEXT	__LIBGCC_XF_FUNC_EXT__
 # define NOTRUNC (!__LIBGCC_XF_EXCESS_PRECISION__)
+# define RBIG	(__LIBGCC_XF_MAX__ / 2)
+# define RMIN	(__LIBGCC_XF_MIN__)
+# define RMIN2	(__LIBGCC_XF_EPSILON__)
+# define RMINSCAL (1 / __LIBGCC_XF_EPSILON__)
+# define RMAX2	(RBIG * RMIN2)
 #elif defined(L_multc3) || defined(L_divtc3)
 # define MTYPE	TFtype
 # define CTYPE	TCtype
 # define MODE	tc
 # define CEXT	__LIBGCC_TF_FUNC_EXT__
 # define NOTRUNC (!__LIBGCC_TF_EXCESS_PRECISION__)
+# define RBIG	(__LIBGCC_TF_MAX__ / 2)
+# define RMIN	(__LIBGCC_TF_MIN__)
+# define RMIN2	(__LIBGCC_TF_EPSILON__)
+# define RMINSCAL (1 / __LIBGCC_TF_EPSILON__)
+# define RMAX2	(RBIG * RMIN2)
 #else
 # error
 #endif
@@ -1994,30 +2016,136 @@ CONCAT3(__mul,MODE,3) (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
 CTYPE
 CONCAT3(__div,MODE,3) (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
 {
+#if defined(L_divhc3)						\
+  || (defined(L_divsc3) && defined(__LIBGCC_HAVE_HWDBL__) )
+
+  /* Half precision is handled with float precision.
+     float is handled with double precision when double precision
+     hardware is available.
+     Due to the additional precision, the simple complex divide
+     method (without Smith's method) is sufficient to get accurate
+     answers and runs slightly faster than Smith's method.  */
+
+  AMTYPE aa, bb, cc, dd;
+  AMTYPE denom;
+  MTYPE x, y;
+  CTYPE res;
+  aa = a;
+  bb = b;
+  cc = c;
+  dd = d;
+
+  denom = (cc * cc) + (dd * dd);
+  x = ((aa * cc) + (bb * dd)) / denom;
+  y = ((bb * cc) - (aa * dd)) / denom;
+
+#else
   MTYPE denom, ratio, x, y;
   CTYPE res;
 
-  /* ??? We can get better behavior from logarithmic scaling instead of
-     the division.  But that would mean starting to link libgcc against
-     libm.  We could implement something akin to ldexp/frexp as gcc builtins
-     fairly easily...  */
+  /* double, extended, long double have significant potential
+     underflow/overflow errors that can be greatly reduced with
+     a limited number of tests and adjustments.  float is handled
+     the same way when no HW double is available.
+  */
+
+  /* Scale by max(c,d) to reduce chances of denominator overflowing.  */
   if (FABS (c) < FABS (d))
     {
+      /* Prevent underflow when denominator is near max representable.  */
+      if (FABS (d) >= RBIG)
+	{
+	  a = a / 2;
+	  b = b / 2;
+	  c = c / 2;
+	  d = d / 2;
+	}
+      /* Avoid overflow/underflow issues when c and d are small.
+	 Scaling up helps avoid some underflows.
+	 No new overflow possible since c&d < RMIN2.  */
+      if (FABS (d) < RMIN2)
+	{
+	  a = a * RMINSCAL;
+	  b = b * RMINSCAL;
+	  c = c * RMINSCAL;
+	  d = d * RMINSCAL;
+	}
+      else
+	{
+	  if (((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (d) < RMAX2))
+	      || ((FABS (b) < RMIN) && (FABS (a) < RMAX2)
+		  && (FABS (d) < RMAX2)))
+	    {
+	      a = a * RMINSCAL;
+	      b = b * RMINSCAL;
+	      c = c * RMINSCAL;
+	      d = d * RMINSCAL;
+	    }
+	}
       ratio = c / d;
       denom = (c * ratio) + d;
-      x = ((a * ratio) + b) / denom;
-      y = ((b * ratio) - a) / denom;
+      /* Choose alternate order of computation if ratio is subnormal.  */
+      if (FABS (ratio) > RMIN)
+	{
+	  x = ((a * ratio) + b) / denom;
+	  y = ((b * ratio) - a) / denom;
+	}
+      else
+	{
+	  x = ((c * (a / d)) + b) / denom;
+	  y = ((c * (b / d)) - a) / denom;
+	}
     }
   else
     {
+      /* Prevent underflow when denominator is near max representable.  */
+      if (FABS (c) >= RBIG)
+	{
+	  a = a / 2;
+	  b = b / 2;
+	  c = c / 2;
+	  d = d / 2;
+	}
+      /* Avoid overflow/underflow issues when both c and d are small.
+	 Scaling up helps avoid some underflows.
+	 No new overflow possible since both c&d are less than RMIN2.  */
+      if (FABS (c) < RMIN2)
+	{
+	  a = a * RMINSCAL;
+	  b = b * RMINSCAL;
+	  c = c * RMINSCAL;
+	  d = d * RMINSCAL;
+	}
+      else
+	{
+	  if (((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (c) < RMAX2))
+	      || ((FABS (b) < RMIN) && (FABS (a) < RMAX2)
+		  && (FABS (c) < RMAX2)))
+	    {
+	      a = a * RMINSCAL;
+	      b = b * RMINSCAL;
+	      c = c * RMINSCAL;
+	      d = d * RMINSCAL;
+	    }
+	}
       ratio = d / c;
       denom = (d * ratio) + c;
-      x = ((b * ratio) + a) / denom;
-      y = (b - (a * ratio)) / denom;
+      /* Choose alternate order of computation if ratio is subnormal.  */
+      if (FABS (ratio) > RMIN)
+	{
+	  x = ((b * ratio) + a) / denom;
+	  y = (b - (a * ratio)) / denom;
+	}
+      else
+	{
+	  x = (a + (d * (b / c))) / denom;
+	  y = (b - (d * (a / c))) / denom;
+	}
     }
+#endif
 
-  /* Recover infinities and zeros that computed as NaN+iNaN; the only cases
-     are nonzero/zero, infinite/finite, and finite/infinite.  */
+  /* Recover infinities and zeros that computed as NaN+iNaN; the only
+     cases are nonzero/zero, infinite/finite, and finite/infinite.  */
   if (isnan (x) && isnan (y))
     {
       if (c == 0.0 && d == 0.0 && (!isnan (a) || !isnan (b)))