diff options
-rw-r--r-- | gcc/c-family/c-cppbuiltin.c | 58 | ||||
-rw-r--r-- | gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkd.c | 126 | ||||
-rw-r--r-- | gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkf.c | 125 | ||||
-rw-r--r-- | gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkld.c | 168 | ||||
-rw-r--r-- | libgcc/config/rs6000/_divkc3.c | 109 | ||||
-rw-r--r-- | libgcc/libgcc2.c | 148 |
6 files changed, 704 insertions, 30 deletions
diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c index 9f993c4..42b7604 100644 --- a/gcc/c-family/c-cppbuiltin.c +++ b/gcc/c-family/c-cppbuiltin.c @@ -1277,29 +1277,39 @@ c_cpp_builtins (cpp_reader *pfile) { scalar_float_mode mode = mode_iter.require (); const char *name = GET_MODE_NAME (mode); + const size_t name_len = strlen (name); + char float_h_prefix[16] = ""; char *macro_name - = (char *) alloca (strlen (name) - + sizeof ("__LIBGCC__MANT_DIG__")); + = XALLOCAVEC (char, name_len + sizeof ("__LIBGCC__MANT_DIG__")); sprintf (macro_name, "__LIBGCC_%s_MANT_DIG__", name); builtin_define_with_int_value (macro_name, REAL_MODE_FORMAT (mode)->p); if (!targetm.scalar_mode_supported_p (mode) || !targetm.libgcc_floating_mode_supported_p (mode)) continue; - macro_name = (char *) alloca (strlen (name) - + sizeof ("__LIBGCC_HAS__MODE__")); + macro_name = XALLOCAVEC (char, name_len + + sizeof ("__LIBGCC_HAS__MODE__")); sprintf (macro_name, "__LIBGCC_HAS_%s_MODE__", name); cpp_define (pfile, macro_name); - macro_name = (char *) alloca (strlen (name) - + sizeof ("__LIBGCC__FUNC_EXT__")); + macro_name = XALLOCAVEC (char, name_len + + sizeof ("__LIBGCC__FUNC_EXT__")); sprintf (macro_name, "__LIBGCC_%s_FUNC_EXT__", name); char suffix[20] = ""; if (mode == TYPE_MODE (double_type_node)) - ; /* Empty suffix correct. */ + { + /* Empty suffix correct. */ + memcpy (float_h_prefix, "DBL", 4); + } else if (mode == TYPE_MODE (float_type_node)) - suffix[0] = 'f'; + { + suffix[0] = 'f'; + memcpy (float_h_prefix, "FLT", 4); + } else if (mode == TYPE_MODE (long_double_type_node)) - suffix[0] = 'l'; + { + suffix[0] = 'l'; + memcpy (float_h_prefix, "LDBL", 5); + } else { bool found_suffix = false; @@ -1310,6 +1320,8 @@ c_cpp_builtins (cpp_reader *pfile) sprintf (suffix, "f%d%s", floatn_nx_types[i].n, floatn_nx_types[i].extended ? "x" : ""); found_suffix = true; + sprintf (float_h_prefix, "FLT%d%s", floatn_nx_types[i].n, + floatn_nx_types[i].extended ? "X" : ""); break; } gcc_assert (found_suffix); @@ -1347,11 +1359,33 @@ c_cpp_builtins (cpp_reader *pfile) default: gcc_unreachable (); } - macro_name = (char *) alloca (strlen (name) - + sizeof ("__LIBGCC__EXCESS_" - "PRECISION__")); + macro_name = XALLOCAVEC (char, name_len + + sizeof ("__LIBGCC__EXCESS_PRECISION__")); sprintf (macro_name, "__LIBGCC_%s_EXCESS_PRECISION__", name); builtin_define_with_int_value (macro_name, excess_precision); + + char val_name[64]; + + macro_name = XALLOCAVEC (char, name_len + + sizeof ("__LIBGCC__EPSILON__")); + sprintf (macro_name, "__LIBGCC_%s_EPSILON__", name); + sprintf (val_name, "__%s_EPSILON__", float_h_prefix); + builtin_define_with_value (macro_name, val_name, 0); + + macro_name = XALLOCAVEC (char, name_len + sizeof ("__LIBGCC__MAX__")); + sprintf (macro_name, "__LIBGCC_%s_MAX__", name); + sprintf (val_name, "__%s_MAX__", float_h_prefix); + builtin_define_with_value (macro_name, val_name, 0); + + macro_name = XALLOCAVEC (char, name_len + sizeof ("__LIBGCC__MIN__")); + sprintf (macro_name, "__LIBGCC_%s_MIN__", name); + sprintf (val_name, "__%s_MIN__", float_h_prefix); + builtin_define_with_value (macro_name, val_name, 0); + +#ifdef HAVE_adddf3 + builtin_define_with_int_value ("__LIBGCC_HAVE_HWDBL__", + HAVE_adddf3); +#endif } /* For libgcc crtstuff.c and libgcc2.c. */ diff --git a/gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkd.c b/gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkd.c new file mode 100644 index 0000000..3ef5fad --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkd.c @@ -0,0 +1,126 @@ +/* + Program to test complex divide for correct results on selected values. + Checking known failure points. +*/ + +#include <float.h> + +extern void abort (void); +extern void exit (int); + +extern int ilogb (double); +int match (double _Complex, double _Complex); + +#define SMALL DBL_MIN +#define MAXBIT DBL_MANT_DIG +#define ERRLIM 6 + +/* + Compare c (computed value) with z (expected value). + Return 0 if within allowed range. Return 1 if not. +*/ +int match (double _Complex c, double _Complex z) +{ + double rz, iz, rc, ic; + double rerr, ierr, rmax; + int biterr; + rz = __real__ z; + iz = __imag__ z; + rc = __real__ c; + ic = __imag__ c; + + if (__builtin_fabs (rz) > SMALL) + { + rerr = __builtin_fabs (rz - rc) / __builtin_fabs (rz); + } + else if (__builtin_fabs (rz) == 0.0) + { + rerr = __builtin_fabs (rc); + } + else + { + rerr = __builtin_fabs (rz - rc) / SMALL; + } + + if (__builtin_fabs (iz) > SMALL) + { + ierr = __builtin_fabs (iz - ic) / __builtin_fabs (iz); + } + else if (__builtin_fabs (iz) == 0.0) + { + ierr = __builtin_fabs (ic); + } + else + { + ierr = __builtin_fabs (iz - ic) / SMALL; + } + rmax = __builtin_fmax(rerr, ierr); + biterr = 0; + if ( rmax != 0.0) + { + biterr = ilogb (rmax) + MAXBIT + 1; + } + + if (biterr >= ERRLIM) + return 0; + else + return 1; +} + + +int main (int argc, char** argv) +{ + double _Complex a,b,c,z; + double xr[4], xi[4], yr[4], yi[4], zr[4], zi[4]; + double cr, ci; + int i; + int ok = 1; + xr[0] = -0x1.16e7fad79e45ep+651; + xi[0] = -0x1.f7f75b94c6c6ap-860; + yr[0] = -0x1.2f40d8ff7e55ep+245; + yi[0] = -0x0.0000000004ebcp-1022; + zr[0] = 0x1.d6e4b0e282869p+405; + zi[0] = -0x1.e9095e311e706p-900; + + xr[1] = -0x1.21ff587f953d3p-310; + xi[1] = -0x1.5a526dcc59960p+837; + yr[1] = 0x1.b88b8b552eaadp+735; + yi[1] = -0x1.873e2d6544d92p-327; + zr[1] = 0x1.65734a88b2de0p-961; + zi[1] = -0x1.927e85b8b5770p+101; + + xr[2] = 0x1.4612e41aa8080p-846; + xi[2] = -0x0.0000000613e07p-1022; + yr[2] = 0x1.df9cd0d58caafp-820; + yi[2] = -0x1.e47051a9036dbp-584; + zr[2] = 0x1.9b194f3fffa32p-469; + zi[2] = 0x1.58a00ab740a6bp-263; + + xr[3] = 0x1.cb27eece7c585p-355; + xi[3] = 0x0.000000223b8a8p-1022; + yr[3] = -0x1.74e7ed2b9189fp-22; + yi[3] = 0x1.3d80439e9a119p-731; + zr[3] = -0x1.3b35ed806ae5ap-333; + zi[3] = -0x0.05e01bcbfd9f6p-1022; + + + for (i = 0; i < 4; i++) + { + __real__ a = xr[i]; + __imag__ a = xi[i]; + __real__ b = yr[i]; + __imag__ b = yi[i]; + __real__ z = zr[i]; + __imag__ z = zi[i]; + c = a / b; + cr = __real__ c; + ci = __imag__ c; + + if (!match (c,z)){ + ok = 0; + } + } + if (!ok) + abort (); + exit (0); +} diff --git a/gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkf.c b/gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkf.c new file mode 100644 index 0000000..adf1ed9 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkf.c @@ -0,0 +1,125 @@ +/* + Program to test complex divide for correct results on selected values. + Checking known failure points. +*/ + +#include <float.h> + +extern void abort (void); +extern void exit (int); + +extern int ilogbf (float); +int match (float _Complex, float _Complex); + +#define SMALL FLT_MIN +#define MAXBIT FLT_MANT_DIG +#define ERRLIM 6 + +/* + Compare c (computed value) with z (expected value). + Return 0 if within allowed range. Return 1 if not. +*/ +int match (float _Complex c, float _Complex z) +{ + float rz, iz, rc, ic; + float rerr, ierr, rmax; + int biterr; + rz = __real__ z; + iz = __imag__ z; + rc = __real__ c; + ic = __imag__ c; + + if (__builtin_fabsf (rz) > SMALL) + { + rerr = __builtin_fabsf (rz - rc) / __builtin_fabsf (rz); + } + else if (__builtin_fabsf (rz) == 0.0) + { + rerr = __builtin_fabsf (rc); + } + else + { + rerr = __builtin_fabsf (rz - rc) / SMALL; + } + + if (__builtin_fabsf (iz) > SMALL) + { + ierr = __builtin_fabsf (iz - ic) / __builtin_fabsf (iz); + } + else if (__builtin_fabsf (iz) == 0.0) + { + ierr = __builtin_fabsf (ic); + } + else + { + ierr = __builtin_fabsf (iz - ic) / SMALL; + } + rmax = __builtin_fmaxf(rerr, ierr); + biterr = 0; + if ( rmax != 0.0) + { + biterr = ilogbf (rmax) + MAXBIT + 1; + } + + if (biterr >= ERRLIM) + return 0; + else + return 1; +} + + +int main(int argc, char** argv) +{ + float _Complex a,b,c,z; + float xr[4], xi[4], yr[4], yi[4], zr[4], zi[4]; + float cr, ci; + int i; + int ok = 1; + xr[0] = 0x1.0b1600p-133; + xi[0] = 0x1.5e1c28p+54; + yr[0] = -0x1.cdec8cp-119; + yi[0] = 0x1.1e72ccp+32; + zr[0] = 0x1.38e502p+22; + zi[0] = -0x1.f89220p-129; + + xr[1] = -0x1.b1bee2p+121; + xi[1] = -0x1.cb403ep-59; + yr[1] = 0x1.480000p-144; + yi[1] = -0x1.c66fc4p+5; + zr[1] = -0x1.60b8cap-34; + zi[1] = -0x1.e8b02ap+115; + + xr[2] = -0x1.3f6e00p-97; + xi[2] = -0x1.c00000p-146; + yr[2] = 0x1.000000p-148; + yi[2] = -0x1.0c4e70p-91; + zr[2] = 0x1.aa50d0p-55; + zi[2] = -0x1.30c746p-6; + + xr[3] = 0x1.000000p-148; + xi[3] = 0x1.f4bc04p-84; + yr[3] = 0x1.00ad74p-20; + yi[3] = 0x1.2ad02ep-85; + zr[3] = 0x1.1102ccp-127; + zi[3] = 0x1.f369a4p-64; + + for (i = 0; i < 4; i++) + { + __real__ a = xr[i]; + __imag__ a = xi[i]; + __real__ b = yr[i]; + __imag__ b = yi[i]; + __real__ z = zr[i]; + __imag__ z = zi[i]; + c = a / b; + cr = __real__ c; + ci = __imag__ c; + + if (!match (c,z)){ + ok = 0; + } + } + if (!ok) + abort (); + exit (0); +} diff --git a/gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkld.c b/gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkld.c new file mode 100644 index 0000000..ffe9c34 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkld.c @@ -0,0 +1,168 @@ +/* + Program to test complex divide for correct results on selected values. + Checking known failure points. +*/ + +#include <float.h> + +extern void abort (void); +extern void exit (int); + +extern int ilogbl (long double); +int match (long double _Complex,long double _Complex); + +#define SMALL LDBL_MIN +#define MAXBIT LDBL_MANT_DIG +#define ERRLIM 6 + +/* + Compare c (computed value) with z (expected value). + Return 0 if within allowed range. Return 1 if not. +*/ +int match (long double _Complex c,long double _Complex z) +{ + long double rz, iz, rc, ic; + long double rerr, ierr, rmax; + int biterr; + rz = __real__ z; + iz = __imag__ z; + rc = __real__ c; + ic = __imag__ c; + + if (__builtin_fabsl (rz) > SMALL) + { + rerr = __builtin_fabsl (rz - rc) / __builtin_fabsl(rz); + } + else if (__builtin_fabsl (rz) == 0.0) + { + rerr = __builtin_fabsl (rc); + } + else + { + rerr = __builtin_fabsl (rz - rc) / SMALL; + } + + if (__builtin_fabsl (iz) > SMALL) + { + ierr = __builtin_fabsl (iz - ic) / __builtin_fabsl(iz); + } + else if (__builtin_fabsl (iz) == 0.0) + { + ierr = __builtin_fabsl (ic); + } + else + { + ierr = __builtin_fabsl (iz - ic) / SMALL; + } + rmax = __builtin_fmaxl (rerr, ierr); + biterr = 0; + if ( rmax != 0.0) + { + biterr = ilogbl (rmax) + MAXBIT + 1; + } + + if (biterr >= ERRLIM) + return 0; + else + return 1; +} + + +int main (int argc, char** argv) +{ + long double _Complex a,b,c,z; + long double xr[4], xi[4], yr[4], yi[4], zr[4], zi[4]; + long double cr, ci; + int i; + int ok = 1; + +#if (LDBL_MAX_EXP < 2048) + /* + Test values when mantissa is 11 or fewer bits. Either LDBL is + using DBL on this platform or we are using IBM extended double + precision. Test values will be automatically truncated when + the available precision is smaller than the explicit precision. + */ + xr[0] = -0x1.16e7fad79e45ep+651; + xi[0] = -0x1.f7f75b94c6c6ap-860; + yr[0] = -0x1.2f40d8ff7e55ep+245; + yi[0] = -0x0.0000000004ebcp-968; + zr[0] = 0x1.d6e4b0e2828694570ba839070beep+405L; + zi[0] = -0x1.e9095e311e70498db810196259b7p-846L; + + xr[1] = -0x1.21ff587f953d3p-310; + xi[1] = -0x1.5a526dcc59960p+837; + yr[1] = 0x1.b88b8b552eaadp+735; + yi[1] = -0x1.873e2d6544d92p-327; + zr[1] = 0x1.65734a88b2ddff699c482ee8eef6p-961L; + zi[1] = -0x1.927e85b8b576f94a797a1bcb733dp+101L; + + xr[2] = 0x1.4612e41aa8080p-846; + xi[2] = -0x0.0000000613e07p-968; + yr[2] = 0x1.df9cd0d58caafp-820; + yi[2] = -0x1.e47051a9036dbp-584; + zr[2] = 0x1.9b194f3aaadea545174c5372d8p-415L; + zi[2] = 0x1.58a00ab740a6ad3249002f2b79p-263L; + + xr[3] = 0x1.cb27eece7c585p-355; + xi[3] = 0x0.000000223b8a8p-968; + yr[3] = -0x1.74e7ed2b9189fp-22; + yi[3] = 0x1.3d80439e9a119p-731; + zr[3] = -0x1.3b35ed806ae5a2a8cc1c9a96931dp-333L; + zi[3] = -0x1.7802c17c774895bd541adeb200p-974L; +#else + /* + Test values intended for either IEEE128 or Intel80 formats. In + either case, 15 bits of exponent are available. Test values will + be automatically truncated when the available precision is smaller + than the explicit precision. + */ + xr[0] = -0x9.c793985b7d029d90p-8480L; + xi[0] = 0x8.018745ffa61a8fe0p+16329L; + yr[0] = -0xe.d5bee9c523a35ad0p-15599L; + yi[0] = -0xa.8c93c5a4f94128f0p+869L; + zr[0] = -0x1.849178451c035b95d16311d0efdap+15459L; + zi[0] = -0x1.11375ed2c1f58b9d047ab64aed97p-1008L; + + xr[1] = 0xb.68e44bc6d0b91a30p+16026L; + xi[1] = 0xb.ab10f5453e972f30p-14239L; + yr[1] = 0x8.8cbd470705428ff0p-16350L; + yi[1] = -0xa.0c1cbeae4e4b69f0p+347L; + zr[1] = 0x1.eec40848785e500d9f0945ab58d3p-1019L; + zi[1] = 0x1.22b6b579927a3f238b772bb6dc95p+15679L; + + xr[2] = -0x9.e8c093a43b546a90p+15983L; + xi[2] = 0xc.95b18274208311e0p-2840L; + yr[2] = -0x8.dedb729b5c1b2ec0p+8L; + yi[2] = 0xa.a49fb81b24738370p-16385L; + zr[2] = 0x1.1df99ee89bb118f3201369e06576p+15975L; + zi[2] = 0x1.571e7ef904d6b6eee7acb0dcf098p-418L; + + xr[3] = 0xc.4687f251c0f48bd0p-3940L; + xi[3] = -0xe.a3f2138992d85fa0p+15598L; + yr[3] = 0xe.4b0c25c3d5ebb830p-16344L; + yi[3] = -0xa.6cbf1ba80f7b97a0p+78L; + zr[3] = 0x1.6785ba23bfb744cee97b4142348bp+15520L; + zi[3] = -0x1.ecee7b8c7bdd36237eb538324289p-902L; +#endif + + for (i = 0; i < 4; i++) + { + __real__ a = xr[i]; + __imag__ a = xi[i]; + __real__ b = yr[i]; + __imag__ b = yi[i]; + __real__ z = zr[i]; + __imag__ z = zi[i]; + c = a / b; + cr = __real__ c; + ci = __imag__ c; + + if (!match (c,z)){ + ok = 0; + } + } + if (!ok) + abort (); + exit (0); +} diff --git a/libgcc/config/rs6000/_divkc3.c b/libgcc/config/rs6000/_divkc3.c index d261f40..a1d29d2 100644 --- a/libgcc/config/rs6000/_divkc3.c +++ b/libgcc/config/rs6000/_divkc3.c @@ -37,29 +37,122 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define __divkc3 __divkc3_sw #endif +#ifndef __LONG_DOUBLE_IEEE128__ +#define RBIG (__LIBGCC_KF_MAX__ / 2) +#define RMIN (__LIBGCC_KF_MIN__) +#define RMIN2 (__LIBGCC_KF_EPSILON__) +#define RMINSCAL (1 / __LIBGCC_KF_EPSILON__) +#define RMAX2 (RBIG * RMIN2) +#else +#define RBIG (__LIBGCC_TF_MAX__ / 2) +#define RMIN (__LIBGCC_TF_MIN__) +#define RMIN2 (__LIBGCC_TF_EPSILON__) +#define RMINSCAL (1 / __LIBGCC_TF_EPSILON__) +#define RMAX2 (RBIG * RMIN2) +#endif + TCtype __divkc3 (TFtype a, TFtype b, TFtype c, TFtype d) { TFtype denom, ratio, x, y; TCtype res; - /* ??? We can get better behavior from logarithmic scaling instead of - the division. But that would mean starting to link libgcc against - libm. We could implement something akin to ldexp/frexp as gcc builtins - fairly easily... */ + /* long double has significant potential underflow/overflow errors that + can be greatly reduced with a limited number of tests and adjustments. + */ + + /* Scale by max(c,d) to reduce chances of denominator overflowing. */ if (FABS (c) < FABS (d)) { + /* Prevent underflow when denominator is near max representable. */ + if (FABS (d) >= RBIG) + { + a = a / 2; + b = b / 2; + c = c / 2; + d = d / 2; + } + /* Avoid overflow/underflow issues when c and d are small. + Scaling up helps avoid some underflows. + No new overflow possible since c&d < RMIN2. */ + if (FABS (d) < RMIN2) + { + a = a * RMINSCAL; + b = b * RMINSCAL; + c = c * RMINSCAL; + d = d * RMINSCAL; + } + else + { + if (((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (d) < RMAX2)) + || ((FABS (b) < RMIN) && (FABS (a) < RMAX2) + && (FABS (d) < RMAX2))) + { + a = a * RMINSCAL; + b = b * RMINSCAL; + c = c * RMINSCAL; + d = d * RMINSCAL; + } + } ratio = c / d; denom = (c * ratio) + d; - x = ((a * ratio) + b) / denom; - y = ((b * ratio) - a) / denom; + /* Choose alternate order of computation if ratio is subnormal. */ + if (FABS (ratio) > RMIN) + { + x = ((a * ratio) + b) / denom; + y = ((b * ratio) - a) / denom; + } + else + { + x = ((c * (a / d)) + b) / denom; + y = ((c * (b / d)) - a) / denom; + } } else { + /* Prevent underflow when denominator is near max representable. */ + if (FABS (c) >= RBIG) + { + a = a / 2; + b = b / 2; + c = c / 2; + d = d / 2; + } + /* Avoid overflow/underflow issues when both c and d are small. + Scaling up helps avoid some underflows. + No new overflow possible since both c&d are less than RMIN2. */ + if (FABS (c) < RMIN2) + { + a = a * RMINSCAL; + b = b * RMINSCAL; + c = c * RMINSCAL; + d = d * RMINSCAL; + } + else + { + if (((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (c) < RMAX2)) + || ((FABS (b) < RMIN) && (FABS (a) < RMAX2) + && (FABS (c) < RMAX2))) + { + a = a * RMINSCAL; + b = b * RMINSCAL; + c = c * RMINSCAL; + d = d * RMINSCAL; + } + } ratio = d / c; denom = (d * ratio) + c; - x = ((b * ratio) + a) / denom; - y = (b - (a * ratio)) / denom; + /* Choose alternate order of computation if ratio is subnormal. */ + if (FABS (ratio) > RMIN) + { + x = ((b * ratio) + a) / denom; + y = (b - (a * ratio)) / denom; + } + else + { + x = (a + (d * (b / c))) / denom; + y = (b - (d * (a / c))) / denom; + } } /* Recover infinities and zeros that computed as NaN+iNaN; the only cases diff --git a/libgcc/libgcc2.c b/libgcc/libgcc2.c index 17de0a7..38f935e 100644 --- a/libgcc/libgcc2.c +++ b/libgcc/libgcc2.c @@ -1860,33 +1860,55 @@ NAME (TYPE x, int m) #if defined(L_mulhc3) || defined(L_divhc3) # define MTYPE HFtype # define CTYPE HCtype +# define AMTYPE SFtype # define MODE hc # define CEXT __LIBGCC_HF_FUNC_EXT__ # define NOTRUNC (!__LIBGCC_HF_EXCESS_PRECISION__) #elif defined(L_mulsc3) || defined(L_divsc3) # define MTYPE SFtype # define CTYPE SCtype +# define AMTYPE DFtype # define MODE sc # define CEXT __LIBGCC_SF_FUNC_EXT__ # define NOTRUNC (!__LIBGCC_SF_EXCESS_PRECISION__) +# define RBIG (__LIBGCC_SF_MAX__ / 2) +# define RMIN (__LIBGCC_SF_MIN__) +# define RMIN2 (__LIBGCC_SF_EPSILON__) +# define RMINSCAL (1 / __LIBGCC_SF_EPSILON__) +# define RMAX2 (RBIG * RMIN2) #elif defined(L_muldc3) || defined(L_divdc3) # define MTYPE DFtype # define CTYPE DCtype # define MODE dc # define CEXT __LIBGCC_DF_FUNC_EXT__ # define NOTRUNC (!__LIBGCC_DF_EXCESS_PRECISION__) +# define RBIG (__LIBGCC_DF_MAX__ / 2) +# define RMIN (__LIBGCC_DF_MIN__) +# define RMIN2 (__LIBGCC_DF_EPSILON__) +# define RMINSCAL (1 / __LIBGCC_DF_EPSILON__) +# define RMAX2 (RBIG * RMIN2) #elif defined(L_mulxc3) || defined(L_divxc3) # define MTYPE XFtype # define CTYPE XCtype # define MODE xc # define CEXT __LIBGCC_XF_FUNC_EXT__ # define NOTRUNC (!__LIBGCC_XF_EXCESS_PRECISION__) +# define RBIG (__LIBGCC_XF_MAX__ / 2) +# define RMIN (__LIBGCC_XF_MIN__) +# define RMIN2 (__LIBGCC_XF_EPSILON__) +# define RMINSCAL (1 / __LIBGCC_XF_EPSILON__) +# define RMAX2 (RBIG * RMIN2) #elif defined(L_multc3) || defined(L_divtc3) # define MTYPE TFtype # define CTYPE TCtype # define MODE tc # define CEXT __LIBGCC_TF_FUNC_EXT__ # define NOTRUNC (!__LIBGCC_TF_EXCESS_PRECISION__) +# define RBIG (__LIBGCC_TF_MAX__ / 2) +# define RMIN (__LIBGCC_TF_MIN__) +# define RMIN2 (__LIBGCC_TF_EPSILON__) +# define RMINSCAL (1 / __LIBGCC_TF_EPSILON__) +# define RMAX2 (RBIG * RMIN2) #else # error #endif @@ -1994,30 +2016,136 @@ CONCAT3(__mul,MODE,3) (MTYPE a, MTYPE b, MTYPE c, MTYPE d) CTYPE CONCAT3(__div,MODE,3) (MTYPE a, MTYPE b, MTYPE c, MTYPE d) { +#if defined(L_divhc3) \ + || (defined(L_divsc3) && defined(__LIBGCC_HAVE_HWDBL__) ) + + /* Half precision is handled with float precision. + float is handled with double precision when double precision + hardware is available. + Due to the additional precision, the simple complex divide + method (without Smith's method) is sufficient to get accurate + answers and runs slightly faster than Smith's method. */ + + AMTYPE aa, bb, cc, dd; + AMTYPE denom; + MTYPE x, y; + CTYPE res; + aa = a; + bb = b; + cc = c; + dd = d; + + denom = (cc * cc) + (dd * dd); + x = ((aa * cc) + (bb * dd)) / denom; + y = ((bb * cc) - (aa * dd)) / denom; + +#else MTYPE denom, ratio, x, y; CTYPE res; - /* ??? We can get better behavior from logarithmic scaling instead of - the division. But that would mean starting to link libgcc against - libm. We could implement something akin to ldexp/frexp as gcc builtins - fairly easily... */ + /* double, extended, long double have significant potential + underflow/overflow errors that can be greatly reduced with + a limited number of tests and adjustments. float is handled + the same way when no HW double is available. + */ + + /* Scale by max(c,d) to reduce chances of denominator overflowing. */ if (FABS (c) < FABS (d)) { + /* Prevent underflow when denominator is near max representable. */ + if (FABS (d) >= RBIG) + { + a = a / 2; + b = b / 2; + c = c / 2; + d = d / 2; + } + /* Avoid overflow/underflow issues when c and d are small. + Scaling up helps avoid some underflows. + No new overflow possible since c&d < RMIN2. */ + if (FABS (d) < RMIN2) + { + a = a * RMINSCAL; + b = b * RMINSCAL; + c = c * RMINSCAL; + d = d * RMINSCAL; + } + else + { + if (((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (d) < RMAX2)) + || ((FABS (b) < RMIN) && (FABS (a) < RMAX2) + && (FABS (d) < RMAX2))) + { + a = a * RMINSCAL; + b = b * RMINSCAL; + c = c * RMINSCAL; + d = d * RMINSCAL; + } + } ratio = c / d; denom = (c * ratio) + d; - x = ((a * ratio) + b) / denom; - y = ((b * ratio) - a) / denom; + /* Choose alternate order of computation if ratio is subnormal. */ + if (FABS (ratio) > RMIN) + { + x = ((a * ratio) + b) / denom; + y = ((b * ratio) - a) / denom; + } + else + { + x = ((c * (a / d)) + b) / denom; + y = ((c * (b / d)) - a) / denom; + } } else { + /* Prevent underflow when denominator is near max representable. */ + if (FABS (c) >= RBIG) + { + a = a / 2; + b = b / 2; + c = c / 2; + d = d / 2; + } + /* Avoid overflow/underflow issues when both c and d are small. + Scaling up helps avoid some underflows. + No new overflow possible since both c&d are less than RMIN2. */ + if (FABS (c) < RMIN2) + { + a = a * RMINSCAL; + b = b * RMINSCAL; + c = c * RMINSCAL; + d = d * RMINSCAL; + } + else + { + if (((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (c) < RMAX2)) + || ((FABS (b) < RMIN) && (FABS (a) < RMAX2) + && (FABS (c) < RMAX2))) + { + a = a * RMINSCAL; + b = b * RMINSCAL; + c = c * RMINSCAL; + d = d * RMINSCAL; + } + } ratio = d / c; denom = (d * ratio) + c; - x = ((b * ratio) + a) / denom; - y = (b - (a * ratio)) / denom; + /* Choose alternate order of computation if ratio is subnormal. */ + if (FABS (ratio) > RMIN) + { + x = ((b * ratio) + a) / denom; + y = (b - (a * ratio)) / denom; + } + else + { + x = (a + (d * (b / c))) / denom; + y = (b - (d * (a / c))) / denom; + } } +#endif - /* Recover infinities and zeros that computed as NaN+iNaN; the only cases - are nonzero/zero, infinite/finite, and finite/infinite. */ + /* Recover infinities and zeros that computed as NaN+iNaN; the only + cases are nonzero/zero, infinite/finite, and finite/infinite. */ if (isnan (x) && isnan (y)) { if (c == 0.0 && d == 0.0 && (!isnan (a) || !isnan (b))) |