aboutsummaryrefslogtreecommitdiff
path: root/sysdeps
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps')
-rw-r--r--sysdeps/ia64/fpu/Makefile5
-rw-r--r--sysdeps/ia64/fpu/e_acos.S1
-rw-r--r--sysdeps/ia64/fpu/e_acosf.S1
-rw-r--r--sysdeps/ia64/fpu/e_acosh.S1
-rw-r--r--sysdeps/ia64/fpu/e_acoshf.S1
-rw-r--r--sysdeps/ia64/fpu/e_acoshl.S1
-rw-r--r--sysdeps/ia64/fpu/e_acosl.S3
-rw-r--r--sysdeps/ia64/fpu/e_asin.S1
-rw-r--r--sysdeps/ia64/fpu/e_asinf.S1
-rw-r--r--sysdeps/ia64/fpu/e_asinl.S3
-rw-r--r--sysdeps/ia64/fpu/e_atan2.S6
-rw-r--r--sysdeps/ia64/fpu/e_atan2f.S1
-rw-r--r--sysdeps/ia64/fpu/e_atanh.S1
-rw-r--r--sysdeps/ia64/fpu/e_atanhf.S1
-rw-r--r--sysdeps/ia64/fpu/e_atanhl.S1
-rw-r--r--sysdeps/ia64/fpu/e_cosh.S1
-rw-r--r--sysdeps/ia64/fpu/e_coshf.S1
-rw-r--r--sysdeps/ia64/fpu/e_coshl.S1
-rw-r--r--sysdeps/ia64/fpu/e_exp.S13
-rw-r--r--sysdeps/ia64/fpu/e_exp10.S75
-rw-r--r--sysdeps/ia64/fpu/e_exp10f.S70
-rw-r--r--sysdeps/ia64/fpu/e_exp10l.S72
-rw-r--r--sysdeps/ia64/fpu/e_exp2.S3
-rw-r--r--sysdeps/ia64/fpu/e_exp2f.S3
-rw-r--r--sysdeps/ia64/fpu/e_exp2l.S1
-rw-r--r--sysdeps/ia64/fpu/e_expf.S13
-rw-r--r--sysdeps/ia64/fpu/e_fmod.S1
-rw-r--r--sysdeps/ia64/fpu/e_fmodf.S1
-rw-r--r--sysdeps/ia64/fpu/e_fmodl.S865
-rw-r--r--sysdeps/ia64/fpu/e_hypot.S2
-rw-r--r--sysdeps/ia64/fpu/e_hypotf.S2
-rw-r--r--sysdeps/ia64/fpu/e_hypotl.S2
-rw-r--r--sysdeps/ia64/fpu/e_lgamma_r.c2
-rw-r--r--sysdeps/ia64/fpu/e_lgammaf_r.c2
-rw-r--r--sysdeps/ia64/fpu/e_lgammal_r.c2
-rw-r--r--sysdeps/ia64/fpu/e_log.S2
-rw-r--r--sysdeps/ia64/fpu/e_log2.S1
-rw-r--r--sysdeps/ia64/fpu/e_log2f.S1
-rw-r--r--sysdeps/ia64/fpu/e_log2l.S1
-rw-r--r--sysdeps/ia64/fpu/e_logf.S2
-rw-r--r--sysdeps/ia64/fpu/e_logl.S2
-rw-r--r--sysdeps/ia64/fpu/e_pow.S1
-rw-r--r--sysdeps/ia64/fpu/e_powf.S984
-rw-r--r--sysdeps/ia64/fpu/e_powl.S4
-rw-r--r--sysdeps/ia64/fpu/e_remainder.S1
-rw-r--r--sysdeps/ia64/fpu/e_remainderf.S1
-rw-r--r--sysdeps/ia64/fpu/e_remainderl.S1
-rw-r--r--sysdeps/ia64/fpu/e_scalb.S630
-rw-r--r--sysdeps/ia64/fpu/e_scalbf.S630
-rw-r--r--sysdeps/ia64/fpu/e_scalbl.S630
-rw-r--r--sysdeps/ia64/fpu/e_sinh.S1
-rw-r--r--sysdeps/ia64/fpu/e_sinhf.S1
-rw-r--r--sysdeps/ia64/fpu/e_sinhl.S1
-rw-r--r--sysdeps/ia64/fpu/e_sqrt.S1
-rw-r--r--sysdeps/ia64/fpu/e_sqrtf.S1
-rw-r--r--sysdeps/ia64/fpu/e_sqrtl.S1
-rw-r--r--sysdeps/ia64/fpu/gen_import_file_list14
-rw-r--r--sysdeps/ia64/fpu/import_file.awk19
-rw-r--r--sysdeps/ia64/fpu/import_intel_libm70
-rw-r--r--sysdeps/ia64/fpu/libm_error.c1392
-rw-r--r--sysdeps/ia64/fpu/libm_lgamma.S80
-rw-r--r--sysdeps/ia64/fpu/libm_lgammaf.S23
-rw-r--r--sysdeps/ia64/fpu/libm_lgammal.S1
-rw-r--r--sysdeps/ia64/fpu/libm_scalblnf.S384
-rw-r--r--sysdeps/ia64/fpu/libm_sincos.S164
-rw-r--r--sysdeps/ia64/fpu/libm_sincos_large.S3
-rw-r--r--sysdeps/ia64/fpu/libm_sincosf.S24
-rw-r--r--sysdeps/ia64/fpu/libm_sincosl.S51
-rw-r--r--sysdeps/ia64/fpu/libm_support.h1008
-rw-r--r--sysdeps/ia64/fpu/s_asinh.S1
-rw-r--r--sysdeps/ia64/fpu/s_asinhl.S1
-rw-r--r--sysdeps/ia64/fpu/s_atanf.S1
-rw-r--r--sysdeps/ia64/fpu/s_atanl.S2
-rw-r--r--sysdeps/ia64/fpu/s_cbrt.S2
-rw-r--r--sysdeps/ia64/fpu/s_cbrtf.S3
-rw-r--r--sysdeps/ia64/fpu/s_cbrtl.S1660
-rw-r--r--sysdeps/ia64/fpu/s_cos.S147
-rw-r--r--sysdeps/ia64/fpu/s_cosf.S2
-rw-r--r--sysdeps/ia64/fpu/s_cosl.S46
-rw-r--r--sysdeps/ia64/fpu/s_erf.S1
-rw-r--r--sysdeps/ia64/fpu/s_erfc.S1
-rw-r--r--sysdeps/ia64/fpu/s_erfcf.S1
-rw-r--r--sysdeps/ia64/fpu/s_erfcl.S1
-rw-r--r--sysdeps/ia64/fpu/s_erfl.S1
-rw-r--r--sysdeps/ia64/fpu/s_expm1.S1
-rw-r--r--sysdeps/ia64/fpu/s_expm1f.S1
-rw-r--r--sysdeps/ia64/fpu/s_expm1l.S19
-rw-r--r--sysdeps/ia64/fpu/s_fdim.S1
-rw-r--r--sysdeps/ia64/fpu/s_fdimf.S1
-rw-r--r--sysdeps/ia64/fpu/s_fdiml.S1
-rw-r--r--sysdeps/ia64/fpu/s_frexp.c3
-rw-r--r--sysdeps/ia64/fpu/s_frexpf.c3
-rw-r--r--sysdeps/ia64/fpu/s_frexpl.c3
-rw-r--r--sysdeps/ia64/fpu/s_ilogb.S1
-rw-r--r--sysdeps/ia64/fpu/s_ilogbf.S1
-rw-r--r--sysdeps/ia64/fpu/s_ilogbl.S1
-rw-r--r--sysdeps/ia64/fpu/s_ldexp.c3
-rw-r--r--sysdeps/ia64/fpu/s_ldexpf.c3
-rw-r--r--sysdeps/ia64/fpu/s_ldexpl.c3
-rw-r--r--sysdeps/ia64/fpu/s_libm_ldexp.S370
-rw-r--r--sysdeps/ia64/fpu/s_libm_ldexpf.S368
-rw-r--r--sysdeps/ia64/fpu/s_libm_ldexpl.S369
-rw-r--r--sysdeps/ia64/fpu/s_libm_scalbn.S369
-rw-r--r--sysdeps/ia64/fpu/s_libm_scalbnf.S369
-rw-r--r--sysdeps/ia64/fpu/s_libm_scalbnl.S369
-rw-r--r--sysdeps/ia64/fpu/s_log1p.S1
-rw-r--r--sysdeps/ia64/fpu/s_log1pf.S2
-rw-r--r--sysdeps/ia64/fpu/s_log1pl.S1
-rw-r--r--sysdeps/ia64/fpu/s_logb.S1
-rw-r--r--sysdeps/ia64/fpu/s_logbf.S1
-rw-r--r--sysdeps/ia64/fpu/s_logbl.S1
-rw-r--r--sysdeps/ia64/fpu/s_nearbyint.S229
-rw-r--r--sysdeps/ia64/fpu/s_nearbyintf.S227
-rw-r--r--sysdeps/ia64/fpu/s_nearbyintl.S226
-rw-r--r--sysdeps/ia64/fpu/s_nextafter.S271
-rw-r--r--sysdeps/ia64/fpu/s_nextafterf.S274
-rw-r--r--sysdeps/ia64/fpu/s_nextafterl.S266
-rw-r--r--sysdeps/ia64/fpu/s_nexttoward.S270
-rw-r--r--sysdeps/ia64/fpu/s_nexttowardf.S276
-rw-r--r--sysdeps/ia64/fpu/s_nexttowardl.S266
-rw-r--r--sysdeps/ia64/fpu/s_round.S68
-rw-r--r--sysdeps/ia64/fpu/s_roundf.S68
-rw-r--r--sysdeps/ia64/fpu/s_roundl.S68
-rw-r--r--sysdeps/ia64/fpu/s_scalblnf.c3
-rw-r--r--sysdeps/ia64/fpu/s_scalbn.c3
-rw-r--r--sysdeps/ia64/fpu/s_scalbnf.c3
-rw-r--r--sysdeps/ia64/fpu/s_scalbnl.c3
-rw-r--r--sysdeps/ia64/fpu/s_tan.S2
-rw-r--r--sysdeps/ia64/fpu/s_tanf.S2
-rw-r--r--sysdeps/ia64/fpu/s_tanh.S1
-rw-r--r--sysdeps/ia64/fpu/s_tanhl.S1
-rw-r--r--sysdeps/ia64/fpu/s_tanl.S54
-rw-r--r--sysdeps/ia64/fpu/w_lgamma.c2
-rw-r--r--sysdeps/ia64/fpu/w_lgammaf.c2
-rw-r--r--sysdeps/ia64/fpu/w_lgammal.c2
-rw-r--r--sysdeps/ia64/fpu/w_tgamma.S1
-rw-r--r--sysdeps/ia64/fpu/w_tgammaf.S10
-rw-r--r--sysdeps/ia64/fpu/w_tgammal.S1
-rw-r--r--sysdeps/unix/sysv/linux/x86_64/clone.S8
-rw-r--r--sysdeps/unix/sysv/linux/x86_64/getcontext.S16
-rw-r--r--sysdeps/unix/sysv/linux/x86_64/setcontext.S12
-rw-r--r--sysdeps/unix/sysv/linux/x86_64/swapcontext.S10
-rw-r--r--sysdeps/unix/sysv/linux/x86_64/sysdep.h12
-rw-r--r--sysdeps/unix/x86_64/sysdep.S4
-rw-r--r--sysdeps/x86_64/bsd-_setjmp.S5
-rw-r--r--sysdeps/x86_64/bsd-setjmp.S5
-rw-r--r--sysdeps/x86_64/dl-machine.h2
-rw-r--r--sysdeps/x86_64/elf/start.S4
-rw-r--r--sysdeps/x86_64/memset.S2
-rw-r--r--sysdeps/x86_64/strchr.S4
-rw-r--r--sysdeps/x86_64/strcmp.S4
-rw-r--r--sysdeps/x86_64/strcspn.S9
-rw-r--r--sysdeps/x86_64/strspn.S7
-rw-r--r--sysdeps/x86_64/strtok.S8
154 files changed, 7702 insertions, 6435 deletions
diff --git a/sysdeps/ia64/fpu/Makefile b/sysdeps/ia64/fpu/Makefile
index 7ec30c4..384fc83 100644
--- a/sysdeps/ia64/fpu/Makefile
+++ b/sysdeps/ia64/fpu/Makefile
@@ -27,7 +27,8 @@ sysdep_routines += libc_libm_error libm_frexp libm_frexpf libm_frexpl \
$(duplicated-routines)
sysdep-CPPFLAGS += -include libm-symbols.h \
- -D__POSIX__ \
+ -D__POSIX__ -Dopensource \
-D_LIB_VERSIONIMF=_LIB_VERSION \
- -DSIZE_INT_32 -DSIZE_LONG_INT_64 -DSIZE_LONG_LONG_INT_64
+ -DSIZE_INT_32 -DSIZE_LONG_INT_64 -DSIZE_LONG_LONG_INT_64 \
+ -DSIZE_LONG_64 -DIA64
endif
diff --git a/sysdeps/ia64/fpu/e_acos.S b/sysdeps/ia64/fpu/e_acos.S
index b515f01..c2b31ab 100644
--- a/sysdeps/ia64/fpu/e_acos.S
+++ b/sysdeps/ia64/fpu/e_acos.S
@@ -824,6 +824,7 @@ acos_abs_gt_1:
GLOBAL_LIBM_END(acos)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/e_acosf.S b/sysdeps/ia64/fpu/e_acosf.S
index 417f5b7..68b0b2e 100644
--- a/sysdeps/ia64/fpu/e_acosf.S
+++ b/sysdeps/ia64/fpu/e_acosf.S
@@ -601,6 +601,7 @@ ACOSF_ABS_ONE:
GLOBAL_LIBM_END(acosf)
+
// Stack operations when calling error support.
// (1) (2)
// sp -> + psp -> +
diff --git a/sysdeps/ia64/fpu/e_acosh.S b/sysdeps/ia64/fpu/e_acosh.S
index 675d5fe..b55a6ab 100644
--- a/sysdeps/ia64/fpu/e_acosh.S
+++ b/sysdeps/ia64/fpu/e_acosh.S
@@ -1139,6 +1139,7 @@ ACOSH_LESS_ONE:
GLOBAL_LIBM_END(acosh)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
diff --git a/sysdeps/ia64/fpu/e_acoshf.S b/sysdeps/ia64/fpu/e_acoshf.S
index 4a54c26..58ef5f2 100644
--- a/sysdeps/ia64/fpu/e_acoshf.S
+++ b/sysdeps/ia64/fpu/e_acoshf.S
@@ -968,6 +968,7 @@ ACOSH_LESS_ONE:
GLOBAL_LIBM_END(acoshf)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
diff --git a/sysdeps/ia64/fpu/e_acoshl.S b/sysdeps/ia64/fpu/e_acoshl.S
index 85282d1..5eb2b34 100644
--- a/sysdeps/ia64/fpu/e_acoshl.S
+++ b/sysdeps/ia64/fpu/e_acoshl.S
@@ -1650,6 +1650,7 @@ acoshl_lt_pone:
GLOBAL_LIBM_END(acoshl)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/e_acosl.S b/sysdeps/ia64/fpu/e_acosl.S
index daa75b1..4fd345b 100644
--- a/sysdeps/ia64/fpu/e_acosl.S
+++ b/sysdeps/ia64/fpu/e_acosl.S
@@ -35,7 +35,7 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http: //www.intel.com/software/products/opensource/libraries/num.htm.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
@@ -2482,6 +2482,7 @@ acosl_SPECIAL_CASES:
GLOBAL_LIBM_END(acosl)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
// (1)
diff --git a/sysdeps/ia64/fpu/e_asin.S b/sysdeps/ia64/fpu/e_asin.S
index 398079e..f995c59 100644
--- a/sysdeps/ia64/fpu/e_asin.S
+++ b/sysdeps/ia64/fpu/e_asin.S
@@ -800,6 +800,7 @@ asin_abs_gt_1:
GLOBAL_LIBM_END(asin)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/e_asinf.S b/sysdeps/ia64/fpu/e_asinf.S
index f9a1312..af24165 100644
--- a/sysdeps/ia64/fpu/e_asinf.S
+++ b/sysdeps/ia64/fpu/e_asinf.S
@@ -583,6 +583,7 @@ ASINF_ABS_ONE:
;;
GLOBAL_LIBM_END(asinf)
+
// Stack operations when calling error support.
// (1) (2)
// sp -> + psp -> +
diff --git a/sysdeps/ia64/fpu/e_asinl.S b/sysdeps/ia64/fpu/e_asinl.S
index bf5feba..ad65a73 100644
--- a/sysdeps/ia64/fpu/e_asinl.S
+++ b/sysdeps/ia64/fpu/e_asinl.S
@@ -35,7 +35,7 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http: //www.intel.com/software/products/opensource/libraries/num.htm.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
@@ -2459,6 +2459,7 @@ SMALL_S:
GLOBAL_LIBM_END(asinl)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
// (1)
diff --git a/sysdeps/ia64/fpu/e_atan2.S b/sysdeps/ia64/fpu/e_atan2.S
index 8be7c6c..7a17fbf 100644
--- a/sysdeps/ia64/fpu/e_atan2.S
+++ b/sysdeps/ia64/fpu/e_atan2.S
@@ -52,6 +52,7 @@
// 08/20/02 Corrected inexact flag and directed rounding symmetry bugs
// 02/06/03 Reordered header: .section, .global, .proc, .align
// 04/17/03 Added missing mutex directive
+// 12/23/03 atan2(NaN1,NaN2) now QNaN1, for consistency with atan2f, atan2l
//
// API
//==============================================================
@@ -142,7 +143,7 @@
// -0 -0 -pi
//
// Nan anything quiet Y
-// anything NaN quiet X
+// Not NaN NaN quiet X
// atan2(+-0/+-0) sets double error tag to 37
@@ -388,7 +389,7 @@ GLOBAL_IEEE754_ENTRY(atan2)
}
{ .mfb
ldfe atan2_P21 = [EXP_AD_P2],16
-(p10) fma.d.s0 f8 = atan2_Y,atan2_X,f0 // If y=nan, result quietized y
+(p10) fma.d.s0 f8 = atan2_X,atan2_Y,f0 // If y=nan, result quietized y
(p10) br.ret.spnt b0 // Exit if y=nan
;;
}
@@ -985,6 +986,7 @@ ATAN2_ERROR:
}
GLOBAL_IEEE754_END(atan2)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
// (1)
diff --git a/sysdeps/ia64/fpu/e_atan2f.S b/sysdeps/ia64/fpu/e_atan2f.S
index c483a7a..67618f0 100644
--- a/sysdeps/ia64/fpu/e_atan2f.S
+++ b/sysdeps/ia64/fpu/e_atan2f.S
@@ -827,6 +827,7 @@ ATAN2F_XY_INF_NAN_ZERO:
GLOBAL_IEEE754_END(atan2f)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
mov GR_Parameter_TAG = 38
diff --git a/sysdeps/ia64/fpu/e_atanh.S b/sysdeps/ia64/fpu/e_atanh.S
index 7ddc3e3..5ae96dc 100644
--- a/sysdeps/ia64/fpu/e_atanh.S
+++ b/sysdeps/ia64/fpu/e_atanh.S
@@ -1008,6 +1008,7 @@ atanh_ge_one:
GLOBAL_LIBM_END(atanh)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
diff --git a/sysdeps/ia64/fpu/e_atanhf.S b/sysdeps/ia64/fpu/e_atanhf.S
index 3675c5f..1ec1408 100644
--- a/sysdeps/ia64/fpu/e_atanhf.S
+++ b/sysdeps/ia64/fpu/e_atanhf.S
@@ -782,6 +782,7 @@ atanhf_ge_one:
GLOBAL_LIBM_END(atanhf)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
diff --git a/sysdeps/ia64/fpu/e_atanhl.S b/sysdeps/ia64/fpu/e_atanhl.S
index 8266bd5..cee1ba1 100644
--- a/sysdeps/ia64/fpu/e_atanhl.S
+++ b/sysdeps/ia64/fpu/e_atanhl.S
@@ -1101,6 +1101,7 @@ atanhl_gt_one:
};;
GLOBAL_LIBM_END(atanhl)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/e_cosh.S b/sysdeps/ia64/fpu/e_cosh.S
index 0c6c5b4..38bd80e 100644
--- a/sysdeps/ia64/fpu/e_cosh.S
+++ b/sysdeps/ia64/fpu/e_cosh.S
@@ -811,6 +811,7 @@ COSH_UNORM:
GLOBAL_IEEE754_END(cosh)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/e_coshf.S b/sysdeps/ia64/fpu/e_coshf.S
index 91846e4..6d30064 100644
--- a/sysdeps/ia64/fpu/e_coshf.S
+++ b/sysdeps/ia64/fpu/e_coshf.S
@@ -652,6 +652,7 @@ COSH_UNORM:
GLOBAL_IEEE754_END(coshf)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/e_coshl.S b/sysdeps/ia64/fpu/e_coshl.S
index cef8be0..b5872d0 100644
--- a/sysdeps/ia64/fpu/e_coshl.S
+++ b/sysdeps/ia64/fpu/e_coshl.S
@@ -1033,6 +1033,7 @@ COSH_HUGE:
GLOBAL_IEEE754_END(coshl)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
diff --git a/sysdeps/ia64/fpu/e_exp.S b/sysdeps/ia64/fpu/e_exp.S
index 5ae8afe..d22fd18 100644
--- a/sysdeps/ia64/fpu/e_exp.S
+++ b/sysdeps/ia64/fpu/e_exp.S
@@ -1,7 +1,7 @@
.file "exp.s"
-// Copyright (c) 2000 - 2002, Intel Corporation
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@@ -52,6 +52,7 @@
// 05/20/02 Cleaned up namespace and sf0 syntax
// 09/07/02 Force inexact flag
// 11/15/02 Split underflow path into zero/nonzero; eliminated fma in main path
+// 05/30/03 Set inexact flag on unmasked overflow/underflow
// API
//==============================================================
@@ -602,7 +603,7 @@ EXP_CERTAIN_OVERFLOW:
}
{ .mfb
mov GR_Parameter_TAG = 14
- fma.d.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result
+ fma.d.s0 FR_RESULT = fTmp, fTmp, fTmp // Set I,O and +INF result
br.cond.sptk __libm_error_region
}
;;
@@ -685,6 +686,13 @@ EXP_CERTAIN_UNDERFLOW:
}
;;
+{ .mfi
+ nop.m 0
+ fmerge.se fTmp = fTmp, fLn2_by_128_lo // Small with signif lsb 1
+ nop.i 0
+}
+;;
+
{ .mfb
nop.m 0
fma.d.s0 f8 = fTmp, fTmp, f0 // Set I,U, tiny (+0.0) result
@@ -730,6 +738,7 @@ EXP_UNDERFLOW_ZERO:
GLOBAL_IEEE754_END(exp)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/e_exp10.S b/sysdeps/ia64/fpu/e_exp10.S
index 1cc5bef..6bfc218 100644
--- a/sysdeps/ia64/fpu/e_exp10.S
+++ b/sysdeps/ia64/fpu/e_exp10.S
@@ -1,7 +1,7 @@
.file "exp10.s"
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (c) 2000 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@@ -35,7 +35,7 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http: //www.intel.com/software/products/opensource/libraries/num.htm.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
@@ -43,6 +43,7 @@
// 05/20/02 Cleaned up namespace and sf0 syntax
// 09/06/02 Improved performance; no inexact flags on exact cases
// 01/29/03 Added missing } to bundle templates
+// 12/16/04 Call error handling on underflow.
//
// API
//==============================================================
@@ -81,8 +82,8 @@
// Registers used
//==============================================================
// r2-r3, r14-r40
-// f6-f15, f32-f51
-// p6-p9, p12
+// f6-f15, f32-f52
+// p6-p12
//
@@ -104,6 +105,7 @@ GR_EXPMAX = r24
GR_BIAS53 = r25
GR_ROUNDVAL = r26
+GR_SNORM_LIMIT = r26
GR_MASK = r27
GR_KF0 = r28
GR_MASK_low = r29
@@ -161,6 +163,7 @@ FR_E = f49
FR_exact_limit = f50
FR_int_x = f51
+FR_SNORM_LIMIT = f52
// Data tables
@@ -256,8 +259,12 @@ GLOBAL_IEEE754_ENTRY(exp10)
}
;;
-{.mib
+{.mlx
ldfe FR_LOG2_10= [ GR_COEFF_START ], 16 // load log2(10)*2^(10-63)
+ movl GR_SNORM_LIMIT= 0xc0733a7146f72a41 // Smallest normal threshold
+}
+{.mib
+ nop.m 0
nop.i 0
(p12) br.cond.spnt SPECIAL_exp10 // Branch if nan, inf, zero
}
@@ -284,7 +291,7 @@ GLOBAL_IEEE754_ENTRY(exp10)
;;
{.mfi
- nop.m 0
+ setf.d FR_SNORM_LIMIT= GR_SNORM_LIMIT // Set smallest normal limit
fma.s1 FR_L2_10_high= FR_LOG2_10, FR_2P53, f0 // FR_LOG2_10= log2(10)_hi
nop.i 0
}
@@ -390,6 +397,13 @@ GLOBAL_IEEE754_ENTRY(exp10)
{.mfi
nop.m 0
+ fcmp.ge.s1 p11,p0= f8, FR_SNORM_LIMIT // Test x for normal range
+ nop.i 0
+}
+;;
+
+{.mfi
+ nop.m 0
fma.s1 FR_E= FR_E0, FR_COEFF1, f0 // E= C_1*e
nop.i 0
}
@@ -431,10 +445,17 @@ GLOBAL_IEEE754_ENTRY(exp10)
{.mfb
nop.m 0
(p9) fma.d.s1 f8= FR_P, FR_T, FR_T // result= T+T*P, exact use s1
- br.ret.sptk b0 // return
+ (p11) br.ret.sptk b0 // return, if result normal
}
;;
+// Here if result in denormal range (and not zero)
+{.mib
+ nop.m 0
+ mov GR_Parameter_TAG= 265
+ br.cond.sptk __libm_error_region // Branch to error handling
+}
+;;
SPECIAL_exp10:
{.mfi
@@ -487,53 +508,35 @@ SPECIAL_exp10:
OUT_RANGE_exp10:
+// underflow: p6= 1
// overflow: p8= 1
-{.mii
+.pred.rel "mutex",p6,p8
+{.mmi
(p8) mov GR_EXPMAX= 0x1fffe
- nop.i 0
- nop.i 0
-}
-;;
-
-
-{.mmb
- (p8) mov GR_Parameter_TAG= 166
- (p8) setf.exp FR_R= GR_EXPMAX
- nop.b 999
-}
-;;
-
-{.mfi
- nop.m 999
- (p8) fma.d.s0 f8= FR_R, FR_R, f0 // Create overflow
- nop.i 999
-}
-// underflow: p6= 1
-{.mii
- nop.m 0
(p6) mov GR_EXPMAX= 1
nop.i 0
}
;;
-{.mmb
- nop.m 0
- (p6) setf.exp FR_R= GR_EXPMAX
- nop.b 999
+{.mii
+ setf.exp FR_R= GR_EXPMAX
+ (p8) mov GR_Parameter_TAG= 166
+ (p6) mov GR_Parameter_TAG= 265
}
;;
{.mfb
- nop.m 999
- (p6) fma.d.s0 f8= FR_R, FR_R, f0 // Create underflow
- (p6) br.ret.sptk b0 // will not call libm_error for underflow
+ nop.m 0
+ fma.d.s0 f8= FR_R, FR_R, f0 // Create overflow/underflow
+ br.cond.sptk __libm_error_region // Branch to error handling
}
;;
GLOBAL_IEEE754_END(exp10)
weak_alias (exp10, pow10)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
diff --git a/sysdeps/ia64/fpu/e_exp10f.S b/sysdeps/ia64/fpu/e_exp10f.S
index f069b3a..46615e9 100644
--- a/sysdeps/ia64/fpu/e_exp10f.S
+++ b/sysdeps/ia64/fpu/e_exp10f.S
@@ -1,7 +1,7 @@
.file "exp10f.s"
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (c) 2000 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@@ -35,7 +35,7 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http: //www.intel.com/software/products/opensource/libraries/num.htm.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
@@ -43,6 +43,7 @@
// 05/20/02 Cleaned up namespace and sf0 syntax
// 09/06/02 Improved performance and accuracy; no inexact flags on exact cases
// 01/29/03 Added missing } to bundle templates
+// 12/16/04 Call error handling on underflow.
//
// API
//==============================================================
@@ -80,8 +81,8 @@
// Registers used
//==============================================================
// r2-r3, r14-r40
-// f6-f15, f32-f51
-// p6-p9, p12
+// f6-f15, f32-f52
+// p6-p12
//
@@ -102,6 +103,7 @@ GR_Fh_ADDR = r23
GR_EXPMAX = r24
GR_ROUNDVAL = r26
+GR_SNORM_LIMIT = r26
GR_MASK = r27
GR_KF0 = r28
GR_MASK_low = r29
@@ -153,6 +155,7 @@ FR_E = f49
FR_exact_limit = f50
FR_int_x = f51
+FR_SNORM_LIMIT = f52
// Data tables
@@ -246,8 +249,12 @@ GLOBAL_IEEE754_ENTRY(exp10f)
}
;;
-{.mib
+{.mlx
ldfe FR_LOG2_10= [ GR_COEFF_START ], 16 // load log2(10)*2^(10-63)
+ movl GR_SNORM_LIMIT= 0xc217b818 // Smallest normal threshold
+}
+{.mib
+ nop.m 0
nop.i 0
(p12) br.cond.spnt SPECIAL_exp10 // Branch if nan, inf, zero
}
@@ -261,7 +268,7 @@ GLOBAL_IEEE754_ENTRY(exp10f)
;;
{.mfi
- nop.m 0
+ setf.s FR_SNORM_LIMIT= GR_SNORM_LIMIT // Set smallest normal limit
(p8) fcvt.fx.s1 FR_int_x = f8 // Convert x to integer
nop.i 0
}
@@ -335,7 +342,7 @@ GLOBAL_IEEE754_ENTRY(exp10f)
{.mfb
ldf8 FR_T_high= [ GR_Fh_ADDR ] // load T_high= 2^{f_high}
- nop.f 0
+ fcmp.ge.s1 p11, p0= f8, FR_SNORM_LIMIT // Test x for normal range
(p12) br.cond.spnt OUT_RANGE_exp10
}
;;
@@ -390,10 +397,17 @@ GLOBAL_IEEE754_ENTRY(exp10f)
{.mfb
nop.m 0
(p9) fma.s.s1 f8= FR_P, FR_T, FR_T // result= T+T*P, exact use s1
- br.ret.sptk b0 // return
+ (p11) br.ret.sptk b0 // return, if result normal
}
;;
+// Here if result in denormal range (and not zero)
+{.mib
+ nop.m 0
+ mov GR_Parameter_TAG= 266
+ br.cond.sptk __libm_error_region // Branch to error handling
+}
+;;
SPECIAL_exp10:
{.mfi
@@ -446,53 +460,35 @@ SPECIAL_exp10:
OUT_RANGE_exp10:
+// underflow: p6= 1
// overflow: p8= 1
-{.mii
+.pred.rel "mutex",p6,p8
+{.mmi
(p8) mov GR_EXPMAX= 0x1fffe
- nop.i 0
- nop.i 0
-}
-;;
-
-
-{.mmb
- (p8) mov GR_Parameter_TAG= 167
- (p8) setf.exp FR_R= GR_EXPMAX
- nop.b 999
-}
-;;
-
-{.mfi
- nop.m 999
- (p8) fma.s.s0 f8= FR_R, FR_R, f0 // Create overflow
- nop.i 999
-}
-// underflow: p6= 1
-{.mii
- nop.m 0
(p6) mov GR_EXPMAX= 1
nop.i 0
}
;;
-{.mmb
- nop.m 0
- (p6) setf.exp FR_R= GR_EXPMAX
- nop.b 999
+{.mii
+ setf.exp FR_R= GR_EXPMAX
+ (p8) mov GR_Parameter_TAG= 167
+ (p6) mov GR_Parameter_TAG= 266
}
;;
{.mfb
- nop.m 999
- (p6) fma.s.s0 f8= FR_R, FR_R, f0 // Create underflow
- (p6) br.ret.sptk b0 // will not call libm_error for underflow
+ nop.m 0
+ fma.s.s0 f8= FR_R, FR_R, f0 // Create overflow/underflow
+ br.cond.sptk __libm_error_region // Branch to error handling
}
;;
GLOBAL_IEEE754_END(exp10f)
weak_alias (exp10f, pow10f)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
diff --git a/sysdeps/ia64/fpu/e_exp10l.S b/sysdeps/ia64/fpu/e_exp10l.S
index 1b47258..a2e84b3 100644
--- a/sysdeps/ia64/fpu/e_exp10l.S
+++ b/sysdeps/ia64/fpu/e_exp10l.S
@@ -1,7 +1,7 @@
.file "exp10l.s"
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (c) 2000 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@@ -44,6 +44,7 @@
// 02/06/03 Reordered header: .section, .global, .proc, .align
// 05/08/03 Reformatted assembly source; corrected overflow result for round to
// -inf and round to zero; exact results now don't set inexact flag
+// 12/16/04 Call error handling on underflow.
//
// API
//==============================================================
@@ -79,9 +80,9 @@
// Registers used
//==============================================================
-// f6-f15, f32-f62
+// f6-f15, f32-f63
// r14-r30, r32-r40
-// p6-p8, p12-p14
+// p6-p8, p11-p14
//
@@ -129,6 +130,7 @@
FR_4 = f60
FR_28 = f61
FR_32 = f62
+ FR_SNORM_LIMIT = f63
GR_ADDR0 = r14
@@ -178,6 +180,7 @@ LOCAL_OBJECT_START(poly_coeffs)
data8 0x3f55d87fe78a6731 // C_5
data8 0x3f2430912f86c787 // C_6
data8 0x9257edfe9b5fb698, 0x00003fbf // log2(10)_low (bits 64...127)
+ data8 0x9a1bc98027a81918, 0x0000c00b // Smallest normal threshold
LOCAL_OBJECT_END(poly_coeffs)
@@ -435,7 +438,7 @@ GLOBAL_IEEE754_ENTRY(exp10l)
{.mmf
// GR_D_ADDR = pointer to D table
- add GR_D_ADDR = 2048-64+96+16, GR_ADDR0
+ add GR_D_ADDR = 2048-64+96+32, GR_ADDR0
// load C_3, C_4
ldfpd FR_COEFF3, FR_COEFF4 = [ GR_ADDR0 ], 16
// y = x*log2(10)*2^8
@@ -471,7 +474,8 @@ GLOBAL_IEEE754_ENTRY(exp10l)
}
{.mfi
- nop.m 0
+ // load smallest normal limit
+ ldfe FR_SNORM_LIMIT = [ GR_ADDR0 ], 16
// x>overflow threshold ?
fcmp.gt.s1 p12, p7 = f8, FR_OF_TEST
nop.i 0 ;;
@@ -598,6 +602,13 @@ GLOBAL_IEEE754_ENTRY(exp10l)
{.mfi
nop.m 0
+ // test if x >= smallest normal limit
+ fcmp.ge.s1 p11, p0 = f8, FR_SNORM_LIMIT
+ nop.i 0 ;;
+}
+
+{.mfi
+ nop.m 0
// P36 = P34+r2*P56
fma.s1 FR_COEFF4 = FR_COEFF5, FR_COEFF3, FR_COEFF4
nop.i 0
@@ -646,9 +657,16 @@ GLOBAL_IEEE754_ENTRY(exp10l)
// result = T+T*P
(p14) fma.s0 f8 = FR_COEFF3, FR_UF_TEST, FR_UF_TEST
// return
- br.ret.sptk b0 ;;
+ (p11) br.ret.sptk b0 ;; // return, if result normal
}
+// Here if result in denormal range (and not zero)
+{.mib
+ nop.m 0
+ mov GR_Parameter_TAG= 264
+ br.cond.sptk __libm_error_region // Branch to error handling
+}
+;;
SPECIAL_EXP10:
@@ -703,47 +721,35 @@ SPECIAL_EXP10:
OUT_RANGE_EXP10:
-{.mii
- // overflow: p8 = 1
+// underflow: p6 = 1
+// overflow: p8 = 1
+
+.pred.rel "mutex",p6,p8
+{.mmi
(p8) mov GR_CONST1 = 0x1fffe
+ (p6) mov GR_CONST1 = 1
nop.i 0
- nop.i 0 ;;
}
+;;
-{.mmb
- (p8) mov GR_Parameter_TAG = 165
- (p8) setf.exp FR_KF0 = GR_CONST1
- nop.b 999 ;;
-}
-
-{.mfi
- nop.m 999
- (p8) fma.s0 f8 = FR_KF0, FR_KF0, f0
- nop.i 999
-}
{.mii
- nop.m 0
- // underflow: p6 = 1
- (p6) mov GR_CONST1 = 1
- nop.i 0 ;;
-}
-
-{.mmb
- nop.m 0
- (p6) setf.exp FR_KF0 = GR_CONST1
- nop.b 999 ;;
+ setf.exp FR_KF0 = GR_CONST1
+ (p8) mov GR_Parameter_TAG = 165
+ (p6) mov GR_Parameter_TAG = 264
}
+;;
{.mfb
nop.m 999
- (p6) fma.s0 f8 = FR_KF0, FR_KF0, f0
- // will not call libm_error for underflow
- (p6) br.ret.sptk b0 ;;
+ fma.s0 f8 = FR_KF0, FR_KF0, f0 // Create overflow/underflow
+ br.cond.sptk __libm_error_region // Branch to error handling
}
+;;
GLOBAL_IEEE754_END(exp10l)
weak_alias (exp10l, pow10l)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{.mfi
diff --git a/sysdeps/ia64/fpu/e_exp2.S b/sysdeps/ia64/fpu/e_exp2.S
index e4a1dad..46fca2d 100644
--- a/sysdeps/ia64/fpu/e_exp2.S
+++ b/sysdeps/ia64/fpu/e_exp2.S
@@ -35,7 +35,7 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http: //www.intel.com/software/products/opensource/libraries/num.htm.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
@@ -495,6 +495,7 @@ OUT_RANGE_exp2:
GLOBAL_LIBM_END(exp2)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
diff --git a/sysdeps/ia64/fpu/e_exp2f.S b/sysdeps/ia64/fpu/e_exp2f.S
index f785b70..8ee600c 100644
--- a/sysdeps/ia64/fpu/e_exp2f.S
+++ b/sysdeps/ia64/fpu/e_exp2f.S
@@ -35,7 +35,7 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http: //www.intel.com/software/products/opensource/libraries/num.htm.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
@@ -470,6 +470,7 @@ OUT_RANGE_exp2:
GLOBAL_LIBM_END(exp2f)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
diff --git a/sysdeps/ia64/fpu/e_exp2l.S b/sysdeps/ia64/fpu/e_exp2l.S
index 6e2a62a..743ed35 100644
--- a/sysdeps/ia64/fpu/e_exp2l.S
+++ b/sysdeps/ia64/fpu/e_exp2l.S
@@ -747,6 +747,7 @@ OUT_RANGE_exp2l:
GLOBAL_LIBM_END(exp2l)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{.mfi
diff --git a/sysdeps/ia64/fpu/e_expf.S b/sysdeps/ia64/fpu/e_expf.S
index 8d620b6..3dc0ba9 100644
--- a/sysdeps/ia64/fpu/e_expf.S
+++ b/sysdeps/ia64/fpu/e_expf.S
@@ -1,7 +1,7 @@
.file "expf.s"
-// Copyright (c) 2000 - 2002, Intel Corporation
+// Copyright (c) 2000 - 2003, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@@ -52,6 +52,7 @@
// 09/26/02 support of higher precision inputs added, underflow threshold
// corrected
// 11/15/02 Improved performance on Itanium 2, added possible over/under paths
+// 05/30/03 Set inexact flag on unmasked overflow/underflow
//
//
// API
@@ -521,7 +522,7 @@ EXP_CERTAIN_OVERFLOW:
}
{ .mfb
mov GR_Parameter_TAG = 16
- fma.s.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result
+ fma.s.s0 FR_RESULT = fTmp, fTmp, fTmp // Set I,O and +INF result
br.cond.sptk __libm_error_region
}
;;
@@ -604,6 +605,13 @@ EXP_CERTAIN_UNDERFLOW:
}
;;
+{ .mfi
+ nop.m 0
+ fmerge.se fTmp = fTmp, f64DivLn2 // Small with non-trivial signif
+ nop.i 0
+}
+;;
+
{ .mfb
nop.m 0
fma.s.s0 f8 = fTmp, fTmp, f0 // Set I,U, tiny (+0.0) result
@@ -649,6 +657,7 @@ EXP_UNDERFLOW_ZERO:
GLOBAL_IEEE754_END(expf)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/e_fmod.S b/sysdeps/ia64/fpu/e_fmod.S
index d801e0c..dbd0a29 100644
--- a/sysdeps/ia64/fpu/e_fmod.S
+++ b/sysdeps/ia64/fpu/e_fmod.S
@@ -499,6 +499,7 @@ FMOD_Y_ZERO:
}
GLOBAL_IEEE754_END(fmod)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/e_fmodf.S b/sysdeps/ia64/fpu/e_fmodf.S
index fe1ec03..36e5807 100644
--- a/sysdeps/ia64/fpu/e_fmodf.S
+++ b/sysdeps/ia64/fpu/e_fmodf.S
@@ -514,6 +514,7 @@ EXP_ERROR_RETURN:
}
GLOBAL_IEEE754_END(fmodf)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/e_fmodl.S b/sysdeps/ia64/fpu/e_fmodl.S
index da08ae3..3e87eb0 100644
--- a/sysdeps/ia64/fpu/e_fmodl.S
+++ b/sysdeps/ia64/fpu/e_fmodl.S
@@ -1,7 +1,7 @@
.file "fmodl.s"
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (c) 2000 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@@ -43,56 +43,88 @@
// 03/02/00 New Algorithm
// 04/04/00 Unwind support added
// 08/15/00 Bundle added after call to __libm_error_support to properly
-// set [the previously overwritten] GR_Parameter_RESULT.
+// set [ the previously overwritten ] GR_Parameter_RESULT.
// 11/28/00 Set FR_Y to f9
-// 03/11/02 Fixed flags for fmodl(qnan,zero)
+// 03/11/02 Fixed flags for fmodl(qnan, zero)
// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
-// 04/28/03 Fix: fmod(sNaN,0) no longer sets errno
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 04/28/03 Fix: fmod(sNaN, 0) no longer sets errno
+// 11/23/04 Reformatted routine and improved speed
//
// API
//====================================================================
-// long double fmodl(long double,long double);
+// long double fmodl(long double, long double);
//
// Overview of operation
//====================================================================
-// fmod(a,b)=a-i*b,
-// where i is an integer such that, if b!=0,
-// |i|<|a/b| and |a/b-i|<1
+// fmod(a, b)= a-i*b,
+// where i is an integer such that, if b!= 0,
+// |i|<|a/b| and |a/b-i|<1
//
// Algorithm
//====================================================================
// a). if |a|<|b|, return a
// b). get quotient and reciprocal overestimates accurate to
-// 33 bits (q2,y2)
+// 33 bits (q2, y2)
// c). if the exponent difference (exponent(a)-exponent(b))
-// is less than 32, truncate quotient to integer and
-// finish in one iteration
-// d). if exponent(a)-exponent(b)>=32 (q2>=2^32)
-// round quotient estimate to single precision (k=RN(q2)),
-// calculate partial remainder (a'=a-k*b),
-// get quotient estimate (a'*y2), and repeat from c).
+// is less than 32, truncate quotient to integer and
+// finish in one iteration
+// d). if exponent(a)-exponent(b)>= 32 (q2>= 2^32)
+// round quotient estimate to single precision (k= RN(q2)),
+// calculate partial remainder (a'= a-k*b),
+// get quotient estimate (a'*y2), and repeat from c).
//
// Registers used
//====================================================================
-// Predicate registers: p6-p11
-// General registers: r2,r29,r32 (ar.pfs), r33-r39
-// Floating point registers: f6-f15
-
-GR_SAVE_B0 = r33
-GR_SAVE_PFS = r34
-GR_SAVE_GP = r35
-GR_SAVE_SP = r36
-
-GR_Parameter_X = r37
-GR_Parameter_Y = r38
-GR_Parameter_RESULT = r39
-GR_Parameter_TAG = r40
-
-FR_X = f10
-FR_Y = f9
-FR_RESULT = f8
+GR_SMALLBIASEXP = r2
+GR_2P32 = r3
+GR_SMALLBIASEXP = r20
+GR_ROUNDCONST = r21
+GR_SIG_B = r22
+GR_ARPFS = r23
+GR_TMP1 = r24
+GR_TMP2 = r25
+GR_TMP3 = r26
+
+GR_SAVE_B0 = r33
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36
+
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+FR_X = f10
+FR_Y = f9
+FR_RESULT = f8
+
+FR_ABS_A = f6
+FR_ABS_B = f7
+FR_Y_INV = f10
+FR_SMALLBIAS = f11
+FR_E0 = f12
+FR_Q = f13
+FR_E1 = f14
+FR_2P32 = f15
+FR_TMPX = f32
+FR_TMPY = f33
+FR_ROUNDCONST = f34
+FR_QINT = f35
+FR_QRND24 = f36
+FR_NORM_B = f37
+FR_TMP = f38
+FR_TMP2 = f39
+FR_DFLAG = f40
+FR_Y_INV0 = f41
+FR_Y_INV1 = f42
+FR_Q0 = f43
+FR_Q1 = f44
+FR_QINT_Z = f45
+FR_QREM = f46
+FR_B_SGN_A = f47
.section .text
GLOBAL_IEEE754_ENTRY(fmodl)
@@ -101,495 +133,540 @@ GLOBAL_IEEE754_ENTRY(fmodl)
// result in f8
{ .mfi
- alloc r32=ar.pfs,1,4,4,0
- // f6=|a|
- fmerge.s f6=f0,f8
- mov r2 = 0x0ffdd
+ getf.sig GR_SIG_B = f9
+ // FR_ABS_A = |a|
+ fmerge.s FR_ABS_A = f0, f8
+ mov GR_SMALLBIASEXP = 0x0ffdd
}
- {.mfi
- getf.sig r29=f9
- // f7=|b|
- fmerge.s f7=f0,f9
- nop.i 0;;
+{ .mfi
+ nop.m 0
+ // FR_ABS_B = |b|
+ fmerge.s FR_ABS_B = f0, f9
+ nop.i 0
}
+;;
{ .mfi
- setf.exp f11 = r2
- // (1) y0
- frcpa.s1 f10,p6=f6,f7
- nop.i 0;;
+ setf.exp FR_SMALLBIAS = GR_SMALLBIASEXP
+ // (1) y0
+ frcpa.s1 FR_Y_INV0, p6 = FR_ABS_A, FR_ABS_B
+ nop.i 0
+}
+;;
+
+{ .mlx
+ nop.m 0
+ movl GR_ROUNDCONST = 0x33a00000
}
+;;
// eliminate special cases
-{.mmi
-nop.m 0
-nop.m 0
-// y pseudo-zero ?
-cmp.eq p7,p10=r29,r0;;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ // y pseudo-zero ?
+ cmp.eq p7, p10 = GR_SIG_B, r0
}
+;;
-// Y +-NAN, +-inf, +-0? p7
+// set p7 if b +/-NAN, +/-inf, +/-0
{ .mfi
- nop.m 999
-(p10) fclass.m p7,p10 = f9, 0xe7
- nop.i 999;;
+ nop.m 0
+ (p10) fclass.m p7, p10 = f9, 0xe7
+ nop.i 0
}
+;;
-// qnan snan inf norm unorm 0 -+
-// 1 1 1 0 0 0 11
-// e 3
-// X +-NAN, +-inf, ? p9
-
{ .mfi
- nop.m 999
- fclass.m.unc p9,p11 = f8, 0xe3
- nop.i 999
+ mov GR_2P32 = 0x1001f
+ // (2) q0 = a*y0
+ (p6) fma.s1 FR_Q0 = FR_ABS_A, FR_Y_INV0, f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // (3) e0 = 1 - b * y0
+ (p6) fnma.s1 FR_E0 = FR_ABS_B, FR_Y_INV0, f1
+ nop.i 0
}
+;;
-// |x| < |y|? Return x p8
+// set p9 if a +/-NAN, +/-inf
+{ .mfi
+ nop.m 0
+ fclass.m.unc p9, p11 = f8, 0xe3
+ nop.i 0
+}
+ // |a| < |b|? Return a, p8=1
{ .mfi
- nop.m 999
-(p10) fcmp.lt.unc.s1 p8,p0 = f6,f7
- nop.i 999 ;;
+ nop.m 0
+ (p10) fcmp.lt.unc.s1 p8, p0 = FR_ABS_A, FR_ABS_B
+ nop.i 0
}
+;;
- { .mfi
- mov r2=0x1001f
- // (2) q0=a*y0
- (p6) fma.s1 f13=f6,f10,f0
- nop.i 0
-} { .mfi
- nop.m 0
- // (3) e0 = 1 - b * y0
- (p6) fnma.s1 f12=f7,f10,f1
- nop.i 0;;
+// set p7 if b +/-NAN, +/-inf, +/-0
+{ .mfi
+ nop.m 0
+ // pseudo-NaN ?
+ (p10) fclass.nm p7, p0 = f9, 0xff
+ nop.i 0
}
+;;
-// Y +-NAN, +-inf, +-0? p7
+// set p9 if a is +/-NaN, +/-Inf
+{ .mfi
+ nop.m 0
+ (p11) fclass.nm p9, p0 = f8, 0xff
+ nop.i 0
+}
{ .mfi
- nop.m 999
- // pseudo-NaN ?
-(p10) fclass.nm p7,p0 = f9, 0xff
- nop.i 999
+ nop.m 0
+ // b denormal ? set D flag (if |a|<|b|)
+ (p8) fnma.s0 FR_DFLAG = f9, f1, f9
+ nop.i 0
}
+;;
-// qnan snan inf norm unorm 0 -+
-// 1 1 1 0 0 0 11
-// e 3
-// X +-NAN, +-inf, ? p9
+{ .mfi
+ // FR_2P32 = 2^32
+ setf.exp FR_2P32 = GR_2P32
+ // (4) q1 = q0+e0*q0
+ (p6) fma.s1 FR_Q1 = FR_E0, FR_Q0, FR_Q0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // (5) e1 = e0 * e0 + 2^-34
+ (p6) fma.s1 FR_E1 = FR_E0, FR_E0, FR_SMALLBIAS
+ nop.i 0
+}
+;;
{ .mfi
- nop.m 999
-(p11) fclass.nm p9,p0 = f8, 0xff
- nop.i 999;;
+ nop.m 0
+ // normalize a (if |a|<|b|)
+ (p8) fma.s0 f8 = f8, f1, f0
+ nop.i 0
+}
+{ .bbb
+ (p9) br.cond.spnt FMOD_A_NAN_INF
+ (p7) br.cond.spnt FMOD_B_NAN_INF_ZERO
+ // if |a|<|b|, return
+ (p8) br.ret.spnt b0
}
+;;
+
{ .mfi
- nop.m 0
- // y denormal ? set D flag (if |x|<|y|)
- (p8) fnma.s0 f10=f9,f1,f9
- nop.i 0;;
+ nop.m 0
+ // (6) y1 = y0 + e0 * y0
+ (p6) fma.s1 FR_Y_INV1 = FR_E0, FR_Y_INV0, FR_Y_INV0
+ nop.i 0
}
+;;
+{ .mfi
+ nop.m 0
+ // a denormal ? set D flag
+ // b denormal ? set D flag
+ fcmp.eq.s0 p12,p0 = FR_ABS_A, FR_ABS_B
+ nop.i 0
+}
+{ .mfi
+ // set FR_ROUNDCONST = 1.25*2^{-24}
+ setf.s FR_ROUNDCONST = GR_ROUNDCONST
+ // (7) q2 = q1+e1*q1
+ (p6) fma.s1 FR_Q = FR_Q1, FR_E1, FR_Q1
+ nop.i 0
+}
+;;
-{.mfi
- nop.m 0
- // normalize x (if |x|<|y|)
- (p8) fma.s0 f8=f8,f1,f0
- nop.i 0
+{ .mfi
+ nop.m 0
+ fmerge.s FR_B_SGN_A = f8, f9
+ nop.i 0
}
-{.bbb
- (p9) br.cond.spnt FMOD_X_NAN_INF
- (p7) br.cond.spnt FMOD_Y_NAN_INF_ZERO
- // if |x|<|y|, return
- (p8) br.ret.spnt b0;;
+{ .mfi
+ nop.m 0
+ // (8) y2 = y1 + e1 * y1
+ (p6) fma.s1 FR_Y_INV = FR_E1, FR_Y_INV1, FR_Y_INV1
+ // set p6 = 0, p10 = 0
+ cmp.ne.and p6, p10 = r0, r0
}
+;;
- {.mfi
- nop.m 0
- // x denormal ? set D flag
- fnma.s0 f32=f6,f1,f6
- nop.i 0
+// will compute integer quotient bits (24 bits per iteration)
+.align 32
+loop64:
+{ .mfi
+ nop.m 0
+ // compare q2, 2^32
+ fcmp.lt.unc.s1 p8, p7 = FR_Q, FR_2P32
+ nop.i 0
}
-{.mfi
- nop.m 0
- // y denormal ? set D flag
- fnma.s0 f33=f7,f1,f7
- nop.i 0;;
+{ .mfi
+ nop.m 0
+ // will truncate quotient to integer, if exponent<32 (in advance)
+ fcvt.fx.trunc.s1 FR_QINT = FR_Q
+ nop.i 0
}
+;;
- {.mfi
- // f15=2^32
- setf.exp f15=r2
- // (4) q1=q0+e0*q0
- (p6) fma.s1 f13=f12,f13,f13
- nop.i 0
+{ .mfi
+ nop.m 0
+ // if exponent>32 round quotient to single precision (perform in advance)
+ fma.s.s1 FR_QRND24 = FR_Q, f1, f0
+ nop.i 0
}
+;;
+
{ .mfi
- nop.m 0
- // (5) e1 = e0 * e0 + 2^-34
- (p6) fma.s1 f14=f12,f12,f11
- nop.i 0;;
+ nop.m 0
+ // set FR_ROUNDCONST = sgn(a)
+ (p8) fmerge.s FR_ROUNDCONST = f8, f1
+ nop.i 0
}
-{.mlx
- nop.m 0
- movl r2=0x33a00000;;
+{ .mfi
+ nop.m 0
+ // normalize truncated quotient
+ (p8) fcvt.xf FR_QRND24 = FR_QINT
+ nop.i 0
}
+;;
+
{ .mfi
- nop.m 0
- // (6) y1 = y0 + e0 * y0
- (p6) fma.s1 f10=f12,f10,f10
- nop.i 0;;
+ nop.m 0
+ // calculate remainder (assuming FR_QRND24 = RZ(Q))
+ (p7) fnma.s1 FR_E1 = FR_QRND24, FR_ABS_B, FR_ABS_A
+ nop.i 0
}
-{.mfi
- // set f12=1.25*2^{-24}
- setf.s f12=r2
- // (7) q2=q1+e1*q1
- (p6) fma.s1 f13=f13,f14,f13
- nop.i 0;;
+{ .mfi
+ nop.m 0
+ // also if exponent>32, round quotient to single precision
+ // and subtract 1 ulp: q = q-q*(1.25*2^{-24})
+ (p7) fnma.s.s1 FR_QINT_Z = FR_QRND24, FR_ROUNDCONST, FR_QRND24
+ nop.i 0
}
-{.mfi
- nop.m 0
- fmerge.s f9=f8,f9
- nop.i 0
+;;
+
+{ .mfi
+ nop.m 0
+ // (p8) calculate remainder (82-bit format)
+ (p8) fnma.s1 FR_QREM = FR_QRND24, FR_ABS_B, FR_ABS_A
+ nop.i 0
}
{ .mfi
- nop.m 0
- // (8) y2 = y1 + e1 * y1
- (p6) fma.s1 f10=f14,f10,f10
- // set p6=0, p10=0
- cmp.ne.and p6,p10=r0,r0;;
+ nop.m 0
+ // (p7) calculate remainder (assuming FR_QINT_Z = RZ(Q))
+ (p7) fnma.s1 FR_ABS_A = FR_QINT_Z, FR_ABS_B, FR_ABS_A
+ nop.i 0
}
+;;
+{ .mfi
+ nop.m 0
+ // Final iteration (p8): is FR_ABS_A the correct remainder
+ // (quotient was not overestimated) ?
+ (p8) fcmp.lt.unc.s1 p6, p10 = FR_QREM, f0
+ nop.i 0
+}
+;;
-.align 32
-loop64:
- {.mfi
- nop.m 0
- // compare q2, 2^32
- fcmp.lt.unc.s1 p8,p7=f13,f15
- nop.i 0
-}
- {.mfi
- nop.m 0
- // will truncate quotient to integer, if exponent<32 (in advance)
- fcvt.fx.trunc.s1 f11=f13
- nop.i 0;;
-}
- {.mfi
- nop.m 0
- // if exponent>32, round quotient to single precision (perform in advance)
- fma.s.s1 f13=f13,f1,f0
- nop.i 0;;
-}
-
-
- {.mfi
- nop.m 0
- // set f12=sgn(a)
- (p8) fmerge.s f12=f8,f1
- nop.i 0
-}
- {.mfi
- nop.m 0
- // normalize truncated quotient
- (p8) fcvt.xf f13=f11
- nop.i 0;;
-}
- { .mfi
- nop.m 0
- // calculate remainder (assuming f13=RZ(Q))
- (p7) fnma.s1 f14=f13,f7,f6
- nop.i 0
-}
- {.mfi
- nop.m 0
- // also if exponent>32, round quotient to single precision
- // and subtract 1 ulp: q=q-q*(1.25*2^{-24})
- (p7) fnma.s.s1 f11=f13,f12,f13
- nop.i 0;;
-}
-
- {.mfi
- nop.m 0
- // (p8) calculate remainder (82-bit format)
- (p8) fnma.s1 f11=f13,f7,f6
- nop.i 0
-}
- {.mfi
- nop.m 0
- // (p7) calculate remainder (assuming f11=RZ(Q))
- (p7) fnma.s1 f6=f11,f7,f6
- nop.i 0;;
-}
-
-
- {.mfi
- nop.m 0
- // Final iteration (p8): is f6 the correct remainder (quotient was not overestimated) ?
- (p8) fcmp.lt.unc.s1 p6,p10=f11,f0
- nop.i 0;;
-}
- {.mfi
- nop.m 0
- // get new quotient estimation: a'*y2
- (p7) fma.s1 f13=f14,f10,f0
- nop.i 0
-}
- {.mfb
- nop.m 0
- // was f13=RZ(Q) ? (then new remainder f14>=0)
- (p7) fcmp.lt.unc.s1 p7,p9=f14,f0
- nop.b 0;;
-}
-
-
-.pred.rel "mutex",p6,p10
- {.mfb
- nop.m 0
- // add b to estimated remainder (to cover the case when the quotient was overestimated)
- // also set correct sign by using f9=|b|*sgn(a), f12=sgn(a)
- (p6) fma.s0 f8=f11,f12,f9
- nop.b 0
-}
- {.mfb
- nop.m 0
- // set correct sign of result before returning: f12=sgn(a)
- (p10) fma.s0 f8=f11,f12,f0
- (p8) br.ret.sptk b0;;
-}
- {.mfi
- nop.m 0
- // if f13!=RZ(Q), get alternative quotient estimation: a''*y2
- (p7) fma.s1 f13=f6,f10,f0
- nop.i 0
-}
- {.mfb
- nop.m 0
- // if f14 was RZ(Q), set remainder to f14
- (p9) mov f6=f14
- br.cond.sptk loop64;;
+{ .mfi
+ nop.m 0
+ // get new quotient estimation: a'*y2
+ (p7) fma.s1 FR_Q = FR_E1, FR_Y_INV, f0
+ nop.i 0
}
+{ .mfb
+ nop.m 0
+ // was FR_QRND24 = RZ(Q) ? (then new remainder FR_E1 >= 0)
+ (p7) fcmp.lt.unc.s1 p7, p9 = FR_E1, f0
+ nop.b 0
+}
+;;
+.pred.rel "mutex", p6, p10
+{ .mfb
+ nop.m 0
+ // add b to estimated remainder (to cover the case when the quotient was
+ // overestimated)
+ // also set correct sign by using
+ // FR_B_SGN_A = |b|*sgn(a), FR_ROUNDCONST = sgn(a)
+ (p6) fma.s0 f8 = FR_QREM, FR_ROUNDCONST, FR_B_SGN_A
+ nop.b 0
+}
+{ .mfb
+ nop.m 0
+ // set correct sign of result before returning: FR_ROUNDCONST = sgn(a)
+ (p10) fma.s0 f8 = FR_QREM, FR_ROUNDCONST, f0
+ (p8) br.ret.sptk b0
+}
+;;
+{ .mfi
+ nop.m 0
+ // if FR_QRND24 != RZ(Q), get alternative quotient estimation: a''*y2
+ (p7) fma.s1 FR_Q = FR_ABS_A, FR_Y_INV, f0
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ // if FR_E1 was RZ(Q), set remainder to FR_E1
+ (p9) fma.s1 FR_ABS_A = FR_E1, f1, f0
+ br.cond.sptk loop64
+}
+;;
-FMOD_X_NAN_INF:
+FMOD_A_NAN_INF:
-// Y zero ?
-{.mfi
- nop.m 0
- fclass.m p10,p0=f8,0xc3 // Test x=nan
- nop.i 0
+// b zero ?
+{ .mfi
+ nop.m 0
+ fclass.m p10, p0 = f8, 0xc3 // Test a = nan
+ nop.i 0
}
-{.mfi
- nop.m 0
- fma.s1 f10=f9,f1,f0
- nop.i 0;;
+{ .mfi
+ nop.m 0
+ fma.s1 FR_NORM_B = f9, f1, f0
+ nop.i 0
}
+;;
-{.mfi
- nop.m 0
- fma.s0 f8=f8,f1,f0
- nop.i 0
+{ .mfi
+ nop.m 0
+ fma.s0 f8 = f8, f1, f0
+ nop.i 0
}
-{.mfi
- nop.m 0
-(p10) fclass.m p10,p0=f9,0x07 // Test x=nan, and y=zero
- nop.i 0;;
+{ .mfi
+ nop.m 0
+ (p10) fclass.m p10, p0 = f9, 0x07 // Test a = nan, and b = zero
+ nop.i 0
}
-{.mfb
- nop.m 0
- fcmp.eq.unc.s1 p11,p0=f10,f0
-(p10) br.ret.spnt b0;; // Exit with result=x if x=nan and y=zero
+;;
+
+{ .mfb
+ nop.m 0
+ fcmp.eq.unc.s1 p11, p0 = FR_NORM_B, f0
+ (p10) br.ret.spnt b0 // Exit with result = a if a = nan and b = zero
}
-{.mib
- nop.m 0
- nop.i 0
- // if Y zero
- (p11) br.cond.spnt FMOD_Y_ZERO;;
+;;
+
+{ .mib
+ nop.m 0
+ nop.i 0
+ // if b zero
+ (p11) br.cond.spnt FMOD_B_ZERO
}
+;;
-// X infinity? Return QNAN indefinite
+// a= infinity? Return QNAN indefinite
{ .mfi
- // set p7 t0 0
- cmp.ne p7,p0=r0,r0
- fclass.m.unc p8,p9 = f8, 0x23
- nop.i 999;;
+ // set p7 to 0
+ cmp.ne p7, p0 = r0, r0
+ fclass.m.unc p8, p9 = f8, 0x23
+ nop.i 0
}
-// Y NaN ?
-{.mfi
- nop.m 999
-(p8) fclass.m p9,p8=f9,0xc3
- nop.i 0;;
+;;
+
+// b NaN ?
+{ .mfi
+ nop.m 0
+ (p8) fclass.m p9, p8 = f9, 0xc3
+ nop.i 0
}
-// Y not pseudo-zero ? (r29 holds significand)
-{.mii
- nop.m 999
-(p8) cmp.ne p7,p0=r29,r0
- nop.i 0;;
+;;
+
+// b not pseudo-zero ? (GR_SIG_B holds significand)
+{ .mii
+ nop.m 0
+ (p8) cmp.ne p7, p0 = GR_SIG_B, r0
+ nop.i 0
}
-{.mfi
- nop.m 999
-(p8) frcpa.s0 f8,p0 = f8,f8
- nop.i 0
+;;
+
+{ .mfi
+ nop.m 0
+ (p8) frcpa.s0 f8, p0 = f8, f8
+ nop.i 0
}
{ .mfi
- nop.m 999
- // also set Denormal flag if necessary
-(p7) fnma.s0 f9=f9,f1,f9
- nop.i 999 ;;
+ nop.m 0
+ // also set Denormal flag if necessary
+ (p7) fnma.s0 f9 = f9, f1, f9
+ nop.i 0
}
+;;
{ .mfb
- nop.m 999
-(p8) fma.s0 f8=f8,f1,f0
- nop.b 999 ;;
+ nop.m 0
+ (p8) fma.s0 f8 = f8, f1, f0
+ nop.b 0
}
+;;
{ .mfb
- nop.m 999
-(p9) frcpa.s0 f8,p7=f8,f9
- br.ret.sptk b0 ;;
+ nop.m 0
+ (p9) frcpa.s0 f8, p7 = f8, f9
+ br.ret.sptk b0
}
+;;
-
-FMOD_Y_NAN_INF_ZERO:
-// Y INF
+FMOD_B_NAN_INF_ZERO:
+// b INF
{ .mfi
- nop.m 999
- fclass.m.unc p7,p0 = f9, 0x23
- nop.i 999 ;;
+ nop.m 0
+ fclass.m.unc p7, p0 = f9, 0x23
+ nop.i 0
}
+;;
{ .mfb
- nop.m 999
-(p7) fma.s0 f8=f8,f1,f0
-(p7) br.ret.spnt b0 ;;
+ nop.m 0
+ (p7) fma.s0 f8 = f8, f1, f0
+ (p7) br.ret.spnt b0
}
+;;
-// Y NAN?
+// b NAN?
{ .mfi
- nop.m 999
- fclass.m.unc p9,p10 = f9, 0xc3
- nop.i 999 ;;
+ nop.m 0
+ fclass.m.unc p9, p10 = f9, 0xc3
+ nop.i 0
}
+;;
+
{ .mfi
- nop.m 999
-(p10) fclass.nm p9,p0 = f9, 0xff
- nop.i 999 ;;
+ nop.m 0
+ (p10) fclass.nm p9, p0 = f9, 0xff
+ nop.i 0
}
+;;
{ .mfb
- nop.m 999
-(p9) fma.s0 f8=f9,f1,f0
-(p9) br.ret.spnt b0 ;;
+ nop.m 0
+ (p9) fma.s0 f8 = f9, f1, f0
+ (p9) br.ret.spnt b0
}
+;;
-FMOD_Y_ZERO:
+FMOD_B_ZERO:
// Y zero? Must be zero at this point
// because it is the only choice left.
// Return QNAN indefinite
-{.mfi
- nop.m 0
- // set Invalid
- frcpa.s0 f12,p0=f0,f0
- nop.i 0
-}
-// X NAN?
{ .mfi
- nop.m 999
- fclass.m.unc p9,p10 = f8, 0xc3
- nop.i 999 ;;
+ nop.m 0
+ // set Invalid
+ frcpa.s0 FR_TMP, p0 = f0, f0
+ nop.i 0
}
+;;
+
+// a NAN?
{ .mfi
- nop.m 999
-(p10) fclass.nm p9,p10 = f8, 0xff
- nop.i 999 ;;
+ nop.m 0
+ fclass.m.unc p9, p10 = f8, 0xc3
+ nop.i 0
}
+;;
-{.mfi
- nop.m 999
- (p9) frcpa.s0 f11,p7=f8,f0
- nop.i 0;;
+{ .mfi
+ alloc GR_ARPFS = ar.pfs, 1, 4, 4, 0
+ (p10) fclass.nm p9, p10 = f8, 0xff
+ nop.i 0
}
-
+;;
{ .mfi
- nop.m 999
-(p10) frcpa.s0 f11,p7 = f9,f9
- mov GR_Parameter_TAG = 120 ;;
+ nop.m 0
+ (p9) frcpa.s0 FR_TMP2, p7 = f8, f0
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
- fmerge.s f10 = f8, f8
- nop.i 999
+ nop.m 0
+ (p10) frcpa.s0 FR_TMP2, p7 = f9, f9
+ mov GR_Parameter_TAG = 120
}
+;;
+{ .mfi
+ nop.m 0
+ fmerge.s FR_X = f8, f8
+ nop.i 0
+}
{ .mfb
- nop.m 999
- fma.s0 f8=f11,f1,f0
- br.sptk __libm_error_region;;
+ nop.m 0
+ fma.s0 f8 = FR_TMP2, f1, f0
+ br.sptk __libm_error_region
}
+;;
GLOBAL_IEEE754_END(fmodl)
-
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
- add GR_Parameter_Y=-32,sp // Parameter 2 value
- nop.f 0
-.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+ add GR_Parameter_Y = -32, sp // Parameter 2 value
+ nop.f 0
+.save ar.pfs, GR_SAVE_PFS
+ mov GR_SAVE_PFS = ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
- add sp=-64,sp // Create new stack
- nop.f 0
- mov GR_SAVE_GP=gp // Save gp
-};;
+ add sp = -64, sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP = gp // Save gp
+}
+;;
+
{ .mmi
- stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
- add GR_Parameter_X = 16,sp // Parameter 1 address
-.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0 // Save b0
-};;
+ stfe [ GR_Parameter_Y ] = FR_Y, 16 // Save Parameter 2 on stack
+ add GR_Parameter_X = 16, sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0 = b0 // Save b0
+}
+;;
+
.body
{ .mib
- stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
- nop.b 0 // Parameter 3 address
+ stfe [ GR_Parameter_X ] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0, GR_Parameter_Y
+ nop.b 0 // Parameter 3 address
}
{ .mib
- stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
- add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support# // Call error handling function
-};;
+ stfe [ GR_Parameter_Y ] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16, GR_Parameter_Y
+ br.call.sptk b0 = __libm_error_support# // Call error handling function
+}
+;;
+
{ .mmi
- nop.m 0
- nop.m 0
- add GR_Parameter_RESULT = 48,sp
-};;
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48, sp
+}
+;;
+
{ .mmi
- ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
+ ldfe f8 = [ GR_Parameter_RESULT ] // Get return result off stack
.restore sp
- add sp = 64,sp // Restore stack pointer
- mov b0 = GR_SAVE_B0 // Restore return address
-};;
+ add sp = 64, sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+}
+;;
+
{ .mib
- mov gp = GR_SAVE_GP // Restore gp
- mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
- br.ret.sptk b0 // Return
-};;
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+}
+;;
LOCAL_LIBM_END(__libm_error_region)
-
-
-
-.type __libm_error_support#,@function
+.type __libm_error_support#, @function
.global __libm_error_support#
-
-
diff --git a/sysdeps/ia64/fpu/e_hypot.S b/sysdeps/ia64/fpu/e_hypot.S
index 885c819..36cfd1e 100644
--- a/sysdeps/ia64/fpu/e_hypot.S
+++ b/sysdeps/ia64/fpu/e_hypot.S
@@ -106,6 +106,7 @@ FR_RESULT = f8
LOCAL_LIBM_ENTRY(cabs)
LOCAL_LIBM_END(cabs)
+
GLOBAL_IEEE754_ENTRY(hypot)
{.mfi
@@ -384,6 +385,7 @@ GLOBAL_IEEE754_ENTRY(hypot)
(p9) br.ret.sptk b0;;
}
GLOBAL_IEEE754_END(hypot)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/e_hypotf.S b/sysdeps/ia64/fpu/e_hypotf.S
index 633bb67..d6fcbd1 100644
--- a/sysdeps/ia64/fpu/e_hypotf.S
+++ b/sysdeps/ia64/fpu/e_hypotf.S
@@ -106,6 +106,7 @@ FR_RESULT = f8
LOCAL_LIBM_ENTRY(cabsf)
LOCAL_LIBM_END(cabsf)
+
GLOBAL_IEEE754_ENTRY(hypotf)
{.mfi
alloc r32= ar.pfs,0,4,4,0
@@ -337,6 +338,7 @@ GLOBAL_IEEE754_ENTRY(hypotf)
(p9) br.ret.sptk b0;;
}
GLOBAL_IEEE754_END(hypotf)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mii
diff --git a/sysdeps/ia64/fpu/e_hypotl.S b/sysdeps/ia64/fpu/e_hypotl.S
index 0aa94b6..988b86e 100644
--- a/sysdeps/ia64/fpu/e_hypotl.S
+++ b/sysdeps/ia64/fpu/e_hypotl.S
@@ -105,6 +105,7 @@ FR_RESULT = f8
LOCAL_LIBM_ENTRY(cabsl)
LOCAL_LIBM_END(cabsl)
+
GLOBAL_IEEE754_ENTRY(hypotl)
{.mfi
alloc r32= ar.pfs,0,4,4,0
@@ -421,6 +422,7 @@ GLOBAL_IEEE754_ENTRY(hypotl)
(p9) br.ret.sptk b0;;
}
GLOBAL_IEEE754_END(hypotl)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/e_lgamma_r.c b/sysdeps/ia64/fpu/e_lgamma_r.c
index e892635..ebc90fc 100644
--- a/sysdeps/ia64/fpu/e_lgamma_r.c
+++ b/sysdeps/ia64/fpu/e_lgamma_r.c
@@ -1,5 +1,6 @@
/* file: lgamma_r.c */
+
// Copyright (c) 2002 Intel Corporation
// All rights reserved.
//
@@ -20,7 +21,6 @@
// products derived from this software without specific prior written
// permission.
-// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
diff --git a/sysdeps/ia64/fpu/e_lgammaf_r.c b/sysdeps/ia64/fpu/e_lgammaf_r.c
index e5d4d2e..4efa840 100644
--- a/sysdeps/ia64/fpu/e_lgammaf_r.c
+++ b/sysdeps/ia64/fpu/e_lgammaf_r.c
@@ -1,5 +1,6 @@
/* file: lgammaf_r.c */
+
// Copyright (c) 2002 Intel Corporation
// All rights reserved.
//
@@ -20,7 +21,6 @@
// products derived from this software without specific prior written
// permission.
-// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
diff --git a/sysdeps/ia64/fpu/e_lgammal_r.c b/sysdeps/ia64/fpu/e_lgammal_r.c
index a2b36d6..3fbea70 100644
--- a/sysdeps/ia64/fpu/e_lgammal_r.c
+++ b/sysdeps/ia64/fpu/e_lgammal_r.c
@@ -1,5 +1,6 @@
/* file: lgammal_r.c */
+
// Copyright (c) 2002 Intel Corporation
// All rights reserved.
//
@@ -20,7 +21,6 @@
// products derived from this software without specific prior written
// permission.
-// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
diff --git a/sysdeps/ia64/fpu/e_log.S b/sysdeps/ia64/fpu/e_log.S
index f80f153..7b277f8 100644
--- a/sysdeps/ia64/fpu/e_log.S
+++ b/sysdeps/ia64/fpu/e_log.S
@@ -1386,6 +1386,7 @@ GLOBAL_IEEE754_ENTRY(log10)
};;
GLOBAL_IEEE754_END(log10)
+
GLOBAL_IEEE754_ENTRY(log)
{ .mfi
getf.exp GR_Exp = f8 // if x is unorm then must recompute
@@ -1667,6 +1668,7 @@ log_libm_err:
};;
GLOBAL_IEEE754_END(log)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/e_log2.S b/sysdeps/ia64/fpu/e_log2.S
index 7679357..660a952 100644
--- a/sysdeps/ia64/fpu/e_log2.S
+++ b/sysdeps/ia64/fpu/e_log2.S
@@ -655,6 +655,7 @@ SPECIAL_LOG2:
GLOBAL_LIBM_END(log2)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/e_log2f.S b/sysdeps/ia64/fpu/e_log2f.S
index 6de2f38..17d710a 100644
--- a/sysdeps/ia64/fpu/e_log2f.S
+++ b/sysdeps/ia64/fpu/e_log2f.S
@@ -493,6 +493,7 @@ SPECIAL_log2f:
GLOBAL_LIBM_END(log2f)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/e_log2l.S b/sysdeps/ia64/fpu/e_log2l.S
index 37af2f2..b3fe63f 100644
--- a/sysdeps/ia64/fpu/e_log2l.S
+++ b/sysdeps/ia64/fpu/e_log2l.S
@@ -761,6 +761,7 @@ LOG2_PSEUDO_ZERO:
GLOBAL_IEEE754_END(log2l)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/e_logf.S b/sysdeps/ia64/fpu/e_logf.S
index 0ca6d3f..186edab 100644
--- a/sysdeps/ia64/fpu/e_logf.S
+++ b/sysdeps/ia64/fpu/e_logf.S
@@ -841,6 +841,7 @@ GLOBAL_IEEE754_ENTRY(log10f)
br.cond.sptk logf_log10f_common
};;
GLOBAL_IEEE754_END(log10f)
+
GLOBAL_IEEE754_ENTRY(logf)
{ .mfi
getf.exp GR_Exp = f8 // if x is unorm then must recompute
@@ -1087,6 +1088,7 @@ logf_libm_err:
};;
GLOBAL_IEEE754_END(logf)
+
// Stack operations when calling error support.
// (1) (2) (3) (call) (4)
// sp -> + psp -> + psp -> + sp -> +
diff --git a/sysdeps/ia64/fpu/e_logl.S b/sysdeps/ia64/fpu/e_logl.S
index ba6b55b..3ebb20a 100644
--- a/sysdeps/ia64/fpu/e_logl.S
+++ b/sysdeps/ia64/fpu/e_logl.S
@@ -634,6 +634,7 @@ GLOBAL_IEEE754_ENTRY(logl)
GLOBAL_IEEE754_END(logl)
+
GLOBAL_IEEE754_ENTRY(log10l)
{ .mfi
alloc r32 = ar.pfs,0,21,4,0
@@ -1144,6 +1145,7 @@ LOGL_64_negative:
GLOBAL_IEEE754_END(log10l)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/e_pow.S b/sysdeps/ia64/fpu/e_pow.S
index 11fae53..86005f2 100644
--- a/sysdeps/ia64/fpu/e_pow.S
+++ b/sysdeps/ia64/fpu/e_pow.S
@@ -2234,6 +2234,7 @@ POW_OVER_UNDER_ERROR:
GLOBAL_LIBM_END(pow)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
diff --git a/sysdeps/ia64/fpu/e_powf.S b/sysdeps/ia64/fpu/e_powf.S
index 275843f..4c839cb 100644
--- a/sysdeps/ia64/fpu/e_powf.S
+++ b/sysdeps/ia64/fpu/e_powf.S
@@ -64,6 +64,8 @@
// 05/20/02 Cleaned up namespace and sf0 syntax
// 08/29/02 Improved Itanium 2 performance
// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 10/09/03 Modified algorithm to improve performance, reduce table size, and
+// fix boundary case powf(2.0,-150.0)
//
// API
//==============================================================
@@ -106,37 +108,33 @@
//
// Log(1/Cm) = log(1/frcpa(1+m/256)) where m goes from 0 to 255.
//
-// We tabluate as two doubles, T and t, where T +t is the value itself.
+// We tabulate as one double, T, for single precision power
//
-// Log(x) = (K Log(2)_hi + T) + (Log(2)_hi + t) + Log( 1 + (Bx-1))
-// Log(x) = G + delta + Log( 1 + (Bx-1))
+// Log(x) = (K Log(2)_hi + T) + (K Log(2)_lo) + Log( 1 + (Bx-1))
+// Log(x) = G + delta + Log( 1 + (Bx-1))
//
// The Log( 1 + (Bx-1)) can be calculated as a series in r = Bx-1.
//
// Log( 1 + (Bx-1)) = r - rsq/2 + p
+// where p = r^3(P0 + P1*r + P2*r^2)
//
// Then,
//
// yLog(x) = yG + y delta + y(r-rsq/2) + yp
-// yLog(x) = Z1 + e3 + Z2 + Z3 + (e2 + e3)
+// yLog(x) = Z1 + e3 + Z2 + Z3
//
//
-// exp(yLog(x)) = exp(Z1 + Z2 + Z3) exp(e1 + e2 + e3)
+// exp(yLog(x)) = exp(Z1 + Z2) exp(Z3) exp(e3)
//
//
// exp(Z3) is another series.
-// exp(e1 + e2 + e3) is approximated as f3 = 1 + (e1 + e2 + e3)
+// exp(e3) is approximated as f3 = 1 + e3
//
-// Z1 (128/log2) = number of log2/128 in Z1 is N1
-// Z2 (128/log2) = number of log2/128 in Z2 is N2
-//
-// s1 = Z1 - N1 log2/128
-// s2 = Z2 - N2 log2/128
+// exp(Z1 + Z2) = exp(Z)
+// Z (128/log2) = number of log2/128 in Z is N
//
-// s = s1 + s2
-// N = N1 + N2
+// s = Z - N log2/128
//
-// exp(Z1 + Z2) = exp(Z)
// exp(Z) = exp(s) exp(N log2/128)
//
// exp(r) = exp(Z - N log2/128)
@@ -161,13 +159,11 @@
// N log2/128 = M log2 + I2 log2/8 + I1 log2/128
//
// exp(Z) = exp(s) (1+d) exp(log(2^M) + log(2^I2/8) + log(2^I1/128))
-// exp(Z) = exp(s) (1+d1) (1+d2)(2^M) 2^I2/8 2^I1/128
-// exp(Z) = exp(s) f1 f2 (2^M) 2^I2/8 2^I1/128
+// exp(Z) = exp(s) f12 (2^M) 2^I2/8 2^I1/128
//
// I1, I2 are table indices. Use a series for exp(s).
// Then get exp(Z)
//
-// exp(yLog(x)) = exp(Z1 + Z2 + Z3) exp(e1 + e2 + e3)
// exp(yLog(x)) = exp(Z) exp(Z3) f3
// exp(yLog(x)) = exp(Z)f3 exp(Z3)
// exp(yLog(x)) = A exp(Z3)
@@ -331,6 +327,8 @@
// +------------+----------------+-+
// | 13 bits | 50 bits | |
// +------------+----------------+-+
+//
+// Note: For powf only the table of T is needed
// Special Cases
@@ -402,10 +400,17 @@
// integer registers used
+pow_GR_exp_half = r10
+pow_GR_signexp_Xm1 = r11
+pow_GR_tmp = r11
+
pow_GR_signexp_X = r14
pow_GR_17ones = r15
+pow_GR_Fpsr = r15
pow_AD_P = r16
+pow_GR_rcs0_mask = r16
pow_GR_exp_2tom8 = r17
+pow_GR_rcs0 = r17
pow_GR_sig_X = r18
pow_GR_10033 = r19
pow_GR_16ones = r20
@@ -423,9 +428,6 @@ pow_GR_offset = r29
pow_GR_exp_Xm1 = r30
pow_GR_xneg_yodd = r31
-pow_GR_signexp_Xm1 = r35
-pow_GR_int_W1 = r36
-pow_GR_int_W2 = r37
pow_GR_int_N = r38
pow_GR_index1 = r39
pow_GR_index2 = r40
@@ -465,24 +467,20 @@ POW_B = f32
POW_NORM_X = f33
POW_Xm1 = f34
POW_r1 = f34
-POW_P4 = f35
-POW_P5 = f36
POW_NORM_Y = f37
POW_Q2 = f38
-POW_Q3 = f39
+POW_eps = f39
POW_P2 = f40
-POW_P3 = f41
POW_P0 = f42
POW_log2_lo = f43
POW_r = f44
POW_Q0_half = f45
-POW_Q1 = f46
POW_tmp = f47
POW_log2_hi = f48
-POW_Q4 = f49
+POW_Q1 = f49
POW_P1 = f50
POW_log2_by_128_hi = f51
@@ -491,54 +489,33 @@ POW_rsq = f53
POW_Yrcub = f54
POW_log2_by_128_lo = f55
-POW_v6 = f56
POW_xsq = f57
-POW_v4 = f58
POW_v2 = f59
POW_T = f60
-POW_Tt = f61
POW_RSHF = f62
-POW_v21ps = f63
-POW_s4 = f64
+POW_v210 = f63
POW_twoV = f65
POW_U = f66
POW_G = f67
POW_delta = f68
-POW_v3 = f69
POW_V = f70
POW_p = f71
-POW_Z1 = f72
+POW_Z = f72
POW_e3 = f73
-POW_e2 = f74
POW_Z2 = f75
-POW_e1 = f76
POW_W1 = f77
-POW_UmZ2 = f78
-POW_W2 = f79
POW_Z3 = f80
-POW_int_W1 = f81
-POW_e12 = f82
-POW_int_W2 = f83
-POW_UmZ2pV = f84
POW_Z3sq = f85
-POW_e123 = f86
-POW_N1float = f87
-POW_N2float = f88
+POW_Nfloat = f87
POW_f3 = f89
POW_q = f90
-POW_s1 = f91
-POW_Nfloat = f92
-POW_s2 = f93
-POW_f2 = f94
-POW_f1 = f95
-
POW_T1 = f96
POW_T2 = f97
POW_2M = f98
@@ -575,25 +552,18 @@ RODATA
.align 16
LOCAL_OBJECT_START(pow_table_P)
-data8 0x8000F7B249FF332D, 0x0000BFFC // P_5
-data8 0xAAAAAAA9E7902C7F, 0x0000BFFC // P_3
data8 0x80000000000018E5, 0x0000BFFD // P_1
data8 0xb8aa3b295c17f0bc, 0x00004006 // inv_ln2_by_128
//
//
data8 0x3FA5555555554A9E // Q_2
-data8 0x3F8111124F4DD9F9 // Q_3
-data8 0x3FE0000000000000 // Q_0
+data8 0x0000000000000000 // Pad
data8 0x3FC5555555554733 // Q_1
-data8 0x3F56C16D9360FFA0 // Q_4
data8 0x43e8000000000000 // Right shift constant for exp
data8 0xc9e3b39803f2f6af, 0x00003fb7 // ln2_by_128_lo
-data8 0x0000000000000000 // pad to eliminate bank conflicts with pow_table_Q
-data8 0x0000000000000000 // pad to eliminate bank conflicts with pow_table_Q
LOCAL_OBJECT_END(pow_table_P)
LOCAL_OBJECT_START(pow_table_Q)
-data8 0x9249FE7F0DC423CF, 0x00003FFC // P_4
data8 0xCCCCCCCC4ED2BA7F, 0x00003FFC // P_2
data8 0xAAAAAAAAAAAAB505, 0x00003FFD // P_0
data8 0x3fe62e42fefa39e8, 0x3cccd5e4f1d9cc02 // log2 hi lo = +6.93147e-001
@@ -602,262 +572,262 @@ LOCAL_OBJECT_END(pow_table_Q)
LOCAL_OBJECT_START(pow_Tt)
-data8 0x3f60040155d58800, 0x3c93bce0ce3ddd81 // log(1/frcpa(1+0/256))= +1.95503e-003
-data8 0x3f78121214586a00, 0x3cb540e0a5cfc9bc // log(1/frcpa(1+1/256))= +5.87661e-003
-data8 0x3f841929f9683200, 0x3cbdf1d57404da1f // log(1/frcpa(1+2/256))= +9.81362e-003
-data8 0x3f8c317384c75f00, 0x3c69806208c04c22 // log(1/frcpa(1+3/256))= +1.37662e-002
-data8 0x3f91a6b91ac73380, 0x3c7874daa716eb32 // log(1/frcpa(1+4/256))= +1.72376e-002
-data8 0x3f95ba9a5d9ac000, 0x3cacbb84e08d78ac // log(1/frcpa(1+5/256))= +2.12196e-002
-data8 0x3f99d2a807432580, 0x3cbcf80538b441e1 // log(1/frcpa(1+6/256))= +2.52177e-002
-data8 0x3f9d6b2725979800, 0x3c6095e5c8f8f359 // log(1/frcpa(1+7/256))= +2.87291e-002
-data8 0x3fa0c58fa19dfa80, 0x3cb4c5d4e9d0dda2 // log(1/frcpa(1+8/256))= +3.27573e-002
-data8 0x3fa2954c78cbce00, 0x3caa932b860ab8d6 // log(1/frcpa(1+9/256))= +3.62953e-002
-data8 0x3fa4a94d2da96c40, 0x3ca670452b76bbd5 // log(1/frcpa(1+10/256))= +4.03542e-002
-data8 0x3fa67c94f2d4bb40, 0x3ca84104f9941798 // log(1/frcpa(1+11/256))= +4.39192e-002
-data8 0x3fa85188b630f040, 0x3cb40a882cbf0153 // log(1/frcpa(1+12/256))= +4.74971e-002
-data8 0x3faa6b8abe73af40, 0x3c988d46e25c9059 // log(1/frcpa(1+13/256))= +5.16017e-002
-data8 0x3fac441e06f72a80, 0x3cae3e930a1a2a96 // log(1/frcpa(1+14/256))= +5.52072e-002
-data8 0x3fae1e6713606d00, 0x3c8a796f6283b580 // log(1/frcpa(1+15/256))= +5.88257e-002
-data8 0x3faffa6911ab9300, 0x3c5193070351e88a // log(1/frcpa(1+16/256))= +6.24574e-002
-data8 0x3fb0ec139c5da600, 0x3c623f2a75eb992d // log(1/frcpa(1+17/256))= +6.61022e-002
-data8 0x3fb1dbd2643d1900, 0x3ca649b2ef8927f0 // log(1/frcpa(1+18/256))= +6.97605e-002
-data8 0x3fb2cc7284fe5f00, 0x3cbc5e86599513e2 // log(1/frcpa(1+19/256))= +7.34321e-002
-data8 0x3fb3bdf5a7d1ee60, 0x3c90bd4bb69dada3 // log(1/frcpa(1+20/256))= +7.71173e-002
-data8 0x3fb4b05d7aa012e0, 0x3c54e377c9b8a54f // log(1/frcpa(1+21/256))= +8.08161e-002
-data8 0x3fb580db7ceb5700, 0x3c7fdb2f98354cde // log(1/frcpa(1+22/256))= +8.39975e-002
-data8 0x3fb674f089365a60, 0x3cb9994c9d3301c1 // log(1/frcpa(1+23/256))= +8.77219e-002
-data8 0x3fb769ef2c6b5680, 0x3caaec639db52a79 // log(1/frcpa(1+24/256))= +9.14602e-002
-data8 0x3fb85fd927506a40, 0x3c9f9f99a3cf8e25 // log(1/frcpa(1+25/256))= +9.52125e-002
-data8 0x3fb9335e5d594980, 0x3ca15c3abd47d99a // log(1/frcpa(1+26/256))= +9.84401e-002
-data8 0x3fba2b0220c8e5e0, 0x3cb4ca639adf6fc3 // log(1/frcpa(1+27/256))= +1.02219e-001
-data8 0x3fbb0004ac1a86a0, 0x3ca7cb81bf959a59 // log(1/frcpa(1+28/256))= +1.05469e-001
-data8 0x3fbbf968769fca00, 0x3cb0c646c121418e // log(1/frcpa(1+29/256))= +1.09274e-001
-data8 0x3fbccfedbfee13a0, 0x3ca0465fce24ab4b // log(1/frcpa(1+30/256))= +1.12548e-001
-data8 0x3fbda727638446a0, 0x3c82803f4e2e6603 // log(1/frcpa(1+31/256))= +1.15832e-001
-data8 0x3fbea3257fe10f60, 0x3cb986a3f2313d1a // log(1/frcpa(1+32/256))= +1.19677e-001
-data8 0x3fbf7be9fedbfde0, 0x3c97d16a6a621cf4 // log(1/frcpa(1+33/256))= +1.22985e-001
-data8 0x3fc02ab352ff25f0, 0x3c9cc6baad365600 // log(1/frcpa(1+34/256))= +1.26303e-001
-data8 0x3fc097ce579d2040, 0x3cb9ba16d329440b // log(1/frcpa(1+35/256))= +1.29633e-001
-data8 0x3fc1178e8227e470, 0x3cb7bc671683f8e6 // log(1/frcpa(1+36/256))= +1.33531e-001
-data8 0x3fc185747dbecf30, 0x3c9d1116f66d2345 // log(1/frcpa(1+37/256))= +1.36885e-001
-data8 0x3fc1f3b925f25d40, 0x3c8162c9ef939ac6 // log(1/frcpa(1+38/256))= +1.40250e-001
-data8 0x3fc2625d1e6ddf50, 0x3caad3a1ec384fc3 // log(1/frcpa(1+39/256))= +1.43627e-001
-data8 0x3fc2d1610c868130, 0x3cb3ad997036941b // log(1/frcpa(1+40/256))= +1.47015e-001
-data8 0x3fc340c597411420, 0x3cbc2308262c7998 // log(1/frcpa(1+41/256))= +1.50414e-001
-data8 0x3fc3b08b6757f2a0, 0x3cb2170d6cdf0526 // log(1/frcpa(1+42/256))= +1.53825e-001
-data8 0x3fc40dfb08378000, 0x3c9bb453c4f7b685 // log(1/frcpa(1+43/256))= +1.56677e-001
-data8 0x3fc47e74e8ca5f70, 0x3cb836a48fdfce9d // log(1/frcpa(1+44/256))= +1.60109e-001
-data8 0x3fc4ef51f6466de0, 0x3ca07a43919aa64b // log(1/frcpa(1+45/256))= +1.63553e-001
-data8 0x3fc56092e02ba510, 0x3ca85006899d97b0 // log(1/frcpa(1+46/256))= +1.67010e-001
-data8 0x3fc5d23857cd74d0, 0x3ca30a5ba6e7abbe // log(1/frcpa(1+47/256))= +1.70478e-001
-data8 0x3fc6313a37335d70, 0x3ca905586f0ac97e // log(1/frcpa(1+48/256))= +1.73377e-001
-data8 0x3fc6a399dabbd380, 0x3c9b2c6657a96684 // log(1/frcpa(1+49/256))= +1.76868e-001
-data8 0x3fc70337dd3ce410, 0x3cb50bc52f55cdd8 // log(1/frcpa(1+50/256))= +1.79786e-001
-data8 0x3fc77654128f6120, 0x3cad2eb7c9a39efe // log(1/frcpa(1+51/256))= +1.83299e-001
-data8 0x3fc7e9d82a0b0220, 0x3cba127e90393c01 // log(1/frcpa(1+52/256))= +1.86824e-001
-data8 0x3fc84a6b759f5120, 0x3cbd7fd52079f706 // log(1/frcpa(1+53/256))= +1.89771e-001
-data8 0x3fc8ab47d5f5a300, 0x3cbfae141751a3de // log(1/frcpa(1+54/256))= +1.92727e-001
-data8 0x3fc91fe490965810, 0x3cb69cf30a1c319e // log(1/frcpa(1+55/256))= +1.96286e-001
-data8 0x3fc981634011aa70, 0x3ca5bb3d208bc42a // log(1/frcpa(1+56/256))= +1.99261e-001
-data8 0x3fc9f6c407089660, 0x3ca04d68658179a0 // log(1/frcpa(1+57/256))= +2.02843e-001
-data8 0x3fca58e729348f40, 0x3c99f5411546c286 // log(1/frcpa(1+58/256))= +2.05838e-001
-data8 0x3fcabb55c31693a0, 0x3cb9a5350eb327d5 // log(1/frcpa(1+59/256))= +2.08842e-001
-data8 0x3fcb1e104919efd0, 0x3c18965fcce7c406 // log(1/frcpa(1+60/256))= +2.11855e-001
-data8 0x3fcb94ee93e367c0, 0x3cb503716da45184 // log(1/frcpa(1+61/256))= +2.15483e-001
-data8 0x3fcbf851c0675550, 0x3cbdf1b3f7ab5378 // log(1/frcpa(1+62/256))= +2.18516e-001
-data8 0x3fcc5c0254bf23a0, 0x3ca7aab9ed0b1d7b // log(1/frcpa(1+63/256))= +2.21558e-001
-data8 0x3fccc000c9db3c50, 0x3c92a7a2a850072a // log(1/frcpa(1+64/256))= +2.24609e-001
-data8 0x3fcd244d99c85670, 0x3c9f6019120edf4c // log(1/frcpa(1+65/256))= +2.27670e-001
-data8 0x3fcd88e93fb2f450, 0x3c6affb96815e081 // log(1/frcpa(1+66/256))= +2.30741e-001
-data8 0x3fcdedd437eaef00, 0x3c72553595897976 // log(1/frcpa(1+67/256))= +2.33820e-001
-data8 0x3fce530effe71010, 0x3c90913b020fa182 // log(1/frcpa(1+68/256))= +2.36910e-001
-data8 0x3fceb89a1648b970, 0x3c837ba4045bfd25 // log(1/frcpa(1+69/256))= +2.40009e-001
-data8 0x3fcf1e75fadf9bd0, 0x3cbcea6d13e0498d // log(1/frcpa(1+70/256))= +2.43117e-001
-data8 0x3fcf84a32ead7c30, 0x3ca5e3a67b3c6d77 // log(1/frcpa(1+71/256))= +2.46235e-001
-data8 0x3fcfeb2233ea07c0, 0x3cba0c6f0049c5a6 // log(1/frcpa(1+72/256))= +2.49363e-001
-data8 0x3fd028f9c7035c18, 0x3cb0a30b06677ff6 // log(1/frcpa(1+73/256))= +2.52501e-001
-data8 0x3fd05c8be0d96358, 0x3ca0f1c77ccb5865 // log(1/frcpa(1+74/256))= +2.55649e-001
-data8 0x3fd085eb8f8ae790, 0x3cbd513f45fe7a97 // log(1/frcpa(1+75/256))= +2.58174e-001
-data8 0x3fd0b9c8e32d1910, 0x3c927449047ca006 // log(1/frcpa(1+76/256))= +2.61339e-001
-data8 0x3fd0edd060b78080, 0x3c89b52d8435f53e // log(1/frcpa(1+77/256))= +2.64515e-001
-data8 0x3fd122024cf00638, 0x3cbdd976fabda4bd // log(1/frcpa(1+78/256))= +2.67701e-001
-data8 0x3fd14be2927aecd0, 0x3cb02f90ad0bc471 // log(1/frcpa(1+79/256))= +2.70257e-001
-data8 0x3fd180618ef18ad8, 0x3cbd003792c71a98 // log(1/frcpa(1+80/256))= +2.73461e-001
-data8 0x3fd1b50bbe2fc638, 0x3ca9ae64c6403ead // log(1/frcpa(1+81/256))= +2.76675e-001
-data8 0x3fd1df4cc7cf2428, 0x3cb43f0455f7e395 // log(1/frcpa(1+82/256))= +2.79254e-001
-data8 0x3fd214456d0eb8d0, 0x3cb0fbd748d75d30 // log(1/frcpa(1+83/256))= +2.82487e-001
-data8 0x3fd23ec5991eba48, 0x3c906edd746b77e2 // log(1/frcpa(1+84/256))= +2.85081e-001
-data8 0x3fd2740d9f870af8, 0x3ca9802e6a00a670 // log(1/frcpa(1+85/256))= +2.88333e-001
-data8 0x3fd29ecdabcdfa00, 0x3cacecef70890cfa // log(1/frcpa(1+86/256))= +2.90943e-001
-data8 0x3fd2d46602adcce8, 0x3cb97911955f3521 // log(1/frcpa(1+87/256))= +2.94214e-001
-data8 0x3fd2ff66b04ea9d0, 0x3cb12dabe191d1c9 // log(1/frcpa(1+88/256))= +2.96838e-001
-data8 0x3fd335504b355a30, 0x3cbdf9139df924ec // log(1/frcpa(1+89/256))= +3.00129e-001
-data8 0x3fd360925ec44f58, 0x3cb253e68977a1e3 // log(1/frcpa(1+90/256))= +3.02769e-001
-data8 0x3fd38bf1c3337e70, 0x3cb3d283d2a2da21 // log(1/frcpa(1+91/256))= +3.05417e-001
-data8 0x3fd3c25277333180, 0x3cadaa5b035eae27 // log(1/frcpa(1+92/256))= +3.08735e-001
-data8 0x3fd3edf463c16838, 0x3cb983d680d3c108 // log(1/frcpa(1+93/256))= +3.11399e-001
-data8 0x3fd419b423d5e8c0, 0x3cbc86dd921c139d // log(1/frcpa(1+94/256))= +3.14069e-001
-data8 0x3fd44591e0539f48, 0x3c86a76d6dc2782e // log(1/frcpa(1+95/256))= +3.16746e-001
-data8 0x3fd47c9175b6f0a8, 0x3cb59a2e013c6b5f // log(1/frcpa(1+96/256))= +3.20103e-001
-data8 0x3fd4a8b341552b08, 0x3c93f1e86e468694 // log(1/frcpa(1+97/256))= +3.22797e-001
-data8 0x3fd4d4f390890198, 0x3cbf5e4ea7c5105a // log(1/frcpa(1+98/256))= +3.25498e-001
-data8 0x3fd501528da1f960, 0x3cbf58da53e9ad10 // log(1/frcpa(1+99/256))= +3.28206e-001
-data8 0x3fd52dd06347d4f0, 0x3cb98a28cebf6eef // log(1/frcpa(1+100/256))= +3.30921e-001
-data8 0x3fd55a6d3c7b8a88, 0x3c9c76b67c2d1fd4 // log(1/frcpa(1+101/256))= +3.33644e-001
-data8 0x3fd5925d2b112a58, 0x3c9029616a4331b8 // log(1/frcpa(1+102/256))= +3.37058e-001
-data8 0x3fd5bf406b543db0, 0x3c9fb8292ecfc820 // log(1/frcpa(1+103/256))= +3.39798e-001
-data8 0x3fd5ec433d5c35a8, 0x3cb71a1229d17eec // log(1/frcpa(1+104/256))= +3.42545e-001
-data8 0x3fd61965cdb02c18, 0x3cbba94fe1dbb8d2 // log(1/frcpa(1+105/256))= +3.45300e-001
-data8 0x3fd646a84935b2a0, 0x3c9ee496d2c9ae57 // log(1/frcpa(1+106/256))= +3.48063e-001
-data8 0x3fd6740add31de90, 0x3cb1da3a6c7a9dfd // log(1/frcpa(1+107/256))= +3.50833e-001
-data8 0x3fd6a18db74a58c0, 0x3cb494c257add8dc // log(1/frcpa(1+108/256))= +3.53610e-001
-data8 0x3fd6cf31058670e8, 0x3cb0b244a70a8da9 // log(1/frcpa(1+109/256))= +3.56396e-001
-data8 0x3fd6f180e852f0b8, 0x3c9db7aefa866720 // log(1/frcpa(1+110/256))= +3.58490e-001
-data8 0x3fd71f5d71b894e8, 0x3cbe91c4bf324957 // log(1/frcpa(1+111/256))= +3.61289e-001
-data8 0x3fd74d5aefd66d58, 0x3cb06b3d9bfac023 // log(1/frcpa(1+112/256))= +3.64096e-001
-data8 0x3fd77b79922bd378, 0x3cb727d8804491f4 // log(1/frcpa(1+113/256))= +3.66911e-001
-data8 0x3fd7a9b9889f19e0, 0x3ca2ef22df5bc543 // log(1/frcpa(1+114/256))= +3.69734e-001
-data8 0x3fd7d81b037eb6a0, 0x3cb8fd3ba07a7ece // log(1/frcpa(1+115/256))= +3.72565e-001
-data8 0x3fd8069e33827230, 0x3c8bd1e25866e61a // log(1/frcpa(1+116/256))= +3.75404e-001
-data8 0x3fd82996d3ef8bc8, 0x3ca5aab9f5928928 // log(1/frcpa(1+117/256))= +3.77538e-001
-data8 0x3fd85855776dcbf8, 0x3ca56f33337789d6 // log(1/frcpa(1+118/256))= +3.80391e-001
-data8 0x3fd8873658327cc8, 0x3cbb8ef0401db49d // log(1/frcpa(1+119/256))= +3.83253e-001
-data8 0x3fd8aa75973ab8c8, 0x3cbb9961f509a680 // log(1/frcpa(1+120/256))= +3.85404e-001
-data8 0x3fd8d992dc8824e0, 0x3cb220512a53732d // log(1/frcpa(1+121/256))= +3.88280e-001
-data8 0x3fd908d2ea7d9510, 0x3c985f0e513bfb5c // log(1/frcpa(1+122/256))= +3.91164e-001
-data8 0x3fd92c59e79c0e50, 0x3cb82e073fd30d63 // log(1/frcpa(1+123/256))= +3.93332e-001
-data8 0x3fd95bd750ee3ed0, 0x3ca4aa7cdb6dd8a8 // log(1/frcpa(1+124/256))= +3.96231e-001
-data8 0x3fd98b7811a3ee58, 0x3caa93a5b660893e // log(1/frcpa(1+125/256))= +3.99138e-001
-data8 0x3fd9af47f33d4068, 0x3cac294b3b3190ba // log(1/frcpa(1+126/256))= +4.01323e-001
-data8 0x3fd9df270c1914a0, 0x3cbe1a58fd0cd67e // log(1/frcpa(1+127/256))= +4.04245e-001
-data8 0x3fda0325ed14fda0, 0x3cb1efa7950fb57e // log(1/frcpa(1+128/256))= +4.06442e-001
-data8 0x3fda33440224fa78, 0x3c8915fe75e7d477 // log(1/frcpa(1+129/256))= +4.09379e-001
-data8 0x3fda57725e80c380, 0x3ca72bd1062b1b7f // log(1/frcpa(1+130/256))= +4.11587e-001
-data8 0x3fda87d0165dd198, 0x3c91f7845f58dbad // log(1/frcpa(1+131/256))= +4.14539e-001
-data8 0x3fdaac2e6c03f890, 0x3cb6f237a911c509 // log(1/frcpa(1+132/256))= +4.16759e-001
-data8 0x3fdadccc6fdf6a80, 0x3c90ddc4b7687169 // log(1/frcpa(1+133/256))= +4.19726e-001
-data8 0x3fdb015b3eb1e790, 0x3c692dd7d90e1e8e // log(1/frcpa(1+134/256))= +4.21958e-001
-data8 0x3fdb323a3a635948, 0x3c6f85655cbe14de // log(1/frcpa(1+135/256))= +4.24941e-001
-data8 0x3fdb56fa04462908, 0x3c95252d841994de // log(1/frcpa(1+136/256))= +4.27184e-001
-data8 0x3fdb881aa659bc90, 0x3caa53a745a3642f // log(1/frcpa(1+137/256))= +4.30182e-001
-data8 0x3fdbad0bef3db160, 0x3cb32f2540dcc16a // log(1/frcpa(1+138/256))= +4.32437e-001
-data8 0x3fdbd21297781c28, 0x3cbd8e891e106f1d // log(1/frcpa(1+139/256))= +4.34697e-001
-data8 0x3fdc039236f08818, 0x3c809435af522ba7 // log(1/frcpa(1+140/256))= +4.37718e-001
-data8 0x3fdc28cb1e4d32f8, 0x3cb3944752fbd81e // log(1/frcpa(1+141/256))= +4.39990e-001
-data8 0x3fdc4e19b84723c0, 0x3c9a465260cd3fe5 // log(1/frcpa(1+142/256))= +4.42267e-001
-data8 0x3fdc7ff9c74554c8, 0x3c92447d5b6ca369 // log(1/frcpa(1+143/256))= +4.45311e-001
-data8 0x3fdca57b64e9db00, 0x3cb44344a8a00c82 // log(1/frcpa(1+144/256))= +4.47600e-001
-data8 0x3fdccb130a5ceba8, 0x3cbefaddfb97b73f // log(1/frcpa(1+145/256))= +4.49895e-001
-data8 0x3fdcf0c0d18f3268, 0x3cbd3e7bfee57898 // log(1/frcpa(1+146/256))= +4.52194e-001
-data8 0x3fdd232075b5a200, 0x3c9222599987447c // log(1/frcpa(1+147/256))= +4.55269e-001
-data8 0x3fdd490246defa68, 0x3cabafe9a767a80d // log(1/frcpa(1+148/256))= +4.57581e-001
-data8 0x3fdd6efa918d25c8, 0x3cb58a2624e1c6fd // log(1/frcpa(1+149/256))= +4.59899e-001
-data8 0x3fdd9509707ae528, 0x3cbdc3babce578e7 // log(1/frcpa(1+150/256))= +4.62221e-001
-data8 0x3fddbb2efe92c550, 0x3cb0ac0943c434a4 // log(1/frcpa(1+151/256))= +4.64550e-001
-data8 0x3fddee2f3445e4a8, 0x3cbba9d07ce820e8 // log(1/frcpa(1+152/256))= +4.67663e-001
-data8 0x3fde148a1a2726c8, 0x3cb6537e3375b205 // log(1/frcpa(1+153/256))= +4.70004e-001
-data8 0x3fde3afc0a49ff38, 0x3cbfed5518dbc20e // log(1/frcpa(1+154/256))= +4.72350e-001
-data8 0x3fde6185206d5168, 0x3cb6572601f73d5c // log(1/frcpa(1+155/256))= +4.74702e-001
-data8 0x3fde882578823d50, 0x3c9b24abd4584d1a // log(1/frcpa(1+156/256))= +4.77060e-001
-data8 0x3fdeaedd2eac9908, 0x3cb0ceb5e4d2c8f7 // log(1/frcpa(1+157/256))= +4.79423e-001
-data8 0x3fded5ac5f436be0, 0x3ca72f21f1f5238e // log(1/frcpa(1+158/256))= +4.81792e-001
-data8 0x3fdefc9326d16ab8, 0x3c85081a1639a45c // log(1/frcpa(1+159/256))= +4.84166e-001
-data8 0x3fdf2391a21575f8, 0x3cbf11015bdd297a // log(1/frcpa(1+160/256))= +4.86546e-001
-data8 0x3fdf4aa7ee031928, 0x3cb3795bc052a2d1 // log(1/frcpa(1+161/256))= +4.88932e-001
-data8 0x3fdf71d627c30bb0, 0x3c35c61f0f5a88f3 // log(1/frcpa(1+162/256))= +4.91323e-001
-data8 0x3fdf991c6cb3b378, 0x3c97d99419be6028 // log(1/frcpa(1+163/256))= +4.93720e-001
-data8 0x3fdfc07ada69a908, 0x3cbfe9341ded70b1 // log(1/frcpa(1+164/256))= +4.96123e-001
-data8 0x3fdfe7f18eb03d38, 0x3cb85718a640c33f // log(1/frcpa(1+165/256))= +4.98532e-001
-data8 0x3fe007c053c5002c, 0x3cb3addc9c065f09 // log(1/frcpa(1+166/256))= +5.00946e-001
-data8 0x3fe01b942198a5a0, 0x3c9d5aa4c77da6ac // log(1/frcpa(1+167/256))= +5.03367e-001
-data8 0x3fe02f74400c64e8, 0x3cb5a0ee4450ef52 // log(1/frcpa(1+168/256))= +5.05793e-001
-data8 0x3fe04360be7603ac, 0x3c9dd00c35630fe0 // log(1/frcpa(1+169/256))= +5.08225e-001
-data8 0x3fe05759ac47fe30, 0x3cbd063e1f0bd82c // log(1/frcpa(1+170/256))= +5.10663e-001
-data8 0x3fe06b5f1911cf50, 0x3cae8da674af5289 // log(1/frcpa(1+171/256))= +5.13107e-001
-data8 0x3fe078bf0533c568, 0x3c62241edf5fd1f7 // log(1/frcpa(1+172/256))= +5.14740e-001
-data8 0x3fe08cd9687e7b0c, 0x3cb3007febcca227 // log(1/frcpa(1+173/256))= +5.17194e-001
-data8 0x3fe0a10074cf9018, 0x3ca496e84603816b // log(1/frcpa(1+174/256))= +5.19654e-001
-data8 0x3fe0b5343a234474, 0x3cb46098d14fc90a // log(1/frcpa(1+175/256))= +5.22120e-001
-data8 0x3fe0c974c89431cc, 0x3cac0a7cdcbb86c6 // log(1/frcpa(1+176/256))= +5.24592e-001
-data8 0x3fe0ddc2305b9884, 0x3cb2f753210410ff // log(1/frcpa(1+177/256))= +5.27070e-001
-data8 0x3fe0eb524bafc918, 0x3c88affd6682229e // log(1/frcpa(1+178/256))= +5.28726e-001
-data8 0x3fe0ffb54213a474, 0x3cadeefbab9af993 // log(1/frcpa(1+179/256))= +5.31214e-001
-data8 0x3fe114253da97d9c, 0x3cbaf1c2b8bc160a // log(1/frcpa(1+180/256))= +5.33709e-001
-data8 0x3fe128a24f1d9afc, 0x3cb9cf4df375e650 // log(1/frcpa(1+181/256))= +5.36210e-001
-data8 0x3fe1365252bf0864, 0x3c985a621d4be111 // log(1/frcpa(1+182/256))= +5.37881e-001
-data8 0x3fe14ae558b4a92c, 0x3ca104c4aa8977d1 // log(1/frcpa(1+183/256))= +5.40393e-001
-data8 0x3fe15f85a19c7658, 0x3cbadf26e540f375 // log(1/frcpa(1+184/256))= +5.42910e-001
-data8 0x3fe16d4d38c119f8, 0x3cb3aea11caec416 // log(1/frcpa(1+185/256))= +5.44592e-001
-data8 0x3fe18203c20dd130, 0x3cba82d1211d1d6d // log(1/frcpa(1+186/256))= +5.47121e-001
-data8 0x3fe196c7bc4b1f38, 0x3cb6267acc4f4f4a // log(1/frcpa(1+187/256))= +5.49656e-001
-data8 0x3fe1a4a738b7a33c, 0x3c858930213c987d // log(1/frcpa(1+188/256))= +5.51349e-001
-data8 0x3fe1b981c0c9653c, 0x3c9bc2a4a30f697b // log(1/frcpa(1+189/256))= +5.53895e-001
-data8 0x3fe1ce69e8bb1068, 0x3cb7ae6199cf2a00 // log(1/frcpa(1+190/256))= +5.56447e-001
-data8 0x3fe1dc619de06944, 0x3c6b50bb38388177 // log(1/frcpa(1+191/256))= +5.58152e-001
-data8 0x3fe1f160a2ad0da0, 0x3cbd05b2778a5e1d // log(1/frcpa(1+192/256))= +5.60715e-001
-data8 0x3fe2066d7740737c, 0x3cb32e828f9c6bd6 // log(1/frcpa(1+193/256))= +5.63285e-001
-data8 0x3fe2147dba47a390, 0x3cbd579851b8b672 // log(1/frcpa(1+194/256))= +5.65001e-001
-data8 0x3fe229a1bc5ebac0, 0x3cbb321be5237ce8 // log(1/frcpa(1+195/256))= +5.67582e-001
-data8 0x3fe237c1841a502c, 0x3cb3b56e0915ea64 // log(1/frcpa(1+196/256))= +5.69306e-001
-data8 0x3fe24cfce6f80d98, 0x3cb34a4d1a422919 // log(1/frcpa(1+197/256))= +5.71898e-001
-data8 0x3fe25b2c55cd5760, 0x3cb237401ea5015e // log(1/frcpa(1+198/256))= +5.73630e-001
-data8 0x3fe2707f4d5f7c40, 0x3c9d30f20acc8341 // log(1/frcpa(1+199/256))= +5.76233e-001
-data8 0x3fe285e0842ca380, 0x3cbc4d866d5f21c0 // log(1/frcpa(1+200/256))= +5.78842e-001
-data8 0x3fe294294708b770, 0x3cb85e14d5dc54fa // log(1/frcpa(1+201/256))= +5.80586e-001
-data8 0x3fe2a9a2670aff0c, 0x3c7e6f8f468bbf91 // log(1/frcpa(1+202/256))= +5.83207e-001
-data8 0x3fe2b7fb2c8d1cc0, 0x3c930ffcf63c8b65 // log(1/frcpa(1+203/256))= +5.84959e-001
-data8 0x3fe2c65a6395f5f4, 0x3ca0afe20b53d2d2 // log(1/frcpa(1+204/256))= +5.86713e-001
-data8 0x3fe2dbf557b0df40, 0x3cb646be1188fbc9 // log(1/frcpa(1+205/256))= +5.89350e-001
-data8 0x3fe2ea64c3f97654, 0x3c96516fa8df33b2 // log(1/frcpa(1+206/256))= +5.91113e-001
-data8 0x3fe3001823684d70, 0x3cb96d64e16d1360 // log(1/frcpa(1+207/256))= +5.93762e-001
-data8 0x3fe30e97e9a8b5cc, 0x3c98ef96bc97cca0 // log(1/frcpa(1+208/256))= +5.95531e-001
-data8 0x3fe32463ebdd34e8, 0x3caef1dc9a56c1bf // log(1/frcpa(1+209/256))= +5.98192e-001
-data8 0x3fe332f4314ad794, 0x3caa4f0ac5d5fa11 // log(1/frcpa(1+210/256))= +5.99970e-001
-data8 0x3fe348d90e7464cc, 0x3cbe7889f0516acd // log(1/frcpa(1+211/256))= +6.02643e-001
-data8 0x3fe35779f8c43d6c, 0x3ca96bbab7245411 // log(1/frcpa(1+212/256))= +6.04428e-001
-data8 0x3fe36621961a6a98, 0x3ca31f32262db9fb // log(1/frcpa(1+213/256))= +6.06217e-001
-data8 0x3fe37c299f3c3668, 0x3cb15c72c107ee29 // log(1/frcpa(1+214/256))= +6.08907e-001
-data8 0x3fe38ae2171976e4, 0x3cba42a2554b2dd4 // log(1/frcpa(1+215/256))= +6.10704e-001
-data8 0x3fe399a157a603e4, 0x3cb99c62286d8919 // log(1/frcpa(1+216/256))= +6.12504e-001
-data8 0x3fe3afccfe77b9d0, 0x3ca11048f96a43bd // log(1/frcpa(1+217/256))= +6.15210e-001
-data8 0x3fe3be9d503533b4, 0x3ca4022f47588c3e // log(1/frcpa(1+218/256))= +6.17018e-001
-data8 0x3fe3cd7480b4a8a0, 0x3cb4ba7afc2dc56a // log(1/frcpa(1+219/256))= +6.18830e-001
-data8 0x3fe3e3c43918f76c, 0x3c859673d064b8ba // log(1/frcpa(1+220/256))= +6.21554e-001
-data8 0x3fe3f2acb27ed6c4, 0x3cb55c6b452a16a8 // log(1/frcpa(1+221/256))= +6.23373e-001
-data8 0x3fe4019c2125ca90, 0x3cb8c367879c5a31 // log(1/frcpa(1+222/256))= +6.25197e-001
-data8 0x3fe4181061389720, 0x3cb2c17a79c5cc6c // log(1/frcpa(1+223/256))= +6.27937e-001
-data8 0x3fe42711518df544, 0x3ca5f38d47012fc5 // log(1/frcpa(1+224/256))= +6.29769e-001
-data8 0x3fe436194e12b6bc, 0x3cb9854d65a9b426 // log(1/frcpa(1+225/256))= +6.31604e-001
-data8 0x3fe445285d68ea68, 0x3ca3ff9b3a81cd81 // log(1/frcpa(1+226/256))= +6.33442e-001
-data8 0x3fe45bcc464c8938, 0x3cb0a2d8011a6c05 // log(1/frcpa(1+227/256))= +6.36206e-001
-data8 0x3fe46aed21f117fc, 0x3c8a2be41f8e9f3d // log(1/frcpa(1+228/256))= +6.38053e-001
-data8 0x3fe47a1527e8a2d0, 0x3cba4a83594fab09 // log(1/frcpa(1+229/256))= +6.39903e-001
-data8 0x3fe489445efffcc8, 0x3cbf306a23dcbcde // log(1/frcpa(1+230/256))= +6.41756e-001
-data8 0x3fe4a018bcb69834, 0x3ca46c9285029fd1 // log(1/frcpa(1+231/256))= +6.44543e-001
-data8 0x3fe4af5a0c9d65d4, 0x3cbbc1db897580e3 // log(1/frcpa(1+232/256))= +6.46405e-001
-data8 0x3fe4bea2a5bdbe84, 0x3cb84d880d7ef775 // log(1/frcpa(1+233/256))= +6.48271e-001
-data8 0x3fe4cdf28f10ac44, 0x3cb3ec4b7893ce1f // log(1/frcpa(1+234/256))= +6.50140e-001
-data8 0x3fe4dd49cf994058, 0x3c897224d59d3408 // log(1/frcpa(1+235/256))= +6.52013e-001
-data8 0x3fe4eca86e64a680, 0x3cbccf620f24f0cd // log(1/frcpa(1+236/256))= +6.53889e-001
-data8 0x3fe503c43cd8eb68, 0x3c3f872c65971084 // log(1/frcpa(1+237/256))= +6.56710e-001
-data8 0x3fe513356667fc54, 0x3cb9ca64cc3d52c8 // log(1/frcpa(1+238/256))= +6.58595e-001
-data8 0x3fe522ae0738a3d4, 0x3cbe708164c75968 // log(1/frcpa(1+239/256))= +6.60483e-001
-data8 0x3fe5322e26867854, 0x3cb9988ba4aea615 // log(1/frcpa(1+240/256))= +6.62376e-001
-data8 0x3fe541b5cb979808, 0x3ca1662e3a6b95f5 // log(1/frcpa(1+241/256))= +6.64271e-001
-data8 0x3fe55144fdbcbd60, 0x3cb3acd4ca45c1e0 // log(1/frcpa(1+242/256))= +6.66171e-001
-data8 0x3fe560dbc45153c4, 0x3cb4988947959fed // log(1/frcpa(1+243/256))= +6.68074e-001
-data8 0x3fe5707a26bb8c64, 0x3cb3017fe6607ba9 // log(1/frcpa(1+244/256))= +6.69980e-001
-data8 0x3fe587f60ed5b8fc, 0x3cbe7a3266366ed4 // log(1/frcpa(1+245/256))= +6.72847e-001
-data8 0x3fe597a7977c8f30, 0x3ca1e12b9959a90e // log(1/frcpa(1+246/256))= +6.74763e-001
-data8 0x3fe5a760d634bb88, 0x3cb7c365e53d9602 // log(1/frcpa(1+247/256))= +6.76682e-001
-data8 0x3fe5b721d295f10c, 0x3cb716c2551ccbf0 // log(1/frcpa(1+248/256))= +6.78605e-001
-data8 0x3fe5c6ea94431ef8, 0x3ca02b2ed0e28261 // log(1/frcpa(1+249/256))= +6.80532e-001
-data8 0x3fe5d6bb22ea86f4, 0x3caf43a8bbb2f974 // log(1/frcpa(1+250/256))= +6.82462e-001
-data8 0x3fe5e6938645d38c, 0x3cbcedc98821b333 // log(1/frcpa(1+251/256))= +6.84397e-001
-data8 0x3fe5f673c61a2ed0, 0x3caa385eef5f2789 // log(1/frcpa(1+252/256))= +6.86335e-001
-data8 0x3fe6065bea385924, 0x3cb11624f165c5b4 // log(1/frcpa(1+253/256))= +6.88276e-001
-data8 0x3fe6164bfa7cc068, 0x3cbad884f87073fa // log(1/frcpa(1+254/256))= +6.90222e-001
-data8 0x3fe62643fecf9740, 0x3cb78c51da12f4df // log(1/frcpa(1+255/256))= +6.92171e-001
+data8 0x3f60040155d58800 // log(1/frcpa(1+0/256))= +1.95503e-003
+data8 0x3f78121214586a00 // log(1/frcpa(1+1/256))= +5.87661e-003
+data8 0x3f841929f9683200 // log(1/frcpa(1+2/256))= +9.81362e-003
+data8 0x3f8c317384c75f00 // log(1/frcpa(1+3/256))= +1.37662e-002
+data8 0x3f91a6b91ac73380 // log(1/frcpa(1+4/256))= +1.72376e-002
+data8 0x3f95ba9a5d9ac000 // log(1/frcpa(1+5/256))= +2.12196e-002
+data8 0x3f99d2a807432580 // log(1/frcpa(1+6/256))= +2.52177e-002
+data8 0x3f9d6b2725979800 // log(1/frcpa(1+7/256))= +2.87291e-002
+data8 0x3fa0c58fa19dfa80 // log(1/frcpa(1+8/256))= +3.27573e-002
+data8 0x3fa2954c78cbce00 // log(1/frcpa(1+9/256))= +3.62953e-002
+data8 0x3fa4a94d2da96c40 // log(1/frcpa(1+10/256))= +4.03542e-002
+data8 0x3fa67c94f2d4bb40 // log(1/frcpa(1+11/256))= +4.39192e-002
+data8 0x3fa85188b630f040 // log(1/frcpa(1+12/256))= +4.74971e-002
+data8 0x3faa6b8abe73af40 // log(1/frcpa(1+13/256))= +5.16017e-002
+data8 0x3fac441e06f72a80 // log(1/frcpa(1+14/256))= +5.52072e-002
+data8 0x3fae1e6713606d00 // log(1/frcpa(1+15/256))= +5.88257e-002
+data8 0x3faffa6911ab9300 // log(1/frcpa(1+16/256))= +6.24574e-002
+data8 0x3fb0ec139c5da600 // log(1/frcpa(1+17/256))= +6.61022e-002
+data8 0x3fb1dbd2643d1900 // log(1/frcpa(1+18/256))= +6.97605e-002
+data8 0x3fb2cc7284fe5f00 // log(1/frcpa(1+19/256))= +7.34321e-002
+data8 0x3fb3bdf5a7d1ee60 // log(1/frcpa(1+20/256))= +7.71173e-002
+data8 0x3fb4b05d7aa012e0 // log(1/frcpa(1+21/256))= +8.08161e-002
+data8 0x3fb580db7ceb5700 // log(1/frcpa(1+22/256))= +8.39975e-002
+data8 0x3fb674f089365a60 // log(1/frcpa(1+23/256))= +8.77219e-002
+data8 0x3fb769ef2c6b5680 // log(1/frcpa(1+24/256))= +9.14602e-002
+data8 0x3fb85fd927506a40 // log(1/frcpa(1+25/256))= +9.52125e-002
+data8 0x3fb9335e5d594980 // log(1/frcpa(1+26/256))= +9.84401e-002
+data8 0x3fba2b0220c8e5e0 // log(1/frcpa(1+27/256))= +1.02219e-001
+data8 0x3fbb0004ac1a86a0 // log(1/frcpa(1+28/256))= +1.05469e-001
+data8 0x3fbbf968769fca00 // log(1/frcpa(1+29/256))= +1.09274e-001
+data8 0x3fbccfedbfee13a0 // log(1/frcpa(1+30/256))= +1.12548e-001
+data8 0x3fbda727638446a0 // log(1/frcpa(1+31/256))= +1.15832e-001
+data8 0x3fbea3257fe10f60 // log(1/frcpa(1+32/256))= +1.19677e-001
+data8 0x3fbf7be9fedbfde0 // log(1/frcpa(1+33/256))= +1.22985e-001
+data8 0x3fc02ab352ff25f0 // log(1/frcpa(1+34/256))= +1.26303e-001
+data8 0x3fc097ce579d2040 // log(1/frcpa(1+35/256))= +1.29633e-001
+data8 0x3fc1178e8227e470 // log(1/frcpa(1+36/256))= +1.33531e-001
+data8 0x3fc185747dbecf30 // log(1/frcpa(1+37/256))= +1.36885e-001
+data8 0x3fc1f3b925f25d40 // log(1/frcpa(1+38/256))= +1.40250e-001
+data8 0x3fc2625d1e6ddf50 // log(1/frcpa(1+39/256))= +1.43627e-001
+data8 0x3fc2d1610c868130 // log(1/frcpa(1+40/256))= +1.47015e-001
+data8 0x3fc340c597411420 // log(1/frcpa(1+41/256))= +1.50414e-001
+data8 0x3fc3b08b6757f2a0 // log(1/frcpa(1+42/256))= +1.53825e-001
+data8 0x3fc40dfb08378000 // log(1/frcpa(1+43/256))= +1.56677e-001
+data8 0x3fc47e74e8ca5f70 // log(1/frcpa(1+44/256))= +1.60109e-001
+data8 0x3fc4ef51f6466de0 // log(1/frcpa(1+45/256))= +1.63553e-001
+data8 0x3fc56092e02ba510 // log(1/frcpa(1+46/256))= +1.67010e-001
+data8 0x3fc5d23857cd74d0 // log(1/frcpa(1+47/256))= +1.70478e-001
+data8 0x3fc6313a37335d70 // log(1/frcpa(1+48/256))= +1.73377e-001
+data8 0x3fc6a399dabbd380 // log(1/frcpa(1+49/256))= +1.76868e-001
+data8 0x3fc70337dd3ce410 // log(1/frcpa(1+50/256))= +1.79786e-001
+data8 0x3fc77654128f6120 // log(1/frcpa(1+51/256))= +1.83299e-001
+data8 0x3fc7e9d82a0b0220 // log(1/frcpa(1+52/256))= +1.86824e-001
+data8 0x3fc84a6b759f5120 // log(1/frcpa(1+53/256))= +1.89771e-001
+data8 0x3fc8ab47d5f5a300 // log(1/frcpa(1+54/256))= +1.92727e-001
+data8 0x3fc91fe490965810 // log(1/frcpa(1+55/256))= +1.96286e-001
+data8 0x3fc981634011aa70 // log(1/frcpa(1+56/256))= +1.99261e-001
+data8 0x3fc9f6c407089660 // log(1/frcpa(1+57/256))= +2.02843e-001
+data8 0x3fca58e729348f40 // log(1/frcpa(1+58/256))= +2.05838e-001
+data8 0x3fcabb55c31693a0 // log(1/frcpa(1+59/256))= +2.08842e-001
+data8 0x3fcb1e104919efd0 // log(1/frcpa(1+60/256))= +2.11855e-001
+data8 0x3fcb94ee93e367c0 // log(1/frcpa(1+61/256))= +2.15483e-001
+data8 0x3fcbf851c0675550 // log(1/frcpa(1+62/256))= +2.18516e-001
+data8 0x3fcc5c0254bf23a0 // log(1/frcpa(1+63/256))= +2.21558e-001
+data8 0x3fccc000c9db3c50 // log(1/frcpa(1+64/256))= +2.24609e-001
+data8 0x3fcd244d99c85670 // log(1/frcpa(1+65/256))= +2.27670e-001
+data8 0x3fcd88e93fb2f450 // log(1/frcpa(1+66/256))= +2.30741e-001
+data8 0x3fcdedd437eaef00 // log(1/frcpa(1+67/256))= +2.33820e-001
+data8 0x3fce530effe71010 // log(1/frcpa(1+68/256))= +2.36910e-001
+data8 0x3fceb89a1648b970 // log(1/frcpa(1+69/256))= +2.40009e-001
+data8 0x3fcf1e75fadf9bd0 // log(1/frcpa(1+70/256))= +2.43117e-001
+data8 0x3fcf84a32ead7c30 // log(1/frcpa(1+71/256))= +2.46235e-001
+data8 0x3fcfeb2233ea07c0 // log(1/frcpa(1+72/256))= +2.49363e-001
+data8 0x3fd028f9c7035c18 // log(1/frcpa(1+73/256))= +2.52501e-001
+data8 0x3fd05c8be0d96358 // log(1/frcpa(1+74/256))= +2.55649e-001
+data8 0x3fd085eb8f8ae790 // log(1/frcpa(1+75/256))= +2.58174e-001
+data8 0x3fd0b9c8e32d1910 // log(1/frcpa(1+76/256))= +2.61339e-001
+data8 0x3fd0edd060b78080 // log(1/frcpa(1+77/256))= +2.64515e-001
+data8 0x3fd122024cf00638 // log(1/frcpa(1+78/256))= +2.67701e-001
+data8 0x3fd14be2927aecd0 // log(1/frcpa(1+79/256))= +2.70257e-001
+data8 0x3fd180618ef18ad8 // log(1/frcpa(1+80/256))= +2.73461e-001
+data8 0x3fd1b50bbe2fc638 // log(1/frcpa(1+81/256))= +2.76675e-001
+data8 0x3fd1df4cc7cf2428 // log(1/frcpa(1+82/256))= +2.79254e-001
+data8 0x3fd214456d0eb8d0 // log(1/frcpa(1+83/256))= +2.82487e-001
+data8 0x3fd23ec5991eba48 // log(1/frcpa(1+84/256))= +2.85081e-001
+data8 0x3fd2740d9f870af8 // log(1/frcpa(1+85/256))= +2.88333e-001
+data8 0x3fd29ecdabcdfa00 // log(1/frcpa(1+86/256))= +2.90943e-001
+data8 0x3fd2d46602adcce8 // log(1/frcpa(1+87/256))= +2.94214e-001
+data8 0x3fd2ff66b04ea9d0 // log(1/frcpa(1+88/256))= +2.96838e-001
+data8 0x3fd335504b355a30 // log(1/frcpa(1+89/256))= +3.00129e-001
+data8 0x3fd360925ec44f58 // log(1/frcpa(1+90/256))= +3.02769e-001
+data8 0x3fd38bf1c3337e70 // log(1/frcpa(1+91/256))= +3.05417e-001
+data8 0x3fd3c25277333180 // log(1/frcpa(1+92/256))= +3.08735e-001
+data8 0x3fd3edf463c16838 // log(1/frcpa(1+93/256))= +3.11399e-001
+data8 0x3fd419b423d5e8c0 // log(1/frcpa(1+94/256))= +3.14069e-001
+data8 0x3fd44591e0539f48 // log(1/frcpa(1+95/256))= +3.16746e-001
+data8 0x3fd47c9175b6f0a8 // log(1/frcpa(1+96/256))= +3.20103e-001
+data8 0x3fd4a8b341552b08 // log(1/frcpa(1+97/256))= +3.22797e-001
+data8 0x3fd4d4f390890198 // log(1/frcpa(1+98/256))= +3.25498e-001
+data8 0x3fd501528da1f960 // log(1/frcpa(1+99/256))= +3.28206e-001
+data8 0x3fd52dd06347d4f0 // log(1/frcpa(1+100/256))= +3.30921e-001
+data8 0x3fd55a6d3c7b8a88 // log(1/frcpa(1+101/256))= +3.33644e-001
+data8 0x3fd5925d2b112a58 // log(1/frcpa(1+102/256))= +3.37058e-001
+data8 0x3fd5bf406b543db0 // log(1/frcpa(1+103/256))= +3.39798e-001
+data8 0x3fd5ec433d5c35a8 // log(1/frcpa(1+104/256))= +3.42545e-001
+data8 0x3fd61965cdb02c18 // log(1/frcpa(1+105/256))= +3.45300e-001
+data8 0x3fd646a84935b2a0 // log(1/frcpa(1+106/256))= +3.48063e-001
+data8 0x3fd6740add31de90 // log(1/frcpa(1+107/256))= +3.50833e-001
+data8 0x3fd6a18db74a58c0 // log(1/frcpa(1+108/256))= +3.53610e-001
+data8 0x3fd6cf31058670e8 // log(1/frcpa(1+109/256))= +3.56396e-001
+data8 0x3fd6f180e852f0b8 // log(1/frcpa(1+110/256))= +3.58490e-001
+data8 0x3fd71f5d71b894e8 // log(1/frcpa(1+111/256))= +3.61289e-001
+data8 0x3fd74d5aefd66d58 // log(1/frcpa(1+112/256))= +3.64096e-001
+data8 0x3fd77b79922bd378 // log(1/frcpa(1+113/256))= +3.66911e-001
+data8 0x3fd7a9b9889f19e0 // log(1/frcpa(1+114/256))= +3.69734e-001
+data8 0x3fd7d81b037eb6a0 // log(1/frcpa(1+115/256))= +3.72565e-001
+data8 0x3fd8069e33827230 // log(1/frcpa(1+116/256))= +3.75404e-001
+data8 0x3fd82996d3ef8bc8 // log(1/frcpa(1+117/256))= +3.77538e-001
+data8 0x3fd85855776dcbf8 // log(1/frcpa(1+118/256))= +3.80391e-001
+data8 0x3fd8873658327cc8 // log(1/frcpa(1+119/256))= +3.83253e-001
+data8 0x3fd8aa75973ab8c8 // log(1/frcpa(1+120/256))= +3.85404e-001
+data8 0x3fd8d992dc8824e0 // log(1/frcpa(1+121/256))= +3.88280e-001
+data8 0x3fd908d2ea7d9510 // log(1/frcpa(1+122/256))= +3.91164e-001
+data8 0x3fd92c59e79c0e50 // log(1/frcpa(1+123/256))= +3.93332e-001
+data8 0x3fd95bd750ee3ed0 // log(1/frcpa(1+124/256))= +3.96231e-001
+data8 0x3fd98b7811a3ee58 // log(1/frcpa(1+125/256))= +3.99138e-001
+data8 0x3fd9af47f33d4068 // log(1/frcpa(1+126/256))= +4.01323e-001
+data8 0x3fd9df270c1914a0 // log(1/frcpa(1+127/256))= +4.04245e-001
+data8 0x3fda0325ed14fda0 // log(1/frcpa(1+128/256))= +4.06442e-001
+data8 0x3fda33440224fa78 // log(1/frcpa(1+129/256))= +4.09379e-001
+data8 0x3fda57725e80c380 // log(1/frcpa(1+130/256))= +4.11587e-001
+data8 0x3fda87d0165dd198 // log(1/frcpa(1+131/256))= +4.14539e-001
+data8 0x3fdaac2e6c03f890 // log(1/frcpa(1+132/256))= +4.16759e-001
+data8 0x3fdadccc6fdf6a80 // log(1/frcpa(1+133/256))= +4.19726e-001
+data8 0x3fdb015b3eb1e790 // log(1/frcpa(1+134/256))= +4.21958e-001
+data8 0x3fdb323a3a635948 // log(1/frcpa(1+135/256))= +4.24941e-001
+data8 0x3fdb56fa04462908 // log(1/frcpa(1+136/256))= +4.27184e-001
+data8 0x3fdb881aa659bc90 // log(1/frcpa(1+137/256))= +4.30182e-001
+data8 0x3fdbad0bef3db160 // log(1/frcpa(1+138/256))= +4.32437e-001
+data8 0x3fdbd21297781c28 // log(1/frcpa(1+139/256))= +4.34697e-001
+data8 0x3fdc039236f08818 // log(1/frcpa(1+140/256))= +4.37718e-001
+data8 0x3fdc28cb1e4d32f8 // log(1/frcpa(1+141/256))= +4.39990e-001
+data8 0x3fdc4e19b84723c0 // log(1/frcpa(1+142/256))= +4.42267e-001
+data8 0x3fdc7ff9c74554c8 // log(1/frcpa(1+143/256))= +4.45311e-001
+data8 0x3fdca57b64e9db00 // log(1/frcpa(1+144/256))= +4.47600e-001
+data8 0x3fdccb130a5ceba8 // log(1/frcpa(1+145/256))= +4.49895e-001
+data8 0x3fdcf0c0d18f3268 // log(1/frcpa(1+146/256))= +4.52194e-001
+data8 0x3fdd232075b5a200 // log(1/frcpa(1+147/256))= +4.55269e-001
+data8 0x3fdd490246defa68 // log(1/frcpa(1+148/256))= +4.57581e-001
+data8 0x3fdd6efa918d25c8 // log(1/frcpa(1+149/256))= +4.59899e-001
+data8 0x3fdd9509707ae528 // log(1/frcpa(1+150/256))= +4.62221e-001
+data8 0x3fddbb2efe92c550 // log(1/frcpa(1+151/256))= +4.64550e-001
+data8 0x3fddee2f3445e4a8 // log(1/frcpa(1+152/256))= +4.67663e-001
+data8 0x3fde148a1a2726c8 // log(1/frcpa(1+153/256))= +4.70004e-001
+data8 0x3fde3afc0a49ff38 // log(1/frcpa(1+154/256))= +4.72350e-001
+data8 0x3fde6185206d5168 // log(1/frcpa(1+155/256))= +4.74702e-001
+data8 0x3fde882578823d50 // log(1/frcpa(1+156/256))= +4.77060e-001
+data8 0x3fdeaedd2eac9908 // log(1/frcpa(1+157/256))= +4.79423e-001
+data8 0x3fded5ac5f436be0 // log(1/frcpa(1+158/256))= +4.81792e-001
+data8 0x3fdefc9326d16ab8 // log(1/frcpa(1+159/256))= +4.84166e-001
+data8 0x3fdf2391a21575f8 // log(1/frcpa(1+160/256))= +4.86546e-001
+data8 0x3fdf4aa7ee031928 // log(1/frcpa(1+161/256))= +4.88932e-001
+data8 0x3fdf71d627c30bb0 // log(1/frcpa(1+162/256))= +4.91323e-001
+data8 0x3fdf991c6cb3b378 // log(1/frcpa(1+163/256))= +4.93720e-001
+data8 0x3fdfc07ada69a908 // log(1/frcpa(1+164/256))= +4.96123e-001
+data8 0x3fdfe7f18eb03d38 // log(1/frcpa(1+165/256))= +4.98532e-001
+data8 0x3fe007c053c5002c // log(1/frcpa(1+166/256))= +5.00946e-001
+data8 0x3fe01b942198a5a0 // log(1/frcpa(1+167/256))= +5.03367e-001
+data8 0x3fe02f74400c64e8 // log(1/frcpa(1+168/256))= +5.05793e-001
+data8 0x3fe04360be7603ac // log(1/frcpa(1+169/256))= +5.08225e-001
+data8 0x3fe05759ac47fe30 // log(1/frcpa(1+170/256))= +5.10663e-001
+data8 0x3fe06b5f1911cf50 // log(1/frcpa(1+171/256))= +5.13107e-001
+data8 0x3fe078bf0533c568 // log(1/frcpa(1+172/256))= +5.14740e-001
+data8 0x3fe08cd9687e7b0c // log(1/frcpa(1+173/256))= +5.17194e-001
+data8 0x3fe0a10074cf9018 // log(1/frcpa(1+174/256))= +5.19654e-001
+data8 0x3fe0b5343a234474 // log(1/frcpa(1+175/256))= +5.22120e-001
+data8 0x3fe0c974c89431cc // log(1/frcpa(1+176/256))= +5.24592e-001
+data8 0x3fe0ddc2305b9884 // log(1/frcpa(1+177/256))= +5.27070e-001
+data8 0x3fe0eb524bafc918 // log(1/frcpa(1+178/256))= +5.28726e-001
+data8 0x3fe0ffb54213a474 // log(1/frcpa(1+179/256))= +5.31214e-001
+data8 0x3fe114253da97d9c // log(1/frcpa(1+180/256))= +5.33709e-001
+data8 0x3fe128a24f1d9afc // log(1/frcpa(1+181/256))= +5.36210e-001
+data8 0x3fe1365252bf0864 // log(1/frcpa(1+182/256))= +5.37881e-001
+data8 0x3fe14ae558b4a92c // log(1/frcpa(1+183/256))= +5.40393e-001
+data8 0x3fe15f85a19c7658 // log(1/frcpa(1+184/256))= +5.42910e-001
+data8 0x3fe16d4d38c119f8 // log(1/frcpa(1+185/256))= +5.44592e-001
+data8 0x3fe18203c20dd130 // log(1/frcpa(1+186/256))= +5.47121e-001
+data8 0x3fe196c7bc4b1f38 // log(1/frcpa(1+187/256))= +5.49656e-001
+data8 0x3fe1a4a738b7a33c // log(1/frcpa(1+188/256))= +5.51349e-001
+data8 0x3fe1b981c0c9653c // log(1/frcpa(1+189/256))= +5.53895e-001
+data8 0x3fe1ce69e8bb1068 // log(1/frcpa(1+190/256))= +5.56447e-001
+data8 0x3fe1dc619de06944 // log(1/frcpa(1+191/256))= +5.58152e-001
+data8 0x3fe1f160a2ad0da0 // log(1/frcpa(1+192/256))= +5.60715e-001
+data8 0x3fe2066d7740737c // log(1/frcpa(1+193/256))= +5.63285e-001
+data8 0x3fe2147dba47a390 // log(1/frcpa(1+194/256))= +5.65001e-001
+data8 0x3fe229a1bc5ebac0 // log(1/frcpa(1+195/256))= +5.67582e-001
+data8 0x3fe237c1841a502c // log(1/frcpa(1+196/256))= +5.69306e-001
+data8 0x3fe24cfce6f80d98 // log(1/frcpa(1+197/256))= +5.71898e-001
+data8 0x3fe25b2c55cd5760 // log(1/frcpa(1+198/256))= +5.73630e-001
+data8 0x3fe2707f4d5f7c40 // log(1/frcpa(1+199/256))= +5.76233e-001
+data8 0x3fe285e0842ca380 // log(1/frcpa(1+200/256))= +5.78842e-001
+data8 0x3fe294294708b770 // log(1/frcpa(1+201/256))= +5.80586e-001
+data8 0x3fe2a9a2670aff0c // log(1/frcpa(1+202/256))= +5.83207e-001
+data8 0x3fe2b7fb2c8d1cc0 // log(1/frcpa(1+203/256))= +5.84959e-001
+data8 0x3fe2c65a6395f5f4 // log(1/frcpa(1+204/256))= +5.86713e-001
+data8 0x3fe2dbf557b0df40 // log(1/frcpa(1+205/256))= +5.89350e-001
+data8 0x3fe2ea64c3f97654 // log(1/frcpa(1+206/256))= +5.91113e-001
+data8 0x3fe3001823684d70 // log(1/frcpa(1+207/256))= +5.93762e-001
+data8 0x3fe30e97e9a8b5cc // log(1/frcpa(1+208/256))= +5.95531e-001
+data8 0x3fe32463ebdd34e8 // log(1/frcpa(1+209/256))= +5.98192e-001
+data8 0x3fe332f4314ad794 // log(1/frcpa(1+210/256))= +5.99970e-001
+data8 0x3fe348d90e7464cc // log(1/frcpa(1+211/256))= +6.02643e-001
+data8 0x3fe35779f8c43d6c // log(1/frcpa(1+212/256))= +6.04428e-001
+data8 0x3fe36621961a6a98 // log(1/frcpa(1+213/256))= +6.06217e-001
+data8 0x3fe37c299f3c3668 // log(1/frcpa(1+214/256))= +6.08907e-001
+data8 0x3fe38ae2171976e4 // log(1/frcpa(1+215/256))= +6.10704e-001
+data8 0x3fe399a157a603e4 // log(1/frcpa(1+216/256))= +6.12504e-001
+data8 0x3fe3afccfe77b9d0 // log(1/frcpa(1+217/256))= +6.15210e-001
+data8 0x3fe3be9d503533b4 // log(1/frcpa(1+218/256))= +6.17018e-001
+data8 0x3fe3cd7480b4a8a0 // log(1/frcpa(1+219/256))= +6.18830e-001
+data8 0x3fe3e3c43918f76c // log(1/frcpa(1+220/256))= +6.21554e-001
+data8 0x3fe3f2acb27ed6c4 // log(1/frcpa(1+221/256))= +6.23373e-001
+data8 0x3fe4019c2125ca90 // log(1/frcpa(1+222/256))= +6.25197e-001
+data8 0x3fe4181061389720 // log(1/frcpa(1+223/256))= +6.27937e-001
+data8 0x3fe42711518df544 // log(1/frcpa(1+224/256))= +6.29769e-001
+data8 0x3fe436194e12b6bc // log(1/frcpa(1+225/256))= +6.31604e-001
+data8 0x3fe445285d68ea68 // log(1/frcpa(1+226/256))= +6.33442e-001
+data8 0x3fe45bcc464c8938 // log(1/frcpa(1+227/256))= +6.36206e-001
+data8 0x3fe46aed21f117fc // log(1/frcpa(1+228/256))= +6.38053e-001
+data8 0x3fe47a1527e8a2d0 // log(1/frcpa(1+229/256))= +6.39903e-001
+data8 0x3fe489445efffcc8 // log(1/frcpa(1+230/256))= +6.41756e-001
+data8 0x3fe4a018bcb69834 // log(1/frcpa(1+231/256))= +6.44543e-001
+data8 0x3fe4af5a0c9d65d4 // log(1/frcpa(1+232/256))= +6.46405e-001
+data8 0x3fe4bea2a5bdbe84 // log(1/frcpa(1+233/256))= +6.48271e-001
+data8 0x3fe4cdf28f10ac44 // log(1/frcpa(1+234/256))= +6.50140e-001
+data8 0x3fe4dd49cf994058 // log(1/frcpa(1+235/256))= +6.52013e-001
+data8 0x3fe4eca86e64a680 // log(1/frcpa(1+236/256))= +6.53889e-001
+data8 0x3fe503c43cd8eb68 // log(1/frcpa(1+237/256))= +6.56710e-001
+data8 0x3fe513356667fc54 // log(1/frcpa(1+238/256))= +6.58595e-001
+data8 0x3fe522ae0738a3d4 // log(1/frcpa(1+239/256))= +6.60483e-001
+data8 0x3fe5322e26867854 // log(1/frcpa(1+240/256))= +6.62376e-001
+data8 0x3fe541b5cb979808 // log(1/frcpa(1+241/256))= +6.64271e-001
+data8 0x3fe55144fdbcbd60 // log(1/frcpa(1+242/256))= +6.66171e-001
+data8 0x3fe560dbc45153c4 // log(1/frcpa(1+243/256))= +6.68074e-001
+data8 0x3fe5707a26bb8c64 // log(1/frcpa(1+244/256))= +6.69980e-001
+data8 0x3fe587f60ed5b8fc // log(1/frcpa(1+245/256))= +6.72847e-001
+data8 0x3fe597a7977c8f30 // log(1/frcpa(1+246/256))= +6.74763e-001
+data8 0x3fe5a760d634bb88 // log(1/frcpa(1+247/256))= +6.76682e-001
+data8 0x3fe5b721d295f10c // log(1/frcpa(1+248/256))= +6.78605e-001
+data8 0x3fe5c6ea94431ef8 // log(1/frcpa(1+249/256))= +6.80532e-001
+data8 0x3fe5d6bb22ea86f4 // log(1/frcpa(1+250/256))= +6.82462e-001
+data8 0x3fe5e6938645d38c // log(1/frcpa(1+251/256))= +6.84397e-001
+data8 0x3fe5f673c61a2ed0 // log(1/frcpa(1+252/256))= +6.86335e-001
+data8 0x3fe6065bea385924 // log(1/frcpa(1+253/256))= +6.88276e-001
+data8 0x3fe6164bfa7cc068 // log(1/frcpa(1+254/256))= +6.90222e-001
+data8 0x3fe62643fecf9740 // log(1/frcpa(1+255/256))= +6.92171e-001
LOCAL_OBJECT_END(pow_Tt)
@@ -909,14 +879,14 @@ GLOBAL_LIBM_ENTRY(powf)
addl pow_AD_P = @ltoff(pow_table_P), gp
fma.s1 POW_Xp1 = f8,f1,f1 // Will be used for r1 if x<0
nop.i 999
-;;
}
+;;
// Get significand of x. Will be used to get index to fetch T, Tt.
{ .mfi
getf.sig pow_GR_sig_X = f8
frcpa.s1 POW_B, p6 = f1,f8
- nop.i 999
+ mov pow_GR_exp_half = 0xFFFE // Exponent for 0.5
}
{ .mfi
ld8 pow_AD_P = [pow_AD_P]
@@ -925,11 +895,10 @@ GLOBAL_LIBM_ENTRY(powf)
}
;;
-// p13 = TRUE ==> X is unorm
// DOUBLE 0x10033 exponent limit at which y is an integer
{ .mfi
nop.m 999
- fclass.m p13,p0 = f8, 0x0b // Test for x unorm
+ fcmp.lt.s1 p8,p9 = f8, f0 // Test for x<0
addl pow_GR_10033 = 0x10033, r0
}
{ .mfi
@@ -939,11 +908,11 @@ GLOBAL_LIBM_ENTRY(powf)
}
;;
-// p14 = TRUE ==> X is ZERO
+// p13 = TRUE ==> X is unorm
{ .mfi
+ setf.exp POW_Q0_half = pow_GR_exp_half // Form 0.5
+ fclass.m p13,p0 = f8, 0x0b // Test for x unorm
adds pow_AD_Tt = pow_Tt - pow_table_P, pow_AD_P
- fclass.m p14,p0 = f8, 0x07
- and pow_GR_exp_X = pow_GR_signexp_X, pow_GR_17ones
}
{ .mfi
adds pow_AD_Q = pow_table_Q - pow_table_P, pow_AD_P
@@ -952,14 +921,16 @@ GLOBAL_LIBM_ENTRY(powf)
}
;;
+// p14 = TRUE ==> X is ZERO
{ .mfi
- ldfe POW_P5 = [pow_AD_P], 16
- fcmp.lt.s1 p8,p9 = f8, f0 // Test for x<0
+ ldfe POW_P2 = [pow_AD_Q], 16
+ fclass.m p14,p0 = f8, 0x07
nop.i 999
}
-{ .mib
- ldfe POW_P4 = [pow_AD_Q], 16
- sub pow_GR_true_exp_X = pow_GR_exp_X, pow_GR_16ones
+// Note POW_Xm1 and POW_r1 are used interchangeably
+{ .mfb
+ nop.m 999
+(p8) fnma.s1 POW_Xm1 = POW_Xp1,f1,f0
(p13) br.cond.spnt POW_X_DENORM
}
;;
@@ -968,26 +939,33 @@ GLOBAL_LIBM_ENTRY(powf)
POW_COMMON:
// p11 = TRUE ==> Y is a NAN
{ .mfi
- ldfe POW_P3 = [pow_AD_P], 16
+ and pow_GR_exp_X = pow_GR_signexp_X, pow_GR_17ones
fclass.m p11,p0 = f9, 0xc3
nop.i 999
}
{ .mfi
- ldfe POW_P2 = [pow_AD_Q], 16
- nop.f 999
+ nop.m 999
+ fms.s1 POW_r = POW_B, POW_NORM_X,f1
mov pow_GR_y_zero = 0
}
;;
-// Note POW_Xm1 and POW_r1 are used interchangably
+// Get exponent of |x|-1 to use in comparison to 2^-8
+{ .mmi
+ getf.exp pow_GR_signexp_Xm1 = POW_Xm1
+ sub pow_GR_true_exp_X = pow_GR_exp_X, pow_GR_16ones
+ extr.u pow_GR_offset = pow_GR_sig_X, 55, 8
+}
+;;
+
{ .mfi
alloc r32=ar.pfs,2,19,4,0
- fms.s1 POW_r = POW_B, POW_NORM_X,f1
- nop.i 999
+ fcvt.fx.s1 POW_int_Y = POW_NORM_Y
+ shladd pow_AD_Tt = pow_GR_offset, 3, pow_AD_Tt
}
{ .mfi
setf.sig POW_int_K = pow_GR_true_exp_X
-(p8) fnma.s1 POW_Xm1 = POW_Xp1,f1,f0
+ nop.f 999
nop.i 999
}
;;
@@ -997,7 +975,7 @@ POW_COMMON:
{ .mfi
ldfe POW_P1 = [pow_AD_P], 16
fclass.m p12,p0 = f9, 0x07
- shl pow_GR_offset = pow_GR_sig_X, 1
+ nop.i 999
}
{ .mfb
ldfe POW_P0 = [pow_AD_Q], 16
@@ -1006,19 +984,18 @@ POW_COMMON:
}
;;
-// Get exponent of |x|-1 to use in comparison to 2^-8
-{ .mfi
- getf.exp pow_GR_signexp_Xm1 = POW_Xm1
- fcvt.fx.s1 POW_int_Y = POW_NORM_Y
- shr.u pow_GR_offset = pow_GR_offset,56
+{ .mmf
+ getf.exp pow_GR_signexp_Y = POW_NORM_Y
+ ldfd POW_T = [pow_AD_Tt]
+ fma.s1 POW_rsq = POW_r, POW_r,f0
}
;;
// p11 = TRUE ==> X is a NAN
{ .mfi
ldfpd POW_log2_hi, POW_log2_lo = [pow_AD_Q], 16
- fclass.m p11,p0 = f8, 0xc3
- shladd pow_AD_Tt = pow_GR_offset, 4, pow_AD_Tt
+ fclass.m p11,p0 = POW_NORM_X, 0xc3
+ nop.i 999
}
{ .mfi
ldfe POW_inv_log2_by_128 = [pow_AD_P], 16
@@ -1028,28 +1005,33 @@ POW_COMMON:
;;
{ .mfi
- ldfpd POW_Q2, POW_Q3 = [pow_AD_P], 16
- fma.s1 POW_G = f0,f0,f0 // G=0 in case |x| near 1
+ ldfd POW_Q2 = [pow_AD_P], 16
+ fnma.s1 POW_twoV = POW_r, POW_Q0_half,f1
and pow_GR_exp_Xm1 = pow_GR_signexp_Xm1, pow_GR_17ones
}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_U = POW_NORM_Y,POW_r,f0
+ nop.i 999
+}
;;
// Determine if we will use the |x| near 1 path (p6) or normal path (p7)
{ .mfi
- getf.exp pow_GR_signexp_Y = POW_NORM_Y
- nop.f 999
+ nop.m 999
+ fcvt.xf POW_K = POW_int_K
cmp.lt p6,p7 = pow_GR_exp_Xm1, pow_GR_exp_2tom8
}
{ .mfb
- ldfpd POW_T, POW_Tt = [pow_AD_Tt], 16
- fma.s1 POW_rsq = POW_r, POW_r,f0
+ nop.m 999
+ fma.s1 POW_G = f0,f0,f0 // G=0 in case |x| near 1
(p11) br.cond.spnt POW_X_NAN // Branch if x=nan and y not nan
}
;;
-// If on the x near 1 path, assign r1 to r and r1*r1 to rsq
+// If on the x near 1 path, assign r1 to r
{ .mfi
- ldfpd POW_Q0_half, POW_Q1 = [pow_AD_P], 16
+ ldfpd POW_Q1, POW_RSHF = [pow_AD_P], 16
(p6) fma.s1 POW_r = POW_r1, f1, f0
nop.i 999
}
@@ -1061,57 +1043,25 @@ POW_COMMON:
;;
{ .mfi
- ldfpd POW_Q4, POW_RSHF = [pow_AD_P], 16
-(p7) fma.s1 POW_v6 = POW_r, POW_P5, POW_P4
- nop.i 999
-}
-{ .mfi
- nop.m 999
-(p6) fma.s1 POW_v6 = POW_r1, POW_P5, POW_P4
- nop.i 999
-}
-;;
-
-{ .mfi
- nop.m 999
-(p7) fma.s1 POW_v4 = POW_P3, POW_r, POW_P2
- nop.i 999
-}
-{ .mfi
- nop.m 999
-(p6) fma.s1 POW_v4 = POW_P3, POW_r1, POW_P2
- nop.i 999
-}
-;;
-
-{ .mfi
- nop.m 999
- fcvt.xf POW_K = POW_int_K
- nop.i 999
-}
-;;
-
-{ .mfi
getf.sig pow_GR_sig_int_Y = POW_int_Y
- fnma.s1 POW_twoV = POW_NORM_Y, POW_rsq,f0
+(p6) fnma.s1 POW_twoV = POW_r1, POW_Q0_half,f1
and pow_GR_exp_Y = pow_GR_signexp_Y, pow_GR_17ones
}
{ .mfb
andcm pow_GR_sign_Y = pow_GR_signexp_Y, pow_GR_17ones
- fma.s1 POW_U = POW_NORM_Y,POW_r,f0
+(p6) fma.s1 POW_U = POW_NORM_Y,POW_r1,f0
(p12) br.cond.spnt POW_Y_0 // Branch if y=zero, x not zero or nan
}
;;
-// p11 = TRUE ==> X is NEGATIVE but not inf
{ .mfi
ldfe POW_log2_by_128_lo = [pow_AD_P], 16
- fclass.m p11,p0 = f8, 0x1a
+(p7) fma.s1 POW_Z2 = POW_twoV, POW_U, f0
nop.i 999
}
{ .mfi
ldfe POW_log2_by_128_hi = [pow_AD_Q], 16
- fma.s1 POW_v2 = POW_P1, POW_r, POW_P0
+ nop.f 999
nop.i 999
}
;;
@@ -1123,43 +1073,32 @@ POW_COMMON:
}
{ .mfi
nop.m 999
- fma.s1 POW_v3 = POW_v6, POW_rsq, POW_v4
+(p7) fma.s1 POW_G = POW_K, POW_log2_hi, POW_T
adds pow_AD_tbl1 = pow_tbl1 - pow_Tt, pow_AD_Q
}
;;
+// p11 = TRUE ==> X is NEGATIVE but not inf
{ .mfi
nop.m 999
-(p7) fma.s1 POW_delta = POW_K, POW_log2_lo, POW_Tt
+ fclass.m p11,p0 = POW_NORM_X, 0x1a
nop.i 999
}
{ .mfi
nop.m 999
-(p7) fma.s1 POW_G = POW_K, POW_log2_hi, POW_T
+(p7) fma.s1 POW_delta = POW_K, POW_log2_lo, f0
adds pow_AD_tbl2 = pow_tbl2 - pow_tbl1, pow_AD_tbl1
}
;;
{ .mfi
nop.m 999
- fms.s1 POW_e2 = POW_NORM_Y, POW_r, POW_U
+(p6) fma.s1 POW_Z = POW_twoV, POW_U, f0
nop.i 999
}
{ .mfi
nop.m 999
- fma.s1 POW_Z2 = POW_twoV, POW_Q0_half, POW_U
- nop.i 999
-}
-;;
-
-{ .mfi
- nop.m 999
- fma.s1 POW_Yrcub = POW_rsq, POW_U, f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 POW_p = POW_rsq, POW_v3, POW_v2
+ fma.s1 POW_v2 = POW_P1, POW_r, POW_P0
nop.i 999
}
;;
@@ -1169,7 +1108,7 @@ POW_COMMON:
// p13 = TRUE ==> X is NEGATIVE AND Y possible int
{ .mfi
nop.m 999
- fma.s1 POW_Z1 = POW_NORM_Y, POW_G, f0
+(p7) fma.s1 POW_Z = POW_NORM_Y, POW_G, POW_Z2
(p11) cmp.gt.unc p12,p13 = pow_GR_exp_Y, pow_GR_10033
}
{ .mfi
@@ -1179,35 +1118,28 @@ POW_COMMON:
}
;;
-// By adding RSHF (1.1000...*2^63) we put integer part in rightmost significand
{ .mfi
nop.m 999
- fma.s1 POW_W2 = POW_Z2, POW_inv_log2_by_128, POW_RSHF
+ fma.s1 POW_Yrcub = POW_rsq, POW_U, f0
nop.i 999
}
{ .mfi
nop.m 999
- fms.s1 POW_UmZ2 = POW_U, f1, POW_Z2
+ fma.s1 POW_p = POW_rsq, POW_P2, POW_v2
nop.i 999
}
;;
+// Test if x inf
{ .mfi
nop.m 999
- fma.s1 POW_Z3 = POW_p, POW_Yrcub, f0
+ fclass.m p15,p0 = POW_NORM_X, 0x23
nop.i 999
}
-;;
-
// By adding RSHF (1.1000...*2^63) we put integer part in rightmost significand
{ .mfi
nop.m 999
- fms.s1 POW_e1 = POW_NORM_Y, POW_G, POW_Z1
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 POW_W1 = POW_Z1, POW_inv_log2_by_128, POW_RSHF
+ fma.s1 POW_W1 = POW_Z, POW_inv_log2_by_128, POW_RSHF
nop.i 999
}
;;
@@ -1227,93 +1159,38 @@ POW_COMMON:
}
;;
-// By subtracting RSHF we get rounded integer POW_N2float
-{ .mfi
- nop.m 999
- fms.s1 POW_N2float = POW_W2, f1, POW_RSHF
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 POW_UmZ2pV = POW_twoV,POW_Q0_half,POW_UmZ2
- nop.i 999
-}
-;;
-
-{ .mfi
- nop.m 999
- fma.s1 POW_Z3sq = POW_Z3, POW_Z3, f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 POW_v4 = POW_Z3, POW_Q3, POW_Q2
- nop.i 999
-}
-;;
-
-// Extract rounded integer from rightmost significand of POW_W2
-// By subtracting RSHF we get rounded integer POW_N1float
-{ .mfi
- getf.sig pow_GR_int_W2 = POW_W2
- fms.s1 POW_N1float = POW_W1, f1, POW_RSHF
- nop.i 999
-}
-{ .mfi
- nop.m 999
- fma.s1 POW_v2 = POW_Z3, POW_Q1, POW_Q0_half
- nop.i 999
-}
-;;
-
-{ .mfi
- nop.m 999
- fnma.s1 POW_s2 = POW_N2float, POW_log2_by_128_hi, POW_Z2
- nop.i 999
-}
+// p11 = TRUE ==> X is +1.0
{ .mfi
nop.m 999
- fma.s1 POW_e2 = POW_e2,f1,POW_UmZ2pV
+ fcmp.eq.s1 p11,p0 = POW_NORM_X, f1
nop.i 999
}
;;
// Extract rounded integer from rightmost significand of POW_W1
-// Test if x inf
+// By subtracting RSHF we get rounded integer POW_Nfloat
{ .mfi
- getf.sig pow_GR_int_W1 = POW_W1
- fclass.m p15,p0 = POW_NORM_X, 0x23
+ getf.sig pow_GR_int_N = POW_W1
+ fms.s1 POW_Nfloat = POW_W1, f1, POW_RSHF
nop.i 999
}
{ .mfb
nop.m 999
- fnma.s1 POW_f2 = POW_N2float, POW_log2_by_128_lo, f1
+ fma.s1 POW_Z3 = POW_p, POW_Yrcub, f0
(p12) br.cond.spnt POW_X_NEG_Y_NONINT // Branch if x neg, y not integer
}
;;
-// p11 = TRUE ==> X is +1.0
+// p7 = TRUE ==> Y is +1.0
// p12 = TRUE ==> X is NEGATIVE AND Y is an odd integer
{ .mfi
getf.exp pow_GR_signexp_Y_Gpr = POW_Y_Gpr
- fcmp.eq.s1 p11,p0 = POW_NORM_X, f1
+ fcmp.eq.s1 p7,p0 = POW_NORM_Y, f1 // Test for y=1.0
(p10) tbit.nz.unc p12,p0 = pow_GR_sig_int_Y,0
}
-{ .mfi
- nop.m 999
- fma.s1 POW_v3 = POW_Z3sq, POW_Q4, POW_v4
- nop.i 999
-}
-;;
-
-{ .mfi
- nop.m 999
- fnma.s1 POW_f1 = POW_N1float, POW_log2_by_128_lo, f1
- nop.i 999
-}
{ .mfb
nop.m 999
- fnma.s1 POW_s1 = POW_N1float, POW_log2_by_128_hi, POW_Z1
+(p11) fma.s.s0 f8 = f1,f1,f0 // If x=1, result is +1
(p15) br.cond.spnt POW_X_INF
}
;;
@@ -1324,77 +1201,73 @@ POW_COMMON:
fcmp.eq.s0 p15,p0 = f8,f9
nop.i 999
}
-{ .mfi
+{ .mfb
nop.m 999
fma.s1 POW_e3 = POW_NORM_Y, POW_delta, f0
- nop.i 999
+(p11) br.ret.spnt b0 // Early exit if x=1.0, result is +1
}
;;
{ .mfi
- nop.m 999
- fcmp.eq.s1 p7,p0 = POW_NORM_Y, f1 // Test for y=1.0
+(p12) mov pow_GR_xneg_yodd = 1
+ fnma.s1 POW_f12 = POW_Nfloat, POW_log2_by_128_lo, f1
nop.i 999
}
-{ .mfi
+{ .mfb
nop.m 999
- fma.s1 POW_e12 = POW_e1,f1,POW_e2
- nop.i 999
-}
-;;
-
-{ .mfi
- add pow_GR_int_N = pow_GR_int_W1, pow_GR_int_W2
-(p11) fma.s.s0 f8 = f1,f1,f0 // If x=1, result is +1
- nop.i 999
-}
-{ .mib
-(p12) mov pow_GR_xneg_yodd = 1
- nop.i 999
-(p11) br.ret.spnt b0 // Early exit if x=1.0, result is +1
+ fnma.s1 POW_s = POW_Nfloat, POW_log2_by_128_hi, POW_Z
+(p7) br.ret.spnt b0 // Early exit if y=1.0, result is x
}
;;
-{ .mfi
+{ .mmi
and pow_GR_index1 = 0x0f, pow_GR_int_N
- fma.s1 POW_q = POW_Z3sq, POW_v3, POW_v2
- shr pow_int_GR_M = pow_GR_int_N, 7 // M = N/128
-}
-{ .mib
and pow_GR_index2 = 0x70, pow_GR_int_N
- nop.i 999
-(p7) br.ret.spnt b0 // Early exit if y=1.0, result is x
+ shr pow_int_GR_M = pow_GR_int_N, 7 // M = N/128
}
;;
{ .mfi
shladd pow_AD_T1 = pow_GR_index1, 4, pow_AD_tbl1
- fma.s1 POW_s = POW_s1, f1, POW_s2
+ fma.s1 POW_q = POW_Z3, POW_Q1, POW_Q0_half
add pow_int_GR_M = pow_GR_16ones, pow_int_GR_M
}
{ .mfi
add pow_AD_T2 = pow_AD_tbl2, pow_GR_index2
- fma.s1 POW_f12 = POW_f1, POW_f2,f0
+ fma.s1 POW_Z3sq = POW_Z3, POW_Z3, f0
nop.i 999
}
;;
-{ .mmf
+{ .mmi
ldfe POW_T1 = [pow_AD_T1]
ldfe POW_T2 = [pow_AD_T2]
- nop.f 999
+ nop.i 999
}
;;
+// f123 = f12*(e3+1) = f12*e3+f12
{ .mfi
setf.exp POW_2M = pow_int_GR_M
- fma.s1 POW_e123 = POW_e12, f1, POW_e3
- and pow_GR_exp_Y_Gpr = pow_GR_signexp_Y_Gpr, pow_GR_17ones
+ fma.s1 POW_f123 = POW_e3,POW_f12,POW_f12
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_ssq = POW_s, POW_s, f0
+ nop.i 999
}
;;
{ .mfi
nop.m 999
+ fma.s1 POW_v2 = POW_s, POW_Q2, POW_Q1
+ and pow_GR_exp_Y_Gpr = pow_GR_signexp_Y_Gpr, pow_GR_17ones
+}
+;;
+
+{ .mfi
+ cmp.ne p12,p13 = pow_GR_xneg_yodd, r0
fma.s1 POW_q = POW_Z3sq, POW_q, POW_Z3
sub pow_GR_true_exp_Y_Gpr = pow_GR_exp_Y_Gpr, pow_GR_16ones
}
@@ -1411,88 +1284,62 @@ POW_COMMON:
// Form signexp of constants to indicate overflow
{ .mfi
mov pow_GR_big_pos = 0x1007f
- fma.s1 POW_ssq = POW_s, POW_s, f0
+ nop.f 999
cmp.le p8,p9 = 7, pow_GR_true_exp_Y_Gpr
}
{ .mfi
mov pow_GR_big_neg = 0x3007f
- fma.s1 POW_v4 = POW_s, POW_Q3, POW_Q2
+ nop.f 999
andcm pow_GR_sign_Y_Gpr = pow_GR_signexp_Y_Gpr, pow_GR_17ones
}
;;
// Form big positive and negative constants to test for possible overflow
+// Scale both terms of the polynomial by POW_f123
{ .mfi
setf.exp POW_big_pos = pow_GR_big_pos
- fma.s1 POW_v2 = POW_s, POW_Q1, POW_Q0_half
+ fma.s1 POW_ssq = POW_ssq, POW_f123, f0
(p9) cmp.le.unc p0,p10 = 6, pow_GR_true_exp_Y_Gpr
}
{ .mfb
setf.exp POW_big_neg = pow_GR_big_neg
- fma.s1 POW_1ps = f1,f1,POW_s
+ fma.s1 POW_1ps = POW_s, POW_f123, POW_f123
(p8) br.cond.spnt POW_OVER_UNDER_X_NOT_INF
}
;;
-// f123 = f12*(e123+1) = f12*e123+f12
{ .mfi
nop.m 999
- fma.s1 POW_f123 = POW_e123,POW_f12,POW_f12
+(p12) fnma.s1 POW_T1T2 = POW_T1, POW_T2, f0
nop.i 999
}
-;;
-
{ .mfi
nop.m 999
- fma.s1 POW_T1T2 = POW_T1, POW_T2, f0
+(p13) fma.s1 POW_T1T2 = POW_T1, POW_T2, f0
nop.i 999
}
-{ .mfi
- nop.m 999
- fma.s1 POW_v3 = POW_ssq, POW_Q4, POW_v4
- cmp.ne p12,p13 = pow_GR_xneg_yodd, r0
-}
;;
{ .mfi
nop.m 999
- fma.s1 POW_2Mqp1 = POW_2M, POW_q, POW_2M
- nop.i 999
-}
-;;
-
-{ .mfi
- nop.m 999
- fma.s1 POW_v21ps = POW_ssq, POW_v2, POW_1ps
+ fma.s1 POW_v210 = POW_s, POW_v2, POW_Q0_half
nop.i 999
}
{ .mfi
nop.m 999
- fma.s1 POW_s4 = POW_ssq, POW_ssq, f0
- nop.i 999
-}
-;;
-
-{ .mfi
- nop.m 999
-(p12) fnma.s1 POW_A = POW_T1T2, POW_f123, f0
- nop.i 999
-}
-{ .mfi
- nop.m 999
-(p13) fma.s1 POW_A = POW_T1T2, POW_f123, f0
+ fma.s1 POW_2Mqp1 = POW_2M, POW_q, POW_2M
nop.i 999
}
;;
{ .mfi
nop.m 999
- fma.s1 POW_es = POW_s4, POW_v3, POW_v21ps
+ fma.s1 POW_es = POW_ssq, POW_v210, POW_1ps
nop.i 999
}
{ .mfi
nop.m 999
- fma.s1 POW_A = POW_A, POW_2Mqp1, f0
+ fma.s1 POW_A = POW_T1T2, POW_2Mqp1, f0
nop.i 999
}
;;
@@ -1623,16 +1470,25 @@ POW_POSSIBLE_UNDER:
// 0.1...11 2^-3ffe (biased, 1)
// largest dn smallest normal
+// Form small constant (2^-170) to correct underflow result near region of
+// smallest denormal in round-nearest.
+
// Put in s2 (td set, ftz set)
+.pred.rel "mutex",p12,p13
{ .mfi
- nop.m 999
+ mov pow_GR_Fpsr = ar40 // Read the fpsr--need to check rc.s0
fsetc.s2 0x7F,0x41
- nop.i 999
+ mov pow_GR_rcs0_mask = 0x0c00 // Set mask for rc.s0
+}
+{ .mfi
+(p12) mov pow_GR_tmp = 0x2ffff - 170
+ nop.f 999
+(p13) mov pow_GR_tmp = 0x0ffff - 170
}
;;
{ .mfi
- nop.m 999
+ setf.exp POW_eps = pow_GR_tmp // Form 2^-170
fma.s.s2 POW_ftz_urm_f8 = POW_A, POW_es, f0
nop.i 999
}
@@ -1654,6 +1510,21 @@ POW_POSSIBLE_UNDER:
}
;;
+{ .mmi
+(p7) and pow_GR_rcs0 = pow_GR_rcs0_mask, pow_GR_Fpsr // Isolate rc.s0
+;;
+(p7) cmp.eq.unc p6,p0 = pow_GR_rcs0, r0 // Test for round to nearest
+ nop.i 999
+}
+;;
+
+// Tweak result slightly if underflow to get correct rounding near smallest
+// denormal if round-nearest
+{ .mfi
+ nop.m 999
+(p6) fms.s.s0 f8 = POW_A, POW_es, POW_eps
+ nop.i 999
+}
{ .mbb
(p7) mov pow_GR_tag = 31
(p7) br.cond.spnt __libm_error_region // Branch if underflow
@@ -1671,16 +1542,8 @@ POW_X_DENORM:
}
;;
-{ .mmi
- getf.sig pow_GR_sig_X = POW_NORM_X
-;;
- and pow_GR_exp_X = pow_GR_signexp_X, pow_GR_17ones
- nop.i 999
-}
-;;
-
{ .mib
- sub pow_GR_true_exp_X = pow_GR_exp_X, pow_GR_16ones
+ getf.sig pow_GR_sig_X = POW_NORM_X
nop.i 999
br.cond.sptk POW_COMMON
}
@@ -2140,6 +2003,7 @@ POW_OVER_UNDER_ERROR:
GLOBAL_LIBM_END(powf)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
diff --git a/sysdeps/ia64/fpu/e_powl.S b/sysdeps/ia64/fpu/e_powl.S
index 0896c19..3f93f60 100644
--- a/sysdeps/ia64/fpu/e_powl.S
+++ b/sysdeps/ia64/fpu/e_powl.S
@@ -60,6 +60,7 @@
// 02/10/03 Reordered header: .section, .global, .proc, .align;
// used data8 for long double table values
// 04/17/03 Added missing mutex directive
+// 10/13/03 Corrected .endp names to match .proc names
//
//*********************************************************************
//
@@ -2755,6 +2756,7 @@ POWL_64_SQRT:
GLOBAL_LIBM_END(powl)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
@@ -2803,6 +2805,6 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
br.ret.sptk b0 // Return
};;
-.endp
+LOCAL_LIBM_END(__libm_error_region#)
.type __libm_error_support#,@function
.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_remainder.S b/sysdeps/ia64/fpu/e_remainder.S
index 2f6e90f..f655567 100644
--- a/sysdeps/ia64/fpu/e_remainder.S
+++ b/sysdeps/ia64/fpu/e_remainder.S
@@ -531,6 +531,7 @@ EXP_ERROR_RETURN:
GLOBAL_IEEE754_END(remainder)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/e_remainderf.S b/sysdeps/ia64/fpu/e_remainderf.S
index bbb5fd0..0e9bedd 100644
--- a/sysdeps/ia64/fpu/e_remainderf.S
+++ b/sysdeps/ia64/fpu/e_remainderf.S
@@ -550,6 +550,7 @@ EXP_ERROR_RETURN:
GLOBAL_IEEE754_END(remainderf)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/e_remainderl.S b/sysdeps/ia64/fpu/e_remainderl.S
index 1c1a3c3..8c1630e 100644
--- a/sysdeps/ia64/fpu/e_remainderl.S
+++ b/sysdeps/ia64/fpu/e_remainderl.S
@@ -557,6 +557,7 @@ EXP_ERROR_RETURN:
}
GLOBAL_IEEE754_END(remainderl)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/e_scalb.S b/sysdeps/ia64/fpu/e_scalb.S
index 82e914e..3d48aab 100644
--- a/sysdeps/ia64/fpu/e_scalb.S
+++ b/sysdeps/ia64/fpu/e_scalb.S
@@ -21,60 +21,82 @@
// products derived from this software without specific prior written
// permission.
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
+// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/02/00 Initial version
-// 01/26/01 Scalb completely reworked and now standalone version
+// 01/26/01 Scalb completely reworked and now standalone version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 08/06/03 Improved performance
//
// API
//==============================================================
-// double = scalb (double x, double n)
+// double = scalb (double x, double n)
// input floating point f8 and floating point f9
// output floating point f8
//
+// int_type = 0 if int is 32 bits
+// int_type = 1 if int is 64 bits
+//
// Returns x* 2**n using an fma and detects overflow
-// and underflow.
+// and underflow.
//
//
+// Strategy:
+// Compute biased exponent of result exp_Result = N + exp_X
+// Break into ranges:
+// exp_Result > 0x103fe -> Certain overflow
+// exp_Result = 0x103fe -> Possible overflow
+// 0x0fc01 <= exp_Result < 0x103fe -> No over/underflow (main path)
+// 0x0fc01 - 52 <= exp_Result < 0x0fc01 -> Possible underflow
+// exp_Result < 0x0fc01 - 52 -> Certain underflow
+FR_Big = f6
+FR_NBig = f7
FR_Floating_X = f8
FR_Result = f8
FR_Floating_N = f9
FR_Result2 = f9
-FR_Norm_N = f10
-FR_Result3 = f11
-FR_Norm_X = f12
+FR_Result3 = f10
+FR_Norm_X = f11
+FR_Two_N = f12
FR_N_float_int = f13
-FR_Two_N = f14
-FR_Two_to_Big = f15
-FR_Big = f6
-FR_NBig = f7
+FR_Norm_N = f14
+GR_neg_ov_limit= r14
+GR_big_exp = r14
GR_N_Biased = r15
GR_Big = r16
-GR_NBig = r17
-GR_Scratch = r18
-GR_Scratch1 = r19
+GR_exp_Result = r18
+GR_pos_ov_limit= r19
+GR_exp_sure_ou = r19
GR_Bias = r20
GR_N_as_int = r21
+GR_signexp_X = r22
+GR_exp_X = r23
+GR_exp_mask = r24
+GR_max_exp = r25
+GR_min_exp = r26
+GR_min_den_exp = r27
+GR_Scratch = r28
+GR_signexp_N = r29
+GR_exp_N = r30
GR_SAVE_B0 = r32
GR_SAVE_GP = r33
@@ -89,412 +111,442 @@ GLOBAL_IEEE754_ENTRY(scalb)
//
// Is x NAN, INF, ZERO, +-?
+// Build the exponent Bias
//
{ .mfi
- alloc r32=ar.pfs,0,3,4,0
- fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
- addl GR_Scratch = 0x019C3F,r0
+ getf.exp GR_signexp_N = FR_Floating_N // Get signexp of n
+ fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero
+ mov GR_Bias = 0x0ffff
+}
+{ .mfi
+ mov GR_Big = 35000 // If N this big then certain overflow
+ fcvt.fx.trunc.s1 FR_N_float_int = FR_Floating_N // Get N in significand
+ nop.i 0
+}
+;;
+
+{ .mfi
+ getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x
+ fclass.m p7,p0 = FR_Floating_N, 0x0b // Test for n=unorm
+ nop.i 0
}
//
-// Is y a NAN, INF, ZERO, +-?
+// Normalize n
//
{ .mfi
- nop.m 999
- fclass.m.unc p6,p0 = FR_Floating_N, 0xe7 //@snan | @qnan | @inf | @zero
- addl GR_Scratch1 = 0x063BF,r0
+ mov GR_exp_mask = 0x1ffff // Exponent mask
+ fnorm.s1 FR_Norm_N = FR_Floating_N
+ nop.i 0
}
;;
//
-// Convert N to a fp integer
-// Normalize x
+// Is n NAN, INF, ZERO, +-?
//
{ .mfi
- nop.m 0
- fnorm.s1 FR_Norm_N = FR_Floating_N
- nop.i 999
+ mov GR_big_exp = 0x1003e // Exponent at which n is integer
+ fclass.m p9,p0 = FR_Floating_N, 0xe7 // @snan | @qnan | @inf | @zero
+ mov GR_max_exp = 0x103fe // Exponent of maximum double
}
-{ .mfi
- nop.m 999
- fnorm.s1 FR_Norm_X = FR_Floating_X
- nop.i 999
-};;
-
//
-// Create 2*big
-// Create 2**-big
// Normalize x
-// Branch on special values.
//
-{ .mib
- setf.exp FR_Big = GR_Scratch
- nop.i 0
-(p6) br.cond.spnt SCALB_NAN_INF_ZERO
+{ .mfb
+ nop.m 0
+ fnorm.s1 FR_Norm_X = FR_Floating_X
+(p7) br.cond.spnt SCALB_N_UNORM // Branch if n=unorm
}
-{ .mib
- setf.exp FR_NBig = GR_Scratch1
- nop.i 0
-(p7) br.cond.spnt SCALB_NAN_INF_ZERO
-};;
+;;
-//
-// Convert N to a fp integer
-// Create -35000
-//
+SCALB_COMMON1:
+// Main path continues. Also return here from u=unorm path.
+// Handle special cases if x = Nan, Inf, Zero
+{ .mfb
+ nop.m 0
+ fcmp.lt.s1 p7,p0 = FR_Floating_N, f0 // Test N negative
+(p6) br.cond.spnt SCALB_NAN_INF_ZERO
+}
+;;
+
+// Handle special cases if n = Nan, Inf, Zero
{ .mfi
- addl GR_Scratch = 1,r0
- fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
- addl GR_NBig = -35000,r0
+ getf.sig GR_N_as_int = FR_N_float_int // Get n from significand
+ fclass.m p8,p0 = FR_Floating_X, 0x0b // Test for x=unorm
+ mov GR_exp_sure_ou = 0x1000e // Exp_N where x*2^N sure over/under
+}
+{ .mfb
+ mov GR_min_exp = 0x0fc01 // Exponent of minimum double
+ fcvt.xf FR_N_float_int = FR_N_float_int // Convert N to FP integer
+(p9) br.cond.spnt SCALB_NAN_INF_ZERO
}
;;
-//
-// Put N if a GP register
-// Convert N_float_int to floating point value
-// Create 35000
-// Build the exponent Bias
-//
-{ .mii
- getf.sig GR_N_as_int = FR_N_float_int
- shl GR_Scratch = GR_Scratch,63
- addl GR_Big = 35000,r0
+{ .mmi
+ and GR_exp_N = GR_exp_mask, GR_signexp_N // Get exponent of N
+(p7) sub GR_Big = r0, GR_Big // Limit for N
+ nop.i 0
}
-{ .mfi
- addl GR_Bias = 0x0FFFF,r0
- fcvt.xf FR_N_float_int = FR_N_float_int
- nop.i 0
-};;
+;;
-//
-// Catch those fp values that are beyond 2**64-1
-// Is N > 35000
-// Is N < -35000
-//
-{ .mfi
- cmp.ne.unc p9,p10 = GR_N_as_int,GR_Scratch
- nop.f 0
- nop.i 0
+{ .mib
+ cmp.lt p9,p0 = GR_exp_N, GR_big_exp // N possible non-integer?
+ cmp.ge p6,p0 = GR_exp_N, GR_exp_sure_ou // N certain over/under?
+(p8) br.cond.spnt SCALB_X_UNORM // Branch if x=unorm
}
-{ .mmi
- cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
- cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
- nop.i 0
-};;
+;;
-//
-// Is N really an int, only for those non-int indefinites?
-// Create exp bias.
-//
-{ .mfi
- add GR_N_Biased = GR_Bias,GR_N_as_int
-(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
- nop.i 0
-};;
+SCALB_COMMON2:
+// Main path continues. Also return here from x=unorm path.
+// Create biased exponent for 2**N
+{ .mmi
+(p6) mov GR_N_as_int = GR_Big // Limit N
+;;
+ add GR_N_Biased = GR_Bias,GR_N_as_int
+ nop.i 0
+}
+;;
-//
-// Branch and return if N is not an int.
-// Main path, create 2**N
-//
{ .mfi
- setf.exp FR_Two_N = GR_N_Biased
- nop.i 999
+ setf.exp FR_Two_N = GR_N_Biased // Form 2**N
+(p9) fcmp.neq.unc.s1 p9,p0 = FR_Norm_N, FR_N_float_int // Test if N an integer
+ and GR_exp_X = GR_exp_mask, GR_signexp_X // Get exponent of X
}
-{ .mfb
- nop.m 0
-(p7) frcpa.s0 f8,p11 = f0,f0
-(p7) br.ret.spnt b0
-};;
+;;
//
-// Set denormal on denormal input x and denormal input N
+// Compute biased result exponent
+// Branch if N is not an integer
//
-{ .mfi
- nop.m 999
-(p10)fcmp.ge.s1 p6,p8 = FR_Norm_N,f0
- nop.i 0
-};;
-{ .mfi
- nop.m 999
- fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
- nop.i 999
+{ .mib
+ add GR_exp_Result = GR_exp_X, GR_N_as_int
+ mov GR_min_den_exp = 0x0fc01 - 52 // Exponent of min denorm dble
+(p9) br.cond.spnt SCALB_N_NOT_INT
}
-{ .mfi
- nop.m 999
- fcmp.ge.s0 p12,p13 = FR_Floating_N,f0
- nop.i 0
-};;
+;;
//
-// Adjust 2**N if N was very small or very large
+// Raise Denormal operand flag with compare
+// Do final operation
//
-
{ .mfi
- nop.m 0
-(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
- nop.i 0
+ cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow
+ fcmp.ge.s0 p0,p11 = FR_Floating_X,FR_Floating_N // Dummy to set denorm
+ cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
}
-{ .mlx
- nop.m 999
- movl GR_Scratch = 0x00000000000303FF
-};;
-{ .mfi
- nop.m 0
-(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
- nop.i 0
+{ .mfb
+ nop.m 0
+ fma.d.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
+(p9) br.cond.spnt SCALB_UNDERFLOW // Branch if certain underflow
}
-{ .mlx
- nop.m 999
- movl GR_Scratch1= 0x00000000000103FF
-};;
+;;
+
+{ .mib
+(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow
+(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow
+(p7) br.ret.sptk b0 // Return from main path
+}
+;;
-// Set up necessary status fields
+{ .bbb
+(p6) br.cond.spnt SCALB_OVERFLOW // Branch if certain overflow
+(p8) br.cond.spnt SCALB_POSSIBLE_OVERFLOW // Branch if possible overflow
+(p9) br.cond.spnt SCALB_POSSIBLE_UNDERFLOW // Branch if possible underflow
+}
+;;
+
+// Here if possible underflow.
+// Resulting exponent: 0x0fc01-52 <= exp_Result < 0x0fc01
+SCALB_POSSIBLE_UNDERFLOW:
+//
+// Here if possible overflow.
+// Resulting exponent: 0x103fe = exp_Result
+SCALB_POSSIBLE_OVERFLOW:
+
+// Set up necessary status fields
//
// S0 user supplied status
// S2 user supplied status + WRE + TD (Overflows)
// S3 user supplied status + FZ + TD (Underflows)
//
{ .mfi
- nop.m 999
- fsetc.s3 0x7F,0x41
- nop.i 999
+ mov GR_pos_ov_limit = 0x103ff // Exponent for positive overflow
+ fsetc.s3 0x7F,0x41
+ nop.i 0
}
{ .mfi
- nop.m 999
- fsetc.s2 0x7F,0x42
- nop.i 999
-};;
+ mov GR_neg_ov_limit = 0x303ff // Exponent for negative overflow
+ fsetc.s2 0x7F,0x42
+ nop.i 0
+}
+;;
//
-// Do final operation
+// Do final operation with s2 and s3
//
{ .mfi
- setf.exp FR_NBig = GR_Scratch
- fma.d.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
- nop.i 999
+ setf.exp FR_NBig = GR_neg_ov_limit
+ fma.d.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
+ nop.i 0
}
{ .mfi
- nop.m 999
- fma.d.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
-{ .mfi
- setf.exp FR_Big = GR_Scratch1
- fma.d.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
+ setf.exp FR_Big = GR_pos_ov_limit
+ fma.d.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
+ nop.i 0
+}
+;;
// Check for overflow or underflow.
-//
-// S0 user supplied status
-// S2 user supplied status + WRE + TD (Overflow)
-// S3 user supplied status + FZ + TD (Underflow)
-//
-//
// Restore s3
// Restore s2
//
{ .mfi
- nop.m 0
- fsetc.s3 0x7F,0x40
- nop.i 999
+ nop.m 0
+ fsetc.s3 0x7F,0x40
+ nop.i 0
}
{ .mfi
- nop.m 0
- fsetc.s2 0x7F,0x40
- nop.i 999
-};;
+ nop.m 0
+ fsetc.s2 0x7F,0x40
+ nop.i 0
+}
+;;
//
// Is the result zero?
//
{ .mfi
- nop.m 999
- fclass.m.unc p6, p0 = FR_Result3, 0x007
- nop.i 999
-}
+ nop.m 0
+ fclass.m p6, p0 = FR_Result3, 0x007
+ nop.i 0
+}
{ .mfi
- addl GR_Tag = 53, r0
- fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
- nop.i 0
-};;
+ nop.m 0
+ fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big
+ nop.i 0
+}
+;;
//
// Detect masked underflow - Tiny + Inexact Only
//
{ .mfi
- nop.m 999
+ nop.m 0
(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
- nop.i 999
-};;
+ nop.i 0
+}
+;;
//
// Is result bigger the allowed range?
// Branch out for underflow
//
{ .mfb
-(p6) addl GR_Tag = 54, r0
+ nop.m 0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
-(p6) br.cond.spnt SCALB_UNDERFLOW
-};;
+(p6) br.cond.spnt SCALB_UNDERFLOW
+}
+;;
//
// Branch out for overflow
//
-{ .mbb
- nop.m 0
-(p7) br.cond.spnt SCALB_OVERFLOW
-(p9) br.cond.spnt SCALB_OVERFLOW
-};;
+{ .bbb
+(p7) br.cond.spnt SCALB_OVERFLOW
+(p9) br.cond.spnt SCALB_OVERFLOW
+ br.ret.sptk b0 // Return from main path.
+}
+;;
-//
-// Return from main path.
-//
-{ .mfb
- nop.m 999
- nop.f 0
- br.ret.sptk b0;;
+// Here if result overflows
+SCALB_OVERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0
+ addl GR_Tag = 53, r0 // Set error tag for overflow
+ br.cond.sptk __libm_error_region // Call error support for overflow
+}
+;;
+
+// Here if result underflows
+SCALB_UNDERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0
+ addl GR_Tag = 54, r0 // Set error tag for underflow
+ br.cond.sptk __libm_error_region // Call error support for underflow
}
+;;
-SCALB_NAN_INF_ZERO:
+SCALB_NAN_INF_ZERO:
//
-// Convert N to a fp integer
-//
+// Before entry, N has been truncated to an integer that is held in the
+// significand of FR_N_float_int
+//
+// Convert N_float_int to floating point value
+//
{ .mfi
- addl GR_Scratch = 1,r0
- fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
- nop.i 999
+ getf.sig GR_N_as_int = FR_N_float_int
+ fclass.m p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
+ nop.i 0
}
{ .mfi
- nop.m 0
- fclass.m.unc p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
- nop.i 0
-};;
+ addl GR_Scratch = 1,r0
+ fcvt.xf FR_N_float_int = FR_N_float_int
+ nop.i 0
+}
+;;
+
{ .mfi
- nop.m 0
- fclass.m.unc p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
- shl GR_Scratch = GR_Scratch,63
-};;
+ nop.m 0
+ fclass.m p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
+ shl GR_Scratch = GR_Scratch,63
+}
+;;
+
{ .mfi
- nop.m 0
- fclass.m.unc p8,p0 = FR_Floating_N, 0x21 // @inf
- nop.i 0
-}
- { .mfi
- nop.m 0
- fclass.m.unc p9,p0 = FR_Floating_N, 0x22 // @-inf
- nop.i 0
-};;
+ nop.m 0
+ fclass.m p8,p0 = FR_Floating_N, 0x21 // @inf
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fclass.m p9,p0 = FR_Floating_N, 0x22 // @-inf
+ nop.i 0
+}
+;;
//
// Either X or N is a Nan, return result and possible raise invalid.
//
{ .mfb
- nop.m 0
-(p6) fma.d.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
+ nop.m 0
+(p6) fma.d.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
(p6) br.ret.spnt b0
-};;
+}
+;;
+
{ .mfb
- getf.sig GR_N_as_int = FR_N_float_int
-(p7) fma.d.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
+ nop.m 0
+(p7) fma.d.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
(p7) br.ret.spnt b0
-};;
+}
+;;
//
// If N + Inf do something special
// For N = -Inf, create Int
//
{ .mfb
- nop.m 0
-(p8) fma.d.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0
-(p8) br.ret.spnt b0
+ nop.m 0
+(p8) fma.d.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0
+(p8) br.ret.spnt b0
}
{ .mfi
- nop.m 0
-(p9) fnma.d.s0 FR_Floating_N = FR_Floating_N, f1, f0
- nop.i 0
-};;
+ nop.m 0
+(p9) fnma.d.s0 FR_Floating_N = FR_Floating_N, f1, f0
+ nop.i 0
+}
+;;
//
// If N==-Inf,return x/(-N)
//
{ .mfb
- nop.m 0
-(p9) frcpa.s0 FR_Result,p6 = FR_Floating_X,FR_Floating_N
-(p9) br.ret.spnt b0
-};;
-
-//
-// Convert N_float_int to floating point value
-//
-{ .mfi
- cmp.ne.unc p9,p0 = GR_N_as_int,GR_Scratch
- fcvt.xf FR_N_float_int = FR_N_float_int
- nop.i 0
-};;
+ cmp.ne p7,p0 = GR_N_as_int,GR_Scratch
+(p9) frcpa.s0 FR_Result,p0 = FR_Floating_X,FR_Floating_N
+(p9) br.ret.spnt b0
+}
+;;
//
// Is N an integer.
//
{ .mfi
- nop.m 0
-(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
- nop.i 0
-};;
+ nop.m 0
+(p7) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
+ nop.i 0
+}
+;;
//
// If N not an int, return NaN and raise invalid.
//
{ .mfb
- nop.m 0
-(p7) frcpa.s0 FR_Result,p6 = f0,f0
-(p7) br.ret.spnt b0
-};;
+ nop.m 0
+(p7) frcpa.s0 FR_Result,p0 = f0,f0
+(p7) br.ret.spnt b0
+}
+;;
//
-// Always return x in other path.
+// Always return x in other path.
//
{ .mfb
- nop.m 0
- fma.d.s0 FR_Result = FR_Floating_X,f1,f0
- br.ret.sptk b0
-};;
+ nop.m 0
+ fma.d.s0 FR_Result = FR_Floating_X,f1,f0
+ br.ret.sptk b0
+}
+;;
-GLOBAL_IEEE754_END(scalb)
-__libm_error_region:
+// Here if n not int
+// Return NaN and raise invalid.
+SCALB_N_NOT_INT:
+{ .mfb
+ nop.m 0
+ frcpa.s0 FR_Result,p0 = f0,f0
+ br.ret.sptk b0
+}
+;;
+
+// Here if n=unorm
+SCALB_N_UNORM:
+{ .mfb
+ getf.exp GR_signexp_N = FR_Norm_N // Get signexp of normalized n
+ fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N // Get N in significand
+ br.cond.sptk SCALB_COMMON1 // Return to main path
+}
+;;
+
+// Here if x=unorm
+SCALB_X_UNORM:
+{ .mib
+ getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x
+ nop.i 0
+ br.cond.sptk SCALB_COMMON2 // Return to main path
+}
+;;
-SCALB_OVERFLOW:
-SCALB_UNDERFLOW:
+GLOBAL_IEEE754_END(scalb)
+LOCAL_LIBM_ENTRY(__libm_error_region)
//
// Get stack address of N
//
.prologue
{ .mfi
- add GR_Parameter_Y=-32,sp
+ add GR_Parameter_Y=-32,sp
nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs
+ mov GR_SAVE_PFS=ar.pfs
}
//
-// Adjust sp
+// Adjust sp
//
{ .mfi
.fframe 64
- add sp=-64,sp
+ add sp=-64,sp
nop.f 0
- mov GR_SAVE_GP=gp
+ mov GR_SAVE_GP=gp
};;
//
-// Store N on stack in correct position
+// Store N on stack in correct position
// Locate the address of x on stack
//
{ .mmi
- stfd [GR_Parameter_Y] = FR_Norm_N,16
- add GR_Parameter_X = 16,sp
+ stfd [GR_Parameter_Y] = FR_Norm_N,16
+ add GR_Parameter_X = 16,sp
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0
+ mov GR_SAVE_B0=b0
};;
//
@@ -503,42 +555,42 @@ SCALB_UNDERFLOW:
//
.body
{ .mib
- stfd [GR_Parameter_X] = FR_Norm_X
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
+ stfd [GR_Parameter_X] = FR_Norm_X
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0
}
{ .mib
- stfd [GR_Parameter_Y] = FR_Result
+ stfd [GR_Parameter_Y] = FR_Result
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support#
+ br.call.sptk b0=__libm_error_support#
};;
//
// Get location of result on stack
//
{ .mmi
+ add GR_Parameter_RESULT = 48,sp
nop.m 0
- nop.m 0
- add GR_Parameter_RESULT = 48,sp
+ nop.i 0
};;
//
-// Get the new result
+// Get the new result
//
{ .mmi
- ldfd FR_Result = [GR_Parameter_RESULT]
+ ldfd FR_Result = [GR_Parameter_RESULT]
.restore sp
- add sp = 64,sp
- mov b0 = GR_SAVE_B0
+ add sp = 64,sp
+ mov b0 = GR_SAVE_B0
};;
//
// Restore gp, ar.pfs and return
//
{ .mib
- mov gp = GR_SAVE_GP
- mov ar.pfs = GR_SAVE_PFS
- br.ret.sptk b0
+ mov gp = GR_SAVE_GP
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
};;
LOCAL_LIBM_END(__libm_error_region)
diff --git a/sysdeps/ia64/fpu/e_scalbf.S b/sysdeps/ia64/fpu/e_scalbf.S
index 07acb32..e965667 100644
--- a/sysdeps/ia64/fpu/e_scalbf.S
+++ b/sysdeps/ia64/fpu/e_scalbf.S
@@ -21,60 +21,82 @@
// products derived from this software without specific prior written
// permission.
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
+// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/02/00 Initial version
-// 01/26/01 Scalb completely reworked and now standalone version
+// 01/26/01 Scalb completely reworked and now standalone version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 08/06/03 Improved performance
//
// API
//==============================================================
-// float = scalbf (float x, float n)
+// float = scalbf (float x, float n)
// input floating point f8 and floating point f9
// output floating point f8
//
+// int_type = 0 if int is 32 bits
+// int_type = 1 if int is 64 bits
+//
// Returns x* 2**n using an fma and detects overflow
-// and underflow.
+// and underflow.
//
//
+// Strategy:
+// Compute biased exponent of result exp_Result = N + exp_X
+// Break into ranges:
+// exp_Result > 0x1007e -> Certain overflow
+// exp_Result = 0x1007e -> Possible overflow
+// 0x0ff81 <= exp_Result < 0x1007e -> No over/underflow (main path)
+// 0x0ff81 - 23 <= exp_Result < 0x0ff81 -> Possible underflow
+// exp_Result < 0x0ff81 - 23 -> Certain underflow
+FR_Big = f6
+FR_NBig = f7
FR_Floating_X = f8
FR_Result = f8
FR_Floating_N = f9
FR_Result2 = f9
-FR_Norm_N = f10
-FR_Result3 = f11
-FR_Norm_X = f12
+FR_Result3 = f10
+FR_Norm_X = f11
+FR_Two_N = f12
FR_N_float_int = f13
-FR_Two_N = f14
-FR_Two_to_Big = f15
-FR_Big = f6
-FR_NBig = f7
+FR_Norm_N = f14
+GR_neg_ov_limit= r14
+GR_big_exp = r14
GR_N_Biased = r15
GR_Big = r16
-GR_NBig = r17
-GR_Scratch = r18
-GR_Scratch1 = r19
+GR_exp_Result = r18
+GR_pos_ov_limit= r19
+GR_exp_sure_ou = r19
GR_Bias = r20
GR_N_as_int = r21
+GR_signexp_X = r22
+GR_exp_X = r23
+GR_exp_mask = r24
+GR_max_exp = r25
+GR_min_exp = r26
+GR_min_den_exp = r27
+GR_Scratch = r28
+GR_signexp_N = r29
+GR_exp_N = r30
GR_SAVE_B0 = r32
GR_SAVE_GP = r33
@@ -89,412 +111,442 @@ GLOBAL_IEEE754_ENTRY(scalbf)
//
// Is x NAN, INF, ZERO, +-?
+// Build the exponent Bias
//
{ .mfi
- alloc r32=ar.pfs,0,3,4,0
- fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
- addl GR_Scratch = 0x019C3F,r0
+ getf.exp GR_signexp_N = FR_Floating_N // Get signexp of n
+ fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero
+ mov GR_Bias = 0x0ffff
+}
+{ .mfi
+ mov GR_Big = 35000 // If N this big then certain overflow
+ fcvt.fx.trunc.s1 FR_N_float_int = FR_Floating_N // Get N in significand
+ nop.i 0
+}
+;;
+
+{ .mfi
+ getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x
+ fclass.m p7,p0 = FR_Floating_N, 0x0b // Test for n=unorm
+ nop.i 0
}
//
-// Is y a NAN, INF, ZERO, +-?
+// Normalize n
//
{ .mfi
- nop.m 999
- fclass.m.unc p6,p0 = FR_Floating_N, 0xe7 //@snan | @qnan | @inf | @zero
- addl GR_Scratch1 = 0x063BF,r0
+ mov GR_exp_mask = 0x1ffff // Exponent mask
+ fnorm.s1 FR_Norm_N = FR_Floating_N
+ nop.i 0
}
;;
//
-// Convert N to a fp integer
-// Normalize x
+// Is n NAN, INF, ZERO, +-?
//
{ .mfi
- nop.m 0
- fnorm.s1 FR_Norm_N = FR_Floating_N
- nop.i 999
+ mov GR_big_exp = 0x1003e // Exponent at which n is integer
+ fclass.m p9,p0 = FR_Floating_N, 0xe7 // @snan | @qnan | @inf | @zero
+ mov GR_max_exp = 0x1007e // Exponent of maximum float
}
-{ .mfi
- nop.m 999
- fnorm.s1 FR_Norm_X = FR_Floating_X
- nop.i 999
-};;
-
//
-// Create 2*big
-// Create 2**-big
// Normalize x
-// Branch on special values.
//
-{ .mib
- setf.exp FR_Big = GR_Scratch
- nop.i 0
-(p6) br.cond.spnt SCALBF_NAN_INF_ZERO
+{ .mfb
+ nop.m 0
+ fnorm.s1 FR_Norm_X = FR_Floating_X
+(p7) br.cond.spnt SCALBF_N_UNORM // Branch if n=unorm
}
-{ .mib
- setf.exp FR_NBig = GR_Scratch1
- nop.i 0
-(p7) br.cond.spnt SCALBF_NAN_INF_ZERO
-};;
+;;
-//
-// Convert N to a fp integer
-// Create -35000
-//
+SCALBF_COMMON1:
+// Main path continues. Also return here from n=unorm path.
+// Handle special cases if x = Nan, Inf, Zero
+{ .mfb
+ nop.m 0
+ fcmp.lt.s1 p7,p0 = FR_Floating_N, f0 // Test N negative
+(p6) br.cond.spnt SCALBF_NAN_INF_ZERO
+}
+;;
+
+// Handle special cases if n = Nan, Inf, Zero
{ .mfi
- addl GR_Scratch = 1,r0
- fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
- addl GR_NBig = -35000,r0
+ getf.sig GR_N_as_int = FR_N_float_int // Get n from significand
+ fclass.m p8,p0 = FR_Floating_X, 0x0b // Test for x=unorm
+ mov GR_exp_sure_ou = 0x1000e // Exp_N where x*2^N sure over/under
+}
+{ .mfb
+ mov GR_min_exp = 0x0ff81 // Exponent of minimum float
+ fcvt.xf FR_N_float_int = FR_N_float_int // Convert N to FP integer
+(p9) br.cond.spnt SCALBF_NAN_INF_ZERO
}
;;
-//
-// Put N if a GP register
-// Convert N_float_int to floating point value
-// Create 35000
-// Build the exponent Bias
-//
-{ .mii
- getf.sig GR_N_as_int = FR_N_float_int
- shl GR_Scratch = GR_Scratch,63
- addl GR_Big = 35000,r0
+{ .mmi
+ and GR_exp_N = GR_exp_mask, GR_signexp_N // Get exponent of N
+(p7) sub GR_Big = r0, GR_Big // Limit for N
+ nop.i 0
}
-{ .mfi
- addl GR_Bias = 0x0FFFF,r0
- fcvt.xf FR_N_float_int = FR_N_float_int
- nop.i 0
-};;
+;;
-//
-// Catch those fp values that are beyond 2**64-1
-// Is N > 35000
-// Is N < -35000
-//
-{ .mfi
- cmp.ne.unc p9,p10 = GR_N_as_int,GR_Scratch
- nop.f 0
- nop.i 0
+{ .mib
+ cmp.lt p9,p0 = GR_exp_N, GR_big_exp // N possible non-integer?
+ cmp.ge p6,p0 = GR_exp_N, GR_exp_sure_ou // N certain over/under?
+(p8) br.cond.spnt SCALBF_X_UNORM // Branch if x=unorm
}
-{ .mmi
- cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
- cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
- nop.i 0
-};;
+;;
-//
-// Is N really an int, only for those non-int indefinites?
-// Create exp bias.
-//
-{ .mfi
- add GR_N_Biased = GR_Bias,GR_N_as_int
-(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
- nop.i 0
-};;
+SCALBF_COMMON2:
+// Main path continues. Also return here from x=unorm path.
+// Create biased exponent for 2**N
+{ .mmi
+(p6) mov GR_N_as_int = GR_Big // Limit N
+;;
+ add GR_N_Biased = GR_Bias,GR_N_as_int
+ nop.i 0
+}
+;;
-//
-// Branch and return if N is not an int.
-// Main path, create 2**N
-//
{ .mfi
- setf.exp FR_Two_N = GR_N_Biased
- nop.i 999
+ setf.exp FR_Two_N = GR_N_Biased // Form 2**N
+(p9) fcmp.neq.unc.s1 p9,p0 = FR_Norm_N, FR_N_float_int // Test if N an integer
+ and GR_exp_X = GR_exp_mask, GR_signexp_X // Get exponent of X
}
-{ .mfb
- nop.m 0
-(p7) frcpa.s0 f8,p11 = f0,f0
-(p7) br.ret.spnt b0
-};;
+;;
//
-// Set denormal on denormal input x and denormal input N
+// Compute biased result exponent
+// Branch if N is not an integer
//
-{ .mfi
- nop.m 999
-(p10)fcmp.ge.s1 p6,p8 = FR_Norm_N,f0
- nop.i 0
-};;
-{ .mfi
- nop.m 999
- fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
- nop.i 999
+{ .mib
+ add GR_exp_Result = GR_exp_X, GR_N_as_int
+ mov GR_min_den_exp = 0x0ff81 - 23 // Exponent of min denorm float
+(p9) br.cond.spnt SCALBF_N_NOT_INT
}
-{ .mfi
- nop.m 999
- fcmp.ge.s0 p12,p13 = FR_Floating_N,f0
- nop.i 0
-};;
+;;
//
-// Adjust 2**N if N was very small or very large
+// Raise Denormal operand flag with compare
+// Do final operation
//
-
{ .mfi
- nop.m 0
-(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
- nop.i 0
+ cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow
+ fcmp.ge.s0 p0,p11 = FR_Floating_X,FR_Floating_N // Dummy to set denorm
+ cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
}
-{ .mlx
- nop.m 999
- movl GR_Scratch = 0x000000000003007F
-};;
-{ .mfi
- nop.m 0
-(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
- nop.i 0
+{ .mfb
+ nop.m 0
+ fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
+(p9) br.cond.spnt SCALBF_UNDERFLOW // Branch if certain underflow
}
-{ .mlx
- nop.m 999
- movl GR_Scratch1= 0x000000000001007F
-};;
+;;
+
+{ .mib
+(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow
+(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow
+(p7) br.ret.sptk b0 // Return from main path
+}
+;;
-// Set up necessary status fields
+{ .bbb
+(p6) br.cond.spnt SCALBF_OVERFLOW // Branch if certain overflow
+(p8) br.cond.spnt SCALBF_POSSIBLE_OVERFLOW // Branch if possible overflow
+(p9) br.cond.spnt SCALBF_POSSIBLE_UNDERFLOW // Branch if possible underflow
+}
+;;
+
+// Here if possible underflow.
+// Resulting exponent: 0x0ff81-23 <= exp_Result < 0x0ff81
+SCALBF_POSSIBLE_UNDERFLOW:
+//
+// Here if possible overflow.
+// Resulting exponent: 0x1007e = exp_Result
+SCALBF_POSSIBLE_OVERFLOW:
+
+// Set up necessary status fields
//
// S0 user supplied status
// S2 user supplied status + WRE + TD (Overflows)
// S3 user supplied status + FZ + TD (Underflows)
//
{ .mfi
- nop.m 999
- fsetc.s3 0x7F,0x41
- nop.i 999
+ mov GR_pos_ov_limit = 0x1007f // Exponent for positive overflow
+ fsetc.s3 0x7F,0x41
+ nop.i 0
}
{ .mfi
- nop.m 999
- fsetc.s2 0x7F,0x42
- nop.i 999
-};;
+ mov GR_neg_ov_limit = 0x3007f // Exponent for negative overflow
+ fsetc.s2 0x7F,0x42
+ nop.i 0
+}
+;;
//
-// Do final operation
+// Do final operation with s2 and s3
//
{ .mfi
- setf.exp FR_NBig = GR_Scratch
- fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
- nop.i 999
+ setf.exp FR_NBig = GR_neg_ov_limit
+ fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
+ nop.i 0
}
{ .mfi
- nop.m 999
- fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
-{ .mfi
- setf.exp FR_Big = GR_Scratch1
- fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
+ setf.exp FR_Big = GR_pos_ov_limit
+ fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
+ nop.i 0
+}
+;;
// Check for overflow or underflow.
-//
-// S0 user supplied status
-// S2 user supplied status + WRE + TD (Overflow)
-// S3 user supplied status + FZ + TD (Underflow)
-//
-//
// Restore s3
// Restore s2
//
{ .mfi
- nop.m 0
- fsetc.s3 0x7F,0x40
- nop.i 999
+ nop.m 0
+ fsetc.s3 0x7F,0x40
+ nop.i 0
}
{ .mfi
- nop.m 0
- fsetc.s2 0x7F,0x40
- nop.i 999
-};;
+ nop.m 0
+ fsetc.s2 0x7F,0x40
+ nop.i 0
+}
+;;
//
// Is the result zero?
//
{ .mfi
- nop.m 999
- fclass.m.unc p6, p0 = FR_Result3, 0x007
- nop.i 999
-}
+ nop.m 0
+ fclass.m p6, p0 = FR_Result3, 0x007
+ nop.i 0
+}
{ .mfi
- addl GR_Tag = 55, r0
- fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
- nop.i 0
-};;
+ nop.m 0
+ fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big
+ nop.i 0
+}
+;;
//
// Detect masked underflow - Tiny + Inexact Only
//
{ .mfi
- nop.m 999
+ nop.m 0
(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
- nop.i 999
-};;
+ nop.i 0
+}
+;;
//
// Is result bigger the allowed range?
// Branch out for underflow
//
{ .mfb
-(p6) addl GR_Tag = 56, r0
+ nop.m 0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
-(p6) br.cond.spnt SCALBF_UNDERFLOW
-};;
+(p6) br.cond.spnt SCALBF_UNDERFLOW
+}
+;;
//
// Branch out for overflow
//
-{ .mbb
- nop.m 0
-(p7) br.cond.spnt SCALBF_OVERFLOW
-(p9) br.cond.spnt SCALBF_OVERFLOW
-};;
+{ .bbb
+(p7) br.cond.spnt SCALBF_OVERFLOW
+(p9) br.cond.spnt SCALBF_OVERFLOW
+ br.ret.sptk b0 // Return from main path.
+}
+;;
-//
-// Return from main path.
-//
-{ .mfb
- nop.m 999
- nop.f 0
- br.ret.sptk b0;;
+// Here if result overflows
+SCALBF_OVERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0
+ addl GR_Tag = 55, r0 // Set error tag for overflow
+ br.cond.sptk __libm_error_region // Call error support for overflow
+}
+;;
+
+// Here if result underflows
+SCALBF_UNDERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0
+ addl GR_Tag = 56, r0 // Set error tag for underflow
+ br.cond.sptk __libm_error_region // Call error support for underflow
}
+;;
-SCALBF_NAN_INF_ZERO:
+SCALBF_NAN_INF_ZERO:
//
-// Convert N to a fp integer
-//
+// Before entry, N has been truncated to an integer that is held in the
+// significand of FR_N_float_int
+//
+// Convert N_float_int to floating point value
+//
{ .mfi
- addl GR_Scratch = 1,r0
- fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
- nop.i 999
+ getf.sig GR_N_as_int = FR_N_float_int
+ fclass.m p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
+ nop.i 0
}
{ .mfi
- nop.m 0
- fclass.m.unc p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
- nop.i 0
-};;
+ addl GR_Scratch = 1,r0
+ fcvt.xf FR_N_float_int = FR_N_float_int
+ nop.i 0
+}
+;;
+
{ .mfi
- nop.m 0
- fclass.m.unc p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
- shl GR_Scratch = GR_Scratch,63
-};;
+ nop.m 0
+ fclass.m p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
+ shl GR_Scratch = GR_Scratch,63
+}
+;;
+
{ .mfi
- nop.m 0
- fclass.m.unc p8,p0 = FR_Floating_N, 0x21 // @inf
- nop.i 0
-}
- { .mfi
- nop.m 0
- fclass.m.unc p9,p0 = FR_Floating_N, 0x22 // @-inf
- nop.i 0
-};;
+ nop.m 0
+ fclass.m p8,p0 = FR_Floating_N, 0x21 // @inf
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fclass.m p9,p0 = FR_Floating_N, 0x22 // @-inf
+ nop.i 0
+}
+;;
//
// Either X or N is a Nan, return result and possible raise invalid.
//
{ .mfb
- nop.m 0
-(p6) fma.s.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
+ nop.m 0
+(p6) fma.s.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
(p6) br.ret.spnt b0
-};;
+}
+;;
+
{ .mfb
- getf.sig GR_N_as_int = FR_N_float_int
-(p7) fma.s.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
+ nop.m 0
+(p7) fma.s.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
(p7) br.ret.spnt b0
-};;
+}
+;;
//
// If N + Inf do something special
// For N = -Inf, create Int
//
{ .mfb
- nop.m 0
-(p8) fma.s.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0
-(p8) br.ret.spnt b0
+ nop.m 0
+(p8) fma.s.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0
+(p8) br.ret.spnt b0
}
{ .mfi
- nop.m 0
-(p9) fnma.s.s0 FR_Floating_N = FR_Floating_N, f1, f0
- nop.i 0
-};;
+ nop.m 0
+(p9) fnma.s.s0 FR_Floating_N = FR_Floating_N, f1, f0
+ nop.i 0
+}
+;;
//
// If N==-Inf,return x/(-N)
//
{ .mfb
- nop.m 0
-(p9) frcpa.s0 FR_Result,p6 = FR_Floating_X,FR_Floating_N
-(p9) br.ret.spnt b0
-};;
-
-//
-// Convert N_float_int to floating point value
-//
-{ .mfi
- cmp.ne.unc p9,p0 = GR_N_as_int,GR_Scratch
- fcvt.xf FR_N_float_int = FR_N_float_int
- nop.i 0
-};;
+ cmp.ne p7,p0 = GR_N_as_int,GR_Scratch
+(p9) frcpa.s0 FR_Result,p0 = FR_Floating_X,FR_Floating_N
+(p9) br.ret.spnt b0
+}
+;;
//
// Is N an integer.
//
{ .mfi
- nop.m 0
-(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
- nop.i 0
-};;
+ nop.m 0
+(p7) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
+ nop.i 0
+}
+;;
//
// If N not an int, return NaN and raise invalid.
//
{ .mfb
- nop.m 0
-(p7) frcpa.s0 FR_Result,p6 = f0,f0
-(p7) br.ret.spnt b0
-};;
+ nop.m 0
+(p7) frcpa.s0 FR_Result,p0 = f0,f0
+(p7) br.ret.spnt b0
+}
+;;
//
-// Always return x in other path.
+// Always return x in other path.
//
{ .mfb
- nop.m 0
- fma.s.s0 FR_Result = FR_Floating_X,f1,f0
- br.ret.sptk b0
-};;
+ nop.m 0
+ fma.s.s0 FR_Result = FR_Floating_X,f1,f0
+ br.ret.sptk b0
+}
+;;
-GLOBAL_IEEE754_END(scalbf)
-__libm_error_region:
+// Here if n not int
+// Return NaN and raise invalid.
+SCALBF_N_NOT_INT:
+{ .mfb
+ nop.m 0
+ frcpa.s0 FR_Result,p0 = f0,f0
+ br.ret.sptk b0
+}
+;;
+
+// Here if n=unorm
+SCALBF_N_UNORM:
+{ .mfb
+ getf.exp GR_signexp_N = FR_Norm_N // Get signexp of normalized n
+ fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N // Get N in significand
+ br.cond.sptk SCALBF_COMMON1 // Return to main path
+}
+;;
+
+// Here if x=unorm
+SCALBF_X_UNORM:
+{ .mib
+ getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x
+ nop.i 0
+ br.cond.sptk SCALBF_COMMON2 // Return to main path
+}
+;;
-SCALBF_OVERFLOW:
-SCALBF_UNDERFLOW:
+GLOBAL_IEEE754_END(scalbf)
+LOCAL_LIBM_ENTRY(__libm_error_region)
//
// Get stack address of N
//
.prologue
{ .mfi
- add GR_Parameter_Y=-32,sp
+ add GR_Parameter_Y=-32,sp
nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs
+ mov GR_SAVE_PFS=ar.pfs
}
//
-// Adjust sp
+// Adjust sp
//
{ .mfi
.fframe 64
- add sp=-64,sp
+ add sp=-64,sp
nop.f 0
- mov GR_SAVE_GP=gp
+ mov GR_SAVE_GP=gp
};;
//
-// Store N on stack in correct position
+// Store N on stack in correct position
// Locate the address of x on stack
//
{ .mmi
- stfs [GR_Parameter_Y] = FR_Norm_N,16
- add GR_Parameter_X = 16,sp
+ stfs [GR_Parameter_Y] = FR_Norm_N,16
+ add GR_Parameter_X = 16,sp
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0
+ mov GR_SAVE_B0=b0
};;
//
@@ -503,42 +555,42 @@ SCALBF_UNDERFLOW:
//
.body
{ .mib
- stfs [GR_Parameter_X] = FR_Norm_X
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
+ stfs [GR_Parameter_X] = FR_Norm_X
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0
}
{ .mib
- stfs [GR_Parameter_Y] = FR_Result
+ stfs [GR_Parameter_Y] = FR_Result
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support#
+ br.call.sptk b0=__libm_error_support#
};;
//
// Get location of result on stack
//
{ .mmi
+ add GR_Parameter_RESULT = 48,sp
nop.m 0
- nop.m 0
- add GR_Parameter_RESULT = 48,sp
+ nop.i 0
};;
//
-// Get the new result
+// Get the new result
//
{ .mmi
- ldfs FR_Result = [GR_Parameter_RESULT]
+ ldfs FR_Result = [GR_Parameter_RESULT]
.restore sp
- add sp = 64,sp
- mov b0 = GR_SAVE_B0
+ add sp = 64,sp
+ mov b0 = GR_SAVE_B0
};;
//
// Restore gp, ar.pfs and return
//
{ .mib
- mov gp = GR_SAVE_GP
- mov ar.pfs = GR_SAVE_PFS
- br.ret.sptk b0
+ mov gp = GR_SAVE_GP
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
};;
LOCAL_LIBM_END(__libm_error_region)
diff --git a/sysdeps/ia64/fpu/e_scalbl.S b/sysdeps/ia64/fpu/e_scalbl.S
index d22d029..9b6467f 100644
--- a/sysdeps/ia64/fpu/e_scalbl.S
+++ b/sysdeps/ia64/fpu/e_scalbl.S
@@ -21,60 +21,82 @@
// products derived from this software without specific prior written
// permission.
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
+// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/02/00 Initial version
-// 01/26/01 Scalb completely reworked and now standalone version
+// 01/26/01 Scalb completely reworked and now standalone version
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 08/06/03 Improved performance
//
// API
//==============================================================
-// double-extended = scalbl (double-extended x, double-extended n)
+// long double = scalbl (long double x, long double n)
// input floating point f8 and floating point f9
// output floating point f8
//
+// int_type = 0 if int is 32 bits
+// int_type = 1 if int is 64 bits
+//
// Returns x* 2**n using an fma and detects overflow
-// and underflow.
+// and underflow.
//
//
+// Strategy:
+// Compute biased exponent of result exp_Result = N + exp_X
+// Break into ranges:
+// exp_Result > 0x13ffe -> Certain overflow
+// exp_Result = 0x13ffe -> Possible overflow
+// 0x0c001 <= exp_Result < 0x13ffe -> No over/underflow (main path)
+// 0x0c001 - 63 <= exp_Result < 0x0c001 -> Possible underflow
+// exp_Result < 0x0c001 - 63 -> Certain underflow
+FR_Big = f6
+FR_NBig = f7
FR_Floating_X = f8
FR_Result = f8
FR_Floating_N = f9
FR_Result2 = f9
-FR_Norm_N = f10
-FR_Result3 = f11
-FR_Norm_X = f12
+FR_Result3 = f10
+FR_Norm_X = f11
+FR_Two_N = f12
FR_N_float_int = f13
-FR_Two_N = f14
-FR_Two_to_Big = f15
-FR_Big = f6
-FR_NBig = f7
+FR_Norm_N = f14
+GR_neg_ov_limit= r14
+GR_big_exp = r14
GR_N_Biased = r15
GR_Big = r16
-GR_NBig = r17
-GR_Scratch = r18
-GR_Scratch1 = r19
+GR_exp_Result = r18
+GR_pos_ov_limit= r19
+GR_exp_sure_ou = r19
GR_Bias = r20
GR_N_as_int = r21
+GR_signexp_X = r22
+GR_exp_X = r23
+GR_exp_mask = r24
+GR_max_exp = r25
+GR_min_exp = r26
+GR_min_den_exp = r27
+GR_Scratch = r28
+GR_signexp_N = r29
+GR_exp_N = r30
GR_SAVE_B0 = r32
GR_SAVE_GP = r33
@@ -89,412 +111,442 @@ GLOBAL_IEEE754_ENTRY(scalbl)
//
// Is x NAN, INF, ZERO, +-?
+// Build the exponent Bias
//
{ .mfi
- alloc r32=ar.pfs,0,3,4,0
- fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
- addl GR_Scratch = 0x019C3F,r0
+ getf.exp GR_signexp_N = FR_Floating_N // Get signexp of n
+ fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero
+ mov GR_Bias = 0x0ffff
+}
+{ .mfi
+ mov GR_Big = 35000 // If N this big then certain overflow
+ fcvt.fx.trunc.s1 FR_N_float_int = FR_Floating_N // Get N in significand
+ nop.i 0
+}
+;;
+
+{ .mfi
+ getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x
+ fclass.m p7,p0 = FR_Floating_N, 0x0b // Test for n=unorm
+ nop.i 0
}
//
-// Is y a NAN, INF, ZERO, +-?
+// Normalize n
//
{ .mfi
- nop.m 999
- fclass.m.unc p6,p0 = FR_Floating_N, 0xe7 //@snan | @qnan | @inf | @zero
- addl GR_Scratch1 = 0x063BF,r0
+ mov GR_exp_mask = 0x1ffff // Exponent mask
+ fnorm.s1 FR_Norm_N = FR_Floating_N
+ nop.i 0
}
;;
//
-// Convert N to a fp integer
-// Normalize x
+// Is n NAN, INF, ZERO, +-?
//
{ .mfi
- nop.m 0
- fnorm.s1 FR_Norm_N = FR_Floating_N
- nop.i 999
+ mov GR_big_exp = 0x1003e // Exponent at which n is integer
+ fclass.m p9,p0 = FR_Floating_N, 0xe7 // @snan | @qnan | @inf | @zero
+ mov GR_max_exp = 0x13ffe // Exponent of maximum long double
}
-{ .mfi
- nop.m 999
- fnorm.s1 FR_Norm_X = FR_Floating_X
- nop.i 999
-};;
-
//
-// Create 2*big
-// Create 2**-big
// Normalize x
-// Branch on special values.
//
-{ .mib
- setf.exp FR_Big = GR_Scratch
- nop.i 0
-(p6) br.cond.spnt SCALBL_NAN_INF_ZERO
+{ .mfb
+ nop.m 0
+ fnorm.s1 FR_Norm_X = FR_Floating_X
+(p7) br.cond.spnt SCALBL_N_UNORM // Branch if n=unorm
}
-{ .mib
- setf.exp FR_NBig = GR_Scratch1
- nop.i 0
-(p7) br.cond.spnt SCALBL_NAN_INF_ZERO
-};;
+;;
-//
-// Convert N to a fp integer
-// Create -35000
-//
+SCALBL_COMMON1:
+// Main path continues. Also return here from n=unorm path.
+// Handle special cases if x = Nan, Inf, Zero
+{ .mfb
+ nop.m 0
+ fcmp.lt.s1 p7,p0 = FR_Floating_N, f0 // Test N negative
+(p6) br.cond.spnt SCALBL_NAN_INF_ZERO
+}
+;;
+
+// Handle special cases if n = Nan, Inf, Zero
{ .mfi
- addl GR_Scratch = 1,r0
- fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
- addl GR_NBig = -35000,r0
+ getf.sig GR_N_as_int = FR_N_float_int // Get n from significand
+ fclass.m p8,p0 = FR_Floating_X, 0x0b // Test for x=unorm
+ mov GR_exp_sure_ou = 0x1000e // Exp_N where x*2^N sure over/under
+}
+{ .mfb
+ mov GR_min_exp = 0x0c001 // Exponent of minimum long double
+ fcvt.xf FR_N_float_int = FR_N_float_int // Convert N to FP integer
+(p9) br.cond.spnt SCALBL_NAN_INF_ZERO
}
;;
-//
-// Put N if a GP register
-// Convert N_float_int to floating point value
-// Create 35000
-// Build the exponent Bias
-//
-{ .mii
- getf.sig GR_N_as_int = FR_N_float_int
- shl GR_Scratch = GR_Scratch,63
- addl GR_Big = 35000,r0
+{ .mmi
+ and GR_exp_N = GR_exp_mask, GR_signexp_N // Get exponent of N
+(p7) sub GR_Big = r0, GR_Big // Limit for N
+ nop.i 0
}
-{ .mfi
- addl GR_Bias = 0x0FFFF,r0
- fcvt.xf FR_N_float_int = FR_N_float_int
- nop.i 0
-};;
+;;
-//
-// Catch those fp values that are beyond 2**64-1
-// Is N > 35000
-// Is N < -35000
-//
-{ .mfi
- cmp.ne.unc p9,p10 = GR_N_as_int,GR_Scratch
- nop.f 0
- nop.i 0
+{ .mib
+ cmp.lt p9,p0 = GR_exp_N, GR_big_exp // N possible non-integer?
+ cmp.ge p6,p0 = GR_exp_N, GR_exp_sure_ou // N certain over/under?
+(p8) br.cond.spnt SCALBL_X_UNORM // Branch if x=unorm
}
-{ .mmi
- cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
- cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
- nop.i 0
-};;
+;;
-//
-// Is N really an int, only for those non-int indefinites?
-// Create exp bias.
-//
-{ .mfi
- add GR_N_Biased = GR_Bias,GR_N_as_int
-(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
- nop.i 0
-};;
+SCALBL_COMMON2:
+// Main path continues. Also return here from x=unorm path.
+// Create biased exponent for 2**N
+{ .mmi
+(p6) mov GR_N_as_int = GR_Big // Limit N
+;;
+ add GR_N_Biased = GR_Bias,GR_N_as_int
+ nop.i 0
+}
+;;
-//
-// Branch and return if N is not an int.
-// Main path, create 2**N
-//
{ .mfi
- setf.exp FR_Two_N = GR_N_Biased
- nop.i 999
+ setf.exp FR_Two_N = GR_N_Biased // Form 2**N
+(p9) fcmp.neq.unc.s1 p9,p0 = FR_Norm_N, FR_N_float_int // Test if N an integer
+ and GR_exp_X = GR_exp_mask, GR_signexp_X // Get exponent of X
}
-{ .mfb
- nop.m 0
-(p7) frcpa.s0 f8,p11 = f0,f0
-(p7) br.ret.spnt b0
-};;
+;;
//
-// Set denormal on denormal input x and denormal input N
+// Compute biased result exponent
+// Branch if N is not an integer
//
-{ .mfi
- nop.m 999
-(p10)fcmp.ge.s1 p6,p8 = FR_Norm_N,f0
- nop.i 0
-};;
-{ .mfi
- nop.m 999
- fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
- nop.i 999
+{ .mib
+ add GR_exp_Result = GR_exp_X, GR_N_as_int
+ mov GR_min_den_exp = 0x0c001 - 63 // Exp of min denorm long dble
+(p9) br.cond.spnt SCALBL_N_NOT_INT
}
-{ .mfi
- nop.m 999
- fcmp.ge.s0 p12,p13 = FR_Floating_N,f0
- nop.i 0
-};;
+;;
//
-// Adjust 2**N if N was very small or very large
+// Raise Denormal operand flag with compare
+// Do final operation
//
-
{ .mfi
- nop.m 0
-(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
- nop.i 0
+ cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow
+ fcmp.ge.s0 p0,p11 = FR_Floating_X,FR_Floating_N // Dummy to set denorm
+ cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
}
-{ .mlx
- nop.m 999
- movl GR_Scratch = 0x0000000000033FFF
-};;
-{ .mfi
- nop.m 0
-(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
- nop.i 0
+{ .mfb
+ nop.m 0
+ fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
+(p9) br.cond.spnt SCALBL_UNDERFLOW // Branch if certain underflow
}
-{ .mlx
- nop.m 999
- movl GR_Scratch1= 0x0000000000013FFF
-};;
+;;
+
+{ .mib
+(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow
+(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow
+(p7) br.ret.sptk b0 // Return from main path
+}
+;;
-// Set up necessary status fields
+{ .bbb
+(p6) br.cond.spnt SCALBL_OVERFLOW // Branch if certain overflow
+(p8) br.cond.spnt SCALBL_POSSIBLE_OVERFLOW // Branch if possible overflow
+(p9) br.cond.spnt SCALBL_POSSIBLE_UNDERFLOW // Branch if possible underflow
+}
+;;
+
+// Here if possible underflow.
+// Resulting exponent: 0x0c001-63 <= exp_Result < 0x0c001
+SCALBL_POSSIBLE_UNDERFLOW:
+//
+// Here if possible overflow.
+// Resulting exponent: 0x13ffe = exp_Result
+SCALBL_POSSIBLE_OVERFLOW:
+
+// Set up necessary status fields
//
// S0 user supplied status
// S2 user supplied status + WRE + TD (Overflows)
// S3 user supplied status + FZ + TD (Underflows)
//
{ .mfi
- nop.m 999
- fsetc.s3 0x7F,0x41
- nop.i 999
+ mov GR_pos_ov_limit = 0x13fff // Exponent for positive overflow
+ fsetc.s3 0x7F,0x41
+ nop.i 0
}
{ .mfi
- nop.m 999
- fsetc.s2 0x7F,0x42
- nop.i 999
-};;
+ mov GR_neg_ov_limit = 0x33fff // Exponent for negative overflow
+ fsetc.s2 0x7F,0x42
+ nop.i 0
+}
+;;
//
-// Do final operation
+// Do final operation with s2 and s3
//
{ .mfi
- setf.exp FR_NBig = GR_Scratch
- fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
- nop.i 999
+ setf.exp FR_NBig = GR_neg_ov_limit
+ fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
+ nop.i 0
}
{ .mfi
- nop.m 999
- fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
-{ .mfi
- setf.exp FR_Big = GR_Scratch1
- fma.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
+ setf.exp FR_Big = GR_pos_ov_limit
+ fma.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
+ nop.i 0
+}
+;;
// Check for overflow or underflow.
-//
-// S0 user supplied status
-// S2 user supplied status + WRE + TD (Overflow)
-// S3 user supplied status + FZ + TD (Underflow)
-//
-//
// Restore s3
// Restore s2
//
{ .mfi
- nop.m 0
- fsetc.s3 0x7F,0x40
- nop.i 999
+ nop.m 0
+ fsetc.s3 0x7F,0x40
+ nop.i 0
}
{ .mfi
- nop.m 0
- fsetc.s2 0x7F,0x40
- nop.i 999
-};;
+ nop.m 0
+ fsetc.s2 0x7F,0x40
+ nop.i 0
+}
+;;
//
// Is the result zero?
//
{ .mfi
- nop.m 999
- fclass.m.unc p6, p0 = FR_Result3, 0x007
- nop.i 999
-}
+ nop.m 0
+ fclass.m p6, p0 = FR_Result3, 0x007
+ nop.i 0
+}
{ .mfi
- addl GR_Tag = 51, r0
- fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
- nop.i 0
-};;
+ nop.m 0
+ fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big
+ nop.i 0
+}
+;;
//
// Detect masked underflow - Tiny + Inexact Only
//
{ .mfi
- nop.m 999
+ nop.m 0
(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
- nop.i 999
-};;
+ nop.i 0
+}
+;;
//
// Is result bigger the allowed range?
// Branch out for underflow
//
{ .mfb
-(p6) addl GR_Tag = 52, r0
+ nop.m 0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
-(p6) br.cond.spnt SCALBL_UNDERFLOW
-};;
+(p6) br.cond.spnt SCALBL_UNDERFLOW
+}
+;;
//
// Branch out for overflow
//
-{ .mbb
- nop.m 0
-(p7) br.cond.spnt SCALBL_OVERFLOW
-(p9) br.cond.spnt SCALBL_OVERFLOW
-};;
+{ .bbb
+(p7) br.cond.spnt SCALBL_OVERFLOW
+(p9) br.cond.spnt SCALBL_OVERFLOW
+ br.ret.sptk b0 // Return from main path.
+}
+;;
-//
-// Return from main path.
-//
-{ .mfb
- nop.m 999
- nop.f 0
- br.ret.sptk b0;;
+// Here if result overflows
+SCALBL_OVERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0
+ addl GR_Tag = 51, r0 // Set error tag for overflow
+ br.cond.sptk __libm_error_region // Call error support for overflow
+}
+;;
+
+// Here if result underflows
+SCALBL_UNDERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0
+ addl GR_Tag = 52, r0 // Set error tag for underflow
+ br.cond.sptk __libm_error_region // Call error support for underflow
}
+;;
-SCALBL_NAN_INF_ZERO:
+SCALBL_NAN_INF_ZERO:
//
-// Convert N to a fp integer
-//
+// Before entry, N has been converted to a fp integer in significand of
+// FR_N_float_int
+//
+// Convert N_float_int to floating point value
+//
{ .mfi
- addl GR_Scratch = 1,r0
- fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N
- nop.i 999
+ getf.sig GR_N_as_int = FR_N_float_int
+ fclass.m p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
+ nop.i 0
}
{ .mfi
- nop.m 0
- fclass.m.unc p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
- nop.i 0
-};;
+ addl GR_Scratch = 1,r0
+ fcvt.xf FR_N_float_int = FR_N_float_int
+ nop.i 0
+}
+;;
+
{ .mfi
- nop.m 0
- fclass.m.unc p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
- shl GR_Scratch = GR_Scratch,63
-};;
+ nop.m 0
+ fclass.m p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
+ shl GR_Scratch = GR_Scratch,63
+}
+;;
+
{ .mfi
- nop.m 0
- fclass.m.unc p8,p0 = FR_Floating_N, 0x21 // @inf
- nop.i 0
-}
- { .mfi
- nop.m 0
- fclass.m.unc p9,p0 = FR_Floating_N, 0x22 // @-inf
- nop.i 0
-};;
+ nop.m 0
+ fclass.m p8,p0 = FR_Floating_N, 0x21 // @inf
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fclass.m p9,p0 = FR_Floating_N, 0x22 // @-inf
+ nop.i 0
+}
+;;
//
// Either X or N is a Nan, return result and possible raise invalid.
//
{ .mfb
- nop.m 0
-(p6) fma.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
+ nop.m 0
+(p6) fma.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
(p6) br.ret.spnt b0
-};;
+}
+;;
+
{ .mfb
- getf.sig GR_N_as_int = FR_N_float_int
-(p7) fma.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
+ nop.m 0
+(p7) fma.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
(p7) br.ret.spnt b0
-};;
+}
+;;
//
// If N + Inf do something special
// For N = -Inf, create Int
//
{ .mfb
- nop.m 0
-(p8) fma.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0
-(p8) br.ret.spnt b0
+ nop.m 0
+(p8) fma.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0
+(p8) br.ret.spnt b0
}
{ .mfi
- nop.m 0
-(p9) fnma.s0 FR_Floating_N = FR_Floating_N, f1, f0
- nop.i 0
-};;
+ nop.m 0
+(p9) fnma.s0 FR_Floating_N = FR_Floating_N, f1, f0
+ nop.i 0
+}
+;;
//
// If N==-Inf,return x/(-N)
//
{ .mfb
- nop.m 0
-(p9) frcpa.s0 FR_Result,p6 = FR_Floating_X,FR_Floating_N
-(p9) br.ret.spnt b0
-};;
-
-//
-// Convert N_float_int to floating point value
-//
-{ .mfi
- cmp.ne.unc p9,p0 = GR_N_as_int,GR_Scratch
- fcvt.xf FR_N_float_int = FR_N_float_int
- nop.i 0
-};;
+ cmp.ne p7,p0 = GR_N_as_int,GR_Scratch
+(p9) frcpa.s0 FR_Result,p0 = FR_Floating_X,FR_Floating_N
+(p9) br.ret.spnt b0
+}
+;;
//
// Is N an integer.
//
{ .mfi
- nop.m 0
-(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
- nop.i 0
-};;
+ nop.m 0
+(p7) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
+ nop.i 0
+}
+;;
//
// If N not an int, return NaN and raise invalid.
//
{ .mfb
- nop.m 0
-(p7) frcpa.s0 FR_Result,p6 = f0,f0
-(p7) br.ret.spnt b0
-};;
+ nop.m 0
+(p7) frcpa.s0 FR_Result,p0 = f0,f0
+(p7) br.ret.spnt b0
+}
+;;
//
-// Always return x in other path.
+// Always return x in other path.
//
{ .mfb
- nop.m 0
- fma.s0 FR_Result = FR_Floating_X,f1,f0
- br.ret.sptk b0
-};;
+ nop.m 0
+ fma.s0 FR_Result = FR_Floating_X,f1,f0
+ br.ret.sptk b0
+}
+;;
-GLOBAL_IEEE754_END(scalbl)
-__libm_error_region:
+// Here if n not int
+// Return NaN and raise invalid.
+SCALBL_N_NOT_INT:
+{ .mfb
+ nop.m 0
+ frcpa.s0 FR_Result,p0 = f0,f0
+ br.ret.sptk b0
+}
+;;
+
+// Here if n=unorm
+SCALBL_N_UNORM:
+{ .mfb
+ getf.exp GR_signexp_N = FR_Norm_N // Get signexp of normalized n
+ fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N // Get N in significand
+ br.cond.sptk SCALBL_COMMON1 // Return to main path
+}
+;;
+
+// Here if x=unorm
+SCALBL_X_UNORM:
+{ .mib
+ getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x
+ nop.i 0
+ br.cond.sptk SCALBL_COMMON2 // Return to main path
+}
+;;
-SCALBL_OVERFLOW:
-SCALBL_UNDERFLOW:
+GLOBAL_IEEE754_END(scalbl)
+LOCAL_LIBM_ENTRY(__libm_error_region)
//
// Get stack address of N
//
.prologue
{ .mfi
- add GR_Parameter_Y=-32,sp
+ add GR_Parameter_Y=-32,sp
nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs
+ mov GR_SAVE_PFS=ar.pfs
}
//
-// Adjust sp
+// Adjust sp
//
{ .mfi
.fframe 64
- add sp=-64,sp
+ add sp=-64,sp
nop.f 0
- mov GR_SAVE_GP=gp
+ mov GR_SAVE_GP=gp
};;
//
-// Store N on stack in correct position
+// Store N on stack in correct position
// Locate the address of x on stack
//
{ .mmi
- stfe [GR_Parameter_Y] = FR_Norm_N,16
- add GR_Parameter_X = 16,sp
+ stfe [GR_Parameter_Y] = FR_Norm_N,16
+ add GR_Parameter_X = 16,sp
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0
+ mov GR_SAVE_B0=b0
};;
//
@@ -503,42 +555,42 @@ SCALBL_UNDERFLOW:
//
.body
{ .mib
- stfe [GR_Parameter_X] = FR_Norm_X
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
+ stfe [GR_Parameter_X] = FR_Norm_X
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0
}
{ .mib
- stfe [GR_Parameter_Y] = FR_Result
+ stfe [GR_Parameter_Y] = FR_Result
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support#
+ br.call.sptk b0=__libm_error_support#
};;
//
// Get location of result on stack
//
{ .mmi
+ add GR_Parameter_RESULT = 48,sp
nop.m 0
- nop.m 0
- add GR_Parameter_RESULT = 48,sp
+ nop.i 0
};;
//
-// Get the new result
+// Get the new result
//
{ .mmi
- ldfe FR_Result = [GR_Parameter_RESULT]
+ ldfe FR_Result = [GR_Parameter_RESULT]
.restore sp
- add sp = 64,sp
- mov b0 = GR_SAVE_B0
+ add sp = 64,sp
+ mov b0 = GR_SAVE_B0
};;
//
// Restore gp, ar.pfs and return
//
{ .mib
- mov gp = GR_SAVE_GP
- mov ar.pfs = GR_SAVE_PFS
- br.ret.sptk b0
+ mov gp = GR_SAVE_GP
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
};;
LOCAL_LIBM_END(__libm_error_region)
diff --git a/sysdeps/ia64/fpu/e_sinh.S b/sysdeps/ia64/fpu/e_sinh.S
index 84c312c..5910d4a 100644
--- a/sysdeps/ia64/fpu/e_sinh.S
+++ b/sysdeps/ia64/fpu/e_sinh.S
@@ -850,6 +850,7 @@ SINH_UNORM:
GLOBAL_IEEE754_END(sinh)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/e_sinhf.S b/sysdeps/ia64/fpu/e_sinhf.S
index 4a407b7..d01d830 100644
--- a/sysdeps/ia64/fpu/e_sinhf.S
+++ b/sysdeps/ia64/fpu/e_sinhf.S
@@ -689,6 +689,7 @@ SINH_UNORM:
GLOBAL_IEEE754_END(sinhf)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/e_sinhl.S b/sysdeps/ia64/fpu/e_sinhl.S
index ccc996a..5b4a4ad 100644
--- a/sysdeps/ia64/fpu/e_sinhl.S
+++ b/sysdeps/ia64/fpu/e_sinhl.S
@@ -1055,6 +1055,7 @@ SINH_HUGE:
GLOBAL_IEEE754_END(sinhl)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
diff --git a/sysdeps/ia64/fpu/e_sqrt.S b/sysdeps/ia64/fpu/e_sqrt.S
index 0e208b3..53e60ef 100644
--- a/sysdeps/ia64/fpu/e_sqrt.S
+++ b/sysdeps/ia64/fpu/e_sqrt.S
@@ -252,6 +252,7 @@ GLOBAL_IEEE754_ENTRY(sqrt)
}
// END DOUBLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
GLOBAL_IEEE754_END(sqrt)
+
// Stack operations when calling error support.
// (1) (2) (3) (call) (4)
// sp -> + psp -> + psp -> + sp -> +
diff --git a/sysdeps/ia64/fpu/e_sqrtf.S b/sysdeps/ia64/fpu/e_sqrtf.S
index bee0df7..daa2045 100644
--- a/sysdeps/ia64/fpu/e_sqrtf.S
+++ b/sysdeps/ia64/fpu/e_sqrtf.S
@@ -204,6 +204,7 @@ GLOBAL_IEEE754_ENTRY(sqrtf)
//
GLOBAL_IEEE754_END(sqrtf)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mii
diff --git a/sysdeps/ia64/fpu/e_sqrtl.S b/sysdeps/ia64/fpu/e_sqrtl.S
index ec14756..6a5735d 100644
--- a/sysdeps/ia64/fpu/e_sqrtl.S
+++ b/sysdeps/ia64/fpu/e_sqrtl.S
@@ -221,6 +221,7 @@ alloc r32= ar.pfs,0,5,4,0
// END DOUBLE EXTENDED PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
GLOBAL_IEEE754_END(sqrtl)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/gen_import_file_list b/sysdeps/ia64/fpu/gen_import_file_list
index a02bb31..b8bd6a5 100644
--- a/sysdeps/ia64/fpu/gen_import_file_list
+++ b/sysdeps/ia64/fpu/gen_import_file_list
@@ -16,8 +16,18 @@ import_c() {
echo "$1 $libm_dir/$2 $3"
}
-import_c DUMMY libm_support.h libm_support.h
-import_c DUMMY libm_error.c libm_error.c
+dummy_files="
+libm_cpu_defs.h
+libm_error_codes.h
+libm_support.h
+libm_error.c
+"
+
+for f in $dummy_files
+do
+ import_c DUMMY $f $f
+done
+
import_c scalblnf scalblnf.c s_scalblnf.c
for f in acos acosh asin atanh cosh exp2 exp10 fmod log2 pow remainder \
diff --git a/sysdeps/ia64/fpu/import_file.awk b/sysdeps/ia64/fpu/import_file.awk
index c6335dc..97fe77e 100644
--- a/sysdeps/ia64/fpu/import_file.awk
+++ b/sysdeps/ia64/fpu/import_file.awk
@@ -7,10 +7,13 @@ BEGIN {
getline;
while (!match($0, "^// WARRANTY DISCLAIMER")) {
print;
- getline;
+ if (!getline) {
+ break;
+ }
}
- getline;
- printf \
+ if (getline)
+ {
+ printf \
"// Redistribution and use in source and binary forms, with or without\n" \
"// modification, are permitted provided that the following conditions are\n" \
"// met:\n" \
@@ -25,10 +28,11 @@ BEGIN {
"// * The name of Intel Corporation may not be used to endorse or promote\n" \
"// products derived from this software without specific prior written\n" \
"// permission.\n\n";
- if (LICENSE_ONLY == "y") {
- do {
- print;
- } while (getline);
+ if (LICENSE_ONLY == "y") {
+ do {
+ print;
+ } while (getline);
+ }
}
}
@@ -115,7 +119,6 @@ BEGIN {
print
getline;
}
- getline;
printf "%s_END(%s)\n", type, name;
if (match(name, "^exp10[fl]?$")) {
t=substr(name,6)
diff --git a/sysdeps/ia64/fpu/import_intel_libm b/sysdeps/ia64/fpu/import_intel_libm
index 752ba37..1aaa646 100644
--- a/sysdeps/ia64/fpu/import_intel_libm
+++ b/sysdeps/ia64/fpu/import_intel_libm
@@ -16,6 +16,7 @@ import_s() {
# $2 = source file-name
# $3 = destination file-name
echo "Importing $1 from $2 -> $3"
+ rm -f $3
awk -f import_file.awk FUNC=$1 $2 > $3
}
@@ -24,19 +25,82 @@ import_c() {
# $2 = source file-name
# $3 = destination file-name
echo "Importing $1 from $2 -> $3"
+ rm -f $3
awk -f import_file.awk LICENSE_ONLY=y $2 > $3
}
do_imports() {
while read func_pattern src_file dst_file; do
- if [ "$(expr $src_file : '.*\(c\)$')" = "c" ]; then
+ case $src_file in
+ *.[ch])
import_c "$func_pattern" "$src_file" "$dst_file"
- else
+ ;;
+ *)
import_s "$func_pattern" "$src_file" "$dst_file"
- fi
+ ;;
+ esac
done
}
./gen_import_file_list $libm_dir > import_file_list
do_imports < import_file_list
+
+emptyfiles="
+e_gamma_r.c
+e_gammaf_r.c
+e_gammal_r.c
+s_sincos.c
+s_sincosf.c
+s_sincosl.c
+t_exp.c
+w_acosh.c
+w_acoshf.c
+w_acoshl.c
+w_atanh.c
+w_atanhf.c
+w_atanhl.c
+w_exp10.c
+w_exp10f.c
+w_exp10l.c
+w_exp2.c
+w_exp2f.c
+w_exp2l.c
+w_expl.c
+w_lgamma_r.c
+w_lgammaf_r.c
+w_lgammal_r.c
+w_log2.c
+w_log2f.c
+w_log2l.c
+w_sinh.c
+w_sinhf.c
+w_sinhl.c
+"
+for f in $emptyfiles
+do
+ rm -f $f
+ echo "/* Not needed. */" > $f
+done
+
+removedfiles="
+libm_atan2_reg.S
+s_ldexp.S
+s_ldexpf.S
+s_ldexpl.S
+s_scalbn.S
+s_scalbnf.S
+s_scalbnl.S
+"
+
+rm -f $removedfiles
+
+for f in lgammaf_r.c lgammal_r.c lgamma_r.c
+do
+ import_c $f $libm_dir/$f e_$f
+done
+
+for f in lgamma.c lgammaf.c lgammal.c
+do
+ import_c $f $libm_dir/$f w_$f
+done
diff --git a/sysdeps/ia64/fpu/libm_error.c b/sysdeps/ia64/fpu/libm_error.c
index 42ca36d..a7f9dae 100644
--- a/sysdeps/ia64/fpu/libm_error.c
+++ b/sysdeps/ia64/fpu/libm_error.c
@@ -1,7 +1,7 @@
/* file: libm_error.c */
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (c) 2000 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@@ -23,12 +23,12 @@
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
@@ -41,8 +41,8 @@
// History
//==============================================================
// 2/02/00: Initial version
-// 3/22/00: Updated to support flexible and dynamic error handling.
-// 8/16/00: Changed all matherr function-calls to use the pmatherr
+// 3/22/00: Updated to support flexible and dynamic error handling.
+// 8/16/00: Changed all matherr function-calls to use the pmatherr
// function-pointers.
// 10/03/00: Corrected a scalb type.
// 11/28/00: Changed INPUT_XL to INPUT_XD for scalb_underflow case.
@@ -61,15 +61,30 @@
// 01/28/02: Corrected SVID/XOPEN stderr message for log2
// 05/20/02: Added code for cot
// 07/01/02: Added code for sinhcosh
-// 10/04/02: Underflow detection in ISOC path redefined to
-// be zero rather than tiny and inexact
+// 10/04/02: Underflow detection in ISOC path redefined to
+// be zero rather than tiny and inexact
// 12/06/02: Added code for annuity and compound
// 01/30/03: Corrected test for underflow in ISOC path to not set denormal
// 04/10/03: Corrected ISOC branch for gamma/lgamma to return ERANGE for neg ints.
// Added code for tgamma
-// 04/11/03: Corrected POSIX/SVID/XOPEN branches for gamma/lgamma
+// 04/11/03: Corrected POSIX/SVID/XOPEN branches for gamma/lgamma
// to return EDOM for neg ints.
-//
+// 09/08/03: Corrected XOPEN/SVID result for pow overflow with neg x, pos y.
+// 10/14/03: Added ILP32 ifdef
+// 12/12/03: Corrected XOPEN/SVID results for powf_zero_to_negative,
+// powl_neg_to_non_integer, atan2f_zero, atan2df_zero,
+// acoshf_lt_one, acosh_lt_one.
+// 12/07/04: Cast name strings as char *.
+// 12/08/04: Corrected POSIX behavior for atan2_zero, acos_gt_one, asin_gt_one,
+// log_negative, log10_negative, log1p_negative, and log2_negative.
+// Added SVID and XOPEN case log2l_zero.
+// 12/13/04: Corrected POSIX behavior for exp2_overflow, exp2_underflow,
+// exp10_overflow, exp10_underflow. Added ISOC to set errno for
+// exp10_underflow.
+// 12/14/04: Corrected POSIX behavior for nextafter_overflow,
+// nextafter_underflow, nexttoward_overflow, nexttoward_underflow.
+// Added ISOC to set errno for nextafter and nexttoward underflow.
+// 12/15/04: Corrected POSIX behavior for exp, exp2, and exp10 underflow.
#include <errno.h>
#include <stdio.h>
@@ -102,13 +117,13 @@ int (*pmatherr)(struct EXC_DECL_D*) = MATHERR_D;
int (*pmatherrl)(struct exceptionl*) = matherrl;
void __libm_setusermatherrf( int(*user_merrf)(struct exceptionf*) )
-{ pmatherrf = ( (user_merrf==NULL)? (MATHERR_F) : (user_merrf) ); }
+{ pmatherrf = ( (user_merrf==NULL)? (MATHERR_F) : (user_merrf) ); }
void __libm_setusermatherr( int(*user_merr)(struct EXC_DECL_D*) )
-{ pmatherr = ( (user_merr==NULL)? (MATHERR_D) : (user_merr) ); }
+{ pmatherr = ( (user_merr==NULL)? (MATHERR_D) : (user_merr) ); }
void __libm_setusermatherrl( int(*user_merrl)(struct exceptionl*) )
-{ pmatherrl = ( (user_merrl==NULL)? (matherrl) : (user_merrl) ); }
+{ pmatherrl = ( (user_merrl==NULL)? (matherrl) : (user_merrl) ); }
#endif /* !_LIBC */
@@ -120,14 +135,14 @@ void __libm_error_support(void *arg1,void *arg2,void *retval,error_types input_t
# ifdef __cplusplus
struct __exception exc;
-# else
+# else
struct exception exc;
-# endif
+# endif
struct exceptionf excf;
struct exceptionl excl;
-# if defined(__GNUC__)
+# ifdef __GNUC__
#define ALIGNIT __attribute__ ((__aligned__ (16)))
# elif defined opensource
#define ALIGNIT
@@ -137,75 +152,87 @@ struct exceptionl excl;
# ifdef SIZE_LONG_INT_64
#define __INT_64__ signed long
-# else
+# else
+# if ILP32
+#define __INT_64__ signed long long
+# else
#define __INT_64__ __int64
# endif
+# endif
+
-const char float_inf[4] = {0x00,0x00,0x80,0x7F};
-const char float_huge[4] = {0xFF,0xFF,0x7F,0x7F};
-const char float_zero[4] = {0x00,0x00,0x00,0x00};
-const char float_neg_inf[4] = {0x00,0x00,0x80,0xFF};
-const char float_neg_huge[4] = {0xFF,0xFF,0x7F,0xFF};
-const char float_neg_zero[4] = {0x00,0x00,0x00,0x80};
+#define STATIC static
+
+STATIC const char float_inf[4] = {0x00,0x00,0x80,0x7F};
+STATIC const char float_huge[4] = {0xFF,0xFF,0x7F,0x7F};
+STATIC const char float_zero[4] = {0x00,0x00,0x00,0x00};
+STATIC const char float_neg_inf[4] = {0x00,0x00,0x80,0xFF};
+STATIC const char float_neg_huge[4] = {0xFF,0xFF,0x7F,0xFF};
+STATIC const char float_neg_zero[4] = {0x00,0x00,0x00,0x80};
ALIGNIT
-const char double_inf[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0x7F};
-#if 0 /* unused */
+STATIC const char double_inf[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0x7F};
+#ifndef _LIBC
ALIGNIT
-const char double_huge[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0x7F};
+STATIC const char double_huge[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0x7F};
#endif
ALIGNIT
-const char double_zero[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
+STATIC const char double_zero[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
ALIGNIT
-const char double_neg_inf[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0xFF};
-#if 0 /* unused */
+STATIC const char double_neg_inf[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0xFF};
+#ifndef _LIBC
ALIGNIT
-const char double_neg_huge[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0xFF};
+STATIC const char double_neg_huge[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0xFF};
#endif
ALIGNIT
-const char double_neg_zero[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80};
+STATIC const char double_neg_zero[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80};
ALIGNIT
-const char long_double_inf[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0x7F,0x00,0x00,0x00,0x00,0x00,0x00};
-#if 0 /* unused */
+STATIC const char long_double_inf[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0x7F,0x00,0x00,0x00,0x00,0x00,0x00};
ALIGNIT
-const char long_double_huge[16] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0x7F,0x00,0x00,0x00,0x00,0x00,0x00};
+#ifndef _LIBC
+STATIC const char long_double_huge[16] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0x7F,0x00,0x00,0x00,0x00,0x00,0x00};
#endif
ALIGNIT
-const char long_double_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
+STATIC const char long_double_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
ALIGNIT
-const char long_double_neg_inf[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0xFF,0x00,0x00,0x00,0x00,0x00,0x00};
-#if 0 /* unused */
+STATIC const char long_double_neg_inf[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0xFF,0x00,0x00,0x00,0x00,0x00,0x00};
ALIGNIT
-const char long_double_neg_huge[16] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0xFF,0x00,0x00,0x00,0x00,0x00,0x00};
+#ifndef _LIBC
+STATIC const char long_double_neg_huge[16] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0xFF,0x00,0x00,0x00,0x00,0x00,0x00};
#endif
ALIGNIT
-const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00,0x00};
+STATIC const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00,0x00};
+
-#define RETVAL_HUGE_VALL *(long double *)retval = *(long double *)long_double_inf
-#define RETVAL_NEG_HUGE_VALL *(long double *)retval = *(long double *)long_double_neg_inf
-#define RETVAL_HUGEL *(long double *)retval = (long double)*(float *)float_huge
-#define RETVAL_NEG_HUGEL *(long double *)retval =(long double)*(float*)float_neg_huge
+#define RETVAL_HUGE_VALL *(long double *)retval = *(long double *)long_double_inf
+#define RETVAL_NEG_HUGE_VALL *(long double *)retval = *(long double *)long_double_neg_inf
+#define RETVAL_HUGEL *(long double *)retval = (long double)*(float *)float_huge
+#define RETVAL_NEG_HUGEL *(long double *)retval =(long double)*(float*)float_neg_huge
#define RETVAL_HUGE_VALD *(double *)retval = *(double *) double_inf
#define RETVAL_NEG_HUGE_VALD *(double *)retval = *(double *) double_neg_inf
#define RETVAL_HUGED *(double *)retval = (double) *(float *)float_huge
-#define RETVAL_NEG_HUGED *(double *)retval = (double) *(float *) float_neg_huge
+#define RETVAL_NEG_HUGED *(double *)retval = (double) *(float *) float_neg_huge
#define RETVAL_HUGE_VALF *(float *)retval = *(float *) float_inf
#define RETVAL_NEG_HUGE_VALF *(float *)retval = *(float *) float_neg_inf
#define RETVAL_HUGEF *(float *)retval = *(float *) float_huge
-#define RETVAL_NEG_HUGEF *(float *)retval = *(float *) float_neg_huge
+#define RETVAL_NEG_HUGEF *(float *)retval = *(float *) float_neg_huge
-#define RETVAL_ZEROL *(long double *)retval = *(long double *)long_double_zero
-#define RETVAL_ZEROD *(double *)retval = *(double *)double_zero
-#define RETVAL_ZEROF *(float *)retval = *(float *)float_zero
+#define ZEROL_VALUE *(long double *)long_double_zero
+#define ZEROD_VALUE *(double *)double_zero
+#define ZEROF_VALUE *(float *)float_zero
-#define RETVAL_NEG_ZEROL *(long double *)retval = *(long double *)long_double_neg_zero
-#define RETVAL_NEG_ZEROD *(double *)retval = *(double *)double_neg_zero
-#define RETVAL_NEG_ZEROF *(float *)retval = *(float *)float_neg_zero
+#define RETVAL_ZEROL *(long double *)retval = *(long double *)long_double_zero
+#define RETVAL_ZEROD *(double *)retval = *(double *)double_zero
+#define RETVAL_ZEROF *(float *)retval = *(float *)float_zero
-#define RETVAL_ONEL *(long double *)retval = (long double) 1.0
-#define RETVAL_ONED *(double *)retval = 1.0
-#define RETVAL_ONEF *(float *)retval = 1.0f
+#define RETVAL_NEG_ZEROL *(long double *)retval = *(long double *)long_double_neg_zero
+#define RETVAL_NEG_ZEROD *(double *)retval = *(double *)double_neg_zero
+#define RETVAL_NEG_ZEROF *(float *)retval = *(float *)float_neg_zero
+
+#define RETVAL_ONEL *(long double *)retval = (long double) 1.0
+#define RETVAL_ONED *(double *)retval = 1.0
+#define RETVAL_ONEF *(float *)retval = 1.0f
#define NOT_MATHERRL excl.arg1=*(long double *)arg1;excl.arg2=*(long double *)arg2;excl.retval=*(long double *)retval;if(!pmatherrl(&excl))
#define NOT_MATHERRD exc.arg1=*(double *)arg1;exc.arg2=*(double *)arg2;exc.retval=*(double *)retval;if(!pmatherr(&exc))
@@ -213,9 +240,9 @@ const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0
#define ifSVID if(_LIB_VERSIONIMF==_SVID_)
-#define NAMEL excl.name
-#define NAMED exc.name
-#define NAMEF excf.name
+#define NAMEL excl.name
+#define NAMED exc.name
+#define NAMEF excf.name
//
// These should work OK for MS because they are ints -
@@ -230,20 +257,20 @@ const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0
#define PLOSS 6
#define SINGL excl.type = SING
-#define DOMAINL excl.type = DOMAIN
-#define OVERFLOWL excl.type = OVERFLOW
-#define UNDERFLOWL excl.type = UNDERFLOW
-#define TLOSSL excl.type = TLOSS
+#define DOMAINL excl.type = DOMAIN
+#define OVERFLOWL excl.type = OVERFLOW
+#define UNDERFLOWL excl.type = UNDERFLOW
+#define TLOSSL excl.type = TLOSS
#define SINGD exc.type = SING
-#define DOMAIND exc.type = DOMAIN
-#define OVERFLOWD exc.type = OVERFLOW
-#define UNDERFLOWD exc.type = UNDERFLOW
-#define TLOSSD exc.type = TLOSS
+#define DOMAIND exc.type = DOMAIN
+#define OVERFLOWD exc.type = OVERFLOW
+#define UNDERFLOWD exc.type = UNDERFLOW
+#define TLOSSD exc.type = TLOSS
#define SINGF excf.type = SING
-#define DOMAINF excf.type = DOMAIN
-#define OVERFLOWF excf.type = OVERFLOW
-#define UNDERFLOWF excf.type = UNDERFLOW
-#define TLOSSF excf.type = TLOSS
+#define DOMAINF excf.type = DOMAIN
+#define OVERFLOWF excf.type = OVERFLOW
+#define UNDERFLOWF excf.type = UNDERFLOW
+#define TLOSSF excf.type = TLOSS
#define INPUT_XL (excl.arg1=*(long double*)arg1)
#define INPUT_XD (exc.arg1=*(double*)arg1)
@@ -251,9 +278,10 @@ const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0
#define INPUT_YL (excl.arg2=*(long double*)arg2)
#define INPUT_YD (exc.arg2=*(double*)arg2)
#define INPUT_YF (excf.arg2=*(float*)arg2)
-#define INPUT_RESL (*(long double *)retval)
+#define INPUT_RESL (*(long double *)retval)
#define INPUT_RESD (*(double *)retval)
#define INPUT_RESF (*(float *)retval)
+#define INPUT_RESI64 (*(__INT_64__ *)retval)
#define WRITEL_LOG_ZERO fputs("logl: SING error\n",stderr)
#define WRITED_LOG_ZERO fputs("log: SING error\n",stderr)
@@ -271,7 +299,7 @@ const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0
#define WRITED_Y1_ZERO fputs("y1: DOMAIN error\n",stderr)
#define WRITEF_Y1_ZERO fputs("y1f: DOMAIN error\n",stderr)
#define WRITEL_Y1_NEGATIVE fputs("y1l: DOMAIN error\n",stderr)
-#define WRITED_Y1_NEGATIUE fputs("y1: DOMAIN error\n",stderr)
+#define WRITED_Y1_NEGATIVE fputs("y1: DOMAIN error\n",stderr)
#define WRITEF_Y1_NEGATIVE fputs("y1f: DOMAIN error\n",stderr)
#define WRITEL_YN_ZERO fputs("ynl: DOMAIN error\n",stderr)
#define WRITED_YN_ZERO fputs("yn: DOMAIN error\n",stderr)
@@ -286,13 +314,13 @@ const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0
#define WRITED_LOG1P_NEGATIVE fputs("log1p: DOMAIN error\n",stderr)
#define WRITEF_LOG1P_NEGATIVE fputs("log1pf: DOMAIN error\n",stderr)
#define WRITEL_LOG10_ZERO fputs("log10l: SING error\n",stderr)
-#define WRITED_LOG10_ZERO fputs("log10: SING error\n",stderr)
+#define WRITED_LOG10_ZERO fputs("log10: SING error\n",stderr)
#define WRITEF_LOG10_ZERO fputs("log10f: SING error\n",stderr)
#define WRITEL_LOG10_NEGATIVE fputs("log10l: DOMAIN error\n",stderr)
#define WRITED_LOG10_NEGATIVE fputs("log10: DOMAIN error\n",stderr)
#define WRITEF_LOG10_NEGATIVE fputs("log10f: DOMAIN error\n",stderr)
#define WRITEL_LOG2_ZERO fputs("log2l: SING error\n",stderr)
-#define WRITED_LOG2_ZERO fputs("log2: SING error\n",stderr)
+#define WRITED_LOG2_ZERO fputs("log2: SING error\n",stderr)
#define WRITEF_LOG2_ZERO fputs("log2f: SING error\n",stderr)
#define WRITEL_LOG2_NEGATIVE fputs("log2l: DOMAIN error\n",stderr)
#define WRITED_LOG2_NEGATIVE fputs("log2: DOMAIN error\n",stderr)
@@ -339,9 +367,9 @@ const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0
#define WRITEL_GAMMA_NEGATIVE fputs("gammal: SING error\n",stderr)
#define WRITED_GAMMA_NEGATIVE fputs("gamma: SING error\n",stderr)
#define WRITEF_GAMMA_NEGATIVE fputs("gammaf: SING error\n",stderr)
-#define WRITEL_TGAMMA_NEGATIVE fputs("tgammal: DOMAIN error\n",stderr)
-#define WRITED_TGAMMA_NEGATIVE fputs("tgamma: DOMAIN error\n",stderr)
-#define WRITEF_TGAMMA_NEGATIVE fputs("tgammaf: DOMAIN error\n",stderr)
+#define WRITEL_TGAMMA_NEGATIVE fputs("tgammal: SING error\n",stderr)
+#define WRITED_TGAMMA_NEGATIVE fputs("tgamma: SING error\n",stderr)
+#define WRITEF_TGAMMA_NEGATIVE fputs("tgammaf: SING error\n",stderr)
#define WRITEL_J0_TLOSS fputs("j0l: TLOSS error\n",stderr)
#define WRITEL_Y0_TLOSS fputs("y0l: TLOSS error\n",stderr)
#define WRITEL_J1_TLOSS fputs("j1l: TLOSS error\n",stderr)
@@ -379,7 +407,7 @@ if(_LIB_VERSIONIMF==_IEEE_) return;
/***********************/
/* C9X Path */
/***********************/
-else if(_LIB_VERSIONIMF==_ISOC_)
+else if(_LIB_VERSIONIMF==_ISOC_)
{
switch(input_tag)
{
@@ -396,29 +424,29 @@ else if(_LIB_VERSIONIMF==_ISOC_)
case log1p_zero:
case log1pf_zero:
case powl_overflow:
- case pow_overflow:
- case powf_overflow:
+ case pow_overflow:
+ case powf_overflow:
case expl_overflow:
- case exp_overflow:
- case expf_overflow:
+ case exp_overflow:
+ case expf_overflow:
case exp2l_overflow:
- case exp2_overflow:
- case exp2f_overflow:
+ case exp2_overflow:
+ case exp2f_overflow:
case exp10l_overflow:
- case exp10_overflow:
- case exp10f_overflow:
+ case exp10_overflow:
+ case exp10f_overflow:
case expm1l_overflow:
- case expm1_overflow:
- case expm1f_overflow:
+ case expm1_overflow:
+ case expm1f_overflow:
case hypotl_overflow:
case hypot_overflow:
case hypotf_overflow:
- case sinhl_overflow:
- case sinh_overflow:
- case sinhf_overflow:
- case atanhl_eq_one:
- case atanh_eq_one:
- case atanhf_eq_one:
+ case sinhl_overflow:
+ case sinh_overflow:
+ case sinhf_overflow:
+ case atanhl_eq_one:
+ case atanh_eq_one:
+ case atanhf_eq_one:
case scalbl_overflow:
case scalb_overflow:
case scalbf_overflow:
@@ -428,9 +456,15 @@ else if(_LIB_VERSIONIMF==_ISOC_)
case nextafterl_overflow:
case nextafter_overflow:
case nextafterf_overflow:
+ case nextafterl_underflow:
+ case nextafter_underflow:
+ case nextafterf_underflow:
case nexttowardl_overflow:
case nexttoward_overflow:
case nexttowardf_overflow:
+ case nexttowardl_underflow:
+ case nexttoward_underflow:
+ case nexttowardf_underflow:
case scalbnl_overflow:
case scalbn_overflow:
case scalbnf_overflow:
@@ -453,35 +487,35 @@ else if(_LIB_VERSIONIMF==_ISOC_)
case gamma_negative:
case gammaf_negative:
case ilogbl_zero:
- case ilogb_zero:
+ case ilogb_zero:
case ilogbf_zero:
case fdiml_overflow:
- case fdim_overflow:
+ case fdim_overflow:
case fdimf_overflow:
case llrintl_large:
- case llrint_large:
+ case llrint_large:
case llrintf_large:
case llroundl_large:
- case llround_large:
+ case llround_large:
case llroundf_large:
case lrintl_large:
- case lrint_large:
+ case lrint_large:
case lrintf_large:
case lroundl_large:
- case lround_large:
+ case lround_large:
case lroundf_large:
case tandl_overflow:
- case tand_overflow:
+ case tand_overflow:
case tandf_overflow:
case cotdl_overflow:
- case cotd_overflow:
+ case cotd_overflow:
case cotdf_overflow:
case cotl_overflow:
- case cot_overflow:
+ case cot_overflow:
case cotf_overflow:
- case sinhcoshl_overflow:
- case sinhcosh_overflow:
- case sinhcoshf_overflow:
+ case sinhcoshl_overflow:
+ case sinhcosh_overflow:
+ case sinhcoshf_overflow:
case annuityl_overflow:
case annuity_overflow:
case annuityf_overflow:
@@ -496,6 +530,7 @@ else if(_LIB_VERSIONIMF==_ISOC_)
}
case powl_underflow:
case expl_underflow:
+ case exp10l_underflow:
case exp2l_underflow:
case scalbl_underflow:
case scalbnl_underflow:
@@ -505,35 +540,43 @@ else if(_LIB_VERSIONIMF==_ISOC_)
case annuityl_underflow:
case compoundl_underflow:
{
- if ( *(__INT_64__*)retval == 0 ) ERRNO_RANGE;
+ /* Test for zero by testing 64 significand bits for zero. An integer test
+ is needed so that the denormal flag is not set by a floating-point test. */
+ if ( INPUT_RESI64 == 0 ) ERRNO_RANGE;
break;
}
- case pow_underflow:
- case exp_underflow:
- case exp2_underflow:
+ case pow_underflow:
+ case exp_underflow:
+ case exp10_underflow:
+ case exp2_underflow:
case scalb_underflow:
case scalbn_underflow:
case scalbln_underflow:
case ldexp_underflow:
- case erfc_underflow:
+ case erfc_underflow:
case annuity_underflow:
case compound_underflow:
{
- if ( ((*(__INT_64__*)retval)<<1) == 0 ) ERRNO_RANGE;
+ /* Test for zero by testing exp and significand bits for zero (the << 1 shifts out
+ the sign bit). An integer test is needed so that the denormal flag is not set by a floating-point test. */
+ if ( (INPUT_RESI64 << 1) == 0 ) ERRNO_RANGE;
break;
}
- case powf_underflow:
- case expf_underflow:
- case exp2f_underflow:
+ case powf_underflow:
+ case expf_underflow:
+ case exp10f_underflow:
+ case exp2f_underflow:
case scalbf_underflow:
case scalbnf_underflow:
case scalblnf_underflow:
case ldexpf_underflow:
- case erfcf_underflow:
+ case erfcf_underflow:
case annuityf_underflow:
case compoundf_underflow:
{
- if ( ((*(__INT_64__*)retval)<<33) == 0 ) ERRNO_RANGE;
+ /* Test for zero by testing exp and significand bits for zero (the << 33 keeps only the low
+ 31 bits of the 64-bit load). An integer test is needed so that the denormal flag is not set by a floating-point test. */
+ if ( (INPUT_RESI64 << 33) == 0 ) ERRNO_RANGE;
break;
}
case logl_negative:
@@ -563,17 +606,17 @@ else if(_LIB_VERSIONIMF==_ISOC_)
case fmodl_by_zero:
case fmod_by_zero:
case fmodf_by_zero:
- case atanhl_gt_one:
- case atanh_gt_one:
- case atanhf_gt_one:
- case acosl_gt_one:
- case acos_gt_one:
- case acosf_gt_one:
- case asinl_gt_one:
- case asin_gt_one:
- case asinf_gt_one:
+ case atanhl_gt_one:
+ case atanh_gt_one:
+ case atanhf_gt_one:
+ case acosl_gt_one:
+ case acos_gt_one:
+ case acosf_gt_one:
+ case asinl_gt_one:
+ case asin_gt_one:
+ case asinf_gt_one:
case logbl_zero:
- case logb_zero:
+ case logb_zero:
case logbf_zero:
case acoshl_lt_one:
case acosh_lt_one:
@@ -596,12 +639,12 @@ else if(_LIB_VERSIONIMF==_ISOC_)
case ynl_negative:
case yn_negative:
case ynf_negative:
- case acosdl_gt_one:
- case acosd_gt_one:
- case acosdf_gt_one:
- case asindl_gt_one:
- case asind_gt_one:
- case asindf_gt_one:
+ case acosdl_gt_one:
+ case acosd_gt_one:
+ case acosdf_gt_one:
+ case asindl_gt_one:
+ case asind_gt_one:
+ case asindf_gt_one:
case atan2dl_zero:
case atan2d_zero:
case atan2df_zero:
@@ -656,10 +699,10 @@ switch(input_tag)
RETVAL_HUGE_VALF; ERRNO_RANGE; break;
}
case gammal_negative:
- case lgammal_negative:
case gamma_negative:
- case lgamma_negative:
case gammaf_negative:
+ case lgammal_negative:
+ case lgamma_negative:
case lgammaf_negative:
case tgammal_negative:
case tgamma_negative:
@@ -686,49 +729,61 @@ switch(input_tag)
case scalblnf_overflow:
case scalblnf_underflow:
case tandl_overflow:
- case tand_overflow:
+ case tand_overflow:
case tandf_overflow:
case cotdl_overflow:
- case cotd_overflow:
+ case cotd_overflow:
case cotdf_overflow:
case cotl_overflow:
- case cot_overflow:
+ case cot_overflow:
case cotf_overflow:
- case sinhcoshl_overflow:
- case sinhcosh_overflow:
- case sinhcoshf_overflow:
+ case sinhcoshl_overflow:
+ case sinhcosh_overflow:
+ case sinhcoshf_overflow:
+ case nextafterl_overflow:
+ case nextafter_overflow:
+ case nextafterf_overflow:
+ case nextafterl_underflow:
+ case nextafter_underflow:
+ case nextafterf_underflow:
+ case nexttowardl_overflow:
+ case nexttoward_overflow:
+ case nexttowardf_overflow:
+ case nexttowardl_underflow:
+ case nexttoward_underflow:
+ case nexttowardf_underflow:
{
ERRNO_RANGE; break;
}
- case atanhl_gt_one:
- case atanhl_eq_one:
+ case atanhl_gt_one:
+ case atanhl_eq_one:
/* atanhl(|x| >= 1) */
{
ERRNO_DOMAIN; break;
}
- case atanh_gt_one:
- case atanh_eq_one:
+ case atanh_gt_one:
+ case atanh_eq_one:
/* atanh(|x| >= 1) */
{
ERRNO_DOMAIN; break;
}
- case atanhf_gt_one:
- case atanhf_eq_one:
+ case atanhf_gt_one:
+ case atanhf_eq_one:
/* atanhf(|x| >= 1) */
{
ERRNO_DOMAIN; break;
}
- case sqrtl_negative:
+ case sqrtl_negative:
/* sqrtl(x < 0) */
{
ERRNO_DOMAIN; break;
}
- case sqrt_negative:
+ case sqrt_negative:
/* sqrt(x < 0) */
{
ERRNO_DOMAIN; break;
}
- case sqrtf_negative:
+ case sqrtf_negative:
/* sqrtf(x < 0) */
{
ERRNO_DOMAIN; break;
@@ -767,7 +822,10 @@ switch(input_tag)
/* y1l(x < 0) */
/* ynl(x < 0) */
{
- RETVAL_NEG_HUGE_VALL; ERRNO_DOMAIN; break;
+#ifndef _LIBC
+ RETVAL_NEG_HUGE_VALL;
+#endif
+ ERRNO_DOMAIN; break;
}
case y0_negative:
case y1_negative:
@@ -777,7 +835,7 @@ switch(input_tag)
/* yn(x < 0) */
{
RETVAL_NEG_HUGE_VALD; ERRNO_DOMAIN; break;
- }
+ }
case y0f_negative:
case y1f_negative:
case ynf_negative:
@@ -786,14 +844,15 @@ switch(input_tag)
/* ynf(x < 0) */
{
RETVAL_NEG_HUGE_VALF; ERRNO_DOMAIN; break;
- }
+ }
case logl_zero:
case log1pl_zero:
case log10l_zero:
case log2l_zero:
/* logl(0) */
- /* log1pl(0) */
+ /* log1pl(-1) */
/* log10l(0) */
+ /* log2l(0) */
{
RETVAL_NEG_HUGE_VALL; ERRNO_RANGE; break;
}
@@ -802,8 +861,9 @@ switch(input_tag)
case log10_zero:
case log2_zero:
/* log(0) */
- /* log1p(0) */
+ /* log1p(-1) */
/* log10(0) */
+ /* log2(0) */
{
RETVAL_NEG_HUGE_VALD; ERRNO_RANGE; break;
}
@@ -812,8 +872,9 @@ switch(input_tag)
case log10f_zero:
case log2f_zero:
/* logf(0) */
- /* log1pf(0) */
+ /* log1pf(-1) */
/* log10f(0) */
+ /* log2f(0) */
{
RETVAL_NEG_HUGE_VALF; ERRNO_RANGE; break;
}
@@ -822,12 +883,10 @@ switch(input_tag)
case log10l_negative:
case log2l_negative:
/* logl(x < 0) */
- /* log1pl(x < 0) */
+ /* log1pl(x < -1) */
/* log10l(x < 0) */
+ /* log2l(x < 0) */
{
-#ifndef _LIBC
- RETVAL_NEG_HUGE_VALL;
-#endif
ERRNO_DOMAIN; break;
}
case log_negative:
@@ -835,65 +894,74 @@ switch(input_tag)
case log10_negative:
case log2_negative:
/* log(x < 0) */
- /* log1p(x < 0) */
+ /* log1p(x < -1) */
/* log10(x < 0) */
+ /* log2(x < 0) */
{
-#ifndef _LIBC
- RETVAL_NEG_HUGE_VALD;
-#endif
ERRNO_DOMAIN; break;
- }
+ }
case logf_negative:
case log1pf_negative:
case log10f_negative:
case log2f_negative:
/* logf(x < 0) */
- /* log1pf(x < 0) */
+ /* log1pf(x < -1) */
/* log10f(x < 0) */
+ /* log2f(x < 0) */
{
-#ifndef _LIBC
- RETVAL_NEG_HUGE_VALF;
-#endif
ERRNO_DOMAIN; break;
- }
+ }
case expl_overflow:
- case exp2l_overflow:
case exp10l_overflow:
+ case exp2l_overflow:
/* expl overflow */
+ /* exp10l overflow */
+ /* exp2l overflow */
{
RETVAL_HUGE_VALL; ERRNO_RANGE; break;
}
case exp_overflow:
- case exp2_overflow:
case exp10_overflow:
+ case exp2_overflow:
/* exp overflow */
+ /* exp10 overflow */
+ /* exp2 overflow */
{
RETVAL_HUGE_VALD; ERRNO_RANGE; break;
}
case expf_overflow:
- case exp2f_overflow:
case exp10f_overflow:
+ case exp2f_overflow:
/* expf overflow */
{
RETVAL_HUGE_VALF; ERRNO_RANGE; break;
}
case expl_underflow:
+ case exp10l_underflow:
case exp2l_underflow:
/* expl underflow */
+ /* exp10l underflow */
+ /* exp2l underflow */
{
- RETVAL_ZEROL; ERRNO_RANGE; break;
+ ERRNO_RANGE; break;
}
case exp_underflow:
+ case exp10_underflow:
case exp2_underflow:
/* exp underflow */
+ /* exp10 underflow */
+ /* exp2 underflow */
{
- RETVAL_ZEROD; ERRNO_RANGE; break;
+ ERRNO_RANGE; break;
}
case expf_underflow:
+ case exp10f_underflow:
case exp2f_underflow:
/* expf underflow */
+ /* exp10f underflow */
+ /* exp2f underflow */
{
- RETVAL_ZEROF; ERRNO_RANGE; break;
+ ERRNO_RANGE; break;
}
case j0l_gt_loss:
case y0l_gt_loss:
@@ -945,16 +1013,16 @@ switch(input_tag)
case compoundl_overflow:
/* powl(x,y) overflow */
{
- if (INPUT_RESL < 0) RETVAL_NEG_HUGE_VALL;
+ if (INPUT_RESL < ZEROL_VALUE /*0*/) RETVAL_NEG_HUGE_VALL;
else RETVAL_HUGE_VALL;
- ERRNO_RANGE; break;
+ ERRNO_RANGE; break;
}
case pow_overflow:
case annuity_overflow:
case compound_overflow:
/* pow(x,y) overflow */
{
- if (INPUT_RESD < 0) RETVAL_NEG_HUGE_VALD;
+ if (INPUT_RESD < ZEROD_VALUE /*0*/) RETVAL_NEG_HUGE_VALD;
else RETVAL_HUGE_VALD;
ERRNO_RANGE; break;
}
@@ -963,7 +1031,7 @@ switch(input_tag)
case compoundf_overflow:
/* powf(x,y) overflow */
{
- if (INPUT_RESF < 0) RETVAL_NEG_HUGE_VALF;
+ if (INPUT_RESF < ZEROF_VALUE /*0*/) RETVAL_NEG_HUGE_VALF;
else RETVAL_HUGE_VALF;
ERRNO_RANGE; break;
}
@@ -1038,7 +1106,7 @@ switch(input_tag)
/* Special Error */
{
break;
- }
+ }
case pow_nan_to_zero:
/* pow(NaN,0.0) */
{
@@ -1051,51 +1119,24 @@ switch(input_tag)
}
case atan2l_zero:
case atan2dl_zero:
+ /* atan2l(0,0) */
/* atan2dl(0,0) */
{
-#ifndef _LIBC
- RETVAL_ZEROL;
-#else
- /* XXX arg1 and arg2 are switched!!!! */
- if (signbit (*(long double *) arg1))
- /* y == -0 */
- *(long double *) retval = __libm_copysignl (M_PIl, *(long double *) arg2);
- else
- *(long double *) retval = *(long double *) arg2;
-#endif
- ERRNO_DOMAIN; break;
+ break;
}
case atan2_zero:
case atan2d_zero:
+ /* atan2(0,0) */
/* atan2d(0,0) */
{
-#ifndef _LIBC
- RETVAL_ZEROD;
-#else
- /* XXX arg1 and arg2 are switched!!!! */
- if (signbit (*(double *) arg1))
- /* y == -0 */
- *(double *) retval = __libm_copysign (M_PI, *(double *) arg2);
- else
- *(double *) retval = *(double *) arg2;
-#endif
- ERRNO_DOMAIN; break;
+ break;
}
case atan2f_zero:
case atan2df_zero:
/* atan2f(0,0) */
/* atan2df(0,0) */
{
-#ifndef _LIBC
- RETVAL_ZEROF;
-#else
- if (signbit (*(float *) arg2))
- /* y == -0 */
- *(float *) retval = __libm_copysignf (M_PI, *(float *) arg1);
- else
- *(float *) retval = *(float *) arg1;
-#endif
- ERRNO_DOMAIN; break;
+ break;
}
case expm1l_overflow:
/* expm1 overflow */
@@ -1145,42 +1186,42 @@ switch(input_tag)
case scalbl_underflow:
/* scalbl underflow */
{
- if (INPUT_XL < 0) RETVAL_NEG_ZEROL;
+ if (INPUT_XL < ZEROL_VALUE /*0*/) RETVAL_NEG_ZEROL;
else RETVAL_ZEROL;
ERRNO_RANGE; break;
}
case scalb_underflow:
/* scalb underflow */
{
- if (INPUT_XD < 0) RETVAL_NEG_ZEROD;
+ if (INPUT_XD < ZEROD_VALUE /*0*/) RETVAL_NEG_ZEROD;
else RETVAL_ZEROD;
ERRNO_RANGE; break;
}
case scalbf_underflow:
/* scalbf underflow */
{
- if (INPUT_XF < 0) RETVAL_NEG_ZEROF;
+ if (INPUT_XF < ZEROF_VALUE /*0*/) RETVAL_NEG_ZEROF;
else RETVAL_ZEROF;
ERRNO_RANGE; break;
}
case scalbl_overflow:
/* scalbl overflow */
{
- if (INPUT_XL < 0) RETVAL_NEG_HUGE_VALL;
+ if (INPUT_XL < ZEROL_VALUE /*0*/) RETVAL_NEG_HUGE_VALL;
else RETVAL_HUGE_VALL;
ERRNO_RANGE; break;
}
case scalb_overflow:
/* scalb overflow */
{
- if (INPUT_XD < 0) RETVAL_NEG_HUGE_VALD;
+ if (INPUT_XD < ZEROD_VALUE /*0*/) RETVAL_NEG_HUGE_VALD;
else RETVAL_HUGE_VALD;
ERRNO_RANGE; break;
}
case scalbf_overflow:
/* scalbf overflow */
{
- if (INPUT_XF < 0) RETVAL_NEG_HUGE_VALF;
+ if (INPUT_XF < ZEROF_VALUE /*0*/) RETVAL_NEG_HUGE_VALF;
else RETVAL_HUGE_VALF;
ERRNO_RANGE; break;
}
@@ -1204,9 +1245,6 @@ switch(input_tag)
/* acosl(x > 1) */
/* acosdl(x > 1) */
{
-#ifndef _LIBC
- RETVAL_ZEROL;
-#endif
ERRNO_DOMAIN; break;
}
case acos_gt_one:
@@ -1214,9 +1252,6 @@ switch(input_tag)
/* acos(x > 1) */
/* acosd(x > 1) */
{
-#ifndef _LIBC
- RETVAL_ZEROD;
-#endif
ERRNO_DOMAIN; break;
}
case acosf_gt_one:
@@ -1224,9 +1259,6 @@ switch(input_tag)
/* acosf(x > 1) */
/* acosdf(x > 1) */
{
-#ifndef _LIBC
- RETVAL_ZEROF;
-#endif
ERRNO_DOMAIN; break;
}
case asinl_gt_one:
@@ -1234,9 +1266,6 @@ switch(input_tag)
/* asinl(x > 1) */
/* asindl(x > 1) */
{
-#ifndef _LIBC
- RETVAL_ZEROL;
-#endif
ERRNO_DOMAIN; break;
}
case asin_gt_one:
@@ -1244,18 +1273,13 @@ switch(input_tag)
/* asin(x > 1) */
/* asind(x > 1) */
{
-#ifndef _LIBC
- RETVAL_ZEROD;
-#endif
ERRNO_DOMAIN; break;
}
case asinf_gt_one:
case asindf_gt_one:
+ /* asinf(x > 1) */
/* asindf(x > 1) */
{
-#ifndef _LIBC
- RETVAL_ZEROF;
-#endif
ERRNO_DOMAIN; break;
}
case remainderl_by_zero:
@@ -1291,33 +1315,24 @@ switch(input_tag)
{
RETVAL_HUGE_VALF; ERRNO_RANGE; break;
}
- case nextafterl_overflow:
- case nextafter_overflow:
- case nextafterf_overflow:
- case nexttowardl_overflow:
- case nexttoward_overflow:
- case nexttowardf_overflow:
- {
- ERRNO_RANGE; break;
- }
case sinhl_overflow:
/* sinhl overflows */
{
- if (INPUT_XL > 0) RETVAL_HUGE_VALL;
+ if (INPUT_XL > ZEROL_VALUE /*0*/) RETVAL_HUGE_VALL;
else RETVAL_NEG_HUGE_VALL;
ERRNO_RANGE; break;
}
case sinh_overflow:
/* sinh overflows */
{
- if (INPUT_XD > 0) RETVAL_HUGE_VALD;
+ if (INPUT_XD > ZEROD_VALUE /*0*/) RETVAL_HUGE_VALD;
else RETVAL_NEG_HUGE_VALD;
ERRNO_RANGE; break;
}
case sinhf_overflow:
/* sinhf overflows */
{
- if (INPUT_XF > 0) RETVAL_HUGE_VALF;
+ if (INPUT_XF > ZEROF_VALUE /*0*/) RETVAL_HUGE_VALF;
else RETVAL_NEG_HUGE_VALF;
ERRNO_RANGE; break;
}
@@ -1361,7 +1376,7 @@ return;
/*******************************/
/* __SVID__ and __XOPEN__ Path */
/*******************************/
-else
+else
{
switch(input_tag)
{
@@ -1384,13 +1399,13 @@ else
case scalblnf_overflow:
case scalblnf_underflow:
case tandl_overflow:
- case tand_overflow:
+ case tand_overflow:
case tandf_overflow:
case cotdl_overflow:
- case cotd_overflow:
+ case cotd_overflow:
case cotdf_overflow:
case cotl_overflow:
- case cot_overflow:
+ case cot_overflow:
case cotf_overflow:
case annuityl_overflow:
case annuityl_underflow:
@@ -1422,12 +1437,12 @@ else
{
ERRNO_DOMAIN; break;
}
- case sqrtl_negative:
+ case sqrtl_negative:
/* sqrtl(x < 0) */
{
DOMAINL; NAMEL = (char *) "sqrtl";
- ifSVID
- {
+ ifSVID
+ {
RETVAL_ZEROL;
NOT_MATHERRL
{
@@ -1435,22 +1450,22 @@ else
ERRNO_DOMAIN;
}
}
- else
+ else
{ /* NaN already computed */
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
- case sqrt_negative:
+ case sqrt_negative:
/* sqrt(x < 0) */
{
DOMAIND; NAMED = (char *) "sqrt";
- ifSVID
+ ifSVID
{
-
+
RETVAL_ZEROD;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_SQRT;
ERRNO_DOMAIN;
@@ -1459,18 +1474,18 @@ else
else
{ /* NaN already computed */
NOT_MATHERRD {ERRNO_DOMAIN;}
- }
- *(double *)retval = exc.retval;
+ }
+ *(double *)retval = exc.retval;
break;
}
- case sqrtf_negative:
+ case sqrtf_negative:
/* sqrtf(x < 0) */
{
DOMAINF; NAMEF = (char *) "sqrtf";
- ifSVID
+ ifSVID
{
RETVAL_ZEROF;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_SQRT;
ERRNO_DOMAIN;
@@ -1479,59 +1494,59 @@ else
else
{
NOT_MATHERRF {ERRNO_DOMAIN;}
- }
- *(float *)retval = excf.retval;
+ }
+ *(float *)retval = excf.retval;
break;
}
case logl_zero:
/* logl(0) */
{
SINGL; NAMEL = (char *) "logl";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
WRITEL_LOG_ZERO;
ERRNO_DOMAIN;
- }
+ }
}
else
{
RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
- }
- *(long double *)retval = excl.retval;
+ }
+ *(long double *)retval = excl.retval;
break;
}
case log_zero:
/* log(0) */
{
SINGD; NAMED = (char *) "log";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGED;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_LOG_ZERO;
ERRNO_DOMAIN;
- }
+ }
}
else
{
RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case logf_zero:
/* logf(0) */
{
SINGF; NAMEF = (char *) "logf";
- ifSVID
+ ifSVID
{
- RETVAL_NEG_HUGEF;
+ RETVAL_NEG_HUGEF;
NOT_MATHERRF
{
WRITEF_LOG_ZERO;
@@ -1540,10 +1555,10 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALF;
+ RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
@@ -1551,10 +1566,10 @@ else
/* logl(x < 0) */
{
DOMAINL; NAMEL = (char *) "logl";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
WRITEL_LOG_NEGATIVE;
ERRNO_DOMAIN;
@@ -1562,20 +1577,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALL;
+ RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case log_negative:
/* log(x < 0) */
{
DOMAIND; NAMED = (char *) "log";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGED;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_LOG_NEGATIVE;
ERRNO_DOMAIN;
@@ -1583,38 +1598,38 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALD;
+ RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
- }
+ }
case logf_negative:
/* logf(x < 0) */
{
DOMAINF; NAMEF = (char *) "logf";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEF;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_LOG_NEGATIVE;
ERRNO_DOMAIN;
}
- }
+ }
else
{
- RETVAL_NEG_HUGE_VALF;
+ RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF{ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case log1pl_zero:
/* log1pl(-1) */
{
SINGL; NAMEL = (char *) "log1pl";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEL;
NOT_MATHERRL
@@ -1635,7 +1650,7 @@ else
/* log1p(-1) */
{
SINGD; NAMED = (char *) "log1p";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGED;
NOT_MATHERRD
@@ -1656,7 +1671,7 @@ else
/* log1pf(-1) */
{
SINGF; NAMEF = (char *) "log1pf";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEF;
NOT_MATHERRF
@@ -1672,7 +1687,7 @@ else
}
*(float *)retval = excf.retval;
break;
- }
+ }
case log1pl_negative:
/* log1pl(x < -1) */
{
@@ -1686,7 +1701,7 @@ else
ERRNO_DOMAIN;
}
}
- else
+ else
{
RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
@@ -1707,7 +1722,7 @@ else
ERRNO_DOMAIN;
}
}
- else
+ else
{
RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
@@ -1728,7 +1743,7 @@ else
ERRNO_DOMAIN;
}
}
- else
+ else
{
RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF {ERRNO_DOMAIN;}
@@ -1740,7 +1755,7 @@ else
/* log10l(0) */
{
SINGL; NAMEL = (char *) "log10l";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEL;
NOT_MATHERRL
@@ -1754,14 +1769,14 @@ else
RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case log10_zero:
/* log10(0) */
{
SINGD; NAMED = (char *) "log10";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGED;
NOT_MATHERRD
@@ -1775,14 +1790,14 @@ else
RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case log10f_zero:
/* log10f(0) */
{
SINGF; NAMEF = (char *) "log10f";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEF;
NOT_MATHERRF
@@ -1796,17 +1811,17 @@ else
RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case log10l_negative:
/* log10l(x < 0) */
{
DOMAINL; NAMEL = (char *) "log10l";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
WRITEL_LOG10_NEGATIVE;
ERRNO_DOMAIN;
@@ -1817,38 +1832,38 @@ else
RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case log10_negative:
/* log10(x < 0) */
{
DOMAIND; NAMED = (char *) "log10";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGED;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_LOG10_NEGATIVE;
ERRNO_DOMAIN;
}
- }
+ }
else
{
RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case log10f_negative:
/* log10f(x < 0) */
{
DOMAINF; NAMEF = (char *) "log10f";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEF;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_LOG10_NEGATIVE;
ERRNO_DOMAIN;
@@ -1859,14 +1874,35 @@ else
RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
+ break;
+ }
+ case log2l_zero:
+ /* log2l(0) */
+ {
+ SINGL; NAMEL = (char *) "log2l";
+ ifSVID
+ {
+ RETVAL_NEG_HUGEL;
+ NOT_MATHERRL
+ {
+ WRITEL_LOG2_ZERO;
+ ERRNO_DOMAIN;
+ }
+ }
+ else
+ {
+ RETVAL_NEG_HUGE_VALL;
+ NOT_MATHERRL {ERRNO_DOMAIN;}
+ }
+ *(long double *)retval = excl.retval;
break;
}
case log2_zero:
/* log2(0) */
{
SINGD; NAMED = (char *) "log2";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGED;
NOT_MATHERRD
@@ -1880,14 +1916,14 @@ else
RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case log2f_zero:
/* log2f(0) */
{
SINGF; NAMEF = (char *) "log2f";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEF;
NOT_MATHERRF
@@ -1901,17 +1937,17 @@ else
RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case log2l_negative:
/* log2l(x < 0) */
{
DOMAINL; NAMEL = (char *) "log2l";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
WRITEL_LOG2_NEGATIVE;
ERRNO_DOMAIN;
@@ -1922,38 +1958,38 @@ else
RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case log2_negative:
/* log2(x < 0) */
{
DOMAIND; NAMED = (char *) "log2";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGED;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_LOG2_NEGATIVE;
ERRNO_DOMAIN;
}
- }
+ }
else
{
RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case log2f_negative:
/* log2f(x < 0) */
{
DOMAINF; NAMEF = (char *) "log2f";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEF;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_LOG2_NEGATIVE;
ERRNO_DOMAIN;
@@ -1964,14 +2000,14 @@ else
RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case expl_overflow:
/* expl overflow */
{
OVERFLOWL; NAMEL = (char *) "expl";
- ifSVID
+ ifSVID
{
RETVAL_HUGEL;
}
@@ -1980,14 +2016,14 @@ else
RETVAL_HUGE_VALL;
}
NOT_MATHERRL {ERRNO_RANGE;}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case exp_overflow:
/* exp overflow */
{
OVERFLOWD; NAMED = (char *) "exp";
- ifSVID
+ ifSVID
{
RETVAL_HUGED;
}
@@ -1996,14 +2032,14 @@ else
RETVAL_HUGE_VALD;
}
NOT_MATHERRD {ERRNO_RANGE;}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case expf_overflow:
/* expf overflow */
{
OVERFLOWF; NAMEF = (char *) "expf";
- ifSVID
+ ifSVID
{
RETVAL_HUGEF;
}
@@ -2012,7 +2048,7 @@ else
RETVAL_HUGE_VALF;
}
NOT_MATHERRF {ERRNO_RANGE;}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case expl_underflow:
@@ -2020,7 +2056,7 @@ else
{
UNDERFLOWL; NAMEL = (char *) "expl"; RETVAL_ZEROL;
NOT_MATHERRL {ERRNO_RANGE;}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case exp_underflow:
@@ -2028,7 +2064,7 @@ else
{
UNDERFLOWD; NAMED = (char *) "exp"; RETVAL_ZEROD;
NOT_MATHERRD {ERRNO_RANGE;}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case expf_underflow:
@@ -2036,22 +2072,22 @@ else
{
UNDERFLOWF; NAMEF = (char *) "expf"; RETVAL_ZEROF;
NOT_MATHERRF {ERRNO_RANGE;}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case powl_zero_to_zero:
/* powl 0**0 */
{
DOMAINL; NAMEL = (char *) "powl";
- ifSVID
+ ifSVID
{
RETVAL_ZEROL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
WRITEL_POW_ZERO_TO_ZERO;
ERRNO_DOMAIN;
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
}
else RETVAL_ONEL;
break;
@@ -2060,15 +2096,15 @@ else
/* pow 0**0 */
{
DOMAIND; NAMED = (char *) "pow";
- ifSVID
+ ifSVID
{
RETVAL_ZEROD;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_POW_ZERO_TO_ZERO;
ERRNO_DOMAIN;
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
}
else RETVAL_ONED;
break;
@@ -2077,15 +2113,15 @@ else
/* powf 0**0 */
{
DOMAINF; NAMEF = (char *) "powf";
- ifSVID
+ ifSVID
{
RETVAL_ZEROF;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_POW_ZERO_TO_ZERO;
ERRNO_DOMAIN;
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
}
else RETVAL_ONEF;
break;
@@ -2094,54 +2130,54 @@ else
/* powl(x,y) overflow */
{
OVERFLOWL; NAMEL = (char *) "powl";
- ifSVID
+ ifSVID
{
- if (INPUT_XL < 0) RETVAL_NEG_HUGEL;
+ if (INPUT_RESL < ZEROL_VALUE /*0*/) RETVAL_NEG_HUGEL;
else RETVAL_HUGEL;
}
else
- {
- if (INPUT_XL < 0) RETVAL_NEG_HUGE_VALL;
+ {
+ if (INPUT_RESL < ZEROL_VALUE /*0*/) RETVAL_NEG_HUGE_VALL;
else RETVAL_HUGE_VALL;
}
NOT_MATHERRL {ERRNO_RANGE;}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case pow_overflow:
/* pow(x,y) overflow */
{
OVERFLOWD; NAMED = (char *) "pow";
- ifSVID
+ ifSVID
{
- if (INPUT_XD < 0) RETVAL_NEG_HUGED;
+ if (INPUT_RESD < ZEROD_VALUE /*0*/) RETVAL_NEG_HUGED;
else RETVAL_HUGED;
}
else
- {
- if (INPUT_XD < 0) RETVAL_NEG_HUGE_VALD;
+ {
+ if (INPUT_RESD < ZEROD_VALUE /*0*/) RETVAL_NEG_HUGE_VALD;
else RETVAL_HUGE_VALD;
}
NOT_MATHERRD {ERRNO_RANGE;}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case powf_overflow:
/* powf(x,y) overflow */
{
OVERFLOWF; NAMEF = (char *) "powf";
- ifSVID
+ ifSVID
{
- if (INPUT_XF < 0) RETVAL_NEG_HUGEF;
- else RETVAL_HUGEF;
+ if (INPUT_RESF < ZEROF_VALUE /*0*/) RETVAL_NEG_HUGEF;
+ else RETVAL_HUGEF;
}
else
- {
- if (INPUT_XF < 0) RETVAL_NEG_HUGE_VALF;
+ {
+ if (INPUT_RESF < ZEROF_VALUE /*0*/) RETVAL_NEG_HUGE_VALF;
else RETVAL_HUGE_VALF;
}
NOT_MATHERRF {ERRNO_RANGE;}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case powl_underflow:
@@ -2149,7 +2185,7 @@ else
{
UNDERFLOWL; NAMEL = (char *) "powl"; RETVAL_ZEROL;
NOT_MATHERRL {ERRNO_RANGE;}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case pow_underflow:
@@ -2157,7 +2193,7 @@ else
{
UNDERFLOWD; NAMED = (char *) "pow"; RETVAL_ZEROD;
NOT_MATHERRD {ERRNO_RANGE;}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case powf_underflow:
@@ -2165,17 +2201,17 @@ else
{
UNDERFLOWF; NAMEF = (char *) "powf"; RETVAL_ZEROF;
NOT_MATHERRF {ERRNO_RANGE;}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case powl_zero_to_negative:
/* 0 to neg */
{
DOMAINL; NAMEL = (char *) "powl";
- ifSVID
- {
+ ifSVID
+ {
RETVAL_ZEROL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
WRITEL_POW_ZERO_TO_NEGATIVE;
ERRNO_DOMAIN;
@@ -2186,17 +2222,17 @@ else
RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case pow_zero_to_negative:
/* 0**neg */
{
DOMAIND; NAMED = (char *) "pow";
- ifSVID
- {
+ ifSVID
+ {
RETVAL_ZEROD;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_POW_ZERO_TO_NEGATIVE;
ERRNO_DOMAIN;
@@ -2207,18 +2243,17 @@ else
RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case powf_zero_to_negative:
/* 0**neg */
{
DOMAINF; NAMEF = (char *) "powf";
- RETVAL_NEG_HUGE_VALF;
- ifSVID
- {
+ ifSVID
+ {
RETVAL_ZEROF;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_POW_ZERO_TO_NEGATIVE;
ERRNO_DOMAIN;
@@ -2229,17 +2264,17 @@ else
RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case powl_neg_to_non_integer:
/* neg**non_integral */
{
DOMAINL; NAMEL = (char *) "powl";
- ifSVID
- {
- RETVAL_ZEROF;
- NOT_MATHERRL
+ ifSVID
+ {
+ RETVAL_ZEROL;
+ NOT_MATHERRL
{
WRITEL_POW_NEG_TO_NON_INTEGER;
ERRNO_DOMAIN;
@@ -2249,17 +2284,17 @@ else
{
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case pow_neg_to_non_integer:
/* neg**non_integral */
{
DOMAIND; NAMED = (char *) "pow";
- ifSVID
- {
+ ifSVID
+ {
RETVAL_ZEROD;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_POW_NEG_TO_NON_INTEGER;
ERRNO_DOMAIN;
@@ -2269,17 +2304,17 @@ else
{
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case powf_neg_to_non_integer:
/* neg**non-integral */
{
DOMAINF; NAMEF = (char *) "powf";
- ifSVID
- {
+ ifSVID
+ {
RETVAL_ZEROF;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_POW_NEG_TO_NON_INTEGER;
ERRNO_DOMAIN;
@@ -2289,7 +2324,7 @@ else
{
NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case powl_nan_to_zero:
@@ -2299,9 +2334,9 @@ else
DOMAINL; NAMEL = (char *) "powl";
*(long double *)retval = *(long double *)arg1;
NOT_MATHERRL {ERRNO_DOMAIN;}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
- }
+ }
case pow_nan_to_zero:
/* pow(NaN,0.0) */
/* Special Error */
@@ -2309,7 +2344,7 @@ else
DOMAIND; NAMED = (char *) "pow";
*(double *)retval = *(double *)arg1;
NOT_MATHERRD {ERRNO_DOMAIN;}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case powf_nan_to_zero:
@@ -2319,7 +2354,7 @@ else
DOMAINF; NAMEF = (char *) "powf";
*(float *)retval = *(float *)arg1;
NOT_MATHERRF {ERRNO_DOMAIN;}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case atan2l_zero:
@@ -2327,15 +2362,15 @@ else
{
DOMAINL; NAMEL = (char *) "atan2l";
RETVAL_ZEROL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
- ifSVID
+ ifSVID
{
WRITEL_ATAN2_ZERO_BY_ZERO;
}
ERRNO_DOMAIN;
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case atan2_zero:
@@ -2343,15 +2378,15 @@ else
{
DOMAIND; NAMED = (char *) "atan2";
RETVAL_ZEROD;
- NOT_MATHERRD
+ NOT_MATHERRD
{
- ifSVID
- {
+ ifSVID
+ {
WRITED_ATAN2_ZERO_BY_ZERO;
}
ERRNO_DOMAIN;
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case atan2f_zero:
@@ -2359,13 +2394,15 @@ else
{
DOMAINF; NAMEF = (char *) "atan2f";
RETVAL_ZEROF;
- NOT_MATHERRF
- ifSVID
+ NOT_MATHERRF
+ {
+ ifSVID
{
WRITEF_ATAN2_ZERO_BY_ZERO;
}
- ERRNO_DOMAIN;
- *(float *)retval = excf.retval;
+ ERRNO_DOMAIN;
+ }
+ *(float *)retval = excf.retval;
break;
}
case atan2dl_zero:
@@ -2373,15 +2410,15 @@ else
{
DOMAINL; NAMEL = (char *) "atan2dl";
RETVAL_ZEROL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
- ifSVID
+ ifSVID
{
WRITEL_ATAN2D_ZERO_BY_ZERO;
}
ERRNO_DOMAIN;
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case atan2d_zero:
@@ -2389,15 +2426,15 @@ else
{
DOMAIND; NAMED = (char *) "atan2d";
RETVAL_ZEROD;
- NOT_MATHERRD
+ NOT_MATHERRD
{
- ifSVID
- {
+ ifSVID
+ {
WRITED_ATAN2D_ZERO_BY_ZERO;
}
ERRNO_DOMAIN;
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case atan2df_zero:
@@ -2405,13 +2442,15 @@ else
{
DOMAINF; NAMEF = (char *) "atan2df";
RETVAL_ZEROF;
- NOT_MATHERRF
- ifSVID
+ NOT_MATHERRF
+ {
+ ifSVID
{
WRITEF_ATAN2D_ZERO_BY_ZERO;
}
- ERRNO_DOMAIN;
- *(float *)retval = excf.retval;
+ ERRNO_DOMAIN;
+ }
+ *(float *)retval = excf.retval;
break;
}
case expm1_overflow:
@@ -2446,60 +2485,60 @@ else
/* scalbl underflow */
{
UNDERFLOWL; NAMEL = (char *) "scalbl";
- if (INPUT_XL < 0.0L) RETVAL_NEG_ZEROL;
+ if (INPUT_XL < ZEROL_VALUE /*0.0L*/) RETVAL_NEG_ZEROL;
else RETVAL_ZEROL;
- NOT_MATHERRL {ERRNO_RANGE;}
- *(long double *)retval = excl.retval;
+ NOT_MATHERRL {ERRNO_RANGE;}
+ *(long double *)retval = excl.retval;
break;
}
case scalb_underflow:
/* scalb underflow */
{
UNDERFLOWD; NAMED = (char *) "scalb";
- if (INPUT_XD < 0.0) RETVAL_NEG_ZEROD;
+ if (INPUT_XD < ZEROD_VALUE /*0.0*/) RETVAL_NEG_ZEROD;
else RETVAL_ZEROD;
- NOT_MATHERRD {ERRNO_RANGE;}
- *(double *)retval = exc.retval;
+ NOT_MATHERRD {ERRNO_RANGE;}
+ *(double *)retval = exc.retval;
break;
}
case scalbf_underflow:
/* scalbf underflow */
{
UNDERFLOWF; NAMEF = (char *) "scalbf";
- if (INPUT_XF < 0.0) RETVAL_NEG_ZEROF;
+ if (INPUT_XF < ZEROF_VALUE /*0.0*/) RETVAL_NEG_ZEROF;
else RETVAL_ZEROF;
- NOT_MATHERRF {ERRNO_RANGE;}
- *(float *)retval = excf.retval;
+ NOT_MATHERRF {ERRNO_RANGE;}
+ *(float *)retval = excf.retval;
break;
}
case scalbl_overflow:
/* scalbl overflow */
{
OVERFLOWL; NAMEL = (char *) "scalbl";
- if (INPUT_XL < 0) RETVAL_NEG_HUGE_VALL;
+ if (INPUT_XL < ZEROL_VALUE /*0*/) RETVAL_NEG_HUGE_VALL;
else RETVAL_HUGE_VALL;
- NOT_MATHERRL {ERRNO_RANGE;}
- *(long double *)retval = excl.retval;
+ NOT_MATHERRL {ERRNO_RANGE;}
+ *(long double *)retval = excl.retval;
break;
}
case scalb_overflow:
/* scalb overflow */
{
OVERFLOWD; NAMED = (char *) "scalb";
- if (INPUT_XD < 0) RETVAL_NEG_HUGE_VALD;
+ if (INPUT_XD < ZEROD_VALUE /*0*/) RETVAL_NEG_HUGE_VALD;
else RETVAL_HUGE_VALD;
- NOT_MATHERRD {ERRNO_RANGE;}
- *(double *)retval = exc.retval;
+ NOT_MATHERRD {ERRNO_RANGE;}
+ *(double *)retval = exc.retval;
break;
}
case scalbf_overflow:
/* scalbf overflow */
{
OVERFLOWF; NAMEF = (char *) "scalbf";
- if (INPUT_XF < 0) RETVAL_NEG_HUGE_VALF;
+ if (INPUT_XF < ZEROF_VALUE /*0*/) RETVAL_NEG_HUGE_VALF;
else RETVAL_HUGE_VALF;
- NOT_MATHERRF {ERRNO_RANGE;}
- *(float *)retval = excf.retval;
+ NOT_MATHERRF {ERRNO_RANGE;}
+ *(float *)retval = excf.retval;
break;
}
case hypotl_overflow:
@@ -2507,7 +2546,7 @@ else
{
OVERFLOWL; NAMEL = (char *) "hypotl";
ifSVID
- {
+ {
RETVAL_HUGEL;
}
else
@@ -2515,7 +2554,7 @@ else
RETVAL_HUGE_VALL;
}
NOT_MATHERRL {ERRNO_RANGE;}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case hypot_overflow:
@@ -2523,7 +2562,7 @@ else
{
OVERFLOWD; NAMED = (char *) "hypot";
ifSVID
- {
+ {
RETVAL_HUGED;
}
else
@@ -2531,14 +2570,14 @@ else
RETVAL_HUGE_VALD;
}
NOT_MATHERRD {ERRNO_RANGE;}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case hypotf_overflow:
/* hypotf overflow */
- {
+ {
OVERFLOWF; NAMEF = (char *) "hypotf";
- ifSVID
+ ifSVID
{
RETVAL_HUGEF;
}
@@ -2547,7 +2586,7 @@ else
RETVAL_HUGE_VALF;
}
NOT_MATHERRF {ERRNO_RANGE;}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case acosl_gt_one:
@@ -2555,7 +2594,7 @@ else
{
DOMAINL; NAMEL = (char *) "acosl";
RETVAL_ZEROL;
- ifSVID
+ ifSVID
{
NOT_MATHERRL
{
@@ -2575,7 +2614,7 @@ else
{
DOMAIND; NAMED = (char *) "acos";
RETVAL_ZEROD;
- ifSVID
+ ifSVID
{
NOT_MATHERRD
{
@@ -2595,9 +2634,9 @@ else
{
DOMAINF; NAMEF = (char *) "acosf";
RETVAL_ZEROF;
- ifSVID
+ ifSVID
{
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_ACOS;
ERRNO_DOMAIN;
@@ -2606,8 +2645,8 @@ else
else
{
NOT_MATHERRF {ERRNO_DOMAIN;}
- }
- *(float *)retval = excf.retval;
+ }
+ *(float *)retval = excf.retval;
break;
}
case asinl_gt_one:
@@ -2615,7 +2654,7 @@ else
{
DOMAINL; NAMEL = (char *) "asinl";
RETVAL_ZEROL;
- ifSVID
+ ifSVID
{
NOT_MATHERRL
{
@@ -2635,7 +2674,7 @@ else
{
DOMAIND; NAMED = (char *) "asin";
RETVAL_ZEROD;
- ifSVID
+ ifSVID
{
NOT_MATHERRD
{
@@ -2655,9 +2694,9 @@ else
{
DOMAINF; NAMEF = (char *) "asinf";
RETVAL_ZEROF;
- ifSVID
+ ifSVID
{
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_ASIN;
ERRNO_DOMAIN;
@@ -2666,8 +2705,8 @@ else
else
{
NOT_MATHERRF {ERRNO_DOMAIN;}
- }
- *(float *)retval = excf.retval;
+ }
+ *(float *)retval = excf.retval;
break;
}
case acosdl_gt_one:
@@ -2675,7 +2714,7 @@ else
{
DOMAINL; NAMEL = (char *) "acosdl";
RETVAL_ZEROL;
- ifSVID
+ ifSVID
{
NOT_MATHERRL
{
@@ -2695,7 +2734,7 @@ else
{
DOMAIND; NAMED = (char *) "acosd";
RETVAL_ZEROD;
- ifSVID
+ ifSVID
{
NOT_MATHERRD
{
@@ -2715,9 +2754,9 @@ else
{
DOMAINF; NAMEF = (char *) "acosdf";
RETVAL_ZEROF;
- ifSVID
+ ifSVID
{
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_ACOSD;
ERRNO_DOMAIN;
@@ -2726,8 +2765,8 @@ else
else
{
NOT_MATHERRF {ERRNO_DOMAIN;}
- }
- *(float *)retval = excf.retval;
+ }
+ *(float *)retval = excf.retval;
break;
}
case asindl_gt_one:
@@ -2735,7 +2774,7 @@ else
{
DOMAINL; NAMEL = (char *) "asindl";
RETVAL_ZEROL;
- ifSVID
+ ifSVID
{
NOT_MATHERRL
{
@@ -2755,7 +2794,7 @@ else
{
DOMAIND; NAMED = (char *) "asind";
RETVAL_ZEROD;
- ifSVID
+ ifSVID
{
NOT_MATHERRD
{
@@ -2775,9 +2814,9 @@ else
{
DOMAINF; NAMEF = (char *) "asindf";
RETVAL_ZEROF;
- ifSVID
+ ifSVID
{
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_ASIND;
ERRNO_DOMAIN;
@@ -2786,8 +2825,8 @@ else
else
{
NOT_MATHERRF {ERRNO_DOMAIN;}
- }
- *(float *)retval = excf.retval;
+ }
+ *(float *)retval = excf.retval;
break;
}
case coshl_overflow:
@@ -2798,7 +2837,7 @@ else
{
RETVAL_HUGEL;
}
- else
+ else
{
RETVAL_HUGE_VALL;
}
@@ -2814,7 +2853,7 @@ else
{
RETVAL_HUGED;
}
- else
+ else
{
RETVAL_HUGE_VALD;
}
@@ -2830,7 +2869,7 @@ else
{
RETVAL_HUGEF;
}
- else
+ else
{
RETVAL_HUGE_VALF;
}
@@ -2844,12 +2883,12 @@ else
OVERFLOWL; NAMEL = (char *) "sinhl";
ifSVID
{
- if (INPUT_XL > 0.0) RETVAL_HUGEL;
+ if (INPUT_XL > ZEROL_VALUE /*0.0*/) RETVAL_HUGEL;
else RETVAL_NEG_HUGEL;
}
- else
+ else
{
- if (INPUT_XL > 0.0) RETVAL_HUGE_VALL;
+ if (INPUT_XL > ZEROL_VALUE /*0.0*/) RETVAL_HUGE_VALL;
else RETVAL_NEG_HUGE_VALL;
}
NOT_MATHERRL {ERRNO_RANGE;}
@@ -2862,12 +2901,12 @@ else
OVERFLOWD; NAMED = (char *) "sinh";
ifSVID
{
- if (INPUT_XD > 0.0) RETVAL_HUGED;
+ if (INPUT_XD > ZEROD_VALUE /*0.0*/) RETVAL_HUGED;
else RETVAL_NEG_HUGED;
}
- else
+ else
{
- if (INPUT_XD > 0.0) RETVAL_HUGE_VALD;
+ if (INPUT_XD > ZEROD_VALUE /*0.0*/) RETVAL_HUGE_VALD;
else RETVAL_NEG_HUGE_VALD;
}
NOT_MATHERRD {ERRNO_RANGE;}
@@ -2880,12 +2919,12 @@ else
OVERFLOWF; NAMEF = (char *) "sinhf";
ifSVID
{
- if( INPUT_XF > 0.0) RETVAL_HUGEF;
+ if (INPUT_XF > ZEROF_VALUE /*0.0*/) RETVAL_HUGEF;
else RETVAL_NEG_HUGEF;
}
- else
+ else
{
- if (INPUT_XF > 0.0) RETVAL_HUGE_VALF;
+ if (INPUT_XF > ZEROF_VALUE /*0.0*/) RETVAL_HUGE_VALF;
else RETVAL_NEG_HUGE_VALF;
}
NOT_MATHERRF {ERRNO_RANGE;}
@@ -2896,7 +2935,7 @@ else
/* acoshl(x < 1) */
{
DOMAINL; NAMEL = (char *) "acoshl";
- ifSVID
+ ifSVID
{
NOT_MATHERRL
{
@@ -2904,7 +2943,7 @@ else
ERRNO_DOMAIN;
}
}
- else
+ else
{
NOT_MATHERRL {ERRNO_DOMAIN;}
}
@@ -2915,15 +2954,15 @@ else
/* acosh(x < 1) */
{
DOMAIND; NAMED = (char *) "acosh";
- ifSVID
+ ifSVID
{
NOT_MATHERRD
{
- WRITEL_ACOSH;
+ WRITED_ACOSH;
ERRNO_DOMAIN;
}
}
- else
+ else
{
NOT_MATHERRD {ERRNO_DOMAIN;}
}
@@ -2934,7 +2973,7 @@ else
/* acoshf(x < 1) */
{
DOMAINF; NAMEF = (char *) "acoshf";
- ifSVID
+ ifSVID
{
NOT_MATHERRF
{
@@ -2947,13 +2986,13 @@ else
NOT_MATHERRF {ERRNO_DOMAIN;}
}
*(float *)retval = excf.retval;
- ERRNO_DOMAIN; break;
+ break;
}
case atanhl_gt_one:
/* atanhl(|x| > 1) */
{
DOMAINL; NAMEL = (char *) "atanhl";
- ifSVID
+ ifSVID
{
NOT_MATHERRL
{
@@ -2971,7 +3010,7 @@ else
/* atanh(|x| > 1) */
{
DOMAIND; NAMED = (char *) "atanh";
- ifSVID
+ ifSVID
{
NOT_MATHERRD
{
@@ -2989,7 +3028,7 @@ else
/* atanhf(|x| > 1) */
{
DOMAINF; NAMEF = (char *) "atanhf";
- ifSVID
+ ifSVID
{
NOT_MATHERRF
{
@@ -3007,7 +3046,7 @@ else
/* atanhl(|x| == 1) */
{
SINGL; NAMEL = (char *) "atanhl";
- ifSVID
+ ifSVID
{
NOT_MATHERRL
{
@@ -3025,7 +3064,7 @@ else
/* atanh(|x| == 1) */
{
SINGD; NAMED = (char *) "atanh";
- ifSVID
+ ifSVID
{
NOT_MATHERRD
{
@@ -3043,7 +3082,7 @@ else
/* atanhf(|x| == 1) */
{
SINGF; NAMEF = (char *) "atanhf";
- ifSVID
+ ifSVID
{
NOT_MATHERRF
{
@@ -3061,7 +3100,7 @@ else
/* gammal overflow */
{
OVERFLOWL; NAMEL = (char *) "gammal";
- ifSVID
+ ifSVID
{
RETVAL_HUGEL;
}
@@ -3069,15 +3108,15 @@ else
{
RETVAL_HUGE_VALL;
}
- NOT_MATHERRL{ERRNO_RANGE;}
- *(long double*)retval = excl.retval;
+ NOT_MATHERRL {ERRNO_RANGE;}
+ *(long double *)retval = excl.retval;
break;
}
case gamma_overflow:
/* gamma overflow */
{
OVERFLOWD; NAMED = (char *) "gamma";
- ifSVID
+ ifSVID
{
RETVAL_HUGED;
}
@@ -3085,15 +3124,15 @@ else
{
RETVAL_HUGE_VALD;
}
- NOT_MATHERRD{ERRNO_RANGE;}
- *(double*)retval = exc.retval;
+ NOT_MATHERRD {ERRNO_RANGE;}
+ *(double *)retval = exc.retval;
break;
}
case gammaf_overflow:
/* gammaf overflow */
{
OVERFLOWF; NAMEF = (char *) "gammaf";
- ifSVID
+ ifSVID
{
RETVAL_HUGEF;
}
@@ -3101,8 +3140,8 @@ else
{
RETVAL_HUGE_VALF;
}
- NOT_MATHERRF{ERRNO_RANGE;}
- *(float*)retval = excf.retval;
+ NOT_MATHERRF {ERRNO_RANGE;}
+ *(float *)retval = excf.retval;
break;
}
case gammal_negative:
@@ -3121,16 +3160,16 @@ else
else
{
RETVAL_HUGE_VALL;
- NOT_MATHERRL{ERRNO_DOMAIN;}
+ NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double*)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case gamma_negative:
/* gamma -int or 0 */
{
SINGD; NAMED = (char *) "gamma";
- ifSVID
+ ifSVID
{
RETVAL_HUGED;
NOT_MATHERRD
@@ -3142,16 +3181,16 @@ else
else
{
RETVAL_HUGE_VALD;
- NOT_MATHERRD{ERRNO_DOMAIN;}
+ NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double*)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case gammaf_negative:
/* gammaf -int or 0 */
{
SINGF; NAMEF = (char *) "gammaf";
- ifSVID
+ ifSVID
{
RETVAL_HUGEF;
NOT_MATHERRF
@@ -3163,16 +3202,16 @@ else
else
{
RETVAL_HUGE_VALF;
- NOT_MATHERRF{ERRNO_DOMAIN;}
+ NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float*)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case lgammal_overflow:
/* lgammal overflow */
{
OVERFLOWL; NAMEL = (char *) "lgammal";
- ifSVID
+ ifSVID
{
RETVAL_HUGEL;
}
@@ -3180,15 +3219,15 @@ else
{
RETVAL_HUGE_VALL;
}
- NOT_MATHERRL{ERRNO_RANGE;}
- *(long double*)retval = excl.retval;
+ NOT_MATHERRL {ERRNO_RANGE;}
+ *(long double *)retval = excl.retval;
break;
}
case lgamma_overflow:
/* lgamma overflow */
{
OVERFLOWD; NAMED = (char *) "lgamma";
- ifSVID
+ ifSVID
{
RETVAL_HUGED;
}
@@ -3196,15 +3235,15 @@ else
{
RETVAL_HUGE_VALD;
}
- NOT_MATHERRD{ERRNO_RANGE;}
- *(double*)retval = exc.retval;
+ NOT_MATHERRD {ERRNO_RANGE;}
+ *(double *)retval = exc.retval;
break;
}
case lgammaf_overflow:
/* lgammaf overflow */
{
OVERFLOWF; NAMEF = (char *) "lgammaf";
- ifSVID
+ ifSVID
{
RETVAL_HUGEF;
}
@@ -3212,8 +3251,8 @@ else
{
RETVAL_HUGE_VALF;
}
- NOT_MATHERRF{ERRNO_RANGE;}
- *(float*)retval = excf.retval;
+ NOT_MATHERRF {ERRNO_RANGE;}
+ *(float *)retval = excf.retval;
break;
}
case lgammal_negative:
@@ -3225,16 +3264,16 @@ else
RETVAL_HUGEL;
NOT_MATHERRL
{
- WRITEL_GAMMA_NEGATIVE;
- ERRNO_DOMAIN;
+ WRITEL_LGAMMA_NEGATIVE;
+ ERRNO_DOMAIN;
}
}
else
{
RETVAL_HUGE_VALL;
- NOT_MATHERRL{ERRNO_DOMAIN;}
+ NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double*)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case lgamma_negative:
@@ -3253,16 +3292,16 @@ else
else
{
RETVAL_HUGE_VALD;
- NOT_MATHERRD{ERRNO_DOMAIN;}
+ NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double*)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case lgammaf_negative:
/* lgammaf -int or 0 */
{
SINGF; NAMEF = (char *) "lgammaf";
- ifSVID
+ ifSVID
{
RETVAL_HUGEF;
NOT_MATHERRF
@@ -3274,16 +3313,16 @@ else
else
{
RETVAL_HUGE_VALF;
- NOT_MATHERRF{ERRNO_DOMAIN;}
+ NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float*)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case tgammal_overflow:
/* tgammal overflow */
{
OVERFLOWL; NAMEL = (char *) "tgammal";
- ifSVID
+ ifSVID
{
RETVAL_HUGEL;
}
@@ -3291,15 +3330,15 @@ else
{
RETVAL_HUGE_VALL;
}
- NOT_MATHERRL{ERRNO_RANGE;}
- *(long double*)retval = excl.retval;
+ NOT_MATHERRL {ERRNO_RANGE;}
+ *(long double *)retval = excl.retval;
break;
}
case tgamma_overflow:
/* tgamma overflow */
{
OVERFLOWD; NAMED = (char *) "tgamma";
- ifSVID
+ ifSVID
{
RETVAL_HUGED;
}
@@ -3307,15 +3346,15 @@ else
{
RETVAL_HUGE_VALD;
}
- NOT_MATHERRD{ERRNO_RANGE;}
- *(double*)retval = exc.retval;
+ NOT_MATHERRD {ERRNO_RANGE;}
+ *(double *)retval = exc.retval;
break;
}
case tgammaf_overflow:
/* tgammaf overflow */
{
OVERFLOWF; NAMEF = (char *) "tgammaf";
- ifSVID
+ ifSVID
{
RETVAL_HUGEF;
}
@@ -3323,15 +3362,15 @@ else
{
RETVAL_HUGE_VALF;
}
- NOT_MATHERRF{ERRNO_RANGE;}
- *(float*)retval = excf.retval;
+ NOT_MATHERRF {ERRNO_RANGE;}
+ *(float *)retval = excf.retval;
break;
}
case tgammal_negative:
/* tgammal -int or 0 */
{
SINGL; NAMEL = (char *) "tgammal";
- ifSVID
+ ifSVID
{
NOT_MATHERRL
{
@@ -3341,16 +3380,16 @@ else
}
else
{
- NOT_MATHERRL{ERRNO_DOMAIN;}
+ NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double*)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case tgamma_negative:
/* tgamma -int or 0 */
{
SINGD; NAMED = (char *) "tgamma";
- ifSVID
+ ifSVID
{
NOT_MATHERRD
{
@@ -3360,16 +3399,16 @@ else
}
else
{
- NOT_MATHERRD{ERRNO_DOMAIN;}
+ NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double*)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case tgammaf_negative:
/* tgammaf -int or 0 */
{
SINGF; NAMEF = (char *) "tgammaf";
- ifSVID
+ ifSVID
{
NOT_MATHERRF
{
@@ -3379,9 +3418,9 @@ else
}
else
{
- NOT_MATHERRF{ERRNO_DOMAIN;}
+ NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float*)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case j0l_gt_loss:
@@ -3389,7 +3428,7 @@ else
{
TLOSSL; NAMEL = (char *) "j0l";
RETVAL_ZEROL;
- ifSVID
+ ifSVID
{
NOT_MATHERRL
{
@@ -3401,7 +3440,7 @@ else
{
NOT_MATHERRL {ERRNO_RANGE;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case j0_gt_loss:
@@ -3409,7 +3448,7 @@ else
{
TLOSSD; NAMED = (char *) "j0";
RETVAL_ZEROD;
- ifSVID
+ ifSVID
{
NOT_MATHERRD
{
@@ -3421,7 +3460,7 @@ else
{
NOT_MATHERRD {ERRNO_RANGE;}
}
- *(double*)retval = exc.retval;
+ *(double*)retval = exc.retval;
break;
}
case j0f_gt_loss:
@@ -3429,7 +3468,7 @@ else
{
TLOSSF; NAMEF = (char *) "j0f";
RETVAL_ZEROF;
- ifSVID
+ ifSVID
{
NOT_MATHERRF
{
@@ -3449,7 +3488,7 @@ else
{
TLOSSL; NAMEL = (char *) "j1l";
RETVAL_ZEROL;
- ifSVID
+ ifSVID
{
NOT_MATHERRL
{
@@ -3461,7 +3500,7 @@ else
{
NOT_MATHERRL {ERRNO_RANGE;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case j1_gt_loss:
@@ -3469,7 +3508,7 @@ else
{
TLOSSD; NAMED = (char *) "j1";
RETVAL_ZEROD;
- ifSVID
+ ifSVID
{
NOT_MATHERRD
{
@@ -3481,7 +3520,7 @@ else
{
NOT_MATHERRD {ERRNO_RANGE;}
}
- *(double*)retval = exc.retval;
+ *(double*)retval = exc.retval;
break;
}
case j1f_gt_loss:
@@ -3489,7 +3528,7 @@ else
{
TLOSSF; NAMEF = (char *) "j1f";
RETVAL_ZEROF;
- ifSVID
+ ifSVID
{
NOT_MATHERRF
{
@@ -3509,7 +3548,7 @@ else
{
TLOSSL; NAMEL = (char *) "jnl";
RETVAL_ZEROL;
- ifSVID
+ ifSVID
{
NOT_MATHERRL
{
@@ -3521,7 +3560,7 @@ else
{
NOT_MATHERRL {ERRNO_RANGE;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case jn_gt_loss:
@@ -3529,7 +3568,7 @@ else
{
TLOSSD; NAMED = (char *) "jn";
RETVAL_ZEROD;
- ifSVID
+ ifSVID
{
NOT_MATHERRD
{
@@ -3541,7 +3580,7 @@ else
{
NOT_MATHERRD {ERRNO_RANGE;}
}
- *(double*)retval = exc.retval;
+ *(double*)retval = exc.retval;
break;
}
case jnf_gt_loss:
@@ -3549,7 +3588,7 @@ else
{
TLOSSF; NAMEF = (char *) "jnf";
RETVAL_ZEROF;
- ifSVID
+ ifSVID
{
NOT_MATHERRF
{
@@ -3569,7 +3608,7 @@ else
{
TLOSSL; NAMEL = (char *) "y0l";
RETVAL_ZEROL;
- ifSVID
+ ifSVID
{
NOT_MATHERRL
{
@@ -3589,7 +3628,7 @@ else
{
TLOSSD; NAMED = (char *) "y0";
RETVAL_ZEROD;
- ifSVID
+ ifSVID
{
NOT_MATHERRD
{
@@ -3609,7 +3648,7 @@ else
{
TLOSSF; NAMEF = (char *) "y0f";
RETVAL_ZEROF;
- ifSVID
+ ifSVID
{
NOT_MATHERRF
{
@@ -3628,10 +3667,10 @@ else
/* y0l(0) */
{
DOMAINL; NAMEL = (char *) "y0l";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
WRITEL_Y0_ZERO;
ERRNO_DOMAIN;
@@ -3639,20 +3678,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALL;
+ RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case y0_zero:
/* y0(0) */
{
DOMAIND; NAMED = (char *) "y0";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGED;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_Y0_ZERO;
ERRNO_DOMAIN;
@@ -3660,20 +3699,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALD;
+ RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case y0f_zero:
/* y0f(0) */
{
DOMAINF; NAMEF = (char *) "y0f";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEF;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_Y0_ZERO;
ERRNO_DOMAIN;
@@ -3681,10 +3720,10 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALF;
+ RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case y1l_gt_loss:
@@ -3692,7 +3731,7 @@ else
{
TLOSSL; NAMEL = (char *) "y1l";
RETVAL_ZEROL;
- ifSVID
+ ifSVID
{
NOT_MATHERRL
{
@@ -3712,7 +3751,7 @@ else
{
TLOSSD; NAMED = (char *) "y1";
RETVAL_ZEROD;
- ifSVID
+ ifSVID
{
NOT_MATHERRD
{
@@ -3732,7 +3771,7 @@ else
{
TLOSSF; NAMEF = (char *) "y1f";
RETVAL_ZEROF;
- ifSVID
+ ifSVID
{
NOT_MATHERRF
{
@@ -3751,10 +3790,10 @@ else
/* y1l(0) */
{
DOMAINL; NAMEL = (char *) "y1l";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
WRITEL_Y1_ZERO;
ERRNO_DOMAIN;
@@ -3762,20 +3801,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALL;
+ RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case y1_zero:
/* y1(0) */
{
DOMAIND; NAMED = (char *) "y1";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGED;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_Y1_ZERO;
ERRNO_DOMAIN;
@@ -3783,30 +3822,31 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALD;
+ RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case y1f_zero:
/* y1f(0) */
{
DOMAINF; NAMEF = (char *) "y1f";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEF;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_Y1_ZERO;
ERRNO_DOMAIN;
}
- }else
+ }
+ else
{
- RETVAL_NEG_HUGE_VALF;
+ RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case ynl_gt_loss:
@@ -3814,7 +3854,7 @@ else
{
TLOSSL; NAMEL = (char *) "ynl";
RETVAL_ZEROL;
- ifSVID
+ ifSVID
{
NOT_MATHERRL
{
@@ -3834,7 +3874,7 @@ else
{
TLOSSD; NAMED = (char *) "yn";
RETVAL_ZEROD;
- ifSVID
+ ifSVID
{
NOT_MATHERRD
{
@@ -3854,7 +3894,7 @@ else
{
TLOSSF; NAMEF = (char *) "ynf";
RETVAL_ZEROF;
- ifSVID
+ ifSVID
{
NOT_MATHERRF
{
@@ -3873,10 +3913,10 @@ else
/* ynl(0) */
{
DOMAINL; NAMEL = (char *) "ynl";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
WRITEL_YN_ZERO;
ERRNO_DOMAIN;
@@ -3884,20 +3924,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALL;
+ RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case yn_zero:
/* yn(0) */
{
DOMAIND; NAMED = (char *) "yn";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGED;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_YN_ZERO;
ERRNO_DOMAIN;
@@ -3905,20 +3945,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALD;
+ RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case ynf_zero:
/* ynf(0) */
{
DOMAINF; NAMEF = (char *) "ynf";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEF;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_YN_ZERO;
ERRNO_DOMAIN;
@@ -3926,20 +3966,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALF;
+ RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case y0l_negative:
/* y0l(x<0) */
{
DOMAINL; NAMEL = (char *) "y0l";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
WRITEL_Y0_NEGATIVE;
ERRNO_DOMAIN;
@@ -3947,20 +3987,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALL;
+ RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case y0_negative:
/* y0(x<0) */
{
DOMAIND; NAMED = (char *) "y0";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGED;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_Y0_NEGATIVE;
ERRNO_DOMAIN;
@@ -3968,20 +4008,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALD;
+ RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case y0f_negative:
/* y0f(x<0) */
{
DOMAINF; NAMEF = (char *) "y0f";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEF;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_Y0_NEGATIVE;
ERRNO_DOMAIN;
@@ -3989,20 +4029,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALF;
+ RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case y1l_negative:
/* y1l(x<0) */
{
DOMAINL; NAMEL = (char *) "y1l";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
WRITEL_Y1_NEGATIVE;
ERRNO_DOMAIN;
@@ -4010,41 +4050,41 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALL;
+ RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case y1_negative:
/* y1(x<0) */
{
DOMAIND; NAMED = (char *) "y1";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGED;
- NOT_MATHERRD
+ NOT_MATHERRD
{
- WRITED_Y1_NEGATIUE;
+ WRITED_Y1_NEGATIVE;
ERRNO_DOMAIN;
}
}
else
{
- RETVAL_NEG_HUGE_VALD;
+ RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case y1f_negative:
/* y1f(x<0) */
{
DOMAINF; NAMEF = (char *) "y1f";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEF;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_Y1_NEGATIVE;
ERRNO_DOMAIN;
@@ -4052,20 +4092,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALF;
+ RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
case ynl_negative:
/* ynl(x<0) */
{
DOMAINL; NAMEL = (char *) "ynl";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEL;
- NOT_MATHERRL
+ NOT_MATHERRL
{
WRITEL_YN_NEGATIVE;
ERRNO_DOMAIN;
@@ -4073,20 +4113,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALL;
+ RETVAL_NEG_HUGE_VALL;
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
case yn_negative:
/* yn(x<0) */
{
DOMAIND; NAMED = (char *) "yn";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGED;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_YN_NEGATIVE;
ERRNO_DOMAIN;
@@ -4094,20 +4134,20 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALD;
+ RETVAL_NEG_HUGE_VALD;
NOT_MATHERRD {ERRNO_DOMAIN;}
}
- *(double *)retval = exc.retval;
+ *(double *)retval = exc.retval;
break;
}
case ynf_negative:
/* ynf(x<0) */
{
DOMAINF; NAMEF = (char *) "ynf";
- ifSVID
+ ifSVID
{
RETVAL_NEG_HUGEF;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_YN_NEGATIVE;
ERRNO_DOMAIN;
@@ -4115,18 +4155,18 @@ else
}
else
{
- RETVAL_NEG_HUGE_VALF;
+ RETVAL_NEG_HUGE_VALF;
NOT_MATHERRF {ERRNO_DOMAIN;}
}
- *(float *)retval = excf.retval;
+ *(float *)retval = excf.retval;
break;
}
- case fmodl_by_zero:
+ case fmodl_by_zero:
/* fmodl(x,0) */
{
DOMAINL; NAMEL = (char *) "fmodl";
- ifSVID
- {
+ ifSVID
+ {
*(long double *)retval = *(long double *)arg1;
NOT_MATHERRL
{
@@ -4134,21 +4174,21 @@ else
ERRNO_DOMAIN;
}
}
- else
+ else
{ /* NaN already computed */
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
- case fmod_by_zero:
+ case fmod_by_zero:
/* fmod(x,0) */
{
DOMAIND; NAMED = (char *) "fmod";
- ifSVID
+ ifSVID
{
*(double *)retval = *(double *)arg1;
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_FMOD;
ERRNO_DOMAIN;
@@ -4157,18 +4197,18 @@ else
else
{ /* NaN already computed */
NOT_MATHERRD {ERRNO_DOMAIN;}
- }
- *(double *)retval = exc.retval;
+ }
+ *(double *)retval = exc.retval;
break;
}
- case fmodf_by_zero:
+ case fmodf_by_zero:
/* fmodf(x,0) */
{
DOMAINF; NAMEF = (char *) "fmodf";
- ifSVID
+ ifSVID
{
*(float *)retval = *(float *)arg1;
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_FMOD;
ERRNO_DOMAIN;
@@ -4177,36 +4217,36 @@ else
else
{
NOT_MATHERRF {ERRNO_DOMAIN;}
- }
- *(float *)retval = excf.retval;
+ }
+ *(float *)retval = excf.retval;
break;
}
- case remainderl_by_zero:
+ case remainderl_by_zero:
/* remainderl(x,0) */
{
DOMAINL; NAMEL = (char *) "remainderl";
- ifSVID
- {
+ ifSVID
+ {
NOT_MATHERRL
{
WRITEL_REM;
ERRNO_DOMAIN;
}
}
- else
+ else
{ /* NaN already computed */
NOT_MATHERRL {ERRNO_DOMAIN;}
}
- *(long double *)retval = excl.retval;
+ *(long double *)retval = excl.retval;
break;
}
- case remainder_by_zero:
+ case remainder_by_zero:
/* remainder(x,0) */
{
DOMAIND; NAMED = (char *) "remainder";
- ifSVID
+ ifSVID
{
- NOT_MATHERRD
+ NOT_MATHERRD
{
WRITED_REM;
ERRNO_DOMAIN;
@@ -4215,17 +4255,17 @@ else
else
{ /* NaN already computed */
NOT_MATHERRD {ERRNO_DOMAIN;}
- }
- *(double *)retval = exc.retval;
+ }
+ *(double *)retval = exc.retval;
break;
}
- case remainderf_by_zero:
+ case remainderf_by_zero:
/* remainderf(x,0) */
{
DOMAINF; NAMEF = (char *) "remainderf";
- ifSVID
+ ifSVID
{
- NOT_MATHERRF
+ NOT_MATHERRF
{
WRITEF_REM;
ERRNO_DOMAIN;
@@ -4234,8 +4274,8 @@ else
else
{
NOT_MATHERRF {ERRNO_DOMAIN;}
- }
- *(float *)retval = excf.retval;
+ }
+ *(float *)retval = excf.retval;
break;
}
default:
diff --git a/sysdeps/ia64/fpu/libm_lgamma.S b/sysdeps/ia64/fpu/libm_lgamma.S
index 5c13fc3..0df1e4b 100644
--- a/sysdeps/ia64/fpu/libm_lgamma.S
+++ b/sysdeps/ia64/fpu/libm_lgamma.S
@@ -47,6 +47,7 @@
// 09/15/02 Fixed bug on the branch lgamma_negrecursion
// 10/21/02 Now it returns SIGN(GAMMA(x))=-1 for negative zero
// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 07/22/03 Reformatted some data tables
//
//*********************************************************************
//
@@ -951,19 +952,32 @@ data8 0xD28D3312983E98A0,0xBFFF //S2
//
data8 0x8090F777D7942F73,0x4001 // PR01
data8 0xE5B521193CF61E63,0x4000 // PR11
-data8 0xC02C000000001939,0x0000000000000233 // (-15;-14)
-data8 0xC02A000000016124,0x0000000000002BFB // (-14;-13)
-data8 0xC02800000011EED9,0x0000000000025CBB // (-13;-12)
-data8 0xC026000000D7322A,0x00000000001E1095 // (-12;-11)
-data8 0xC0240000093F2777,0x00000000013DD3DC // (-11;-10)
-data8 0xC02200005C7768FB,0x000000000C9539B9 // (-10;-9)
-data8 0xC02000034028B3F9,0x000000007570C565 // (-9;-8)
-data8 0xC01C0033FDEDFE1F,0x00000007357E670E // (-8;-7)
-data8 0xC018016B25897C8D,0x000000346DC5D639 // (-7;-6)
-data8 0xC014086A57F0B6D9,0x0000010624DD2F1B // (-6;-5)
-data8 0xC010284E78599581,0x0000051EB851EB85 // (-5;-4)
-data8 0xC009260DBC9E59AF,0x000028F5C28F5C29 // (-4;-3)
-data8 0xC003A7FC9600F86C,0x0000666666666666 // (-3;-2)
+data8 0xC02C000000001939 // (-15;-14)
+data8 0x0000000000000233 // (-15;-14)
+data8 0xC02A000000016124 // (-14;-13)
+data8 0x0000000000002BFB // (-14;-13)
+data8 0xC02800000011EED9 // (-13;-12)
+data8 0x0000000000025CBB // (-13;-12)
+data8 0xC026000000D7322A // (-12;-11)
+data8 0x00000000001E1095 // (-12;-11)
+data8 0xC0240000093F2777 // (-11;-10)
+data8 0x00000000013DD3DC // (-11;-10)
+data8 0xC02200005C7768FB // (-10;-9)
+data8 0x000000000C9539B9 // (-10;-9)
+data8 0xC02000034028B3F9 // (-9;-8)
+data8 0x000000007570C565 // (-9;-8)
+data8 0xC01C0033FDEDFE1F // (-8;-7)
+data8 0x00000007357E670E // (-8;-7)
+data8 0xC018016B25897C8D // (-7;-6)
+data8 0x000000346DC5D639 // (-7;-6)
+data8 0xC014086A57F0B6D9 // (-6;-5)
+data8 0x0000010624DD2F1B // (-6;-5)
+data8 0xC010284E78599581 // (-5;-4)
+data8 0x0000051EB851EB85 // (-5;-4)
+data8 0xC009260DBC9E59AF // (-4;-3)
+data8 0x000028F5C28F5C29 // (-4;-3)
+data8 0xC003A7FC9600F86C // (-3;-2)
+data8 0x0000666666666666 // (-3;-2)
data8 0xCC15879606130890,0x4000 // PR21
data8 0xB42FE3281465E1CC,0x4000 // PR31
//
@@ -971,19 +985,32 @@ data8 0x828185F0B95C9916,0x4001 // PR00
//
data8 0xD4D3C819E4E5654B,0x4000 // PR10
data8 0xA82FBBA4FCC75298,0x4000 // PR20
-data8 0xC02DFFFFFFFFFE52,0x000000000000001C // (-15;-14)
-data8 0xC02BFFFFFFFFE6C7,0x00000000000001A6 // (-14;-13)
-data8 0xC029FFFFFFFE9EDC,0x0000000000002BFB // (-13;-12)
-data8 0xC027FFFFFFEE1127,0x000000000001EEC8 // (-12;-11)
-data8 0xC025FFFFFF28CDD4,0x00000000001E1095 // (-11;-10)
-data8 0xC023FFFFF6C0D7C0,0x000000000101B2B3 // (-10;-9)
-data8 0xC021FFFFA3884BD0,0x000000000D6BF94D // (-9;-8)
-data8 0xC01FFFF97F8159CF,0x00000000C9539B89 // (-8;-7)
-data8 0xC01BFFCBF76B86F0,0x00000007357E670E // (-7;-6)
-data8 0xC017FE92F591F40D,0x000000346DC5D639 // (-6;-5)
-data8 0xC013F7577A6EEAFD,0x00000147AE147AE1 // (-5;-4)
-data8 0xC00FA471547C2FE5,0x00000C49BA5E353F // (-4;-3)
-data8 0xC005FB410A1BD901,0x000053F7CED91687 // (-3;-2)
+data8 0xC02DFFFFFFFFFE52 // (-15;-14)
+data8 0x000000000000001C // (-15;-14)
+data8 0xC02BFFFFFFFFE6C7 // (-14;-13)
+data8 0x00000000000001A6 // (-14;-13)
+data8 0xC029FFFFFFFE9EDC // (-13;-12)
+data8 0x0000000000002BFB // (-13;-12)
+data8 0xC027FFFFFFEE1127 // (-12;-11)
+data8 0x000000000001EEC8 // (-12;-11)
+data8 0xC025FFFFFF28CDD4 // (-11;-10)
+data8 0x00000000001E1095 // (-11;-10)
+data8 0xC023FFFFF6C0D7C0 // (-10;-9)
+data8 0x000000000101B2B3 // (-10;-9)
+data8 0xC021FFFFA3884BD0 // (-9;-8)
+data8 0x000000000D6BF94D // (-9;-8)
+data8 0xC01FFFF97F8159CF // (-8;-7)
+data8 0x00000000C9539B89 // (-8;-7)
+data8 0xC01BFFCBF76B86F0 // (-7;-6)
+data8 0x00000007357E670E // (-7;-6)
+data8 0xC017FE92F591F40D // (-6;-5)
+data8 0x000000346DC5D639 // (-6;-5)
+data8 0xC013F7577A6EEAFD // (-5;-4)
+data8 0x00000147AE147AE1 // (-5;-4)
+data8 0xC00FA471547C2FE5 // (-4;-3)
+data8 0x00000C49BA5E353F // (-4;-3)
+data8 0xC005FB410A1BD901 // (-3;-2)
+data8 0x000053F7CED91687 // (-3;-2)
data8 0x80151BB918A293AA,0x4000 // PR30
data8 0xB3C9F8F47422A314,0x400B // PRN
//
@@ -3538,6 +3565,7 @@ lgamma_libm_err:
};;
GLOBAL_LIBM_END(__libm_lgamma)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/libm_lgammaf.S b/sysdeps/ia64/fpu/libm_lgammaf.S
index 83cffd6..04dcd63 100644
--- a/sysdeps/ia64/fpu/libm_lgammaf.S
+++ b/sysdeps/ia64/fpu/libm_lgammaf.S
@@ -47,6 +47,7 @@
// 09/16/02 Improved accuracy on intervals reduced to [1;1.25]
// 10/21/02 Now it returns SIGN(GAMMA(x))=-1 for negative zero
// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 07/22/03 Reformatted some data tables
//
//*********************************************************************
//
@@ -685,19 +686,26 @@ data8 0x3FF1029A9DD542B4,0xBFFAD37C209D3B25 // A6,A5
data8 0x405385E6FD9BE7EA // A0
data8 0x478895F1C0000000 // Overflow boundary
data8 0x400062D97D26B523,0xC00A03E1529FF023 // A6,A5
-data8 0x4069204C51E566CE,0 // A0
+data8 0x4069204C51E566CE // A0
+data8 0x0000000000000000 // pad
data8 0x40101476B38FD501,0xC0199DE7B387C0FC // A6,A5
-data8 0x407EB8DAEC83D759,0 // A0
+data8 0x407EB8DAEC83D759 // A0
+data8 0x0000000000000000 // pad
data8 0x401FDB008D65125A,0xC0296B506E665581 // A6,A5
-data8 0x409226D93107EF66,0 // A0
+data8 0x409226D93107EF66 // A0
+data8 0x0000000000000000 // pad
data8 0x402FB3EAAF3E7B2D,0xC039521142AD8E0D // A6,A5
-data8 0x40A4EFA4F072792E,0 // A0
+data8 0x40A4EFA4F072792E // A0
+data8 0x0000000000000000 // pad
data8 0x403FA024C66B2563,0xC0494569F250E691 // A6,A5
-data8 0x40B7B747C9235BB8,0 // A0
+data8 0x40B7B747C9235BB8 // A0
+data8 0x0000000000000000 // pad
data8 0x404F9607D6DA512C,0xC0593F0B2EDDB4BC // A6,A5
-data8 0x40CA7E29C5F16DE2,0 // A0
+data8 0x40CA7E29C5F16DE2 // A0
+data8 0x0000000000000000 // pad
data8 0x405F90C5F613D98D,0xC0693BD130E50AAF // A6,A5
-data8 0x40DD4495238B190C,0 // A0
+data8 0x40DD4495238B190C // A0
+data8 0x0000000000000000 // pad
//
// polynomial approximation of ln(sin(Pi*x)/(Pi*x)), |x| <= 0.5
data8 0xBFD58731A486E820,0xBFA4452CC28E15A9 // S16,S14
@@ -2133,6 +2141,7 @@ lgammaf_libm_err:
};;
GLOBAL_LIBM_END(__libm_lgammaf)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/libm_lgammal.S b/sysdeps/ia64/fpu/libm_lgammal.S
index 056171b..844c517 100644
--- a/sysdeps/ia64/fpu/libm_lgammal.S
+++ b/sysdeps/ia64/fpu/libm_lgammal.S
@@ -7622,6 +7622,7 @@ lgammal_singularity:
GLOBAL_LIBM_END(__libm_lgammal)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/libm_scalblnf.S b/sysdeps/ia64/fpu/libm_scalblnf.S
index 362e68b..af620d4 100644
--- a/sysdeps/ia64/fpu/libm_scalblnf.S
+++ b/sysdeps/ia64/fpu/libm_scalblnf.S
@@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
+// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
@@ -44,38 +44,51 @@
// 02/06/02 Corrected to handle 32- or 64-bit integers
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 08/25/03 Improved performance
//
// API
//==============================================================
-// float = __libm_scalblnf (float x, long int n, int long_int_type)
+// float __libm_scalblnf (float x, long int n, int long_int_type)
// input floating point f8 and long int n (r33)
// input long_int_type = 0 if long int defined as 32 bits, = 1 if 64 bits
-//
// output floating point f8
//
-
// Returns x* 2**n using an fma and detects overflow
-// and underflow.
+// and underflow.
//
//
+// Strategy:
+// Compute biased exponent of result exp_Result = N + exp_X
+// Break into ranges:
+// exp_Result > 0x1007e -> Certain overflow
+// exp_Result = 0x1007e -> Possible overflow
+// 0x0ff81 <= exp_Result < 0x1007e -> No over/underflow (main path)
+// 0x0ff81 - 23 <= exp_Result < 0x0ff81 -> Possible underflow
+// exp_Result < 0x0ff81 - 23 -> Certain underflow
FR_Big = f6
FR_NBig = f7
FR_Floating_X = f8
FR_Result = f8
FR_Result2 = f9
-FR_Result3 = f11
-FR_Norm_X = f12
-FR_Two_N = f14
-FR_Two_to_Big = f15
+FR_Result3 = f10
+FR_Norm_X = f11
+FR_Two_N = f12
+GR_neg_ov_limit= r14
GR_N_Biased = r15
GR_Big = r16
GR_NBig = r17
-GR_Scratch = r18
-GR_Scratch1 = r19
+GR_exp_Result = r18
+GR_pos_ov_limit= r19
GR_Bias = r20
GR_N_as_int = r21
+GR_signexp_X = r22
+GR_exp_X = r23
+GR_exp_mask = r24
+GR_max_exp = r25
+GR_min_exp = r26
+GR_min_den_exp = r27
GR_SAVE_B0 = r32
GR_SAVE_GP = r33
@@ -93,243 +106,298 @@ GLOBAL_LIBM_ENTRY(__libm_scalblnf)
// Build the exponent Bias
//
{ .mfi
- alloc r32=ar.pfs,3,0,4,0
- fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
- addl GR_Bias = 0x0FFFF,r0
+ getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x
+ fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero
+ mov GR_Bias = 0x0ffff
}
-
-
//
-// Is N zero?
// Normalize x
-// Do we need to sign extend input (long_int_type = 0)?
+// Is long integer type 32 bits?
//
{ .mfi
- cmp.eq.unc p6,p0 = r33,r0
- fnorm.s1 FR_Norm_X = FR_Floating_X
- cmp.eq.unc p8,p9 = r34,r0
+ mov GR_Big = 35000 // If N this big then certain overflow
+ fnorm.s1 FR_Norm_X = FR_Floating_X
+ cmp.eq p8,p9 = r34,r0
}
;;
-{ .mii
-(p9) mov GR_N_as_int = r33 // Get n directly if long int 64 bits
-(p8) sxt4 GR_N_as_int = r33 // Sign extend n if long int 32 bits
- nop.i 0
+// Sign extend N if long int is 32 bits
+{ .mfi
+(p9) mov GR_N_as_int = r33 // Copy N if long int is 64 bits
+ fclass.m p9,p0 = FR_Floating_X, 0x0b // Test for x=unorm
+(p8) sxt4 GR_N_as_int = r33 // Sign extend N if long int is 32 bits
+}
+{ .mfi
+ mov GR_NBig = -35000 // If N this small then certain underflow
+ nop.f 0
+ mov GR_max_exp = 0x1007e // Exponent of maximum float
+}
+;;
+
+// Create biased exponent for 2**N
+{ .mfi
+ add GR_N_Biased = GR_Bias,GR_N_as_int
+ nop.f 0
+ cmp.ge p7, p0 = GR_N_as_int, GR_Big // Certain overflow?
+}
+{ .mib
+ cmp.le p8, p0 = GR_N_as_int, GR_NBig // Certain underflow?
+ mov GR_min_exp = 0x0ff81 // Exponent of minimum float
+(p9) br.cond.spnt SCALBNF_UNORM // Branch if x=unorm
+}
+;;
+
+SCALBNF_COMMON:
+// Main path continues. Also return here from x=unorm path.
+// Create 2**N
+.pred.rel "mutex",p7,p8
+{ .mfi
+ setf.exp FR_Two_N = GR_N_Biased
+ nop.f 0
+(p7) mov GR_N_as_int = GR_Big // Limit max N
+}
+{ .mfi
+(p8) mov GR_N_as_int = GR_NBig // Limit min N
+ nop.f 0
+(p8) cmp.eq p7,p0 = r0,r0 // Set p7 if |N| big
}
;;
//
-// Normalize x
-// Branch and return special values.
-// Create -35000
-// Create 35000
+// Create biased exponent for 2**N for N big
+// Is N zero?
//
{ .mfi
- addl GR_Big = 35000,r0
+(p7) add GR_N_Biased = GR_Bias,GR_N_as_int
nop.f 0
- add GR_N_Biased = GR_Bias,GR_N_as_int
+ cmp.eq.or p6,p0 = r33,r0
}
-{ .mfb
- addl GR_NBig = -35000,r0
-(p7) fma.s.s0 FR_Result = FR_Floating_X,f1, f0
-(p7) br.ret.spnt b0
-};;
+{ .mfi
+ mov GR_pos_ov_limit = 0x1007f // Exponent for positive overflow
+ nop.f 0
+ mov GR_exp_mask = 0x1ffff // Exponent mask
+}
+;;
//
-// Build the exponent Bias
-// Return x when N = 0
+// Create 2**N for N big
+// Return x when N = 0 or X = NaN, Inf, Zero
//
{ .mfi
- setf.exp FR_Two_N = GR_N_Biased
+(p7) setf.exp FR_Two_N = GR_N_Biased
nop.f 0
- addl GR_Scratch1 = 0x063BF,r0
+ mov GR_min_den_exp = 0x0ff81 - 23 // Exponent of min denorm float
}
{ .mfb
- addl GR_Scratch = 0x019C3F,r0
-(p6) fma.s.s0 FR_Result = FR_Floating_X,f1, f0
-(p6) br.ret.spnt b0
-};;
+ and GR_exp_X = GR_exp_mask, GR_signexp_X
+(p6) fma.s.s0 FR_Result = FR_Floating_X, f1, f0
+(p6) br.ret.spnt b0
+}
+;;
//
-// Create 2*big
-// Create 2**-big
-// Is N > 35000
-// Is N < -35000
// Raise Denormal operand flag with compare
-// Main path, create 2**N
+// Compute biased result exponent
//
{ .mfi
- setf.exp FR_NBig = GR_Scratch1
- nop.f 0
- cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big
-}
-{ .mfi
- setf.exp FR_Big = GR_Scratch
+ add GR_exp_Result = GR_exp_X, GR_N_as_int
fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
- cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig
-};;
+ mov GR_neg_ov_limit = 0x3007f // Exponent for negative overflow
+}
+;;
//
-// Adjust 2**N if N was very small or very large
+// Do final operation
//
{ .mfi
- nop.m 0
-(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
- nop.i 0
+ cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow
+ fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
+ cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
}
-{ .mlx
- nop.m 999
- movl GR_Scratch = 0x000000000003007F
-};;
+{ .mfb
+ nop.m 0
+ nop.f 0
+(p9) br.cond.spnt SCALBNF_UNDERFLOW // Branch if certain underflow
+}
+;;
+{ .mib
+(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow
+(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow
+(p7) br.ret.sptk b0 // Return from main path
+}
+;;
-{ .mfi
- nop.m 0
-(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
- nop.i 0
+{ .bbb
+(p6) br.cond.spnt SCALBNF_OVERFLOW // Branch if certain overflow
+(p8) br.cond.spnt SCALBNF_POSSIBLE_OVERFLOW // Branch if possible overflow
+(p9) br.cond.spnt SCALBNF_POSSIBLE_UNDERFLOW // Branch if possible underflow
}
-{ .mlx
- nop.m 999
- movl GR_Scratch1= 0x000000000001007F
-};;
+;;
+
+// Here if possible underflow.
+// Resulting exponent: 0x0ff81-23 <= exp_Result < 0x0ff81
+SCALBNF_POSSIBLE_UNDERFLOW:
+//
+// Here if possible overflow.
+// Resulting exponent: 0x1007e = exp_Result
+SCALBNF_POSSIBLE_OVERFLOW:
-// Set up necessary status fields
+// Set up necessary status fields
//
// S0 user supplied status
// S2 user supplied status + WRE + TD (Overflows)
// S3 user supplied status + FZ + TD (Underflows)
//
{ .mfi
- nop.m 999
+ nop.m 0
fsetc.s3 0x7F,0x41
- nop.i 999
+ nop.i 0
}
{ .mfi
- nop.m 999
+ nop.m 0
fsetc.s2 0x7F,0x42
- nop.i 999
-};;
+ nop.i 0
+}
+;;
//
-// Do final operation
+// Do final operation with s2 and s3
//
{ .mfi
- setf.exp FR_NBig = GR_Scratch
- fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
- nop.i 999
+ setf.exp FR_NBig = GR_neg_ov_limit
+ fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
+ nop.i 0
}
{ .mfi
- nop.m 999
- fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
-{ .mfi
- setf.exp FR_Big = GR_Scratch1
- fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
+ setf.exp FR_Big = GR_pos_ov_limit
+ fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
+ nop.i 0
+}
+;;
// Check for overflow or underflow.
// Restore s3
// Restore s2
//
{ .mfi
- nop.m 0
+ nop.m 0
fsetc.s3 0x7F,0x40
- nop.i 999
+ nop.i 0
}
{ .mfi
- nop.m 0
+ nop.m 0
fsetc.s2 0x7F,0x40
- nop.i 999
-};;
+ nop.i 0
+}
+;;
//
// Is the result zero?
//
{ .mfi
- nop.m 999
- fclass.m.unc p6, p0 = FR_Result3, 0x007
- nop.i 999
-}
+ nop.m 0
+ fclass.m p6, p0 = FR_Result3, 0x007
+ nop.i 0
+}
{ .mfi
- addl GR_Tag = 205, r0
- fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
- nop.i 0
-};;
+ nop.m 0
+ fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big
+ nop.i 0
+}
+;;
//
// Detect masked underflow - Tiny + Inexact Only
//
{ .mfi
- nop.m 999
+ nop.m 0
(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
- nop.i 999
-};;
+ nop.i 0
+}
+;;
//
// Is result bigger than the allowed range?
// Branch out for underflow
//
{ .mfb
-(p6) addl GR_Tag = 206, r0
+ nop.m 0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
-(p6) br.cond.spnt scalbnf_UNDERFLOW
-};;
+(p6) br.cond.spnt SCALBNF_UNDERFLOW
+}
+;;
//
// Branch out for overflow
//
-{ .mbb
- nop.m 0
-(p7) br.cond.spnt scalbnf_OVERFLOW
-(p9) br.cond.spnt scalbnf_OVERFLOW
-};;
+{ .bbb
+(p7) br.cond.spnt SCALBNF_OVERFLOW
+(p9) br.cond.spnt SCALBNF_OVERFLOW
+ br.ret.sptk b0 // Return from main path.
+}
+;;
-//
-// Return from main path.
-//
-{ .mfb
- nop.m 999
- nop.f 0
- br.ret.sptk b0;;
+// Here if result overflows
+SCALBNF_OVERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0
+ addl GR_Tag = 205, r0 // Set error tag for overflow
+ br.cond.sptk __libm_error_region // Call error support for overflow
}
+;;
-GLOBAL_LIBM_END(__libm_scalblnf)
-__libm_error_region:
+// Here if result underflows
+SCALBNF_UNDERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0
+ addl GR_Tag = 206, r0 // Set error tag for underflow
+ br.cond.sptk __libm_error_region // Call error support for underflow
+}
+;;
+
+// Here if x=unorm
+SCALBNF_UNORM:
+{ .mib
+ getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x
+ nop.i 0
+ br.cond.sptk SCALBNF_COMMON // Return to main path
+}
+;;
-scalbnf_OVERFLOW:
-scalbnf_UNDERFLOW:
+
+GLOBAL_LIBM_END(__libm_scalblnf)
+LOCAL_LIBM_ENTRY(__libm_error_region)
//
// Get stack address of N
//
.prologue
{ .mfi
- add GR_Parameter_Y=-32,sp
+ add GR_Parameter_Y=-32,sp
nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs
+ mov GR_SAVE_PFS=ar.pfs
}
//
-// Adjust sp
+// Adjust sp
//
{ .mfi
.fframe 64
- add sp=-64,sp
+ add sp=-64,sp
nop.f 0
- mov GR_SAVE_GP=gp
+ mov GR_SAVE_GP=gp
};;
//
-// Store N on stack in correct position
+// Store N on stack in correct position
// Locate the address of x on stack
//
{ .mmi
- st8 [GR_Parameter_Y] = GR_N_as_int,16
- add GR_Parameter_X = 16,sp
+ st8 [GR_Parameter_Y] = GR_N_as_int,16
+ add GR_Parameter_X = 16,sp
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0
+ mov GR_SAVE_B0=b0
};;
//
@@ -338,42 +406,42 @@ scalbnf_UNDERFLOW:
//
.body
{ .mib
- stfs [GR_Parameter_X] = FR_Norm_X
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
+ stfs [GR_Parameter_X] = FR_Norm_X
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0
}
{ .mib
- stfs [GR_Parameter_Y] = FR_Result
+ stfs [GR_Parameter_Y] = FR_Result
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support#
+ br.call.sptk b0=__libm_error_support#
};;
//
// Get location of result on stack
//
{ .mmi
+ add GR_Parameter_RESULT = 48,sp
nop.m 0
- nop.m 0
- add GR_Parameter_RESULT = 48,sp
+ nop.i 0
};;
//
-// Get the new result
+// Get the new result
//
{ .mmi
- ldfs FR_Result = [GR_Parameter_RESULT]
+ ldfs FR_Result = [GR_Parameter_RESULT]
.restore sp
- add sp = 64,sp
- mov b0 = GR_SAVE_B0
+ add sp = 64,sp
+ mov b0 = GR_SAVE_B0
};;
//
// Restore gp, ar.pfs and return
//
{ .mib
- mov gp = GR_SAVE_GP
- mov ar.pfs = GR_SAVE_PFS
- br.ret.sptk b0
+ mov gp = GR_SAVE_GP
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
};;
LOCAL_LIBM_END(__libm_error_region)
diff --git a/sysdeps/ia64/fpu/libm_sincos.S b/sysdeps/ia64/fpu/libm_sincos.S
index a3f4c72..3475b62 100644
--- a/sysdeps/ia64/fpu/libm_sincos.S
+++ b/sysdeps/ia64/fpu/libm_sincos.S
@@ -46,12 +46,13 @@
// 03/19/02 Added stack unwind around call to __libm_cis_large
// 09/05/02 Work range is widened by reduction strengthen (3 parts of Pi/16)
// 02/10/03 Reordered header: .section, .global, .proc, .align
-//
+// 08/08/03 Improved performance
+// 02/11/04 cis was moved to a separate file.
+//
// API
//==============================================================
-// 1) double _Complex cis(double)
-// 2) void sincos(double, double*s, double*c)
-// 3) __libm_sincos - internal LIBM function, that accepts
+// 1) void sincos(double, double*s, double*c)
+// 2) __libm_sincos - internal LIBM function, that accepts
// argument in f8 and returns cosine through f8, sine through f9
//
// Overview of operation
@@ -65,12 +66,12 @@
// nfloat = Round result to integer (round-to-nearest)
//
// r = x - nfloat * pi/2^k
-// Do this as ((((x - nfloat * HIGH(pi/2^k))) -
-// nfloat * LOW(pi/2^k)) -
+// Do this as ((((x - nfloat * HIGH(pi/2^k))) -
+// nfloat * LOW(pi/2^k)) -
// nfloat * LOWEST(pi/2^k) for increased accuracy.
// pi/2^k is stored as two numbers that when added make pi/2^k.
// pi/2^k = HIGH(pi/2^k) + LOW(pi/2^k)
-// HIGH and LOW parts are rounded to zero values,
+// HIGH and LOW parts are rounded to zero values,
// and LOWEST is rounded to nearest one.
//
// x = (nfloat * pi/2^k) + r
@@ -166,15 +167,14 @@
// Registers used
//==============================================================
// general input registers:
-// r14 -> r19
-// r32 -> r49
+// r14 -> r39
// predicate registers used:
// p6 -> p14
-
+//
// floating-point registers used
// f9 -> f15
-// f32 -> f100
+// f32 -> f67
// Assembly macros
//==============================================================
@@ -246,38 +246,32 @@ cis_Q = f67
cis_pResSin = r33
cis_pResCos = r34
-cis_exp_limit = r35
-cis_r_signexp = r36
-cis_AD_beta_table = r37
-cis_r_sincos = r38
-
-cis_r_exp = r39
-cis_r_17_ones = r40
-
cis_GR_sig_inv_pi_by_16 = r14
cis_GR_rshf_2to61 = r15
cis_GR_rshf = r16
cis_GR_exp_2tom61 = r17
cis_GR_n = r18
-
cis_GR_n_sin = r19
-cis_GR_m_sin = r41
-cis_GR_32m_sin = r41
-
-cis_GR_n_cos = r42
-cis_GR_m_cos = r43
-cis_GR_32m_cos = r43
-
-cis_AD_2_sin = r44
-cis_AD_2_cos = r45
-
-cis_gr_tmp = r46
-GR_SAVE_B0 = r47
-GR_SAVE_GP = r48
-rB0_SAVED = r49
-GR_SAVE_PFS = r50
-GR_SAVE_PR = r51
-cis_AD_1 = r52
+cis_exp_limit = r20
+cis_r_signexp = r21
+cis_AD_1 = r22
+cis_r_sincos = r23
+cis_r_exp = r24
+cis_r_17_ones = r25
+cis_GR_m_sin = r26
+cis_GR_32m_sin = r26
+cis_GR_n_cos = r27
+cis_GR_m_cos = r28
+cis_GR_32m_cos = r28
+cis_AD_2_sin = r29
+cis_AD_2_cos = r30
+cis_gr_tmp = r31
+
+GR_SAVE_B0 = r35
+GR_SAVE_GP = r36
+rB0_SAVED = r37
+GR_SAVE_PFS = r38
+GR_SAVE_PR = r39
RODATA
@@ -408,14 +402,14 @@ LOCAL_OBJECT_END(double_sin_cos_beta_k4)
GLOBAL_IEEE754_ENTRY(sincos)
// cis_GR_sig_inv_pi_by_16 = significand of 16/pi
{ .mlx
- alloc GR_SAVE_PFS = ar.pfs, 0, 21, 0, 0
+ getf.exp cis_r_signexp = cis_Arg
movl cis_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A
-
+
}
// cis_GR_rshf_2to61 = 1.1000 2^(63+63-2)
{ .mlx
addl cis_AD_1 = @ltoff(double_cis_pi), gp
- movl cis_GR_rshf_2to61 = 0x47b8000000000000
+ movl cis_GR_rshf_2to61 = 0x47b8000000000000
};;
{ .mfi
@@ -430,12 +424,11 @@ GLOBAL_IEEE754_ENTRY(sincos)
br.cond.sptk _CIS_COMMON
};;
GLOBAL_IEEE754_END(sincos)
-LOCAL_LIBM_ENTRY(cis)
-LOCAL_LIBM_END(cis)
+
GLOBAL_LIBM_ENTRY(__libm_sincos)
// cis_GR_sig_inv_pi_by_16 = significand of 16/pi
{ .mlx
- alloc GR_SAVE_PFS = ar.pfs,0,21,0,0
+ getf.exp cis_r_signexp = cis_Arg
movl cis_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A
}
// cis_GR_rshf_2to61 = 1.1000 2^(63+63-2)
@@ -443,11 +436,12 @@ GLOBAL_LIBM_ENTRY(__libm_sincos)
addl cis_AD_1 = @ltoff(double_cis_pi), gp
movl cis_GR_rshf_2to61 = 0x47b8000000000000
};;
+
// p14 set for __libm_sincos and cis
{ .mfi
ld8 cis_AD_1 = [cis_AD_1]
fnorm.s1 cis_NORM_f8 = cis_Arg
- cmp.eq p14, p13 = r0, r0
+ cmp.eq p14, p13 = r0, r0
}
// cis_GR_exp_2tom61 = exponent of scaling factor 2^-61
{ .mib
@@ -476,10 +470,15 @@ _CIS_COMMON:
// 2^-61 for scaling Nfloat
// 0x1001a is register_bias + 27.
// So if f8 >= 2^27, go to large arguments routine
-{ .mmi
- getf.exp cis_r_signexp = cis_Arg
- setf.exp cis_2TOM61 = cis_GR_exp_2tom61
+{ .mfi
+ alloc GR_SAVE_PFS = ar.pfs, 3, 5, 0, 0
+ fclass.m p11,p0 = cis_Arg, 0x0b // Test for x=unorm
mov cis_exp_limit = 0x1001a
+}
+{ .mib
+ setf.exp cis_2TOM61 = cis_GR_exp_2tom61
+ nop.i 0
+(p6) br.cond.spnt _CIS_SPECIAL_ARGS
};;
// Load the two pieces of pi/16
@@ -488,9 +487,11 @@ _CIS_COMMON:
{ .mmb
ldfe cis_Pi_by_16_hi = [cis_AD_1],16
setf.d cis_RSHF = cis_GR_rshf
-(p6) br.cond.spnt _CIS_SPECIAL_ARGS
+(p11) br.cond.spnt _CIS_UNORM // Branch if x=unorm
};;
+_CIS_COMMON2:
+// Return here if x=unorm
// Create constant inexact set
{ .mmi
ldfe cis_Pi_by_16_lo = [cis_AD_1],16
@@ -498,23 +499,18 @@ _CIS_COMMON:
nop.i 0
};;
+// Select exponent (17 lsb)
{ .mfi
ldfe cis_Pi_by_16_lowest = [cis_AD_1],16
nop.f 0
- nop.i 0
-};;
-
-// Start loading P, Q coefficients
-{ .mib
- ldfpd cis_P4,cis_Q4 = [cis_AD_1],16
dep.z cis_r_exp = cis_r_signexp, 0, 17
- nop.b 0
};;
+// Start loading P, Q coefficients
// p10 is true if we must call routines to handle larger arguments
// p10 is true if f8 exp is >= 0x1001a
{ .mmb
- ldfpd cis_P3,cis_Q3 = [cis_AD_1],16
+ ldfpd cis_P4,cis_Q4 = [cis_AD_1],16
cmp.ge p10, p0 = cis_r_exp, cis_exp_limit
(p10) br.cond.spnt _CIS_LARGE_ARGS // go to |x| >= 2^27 path
};;
@@ -523,39 +519,33 @@ _CIS_COMMON:
// Multiply x by scaled 16/pi and add large const to shift integer part of W to
// rightmost bits of significand
{ .mfi
- ldfpd cis_P2,cis_Q2 = [cis_AD_1],16
+ ldfpd cis_P3,cis_Q3 = [cis_AD_1],16
fma.s1 cis_W_2TO61_RSH = cis_NORM_f8,cis_SIG_INV_PI_BY_16_2TO61,cis_RSHF_2TO61
nop.i 0
};;
+// get N = (int)cis_int_Nfloat
// cis_NFLOAT = Round_Int_Nearest(cis_W)
-{ .mfi
- ldfpd cis_P1,cis_Q1 = [cis_AD_1], 16
+{ .mmf
+ getf.sig cis_GR_n = cis_W_2TO61_RSH
+ ldfpd cis_P2,cis_Q2 = [cis_AD_1],16
fms.s1 cis_NFLOAT = cis_W_2TO61_RSH,cis_2TOM61,cis_RSHF
- nop.i 0
-};;
-
-// get N = (int)cis_int_Nfloat
-{ .mfi
- getf.sig cis_GR_n = cis_W_2TO61_RSH
- nop.f 0
- nop.i 0
};;
-// Add 2^(k-1) (which is in cis_r_sincos) to N
// cis_r = -cis_Nfloat * cis_Pi_by_16_hi + x
-// cis_r = cis_r -cis_Nfloat * cis_Pi_by_16_lo
{ .mfi
- add cis_GR_n_cos = 0x8, cis_GR_n
+ ldfpd cis_P1,cis_Q1 = [cis_AD_1], 16
fnma.s1 cis_r = cis_NFLOAT,cis_Pi_by_16_hi,cis_NORM_f8
nop.i 0
};;
-//Get M (least k+1 bits of N)
+// Add 2^(k-1) (which is in cis_r_sincos) to N
{ .mmi
+ add cis_GR_n_cos = 0x8, cis_GR_n
+;;
+//Get M (least k+1 bits of N)
and cis_GR_m_sin = 0x1f,cis_GR_n
and cis_GR_m_cos = 0x1f,cis_GR_n_cos
- nop.i 0
};;
{ .mmi
@@ -565,9 +555,10 @@ _CIS_COMMON:
};;
// Add 32*M to address of sin_cos_beta table
-{ .mmi
+// cis_r = cis_r -cis_Nfloat * cis_Pi_by_16_lo
+{ .mfi
add cis_AD_2_sin = cis_GR_32m_sin, cis_AD_1
- nop.m 0
+ fnma.s1 cis_r = cis_NFLOAT, cis_Pi_by_16_lo, cis_r
shl cis_GR_32m_cos = cis_GR_m_cos,5
};;
@@ -580,7 +571,6 @@ _CIS_COMMON:
{ .mfi
ldfe cis_Sm_cos = [cis_AD_2_cos], 16
- fnma.s1 cis_r = cis_NFLOAT, cis_Pi_by_16_lo, cis_r
nop.i 0
};;
@@ -604,7 +594,7 @@ _CIS_COMMON:
{ .mfi
ldfe cis_Cm_cos = [cis_AD_2_cos]
- fma.s1 cis_P_temp1 = cis_rsq, cis_P4, cis_P3
+ fma.s1 cis_P_temp1 = cis_rsq, cis_P4, cis_P3
nop.i 0
}
@@ -638,18 +628,18 @@ _CIS_COMMON:
{ .mfi
nop.m 0
- fma.s1 cis_Q = cis_rsq, cis_Q_temp2, cis_Q1
+ fmpy.s1 cis_rcub = cis_r_exact, cis_rsq // get r^3
nop.i 0
-}
+};;
+
{ .mfi
nop.m 0
- fma.s1 cis_P = cis_rsq, cis_P_temp2, cis_P1
+ fma.s1 cis_Q = cis_rsq, cis_Q_temp2, cis_Q1
nop.i 0
-};;
-
+}
{ .mfi
nop.m 0
- fmpy.s1 cis_rcub = cis_r_exact, cis_rsq // get r^3
+ fma.s1 cis_P = cis_rsq, cis_P_temp2, cis_P1
nop.i 0
};;
@@ -717,7 +707,17 @@ _CIS_SPECIAL_ARGS:
stfd [cis_pResCos] = cis_Cos_res
br.ret.sptk b0 // common exit for sincos main path
};;
+
+_CIS_UNORM:
+// Here if x=unorm
+{ .mfb
+ getf.exp cis_r_signexp = cis_NORM_f8 // Get signexp of x
+ fcmp.eq.s0 p11,p0 = cis_Arg, f0 // Dummy op to set denorm
+ br.cond.sptk _CIS_COMMON2 // Return to main path
+};;
+
GLOBAL_LIBM_END(__libm_sincos)
+
//// |x| >= 2^27 path ///////
.proc _CIS_LARGE_ARGS
_CIS_LARGE_ARGS:
diff --git a/sysdeps/ia64/fpu/libm_sincos_large.S b/sysdeps/ia64/fpu/libm_sincos_large.S
index 42cf094..b09d369 100644
--- a/sysdeps/ia64/fpu/libm_sincos_large.S
+++ b/sysdeps/ia64/fpu/libm_sincos_large.S
@@ -792,6 +792,7 @@ GLOBAL_LIBM_END(__libm_sincos_large)
+
GLOBAL_LIBM_ENTRY(__libm_sin_large)
{ .mlx
@@ -821,6 +822,7 @@ alloc GR_Table_Base = ar.pfs,0,12,2,0
}
GLOBAL_LIBM_END(__libm_sin_large)
+
GLOBAL_LIBM_ENTRY(__libm_cos_large)
{ .mlx
@@ -2673,6 +2675,7 @@ SINCOS_SPECIAL:
}
GLOBAL_LIBM_END(__libm_cos_large)
+
// *******************************************************************
// *******************************************************************
// *******************************************************************
diff --git a/sysdeps/ia64/fpu/libm_sincosf.S b/sysdeps/ia64/fpu/libm_sincosf.S
index c4783ac..fb12007 100644
--- a/sysdeps/ia64/fpu/libm_sincosf.S
+++ b/sysdeps/ia64/fpu/libm_sincosf.S
@@ -47,12 +47,12 @@
// 03/19/02 Added stack unwind around call to __libm_cisf_large
// 09/05/02 Work range is widened by reduction strengthen (2 parts of Pi/16)
// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 02/11/04 cisf was moved to a separate file.
// API
//==============================================================
-// 1) float _Complex cisf(float)
-// 2) void sincosf(float, float*s, float*c)
-// 3) __libm_sincosf - internal LIBM function, that accepts
+// 1) void sincosf(float, float*s, float*c)
+// 2) __libm_sincosf - internal LIBM function, that accepts
// argument in f8 and returns cosine through f8, sine through f9
//
@@ -400,7 +400,7 @@ GLOBAL_IEEE754_ENTRY(sincosf)
{ .mlx
alloc GR_SAVE_PFS = ar.pfs, 0, 21, 0, 0
movl cisf_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A // 16/pi signd
-
+
}
// cis_GR_rshf_2to61 = 1.1000 2^(63+63-2)
{ .mlx
@@ -420,8 +420,7 @@ GLOBAL_IEEE754_ENTRY(sincosf)
br.cond.sptk _CISF_COMMON
};;
GLOBAL_IEEE754_END(sincosf)
-LOCAL_LIBM_ENTRY(cisf)
-LOCAL_LIBM_END(cisf)
+
GLOBAL_LIBM_ENTRY(__libm_sincosf)
{ .mlx
// cisf_GR_sig_inv_pi_by_16 = significand of 16/pi
@@ -438,7 +437,7 @@ GLOBAL_LIBM_ENTRY(__libm_sincosf)
{ .mfi
ld8 cisf_AD_1 = [cisf_AD_1]
fnorm.s1 cisf_NORM_f8 = cisf_Arg
- cmp.eq p14, p13 = r0, r0
+ cmp.eq p14, p13 = r0, r0
}
// cisf_GR_exp_2tom61 = exponent of scaling factor 2^-61
{ .mib
@@ -499,7 +498,7 @@ _CISF_COMMON:
// p10 is true if f8 exp is >= 0x10017
{ .mmb
ldfpd cisf_P1,cisf_Q1 = [cisf_AD_1], 16
- cmp.ge p10, p0 = cisf_r_exp, cisf_exp_limit
+ cmp.ge p10, p0 = cisf_r_exp, cisf_exp_limit
(p10) br.cond.spnt _CISF_LARGE_ARGS // go to |x| >= 2^24 path
};;
@@ -521,7 +520,7 @@ _CISF_COMMON:
// N = (int)cisf_int_Nfloat
{ .mfi
- getf.sig cisf_GR_n = cisf_W_2TO61_RSH
+ getf.sig cisf_GR_n = cisf_W_2TO61_RSH
nop.f 0
nop.i 0
};;
@@ -537,7 +536,7 @@ _CISF_COMMON:
//Get M (least k+1 bits of N)
{ .mmi
- and cisf_GR_m_sin = 0x1f,cisf_GR_n
+ and cisf_GR_m_sin = 0x1f,cisf_GR_n
and cisf_GR_m_cos = 0x1f,cisf_GR_n_cos
nop.i 0
};;
@@ -552,7 +551,7 @@ _CISF_COMMON:
{ .mmf
ldfpd cisf_Sm_sin, cisf_Cm_sin = [cisf_AD_2_sin]
ldfpd cisf_Sm_cos, cisf_Cm_cos = [cisf_AD_2_cos]
- fclass.m.unc p10,p0 = cisf_Arg,0x0b
+ fclass.m.unc p10,p0 = cisf_Arg,0x0b
};;
{ .mfi
@@ -679,6 +678,7 @@ _CISF_RETURN:
br.ret.sptk b0 // exit for sincos
};;
GLOBAL_LIBM_END(__libm_sincosf)
+
//// |x| >= 2^24 path ///////
.proc _CISF_LARGE_ARGS
_CISF_LARGE_ARGS:
@@ -728,7 +728,7 @@ _CISF_LARGE_ARGS:
{ .mfb
nop.m 0
fma.s.s0 cisf_Sin_res = cisf_Sin_res, f1, f0
-(p14) br.cond.sptk _CISF_RETURN
+(p14) br.cond.sptk _CISF_RETURN
};;
{ .mmb
diff --git a/sysdeps/ia64/fpu/libm_sincosl.S b/sysdeps/ia64/fpu/libm_sincosl.S
index 2a03a23..1d89ff4 100644
--- a/sysdeps/ia64/fpu/libm_sincosl.S
+++ b/sysdeps/ia64/fpu/libm_sincosl.S
@@ -1,7 +1,7 @@
-.file "libm_sincosl.asm"
+.file "libm_sincosl.s"
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (c) 2000 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@@ -43,6 +43,9 @@
// 05/13/02 Initial version of sincosl (based on libm's sinl and cosl)
// 02/10/03 Reordered header: .section, .global, .proc, .align;
// used data8 for long double table values
+// 10/13/03 Corrected .file name
+// 02/11/04 cisl was moved to a separate file.
+// 10/26/04 Avoided using r14-31 as scratch so not clobbered by dynamic loader
//
//*********************************************************************
//
@@ -50,9 +53,8 @@
//
// API's
//==============================================================
-// 1) long double _Complex cisl(long double)
-// 2) void sincosl(long double, long double*s, long double*c)
-// 3) __libm_sincosl - internal LIBM function, that accepts
+// 1) void sincosl(long double, long double*s, long double*c)
+// 2) __libm_sincosl - internal LIBM function, that accepts
// argument in f8 and returns cosine through f8, sine through f9
//
//
@@ -65,7 +67,7 @@
// f32-f121
//
// General Purpose Registers:
-// r32-r47
+// r32-r61
//
// Predicate Registers: p6-p15
//
@@ -775,20 +777,6 @@ FR_Tmp = f94
sincos_pResSin = r34
sincos_pResCos = r35
-GR_sig_inv_pi = r14
-GR_rshf_2to64 = r15
-GR_exp_2tom64 = r16
-GR_rshf = r17
-GR_ad_p = r18
-GR_ad_d = r19
-GR_ad_pp = r20
-GR_ad_qq = r21
-GR_ad_c = r22
-GR_ad_s = r23
-GR_ad_ce = r24
-GR_ad_se = r25
-GR_ad_m14 = r26
-GR_ad_s1 = r27
GR_exp_m2_to_m3= r36
GR_N_Inc = r37
GR_Cis = r38
@@ -803,6 +791,20 @@ GR_N_SignS = r45
GR_N_SignC = r46
GR_N_SinCos = r47
+GR_sig_inv_pi = r48
+GR_rshf_2to64 = r49
+GR_exp_2tom64 = r50
+GR_rshf = r51
+GR_ad_p = r52
+GR_ad_d = r53
+GR_ad_pp = r54
+GR_ad_qq = r55
+GR_ad_c = r56
+GR_ad_s = r57
+GR_ad_ce = r58
+GR_ad_se = r59
+GR_ad_m14 = r60
+GR_ad_s1 = r61
// For unwind support
GR_SAVE_B0 = r39
@@ -814,7 +816,7 @@ GR_SAVE_PFS = r41
GLOBAL_IEEE754_ENTRY(sincosl)
{ .mlx ///////////////////////////// 1 /////////////////
- alloc r32 = ar.pfs,3,13,2,0
+ alloc r32 = ar.pfs,3,27,2,0
movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
}
{ .mlx
@@ -834,11 +836,9 @@ GLOBAL_IEEE754_ENTRY(sincosl)
};;
GLOBAL_IEEE754_END(sincosl)
-LOCAL_LIBM_ENTRY(cisl)
-LOCAL_LIBM_END(cisl)
GLOBAL_LIBM_ENTRY(__libm_sincosl)
{ .mlx ///////////////////////////// 1 /////////////////
- alloc r32 = ar.pfs,3,14,2,0
+ alloc r32 = ar.pfs,3,27,2,0
movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
}
{ .mlx
@@ -2447,6 +2447,7 @@ SINCOSL_SPECIAL:
GLOBAL_LIBM_END(__libm_sincosl)
+
// *******************************************************************
// *******************************************************************
// *******************************************************************
@@ -2461,7 +2462,7 @@ GLOBAL_LIBM_END(__libm_sincosl)
// c is in f9
// N is in r8
// Be sure to allocate at least 2 GP registers as output registers for
-// __libm_pi_by_2_reduce. This routine uses r49-50. These are used as
+// __libm_pi_by_2_reduce. This routine uses r62-63. These are used as
// scratch registers within the __libm_pi_by_2_reduce routine (for speed).
//
// We know also that __libm_pi_by_2_reduce preserves f10-15, f71-127. We
diff --git a/sysdeps/ia64/fpu/libm_support.h b/sysdeps/ia64/fpu/libm_support.h
index 50dac33..dc9c0a2 100644
--- a/sysdeps/ia64/fpu/libm_support.h
+++ b/sysdeps/ia64/fpu/libm_support.h
@@ -1,7 +1,8 @@
/* file: libm_support.h */
-// Copyright (c) 2000 - 2002, Intel Corporation
+/*
+// Copyright (c) 2000 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@@ -21,13 +22,14 @@
// products derived from this software without specific prior written
// permission.
+//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
@@ -38,17 +40,17 @@
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
-// History: 02/02/2000 Initial version
+// History: 02/02/2000 Initial version
// 2/28/2000 added tags for logb and nextafter
// 3/22/2000 Changes to support _LIB_VERSIONIMF variable
-// and filled some enum gaps. Added support for C99.
+// and filled some enum gaps. Added support for C99.
// 5/31/2000 added prototypes for __libm_frexp_4l/8l
// 8/10/2000 Changed declaration of _LIB_VERSIONIMF to work for library
// builds and other application builds (precompiler directives).
// 8/11/2000 Added pointers-to-matherr-functions declarations to allow
// for user-defined matherr functions in the dll build.
// 12/07/2000 Added scalbn error_types values.
-// 5/01/2001 Added error_types values for C99 nearest integer
+// 5/01/2001 Added error_types values for C99 nearest integer
// functions.
// 6/07/2001 Added error_types values for fdim.
// 6/18/2001 Added include of complex_support.h.
@@ -65,232 +67,142 @@
// 06/27/2002 Added error_types for sinhcosh.
// 12/05/2002 Added error_types for annuity and compound
// 04/10/2003 Added error_types for tgammal/tgamma/tgammaf
+// 05/16/2003 FP-treatment macros copied here from IA32 libm_support.h
+// 06/02/2003 Added pad into struct fp80 (12/16 bytes).
+// 08/01/2003 Added struct ker80 and macros for multiprecision addition,
+// subtraction, multiplication, division, square root.
+// 08/07/2003 History section updated.
+// 09/03/2003 ALIGN(n) macro added.
+// 10/01/2003 LDOUBLE_ALIGN and fp80 corrected on linux to 16 bytes.
+// 11/24/2004 Added ifdef around definitions of INT32/64
+// 12/15/2004 Added error_types for exp10, nextafter, nexttoward
+// underflow. Moved error codes into libm_error_codes.h.
//
+*/
-void __libm_sincos_pi4(double,double*,double*,int);
-void __libm_y0y1(double , double *, double *);
-void __libm_j0j1(double , double *, double *);
-double __libm_j0(double);
-double __libm_j1(double);
-double __libm_jn(int,double);
-double __libm_y0(double);
-double __libm_y1(double);
-double __libm_yn(int,double);
-double __libm_copysign (double, double);
-float __libm_copysignf (float, float);
-long double __libm_copysignl (long double, long double);
-
-extern double sqrt(double);
-extern double fabs(double);
-extern double log(double);
-extern double log1p(double);
-extern double sqrt(double);
-extern double sin(double);
-extern double exp(double);
-extern double modf(double, double *);
-extern double asinh(double);
-extern double acosh(double);
-extern double atanh(double);
-extern double tanh(double);
-extern double erf(double);
-extern double erfc(double);
-extern double j0(double);
-extern double j1(double);
-extern double jn(int, double);
-extern double y0(double);
-extern double y1(double);
-extern double yn(int, double);
-
-extern float fabsf(float);
-extern float asinhf(float);
-extern float acoshf(float);
-extern float atanhf(float);
-extern float tanhf(float);
-extern float erff(float);
-extern float erfcf(float);
-extern float j0f(float);
-extern float j1f(float);
-extern float jnf(int, float);
-extern float y0f(float);
-extern float y1f(float);
-extern float ynf(int, float);
-
-extern long double log1pl(long double);
-extern long double logl(long double);
-extern long double sqrtl(long double);
-extern long double expl(long double);
-extern long double fabsl(long double);
+#ifndef __LIBM_SUPPORT_H_INCLUDED__
+#define __LIBM_SUPPORT_H_INCLUDED__
-#if !(defined(SIZE_INT_32) || defined(SIZE_INT_64))
-#error integer size not established; define SIZE_INT_32 or SIZE_INT_64
+#ifndef _LIBC
+#if !(defined(_WIN32) || defined(_WIN64))
+# pragma const_seg(".rodata") /* place constant data in text (code) section */
#endif
-#if (defined(SIZE_INT_32) && defined(SIZE_INT_64))
-#error multiple integer size definitions; define SIZE_INT_32 or SIZE_INT_64
+#if defined(__ICC) || defined(__ICL) || defined(__ECC) || defined(__ECL)
+# pragma warning( disable : 1682 ) /* #1682: implicit conversion of a 64-bit integral type to a smaller integral type (potential portability problem) */
+# pragma warning( disable : 1683 ) /* #1683: explicit conversion of a 64-bit integral type to a smaller integral type (potential portability problem) */
#endif
-
-#if !(defined(SIZE_LONG_INT_32) || defined(SIZE_LONG_INT_64))
-#error long int size not established; define SIZE_LONG_INT_32 or SIZE_LONG_INT_64
#endif
-#if (defined(SIZE_LONG_INT_32) && defined(SIZE_LONG_INT_64))
-#error multiple long int size definitions; define SIZE_LONG_INT_32 or SIZE_LONG_INT_64
+/* macros to form a double value in hex representation (unsigned int type) */
+
+#define DOUBLE_HEX(hi,lo) 0x##lo,0x##hi /*LITTLE_ENDIAN*/
+
+#include "libm_cpu_defs.h"
+
+#if !(defined (IA64))
+# include "libm_dll.h"
+# include "libm_dispatch.h"
#endif
-#if !(defined(SIZE_LONG_LONG_INT_32) || defined(SIZE_LONG_LONG_INT_64))
-#error long long int size not established; define SIZE_LONG_LONG_INT_32 or SIZE_LONG_LONG_INT_64
+#include "libm_error_codes.h"
+
+struct exceptionf
+{
+ int type;
+ char *name;
+ float arg1, arg2, retval;
+};
+
+# ifdef __cplusplus
+struct __exception
+{
+ int type;
+ char *name;
+ double arg1, arg2, retval;
+};
+# else
+
+# ifndef _LIBC
+struct exception
+{
+ int type;
+ char *name;
+ double arg1, arg2, retval;
+};
+# endif
+# endif
+
+struct exceptionl
+{
+ int type;
+ char *name;
+ long double arg1, arg2, retval;
+};
+
+#if (defined (_MS_) && defined (IA64))
+#define MATHERR_F _matherrf
+#define MATHERR_D _matherr
+#else
+#define MATHERR_F matherrf
+#define MATHERR_D matherr
#endif
-#if (defined(SIZE_LONG_LONG_INT_32) && defined(SIZE_LONG_LONG_INT_64))
-#error multiple long long int size definitions; define SIZE_LONG_LONG_INT_32 or SIZE_LONG_LONG_INT_64
+# ifdef __cplusplus
+#define EXC_DECL_D __exception
+#else
+// exception is a reserved name in C++
+#define EXC_DECL_D exception
#endif
+extern int MATHERR_F(struct exceptionf*);
+extern int MATHERR_D(struct EXC_DECL_D*);
+extern int matherrl(struct exceptionl*);
+
+#ifndef _LIBC
+// Add code to support _LIB_VERSIONIMF
typedef enum
{
- logl_zero=0, logl_negative, /* 0, 1 */
- log_zero, log_negative, /* 2, 3 */
- logf_zero, logf_negative, /* 4, 5 */
- log10l_zero, log10l_negative, /* 6, 7 */
- log10_zero, log10_negative, /* 8, 9 */
- log10f_zero, log10f_negative, /* 10, 11 */
- expl_overflow, expl_underflow, /* 12, 13 */
- exp_overflow, exp_underflow, /* 14, 15 */
- expf_overflow, expf_underflow, /* 16, 17 */
- powl_overflow, powl_underflow, /* 18, 19 */
- powl_zero_to_zero, /* 20 */
- powl_zero_to_negative, /* 21 */
- powl_neg_to_non_integer, /* 22 */
- powl_nan_to_zero, /* 23 */
- pow_overflow, pow_underflow, /* 24, 25 */
- pow_zero_to_zero, /* 26 */
- pow_zero_to_negative, /* 27 */
- pow_neg_to_non_integer, /* 28 */
- pow_nan_to_zero, /* 29 */
- powf_overflow, powf_underflow, /* 30, 31 */
- powf_zero_to_zero, /* 32 */
- powf_zero_to_negative, /* 33 */
- powf_neg_to_non_integer, /* 34 */
- powf_nan_to_zero, /* 35 */
- atan2l_zero, /* 36 */
- atan2_zero, /* 37 */
- atan2f_zero, /* 38 */
- expm1l_overflow, /* 39 */
- expm1l_underflow, /* 40 */
- expm1_overflow, /* 41 */
- expm1_underflow, /* 42 */
- expm1f_overflow, /* 43 */
- expm1f_underflow, /* 44 */
- hypotl_overflow, /* 45 */
- hypot_overflow, /* 46 */
- hypotf_overflow, /* 47 */
- sqrtl_negative, /* 48 */
- sqrt_negative, /* 49 */
- sqrtf_negative, /* 50 */
- scalbl_overflow, scalbl_underflow, /* 51, 52 */
- scalb_overflow, scalb_underflow, /* 53, 54 */
- scalbf_overflow, scalbf_underflow, /* 55, 56 */
- acosl_gt_one, acos_gt_one, acosf_gt_one, /* 57, 58, 59 */
- asinl_gt_one, asin_gt_one, asinf_gt_one, /* 60, 61, 62 */
- coshl_overflow, cosh_overflow, coshf_overflow, /* 63, 64, 65 */
- y0l_zero, y0l_negative,y0l_gt_loss, /* 66, 67, 68 */
- y0_zero, y0_negative,y0_gt_loss, /* 69, 70, 71 */
- y0f_zero, y0f_negative,y0f_gt_loss, /* 72, 73, 74 */
- y1l_zero, y1l_negative,y1l_gt_loss, /* 75, 76, 77 */
- y1_zero, y1_negative,y1_gt_loss, /* 78, 79, 80 */
- y1f_zero, y1f_negative,y1f_gt_loss, /* 81, 82, 83 */
- ynl_zero, ynl_negative,ynl_gt_loss, /* 84, 85, 86 */
- yn_zero, yn_negative,yn_gt_loss, /* 87, 88, 89 */
- ynf_zero, ynf_negative,ynf_gt_loss, /* 90, 91, 92 */
- j0l_gt_loss, /* 93 */
- j0_gt_loss, /* 94 */
- j0f_gt_loss, /* 95 */
- j1l_gt_loss, /* 96 */
- j1_gt_loss, /* 97 */
- j1f_gt_loss, /* 98 */
- jnl_gt_loss, /* 99 */
- jn_gt_loss, /* 100 */
- jnf_gt_loss, /* 101 */
- lgammal_overflow, lgammal_negative,lgammal_reserve, /* 102, 103, 104 */
- lgamma_overflow, lgamma_negative,lgamma_reserve, /* 105, 106, 107 */
- lgammaf_overflow, lgammaf_negative, lgammaf_reserve,/* 108, 109, 110 */
- gammal_overflow,gammal_negative, gammal_reserve, /* 111, 112, 113 */
- gamma_overflow, gamma_negative, gamma_reserve, /* 114, 115, 116 */
- gammaf_overflow,gammaf_negative,gammaf_reserve, /* 117, 118, 119 */
- fmodl_by_zero, /* 120 */
- fmod_by_zero, /* 121 */
- fmodf_by_zero, /* 122 */
- remainderl_by_zero, /* 123 */
- remainder_by_zero, /* 124 */
- remainderf_by_zero, /* 125 */
- sinhl_overflow, sinh_overflow, sinhf_overflow, /* 126, 127, 128 */
- atanhl_gt_one, atanhl_eq_one, /* 129, 130 */
- atanh_gt_one, atanh_eq_one, /* 131, 132 */
- atanhf_gt_one, atanhf_eq_one, /* 133, 134 */
- acoshl_lt_one, /* 135 */
- acosh_lt_one, /* 136 */
- acoshf_lt_one, /* 137 */
- log1pl_zero, log1pl_negative, /* 138, 139 */
- log1p_zero, log1p_negative, /* 140, 141 */
- log1pf_zero, log1pf_negative, /* 142, 143 */
- ldexpl_overflow, ldexpl_underflow, /* 144, 145 */
- ldexp_overflow, ldexp_underflow, /* 146, 147 */
- ldexpf_overflow, ldexpf_underflow, /* 148, 149 */
- logbl_zero, logb_zero, logbf_zero, /* 150, 151, 152 */
- nextafterl_overflow, nextafter_overflow,
- nextafterf_overflow, /* 153, 154, 155 */
- ilogbl_zero, ilogb_zero, ilogbf_zero, /* 156, 157, 158 */
- exp2l_overflow, exp2l_underflow, /* 159, 160 */
- exp2_overflow, exp2_underflow, /* 161, 162 */
- exp2f_overflow, exp2f_underflow, /* 163, 164 */
- exp10l_overflow, exp10_overflow,
- exp10f_overflow, /* 165, 166, 167 */
- log2l_zero, log2l_negative, /* 168, 169 */
- log2_zero, log2_negative, /* 170, 171 */
- log2f_zero, log2f_negative, /* 172, 173 */
- scalbnl_overflow, scalbnl_underflow, /* 174, 175 */
- scalbn_overflow, scalbn_underflow, /* 176, 177 */
- scalbnf_overflow, scalbnf_underflow, /* 178, 179 */
- remquol_by_zero, /* 180 */
- remquo_by_zero, /* 181 */
- remquof_by_zero, /* 182 */
- lrintl_large, lrint_large, lrintf_large, /* 183, 184, 185 */
- llrintl_large, llrint_large, llrintf_large, /* 186, 187, 188 */
- lroundl_large, lround_large, lroundf_large, /* 189, 190, 191 */
- llroundl_large, llround_large, llroundf_large, /* 192, 193, 194 */
- fdiml_overflow, fdim_overflow, fdimf_overflow, /* 195, 196, 197 */
- nexttowardl_overflow, nexttoward_overflow,
- nexttowardf_overflow, /* 198, 199, 200 */
- scalblnl_overflow, scalblnl_underflow, /* 201, 202 */
- scalbln_overflow, scalbln_underflow, /* 203, 204 */
- scalblnf_overflow, scalblnf_underflow, /* 205, 206 */
- erfcl_underflow, erfc_underflow, erfcf_underflow, /* 207, 208, 209 */
- acosdl_gt_one, acosd_gt_one, acosdf_gt_one, /* 210, 211, 212 */
- asindl_gt_one, asind_gt_one, asindf_gt_one, /* 213, 214, 215 */
- atan2dl_zero, atan2d_zero, atan2df_zero, /* 216, 217, 218 */
- tandl_overflow, tand_overflow, tandf_overflow, /* 219, 220, 221 */
- cotdl_overflow, cotd_overflow, cotdf_overflow, /* 222, 223, 224 */
- cotl_overflow, cot_overflow, cotf_overflow, /* 225, 226, 227 */
- sinhcoshl_overflow, sinhcosh_overflow, sinhcoshf_overflow, /* 228, 229, 230 */
- annuityl_by_zero, annuity_by_zero, annuityf_by_zero, /* 231, 232, 233 */
- annuityl_less_m1, annuity_less_m1, annuityf_less_m1, /* 234, 235, 236 */
- annuityl_overflow, annuity_overflow, annuityf_overflow, /* 237, 238, 239 */
- annuityl_underflow, annuity_underflow, annuityf_underflow, /* 240, 241, 242 */
- compoundl_by_zero, compound_by_zero, compoundf_by_zero, /* 243, 244, 245 */
- compoundl_less_m1, compound_less_m1, compoundf_less_m1, /* 246, 247, 248 */
- compoundl_overflow, compound_overflow, compoundf_overflow, /* 249, 250, 251 */
- compoundl_underflow, compound_underflow, compoundf_underflow, /* 252, 253, 254 */
- tgammal_overflow, tgammal_negative, tgammal_reserve, /* 255, 256, 257 */
- tgamma_overflow, tgamma_negative, tgamma_reserve, /* 258, 259, 260 */
- tgammaf_overflow, tgammaf_negative, tgammaf_reserve, /* 261, 262, 263 */
-} error_types;
-
-void __libm_error_support(void*,void*,void*,error_types);
-#ifdef _LIBC
-libc_hidden_proto(__libm_error_support)
+ _IEEE_ = -1, // IEEE-like behavior
+ _SVID_, // SysV, Rel. 4 behavior
+ _XOPEN_, // Unix98
+ _POSIX_, // Posix
+ _ISOC_ // ISO C9X
+} _LIB_VERSION_TYPE;
#endif
-#define HI_SIGNIFICAND_LESS(X, HI) ((X)->hi_significand < 0x ## HI)
-#define f64abs(x) ((x) < 0.0 ? -(x) : (x))
+// This is a run-time variable and may affect
+// floating point behavior of the libm functions
+
+#if !defined( LIBM_BUILD )
+#if defined( _DLL )
+extern _LIB_VERSION_TYPE __declspec(dllimport) _LIB_VERSIONIMF;
+#else
+extern _LIB_VERSION_TYPE _LIB_VERSIONIMF;
+#endif /* _DLL */
+#else
+extern int (*pmatherrf)(struct exceptionf*);
+extern int (*pmatherr)(struct EXC_DECL_D*);
+extern int (*pmatherrl)(struct exceptionl*);
+#endif /* LIBM_BUILD */
+
+/* memory format definitions (LITTLE_ENDIAN only) */
+
+#if !(defined(SIZE_INT_32) || defined(SIZE_INT_64))
+# error "You need to define SIZE_INT_32 or SIZE_INT_64"
+#endif
+
+#if (defined(SIZE_INT_32) && defined(SIZE_INT_64))
+#error multiple integer size definitions; define SIZE_INT_32 or SIZE_INT_64
+#endif
+
+#if !(defined(SIZE_LONG_32) || defined(SIZE_LONG_64))
+# error "You need to define SIZE_LONG_32 or SIZE_LONG_64"
+#endif
+
+#if (defined(SIZE_LONG_32) && defined(SIZE_LONG_64))
+#error multiple integer size definitions; define SIZE_LONG_32 or SIZE_LONG_64
+#endif
#if !defined(__USE_EXTERNAL_FPMEMTYP_H__)
@@ -342,22 +254,519 @@ struct fp80 { /*/ sign:1 exponent:15 significand:64 (NO implied bits) */
unsigned exponent:15;
unsigned sign:1;
#endif
+ unsigned pad:16;
+#if !(defined(__unix__) && defined(__i386__))
+ unsigned padwin:32;
+#endif
};
#endif /*__USE_EXTERNAL_FPMEMTYP_H__*/
-/* macros to form a double value in hex representation (unsigned int type) */
+#if !(defined(opensource))
+typedef __int32 INT32;
+typedef signed __int32 SINT32;
+typedef unsigned __int32 UINT32;
-#define DOUBLE_HEX(hi,lo) 0x##lo,0x##hi /*LITTLE_ENDIAN*/
+typedef __int64 INT64;
+typedef signed __int64 SINT64;
+typedef unsigned __int64 UINT64;
+#else
+typedef int INT32;
+typedef signed int SINT32;
+typedef unsigned int UINT32;
-/* macros to form a long double value in hex representation (unsigned short type) */
+typedef long long INT64;
+typedef signed long long SINT64;
+typedef unsigned long long UINT64;
+#endif
-#if defined(_WIN32) || defined(_WIN64)
-#define LDOUBLE_ALIGN 16
-#else
-#define LDOUBLE_ALIGN 12
+#if (defined(_WIN32) || defined(_WIN64)) /* Windows */
+# define I64CONST(bits) 0x##bits##i64
+# define U64CONST(bits) 0x##bits##ui64
+#elif (defined(__linux__) && defined(_M_IA64)) /* Linux,64 */
+# define I64CONST(bits) 0x##bits##L
+# define U64CONST(bits) 0x##bits##uL
+#else /* Linux,32 */
+# define I64CONST(bits) 0x##bits##LL
+# define U64CONST(bits) 0x##bits##uLL
+#endif
+
+struct ker80 {
+ union {
+ long double ldhi;
+ struct fp80 fphi;
+ };
+ union {
+ long double ldlo;
+ struct fp80 fplo;
+ };
+ int ex;
+};
+
+/* Addition: x+y */
+/* The result is sum rhi+rlo */
+/* Temporary variables: t1 */
+/* All variables are in long double precision */
+/* Correct if no overflow (algorithm by D.Knuth) */
+#define __LIBM_ADDL1_K80( rhi,rlo,x,y, t1 ) \
+ rhi = x + y; \
+ rlo = rhi - x; \
+ t1 = rhi - rlo; \
+ rlo = y - rlo; \
+ t1 = x - t1; \
+ rlo = rlo + t1;
+
+/* Addition: (xhi+xlo) + (yhi+ylo) */
+/* The result is sum rhi+rlo */
+/* Temporary variables: t1 */
+/* All variables are in long double precision */
+/* Correct if no overflow (algorithm by T.J.Dekker) */
+#define __LIBM_ADDL2_K80( rhi,rlo,xhi,xlo,yhi,ylo, t1 ) \
+ rlo = xhi+yhi; \
+ if ( VALUE_GT_80(FP80(xhi),FP80(yhi)) ) { \
+ t1=xhi-rlo;t1=t1+yhi;t1=t1+ylo;t1=t1+xlo; \
+ } else { \
+ t1=yhi-rlo;t1=t1+xhi;t1=t1+xlo;t1=t1+ylo; \
+ } \
+ rhi=rlo+t1; \
+ rlo=rlo-rhi;rlo=rlo+t1;
+
+/* Addition: r=x+y */
+/* Variables r,x,y are pointers to struct ker80, */
+/* all other variables are in long double precision */
+/* Temporary variables: t1 */
+/* Correct if x and y belong to interval [2^-8000;2^8000], */
+/* or when one or both of them are zero */
+#if defined(SIZE_INT_32)
+#define __LIBM_ADDL_K80(r,x,y, t1) \
+ if ( ((y)->ex+(y)->fphi.exponent-134 < \
+ (x)->ex+(x)->fphi.exponent) && \
+ ((x)->ex+(x)->fphi.exponent < \
+ (y)->ex+(y)->fphi.exponent+134) && \
+ !SIGNIFICAND_ZERO_80(&((x)->fphi)) && \
+ !SIGNIFICAND_ZERO_80(&((y)->fphi)) ) \
+ { \
+ /* y/2^134 < x < y*2^134, */ \
+ /* and x,y are nonzero finite numbers */ \
+ if ( (x)->ex != (y)->ex ) { \
+ /* adjust x->ex to y->ex */ \
+ /* t1 = 2^(x->ex - y->ex) */ \
+ FP80(t1)->sign = 0; \
+ FP80(t1)->exponent = BIAS_80 + (x)->ex-(y)->ex; \
+ /* exponent is correct because */ \
+ /* |x->ex - y->ex| = */ \
+ /* = | (x->ex + x->fphi.exponent) - */ \
+ /* -(y->ex + y->fphi.exponent) + */ \
+ /* + y->fphi.exponent - */ \
+ /* - x->fphi.exponent | < */ \
+ /* < | (x->ex+x->fphi.exponent) - */ \
+ /* -(y->ex+y->fphi.exponent) | + */ \
+ /* +| y->fphi.exponent - */ \
+ /* -x->fphi.exponent | < */ \
+ /* < 134 + 16000 */ \
+ FP80(t1)->hi_significand = 0x80000000; \
+ FP80(t1)->lo_significand = 0x00000000; \
+ (x)->ex = (y)->ex; \
+ (x)->ldhi *= t1; \
+ (x)->ldlo *= t1; \
+ } \
+ /* r==x+y */ \
+ (r)->ex = (y)->ex; \
+ __LIBM_ADDL2_K80( (r)->ldhi,(r)->ldlo, \
+ (x)->ldhi,(x)->ldlo, (y)->ldhi,(y)->ldlo, t1 ); \
+ } else if ( SIGNIFICAND_ZERO_80(&((x)->fphi)) || \
+ ((y)->ex+(y)->fphi.exponent-BIAS_80 - 134 >= \
+ (x)->ex+(x)->fphi.exponent-BIAS_80) ) \
+ { \
+ /* |x|<<|y| */ \
+ *(r) = *(y); \
+ } else { \
+ /* |y|<<|x| */ \
+ *(r) = *(x); \
+ }
+#elif defined(SIZE_INT_64)
+#define __LIBM_ADDL_K80(r,x,y, t1) \
+ if ( ((y)->ex+(y)->fphi.exponent-134 < \
+ (x)->ex+(x)->fphi.exponent) && \
+ ((x)->ex+(x)->fphi.exponent < \
+ (y)->ex+(y)->fphi.exponent+134) && \
+ !SIGNIFICAND_ZERO_80(&((x)->fphi)) && \
+ !SIGNIFICAND_ZERO_80(&((y)->fphi)) ) \
+ { \
+ /* y/2^134 < x < y*2^134, */ \
+ /* and x,y are nonzero finite numbers */ \
+ if ( (x)->ex != (y)->ex ) { \
+ /* adjust x->ex to y->ex */ \
+ /* t1 = 2^(x->ex - y->ex) */ \
+ FP80(t1)->sign = 0; \
+ FP80(t1)->exponent = BIAS_80 + (x)->ex-(y)->ex; \
+ /* exponent is correct because */ \
+ /* |x->ex - y->ex| = */ \
+ /* = | (x->ex + x->fphi.exponent) - */ \
+ /* -(y->ex + y->fphi.exponent) + */ \
+ /* + y->fphi.exponent - */ \
+ /* - x->fphi.exponent | < */ \
+ /* < | (x->ex+x->fphi.exponent) - */ \
+ /* -(y->ex+y->fphi.exponent) | + */ \
+ /* +| y->fphi.exponent - */ \
+ /* -x->fphi.exponent | < */ \
+ /* < 134 + 16000 */ \
+ FP80(t1)->significand = 0x8000000000000000; \
+ (x)->ex = (y)->ex; \
+ (x)->ldhi *= t1; \
+ (x)->ldlo *= t1; \
+ } \
+ /* r==x+y */ \
+ (r)->ex = (y)->ex; \
+ __LIBM_ADDL2_K80( (r)->ldhi,(r)->ldlo, \
+ (x)->ldhi,(x)->ldlo, (y)->ldhi,(y)->ldlo, t1 ); \
+ } else if ( SIGNIFICAND_ZERO_80(&((x)->fphi)) || \
+ ((y)->ex+(y)->fphi.exponent-BIAS_80 - 134 >= \
+ (x)->ex+(x)->fphi.exponent-BIAS_80) ) \
+ { \
+ /* |x|<<|y| */ \
+ *(r) = *(y); \
+ } else { \
+ /* |y|<<|x| */ \
+ *(r) = *(x); \
+ }
#endif
+/* Addition: r=x+y */
+/* Variables r,x,y are pointers to struct ker80, */
+/* all other variables are in long double precision */
+/* Temporary variables: t1 */
+/* Correct for any finite x and y */
+#define __LIBM_ADDL_NORM_K80(r,x,y, t1) \
+ if ( ((x)->fphi.exponent-BIAS_80<-8000) || \
+ ((x)->fphi.exponent-BIAS_80>+8000) || \
+ ((y)->fphi.exponent-BIAS_80<-8000) || \
+ ((y)->fphi.exponent-BIAS_80>+8000) ) \
+ { \
+ __libm_normalizel_k80(x); \
+ __libm_normalizel_k80(y); \
+ } \
+ __LIBM_ADDL_K80(r,x,y, t1)
+
+/* Subtraction: x-y */
+/* The result is sum rhi+rlo */
+/* Temporary variables: t1 */
+/* All variables are in long double precision */
+/* Correct if no overflow (algorithm by D.Knuth) */
+#define __LIBM_SUBL1_K80( rhi, rlo, x, y, t1 ) \
+ rhi = x - y; \
+ rlo = rhi - x; \
+ t1 = rhi - rlo; \
+ rlo = y + rlo; \
+ t1 = x - t1; \
+ rlo = t1 - rlo;
+
+/* Subtraction: (xhi+xlo) - (yhi+ylo) */
+/* The result is sum rhi+rlo */
+/* Temporary variables: t1 */
+/* All variables are in long double precision */
+/* Correct if no overflow (algorithm by T.J.Dekker) */
+#define __LIBM_SUBL2_K80( rhi,rlo,xhi,xlo,yhi,ylo, t1 ) \
+ rlo = xhi-yhi; \
+ if ( VALUE_GT_80(FP80(xhi),FP80(yhi)) ) { \
+ t1=xhi-rlo;t1=t1-yhi;t1=t1-ylo;t1=t1+xlo; \
+ } else { \
+ t1=yhi+rlo;t1=xhi-t1;t1=t1+xlo;t1=t1-ylo; \
+ } \
+ rhi=rlo+t1; \
+ rlo=rlo-rhi;rlo=rlo+t1;
+
+/* Subtraction: r=x-y */
+/* Variables r,x,y are pointers to struct ker80, */
+/* all other variables are in long double precision */
+/* Temporary variables: t1 */
+/* Correct if x and y belong to interval [2^-8000;2^8000], */
+/* or when one or both of them are zero */
+#if defined(SIZE_INT_32)
+#define __LIBM_SUBL_K80(r,x,y, t1) \
+ if ( ((y)->ex+(y)->fphi.exponent-134 < \
+ (x)->ex+(x)->fphi.exponent) && \
+ ((x)->ex+(x)->fphi.exponent < \
+ (y)->ex+(y)->fphi.exponent+134) && \
+ !SIGNIFICAND_ZERO_80(&((x)->fphi)) && \
+ !SIGNIFICAND_ZERO_80(&((y)->fphi)) ) \
+ { \
+ /* y/2^134 < x < y*2^134, */ \
+ /* and x,y are nonzero finite numbers */ \
+ if ( (x)->ex != (y)->ex ) { \
+ /* adjust x->ex to y->ex */ \
+ /* t1 = 2^(x->ex - y->ex) */ \
+ FP80(t1)->sign = 0; \
+ FP80(t1)->exponent = BIAS_80 + (x)->ex-(y)->ex; \
+ /* exponent is correct because */ \
+ /* |x->ex - y->ex| = */ \
+ /* = | (x->ex + x->fphi.exponent) - */ \
+ /* -(y->ex + y->fphi.exponent) + */ \
+ /* + y->fphi.exponent - */ \
+ /* - x->fphi.exponent | < */ \
+ /* < | (x->ex+x->fphi.exponent) - */ \
+ /* -(y->ex+y->fphi.exponent) | + */ \
+ /* +| y->fphi.exponent - */ \
+ /* -x->fphi.exponent | < */ \
+ /* < 134 + 16000 */ \
+ FP80(t1)->hi_significand = 0x80000000; \
+ FP80(t1)->lo_significand = 0x00000000; \
+ (x)->ex = (y)->ex; \
+ (x)->ldhi *= t1; \
+ (x)->ldlo *= t1; \
+ } \
+ /* r==x-y */ \
+ (r)->ex = (y)->ex; \
+ __LIBM_SUBL2_K80( (r)->ldhi,(r)->ldlo, \
+ (x)->ldhi,(x)->ldlo, (y)->ldhi,(y)->ldlo, t1 ); \
+ } else if ( SIGNIFICAND_ZERO_80(&((x)->fphi)) || \
+ ((y)->ex+(y)->fphi.exponent-BIAS_80 - 134 >= \
+ (x)->ex+(x)->fphi.exponent-BIAS_80) ) \
+ { \
+ /* |x|<<|y| */ \
+ (r)->ex = (y)->ex; \
+ (r)->ldhi = -((y)->ldhi); \
+ (r)->ldlo = -((y)->ldlo); \
+ } else { \
+ /* |y|<<|x| */ \
+ *(r) = *(x); \
+ }
+#elif defined(SIZE_INT_64)
+#define __LIBM_SUBL_K80(r,x,y, t1) \
+ if ( ((y)->ex+(y)->fphi.exponent-134 < \
+ (x)->ex+(x)->fphi.exponent) && \
+ ((x)->ex+(x)->fphi.exponent < \
+ (y)->ex+(y)->fphi.exponent+134) && \
+ !SIGNIFICAND_ZERO_80(&((x)->fphi)) && \
+ !SIGNIFICAND_ZERO_80(&((y)->fphi)) ) \
+ { \
+ /* y/2^134 < x < y*2^134, */ \
+ /* and x,y are nonzero finite numbers */ \
+ if ( (x)->ex != (y)->ex ) { \
+ /* adjust x->ex to y->ex */ \
+ /* t1 = 2^(x->ex - y->ex) */ \
+ FP80(t1)->sign = 0; \
+ FP80(t1)->exponent = BIAS_80 + (x)->ex-(y)->ex; \
+ /* exponent is correct because */ \
+ /* |x->ex - y->ex| = */ \
+ /* = | (x->ex + x->fphi.exponent) - */ \
+ /* -(y->ex + y->fphi.exponent) + */ \
+ /* + y->fphi.exponent - */ \
+ /* - x->fphi.exponent | < */ \
+ /* < | (x->ex+x->fphi.exponent) - */ \
+ /* -(y->ex+y->fphi.exponent) | + */ \
+ /* +| y->fphi.exponent - */ \
+ /* -x->fphi.exponent | < */ \
+ /* < 134 + 16000 */ \
+ FP80(t1)->significand = 0x8000000000000000; \
+ (x)->ex = (y)->ex; \
+ (x)->ldhi *= t1; \
+ (x)->ldlo *= t1; \
+ } \
+ /* r==x-y */ \
+ (r)->ex = (y)->ex; \
+ __LIBM_SUBL2_K80( (r)->ldhi,(r)->ldlo, \
+ (x)->ldhi,(x)->ldlo, (y)->ldhi,(y)->ldlo, t1 ); \
+ } else if ( SIGNIFICAND_ZERO_80(&((x)->fphi)) || \
+ ((y)->ex+(y)->fphi.exponent-BIAS_80 - 134 >= \
+ (x)->ex+(x)->fphi.exponent-BIAS_80) ) \
+ { \
+ /* |x|<<|y| */ \
+ (r)->ex = (y)->ex; \
+ (r)->ldhi = -((y)->ldhi); \
+ (r)->ldlo = -((y)->ldlo); \
+ } else { \
+ /* |y|<<|x| */ \
+ *(r) = *(x); \
+ }
+#endif
+
+/* Subtraction: r=x-y */
+/* Variables r,x,y are pointers to struct ker80, */
+/* all other variables are in long double precision */
+/* Temporary variables: t1 */
+/* Correct for any finite x and y */
+#define __LIBM_SUBL_NORM_K80(r,x,y, t1) \
+ if ( ((x)->fphi.exponent-BIAS_80<-8000) || \
+ ((x)->fphi.exponent-BIAS_80>+8000) || \
+ ((y)->fphi.exponent-BIAS_80<-8000) || \
+ ((y)->fphi.exponent-BIAS_80>+8000) ) \
+ { \
+ __libm_normalizel_k80(x); \
+ __libm_normalizel_k80(y); \
+ } \
+ __LIBM_SUBL_K80(r,x,y, t1)
+
+/* Multiplication: x*y */
+/* The result is sum rhi+rlo */
+/* Here t32 is the constant 2^32+1 */
+/* Temporary variables: t1,t2,t3,t4,t5,t6 */
+/* All variables are in long double precision */
+/* Correct if no over/underflow (algorithm by T.J.Dekker) */
+#define __LIBM_MULL1_K80(rhi,rlo,x,y, \
+ t32,t1,t2,t3,t4,t5,t6) \
+ t1=(x)*(t32); t3=x-t1; t3=t3+t1; t4=x-t3; \
+ t1=(y)*(t32); t5=y-t1; t5=t5+t1; t6=y-t5; \
+ t1=(t3)*(t5); \
+ t2=(t3)*(t6)+(t4)*(t5); \
+ rhi=t1+t2; \
+ rlo=t1-rhi; rlo=rlo+t2; rlo=rlo+(t4*t6);
+
+/* Multiplication: (xhi+xlo)*(yhi+ylo) */
+/* The result is sum rhi+rlo */
+/* Here t32 is the constant 2^32+1 */
+/* Temporary variables: t1,t2,t3,t4,t5,t6,t7,t8 */
+/* All variables are in long double precision */
+/* Correct if no over/underflow (algorithm by T.J.Dekker) */
+#define __LIBM_MULL2_K80(rhi,rlo,xhi,xlo,yhi,ylo, \
+ t32,t1,t2,t3,t4,t5,t6,t7,t8) \
+ __LIBM_MULL1_K80(t7,t8,xhi,yhi, t32,t1,t2,t3,t4,t5,t6) \
+ t1=(xhi)*(ylo)+(xlo)*(yhi); t1=t1+t8; \
+ rhi=t7+t1; \
+ rlo=t7-rhi; rlo=rlo+t1;
+
+/* Multiplication: r=x*y */
+/* Variables r,x,y are pointers to struct ker80, */
+/* all other variables are in long double precision */
+/* Here t32 is the constant 2^32+1 */
+/* Temporary variables: t1,t2,t3,t4,t5,t6,t7,t8 */
+/* Correct if x and y belong to interval [2^-8000;2^8000] */
+#define __LIBM_MULL_K80(r,x,y, t32,t1,t2,t3,t4,t5,t6,t7,t8) \
+ (r)->ex = (x)->ex + (y)->ex; \
+ __LIBM_MULL2_K80((r)->ldhi,(r)->ldlo, \
+ (x)->ldhi,(x)->ldlo,(y)->ldhi,(y)->ldlo, \
+ t32,t1,t2,t3,t4,t5,t6,t7,t8)
+
+/* Multiplication: r=x*y */
+/* Variables r,x,y are pointers to struct ker80, */
+/* all other variables are in long double precision */
+/* Here t32 is the constant 2^32+1 */
+/* Temporary variables: t1,t2,t3,t4,t5,t6,t7,t8 */
+/* Correct for any finite x and y */
+#define __LIBM_MULL_NORM_K80(r,x,y, \
+ t32,t1,t2,t3,t4,t5,t6,t7,t8) \
+ if ( ((x)->fphi.exponent-BIAS_80<-8000) || \
+ ((x)->fphi.exponent-BIAS_80>+8000) || \
+ ((y)->fphi.exponent-BIAS_80<-8000) || \
+ ((y)->fphi.exponent-BIAS_80>+8000) ) \
+ { \
+ __libm_normalizel_k80(x); \
+ __libm_normalizel_k80(y); \
+ } \
+ __LIBM_MULL_K80(r,x,y, t32,t1,t2,t3,t4,t5,t6,t7,t8)
+
+/* Division: (xhi+xlo)/(yhi+ylo) */
+/* The result is sum rhi+rlo */
+/* Here t32 is the constant 2^32+1 */
+/* Temporary variables: t1,t2,t3,t4,t5,t6,t7,t8,t9 */
+/* All variables are in long double precision */
+/* Correct if no over/underflow (algorithm by T.J.Dekker) */
+#define __LIBM_DIVL2_K80(rhi,rlo,xhi,xlo,yhi,ylo, \
+ t32,t1,t2,t3,t4,t5,t6,t7,t8,t9) \
+ t7=(xhi)/(yhi); \
+ __LIBM_MULL1_K80(t8,t9,t7,yhi, t32,t1,t2,t3,t4,t5,t6) \
+ t1=xhi-t8; t1=t1-t9; t1=t1+xlo; t1=t1-(t7)*(ylo); \
+ t1=(t1)/(yhi); \
+ rhi=t7+t1; \
+ rlo=t7-rhi; rlo=rlo+t1;
+
+/* Division: r=x/y */
+/* Variables r,x,y are pointers to struct ker80, */
+/* all other variables are in long double precision */
+/* Here t32 is the constant 2^32+1 */
+/* Temporary variables: t1,t2,t3,t4,t5,t6,t7,t8,t9 */
+/* Correct if x and y belong to interval [2^-8000;2^8000] */
+#define __LIBM_DIVL_K80(r,x,y, \
+ t32,t1,t2,t3,t4,t5,t6,t7,t8,t9) \
+ (r)->ex = (x)->ex - (y)->ex; \
+ __LIBM_DIVL2_K80( (r)->ldhi,(r)->ldlo, \
+ (x)->ldhi,(x)->ldlo,(y)->ldhi,(y)->ldlo, \
+ t32,t1,t2,t3,t4,t5,t6,t7,t8,t9)
+
+/* Division: r=x/y */
+/* Variables r,x,y are pointers to struct ker80, */
+/* all other variables are in long double precision */
+/* Here t32 is the constant 2^32+1 */
+/* Temporary variables: t1,t2,t3,t4,t5,t6,t7,t8,t9 */
+/* Correct for any finite x and y */
+#define __LIBM_DIVL_NORM_K80(r,x,y, \
+ t32,t1,t2,t3,t4,t5,t6,t7,t8,t9) \
+ if ( ((x)->fphi.exponent-BIAS_80<-8000) || \
+ ((x)->fphi.exponent-BIAS_80>+8000) || \
+ ((y)->fphi.exponent-BIAS_80<-8000) || \
+ ((y)->fphi.exponent-BIAS_80>+8000) ) \
+ { \
+ __libm_normalizel_k80(x); \
+ __libm_normalizel_k80(y); \
+ } \
+ __LIBM_DIVL_K80(r,x,y, t32,t1,t2,t3,t4,t5,t6,t7,t8,t9)
+
+/* Square root: sqrt(xhi+xlo) */
+/* The result is sum rhi+rlo */
+/* Here t32 is the constant 2^32+1 */
+/* half is the constant 0.5 */
+/* Temporary variables: t1,t2,t3,t4,t5,t6,t7,t8,t9 */
+/* All variables are in long double precision */
+/* Correct for positive xhi+xlo (algorithm by T.J.Dekker) */
+#define __LIBM_SQRTL2_NORM_K80(rhi,rlo,xhi,xlo, \
+ t32,half,t1,t2,t3,t4,t5,t6,t7,t8,t9) \
+ t7=sqrtl(xhi); \
+ __LIBM_MULL1_K80(t8,t9,t7,t7, t32,t1,t2,t3,t4,t5,t6) \
+ t1=xhi-t8; t1=t1-t9; t1=t1+xlo; t1=(t1)*(half); \
+ t1=(t1)/(t7); \
+ rhi=t7+t1; \
+ rlo=t7-rhi; rlo=rlo+t1;
+
+/* Square root: r=sqrt(x) */
+/* Variables r,x are pointers to struct ker80, */
+/* all other variables are in long double precision */
+/* Here t32 is the constant 2^32+1 */
+/* half is the constant 0.5 */
+/* Temporary variables: t1,t2,t3,t4,t5,t6,t7,t8,t9 */
+/* Correct if x belongs to interval [2^-16000;2^16000] */
+#define __LIBM_SQRTL_K80(r,x, \
+ t32,half,t1,t2,t3,t4,t5,t6,t7,t8,t9) \
+ if ( ((x)->ex & 1) == 1 ) { \
+ (x)->ex = (x)->ex + 1; \
+ (x)->ldhi *= half; \
+ (x)->ldlo *= half; \
+ } \
+ (r)->ex = (x)->ex >> 1; \
+ __LIBM_SQRTL2_NORM_K80( (r)->ldhi,(r)->ldlo, \
+ (x)->ldhi,(x)->ldlo, \
+ t32,half,t1,t2,t3,t4,t5,t6,t7,t8,t9)
+
+/* Square root: r=sqrt(x) */
+/* Variables r,x are pointers to struct ker80, */
+/* all other variables are in long double precision */
+/* Here t32 is the constant 2^32+1 */
+/* half is the constant 0.5 */
+/* Temporary variables: t1,t2,t3,t4,t5,t6,t7,t8,t9 */
+/* Correct for any positive x */
+#define __LIBM_SQRTL_NORM_K80(r,x, \
+ t32,half,t1,t2,t3,t4,t5,t6,t7,t8,t9) \
+ if ( ((x)->fphi.exponent-BIAS_80<-16000) || \
+ ((x)->fphi.exponent-BIAS_80>+16000) ) \
+ { \
+ __libm_normalizel_k80(x); \
+ } \
+ __LIBM_SQRTL_K80(r,x, t32,half,t1,t2,t3,t4,t5,t6,t7,t8,t9)
+
+
+#ifdef __INTEL_COMPILER
+#define ALIGN(n) __declspec(align(n))
+#else /* __INTEL_COMPILER */
+#define ALIGN(n)
+#endif /* __INTEL_COMPILER */
+
+/* macros to form a long double value in hex representation (unsigned short type) */
+
+#if (defined(__unix__) && defined(__i386__))
+# define LDOUBLE_ALIGN 12 /* IA32 Linux: 12-byte alignment */
+#else /*__linux__ & IA32*/
+# define LDOUBLE_ALIGN 16 /* EFI2/IA32 Win or IPF Win/Linux: 16-byte alignment */
+#endif /*__linux__ & IA32*/
+
#if (LDOUBLE_ALIGN == 16)
#define _XPD_ ,0x0000,0x0000,0x0000
#else /*12*/
@@ -451,7 +860,7 @@ struct fp80 { /*/ sign:1 exponent:15 significand:64 (NO implied bits) */
# define SIGNIFICAND_LT_HEX_64(X,HI,LO) ((X)->significand < 0x ## HI ## LO)
# define SIGNIFICAND_LE_HEX_64(X,HI,LO) ((X)->significand <= 0x ## HI ## LO)
#endif
-
+
#if defined(SIZE_INT_32)
# define SIGNIFICAND_EQ_HEX_80(X,HI,LO) \
(((X)->hi_significand == 0x ## HI) && ((X)->lo_significand == 0x ## LO))
@@ -514,15 +923,15 @@ struct fp80 { /*/ sign:1 exponent:15 significand:64 (NO implied bits) */
#if defined(SIZE_INT_32)
# define SIGNIFICAND_EQ_64(X,Y) \
- (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand == (Y)->lo_significand))
+ (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand == (Y)->lo_significand))
# define SIGNIFICAND_GT_64(X,Y) (((X)->hi_significand > (Y)->hi_significand) || \
- (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand > (Y)->lo_significand)))
+ (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand > (Y)->lo_significand)))
# define SIGNIFICAND_GE_64(X,Y) (((X)->hi_significand > (Y)->hi_significand) || \
- (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand >= (Y)->lo_significand)))
+ (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand >= (Y)->lo_significand)))
# define SIGNIFICAND_LT_64(X,Y) (((X)->hi_significand < (Y)->hi_significand) || \
- (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand < (Y)->lo_significand)))
+ (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand < (Y)->lo_significand)))
# define SIGNIFICAND_LE_64(X,Y) (((X)->hi_significand < (Y)->hi_significand) || \
- (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand <= (Y)->lo_significand)))
+ (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand <= (Y)->lo_significand)))
#elif defined(SIZE_INT_64)
# define SIGNIFICAND_EQ_64(X,Y) ((X)->significand == (Y)->significand)
# define SIGNIFICAND_GT_64(X,Y) ((X)->significand > (Y)->significand)
@@ -560,7 +969,7 @@ struct fp80 { /*/ sign:1 exponent:15 significand:64 (NO implied bits) */
(((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LT_32(X, Y))))
#define VALUE_LE_32(X,Y) (((X)->exponent < (Y)->exponent) || \
(((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LE_32(X, Y))))
-
+
#define VALUE_EQ_64(X,Y) \
(((X)->exponent == (Y)->exponent) && (SIGNIFICAND_EQ_64(X, Y)))
#define VALUE_GT_64(X,Y) (((X)->exponent > (Y)->exponent) || \
@@ -571,7 +980,7 @@ struct fp80 { /*/ sign:1 exponent:15 significand:64 (NO implied bits) */
(((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LT_64(X, Y))))
#define VALUE_LE_64(X,Y) (((X)->exponent < (Y)->exponent) || \
(((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LE_64(X, Y))))
-
+
#define VALUE_EQ_80(X,Y) \
(((X)->exponent == (Y)->exponent) && (SIGNIFICAND_EQ_80(X, Y)))
#define VALUE_GT_80(X,Y) (((X)->exponent > (Y)->exponent) || \
@@ -622,134 +1031,21 @@ struct fp80 { /*/ sign:1 exponent:15 significand:64 (NO implied bits) */
#endif
+/* error codes */
-#if (defined(_WIN32) && !defined(_WIN64))
-
-#define FP80_DECLARE()
-#define _FPC_64 0x0300
-static unsigned short __wControlWord, __wNewControlWord;
-#define FP80_SET() { \
- __asm { fnstcw word ptr [__wControlWord] } \
- __wNewControlWord = __wControlWord | _FPC_64; \
- __asm { fldcw word ptr [__wNewControlWord] } \
- }
-#define FP80_RESET() { \
- __asm { fldcw word ptr [__wControlWord] } \
- }
-#else /* defined(_WIN32) && !defined(_WIN64) */
-
-#define FP80_DECLARE()
-#define FP80_SET()
-#define FP80_RESET()
-
-#endif /* defined(_WIN32) && !defined(_WIN64) */
-
-
-#ifdef _LIBC
-# include <math.h>
-#else
-
-static const unsigned INF[] = {
- DOUBLE_HEX(7ff00000, 00000000),
- DOUBLE_HEX(fff00000, 00000000)
-};
-
-static const double _zeroo = 0.0;
-static const double _bigg = 1.0e300;
-static const double _ponee = 1.0;
-static const double _nonee = -1.0;
-
-#define INVALID (_zeroo * *((double*)&INF[0]))
-#define PINF *((double*)&INF[0])
-#define NINF -PINF
-#define PINF_DZ (_ponee/_zeroo)
-#define X_TLOSS 1.41484755040568800000e+16
-#endif
-
-struct exceptionf
-{
- int type;
- char *name;
- float arg1, arg2, retval;
-};
-
-# ifdef __cplusplus
-struct __exception
-{
- int type;
- char *name;
- double arg1, arg2, retval;
-};
-# else
-
-# ifndef _LIBC
-struct exception
-{
- int type;
- char *name;
- double arg1, arg2, retval;
-};
-# endif
-# endif
-
-
-
-struct exceptionl
-{
- int type;
- char *name;
- long double arg1, arg2, retval;
-};
-
-#ifdef _MS_
-#define MATHERR_F _matherrf
-#define MATHERR_D _matherr
-#else
-#define MATHERR_F matherrf
-#define MATHERR_D matherr
-#endif
-
-# ifdef __cplusplus
-#define EXC_DECL_D __exception
-#else
-// exception is a reserved name in C++
-#define EXC_DECL_D exception
-#endif
-
-extern int MATHERR_F(struct exceptionf*);
-extern int MATHERR_D(struct EXC_DECL_D*);
-extern int matherrl(struct exceptionl*);
-
+#define DOMAIN 1 /* argument domain error */
+#define SING 2 /* argument singularity */
+#define OVERFLOW 3 /* overflow range error */
+#define UNDERFLOW 4 /* underflow range error */
+#define TLOSS 5 /* total loss of precision */
+#define PLOSS 6 /* partial loss of precision */
-/* Set these appropriately to make thread Safe */
-#define ERRNO_RANGE errno = ERANGE
-#define ERRNO_DOMAIN errno = EDOM
+/* volatile qualifiers per precision; currently disabled (each expands to nothing) */
+#define VOLATILE_32 /*volatile*/
+#define VOLATILE_64 /*volatile*/
+#define VOLATILE_80 /*volatile*/
-// Add code to support _LIB_VERSIONIMF
-#ifndef _LIBC
-typedef enum
-{
- _IEEE_ = -1, // IEEE-like behavior
- _SVID_, // SysV, Rel. 4 behavior
- _XOPEN_, // Unix98
- _POSIX_, // Posix
- _ISOC_ // ISO C9X
-} _LIB_VERSION_TYPE;
-
-
-#if !defined( LIBM_BUILD )
-#if defined( _DLL )
-extern _LIB_VERSION_TYPE __declspec(dllimport) _LIB_VERSIONIMF;
-#else
-extern _LIB_VERSION_TYPE _LIB_VERSIONIMF;
-#endif /* _DLL */
-#else
-extern int (*pmatherrf)(struct exceptionf*);
-extern int (*pmatherr)(struct EXC_DECL_D*);
-extern int (*pmatherrl)(struct exceptionl*);
-#endif /* LIBM_BUILD */
+#define QUAD_TYPE _Quad
-// This is a run-time variable and may affect
-// floating point behavior of the libm functions
-#endif
+#endif /*__LIBM_SUPPORT_H_INCLUDED__*/
diff --git a/sysdeps/ia64/fpu/s_asinh.S b/sysdeps/ia64/fpu/s_asinh.S
index a9ef4e1..ab01f4f 100644
--- a/sysdeps/ia64/fpu/s_asinh.S
+++ b/sysdeps/ia64/fpu/s_asinh.S
@@ -1134,3 +1134,4 @@ ASINH_UNORM:
;;
GLOBAL_LIBM_END(asinh)
+
diff --git a/sysdeps/ia64/fpu/s_asinhl.S b/sysdeps/ia64/fpu/s_asinhl.S
index fcb4e6e..d3a5507 100644
--- a/sysdeps/ia64/fpu/s_asinhl.S
+++ b/sysdeps/ia64/fpu/s_asinhl.S
@@ -1344,3 +1344,4 @@ near_0:
GLOBAL_LIBM_END(asinhl)
+
diff --git a/sysdeps/ia64/fpu/s_atanf.S b/sysdeps/ia64/fpu/s_atanf.S
index fb7f4a3..4da68c7 100644
--- a/sysdeps/ia64/fpu/s_atanf.S
+++ b/sysdeps/ia64/fpu/s_atanf.S
@@ -553,3 +553,4 @@ ATANF_X_INF_NAN_ZERO:
;;
GLOBAL_LIBM_END(atanf)
+
diff --git a/sysdeps/ia64/fpu/s_atanl.S b/sysdeps/ia64/fpu/s_atanl.S
index bfd9f45..721a38c 100644
--- a/sysdeps/ia64/fpu/s_atanl.S
+++ b/sysdeps/ia64/fpu/s_atanl.S
@@ -812,6 +812,7 @@ GLOBAL_IEEE754_ENTRY(atanl)
;;
GLOBAL_IEEE754_END(atanl)
+
GLOBAL_IEEE754_ENTRY(atan2l)
{ .mfi
@@ -1951,6 +1952,7 @@ ATANL_ArgY_Not_INF:
;;
GLOBAL_IEEE754_END(atan2l)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/s_cbrt.S b/sysdeps/ia64/fpu/s_cbrt.S
index b7a827d..7a74ac1 100644
--- a/sysdeps/ia64/fpu/s_cbrt.S
+++ b/sysdeps/ia64/fpu/s_cbrt.S
@@ -35,7 +35,7 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http: //www.intel.com/software/products/opensource/libraries/num.htm.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
diff --git a/sysdeps/ia64/fpu/s_cbrtf.S b/sysdeps/ia64/fpu/s_cbrtf.S
index c8c6500..612fb85 100644
--- a/sysdeps/ia64/fpu/s_cbrtf.S
+++ b/sysdeps/ia64/fpu/s_cbrtf.S
@@ -35,7 +35,7 @@
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
-// http: //www.intel.com/software/products/opensource/libraries/num.htm.
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
@@ -762,3 +762,4 @@ GLOBAL_LIBM_END(cbrtf)
+
diff --git a/sysdeps/ia64/fpu/s_cbrtl.S b/sysdeps/ia64/fpu/s_cbrtl.S
index 3e621e2..76ef12f 100644
--- a/sysdeps/ia64/fpu/s_cbrtl.S
+++ b/sysdeps/ia64/fpu/s_cbrtl.S
@@ -1,7 +1,7 @@
.file "cbrtl.s"
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (c) 2000 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@@ -21,27 +21,28 @@
// products derived from this software without specific prior written
// permission.
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
+// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
-// 04/28/00 Initial version
+// 04/28/00 Initial version
// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/06/03 Reordered header: .section, .global, .proc, .align
+// 02/06/03 Reordered header: .section, .global, .proc, .align
+// 11/23/04 Reformatted routine and improved speed
//
// API
//==============================================================
@@ -53,49 +54,93 @@
//
// Implementation
//
-// cbrt(a) = cbrt(a y) / cbrt(y)
-// = cbrt(1 - (1 - a y)) * 1/cbrt(y)
+// The result is computed as
+// cbrt(x)= cbrt(1 - (1 - x*y)) * (1/cbrt(y))
+// where y = frcpa(x) = (-1)^sgn_y * 2^(3*k+j) * m_y,
+// m_y in [1,2), j in {0,1,2}
//
-// where y = frcpa(a).
+// cbrt(1 - (1 - x*y)) is approximated by a degree-6 polynomial
+// in r= 1 - x*y :
+// P = 1 - c_1 r - c_2 * r^2 - c_3 * r^3 - c_4 * r^4 - c_5 * r^5 - c_6 * r^6
//
-// * cbrt(1 - (1 - a y)) is approximated by a degree-6 polynomial
-//
-// 1 - c_1 r - c_2 * r^2 - c_3 * r^3 - c_4 * r^4 - c_5 * r^5 - c_6 * r^6
-//
-// in r = 1 - a y.
//
-// * The values 1/cbrt(y) are stored as two tables of constants T_hi
-// (double-extended precision) and D (single precision) as follows:
+// The values (1/cbrt(y)) are stored as two tables of constants T_hi
+// (double-extended precision) and D (single precision) as follows:
+// T_hi (1 + D)= 1/cbrt(y) to about 80 bits of accuracy
//
-// T_hi (1 + D) = 1/cbrt(y) to about 80 bits of accuracy
+// The tables are only stored for three exponent values (i.e.
+// only for 2^j * m_y, where j in {0,1,2} and m_y covers the 256
+// possible mantissas for an frcpa result); the index is formed
+// by the 8 leading mantissa bits of x, which is the same index used
+// by the hardware to get frcpa(x).
//
-// The tables are only stored for three exponent values and are
-// then multiplied by e/3 where e is the exponent of the input number.
-// This computation is carried out in parallel with the polynomial
-// evaluation:
+// The table values are multiplied by 2^k, where k is determined by the
+// exponent of the input number. This multiplication is carried out in parallel with
+// the polynomial evaluation:
+// T= 2^(k) * T_hi
//
-// T = 2^(e/3) * T_hi
-
-
-
-
+//=======================================================================
//===============
-// input = x
-// C = frcpa(x)
-// r = C * x - 1
-//
-// Special values
+// Special values
//==============================================================
-
-
// Registers used
//==============================================================
-// f6-f15
-// r2-r3, r23-r30
-// p6,p7,p12
-
+// p6, p7, p12
+ FR_R = f6 // FR_*: symbolic names for floating-point registers
+ FR_C1 = f7
+ FR_C2 = f9
+ FR_C3 = f10
+ FR_C4 = f11
+ FR_C5 = f12
+ FR_C6 = f13
+ FR_XNORM = f14
+ FR_D = f15
+ FR_SPECIAL = f32
+ FR_RCP = f33
+ FR_R2 = f34
+ FR_P1 = f35
+ FR_P2 = f36
+ FR_P3 = f37
+ FR_P4 = f38
+ FR_P5 = f39
+ FR_R3 = f40
+ FR_T = f41
+ FR_TF = f42
+ FR_P = f43
+ FR_SGNEXP = f44
+
+ GR_ADDR = r2 // GR_*: symbolic names for general registers; several names share one register
+ GR_C_START = r2 // same register as GR_ADDR
+ GR_ARGSIG = r3
+ GR_NORMSIG = r15
+ GR_D_ADDR = r16
+ GR_D_START = r16 // same register as GR_D_ADDR
+ GR_INDEX2 = r17
+ GR_IX2 = r17 // same register as GR_INDEX2
+ GR_NORMEXP = r18
+ GR_EXP5 = r19
+ GR_EXP3 = r20
+ GR_EXP6 = r20 // same register as GR_EXP3
+ GR_EXP17 = r21
+ GR_TMP1 = r21 // same register as GR_EXP17
+ GR_SGNMASK = r22
+ GR_T_INDEX = r23
+ GR_IX_T = r23 // same register as GR_T_INDEX
+ GR_IX_D = r24
+ GR_D_INDEX = r24 // same register as GR_IX_D
+ GR_TMP2 = r25
+ GR_TMP3 = r25 // same register as GR_TMP2
+ GR_TMP4 = r25 // same register as GR_TMP2
+ GR_EXP_RES = r26
+ GR_BIAS23 = r27
+ GR_EXPBIAS = r27 // same register as GR_BIAS23
+ GR_EXP_MOD_3 = r28
+ GR_SIGN = r29
+ GR_EXPSIGNRES = r29 // same register as GR_SIGN
+ GR_REMTMP = r30
+ GR_NORMEXPSGN = r31
// Data tables
@@ -107,601 +152,596 @@ RODATA
LOCAL_OBJECT_START(poly_coeffs)
-data8 0xaaaaaaaaaaaaaab1, 0x00003ffd // C_1
-data8 0xe38e38e38e38e3e0, 0x00003ffb // C_2
-data8 0x3faf9add3c0be9a6, 0x3fa511e8d2b1f749 // C_3, C_4
-data8 0x3f9ee71b2c6ebe99, 0x3f9809180fd0340c // C_5, C_6
+ data8 0xaaaaaaaaaaaaaab1, 0x00003ffd // C_1
+ data8 0xe38e38e38e38e3e0, 0x00003ffb // C_2
+ data8 0x3faf9add3c0be9a6, 0x3fa511e8d2b1f749 // C_3, C_4
+ data8 0x3f9ee71b2c6ebe99, 0x3f9809180fd0340c // C_5, C_6
LOCAL_OBJECT_END(poly_coeffs)
LOCAL_OBJECT_START(T_table)
-
-data8 0x80155c748c374836, 0x8040404b0879f7f9
-data8 0x806b5dce4b405c10, 0x8096b586974669b1
-data8 0x80bcd273d952a028, 0x80e898c52813f2f3
-data8 0x81149add67c2d208, 0x813b4e2c856b6e9a
-data8 0x8167c1dde03de7aa, 0x818ed973b811135e
-data8 0x81bbc0c33e13ec98, 0x81e33e69fbe7504a
-data8 0x820aec524e3c23e9, 0x823880f78e70b805
-data8 0x826097a62a8e5200, 0x8288dfe00e9b5eaf
-data8 0x82b15a10c5371624, 0x82da06a527b18937
-data8 0x8302e60b635ab394, 0x832bf8b2feec2f0e
-data8 0x83553f0ce00e276b, 0x837eb98b50f8322a
-data8 0x83a270f44c84f699, 0x83cc4d7cfcfac5ca
-data8 0x83f65f78a8872b4c, 0x8420a75f2f7b53c8
-data8 0x844510461ff14209, 0x846fbd91b930bed2
-data8 0x84947e18234f3294, 0x84bf92755825045a
-data8 0x84e4ac0ee112ba51, 0x8509ef44b86f20be
-data8 0x85359d5d91768427, 0x855b3bd5b7384357
-data8 0x858104f0c415f79a, 0x85a6f90390d29864
-data8 0x85d3772fcd56a1dd, 0x85f9c982fcc002f3
-data8 0x862047e0e7ea554b, 0x8646f2a26f7f5852
-data8 0x866dca21754096b5, 0x8694ceb8dfd17a37
-data8 0x86bc00c49e9307e8, 0x86dccd74fce79610
-data8 0x870453c845acf90f, 0x872c089a1e90342c
-data8 0x8753ec4a92d16c5e, 0x877bff3aca19f6b4
-data8 0x879d88b6fe1c324c, 0x87c5f346dbf98c3a
-data8 0x87e7c653efacef2c, 0x881089d4e73ffefc
-data8 0x88397e6a366f2a8a, 0x885bc559e5e1c081
-data8 0x887e2ee392bb7a93, 0x88a7a8587e404257
-data8 0x88ca5eda67594784, 0x88f4356166bd590e
-data8 0x89173a0acf5ce026, 0x893a62a098b6a57b
-data8 0x895daf637236ae2c, 0x89883b9d1c2fa9c5
-data8 0x89abd8dd374a5d7b, 0x89cf9b1dcd197fa0
-data8 0x89f382a258ea79de, 0x8a178faf06648f29
-data8 0x8a3bc288b3e1d18a, 0x8a601b74f4d1f835
-data8 0x8a849aba14274764, 0x8aa9409f16cdbc9b
-data8 0x8ace0d6bbe2cb316, 0x8af301688ab33558
-data8 0x8b181cdebe6f3206, 0x8b3d60185fafcb7c
-data8 0x8b62cb603bb2fad0, 0x8b80d7d6bc4104de
-data8 0x8ba68bf73ac74f39, 0x8bcc68fb9f9f7335
-data8 0x8bf26f31c534fca2, 0x8c10f86e13a1a1f9
-data8 0x8c3749916cc6abb5, 0x8c5dc4c4f7706032
-data8 0x8c7cac3a8c42e3e0, 0x8ca373f1b7bf2716
-data8 0x8cc29907fb951294, 0x8ce9ae4e9492aac8
-data8 0x8d0911dddbfdad0e, 0x8d3075c4f20f04ee
-data8 0x8d5018a9d4de77d5, 0x8d77cc47dd143515
-data8 0x8d97af6352739cb7, 0x8db7af523167800f
-data8 0x8ddfd80bc68c32ff, 0x8e00197e1e7c88fe
-data8 0x8e207859f77e20e7, 0x8e40f4ce60c9f8e2
-data8 0x8e69ba46cf2fde4d, 0x8e8a7a00bd7ae63e
-data8 0x8eab57ef1cf2f529, 0x8ecc5442cffb1dad
-data8 0x8eed6f2d2a4acbfe, 0x8f0ea8dff24441ff
-data8 0x8f385c95d696b817, 0x8f59dc43edd930f3
-data8 0x8f7b7b5f5ffad1c4, 0x8f9d3a1bea165f38
-data8 0x8fbf18adc34b66da, 0x8fe117499e356095
-data8 0x90033624aa685f8d, 0x9025757495f36b86
-data8 0x903f3a5dcc091203, 0x9061b2fceb2bdbab
-data8 0x90844ca7211032a7, 0x90a7079403e6a15d
-data8 0x90c9e3fbafd63799, 0x90ece216c8a16ee4
-data8 0x9110021e7b516f0a, 0x912a708a39be9075
-data8 0x914dcc7b31146370, 0x91714af8cfe984d5
-data8 0x918c00a6f3795e97, 0x91afbc299ed0295d
-data8 0x91d39add3e958db0, 0x91ee9920a8974d92
-data8 0x9212b5fcac537c19, 0x9236f6b256923fcf
-data8 0x92523ee6f90dcfc3, 0x9276bef031e6eb79
-data8 0x929236ec237a24ad, 0x92b6f70b7efe9dc3
-data8 0x92d29f61eec7dc2b, 0x92f7a05d5b8ba92f
-data8 0x931379a403be5c16, 0x9338bc44de2e3f34
-data8 0x9354c71412c69486, 0x937a4c273907e262
-data8 0x93968919f6e7975d, 0x93bc516fdd4680c9
-data8 0x93d8c123d9be59b2, 0x93f546c955e60076
-data8 0x941b70a65879079f, 0x943829f337410591
-data8 0x9454f995765bc4d2, 0x947b86b57f5842ed
-data8 0x94988aeb23470f86, 0x94b5a5dc9695f42a
-data8 0x94d2d7a9170d8b42, 0x94f9e87dd78bf019
-data8 0x95175019a503d89e, 0x9534cefa625fcb3a
-data8 0x955265405c491a25, 0x9570130c1f9bb857
-data8 0x9597ca4119525184, 0x95b5af6fb5aa4d3c
-data8 0x95d3ac9273aafd7a, 0x95f1c1cafdfd3684
-data8 0x960fef3b430b8d5f, 0x962e350575b409c5
-data8 0x964c934c0dfc1708, 0x966b0a31c9c6bc7d
-data8 0x968999d9ad8d264e, 0x96a8426705198795
-data8 0x96c703fd64445ee5, 0x96e5dec0a7b4268d
-data8 0x9704d2d4f59f79f3, 0x9723e05ebe91b9b0
-data8 0x97430782be323831, 0x97624865fc0df8bf
-data8 0x9781a32dcc640b2a, 0x97a117ffd0f48e46
-data8 0x97c0a701f9d263c9, 0x97e0505a8637a036
-data8 0x97f57a9fb0b08c6e, 0x9815503365914a9d
-data8 0x98354085054fd204, 0x98554bbbf8a77902
-data8 0x987571fffb7f94f6, 0x9895b3791dd03c23
-data8 0x98ab43a5fc65d0c8, 0x98cbb2d196bd713d
-data8 0x98ec3d9ec7b6f21a, 0x990ce436db5e8344
-data8 0x9922b8218160967a, 0x99438d686f75779d
-data8 0x99647eea131fa20b, 0x997a85045a47c6d0
-data8 0x999ba5f14f8add02, 0x99bce38b5465ecae
-data8 0x99d31ca0887f30f9, 0x99f48a669c74c09e
-data8 0x9a16154eb445c873, 0x9a2c822ec198d667
-data8 0x9a4e3e080cd91b78, 0x9a70177afe52322e
-data8 0x9a86b8fa94eebe10, 0x9aa8c42866ae2958
-data8 0x9abf86f9e12fc45e, 0x9ae1c462fc05f49d
-data8 0x9af8a8dc936b84d0, 0x9b1b19033be35730
-data8 0x9b3da7daf04c2892, 0x9b54c2e4c8a9012b
-data8 0x9b77854e6c661200, 0x9b8ec2e678d56d2f
-data8 0x9ba60e6a5ca133b6, 0x9bc919ea66a151a4
-data8 0x9be0887c09ef82bb, 0x9c03c8d5fffc3503
-data8 0x9c1b5ad21a81cbb9, 0x9c3ed09216e9ca02
-data8 0x9c568656c0423def, 0x9c7a320af242ce60
-data8 0x9c920bf7a8c01dc2, 0x9ca9f475d98b159c
-data8 0x9ccdeca60e80b5f8, 0x9ce5f9d4653d4902
-data8 0x9cfe15cb38bfdd8e, 0x9d225b983f6c1f96
-data8 0x9d3a9cca32261ed7, 0x9d52ecfccebe1768
-data8 0x9d77818d95b82f86, 0x9d8ff7893fa4706c
-data8 0x9da87cbef36f2a5e, 0x9dcd6140b4a35aeb
-data8 0x9de60cd06dc6e2d4, 0x9dfec7d4cc43b76f
-data8 0x9e17925ec9fccc4a, 0x9e3cdf6db57dc075
-data8 0x9e55d110b63637a8, 0x9e6ed27594550d2e
-data8 0x9e87e3adc385d393, 0x9ead9b54b37a1055
-data8 0x9ec6d46a3d7de215, 0x9ee01d9108be3154
-data8 0x9ef976db07288d04, 0x9f12e05a4759ec25
-data8 0x9f2c5a20f4da6668, 0x9f52af78ed1733ca
-data8 0x9f6c52426a39d003, 0x9f860593d42fd7f3
-data8 0x9f9fc97fdb96bd51, 0x9fb99e194f4a7037
-data8 0x9fd383731ca51db9, 0x9fed79a04fbf9423
-data8 0xa00780b413b24ee8, 0xa02eab2c4474b0cd
-data8 0xa048dcd51ccfd142, 0xa0631fa894b11b8d
-data8 0xa07d73ba65e680af, 0xa097d91e6aaf71b0
-data8 0xa0b24fe89e02602f, 0xa0ccd82d1bd2f68b
-data8 0xa0e77200215909e6, 0xa1021d760d584855
-data8 0xa11cdaa36068a57d, 0xa137a99cbd3f880b
-data8 0xa160019ed37fb4ae, 0xa1960b5966da4608
-data8 0xa1cc5dbe6dc2aab4, 0xa202f97995b69c0d
-data8 0xa232fe6eb0c0577d, 0xa26a2582012f6e17
-data8 0xa2a197e5d10465cb, 0xa2d25a532efefbc8
-data8 0xa30a5bd6e49e4ab8, 0xa33b9c9b59879e24
-data8 0xa3742fca6a3c1f21, 0xa3a5f1273887bf22
-data8 0xa3d7ef508ff11574, 0xa4115ce30548bc15
-data8 0xa443df0e53df577a, 0xa4769fa5913c0ec3
-data8 0xa4a99f303bc7def5, 0xa4dcde37779adf4b
-data8 0xa5105d46152c938a, 0xa5441ce89825cb8d
-data8 0xa5781dad3e54d899, 0xa5ac602406c4e68c
-data8 0xa5d9601d95c2c0bc, 0xa60e1e1a2de14745
-data8 0xa6431f6e3fbd9658, 0xa67864b0d432fda4
-data8 0xa6a6444aa0243c0b, 0xa6dc094d10f25792
-data8 0xa70a574cc02bba69, 0xa7409e2af9549084
-data8 0xa76f5c64ca2cf13b, 0xa79e4f0babab5dc0
-data8 0xa7d5579ae5164b85, 0xa804bd3c6fe61cc8
-data8 0xa8345895e5250a5a, 0xa8642a122b44ef0b
-data8 0xa89c38ca18f6108b, 0xa8cc81063b6e87ca
-data8 0xa8fd00bfa409285e, 0xa92db8664d5516da
-data8 0xa95ea86b75cc2c20, 0xa98fd141a4992deb
-data8 0xa9c1335cae7446ba, 0xa9ea8686f556f645
-data8 0xaa1c52d17906bb19, 0xaa4e59b046dab887
-data8 0xaa809b9c60d1890b, 0xaab319102f3f9b33
-data8 0xaadd5a18c1e21274, 0xab1045f2ac31bdf5
-data8 0xab3ae3ab2df7231e, 0xab6e3f945d1e96fc
-data8 0xaba1d953a08fa94e, 0xabcd090db7ef4c3f
-data8 0xabf864602d7c323d, 0xac2ca5886ccf9b57
-data8 0xac5861d4aa441f0f, 0xac8d183fe3a2fbed
-data8 0xacb93703ff51571e, 0xace5830ad0c3f14b
-data8 0xad11fca5d78b3ff2, 0xad4797fddf91a798
-data8 0xad747701e559ebcb, 0xada184a47e9c7613
-data8 0xadcec13ab0dda8ff, 0xadfc2d1a5fd21ba8
-data8 0xae29c89a5053c33a, 0xae5794122b638df9
-data8 0xae858fda8137ae0a, 0xaeb3bc4ccc56d3d1
-data8 0xaee219c374c09920, 0xaf10a899d3235fe7
-data8 0xaf3f692c341fe8b4, 0xaf6e5bd7db9ae6c2
-data8 0xaf9d80fb081cd91b, 0xafc35ce063eb3787
-data8 0xaff2ddcb5f28f03d, 0xb022923b148e05c5
-data8 0xb0527a919adbf58b, 0xb078f3ab1d701c65
-data8 0xb0a93a6870649f31, 0xb0d9b624d62ec856
-data8 0xb100a5f53fb3c8e1, 0xb131821882f5540a
-data8 0xb158bf8e4cb04055, 0xb189fd69d56b238f
-data8 0xb1b189958e8108e4, 0xb1e32a8165b09832
-data8 0xb20b0678fc271eec, 0xb23d0bd3f7592b6e
-data8 0xb26538b2db8420dc, 0xb28d89e339ceca14
-data8 0xb2c022ca12e55a16, 0xb2e8c6852c6b03f1
-data8 0xb3118f4eda9fe40f, 0xb33a7d6268109ebe
-data8 0xb36ddbc5ea70ec55, 0xb3971e9b39264023
-data8 0xb3c0877ecc18e24a, 0xb3ea16ae3a6c905f
-data8 0xb413cc67aa0e4d2d, 0xb43da8e9d163e1af
-data8 0xb47233773b84d425, 0xb49c6825430fe730
-data8 0xb4c6c46bcdb27dcf, 0xb4f1488c0b35d26f
-data8 0xb51bf4c7c51f0168, 0xb546c9616087ab9c
-data8 0xb571c69bdffd9a70, 0xb59cecbae56984c3
-data8 0xb5bd64512bb14bb7, 0xb5e8d2a4bf5ba416
-data8 0xb6146a9a1bc47819, 0xb6402c7749d621c0
-data8 0xb66c1882fb435ea2, 0xb6982f048c999a56
-data8 0xb6c47044075b4142, 0xb6e5bd6bfd02bafd
-data8 0xb7124a2736ff8ef2, 0xb73f026a01e94177
-data8 0xb760a959f1d0a7a7, 0xb78dae7e06868ab0
-data8 0xb7badff8ad9e4e02, 0xb7dce25b8e17ae9f
-data8 0xb80a6226904045e2, 0xb8380f1cafd73c1c
-data8 0xb85a6ea8e321b4d8, 0xb8886b684ae7d2fa
-data8 0xb8ab0726fa00cf5d, 0xb8d954a4d13b7cb1
-data8 0xb8fc2d4f6cd9f04a, 0xb92acc851476b1ab
-data8 0xb94de2d841a184c2, 0xb97cd4c36c92693c
-data8 0xb9a0297f172665e3, 0xb9cf6f21e36c3924
-data8 0xb9f3030951267208, 0xba229d6a618e7c59
-data8 0xba467144459f9855, 0xba6a60c3c48f1a4b
-data8 0xba9a76056b67ee7a, 0xbabea699563ada6e
-data8 0xbae2f350b262cc4b, 0xbb1385a23be24e57
-data8 0xbb3814975e17c680, 0xbb5cc031009bf467
-data8 0xbb81889680024764, 0xbbb2c0d8703ae95d
-data8 0xbbd7cd09ba3c5463, 0xbbfcf68c4977718f
-data8 0xbc223d88cfc88eee, 0xbc47a2284fee4ff8
-data8 0xbc79ac0916ed7b8a, 0xbc9f5670d1a13030
-data8 0xbcc51f068cb95c1d, 0xbceb05f4b30a9bc0
-data8 0xbd110b6604c7d306, 0xbd372f8598620f19
-data8 0xbd5d727edb6b3c7e, 0xbd83d47d937bbc6d
-data8 0xbdaa55addf1ae47d, 0xbdd0f63c36aa73f0
-data8 0xbdf7b6556d550a15, 0xbe1e9626b1ffa96b
-data8 0xbe4595dd903e5371, 0xbe6cb5a7f14bc935
-data8 0xbe93f5b41d047cf7, 0xbebb5630bae4c15f
-data8 0xbee2d74cd30a430c, 0xbf0a7937cf38d981
-data8 0xbf323c217be2bc8c, 0xbf5a203a09342bbb
-data8 0xbf74cad1c14ebfc4, 0xbf9ce6a497a89f78
-data8 0xbfc52428bec6e72f, 0xbfed838fddab024b
-data8 0xc016050c0420981a, 0xc03ea8cfabddc330
-data8 0xc059d3cbd65ddbce, 0xc082b122a3c78c9d
-data8 0xc0abb1499ae736c4, 0xc0d4d474c3aedaaf
-data8 0xc0f054ca33eb3437, 0xc119b2c67e600ed0
-data8 0xc1433453de2033ff, 0xc15ef3e44e10032d
-data8 0xc188b130431d80e6, 0xc1b2929d6067730e
-data8 0xc1ce9268f31cc734, 0xc1f8b0877c1b0c08
-data8 0xc222f35a87b415ba, 0xc23f3467349e5c88
-data8 0xc269b4e40e088c01, 0xc2945aac24daaf6e
-data8 0xc2b0de05e43c1d66, 0xc2dbc275e1229d09
-data8 0xc2f86fca9d80eeff, 0xc323938449a2587e
-data8 0xc3406b40a538ed20, 0xc36bcee8211d15e0
-data8 0xc397593adf2ba366, 0xc3b475b6206155d5
-data8 0xc3e0410243b97383, 0xc3fd890709833d37
-data8 0xc41ae295f7e7fa06, 0xc44709f7bb8a4dd2
-data8 0xc4648fb0e0bec4c1, 0xc490f9a94695ba14
-data8 0xc4aeac0173b7d390, 0xc4db5941007aa853
-data8 0xc4f938aec206291a, 0xc52629e899dfd622
-data8 0xc54436e44043b965, 0xc562563abf9ea07f
-data8 0xc58fa7d1dc42921c, 0xc5adf561b91e110a
-data8 0xc5cc5591bdbd82fa, 0xc5fa08f1ff20593c
-data8 0xc618980a79ce6862, 0xc6373a09e34b50fa
-data8 0xc66550a6e0baaf35, 0xc6842241926342c9
-data8 0xc6a3070b7c93bb9e, 0xc6d18260bb84081b
-data8 0xc6f0977c9416828b, 0xc70fc0117c641630
-data8 0xc72efc34d7e615be, 0xc75dfb441594141e
-data8 0xc77d68aa019bda4c, 0xc79ce9ea478dbc4f
-data8 0xc7bc7f1ae453219d, 0xc7ec0476e15e141a
-data8 0xc80bcbe16f1d540f, 0xc82ba78a5d349735
-data8 0xc84b978847a06b87, 0xc86b9bf1ee817bc6
-data8 0xc88bb4de3667cdf4, 0xc8bc00e7fe9e23a3
-data8 0xc8dc4d7ff2d25232, 0xc8fcaeebcb40eb47
-data8 0xc91d25431426a663, 0xc93db09d7fdb2949
-data8 0xc95e5112e721582a, 0xc97f06bb49787677
-data8 0xc99fd1aecd6e1b06, 0xc9d12a3e27bb1625
-data8 0xc9f22ad82ba3d5f0, 0xca134113105e67b2
-data8 0xca346d07b045a876, 0xca55aecf0e94bb88
-data8 0xca77068257be9bab, 0xca98743ae1c693a8
-data8 0xcab9f8122c99a101, 0xcadb9221e268c3b5
-data8 0xcafd4283d8043dfd, 0xcb1f09520d37c6fb
-data8 0xcb51ddcb9e93095e, 0xcb95f333968ad59b
-data8 0xcbda64292d3ffd97, 0xcc1f3184af961596
-data8 0xcc5bb1ac954d33e2, 0xcca12e9831fc6402
-data8 0xcce70a67b64f24ad, 0xcd24794726477ea5
-data8 0xcd6b096a0b70ee87, 0xcda9177738b15a90
-data8 0xcdf05f2247dffab9, 0xce2f0f347f96f906
-data8 0xce6e0be0cd551a61, 0xceb666b2c347d1de
-data8 0xcef609b0cb874f00, 0xcf35fb5447e5c765
-data8 0xcf763c47ee869f00, 0xcfb6cd3888d71785
-data8 0xcff7aed4fbfbb447, 0xd038e1ce5167e3c6
-data8 0xd07a66d7bfa0ebba, 0xd0bc3ea6b32d1b21
-data8 0xd0f4f0e8f36c1bf8, 0xd1376458e34b037e
-data8 0xd17a2ca133f78572, 0xd1bd4a80301c5715
-data8 0xd1f71682b2fa4575, 0xd23ad555f773f059
-data8 0xd2752c7039a5bf73, 0xd2b98ee008c06b59
-data8 0xd2f4735ffd700280, 0xd32f99ed6d9ac0e1
-data8 0xd374f0666c75d51c, 0xd3b0a7d13618e4a1
-data8 0xd3eca2ea53bcec0c, 0xd428e23874f13a17
-data8 0xd46f82fe293bc6d3, 0xd4ac57e9b7186420
-data8 0xd4e972becb04e8b8, 0xd526d40a7a9b43a3
-data8 0xd5647c5b73917370, 0xd5a26c4201bd6d13
-data8 0xd5e0a45015350a7e, 0xd614b539c6194104
-data8 0xd6537310e224283f, 0xd6927ab62244c917
-data8 0xd6d1ccc1fc4ef4b7, 0xd71169cea98fdded
-data8 0xd746a66a5bc9f6d9, 0xd786ce8f0fae5317
-data8 0xd7bc7ff214c4e75a, 0xd7fd35467a517ed1
-data8 0xd83e38838648d815, 0xd874a1db598b8951
-data8 0xd8ab42205b80edaf, 0xd8ed1849d202f965
-data8 0xd92432bd5a173685, 0xd9669ca45b03c23e
-data8 0xd99e3327cf89574e, 0xd9d602b19b100466
-data8 0xda0e0ba86c096841, 0xda5195fcdb1c3dce
-data8 0xda8a1eb87a491f6c, 0xdac2e230b91c3f84
-data8 0xdafbe0d0b66aea30, 0xdb351b04a8fafced
-data8 0xdb6e9139e33cdd8e, 0xdba843ded7151ea1
-data8 0xdbe2336319b61fc8, 0xdc1c60376789fa68
-data8 0xdc56cacda82d0cd5, 0xdc917398f2797814
-data8 0xdccc5b0d90a3e628, 0xdd0781a10469f0f2
-data8 0xdd42e7ca0b52838f, 0xdd729ad01c69114d
-data8 0xddae749c001fbf5e, 0xddea8f50a51c69b1
-data8 0xde26eb69a0f0f111, 0xde576480262399bc
-data8 0xde943789645933c8, 0xded14d58139a28af
-data8 0xdf025c00bbf2b5c7, 0xdf3feb44d723a713
-data8 0xdf715bc16c159be0, 0xdfaf66240e29cda8
-data8 0xdfe139cbf6e19bdc, 0xe01fc0fe94d9fc52
-data8 0xe051f92ffcc0bd60, 0xe090feec9c9a06ac
-data8 0xe0c39d0c9ff862d6, 0xe0f668eeb99f188d
-data8 0xe1362890eb663139, 0xe1695c7212aecbaa
-data8 0xe19cbf0391bbbbe9, 0xe1d050901c531e85
-data8 0xe2110903b4f4047a, 0xe2450559b4d80b6d
-data8 0xe27931a231554ef3, 0xe2ad8e2ac3c5b04b
-data8 0xe2e21b41b9694cce, 0xe316d93615862714
-data8 0xe3590bd86a0d30f9, 0xe38e38e38e38e38e
-data8 0xe3c397d1e6db7839, 0xe3f928f5953feb9e
-data8 0xe42eeca17c62886c, 0xe464e32943446305
-data8 0xe49b0ce15747a8a2, 0xe4d16a1eee94e9d4
-data8 0xe4fa52107353f67d, 0xe5310a471f4d2dc3
-data8 0xe567f6f1c2b9c224, 0xe59f18689a9e4c9a
-data8 0xe5d66f04b8a68ecf, 0xe60dfb2005c192e9
-data8 0xe645bd1544c7ea51, 0xe66fb21b505b20a0
-data8 0xe6a7d32af4a7c59a, 0xe6e02b129c6a5ae4
-data8 0xe70a9136a7403039, 0xe74349fb2d92a589
-data8 0xe77c3a9c86ed7d42, 0xe7a713f88151518a
-data8 0xe7e067453317ed2b, 0xe819f37a81871bb5
-data8 0xe8454236bfaeca14, 0xe87f32f24c3fc90e
-data8 0xe8aacd8688892ba6, 0xe8e523fd32f606f7
-data8 0xe9110b5311407927, 0xe94bc8bf0c108fa3
-data8 0xe977fdc439c2ca3c, 0xe9b3236528fc349e
-data8 0xe9dfa70b745ac1b4, 0xea1b36268d0eaa38
-data8 0xea480963fd394197, 0xea84034425f27484
-data8 0xeab12713138dd1cc, 0xeade6db73a5e503b
-data8 0xeb1b0268343b121b, 0xeb489b0b2bdb5f14
-data8 0xeb765721e85f03d0, 0xebb389645f222f62
-data8 0xebe198f090607e0c, 0xec0fcc9321024509
-data8 0xec3e247da8b82f61, 0xec7c27d21321c9f7
-data8 0xecaad5278824e453, 0xecd9a76d097d4e77
-data8 0xed089ed5dcd99446, 0xed37bb95add09a1c
-data8 0xed76c70508f904b6, 0xeda63bb05e7f93c6
-data8 0xedd5d661daed2dc4, 0xee05974eef86b903
-data8 0xee357ead791fc670, 0xee658cb3c134a463
-data8 0xee95c1987f080211, 0xeec61d92d8c4314f
-data8 0xeef6a0da64a014ac, 0xef274ba72a07c811
-data8 0xef581e31a2c91260, 0xef8918b2bc43aec6
-data8 0xefba3b63d89d7cbf, 0xefeb867ecffaa607
-data8 0xf01cfa3df1b9c9fa, 0xf04e96dc05b43e2d
-data8 0xf0805c944d827454, 0xf0b24ba285c495cb
-data8 0xf0e46442e76f6569, 0xf116a6b2291d7896
-data8 0xf1383fa9e9b5b381, 0xf16ac84f90083b9b
-data8 0xf19d7b686dcb03d7, 0xf1d0593311db1757
-data8 0xf20361ee8f1c711e, 0xf23695da7de51d3f
-data8 0xf258d095e465cc35, 0xf28c4d0bfc982b34
-data8 0xf2bff55eb3f0ea71, 0xf2f3c9cf9884636e
-data8 0xf31670135ab9cc0f, 0xf34a8e9f0b54cdfb
-data8 0xf37ed9fa6b8add3f, 0xf3a1cfe884ef6bb6
-data8 0xf3d66689dcc8e8d3, 0xf40b2ab069d5c96a
-data8 0xf42e718b90c8bc16, 0xf463822a0a3b4b00
-data8 0xf498c1076015faf8, 0xf4bc5a19a33990b5
-data8 0xf4f1e6a7d6f5425f, 0xf527a232cf6be334
-data8 0xf54b8ecdcda90851, 0xf5819949c7ad87b4
-data8 0xf5a5bac9213b48a9, 0xf5dc1501f324a812
-data8 0xf6006bee86b5589e, 0xf63716b2fa067fa4
-data8 0xf66df22fb6132b9c, 0xf6929fb98225deb1
-data8 0xf6c9cd13021e3fea, 0xf6eeb177472cedae
-data8 0xf713abf4cb0b3afb, 0xf74b4d5333684ef1
-data8 0xf7707f75a72f8e94, 0xf7a874b97927af44
-data8 0xf7cddf140aedf1d8, 0xf806291bacb7f7a9
-data8 0xf82bcc43b92eafef, 0xf8646bf0defb759e
-data8 0xf88a487dfc3ff5f7, 0xf8b03c2b46cdc17f
-data8 0xf8e95541c152ae7a, 0xf90f832c2700c160
-data8 0xf935c88e0c7f419b, 0xf96f5cd84fd86873
-data8 0xf995dd53ebdd9d6d, 0xf9bc75a034436a41
-data8 0xf9f686f26d5518de, 0xfa1d5b39b910a8c5
-data8 0xfa4447acc4ecbfd2, 0xfa7ed7e51e6fdfb4
-data8 0xfaa601394d49a1a0, 0xfacd431644ce0e40
-data8 0xfaf49d96f7a75909, 0xfb2fd3c65e562fd5
-data8 0xfb576c5762024805, 0xfb7f1debc22c4040
-data8 0xfba6e89f32d0190a, 0xfbe2c803a0894893
-data8 0xfc0ad1ff0ed9ecf0, 0xfc32f57bdfbcbe7f
-data8 0xfc5b32968f99b21c, 0xfc83896bc861ab08
-data8 0xfcabfa1861ed4815, 0xfce8d3cea7d3163e
-data8 0xfd118595143ee273, 0xfd3a519943d4865a
-data8 0xfd6337f8e1ae5a4b, 0xfd8c38d1c8e927eb
-data8 0xfdb5544205095a53, 0xfdde8a67d2613531
-data8 0xfe07db619e781611, 0xfe460768d80bf758
-data8 0xfe6f9bfb06cd32f6, 0xfe994bcd3d14fcc2
-data8 0xfec316fecaf3f2ab, 0xfeecfdaf33fadb80
-data8 0xff16fffe2fa8fad6, 0xff411e0ba9db886d
-data8 0xff6b57f7c33e4e9a, 0xff95ade2d1bd7358
-data8 0xffc01fed60f86fb5, 0xffeaae3832b63956
+ data8 0x80155c748c374836, 0x8040404b0879f7f9
+ data8 0x806b5dce4b405c10, 0x8096b586974669b1
+ data8 0x80bcd273d952a028, 0x80e898c52813f2f3
+ data8 0x81149add67c2d208, 0x813b4e2c856b6e9a
+ data8 0x8167c1dde03de7aa, 0x818ed973b811135e
+ data8 0x81bbc0c33e13ec98, 0x81e33e69fbe7504a
+ data8 0x820aec524e3c23e9, 0x823880f78e70b805
+ data8 0x826097a62a8e5200, 0x8288dfe00e9b5eaf
+ data8 0x82b15a10c5371624, 0x82da06a527b18937
+ data8 0x8302e60b635ab394, 0x832bf8b2feec2f0e
+ data8 0x83553f0ce00e276b, 0x837eb98b50f8322a
+ data8 0x83a270f44c84f699, 0x83cc4d7cfcfac5ca
+ data8 0x83f65f78a8872b4c, 0x8420a75f2f7b53c8
+ data8 0x844510461ff14209, 0x846fbd91b930bed2
+ data8 0x84947e18234f3294, 0x84bf92755825045a
+ data8 0x84e4ac0ee112ba51, 0x8509ef44b86f20be
+ data8 0x85359d5d91768427, 0x855b3bd5b7384357
+ data8 0x858104f0c415f79a, 0x85a6f90390d29864
+ data8 0x85d3772fcd56a1dd, 0x85f9c982fcc002f3
+ data8 0x862047e0e7ea554b, 0x8646f2a26f7f5852
+ data8 0x866dca21754096b5, 0x8694ceb8dfd17a37
+ data8 0x86bc00c49e9307e8, 0x86dccd74fce79610
+ data8 0x870453c845acf90f, 0x872c089a1e90342c
+ data8 0x8753ec4a92d16c5e, 0x877bff3aca19f6b4
+ data8 0x879d88b6fe1c324c, 0x87c5f346dbf98c3a
+ data8 0x87e7c653efacef2c, 0x881089d4e73ffefc
+ data8 0x88397e6a366f2a8a, 0x885bc559e5e1c081
+ data8 0x887e2ee392bb7a93, 0x88a7a8587e404257
+ data8 0x88ca5eda67594784, 0x88f4356166bd590e
+ data8 0x89173a0acf5ce026, 0x893a62a098b6a57b
+ data8 0x895daf637236ae2c, 0x89883b9d1c2fa9c5
+ data8 0x89abd8dd374a5d7b, 0x89cf9b1dcd197fa0
+ data8 0x89f382a258ea79de, 0x8a178faf06648f29
+ data8 0x8a3bc288b3e1d18a, 0x8a601b74f4d1f835
+ data8 0x8a849aba14274764, 0x8aa9409f16cdbc9b
+ data8 0x8ace0d6bbe2cb316, 0x8af301688ab33558
+ data8 0x8b181cdebe6f3206, 0x8b3d60185fafcb7c
+ data8 0x8b62cb603bb2fad0, 0x8b80d7d6bc4104de
+ data8 0x8ba68bf73ac74f39, 0x8bcc68fb9f9f7335
+ data8 0x8bf26f31c534fca2, 0x8c10f86e13a1a1f9
+ data8 0x8c3749916cc6abb5, 0x8c5dc4c4f7706032
+ data8 0x8c7cac3a8c42e3e0, 0x8ca373f1b7bf2716
+ data8 0x8cc29907fb951294, 0x8ce9ae4e9492aac8
+ data8 0x8d0911dddbfdad0e, 0x8d3075c4f20f04ee
+ data8 0x8d5018a9d4de77d5, 0x8d77cc47dd143515
+ data8 0x8d97af6352739cb7, 0x8db7af523167800f
+ data8 0x8ddfd80bc68c32ff, 0x8e00197e1e7c88fe
+ data8 0x8e207859f77e20e7, 0x8e40f4ce60c9f8e2
+ data8 0x8e69ba46cf2fde4d, 0x8e8a7a00bd7ae63e
+ data8 0x8eab57ef1cf2f529, 0x8ecc5442cffb1dad
+ data8 0x8eed6f2d2a4acbfe, 0x8f0ea8dff24441ff
+ data8 0x8f385c95d696b817, 0x8f59dc43edd930f3
+ data8 0x8f7b7b5f5ffad1c4, 0x8f9d3a1bea165f38
+ data8 0x8fbf18adc34b66da, 0x8fe117499e356095
+ data8 0x90033624aa685f8d, 0x9025757495f36b86
+ data8 0x903f3a5dcc091203, 0x9061b2fceb2bdbab
+ data8 0x90844ca7211032a7, 0x90a7079403e6a15d
+ data8 0x90c9e3fbafd63799, 0x90ece216c8a16ee4
+ data8 0x9110021e7b516f0a, 0x912a708a39be9075
+ data8 0x914dcc7b31146370, 0x91714af8cfe984d5
+ data8 0x918c00a6f3795e97, 0x91afbc299ed0295d
+ data8 0x91d39add3e958db0, 0x91ee9920a8974d92
+ data8 0x9212b5fcac537c19, 0x9236f6b256923fcf
+ data8 0x92523ee6f90dcfc3, 0x9276bef031e6eb79
+ data8 0x929236ec237a24ad, 0x92b6f70b7efe9dc3
+ data8 0x92d29f61eec7dc2b, 0x92f7a05d5b8ba92f
+ data8 0x931379a403be5c16, 0x9338bc44de2e3f34
+ data8 0x9354c71412c69486, 0x937a4c273907e262
+ data8 0x93968919f6e7975d, 0x93bc516fdd4680c9
+ data8 0x93d8c123d9be59b2, 0x93f546c955e60076
+ data8 0x941b70a65879079f, 0x943829f337410591
+ data8 0x9454f995765bc4d2, 0x947b86b57f5842ed
+ data8 0x94988aeb23470f86, 0x94b5a5dc9695f42a
+ data8 0x94d2d7a9170d8b42, 0x94f9e87dd78bf019
+ data8 0x95175019a503d89e, 0x9534cefa625fcb3a
+ data8 0x955265405c491a25, 0x9570130c1f9bb857
+ data8 0x9597ca4119525184, 0x95b5af6fb5aa4d3c
+ data8 0x95d3ac9273aafd7a, 0x95f1c1cafdfd3684
+ data8 0x960fef3b430b8d5f, 0x962e350575b409c5
+ data8 0x964c934c0dfc1708, 0x966b0a31c9c6bc7d
+ data8 0x968999d9ad8d264e, 0x96a8426705198795
+ data8 0x96c703fd64445ee5, 0x96e5dec0a7b4268d
+ data8 0x9704d2d4f59f79f3, 0x9723e05ebe91b9b0
+ data8 0x97430782be323831, 0x97624865fc0df8bf
+ data8 0x9781a32dcc640b2a, 0x97a117ffd0f48e46
+ data8 0x97c0a701f9d263c9, 0x97e0505a8637a036
+ data8 0x97f57a9fb0b08c6e, 0x9815503365914a9d
+ data8 0x98354085054fd204, 0x98554bbbf8a77902
+ data8 0x987571fffb7f94f6, 0x9895b3791dd03c23
+ data8 0x98ab43a5fc65d0c8, 0x98cbb2d196bd713d
+ data8 0x98ec3d9ec7b6f21a, 0x990ce436db5e8344
+ data8 0x9922b8218160967a, 0x99438d686f75779d
+ data8 0x99647eea131fa20b, 0x997a85045a47c6d0
+ data8 0x999ba5f14f8add02, 0x99bce38b5465ecae
+ data8 0x99d31ca0887f30f9, 0x99f48a669c74c09e
+ data8 0x9a16154eb445c873, 0x9a2c822ec198d667
+ data8 0x9a4e3e080cd91b78, 0x9a70177afe52322e
+ data8 0x9a86b8fa94eebe10, 0x9aa8c42866ae2958
+ data8 0x9abf86f9e12fc45e, 0x9ae1c462fc05f49d
+ data8 0x9af8a8dc936b84d0, 0x9b1b19033be35730
+ data8 0x9b3da7daf04c2892, 0x9b54c2e4c8a9012b
+ data8 0x9b77854e6c661200, 0x9b8ec2e678d56d2f
+ data8 0x9ba60e6a5ca133b6, 0x9bc919ea66a151a4
+ data8 0x9be0887c09ef82bb, 0x9c03c8d5fffc3503
+ data8 0x9c1b5ad21a81cbb9, 0x9c3ed09216e9ca02
+ data8 0x9c568656c0423def, 0x9c7a320af242ce60
+ data8 0x9c920bf7a8c01dc2, 0x9ca9f475d98b159c
+ data8 0x9ccdeca60e80b5f8, 0x9ce5f9d4653d4902
+ data8 0x9cfe15cb38bfdd8e, 0x9d225b983f6c1f96
+ data8 0x9d3a9cca32261ed7, 0x9d52ecfccebe1768
+ data8 0x9d77818d95b82f86, 0x9d8ff7893fa4706c
+ data8 0x9da87cbef36f2a5e, 0x9dcd6140b4a35aeb
+ data8 0x9de60cd06dc6e2d4, 0x9dfec7d4cc43b76f
+ data8 0x9e17925ec9fccc4a, 0x9e3cdf6db57dc075
+ data8 0x9e55d110b63637a8, 0x9e6ed27594550d2e
+ data8 0x9e87e3adc385d393, 0x9ead9b54b37a1055
+ data8 0x9ec6d46a3d7de215, 0x9ee01d9108be3154
+ data8 0x9ef976db07288d04, 0x9f12e05a4759ec25
+ data8 0x9f2c5a20f4da6668, 0x9f52af78ed1733ca
+ data8 0x9f6c52426a39d003, 0x9f860593d42fd7f3
+ data8 0x9f9fc97fdb96bd51, 0x9fb99e194f4a7037
+ data8 0x9fd383731ca51db9, 0x9fed79a04fbf9423
+ data8 0xa00780b413b24ee8, 0xa02eab2c4474b0cd
+ data8 0xa048dcd51ccfd142, 0xa0631fa894b11b8d
+ data8 0xa07d73ba65e680af, 0xa097d91e6aaf71b0
+ data8 0xa0b24fe89e02602f, 0xa0ccd82d1bd2f68b
+ data8 0xa0e77200215909e6, 0xa1021d760d584855
+ data8 0xa11cdaa36068a57d, 0xa137a99cbd3f880b
+ data8 0xa160019ed37fb4ae, 0xa1960b5966da4608
+ data8 0xa1cc5dbe6dc2aab4, 0xa202f97995b69c0d
+ data8 0xa232fe6eb0c0577d, 0xa26a2582012f6e17
+ data8 0xa2a197e5d10465cb, 0xa2d25a532efefbc8
+ data8 0xa30a5bd6e49e4ab8, 0xa33b9c9b59879e24
+ data8 0xa3742fca6a3c1f21, 0xa3a5f1273887bf22
+ data8 0xa3d7ef508ff11574, 0xa4115ce30548bc15
+ data8 0xa443df0e53df577a, 0xa4769fa5913c0ec3
+ data8 0xa4a99f303bc7def5, 0xa4dcde37779adf4b
+ data8 0xa5105d46152c938a, 0xa5441ce89825cb8d
+ data8 0xa5781dad3e54d899, 0xa5ac602406c4e68c
+ data8 0xa5d9601d95c2c0bc, 0xa60e1e1a2de14745
+ data8 0xa6431f6e3fbd9658, 0xa67864b0d432fda4
+ data8 0xa6a6444aa0243c0b, 0xa6dc094d10f25792
+ data8 0xa70a574cc02bba69, 0xa7409e2af9549084
+ data8 0xa76f5c64ca2cf13b, 0xa79e4f0babab5dc0
+ data8 0xa7d5579ae5164b85, 0xa804bd3c6fe61cc8
+ data8 0xa8345895e5250a5a, 0xa8642a122b44ef0b
+ data8 0xa89c38ca18f6108b, 0xa8cc81063b6e87ca
+ data8 0xa8fd00bfa409285e, 0xa92db8664d5516da
+ data8 0xa95ea86b75cc2c20, 0xa98fd141a4992deb
+ data8 0xa9c1335cae7446ba, 0xa9ea8686f556f645
+ data8 0xaa1c52d17906bb19, 0xaa4e59b046dab887
+ data8 0xaa809b9c60d1890b, 0xaab319102f3f9b33
+ data8 0xaadd5a18c1e21274, 0xab1045f2ac31bdf5
+ data8 0xab3ae3ab2df7231e, 0xab6e3f945d1e96fc
+ data8 0xaba1d953a08fa94e, 0xabcd090db7ef4c3f
+ data8 0xabf864602d7c323d, 0xac2ca5886ccf9b57
+ data8 0xac5861d4aa441f0f, 0xac8d183fe3a2fbed
+ data8 0xacb93703ff51571e, 0xace5830ad0c3f14b
+ data8 0xad11fca5d78b3ff2, 0xad4797fddf91a798
+ data8 0xad747701e559ebcb, 0xada184a47e9c7613
+ data8 0xadcec13ab0dda8ff, 0xadfc2d1a5fd21ba8
+ data8 0xae29c89a5053c33a, 0xae5794122b638df9
+ data8 0xae858fda8137ae0a, 0xaeb3bc4ccc56d3d1
+ data8 0xaee219c374c09920, 0xaf10a899d3235fe7
+ data8 0xaf3f692c341fe8b4, 0xaf6e5bd7db9ae6c2
+ data8 0xaf9d80fb081cd91b, 0xafc35ce063eb3787
+ data8 0xaff2ddcb5f28f03d, 0xb022923b148e05c5
+ data8 0xb0527a919adbf58b, 0xb078f3ab1d701c65
+ data8 0xb0a93a6870649f31, 0xb0d9b624d62ec856
+ data8 0xb100a5f53fb3c8e1, 0xb131821882f5540a
+ data8 0xb158bf8e4cb04055, 0xb189fd69d56b238f
+ data8 0xb1b189958e8108e4, 0xb1e32a8165b09832
+ data8 0xb20b0678fc271eec, 0xb23d0bd3f7592b6e
+ data8 0xb26538b2db8420dc, 0xb28d89e339ceca14
+ data8 0xb2c022ca12e55a16, 0xb2e8c6852c6b03f1
+ data8 0xb3118f4eda9fe40f, 0xb33a7d6268109ebe
+ data8 0xb36ddbc5ea70ec55, 0xb3971e9b39264023
+ data8 0xb3c0877ecc18e24a, 0xb3ea16ae3a6c905f
+ data8 0xb413cc67aa0e4d2d, 0xb43da8e9d163e1af
+ data8 0xb47233773b84d425, 0xb49c6825430fe730
+ data8 0xb4c6c46bcdb27dcf, 0xb4f1488c0b35d26f
+ data8 0xb51bf4c7c51f0168, 0xb546c9616087ab9c
+ data8 0xb571c69bdffd9a70, 0xb59cecbae56984c3
+ data8 0xb5bd64512bb14bb7, 0xb5e8d2a4bf5ba416
+ data8 0xb6146a9a1bc47819, 0xb6402c7749d621c0
+ data8 0xb66c1882fb435ea2, 0xb6982f048c999a56
+ data8 0xb6c47044075b4142, 0xb6e5bd6bfd02bafd
+ data8 0xb7124a2736ff8ef2, 0xb73f026a01e94177
+ data8 0xb760a959f1d0a7a7, 0xb78dae7e06868ab0
+ data8 0xb7badff8ad9e4e02, 0xb7dce25b8e17ae9f
+ data8 0xb80a6226904045e2, 0xb8380f1cafd73c1c
+ data8 0xb85a6ea8e321b4d8, 0xb8886b684ae7d2fa
+ data8 0xb8ab0726fa00cf5d, 0xb8d954a4d13b7cb1
+ data8 0xb8fc2d4f6cd9f04a, 0xb92acc851476b1ab
+ data8 0xb94de2d841a184c2, 0xb97cd4c36c92693c
+ data8 0xb9a0297f172665e3, 0xb9cf6f21e36c3924
+ data8 0xb9f3030951267208, 0xba229d6a618e7c59
+ data8 0xba467144459f9855, 0xba6a60c3c48f1a4b
+ data8 0xba9a76056b67ee7a, 0xbabea699563ada6e
+ data8 0xbae2f350b262cc4b, 0xbb1385a23be24e57
+ data8 0xbb3814975e17c680, 0xbb5cc031009bf467
+ data8 0xbb81889680024764, 0xbbb2c0d8703ae95d
+ data8 0xbbd7cd09ba3c5463, 0xbbfcf68c4977718f
+ data8 0xbc223d88cfc88eee, 0xbc47a2284fee4ff8
+ data8 0xbc79ac0916ed7b8a, 0xbc9f5670d1a13030
+ data8 0xbcc51f068cb95c1d, 0xbceb05f4b30a9bc0
+ data8 0xbd110b6604c7d306, 0xbd372f8598620f19
+ data8 0xbd5d727edb6b3c7e, 0xbd83d47d937bbc6d
+ data8 0xbdaa55addf1ae47d, 0xbdd0f63c36aa73f0
+ data8 0xbdf7b6556d550a15, 0xbe1e9626b1ffa96b
+ data8 0xbe4595dd903e5371, 0xbe6cb5a7f14bc935
+ data8 0xbe93f5b41d047cf7, 0xbebb5630bae4c15f
+ data8 0xbee2d74cd30a430c, 0xbf0a7937cf38d981
+ data8 0xbf323c217be2bc8c, 0xbf5a203a09342bbb
+ data8 0xbf74cad1c14ebfc4, 0xbf9ce6a497a89f78
+ data8 0xbfc52428bec6e72f, 0xbfed838fddab024b
+ data8 0xc016050c0420981a, 0xc03ea8cfabddc330
+ data8 0xc059d3cbd65ddbce, 0xc082b122a3c78c9d
+ data8 0xc0abb1499ae736c4, 0xc0d4d474c3aedaaf
+ data8 0xc0f054ca33eb3437, 0xc119b2c67e600ed0
+ data8 0xc1433453de2033ff, 0xc15ef3e44e10032d
+ data8 0xc188b130431d80e6, 0xc1b2929d6067730e
+ data8 0xc1ce9268f31cc734, 0xc1f8b0877c1b0c08
+ data8 0xc222f35a87b415ba, 0xc23f3467349e5c88
+ data8 0xc269b4e40e088c01, 0xc2945aac24daaf6e
+ data8 0xc2b0de05e43c1d66, 0xc2dbc275e1229d09
+ data8 0xc2f86fca9d80eeff, 0xc323938449a2587e
+ data8 0xc3406b40a538ed20, 0xc36bcee8211d15e0
+ data8 0xc397593adf2ba366, 0xc3b475b6206155d5
+ data8 0xc3e0410243b97383, 0xc3fd890709833d37
+ data8 0xc41ae295f7e7fa06, 0xc44709f7bb8a4dd2
+ data8 0xc4648fb0e0bec4c1, 0xc490f9a94695ba14
+ data8 0xc4aeac0173b7d390, 0xc4db5941007aa853
+ data8 0xc4f938aec206291a, 0xc52629e899dfd622
+ data8 0xc54436e44043b965, 0xc562563abf9ea07f
+ data8 0xc58fa7d1dc42921c, 0xc5adf561b91e110a
+ data8 0xc5cc5591bdbd82fa, 0xc5fa08f1ff20593c
+ data8 0xc618980a79ce6862, 0xc6373a09e34b50fa
+ data8 0xc66550a6e0baaf35, 0xc6842241926342c9
+ data8 0xc6a3070b7c93bb9e, 0xc6d18260bb84081b
+ data8 0xc6f0977c9416828b, 0xc70fc0117c641630
+ data8 0xc72efc34d7e615be, 0xc75dfb441594141e
+ data8 0xc77d68aa019bda4c, 0xc79ce9ea478dbc4f
+ data8 0xc7bc7f1ae453219d, 0xc7ec0476e15e141a
+ data8 0xc80bcbe16f1d540f, 0xc82ba78a5d349735
+ data8 0xc84b978847a06b87, 0xc86b9bf1ee817bc6
+ data8 0xc88bb4de3667cdf4, 0xc8bc00e7fe9e23a3
+ data8 0xc8dc4d7ff2d25232, 0xc8fcaeebcb40eb47
+ data8 0xc91d25431426a663, 0xc93db09d7fdb2949
+ data8 0xc95e5112e721582a, 0xc97f06bb49787677
+ data8 0xc99fd1aecd6e1b06, 0xc9d12a3e27bb1625
+ data8 0xc9f22ad82ba3d5f0, 0xca134113105e67b2
+ data8 0xca346d07b045a876, 0xca55aecf0e94bb88
+ data8 0xca77068257be9bab, 0xca98743ae1c693a8
+ data8 0xcab9f8122c99a101, 0xcadb9221e268c3b5
+ data8 0xcafd4283d8043dfd, 0xcb1f09520d37c6fb
+ data8 0xcb51ddcb9e93095e, 0xcb95f333968ad59b
+ data8 0xcbda64292d3ffd97, 0xcc1f3184af961596
+ data8 0xcc5bb1ac954d33e2, 0xcca12e9831fc6402
+ data8 0xcce70a67b64f24ad, 0xcd24794726477ea5
+ data8 0xcd6b096a0b70ee87, 0xcda9177738b15a90
+ data8 0xcdf05f2247dffab9, 0xce2f0f347f96f906
+ data8 0xce6e0be0cd551a61, 0xceb666b2c347d1de
+ data8 0xcef609b0cb874f00, 0xcf35fb5447e5c765
+ data8 0xcf763c47ee869f00, 0xcfb6cd3888d71785
+ data8 0xcff7aed4fbfbb447, 0xd038e1ce5167e3c6
+ data8 0xd07a66d7bfa0ebba, 0xd0bc3ea6b32d1b21
+ data8 0xd0f4f0e8f36c1bf8, 0xd1376458e34b037e
+ data8 0xd17a2ca133f78572, 0xd1bd4a80301c5715
+ data8 0xd1f71682b2fa4575, 0xd23ad555f773f059
+ data8 0xd2752c7039a5bf73, 0xd2b98ee008c06b59
+ data8 0xd2f4735ffd700280, 0xd32f99ed6d9ac0e1
+ data8 0xd374f0666c75d51c, 0xd3b0a7d13618e4a1
+ data8 0xd3eca2ea53bcec0c, 0xd428e23874f13a17
+ data8 0xd46f82fe293bc6d3, 0xd4ac57e9b7186420
+ data8 0xd4e972becb04e8b8, 0xd526d40a7a9b43a3
+ data8 0xd5647c5b73917370, 0xd5a26c4201bd6d13
+ data8 0xd5e0a45015350a7e, 0xd614b539c6194104
+ data8 0xd6537310e224283f, 0xd6927ab62244c917
+ data8 0xd6d1ccc1fc4ef4b7, 0xd71169cea98fdded
+ data8 0xd746a66a5bc9f6d9, 0xd786ce8f0fae5317
+ data8 0xd7bc7ff214c4e75a, 0xd7fd35467a517ed1
+ data8 0xd83e38838648d815, 0xd874a1db598b8951
+ data8 0xd8ab42205b80edaf, 0xd8ed1849d202f965
+ data8 0xd92432bd5a173685, 0xd9669ca45b03c23e
+ data8 0xd99e3327cf89574e, 0xd9d602b19b100466
+ data8 0xda0e0ba86c096841, 0xda5195fcdb1c3dce
+ data8 0xda8a1eb87a491f6c, 0xdac2e230b91c3f84
+ data8 0xdafbe0d0b66aea30, 0xdb351b04a8fafced
+ data8 0xdb6e9139e33cdd8e, 0xdba843ded7151ea1
+ data8 0xdbe2336319b61fc8, 0xdc1c60376789fa68
+ data8 0xdc56cacda82d0cd5, 0xdc917398f2797814
+ data8 0xdccc5b0d90a3e628, 0xdd0781a10469f0f2
+ data8 0xdd42e7ca0b52838f, 0xdd729ad01c69114d
+ data8 0xddae749c001fbf5e, 0xddea8f50a51c69b1
+ data8 0xde26eb69a0f0f111, 0xde576480262399bc
+ data8 0xde943789645933c8, 0xded14d58139a28af
+ data8 0xdf025c00bbf2b5c7, 0xdf3feb44d723a713
+ data8 0xdf715bc16c159be0, 0xdfaf66240e29cda8
+ data8 0xdfe139cbf6e19bdc, 0xe01fc0fe94d9fc52
+ data8 0xe051f92ffcc0bd60, 0xe090feec9c9a06ac
+ data8 0xe0c39d0c9ff862d6, 0xe0f668eeb99f188d
+ data8 0xe1362890eb663139, 0xe1695c7212aecbaa
+ data8 0xe19cbf0391bbbbe9, 0xe1d050901c531e85
+ data8 0xe2110903b4f4047a, 0xe2450559b4d80b6d
+ data8 0xe27931a231554ef3, 0xe2ad8e2ac3c5b04b
+ data8 0xe2e21b41b9694cce, 0xe316d93615862714
+ data8 0xe3590bd86a0d30f9, 0xe38e38e38e38e38e
+ data8 0xe3c397d1e6db7839, 0xe3f928f5953feb9e
+ data8 0xe42eeca17c62886c, 0xe464e32943446305
+ data8 0xe49b0ce15747a8a2, 0xe4d16a1eee94e9d4
+ data8 0xe4fa52107353f67d, 0xe5310a471f4d2dc3
+ data8 0xe567f6f1c2b9c224, 0xe59f18689a9e4c9a
+ data8 0xe5d66f04b8a68ecf, 0xe60dfb2005c192e9
+ data8 0xe645bd1544c7ea51, 0xe66fb21b505b20a0
+ data8 0xe6a7d32af4a7c59a, 0xe6e02b129c6a5ae4
+ data8 0xe70a9136a7403039, 0xe74349fb2d92a589
+ data8 0xe77c3a9c86ed7d42, 0xe7a713f88151518a
+ data8 0xe7e067453317ed2b, 0xe819f37a81871bb5
+ data8 0xe8454236bfaeca14, 0xe87f32f24c3fc90e
+ data8 0xe8aacd8688892ba6, 0xe8e523fd32f606f7
+ data8 0xe9110b5311407927, 0xe94bc8bf0c108fa3
+ data8 0xe977fdc439c2ca3c, 0xe9b3236528fc349e
+ data8 0xe9dfa70b745ac1b4, 0xea1b36268d0eaa38
+ data8 0xea480963fd394197, 0xea84034425f27484
+ data8 0xeab12713138dd1cc, 0xeade6db73a5e503b
+ data8 0xeb1b0268343b121b, 0xeb489b0b2bdb5f14
+ data8 0xeb765721e85f03d0, 0xebb389645f222f62
+ data8 0xebe198f090607e0c, 0xec0fcc9321024509
+ data8 0xec3e247da8b82f61, 0xec7c27d21321c9f7
+ data8 0xecaad5278824e453, 0xecd9a76d097d4e77
+ data8 0xed089ed5dcd99446, 0xed37bb95add09a1c
+ data8 0xed76c70508f904b6, 0xeda63bb05e7f93c6
+ data8 0xedd5d661daed2dc4, 0xee05974eef86b903
+ data8 0xee357ead791fc670, 0xee658cb3c134a463
+ data8 0xee95c1987f080211, 0xeec61d92d8c4314f
+ data8 0xeef6a0da64a014ac, 0xef274ba72a07c811
+ data8 0xef581e31a2c91260, 0xef8918b2bc43aec6
+ data8 0xefba3b63d89d7cbf, 0xefeb867ecffaa607
+ data8 0xf01cfa3df1b9c9fa, 0xf04e96dc05b43e2d
+ data8 0xf0805c944d827454, 0xf0b24ba285c495cb
+ data8 0xf0e46442e76f6569, 0xf116a6b2291d7896
+ data8 0xf1383fa9e9b5b381, 0xf16ac84f90083b9b
+ data8 0xf19d7b686dcb03d7, 0xf1d0593311db1757
+ data8 0xf20361ee8f1c711e, 0xf23695da7de51d3f
+ data8 0xf258d095e465cc35, 0xf28c4d0bfc982b34
+ data8 0xf2bff55eb3f0ea71, 0xf2f3c9cf9884636e
+ data8 0xf31670135ab9cc0f, 0xf34a8e9f0b54cdfb
+ data8 0xf37ed9fa6b8add3f, 0xf3a1cfe884ef6bb6
+ data8 0xf3d66689dcc8e8d3, 0xf40b2ab069d5c96a
+ data8 0xf42e718b90c8bc16, 0xf463822a0a3b4b00
+ data8 0xf498c1076015faf8, 0xf4bc5a19a33990b5
+ data8 0xf4f1e6a7d6f5425f, 0xf527a232cf6be334
+ data8 0xf54b8ecdcda90851, 0xf5819949c7ad87b4
+ data8 0xf5a5bac9213b48a9, 0xf5dc1501f324a812
+ data8 0xf6006bee86b5589e, 0xf63716b2fa067fa4
+ data8 0xf66df22fb6132b9c, 0xf6929fb98225deb1
+ data8 0xf6c9cd13021e3fea, 0xf6eeb177472cedae
+ data8 0xf713abf4cb0b3afb, 0xf74b4d5333684ef1
+ data8 0xf7707f75a72f8e94, 0xf7a874b97927af44
+ data8 0xf7cddf140aedf1d8, 0xf806291bacb7f7a9
+ data8 0xf82bcc43b92eafef, 0xf8646bf0defb759e
+ data8 0xf88a487dfc3ff5f7, 0xf8b03c2b46cdc17f
+ data8 0xf8e95541c152ae7a, 0xf90f832c2700c160
+ data8 0xf935c88e0c7f419b, 0xf96f5cd84fd86873
+ data8 0xf995dd53ebdd9d6d, 0xf9bc75a034436a41
+ data8 0xf9f686f26d5518de, 0xfa1d5b39b910a8c5
+ data8 0xfa4447acc4ecbfd2, 0xfa7ed7e51e6fdfb4
+ data8 0xfaa601394d49a1a0, 0xfacd431644ce0e40
+ data8 0xfaf49d96f7a75909, 0xfb2fd3c65e562fd5
+ data8 0xfb576c5762024805, 0xfb7f1debc22c4040
+ data8 0xfba6e89f32d0190a, 0xfbe2c803a0894893
+ data8 0xfc0ad1ff0ed9ecf0, 0xfc32f57bdfbcbe7f
+ data8 0xfc5b32968f99b21c, 0xfc83896bc861ab08
+ data8 0xfcabfa1861ed4815, 0xfce8d3cea7d3163e
+ data8 0xfd118595143ee273, 0xfd3a519943d4865a
+ data8 0xfd6337f8e1ae5a4b, 0xfd8c38d1c8e927eb
+ data8 0xfdb5544205095a53, 0xfdde8a67d2613531
+ data8 0xfe07db619e781611, 0xfe460768d80bf758
+ data8 0xfe6f9bfb06cd32f6, 0xfe994bcd3d14fcc2
+ data8 0xfec316fecaf3f2ab, 0xfeecfdaf33fadb80
+ data8 0xff16fffe2fa8fad6, 0xff411e0ba9db886d
+ data8 0xff6b57f7c33e4e9a, 0xff95ade2d1bd7358
+ data8 0xffc01fed60f86fb5, 0xffeaae3832b63956
LOCAL_OBJECT_END(T_table)
-
-
-
-
LOCAL_OBJECT_START(D_table)
-data4 0x1e50f488, 0x1ebdc559, 0x1e649ec1, 0x9eed9b2c
-data4 0x9e511c44, 0x9ec6d551, 0x9eefe248, 0x9e313854
-data4 0x9f54ff18, 0x9d231411, 0x1ee5d63c, 0x9edf6b95
-data4 0x9f332aaa, 0x1dc92a84, 0x1f73fb7b, 0x1e32f100
-data4 0x9ea636f5, 0x9f6c3353, 0x9f405552, 0x1f33fd97
-data4 0x1e975291, 0x9e59a11e, 0x1e47b0ba, 0x9d8ad33e
-data4 0x1ea51bf6, 0x1f25d782, 0x9ecf534d, 0x1f55436f
-data4 0x1d0975e4, 0x9f0633a1, 0x1f3e840a, 0x1f523a4c
-data4 0x9f53cbbc, 0x9c8b5661, 0x9f6bc8eb, 0x1f4f6c7b
-data4 0x9ed9b376, 0x9f5b30b6, 0x1f64fa5e, 0x1cbcc3e0
-data4 0x1f343548, 0x1f62a6a2, 0x9f336abb, 0x9f1d15af
-data4 0x1f476c83, 0x1ea86421, 0x1f33b2cf, 0x9e8f1348
-data4 0x1f6fa829, 0x9f30ee3a, 0x9ebd6146, 0x1f2db598
-data4 0x1ef9600d, 0x1f5b1427, 0x9edd741b, 0x1f51ef4e
-data4 0x9f1aa57d, 0x9ee9b5e0, 0x9f17ecd7, 0x1ead71ff
-data4 0x1f6c910e, 0x9e1837df, 0x9f0f17d9, 0x9e8350dd
-data4 0x9d292f1b, 0x9e33b3ab, 0x9d6f0fe8, 0x9ed8c7cc
-data4 0x9ec598c8, 0x9d56758c, 0x1e090c1e, 0x9ed4b941
-data4 0x9f1fc4cf, 0x1f63513a, 0x9edd0abc, 0x1e3924dd
-data4 0x1f60d56f, 0x1ea84424, 0x9e88f4fb, 0x1f205c09
-data4 0x1ec9ae4e, 0x1d2d5738, 0x9f2c9f6d, 0x1e0765c2
-data4 0x1e8bbdd7, 0x9f16d9f1, 0x9ea62627, 0x1f13904c
-data4 0x1e566ab8, 0x9dca3d1a, 0x9e91f2a1, 0x9f14641c
-data4 0x9f278946, 0x1f490c1e, 0x1f575eb6, 0x1f50b3fd
-data4 0x9da32efb, 0x1ea95e59, 0x9e41e058, 0x9eada15f
-data4 0x9e4fe66c, 0x1f3abc98, 0x1f1b8d1e, 0x9ece97e4
-data4 0x1d188aed, 0x9e89b6ee, 0x1f287478, 0x9e8a161a
-data4 0x1e4749f7, 0x9e68084a, 0x1e867f33, 0x9f462b63
-data4 0x1db30792, 0x1f59a767, 0x9d1da4ae, 0x9f472a33
-data4 0x1d1e91cd, 0x9f414824, 0x9f473d4f, 0x1f4b5783
-data4 0x9f5b04b8, 0x9f5c205b, 0x1f309617, 0x9f0d6852
-data4 0x9d96a609, 0x9f0965c2, 0x9e23f467, 0x9f089884
-data4 0x9ec71458, 0x9ed6e955, 0x1e5e8691, 0x1f5b2bbc
-data4 0x9f128268, 0x1ed40f5b, 0x1dc430ce, 0x1f345986
-data4 0x1d778f72, 0x1e9b11d6, 0x9f5a40be, 0x9e07f61a
-data4 0x9ed641a7, 0x9f334787, 0x1e952fd0, 0x1edeb5e2
-data4 0x9e9f3eb1, 0x9e379fd9, 0x1f13102a, 0x9e5e80e1
-data4 0x1c757944, 0x1dae2260, 0x1f183ab7, 0x1e55d576
-data4 0x9e6bb99f, 0x9f52d7cb, 0x9e73a0f5, 0x1d4e1d14
-data4 0x9dd05b53, 0x1f2261e4, 0x9d4ee73d, 0x1ede515e
-data4 0x1f22a573, 0x9ecac348, 0x1e6a2ac0, 0x1e2787d2
-data4 0x9eb64b87, 0x1f0c69c6, 0x9f470a01, 0x9d7c1686
-data4 0x1e468ebe, 0x9f21ee2f, 0x9ee52116, 0x9e20f715
-data4 0x1ed18533, 0x9f005b38, 0x9f20cb95, 0x1da72967
-data4 0x1f1ba5d7, 0x1e2f8b16, 0x9c794f96, 0x9ca74ea3
-data4 0x1f410555, 0x9eff2b96, 0x1ce8f0b1, 0x1f0cee77
-data4 0x1f191edd, 0x9ed5fcbc, 0x1f30f242, 0x9e0ad369
-data4 0x1ed8f3c8, 0x1f52bb0e, 0x9e9ce408, 0x1f18907f
-data4 0x9ecdad40, 0x9e8af91d, 0x1d46698a, 0x9f4b93d6
-data4 0x9f3f5d33, 0x1e2e52f7, 0x9f13aeec, 0x9f3b1969
-data4 0x1f0996f4, 0x9f2a03df, 0x1e264767, 0x1f3ab1fb
-data4 0x9f3193c9, 0x9f21ce22, 0x9eab624c, 0x9ecd8fb1
-data4 0x1eaf9a85, 0x1f0c6a2c, 0x1eecbe61, 0x1f3fead9
-data4 0x1f1d3a29, 0x1e9099ce, 0x1eadd875, 0x1e4dbfb8
-data4 0x9dc640d2, 0x1f413680, 0x9f3f57b3, 0x1dfa1553
-data4 0x1ec71c6b, 0x1e00cc00, 0x9f271e55, 0x1e5a88bb
-data4 0x1f46cc2b, 0x1ee80ff9, 0x9e29c6f3, 0x1f15e229
-data4 0x9ea83d66, 0x1f37408e, 0x9dacb66e, 0x1e6f6259
-data4 0x9f106973, 0x1dd4e5ac, 0x1cbfdcc8, 0x9f231c9f
-data4 0x9e8677e4, 0x9e9e695a, 0x1efd782b, 0x9dd26959
-data4 0x9e80af69, 0x1f386fb3, 0x1f022e8c, 0x9e839967
-data4 0x1ce6796f, 0x1e4c22c2, 0x1e57ef24, 0x1e919804
-data4 0x9d7ea090, 0x1e40140a, 0x1f261b46, 0x1db75be2
-data4 0x1f145019, 0x9e3102b9, 0x9e22507b, 0x1eae813c
-data4 0x1f117e97, 0x1f282296, 0x1f3814b3, 0x1e17977b
-data4 0x1f39d6ff, 0x9f1c81b9, 0x9eb5bcad, 0x1f0f596e
-data4 0x1e757fd5, 0x9f090daa, 0x9f2532fc, 0x9eebafbb
-data4 0x1f086556, 0x9eeedde8, 0x9f32e174, 0x1e33c030
-data4 0x1f1f145a, 0x1e6e556c, 0x1e419ffb, 0x9eb6019a
-data4 0x9e872a2e, 0x1e113136, 0x1e93096f, 0x1f39be40
-data4 0x1f1665ad, 0x9db81d7d, 0x9cd29091, 0x1e3f4af7
-data4 0x9f23176c, 0x9eccf9b3, 0x1f34fc6c, 0x9ed36894
-data4 0x1ef08e06, 0x9f3b46bb, 0x9f2c850b, 0x1f1565a4
-data4 0x1e887bc3, 0x1e92629c, 0x9f11ac9e, 0x9e5579f3
-data4 0x1e4d5790, 0x9ee1c3d1, 0x9e916aec, 0x9eb8d9b8
-data4 0x1db46105, 0x1e168663, 0x1f26a942, 0x9f0f0383
-data4 0x9f079032, 0x9ecae1d8, 0x1ed3b34c, 0x9edc5ee6
-data4 0x9e8a75a7, 0x1f3c3de2, 0x9ee5041e, 0x1f08c727
-data4 0x1d02d7ae, 0x9f36adda, 0x9ef9a857, 0x9ef5cb3a
-data4 0x9eee73da, 0x9da5d629, 0x1e0e99be, 0x1e5159b9
-data4 0x1f2eac89, 0x9e8eedc5, 0x1dd0ec90, 0x1f229aff
-data4 0x1ed9c3e6, 0x1e95c55a, 0x9f0c24e4, 0x1e8afed6
-data4 0x1e599a96, 0x1e881b21, 0x1eab84b9, 0x9ba2bb0e
-data4 0x9e33ab10, 0x1f1710b5, 0x1ebfa271, 0x9e90bbc5
-data4 0x9f32515b, 0x9b32aae8, 0x1eda455c, 0x1da8186e
-data4 0x9e8917ff, 0x1ec4d08e, 0x1c90069d, 0x9f2f1d29
-data4 0x9ecee86d, 0x9f234d1f, 0x1f370724, 0x1da87496
-data4 0x1e7959f0, 0x9e8ada34, 0x1f1c7f6f, 0x1edd576b
-data4 0x9de91e8b, 0x1ec4ef89, 0x1f32078a, 0x1e9925e2
-data4 0x9d8eeccb, 0x9ea3d011, 0x1f231fdf, 0x9f1dbdfa
-data4 0x1e7507a3, 0x1ec42614, 0x9e8693cb, 0x9ec68398
-data4 0x1d5b05fb, 0x1de32119, 0x9f003429, 0x9ec16d92
-data4 0x9f095315, 0x9f119d2c, 0x9ed0c984, 0x9f090662
-data4 0x9e59aa1f, 0x9ed4e64a, 0x9f2798a7, 0x9f23624d
-data4 0x1e0467d9, 0x1f22e7e7, 0x1e915256, 0x9cb4df70
-data4 0x9e6f687c, 0x9e3c35e5, 0x9e5757ab, 0x9f031fa1
-data4 0x1f25bff7, 0x1f0e58c2, 0x1ef3ce04, 0x1f002ecb
-data4 0x9ebdc836, 0x9ed657dd, 0x9f149441, 0x9e8544b2
-data4 0x1cd8ff1e, 0x1e9bb463, 0x1eaa1c5c, 0x1f200c1a
-data4 0x1edbfbaf, 0x1f18724d, 0x9ed63c22, 0x9f08e045
-data4 0x1f13ad07, 0x9e949311, 0x9f0c50d4, 0x1e824516
-data4 0x1d5e52ba, 0x1d583fbd, 0x1e3b60a9, 0x9effe6d3
-data4 0x1f0d0508, 0x1f00be77, 0x9e404bfa, 0x9e1ca381
-data4 0x9f084dd8, 0x9e6db85d, 0x1db698e4, 0x9ebd1871
-data4 0x9ecc2679, 0x1ee68442, 0x1edb1050, 0x9dbc96a4
-data4 0x9f27c1f4, 0x1c99b756, 0x1eb4400a, 0x9f24390a
-data4 0x1d927875, 0x9f074faa, 0x1e9dc2c3, 0x1f13c0d2
-data4 0x1e3c9685, 0x9e6b6f75, 0x9db9cb31, 0x1ea5f3aa
-data4 0x9d992c61, 0x1f1015e4, 0x1f194f70, 0x9e19d2b3
-data4 0x9d89116c, 0x1f23cd35, 0x1e33d3a2, 0x1ee331b8
-data4 0x1d5ba7ec, 0x9f273788, 0x9e6907f4, 0x9ed5f912
-data4 0x9edd458d, 0x1e2ca7b2, 0x1ef81fe4, 0x1dc7ade6
-data4 0x1e876e51, 0x9f04ec89, 0x1f1da63a, 0x1ec02bd0
-data4 0x9e71326f, 0x1e7847b4, 0x1f0de618, 0x9e036cb6
-data4 0x1eec61e2, 0x1ef1758b, 0x9ee880a3, 0x1ed269d7
-data4 0x1e27edd3, 0x9e8a81a1, 0x1eacb84d, 0x9e1aad37
-data4 0x1f1aa8f7, 0x1e9bbd90, 0x1ea1b61f, 0x9ed41c2f
-data4 0x1dbb5dd6, 0x1f0ec733, 0x9df06b1b, 0x1e06fef1
-data4 0x9edede3a, 0x1edeb5e2, 0x1f0e63ee, 0x9db316bb
-data4 0x9efc1ad3, 0x1f01fbb5, 0x9cc0d078, 0x1ea28b36
-data4 0x9e9dd205, 0x9e791534, 0x1da1c8d5, 0x9e8195cc
-data4 0x1f0681a4, 0x1eeaf1e2, 0x9ef83b37, 0x9f22a92b
-data4 0x1eabc4ce, 0x1f10eefb, 0x1e06d9aa, 0x1e7cacd5
-data4 0x1f1ea087, 0x1eb21983, 0x9f100c78, 0x1e840abe
-data4 0x9efab66c, 0x1f183fa8, 0x9e84ee68, 0x9eea083d
-data4 0x9ee23a74, 0x1f1351d7, 0x9ec5d42a, 0x9f071f57
-data4 0x9ef578d9, 0x9f1aa7e7, 0x1eb02044, 0x1f151a2e
-data4 0x9c0dc8b2, 0x9ef4087a, 0x1ec12b93, 0x1c1a946b
-data4 0x1e89946f, 0x9dafe8c3, 0x1d295288, 0x9e8497ab
-data4 0x1ec000c6, 0x1e102f29, 0x1e542256, 0x1e67d44d
-data4 0x1ef688d8, 0x1f0e0f29, 0x1e67861f, 0x1e869748
-data4 0x1ee6aa6e, 0x9e4d228b, 0x9e50be5b, 0x1e9fe225
-data4 0x9ea34102, 0x9e628a3b, 0x9ed9fd83, 0x1ecd7109
-data4 0x1f1864ff, 0x1ea19b76, 0x1db0d1c9, 0x9dff519b
-data4 0x1e8fea71, 0x9ee82e9a, 0x9f08919b, 0x9ef5c8ae
-data4 0x9ee446a4, 0x1ea59444, 0x1eb74230, 0x1ea13fbf
-data4 0x9ea6a3ea, 0x1e5f2797, 0x9e0adb07, 0x9d3adadd
-data4 0x1ebf2ee2, 0x1da19bfa, 0x1e8dea6d, 0x1ec4fea9
-data4 0x1e669f22, 0x1dc5f919, 0x9ed25caa, 0x1ee475b1
-data4 0x1ed0603e, 0x9eacb35c, 0x1dc00b27, 0x1e2f9991
-data4 0x1e7b0406, 0x1eaa3387, 0x9d865bde, 0x1eb78a48
-data4 0x1c40ae2e, 0x1ee9838b, 0x9f0f0d7f, 0x1e3e5d26
-data4 0x1e99e7a6, 0x9e681ccf, 0x9e93ed65, 0x9eeb6a66
-data4 0x1e29e9af, 0x9e96f923, 0x9e74f11d, 0x9f1474da
-data4 0x1eec2ea7, 0x1ebf7aa3, 0x9c25dcca, 0x9f0553c2
-data4 0x9e599efd, 0x1d2ab490, 0x1e95d7cd, 0x9ee4b20e
-data4 0x9d988ce5, 0x9ef9787e, 0x9dbbba5b, 0x9f12c304
-data4 0x1e3b9d70, 0x1e7bcae8, 0x9d98bb6e, 0x9e8e6b01
-data4 0x9f07d03b, 0x9d67c822, 0x9f0ef69e, 0x1c7c0fe3
-data4 0x9e9bfbb9, 0x9e83b84b, 0x1efbf15e, 0x9ecfa6a6
-data4 0x9c91158e, 0x9ecf6770, 0x1ee1e3a8, 0x9dc95ec0
-data4 0x1ef603f7, 0x1d5e52ba, 0x1c477d1b, 0x9e955cd8
-data4 0x1ed665b0, 0x9e8376c4, 0x9c0ee88e, 0x1e8c989e
-data4 0x1ea2df29, 0x9d961e5c, 0x1e101813, 0x1e7fffff
-data4 0x9e5abff4, 0x1dbddd71, 0x1eb69100, 0x1e71f114
-data4 0x1e9ca798, 0x1ef62c8d, 0x9db4e55a, 0x1dbe69ce
-data4 0x9ef1c01f, 0x1f044a2a, 0x9eb9e0d7, 0x9ee59745
-data4 0x9e874803, 0x1ea0b418, 0x9e13572a, 0x1ddbb3a2
-data4 0x9ec0e391, 0x1e89fba1, 0x1ee8b261, 0x9e5d25f0
-data4 0x9ef222cb, 0x9ef135ec, 0x1ea04b9a, 0x9f04291f
-data4 0x9e969254, 0x9ee32f08, 0x9ed909d3, 0x9e362640
-data4 0x9ec20735, 0x1e50131b, 0x9ed4e049, 0x1ee8e817
-data4 0x1e1e09c0, 0x9ea643c5, 0x9e5a1ab6, 0x9e389059
-data4 0x1e560947, 0x1d02b877, 0x1e4475ab, 0x9ea9aaf6
-data4 0x1e95bc5e, 0x1eaf6afd, 0x1d43067d, 0x9d043821
-data4 0x9e97baa9, 0x1de5c4f9, 0x9e9a0069, 0x9e1b9944
-data4 0x1eb13686, 0x9eb907eb, 0x1e059589, 0x1cbd0f93
-data4 0x9eb7e6ae, 0x1e9fa175, 0x1ee5bdf4, 0x1e8052f7
-data4 0x9c80d1e3, 0x1bfbe28e, 0x9e672b3b, 0x9ecacf19
-data4 0x9e3c04be, 0x1dfe8c5c, 0x1e1ba9cb, 0x1eb40b1e
-data4 0x1ec7e7f6, 0x9d0d45b3, 0x1ef0113b, 0x9a155fa3
-data4 0x1e28ec3b, 0x1e7ca8df, 0x9d2f91b4, 0x1eccd9ed
-data4 0x9ed943bc, 0x9ccaab19, 0x9e8a5c58, 0x1ec3bca8
-data4 0x1ed78dc7, 0x9ed391a8, 0x9e938f6e, 0x9ec4a030
-data4 0x9e80346e, 0x1e7a4686, 0x9e284315, 0x9e39584c
-data4 0x1ebdc9b4, 0x9e9cfce5, 0x9ef55c65, 0x1e2941e7
-data4 0x9efbe59f, 0x1d87c41b, 0x1e40befc, 0x1e3d05b5
-data4 0x1de9ea67, 0x1ec9a21c, 0x1decb69a, 0x1df6e75a
-data4 0x9e8030ab, 0x9db20540, 0x9ef1e977, 0x1e3cdc43
-data4 0x1e0492b0, 0x9e91d872, 0x1e775346, 0x9e939978
-data4 0x1eb2714e, 0x1e49a203, 0x9e10195a, 0x1ef1ffc3
-data4 0x9ea8b709, 0x9e832e27, 0x1ed5ac3b, 0x1edb20a6
-data4 0x1e4dbd4e, 0x1efbb932, 0x1d8170ec, 0x1e6c4849
-data4 0x1f008e17, 0x1e8000c4, 0x1d855ecf, 0x9e37cb85
-data4 0x1ecffdf5, 0x1eba6519, 0x9edbe600, 0x1ea3e5e7
-data4 0x1ed4fb39, 0x1f00be77, 0x1e6f4484, 0x9e9e7107
-data4 0x9e30b29d, 0x9ee6e174, 0x1e3a2656, 0x9dd72f3f
-data4 0x9ee12138, 0x1ed16fed, 0x9ece8a02, 0x9ca5b249
-data4 0x9eafd508, 0x9ef0e9fc, 0x1d1307ac, 0x1eecee20
-data4 0x1cf60c6f, 0x9d556216, 0x9eaed175, 0x9ec919f4
-data4 0x1ec2c988, 0x1cd82772, 0x9dc99456, 0x1eab0467
-data4 0x1e89b36f, 0x1c757944, 0x1eef9abd, 0x9e98664d
+ data4 0x1e50f488, 0x1ebdc559, 0x1e649ec1, 0x9eed9b2c
+ data4 0x9e511c44, 0x9ec6d551, 0x9eefe248, 0x9e313854
+ data4 0x9f54ff18, 0x9d231411, 0x1ee5d63c, 0x9edf6b95
+ data4 0x9f332aaa, 0x1dc92a84, 0x1f73fb7b, 0x1e32f100
+ data4 0x9ea636f5, 0x9f6c3353, 0x9f405552, 0x1f33fd97
+ data4 0x1e975291, 0x9e59a11e, 0x1e47b0ba, 0x9d8ad33e
+ data4 0x1ea51bf6, 0x1f25d782, 0x9ecf534d, 0x1f55436f
+ data4 0x1d0975e4, 0x9f0633a1, 0x1f3e840a, 0x1f523a4c
+ data4 0x9f53cbbc, 0x9c8b5661, 0x9f6bc8eb, 0x1f4f6c7b
+ data4 0x9ed9b376, 0x9f5b30b6, 0x1f64fa5e, 0x1cbcc3e0
+ data4 0x1f343548, 0x1f62a6a2, 0x9f336abb, 0x9f1d15af
+ data4 0x1f476c83, 0x1ea86421, 0x1f33b2cf, 0x9e8f1348
+ data4 0x1f6fa829, 0x9f30ee3a, 0x9ebd6146, 0x1f2db598
+ data4 0x1ef9600d, 0x1f5b1427, 0x9edd741b, 0x1f51ef4e
+ data4 0x9f1aa57d, 0x9ee9b5e0, 0x9f17ecd7, 0x1ead71ff
+ data4 0x1f6c910e, 0x9e1837df, 0x9f0f17d9, 0x9e8350dd
+ data4 0x9d292f1b, 0x9e33b3ab, 0x9d6f0fe8, 0x9ed8c7cc
+ data4 0x9ec598c8, 0x9d56758c, 0x1e090c1e, 0x9ed4b941
+ data4 0x9f1fc4cf, 0x1f63513a, 0x9edd0abc, 0x1e3924dd
+ data4 0x1f60d56f, 0x1ea84424, 0x9e88f4fb, 0x1f205c09
+ data4 0x1ec9ae4e, 0x1d2d5738, 0x9f2c9f6d, 0x1e0765c2
+ data4 0x1e8bbdd7, 0x9f16d9f1, 0x9ea62627, 0x1f13904c
+ data4 0x1e566ab8, 0x9dca3d1a, 0x9e91f2a1, 0x9f14641c
+ data4 0x9f278946, 0x1f490c1e, 0x1f575eb6, 0x1f50b3fd
+ data4 0x9da32efb, 0x1ea95e59, 0x9e41e058, 0x9eada15f
+ data4 0x9e4fe66c, 0x1f3abc98, 0x1f1b8d1e, 0x9ece97e4
+ data4 0x1d188aed, 0x9e89b6ee, 0x1f287478, 0x9e8a161a
+ data4 0x1e4749f7, 0x9e68084a, 0x1e867f33, 0x9f462b63
+ data4 0x1db30792, 0x1f59a767, 0x9d1da4ae, 0x9f472a33
+ data4 0x1d1e91cd, 0x9f414824, 0x9f473d4f, 0x1f4b5783
+ data4 0x9f5b04b8, 0x9f5c205b, 0x1f309617, 0x9f0d6852
+ data4 0x9d96a609, 0x9f0965c2, 0x9e23f467, 0x9f089884
+ data4 0x9ec71458, 0x9ed6e955, 0x1e5e8691, 0x1f5b2bbc
+ data4 0x9f128268, 0x1ed40f5b, 0x1dc430ce, 0x1f345986
+ data4 0x1d778f72, 0x1e9b11d6, 0x9f5a40be, 0x9e07f61a
+ data4 0x9ed641a7, 0x9f334787, 0x1e952fd0, 0x1edeb5e2
+ data4 0x9e9f3eb1, 0x9e379fd9, 0x1f13102a, 0x9e5e80e1
+ data4 0x1c757944, 0x1dae2260, 0x1f183ab7, 0x1e55d576
+ data4 0x9e6bb99f, 0x9f52d7cb, 0x9e73a0f5, 0x1d4e1d14
+ data4 0x9dd05b53, 0x1f2261e4, 0x9d4ee73d, 0x1ede515e
+ data4 0x1f22a573, 0x9ecac348, 0x1e6a2ac0, 0x1e2787d2
+ data4 0x9eb64b87, 0x1f0c69c6, 0x9f470a01, 0x9d7c1686
+ data4 0x1e468ebe, 0x9f21ee2f, 0x9ee52116, 0x9e20f715
+ data4 0x1ed18533, 0x9f005b38, 0x9f20cb95, 0x1da72967
+ data4 0x1f1ba5d7, 0x1e2f8b16, 0x9c794f96, 0x9ca74ea3
+ data4 0x1f410555, 0x9eff2b96, 0x1ce8f0b1, 0x1f0cee77
+ data4 0x1f191edd, 0x9ed5fcbc, 0x1f30f242, 0x9e0ad369
+ data4 0x1ed8f3c8, 0x1f52bb0e, 0x9e9ce408, 0x1f18907f
+ data4 0x9ecdad40, 0x9e8af91d, 0x1d46698a, 0x9f4b93d6
+ data4 0x9f3f5d33, 0x1e2e52f7, 0x9f13aeec, 0x9f3b1969
+ data4 0x1f0996f4, 0x9f2a03df, 0x1e264767, 0x1f3ab1fb
+ data4 0x9f3193c9, 0x9f21ce22, 0x9eab624c, 0x9ecd8fb1
+ data4 0x1eaf9a85, 0x1f0c6a2c, 0x1eecbe61, 0x1f3fead9
+ data4 0x1f1d3a29, 0x1e9099ce, 0x1eadd875, 0x1e4dbfb8
+ data4 0x9dc640d2, 0x1f413680, 0x9f3f57b3, 0x1dfa1553
+ data4 0x1ec71c6b, 0x1e00cc00, 0x9f271e55, 0x1e5a88bb
+ data4 0x1f46cc2b, 0x1ee80ff9, 0x9e29c6f3, 0x1f15e229
+ data4 0x9ea83d66, 0x1f37408e, 0x9dacb66e, 0x1e6f6259
+ data4 0x9f106973, 0x1dd4e5ac, 0x1cbfdcc8, 0x9f231c9f
+ data4 0x9e8677e4, 0x9e9e695a, 0x1efd782b, 0x9dd26959
+ data4 0x9e80af69, 0x1f386fb3, 0x1f022e8c, 0x9e839967
+ data4 0x1ce6796f, 0x1e4c22c2, 0x1e57ef24, 0x1e919804
+ data4 0x9d7ea090, 0x1e40140a, 0x1f261b46, 0x1db75be2
+ data4 0x1f145019, 0x9e3102b9, 0x9e22507b, 0x1eae813c
+ data4 0x1f117e97, 0x1f282296, 0x1f3814b3, 0x1e17977b
+ data4 0x1f39d6ff, 0x9f1c81b9, 0x9eb5bcad, 0x1f0f596e
+ data4 0x1e757fd5, 0x9f090daa, 0x9f2532fc, 0x9eebafbb
+ data4 0x1f086556, 0x9eeedde8, 0x9f32e174, 0x1e33c030
+ data4 0x1f1f145a, 0x1e6e556c, 0x1e419ffb, 0x9eb6019a
+ data4 0x9e872a2e, 0x1e113136, 0x1e93096f, 0x1f39be40
+ data4 0x1f1665ad, 0x9db81d7d, 0x9cd29091, 0x1e3f4af7
+ data4 0x9f23176c, 0x9eccf9b3, 0x1f34fc6c, 0x9ed36894
+ data4 0x1ef08e06, 0x9f3b46bb, 0x9f2c850b, 0x1f1565a4
+ data4 0x1e887bc3, 0x1e92629c, 0x9f11ac9e, 0x9e5579f3
+ data4 0x1e4d5790, 0x9ee1c3d1, 0x9e916aec, 0x9eb8d9b8
+ data4 0x1db46105, 0x1e168663, 0x1f26a942, 0x9f0f0383
+ data4 0x9f079032, 0x9ecae1d8, 0x1ed3b34c, 0x9edc5ee6
+ data4 0x9e8a75a7, 0x1f3c3de2, 0x9ee5041e, 0x1f08c727
+ data4 0x1d02d7ae, 0x9f36adda, 0x9ef9a857, 0x9ef5cb3a
+ data4 0x9eee73da, 0x9da5d629, 0x1e0e99be, 0x1e5159b9
+ data4 0x1f2eac89, 0x9e8eedc5, 0x1dd0ec90, 0x1f229aff
+ data4 0x1ed9c3e6, 0x1e95c55a, 0x9f0c24e4, 0x1e8afed6
+ data4 0x1e599a96, 0x1e881b21, 0x1eab84b9, 0x9ba2bb0e
+ data4 0x9e33ab10, 0x1f1710b5, 0x1ebfa271, 0x9e90bbc5
+ data4 0x9f32515b, 0x9b32aae8, 0x1eda455c, 0x1da8186e
+ data4 0x9e8917ff, 0x1ec4d08e, 0x1c90069d, 0x9f2f1d29
+ data4 0x9ecee86d, 0x9f234d1f, 0x1f370724, 0x1da87496
+ data4 0x1e7959f0, 0x9e8ada34, 0x1f1c7f6f, 0x1edd576b
+ data4 0x9de91e8b, 0x1ec4ef89, 0x1f32078a, 0x1e9925e2
+ data4 0x9d8eeccb, 0x9ea3d011, 0x1f231fdf, 0x9f1dbdfa
+ data4 0x1e7507a3, 0x1ec42614, 0x9e8693cb, 0x9ec68398
+ data4 0x1d5b05fb, 0x1de32119, 0x9f003429, 0x9ec16d92
+ data4 0x9f095315, 0x9f119d2c, 0x9ed0c984, 0x9f090662
+ data4 0x9e59aa1f, 0x9ed4e64a, 0x9f2798a7, 0x9f23624d
+ data4 0x1e0467d9, 0x1f22e7e7, 0x1e915256, 0x9cb4df70
+ data4 0x9e6f687c, 0x9e3c35e5, 0x9e5757ab, 0x9f031fa1
+ data4 0x1f25bff7, 0x1f0e58c2, 0x1ef3ce04, 0x1f002ecb
+ data4 0x9ebdc836, 0x9ed657dd, 0x9f149441, 0x9e8544b2
+ data4 0x1cd8ff1e, 0x1e9bb463, 0x1eaa1c5c, 0x1f200c1a
+ data4 0x1edbfbaf, 0x1f18724d, 0x9ed63c22, 0x9f08e045
+ data4 0x1f13ad07, 0x9e949311, 0x9f0c50d4, 0x1e824516
+ data4 0x1d5e52ba, 0x1d583fbd, 0x1e3b60a9, 0x9effe6d3
+ data4 0x1f0d0508, 0x1f00be77, 0x9e404bfa, 0x9e1ca381
+ data4 0x9f084dd8, 0x9e6db85d, 0x1db698e4, 0x9ebd1871
+ data4 0x9ecc2679, 0x1ee68442, 0x1edb1050, 0x9dbc96a4
+ data4 0x9f27c1f4, 0x1c99b756, 0x1eb4400a, 0x9f24390a
+ data4 0x1d927875, 0x9f074faa, 0x1e9dc2c3, 0x1f13c0d2
+ data4 0x1e3c9685, 0x9e6b6f75, 0x9db9cb31, 0x1ea5f3aa
+ data4 0x9d992c61, 0x1f1015e4, 0x1f194f70, 0x9e19d2b3
+ data4 0x9d89116c, 0x1f23cd35, 0x1e33d3a2, 0x1ee331b8
+ data4 0x1d5ba7ec, 0x9f273788, 0x9e6907f4, 0x9ed5f912
+ data4 0x9edd458d, 0x1e2ca7b2, 0x1ef81fe4, 0x1dc7ade6
+ data4 0x1e876e51, 0x9f04ec89, 0x1f1da63a, 0x1ec02bd0
+ data4 0x9e71326f, 0x1e7847b4, 0x1f0de618, 0x9e036cb6
+ data4 0x1eec61e2, 0x1ef1758b, 0x9ee880a3, 0x1ed269d7
+ data4 0x1e27edd3, 0x9e8a81a1, 0x1eacb84d, 0x9e1aad37
+ data4 0x1f1aa8f7, 0x1e9bbd90, 0x1ea1b61f, 0x9ed41c2f
+ data4 0x1dbb5dd6, 0x1f0ec733, 0x9df06b1b, 0x1e06fef1
+ data4 0x9edede3a, 0x1edeb5e2, 0x1f0e63ee, 0x9db316bb
+ data4 0x9efc1ad3, 0x1f01fbb5, 0x9cc0d078, 0x1ea28b36
+ data4 0x9e9dd205, 0x9e791534, 0x1da1c8d5, 0x9e8195cc
+ data4 0x1f0681a4, 0x1eeaf1e2, 0x9ef83b37, 0x9f22a92b
+ data4 0x1eabc4ce, 0x1f10eefb, 0x1e06d9aa, 0x1e7cacd5
+ data4 0x1f1ea087, 0x1eb21983, 0x9f100c78, 0x1e840abe
+ data4 0x9efab66c, 0x1f183fa8, 0x9e84ee68, 0x9eea083d
+ data4 0x9ee23a74, 0x1f1351d7, 0x9ec5d42a, 0x9f071f57
+ data4 0x9ef578d9, 0x9f1aa7e7, 0x1eb02044, 0x1f151a2e
+ data4 0x9c0dc8b2, 0x9ef4087a, 0x1ec12b93, 0x1c1a946b
+ data4 0x1e89946f, 0x9dafe8c3, 0x1d295288, 0x9e8497ab
+ data4 0x1ec000c6, 0x1e102f29, 0x1e542256, 0x1e67d44d
+ data4 0x1ef688d8, 0x1f0e0f29, 0x1e67861f, 0x1e869748
+ data4 0x1ee6aa6e, 0x9e4d228b, 0x9e50be5b, 0x1e9fe225
+ data4 0x9ea34102, 0x9e628a3b, 0x9ed9fd83, 0x1ecd7109
+ data4 0x1f1864ff, 0x1ea19b76, 0x1db0d1c9, 0x9dff519b
+ data4 0x1e8fea71, 0x9ee82e9a, 0x9f08919b, 0x9ef5c8ae
+ data4 0x9ee446a4, 0x1ea59444, 0x1eb74230, 0x1ea13fbf
+ data4 0x9ea6a3ea, 0x1e5f2797, 0x9e0adb07, 0x9d3adadd
+ data4 0x1ebf2ee2, 0x1da19bfa, 0x1e8dea6d, 0x1ec4fea9
+ data4 0x1e669f22, 0x1dc5f919, 0x9ed25caa, 0x1ee475b1
+ data4 0x1ed0603e, 0x9eacb35c, 0x1dc00b27, 0x1e2f9991
+ data4 0x1e7b0406, 0x1eaa3387, 0x9d865bde, 0x1eb78a48
+ data4 0x1c40ae2e, 0x1ee9838b, 0x9f0f0d7f, 0x1e3e5d26
+ data4 0x1e99e7a6, 0x9e681ccf, 0x9e93ed65, 0x9eeb6a66
+ data4 0x1e29e9af, 0x9e96f923, 0x9e74f11d, 0x9f1474da
+ data4 0x1eec2ea7, 0x1ebf7aa3, 0x9c25dcca, 0x9f0553c2
+ data4 0x9e599efd, 0x1d2ab490, 0x1e95d7cd, 0x9ee4b20e
+ data4 0x9d988ce5, 0x9ef9787e, 0x9dbbba5b, 0x9f12c304
+ data4 0x1e3b9d70, 0x1e7bcae8, 0x9d98bb6e, 0x9e8e6b01
+ data4 0x9f07d03b, 0x9d67c822, 0x9f0ef69e, 0x1c7c0fe3
+ data4 0x9e9bfbb9, 0x9e83b84b, 0x1efbf15e, 0x9ecfa6a6
+ data4 0x9c91158e, 0x9ecf6770, 0x1ee1e3a8, 0x9dc95ec0
+ data4 0x1ef603f7, 0x1d5e52ba, 0x1c477d1b, 0x9e955cd8
+ data4 0x1ed665b0, 0x9e8376c4, 0x9c0ee88e, 0x1e8c989e
+ data4 0x1ea2df29, 0x9d961e5c, 0x1e101813, 0x1e7fffff
+ data4 0x9e5abff4, 0x1dbddd71, 0x1eb69100, 0x1e71f114
+ data4 0x1e9ca798, 0x1ef62c8d, 0x9db4e55a, 0x1dbe69ce
+ data4 0x9ef1c01f, 0x1f044a2a, 0x9eb9e0d7, 0x9ee59745
+ data4 0x9e874803, 0x1ea0b418, 0x9e13572a, 0x1ddbb3a2
+ data4 0x9ec0e391, 0x1e89fba1, 0x1ee8b261, 0x9e5d25f0
+ data4 0x9ef222cb, 0x9ef135ec, 0x1ea04b9a, 0x9f04291f
+ data4 0x9e969254, 0x9ee32f08, 0x9ed909d3, 0x9e362640
+ data4 0x9ec20735, 0x1e50131b, 0x9ed4e049, 0x1ee8e817
+ data4 0x1e1e09c0, 0x9ea643c5, 0x9e5a1ab6, 0x9e389059
+ data4 0x1e560947, 0x1d02b877, 0x1e4475ab, 0x9ea9aaf6
+ data4 0x1e95bc5e, 0x1eaf6afd, 0x1d43067d, 0x9d043821
+ data4 0x9e97baa9, 0x1de5c4f9, 0x9e9a0069, 0x9e1b9944
+ data4 0x1eb13686, 0x9eb907eb, 0x1e059589, 0x1cbd0f93
+ data4 0x9eb7e6ae, 0x1e9fa175, 0x1ee5bdf4, 0x1e8052f7
+ data4 0x9c80d1e3, 0x1bfbe28e, 0x9e672b3b, 0x9ecacf19
+ data4 0x9e3c04be, 0x1dfe8c5c, 0x1e1ba9cb, 0x1eb40b1e
+ data4 0x1ec7e7f6, 0x9d0d45b3, 0x1ef0113b, 0x9a155fa3
+ data4 0x1e28ec3b, 0x1e7ca8df, 0x9d2f91b4, 0x1eccd9ed
+ data4 0x9ed943bc, 0x9ccaab19, 0x9e8a5c58, 0x1ec3bca8
+ data4 0x1ed78dc7, 0x9ed391a8, 0x9e938f6e, 0x9ec4a030
+ data4 0x9e80346e, 0x1e7a4686, 0x9e284315, 0x9e39584c
+ data4 0x1ebdc9b4, 0x9e9cfce5, 0x9ef55c65, 0x1e2941e7
+ data4 0x9efbe59f, 0x1d87c41b, 0x1e40befc, 0x1e3d05b5
+ data4 0x1de9ea67, 0x1ec9a21c, 0x1decb69a, 0x1df6e75a
+ data4 0x9e8030ab, 0x9db20540, 0x9ef1e977, 0x1e3cdc43
+ data4 0x1e0492b0, 0x9e91d872, 0x1e775346, 0x9e939978
+ data4 0x1eb2714e, 0x1e49a203, 0x9e10195a, 0x1ef1ffc3
+ data4 0x9ea8b709, 0x9e832e27, 0x1ed5ac3b, 0x1edb20a6
+ data4 0x1e4dbd4e, 0x1efbb932, 0x1d8170ec, 0x1e6c4849
+ data4 0x1f008e17, 0x1e8000c4, 0x1d855ecf, 0x9e37cb85
+ data4 0x1ecffdf5, 0x1eba6519, 0x9edbe600, 0x1ea3e5e7
+ data4 0x1ed4fb39, 0x1f00be77, 0x1e6f4484, 0x9e9e7107
+ data4 0x9e30b29d, 0x9ee6e174, 0x1e3a2656, 0x9dd72f3f
+ data4 0x9ee12138, 0x1ed16fed, 0x9ece8a02, 0x9ca5b249
+ data4 0x9eafd508, 0x9ef0e9fc, 0x1d1307ac, 0x1eecee20
+ data4 0x1cf60c6f, 0x9d556216, 0x9eaed175, 0x9ec919f4
+ data4 0x1ec2c988, 0x1cd82772, 0x9dc99456, 0x1eab0467
+ data4 0x1e89b36f, 0x1c757944, 0x1eef9abd, 0x9e98664d
LOCAL_OBJECT_END(D_table)
@@ -709,184 +749,238 @@ LOCAL_OBJECT_END(D_table)
GLOBAL_LIBM_ENTRY(cbrtl)
{ .mfi
- getf.sig r3=f8
- // will continue only for normal/denormal numbers
- fclass.nm.unc p12,p7 = f8, 0x1b
- // r2 = pointer to C_1...C_6 followed by T_table
- addl r2 = @ltoff(poly_coeffs), gp;;
+ getf.sig GR_ARGSIG = f8
+ // will continue on main path only for normal/denormal numbers
+ // all other values will be filtered out and will exit early
+ fclass.nm.unc p12, p7 = f8, 0x1b
+ // GR_ADDR = pointer to C_1...C_6 followed by T_table
+ addl GR_ADDR = @ltoff(poly_coeffs), gp
}
-{.mfi
- // r29=2/3*bias -63=0xaaaa-0x3f=0xaa6b
- mov r29=0xaa6b
- // normalize a
- fma.s1 f14=f8,f1,f0
- // r27 = pointer to D table
- addl r27 = @ltoff(D_table), gp;;
+{ .mfi
+ // GR_BIAS23 = 2/3*bias -63 = 0xaaaa-0x3f = 0xaa6b
+ mov GR_BIAS23 = 0xaa6b
+ // normalize a
+ fma.s1 FR_XNORM = f8, f1, f0
+ // GR_D_ADDR = pointer to D table
+ addl GR_D_ADDR = @ltoff(D_table), gp
}
-{.mib
- nop.m 0
- (p7) cmp.eq p12,p0=r3,r0
- nop.b 0;;
+;;
+
+{ .mmf
+ // load start address for C_1...C_6 followed by T_table
+ ld8 GR_C_START = [ GR_ADDR ]
+ // load start address of D table
+ ld8 GR_D_START = [ GR_D_ADDR ]
+ // y = frcpa(a)
+ frcpa.s1 FR_RCP, p6 = f1, f8
}
-{.mfb
- // load start address for C_1...C_6 followed by T_table
- ld8 r2=[r2]
- (p12) fma.s0 f8=f8,f1,f0
- (p12) br.ret.spnt b0;;
+;;
+
+{ .mmi
+ // get normalized significand
+ getf.sig GR_NORMSIG = FR_XNORM
+ // get exponent
+ getf.exp GR_NORMEXPSGN = FR_XNORM
+ (p7) cmp.eq p12, p0 = GR_ARGSIG, r0
}
-{.mmf
- // load C_1
- ldfe f7=[r2],16
- // load start address of D table
- ld8 r27=[r27]
- // y=frcpa(a)
- frcpa.s0 f8,p6=f1,f8;;
+;;
+
+{ .mii
+ // load C_1
+ ldfe FR_C1 = [ GR_C_START ], 16
+ mov GR_SGNMASK = 0x20000
+ nop.i 0
}
-{.mmi
- // load C_2
- ldfe f9=[r2],16;;
- // load C_3, C_4
- ldfpd f10,f11=[r2],16
- nop.i 0;;
+;;
+
+{ .mfb
+ // load C_2
+ ldfe FR_C2 = [ GR_C_START ], 16
+ (p12) fma.s0 f8 = f8, f1, f0
+ // NaN/Infinities exit early
+ (p12) br.ret.spnt b0
}
-{.mmi
- // get normalized significand
- getf.sig r23=f14
- // get exponent
- getf.exp r24=f14
- mov r25=0x20000;;
+;;
+
+{ .mfi
+ // load C_3, C_4
+ ldfpd FR_C3, FR_C4 = [ GR_C_START ], 16
+ // y = frcpa(a), set flags and result when argument is 0
+ // only used when p6=0
+ frcpa.s0 f8, p0 = f1, f8
+ nop.i 0
}
-{.mii
- // get r26=sign
- and r26=r24,r25
- // eliminate leading 1 from r23=2nd table index
- shl r23=r23,1
- // eliminate sign from exponent (r25)
- andcm r25=r24,r25;;
+;;
+
+{ .mii
+ // get GR_SIGN = sign
+ and GR_SIGN = GR_NORMEXPSGN, GR_SGNMASK
+ // eliminate leading 1 from GR_NORMSIG = 2nd table index
+ shl GR_INDEX2 = GR_NORMSIG, 1
+ // eliminate sign from exponent
+ andcm GR_NORMEXP = GR_NORMEXPSGN, GR_SGNMASK
}
-{.mfi
- // load C_5,C_6
- (p6) ldfpd f12,f13=[r2],16
- // r=1-a*y
- (p6) fnma.s1 f6=f8,f14,f1
- // 1: exponent*=5; // (2^{16}-1)/3=0x5555
- shladd r24=r25,2,r25;;
+;;
+
+{ .mfi
+ // load C_5, C_6
+ (p6) ldfpd FR_C5, FR_C6 = [ GR_C_START ], 16
+ // r = 1-a*y
+ (p6) fnma.s1 FR_R = FR_RCP, FR_XNORM, f1
+ // Start computation of floor(exponent/3) by
+ // computing (2^20+2)/3*exponent = exponent*0x55556
+ // 1: exponent *= 5;
+ // (2^{16}-1)/3 = 0x5555:
+ // will form 0x5555*exponent by using shladd's
+ shladd GR_EXP5 = GR_NORMEXP, 2, GR_NORMEXP
}
-{.mib
- // r30=(5*expon)*16
- shladd r30=r24,4,r0
- // r28=3*exponent
- shladd r28=r25,1,r25
- nop.b 0;;
+;;
+
+{ .mib
+ // Next several integer steps compute floor(exponent/3)
+ // GR_TMP1 = (5*expon)*16
+ shladd GR_TMP1 = GR_EXP5, 4, r0
+ // GR_EXP3 = 3*exponent
+ shladd GR_EXP3 = GR_NORMEXP, 1, GR_NORMEXP
+ nop.b 0
}
-{.mmi
- // r28=6*exponent
- shladd r28=r28,1,r0
- // r24=17*expon
- add r24=r24,r30
- // r23=2nd table index (8 bits)
- shr.u r23=r23,56;;
+;;
+
+{ .mmi
+ // GR_EXP6 = 6*exponent
+ shladd GR_EXP6 = GR_EXP3, 1, r0
+ // GR_EXP17 = 17*expon
+ add GR_EXP17 = GR_EXP5, GR_TMP1
+ // GR_IX2 = 2nd table index (8 bits)
+ shr.u GR_IX2 = GR_INDEX2, 56
}
-{.mmi
- // adjust T_table pointer by 2nd index
- shladd r2=r23,3,r2
- // adjust D_table pointer by 2nd index
- shladd r27=r23,2,r27
- // r30=(17*expon)*16^2
- shl r30=r24,8;;
+;;
+
+{ .mmi
+ // adjust T_table pointer by 2nd index
+ shladd GR_T_INDEX = GR_IX2, 3, GR_C_START
+ // adjust D_table pointer by 2nd index
+ shladd GR_D_INDEX = GR_IX2, 2, GR_D_START
+ // GR_TMP2 = (17*expon)*16^2
+ shl GR_TMP2 = GR_EXP17, 8
}
-{.mmi
- // r24=expon*(2^16-1)/3
- add r24=r24,r30;;
- // r24=expon*(2^20+2)/3=expon*0x55556
- shladd r24=r24,4,r28
- nop.i 0;;
+;;
+
+{ .mmi
+ // GR_TMP3 = expon*(2^16-1)/3
+ add GR_TMP3 = GR_EXP17, GR_TMP2
+;;
+ // GR_TMP4 = expon*(2^20+2)/3 = expon*0x55556
+ shladd GR_TMP4 = GR_TMP3, 4, GR_EXP6
+ nop.i 0
}
-{.mii
- nop.m 0
- // r24=floor(expon/3)
- shr.u r24=r24,20
- nop.i 0;;
+;;
+
+{ .mii
+ nop.m 0
+ // GR_EXP_RES = floor(expon/3)
+ shr.u GR_EXP_RES = GR_TMP4, 20
+ nop.i 0
}
-{.mmi
- nop.m 0
- // r28=3*exponent
- shladd r28=r24,1,r24
- // bias exponent
- add r24=r29,r24;;
+;;
+
+{ .mmi
+ nop.m 0
+ // r16 = 3*floor(exponent/3)
+ shladd r16 = GR_EXP_RES, 1, GR_EXP_RES
+ // bias exponent
+ add GR_EXPBIAS = GR_BIAS23, GR_EXP_RES
}
-{.mmi
- // get remainder of exponent/3
- sub r25=r25,r28;;
- // add sign to exponent
- or r24=r24,r26
- // remainder <<=8
- shl r25=r25,8;;
-}
-{.mfi
- // adjust D_table pointer by 1st index
- shladd r27=r25,2,r27
- // P_1=C_1+C_2*r
- (p6) fma.s1 f7=f9,f6,f7
- // adjust T_table pointer by 1st index
- shladd r2=r25,3,r2
+;;
+
+{ .mmi
+ // get remainder of exponent/3
+ sub GR_EXP_MOD_3 = GR_NORMEXP, r16
+;;
+ // add sign to exponent
+ or GR_EXPSIGNRES = GR_EXPBIAS, GR_SIGN
+ // remainder <<= 8
+ shl GR_REMTMP = GR_EXP_MOD_3, 8
}
-{.mfi
- // f14=sign*2^{exponent/3}
- (p6) setf.exp f14=r24
- // r2=r*r
- (p6) fma.s1 f9=f6,f6,f0
- nop.i 0;;
+;;
+
+{ .mfi
+ // adjust D_table pointer by 1st index
+ shladd GR_IX_D = GR_REMTMP, 2, GR_D_INDEX
+ // P_1 = C_1+C_2*r
+ (p6) fma.s1 FR_P1 = FR_C2, FR_R, FR_C1
+ // adjust T_table pointer by 1st index
+ shladd GR_IX_T = GR_REMTMP, 3, GR_T_INDEX
}
-{.mfi
- // load D
- (p6) ldfs f15=[r27]
- // P_2=C_3+C_4*r
- (p6) fma.s1 f10=f11,f6,f10
- nop.i 0
+{ .mfi
+ // FR_SGNEXP = sign*2^{exponent/3}
+ (p6) setf.exp FR_SGNEXP = GR_EXPSIGNRES
+ // r^2 = r*r
+ (p6) fma.s1 FR_R2 = FR_R, FR_R, f0
+ nop.i 0
}
-{.mfi
- // load T
- (p6) ldf8 f8=[r2]
- // P_3=C_5+C_6*r
- (p6) fma.s1 f12=f13,f6,f12
- nop.i 0;;
+;;
+
+{ .mfi
+ // load D
+ (p6) ldfs FR_D = [ GR_IX_D ]
+ // P_2 = C_3+C_4*r
+ (p6) fma.s1 FR_P2 = FR_C4, FR_R, FR_C3
+ nop.i 0
}
-{.mfi
- nop.m 0
- // P_4=D-r*P_1
- (p6) fnma.s1 f15=f6,f7,f15
- nop.i 0
+{ .mfi
+ // load T
+ (p6) ldf8 FR_T = [ GR_IX_T ]
+ // P_3 = C_5+C_6*r
+ (p6) fma.s1 FR_P3 = FR_C6, FR_R, FR_C5
+ nop.i 0
}
-{.mfi
- nop.m 0
- // r3=r*r2
- (p6) fma.s1 f6=f6,f9,f0
- nop.i 0;;
+;;
+
+{ .mfi
+ nop.m 0
+ // P_4 = D-r*P_1
+ (p6) fnma.s1 FR_P4 = FR_R, FR_P1, FR_D
+ nop.i 0
}
-{.mfi
- nop.m 0
- // P_5=P_2+r2*P_3
- (p6) fma.s1 f10=f9,f12,f10
- nop.i 0;;
+{ .mfi
+ nop.m 0
+ // r^3 = r*r^2
+ (p6) fma.s1 FR_R3 = FR_R, FR_R2, f0
+ nop.i 0
}
-{.mfi
- nop.m 0
- // T=T*(sign*2^{exponent/3})
- (p6) fma.s1 f8=f8,f14,f0
- nop.i 0
+;;
+
+{ .mfi
+ nop.m 0
+ // P_5 = P_2+r2*P_3
+ (p6) fma.s1 FR_P5 = FR_R2, FR_P3, FR_P2
+ nop.i 0
}
-{.mfi
- nop.m 0
- // P=P_4-r3*P_5
- (p6) fnma.s1 f6=f6,f10,f15
- nop.i 0;;
+;;
+
+{ .mfi
+ nop.m 0
+ // T = T*(sign*2^{exponent/3})
+ (p6) fma.s1 FR_TF = FR_T, FR_SGNEXP, f0
+ nop.i 0
}
-{.mfb
- nop.m 0
- // result=T+T*p
- (p6) fma.s0 f8=f8,f6,f8
- br.ret.sptk b0;;
+{ .mfi
+ nop.m 0
+ // P = P_4-r3*P_5
+ (p6) fnma.s1 FR_P = FR_R3, FR_P5, FR_P4
+ nop.i 0
}
+;;
+
+{ .mfb
+ nop.m 0
+ // result = T+T*p
+ (p6) fma.s0 f8 = FR_TF, FR_P, FR_TF
+ br.ret.sptk b0
+}
+;;
+
GLOBAL_LIBM_END(cbrtl)
+
diff --git a/sysdeps/ia64/fpu/s_cos.S b/sysdeps/ia64/fpu/s_cos.S
index 84c177a..bf8997b 100644
--- a/sysdeps/ia64/fpu/s_cos.S
+++ b/sysdeps/ia64/fpu/s_cos.S
@@ -1,7 +1,7 @@
.file "sincos.s"
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (c) 2000 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@@ -51,6 +51,8 @@
// 06/03/02 Insure inexact flag set for large arg result
// 09/05/02 Work range is widened by reduction strengthen (3 parts of Pi/16)
// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 08/08/03 Improved performance
+// 10/28/04 Saved sincos_r_sincos to avoid clobber by dynamic loader
// API
//==============================================================
@@ -170,11 +172,11 @@
// Registers used
//==============================================================
// general input registers:
-// r14 -> r19
-// r32 -> r45
+// r14 -> r26
+// r32 -> r36
// predicate registers used:
-// p6 -> p14
+// p6 -> p11
// floating-point registers used
// f9 -> f15
@@ -236,16 +238,6 @@ fp_tmp = f61
/////////////////////////////////////////////////////////////
-sincos_AD_1 = r33
-sincos_AD_2 = r34
-sincos_exp_limit = r35
-sincos_r_signexp = r36
-sincos_AD_beta_table = r37
-sincos_r_sincos = r38
-
-sincos_r_exp = r39
-sincos_r_17_ones = r40
-
sincos_GR_sig_inv_pi_by_16 = r14
sincos_GR_rshf_2to61 = r15
sincos_GR_rshf = r16
@@ -254,11 +246,18 @@ sincos_GR_n = r18
sincos_GR_m = r19
sincos_GR_32m = r19
sincos_GR_all_ones = r19
+sincos_AD_1 = r20
+sincos_AD_2 = r21
+sincos_exp_limit = r22
+sincos_r_signexp = r23
+sincos_r_17_ones = r24
+sincos_r_sincos = r25
+sincos_r_exp = r26
-gr_tmp = r41
-GR_SAVE_PFS = r41
-GR_SAVE_B0 = r42
-GR_SAVE_GP = r43
+GR_SAVE_PFS = r33
+GR_SAVE_B0 = r34
+GR_SAVE_GP = r35
+GR_SAVE_r_sincos = r36
RODATA
@@ -405,7 +404,7 @@ LOCAL_OBJECT_END(double_sin_cos_beta_k4)
GLOBAL_IEEE754_ENTRY(sin)
{ .mlx
- alloc r32 = ar.pfs, 1, 13, 0, 0
+ getf.exp sincos_r_signexp = f8
movl sincos_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A // signd of 16/pi
}
{ .mlx
@@ -427,10 +426,11 @@ GLOBAL_IEEE754_ENTRY(sin)
;;
GLOBAL_IEEE754_END(sin)
+
GLOBAL_IEEE754_ENTRY(cos)
{ .mlx
- alloc r32 = ar.pfs, 1, 13, 0, 0
+ getf.exp sincos_r_signexp = f8
movl sincos_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A // signd of 16/pi
}
{ .mlx
@@ -464,7 +464,6 @@ _SINCOS_COMMON:
// Form two constants we need
// 16/pi * 2^-2 * 2^63, scaled by 2^61 since we just loaded the significand
// 1.1000...000 * 2^(63+63-2) to right shift int(W) into the low significand
-// fcmp used to set denormal, and invalid on snans
{ .mfi
setf.sig sincos_SIG_INV_PI_BY_16_2TO61 = sincos_GR_sig_inv_pi_by_16
fclass.m p6,p0 = f8, 0xe7 // if x = 0,inf,nan
@@ -480,10 +479,15 @@ _SINCOS_COMMON:
// 2^-61 for scaling Nfloat
// 0x1001a is register_bias + 27.
// So if f8 >= 2^27, go to large argument routines
-{ .mmi
- getf.exp sincos_r_signexp = f8
+{ .mfi
+ alloc r32 = ar.pfs, 1, 4, 0, 0
+ fclass.m p11,p0 = f8, 0x0b // Test for x=unorm
+ mov sincos_GR_all_ones = -1 // For creating the "inexact" constant
+}
+{ .mib
setf.exp sincos_2TOM61 = sincos_GR_exp_2tom61
- addl gr_tmp = -1,r0 // For "inexect" constant create
+ nop.i 999
+(p6) br.cond.spnt _SINCOS_SPECIAL_ARGS
}
;;
@@ -493,41 +497,31 @@ _SINCOS_COMMON:
{ .mmb
ldfe sincos_Pi_by_16_1 = [sincos_AD_1],16
setf.d sincos_RSHF = sincos_GR_rshf
-(p6) br.cond.spnt _SINCOS_SPECIAL_ARGS
+(p11) br.cond.spnt _SINCOS_UNORM // Branch if x=unorm
}
;;
+_SINCOS_COMMON2:
+// Return here if x=unorm
+// Create constant used to set inexact
{ .mmi
ldfe sincos_Pi_by_16_2 = [sincos_AD_1],16
- setf.sig fp_tmp = gr_tmp // constant for inexact set
+ setf.sig fp_tmp = sincos_GR_all_ones
nop.i 999
};;
+// Select exponent (17 lsb)
{ .mfi
ldfe sincos_Pi_by_16_3 = [sincos_AD_1],16
nop.f 999
- nop.i 999
+ dep.z sincos_r_exp = sincos_r_signexp, 0, 17
};;
// Polynomial coefficients (Q4, P4, Q3, P3, Q2, Q1, P2, P1) loading
-{ .mmi
- ldfpd sincos_P4,sincos_Q4 = [sincos_AD_1],16
- nop.m 999
- nop.i 999
-};;
-
-// Select exponent (17 lsb)
-{ .mmi
- ldfpd sincos_P3,sincos_Q3 = [sincos_AD_1],16
- nop.m 999
- dep.z sincos_r_exp = sincos_r_signexp, 0, 17
-}
-;;
-
// p10 is true if we must call routines to handle larger arguments
// p10 is true if f8 exp is >= 0x1001a (2^27)
{ .mmb
- ldfpd sincos_P2,sincos_Q2 = [sincos_AD_1],16
+ ldfpd sincos_P4,sincos_Q4 = [sincos_AD_1],16
cmp.ge p10,p0 = sincos_r_exp,sincos_exp_limit
(p10) br.cond.spnt _SINCOS_LARGE_ARGS // Go to "large args" routine
};;
@@ -536,66 +530,61 @@ _SINCOS_COMMON:
// Multiply x by scaled 16/pi and add large const to shift integer part of W to
// rightmost bits of significand
{ .mfi
- ldfpd sincos_P1,sincos_Q1 = [sincos_AD_1],16
+ ldfpd sincos_P3,sincos_Q3 = [sincos_AD_1],16
fma.s1 sincos_W_2TO61_RSH = sincos_NORM_f8,sincos_SIG_INV_PI_BY_16_2TO61,sincos_RSHF_2TO61
nop.i 999
};;
+// get N = (int)sincos_int_Nfloat
// sincos_NFLOAT = Round_Int_Nearest(sincos_W)
// This is done by scaling back by 2^-61 and subtracting the shift constant
-{ .mfi
- nop.m 999
+{ .mmf
+ getf.sig sincos_GR_n = sincos_W_2TO61_RSH
+ ldfpd sincos_P2,sincos_Q2 = [sincos_AD_1],16
fms.s1 sincos_NFLOAT = sincos_W_2TO61_RSH,sincos_2TOM61,sincos_RSHF
- nop.i 999
};;
-
-// get N = (int)sincos_int_Nfloat
+// sincos_r = -sincos_Nfloat * sincos_Pi_by_16_1 + x
{ .mfi
- getf.sig sincos_GR_n = sincos_W_2TO61_RSH
- nop.f 999
+ ldfpd sincos_P1,sincos_Q1 = [sincos_AD_1],16
+ fnma.s1 sincos_r = sincos_NFLOAT, sincos_Pi_by_16_1, sincos_NORM_f8
nop.i 999
};;
// Add 2^(k-1) (which is in sincos_r_sincos) to N
-// sincos_r = -sincos_Nfloat * sincos_Pi_by_16_1 + x
-{ .mfi
+{ .mmi
add sincos_GR_n = sincos_GR_n, sincos_r_sincos
- fnma.s1 sincos_r = sincos_NFLOAT, sincos_Pi_by_16_1, sincos_NORM_f8
+;;
+// Get M (least k+1 bits of N)
+ and sincos_GR_m = 0x1f,sincos_GR_n
nop.i 999
};;
-// Get M (least k+1 bits of N)
-{ .mmi
- and sincos_GR_m = 0x1f,sincos_GR_n;;
+// sincos_r = sincos_r -sincos_Nfloat * sincos_Pi_by_16_2
+{ .mfi
nop.m 999
+ fnma.s1 sincos_r = sincos_NFLOAT, sincos_Pi_by_16_2, sincos_r
shl sincos_GR_32m = sincos_GR_m,5
};;
// Add 32*M to address of sin_cos_beta table
+// For sin denorm. - set uflow
{ .mfi
add sincos_AD_2 = sincos_GR_32m, sincos_AD_1
-(p8) fclass.m.unc p10,p0 = f8,0x0b // For sin denorm. - set uflow
+(p8) fclass.m.unc p10,p0 = f8,0x0b
nop.i 999
};;
// Load Sin and Cos table value using obtained index m (sincosf_AD_2)
{ .mfi
ldfe sincos_Sm = [sincos_AD_2],16
-(p9) fclass.m.unc p11,p0 = f8,0x0b // For cos denorm - set denorm
- nop.i 999
-};;
-
-// sincos_r = sincos_r -sincos_Nfloat * sincos_Pi_by_16_2
-{ .mfi
- ldfe sincos_Cm = [sincos_AD_2]
- fnma.s1 sincos_r = sincos_NFLOAT, sincos_Pi_by_16_2, sincos_r
+ nop.f 999
nop.i 999
};;
// get rsq = r*r
{ .mfi
- nop.m 999
+ ldfe sincos_Cm = [sincos_AD_2]
fma.s1 sincos_rsq = sincos_r, sincos_r, f0 // r^2 = r*r
nop.i 999
}
@@ -660,7 +649,6 @@ _SINCOS_COMMON:
fma.s1 sincos_Q = sincos_rsq, sincos_Q_temp2, sincos_Q1
nop.i 999
}
-
{ .mfi
nop.m 999
fma.s1 sincos_P = sincos_rsq, sincos_P_temp2, sincos_P1
@@ -675,7 +663,6 @@ _SINCOS_COMMON:
fma.s1 sincos_Q = sincos_srsq,sincos_Q, sincos_Sm
nop.i 999
}
-
{ .mfi
nop.m 999
fma.s1 sincos_P = sincos_rcub,sincos_P, sincos_r_exact
@@ -683,19 +670,12 @@ _SINCOS_COMMON:
};;
// If sin(denormal), force underflow to be set
-.pred.rel "mutex",p10,p11
{ .mfi
nop.m 999
-(p10) fmpy.d.s0 fp_tmp = f8,f8 // forces underflow flag
- nop.i 999 // for denormal sine args
-}
-{ .mfi
- nop.m 999
-(p11) fma.d.s0 fp_tmp = f8,f1, f8 // forces denormal flag
- nop.i 999 // for denormal cosine args
+(p10) fmpy.d.s0 fp_tmp = sincos_NORM_f8,sincos_NORM_f8
+ nop.i 999
};;
-
// Final calculation
// result = C[m]*P + Q
{ .mfb
@@ -724,13 +704,22 @@ _SINCOS_SPECIAL_ARGS:
br.ret.sptk b0 // Exit for x = 0/Inf/NaN path
};;
+_SINCOS_UNORM:
+// Here if x=unorm
+{ .mfb
+ getf.exp sincos_r_signexp = sincos_NORM_f8 // Get signexp of x
+ fcmp.eq.s0 p11,p0 = f8, f0 // Dummy op to set denorm flag
+ br.cond.sptk _SINCOS_COMMON2 // Return to main path
+};;
+
GLOBAL_IEEE754_END(cos)
+
//////////// x >= 2^27 - large arguments routine call ////////////
LOCAL_LIBM_ENTRY(__libm_callout_sincos)
_SINCOS_LARGE_ARGS:
.prologue
{ .mfi
- mov sincos_GR_all_ones = -1 // 0xffffffff
+ mov GR_SAVE_r_sincos = sincos_r_sincos // Save sin or cos
nop.f 999
.save ar.pfs,GR_SAVE_PFS
mov GR_SAVE_PFS = ar.pfs
@@ -753,7 +742,7 @@ _SINCOS_LARGE_ARGS:
};;
{ .mbb
- cmp.ne p9,p0 = sincos_r_sincos, r0 // set p9 if cos
+ cmp.ne p9,p0 = GR_SAVE_r_sincos, r0 // set p9 if cos
nop.b 999
(p9) br.call.sptk.many b0 = __libm_cos_large# // cos(large_X)
};;
diff --git a/sysdeps/ia64/fpu/s_cosf.S b/sysdeps/ia64/fpu/s_cosf.S
index 89cf823..a588938 100644
--- a/sysdeps/ia64/fpu/s_cosf.S
+++ b/sysdeps/ia64/fpu/s_cosf.S
@@ -408,6 +408,7 @@ GLOBAL_IEEE754_ENTRY(sinf)
};;
GLOBAL_IEEE754_END(sinf)
+
GLOBAL_IEEE754_ENTRY(cosf)
{ .mlx
@@ -657,6 +658,7 @@ _SINCOSF_SPECIAL_ARGS:
};;
GLOBAL_IEEE754_END(cosf)
+
//////////// x >= 2^24 - large arguments routine call ////////////
LOCAL_LIBM_ENTRY(__libm_callout_sincosf)
_SINCOSF_LARGE_ARGS:
diff --git a/sysdeps/ia64/fpu/s_cosl.S b/sysdeps/ia64/fpu/s_cosl.S
index 374e822..8d71e50 100644
--- a/sysdeps/ia64/fpu/s_cosl.S
+++ b/sysdeps/ia64/fpu/s_cosl.S
@@ -1,7 +1,7 @@
.file "sincosl.s"
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (c) 2000 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@@ -47,6 +47,8 @@
// 05/13/02 Changed interface to __libm_pi_by_2_reduce
// 02/10/03 Reordered header: .section, .global, .proc, .align;
// used data8 for long double table values
+// 10/13/03 Corrected final .endp name to match .proc
+// 10/26/04 Avoided using r14-31 as scratch so not clobbered by dynamic loader
//
//*********************************************************************
//
@@ -63,8 +65,7 @@
// f32-f99
//
// General Purpose Registers:
-// r32-r43
-// r44-r45 (Used to pass arguments to pi_by_2 reduce routine)
+// r32-r58
//
// Predicate Registers: p6-p13
//
@@ -715,20 +716,6 @@ FR_PP_1_lo = f98
FR_ArgPrime = f99
FR_inexact = f100
-GR_sig_inv_pi = r14
-GR_rshf_2to64 = r15
-GR_exp_2tom64 = r16
-GR_rshf = r17
-GR_ad_p = r18
-GR_ad_d = r19
-GR_ad_pp = r20
-GR_ad_qq = r21
-GR_ad_c = r22
-GR_ad_s = r23
-GR_ad_ce = r24
-GR_ad_se = r25
-GR_ad_m14 = r26
-GR_ad_s1 = r27
GR_exp_m2_to_m3= r36
GR_N_Inc = r37
GR_Sin_or_Cos = r38
@@ -739,6 +726,21 @@ GR_exp_2_to_63 = r42
GR_exp_2_to_m3 = r43
GR_exp_2_to_24 = r44
+GR_sig_inv_pi = r45
+GR_rshf_2to64 = r46
+GR_exp_2tom64 = r47
+GR_rshf = r48
+GR_ad_p = r49
+GR_ad_d = r50
+GR_ad_pp = r51
+GR_ad_qq = r52
+GR_ad_c = r53
+GR_ad_s = r54
+GR_ad_ce = r55
+GR_ad_se = r56
+GR_ad_m14 = r57
+GR_ad_s1 = r58
+
// Added for unwind support
GR_SAVE_B0 = r39
@@ -750,7 +752,7 @@ GR_SAVE_PFS = r41
GLOBAL_IEEE754_ENTRY(sinl)
{ .mlx
- alloc r32 = ar.pfs,0,12,2,0
+ alloc r32 = ar.pfs,0,27,2,0
movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
}
{ .mlx
@@ -772,9 +774,10 @@ GLOBAL_IEEE754_ENTRY(sinl)
;;
GLOBAL_IEEE754_END(sinl)
+
GLOBAL_IEEE754_ENTRY(cosl)
{ .mlx
- alloc r32 = ar.pfs,0,12,2,0
+ alloc r32 = ar.pfs,0,27,2,0
movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
}
{ .mlx
@@ -2285,6 +2288,7 @@ SINCOSL_SPECIAL:
}
GLOBAL_IEEE754_END(cosl)
+
// *******************************************************************
// *******************************************************************
// *******************************************************************
@@ -2299,7 +2303,7 @@ GLOBAL_IEEE754_END(cosl)
// c is in f9
// N is in r8
// Be sure to allocate at least 2 GP registers as output registers for
-// __libm_pi_by_2_reduce. This routine uses r49-50. These are used as
+// __libm_pi_by_2_reduce. This routine uses r59-60. These are used as
// scratch registers within the __libm_pi_by_2_reduce routine (for speed).
//
// We know also that __libm_pi_by_2_reduce preserves f10-15, f71-127. We
@@ -2356,6 +2360,6 @@ SINCOSL_ARG_TOO_LARGE:
br.cond.sptk SINCOSL_NORMAL_R // Branch if |r|>=2^-3 for |x| >= 2^63
};;
-.endp
+LOCAL_LIBM_END(__libm_callout)
.type __libm_pi_by_2_reduce#,@function
.global __libm_pi_by_2_reduce#
diff --git a/sysdeps/ia64/fpu/s_erf.S b/sysdeps/ia64/fpu/s_erf.S
index 8b8cc7f..3abcd3e 100644
--- a/sysdeps/ia64/fpu/s_erf.S
+++ b/sysdeps/ia64/fpu/s_erf.S
@@ -922,3 +922,4 @@ erf_denormal:
GLOBAL_LIBM_END(erf)
+
diff --git a/sysdeps/ia64/fpu/s_erfc.S b/sysdeps/ia64/fpu/s_erfc.S
index 8b22327..3b1b583 100644
--- a/sysdeps/ia64/fpu/s_erfc.S
+++ b/sysdeps/ia64/fpu/s_erfc.S
@@ -1135,6 +1135,7 @@ GLOBAL_LIBM_ENTRY(erfc)
};;
GLOBAL_LIBM_END(erfc)
+
// call via (p15) br.cond.spnt __libm_error_region
// for x > ARG_ASYMP = 28.0
// or
diff --git a/sysdeps/ia64/fpu/s_erfcf.S b/sysdeps/ia64/fpu/s_erfcf.S
index 7d9e2a9..e09ce98 100644
--- a/sysdeps/ia64/fpu/s_erfcf.S
+++ b/sysdeps/ia64/fpu/s_erfcf.S
@@ -918,6 +918,7 @@ SPECIAL:
GLOBAL_LIBM_END(erfcf)
+
// Call via (p10) br.cond.spnt __libm_error_region
// for UnfBound < x < = POS_ARG_ASYMP
// and
diff --git a/sysdeps/ia64/fpu/s_erfcl.S b/sysdeps/ia64/fpu/s_erfcl.S
index f06e26f..11f66bc 100644
--- a/sysdeps/ia64/fpu/s_erfcl.S
+++ b/sysdeps/ia64/fpu/s_erfcl.S
@@ -2002,6 +2002,7 @@ ERFC_RESULT:
};;
GLOBAL_LIBM_END(erfcl)
+
// call via (p15) br.cond.spnt __libm_error_region
// for x > POS_ARG_ASYMP
// or
diff --git a/sysdeps/ia64/fpu/s_erfl.S b/sysdeps/ia64/fpu/s_erfl.S
index 902539b..10da22c 100644
--- a/sysdeps/ia64/fpu/s_erfl.S
+++ b/sysdeps/ia64/fpu/s_erfl.S
@@ -1237,3 +1237,4 @@ _0_to_1o8:
GLOBAL_LIBM_END(erfl)
+
diff --git a/sysdeps/ia64/fpu/s_expm1.S b/sysdeps/ia64/fpu/s_expm1.S
index 41b9954..5d1fd8c 100644
--- a/sysdeps/ia64/fpu/s_expm1.S
+++ b/sysdeps/ia64/fpu/s_expm1.S
@@ -831,6 +831,7 @@ EXPM1_CERTAIN_MINUS_ONE:
GLOBAL_IEEE754_END(expm1)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/s_expm1f.S b/sysdeps/ia64/fpu/s_expm1f.S
index 0c5f2e6..311be06 100644
--- a/sysdeps/ia64/fpu/s_expm1f.S
+++ b/sysdeps/ia64/fpu/s_expm1f.S
@@ -612,6 +612,7 @@ EXPM1_CERTAIN_OVERFLOW:
GLOBAL_IEEE754_END(expm1f)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/s_expm1l.S b/sysdeps/ia64/fpu/s_expm1l.S
index 069856d..5f135fa 100644
--- a/sysdeps/ia64/fpu/s_expm1l.S
+++ b/sysdeps/ia64/fpu/s_expm1l.S
@@ -49,6 +49,7 @@
// used data8 for long double table values
// 03/11/03 Improved accuracy and performance, corrected missing inexact flags
// 04/17/03 Eliminated misplaced and unused data label
+// 12/15/03 Eliminated call to error support on expm1l underflow
//
//*********************************************************************
//
@@ -677,6 +678,7 @@ GLOBAL_IEEE754_ENTRY(expm1l)
GLOBAL_IEEE754_END(expm1l)
+
GLOBAL_IEEE754_ENTRY(expl)
//
// Set p7 false for exp, p6 true
@@ -1287,28 +1289,14 @@ EXP_POSSIBLE_UNDERFLOW:
{ .mfi
nop.m 999
-(p7) fclass.m.unc p12, p0 = FR_ftz, 0x00F // If expm1, FTZ result denorm, zero?
- nop.i 999
-}
-;;
-
-{ .mfi
- nop.m 999
(p6) fclass.m.unc p11, p0 = FR_ftz, 0x00F // If exp, FTZ result denorm or zero?
nop.i 999
}
;;
{ .mfb
-(p12) mov GR_Parameter_TAG = 40 // expm1 underflow
- fmerge.s FR_X = f8,f8 // Save x for error call
-(p12) br.cond.spnt __libm_error_region // Branch on expm1 underflow
-}
-;;
-
-{ .mib
(p11) mov GR_Parameter_TAG = 13 // exp underflow
- nop.i 999
+ fmerge.s FR_X = f8,f8 // Save x for error call
(p11) br.cond.spnt __libm_error_region // Branch on exp underflow
}
;;
@@ -1389,6 +1377,7 @@ EXP_64_UNSUPPORTED:
;;
GLOBAL_IEEE754_END(expl)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/s_fdim.S b/sysdeps/ia64/fpu/s_fdim.S
index 96ff67b..eff290c 100644
--- a/sysdeps/ia64/fpu/s_fdim.S
+++ b/sysdeps/ia64/fpu/s_fdim.S
@@ -164,6 +164,7 @@ FDIM_OVERFLOW:
GLOBAL_LIBM_END(fdim)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
// Call error support to report possible range error
.prologue
diff --git a/sysdeps/ia64/fpu/s_fdimf.S b/sysdeps/ia64/fpu/s_fdimf.S
index 19e14d3..76d69d1 100644
--- a/sysdeps/ia64/fpu/s_fdimf.S
+++ b/sysdeps/ia64/fpu/s_fdimf.S
@@ -164,6 +164,7 @@ FDIM_OVERFLOW:
GLOBAL_LIBM_END(fdimf)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
// Call error support to report possible range error
.prologue
diff --git a/sysdeps/ia64/fpu/s_fdiml.S b/sysdeps/ia64/fpu/s_fdiml.S
index 00876c3..2227deb 100644
--- a/sysdeps/ia64/fpu/s_fdiml.S
+++ b/sysdeps/ia64/fpu/s_fdiml.S
@@ -164,6 +164,7 @@ FDIM_OVERFLOW:
GLOBAL_LIBM_END(fdiml)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
// Call error support to report possible range error
.prologue
diff --git a/sysdeps/ia64/fpu/s_frexp.c b/sysdeps/ia64/fpu/s_frexp.c
index c675006..7d90213 100644
--- a/sysdeps/ia64/fpu/s_frexp.c
+++ b/sysdeps/ia64/fpu/s_frexp.c
@@ -4,8 +4,7 @@
// Copyright (c) 2000-2002, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
diff --git a/sysdeps/ia64/fpu/s_frexpf.c b/sysdeps/ia64/fpu/s_frexpf.c
index c21a21d..920f09d 100644
--- a/sysdeps/ia64/fpu/s_frexpf.c
+++ b/sysdeps/ia64/fpu/s_frexpf.c
@@ -4,8 +4,7 @@
// Copyright (c) 2000-2002, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
diff --git a/sysdeps/ia64/fpu/s_frexpl.c b/sysdeps/ia64/fpu/s_frexpl.c
index 13d44ab..968cc32 100644
--- a/sysdeps/ia64/fpu/s_frexpl.c
+++ b/sysdeps/ia64/fpu/s_frexpl.c
@@ -4,8 +4,7 @@
// Copyright (c) 2000-2002, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
diff --git a/sysdeps/ia64/fpu/s_ilogb.S b/sysdeps/ia64/fpu/s_ilogb.S
index 3f2733c..0102370 100644
--- a/sysdeps/ia64/fpu/s_ilogb.S
+++ b/sysdeps/ia64/fpu/s_ilogb.S
@@ -206,6 +206,7 @@ ILOGB_ZERO:
GLOBAL_LIBM_END(ilogb)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
diff --git a/sysdeps/ia64/fpu/s_ilogbf.S b/sysdeps/ia64/fpu/s_ilogbf.S
index 1b6ade6..9e971bc 100644
--- a/sysdeps/ia64/fpu/s_ilogbf.S
+++ b/sysdeps/ia64/fpu/s_ilogbf.S
@@ -206,6 +206,7 @@ ILOGB_ZERO:
GLOBAL_LIBM_END(ilogbf)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
diff --git a/sysdeps/ia64/fpu/s_ilogbl.S b/sysdeps/ia64/fpu/s_ilogbl.S
index e462fb7..8a6c9dc 100644
--- a/sysdeps/ia64/fpu/s_ilogbl.S
+++ b/sysdeps/ia64/fpu/s_ilogbl.S
@@ -206,6 +206,7 @@ ILOGB_ZERO:
GLOBAL_LIBM_END(ilogbl)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
diff --git a/sysdeps/ia64/fpu/s_ldexp.c b/sysdeps/ia64/fpu/s_ldexp.c
index 015b650..a0bc14c 100644
--- a/sysdeps/ia64/fpu/s_ldexp.c
+++ b/sysdeps/ia64/fpu/s_ldexp.c
@@ -4,8 +4,7 @@
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
diff --git a/sysdeps/ia64/fpu/s_ldexpf.c b/sysdeps/ia64/fpu/s_ldexpf.c
index eae4051..ad083fa 100644
--- a/sysdeps/ia64/fpu/s_ldexpf.c
+++ b/sysdeps/ia64/fpu/s_ldexpf.c
@@ -4,8 +4,7 @@
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
diff --git a/sysdeps/ia64/fpu/s_ldexpl.c b/sysdeps/ia64/fpu/s_ldexpl.c
index 91d8268..61dfd21 100644
--- a/sysdeps/ia64/fpu/s_ldexpl.c
+++ b/sysdeps/ia64/fpu/s_ldexpl.c
@@ -4,8 +4,7 @@
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
diff --git a/sysdeps/ia64/fpu/s_libm_ldexp.S b/sysdeps/ia64/fpu/s_libm_ldexp.S
index 1fc2c3f..2aaf2c3 100644
--- a/sysdeps/ia64/fpu/s_libm_ldexp.S
+++ b/sysdeps/ia64/fpu/s_libm_ldexp.S
@@ -21,33 +21,34 @@
// products derived from this software without specific prior written
// permission.
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
+// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/02/00 Initial version
-// 01/26/01 ldexp completely reworked and now standalone version
+// 01/26/01 ldexp completely reworked and now standalone version
// 01/04/02 Added handling for int 32 or 64 bits
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 08/04/03 Improved performance
//
// API
//==============================================================
-// double __libm_ldexp (double x, int n, int int_type)
+// double __libm_ldexp (double x, int n, int int_type)
// input floating point f8 and int n (r33), int int_type (r34)
// output floating point f8
//
@@ -55,27 +56,41 @@
// int_type = 1 if int is 64 bits
//
// Returns x* 2**n using an fma and detects overflow
-// and underflow.
+// and underflow.
//
//
+// Strategy:
+// Compute biased exponent of result exp_Result = N + exp_X
+// Break into ranges:
+// exp_Result > 0x103fe -> Certain overflow
+// exp_Result = 0x103fe -> Possible overflow
+// 0x0fc01 <= exp_Result < 0x103fe -> No over/underflow (main path)
+// 0x0fc01 - 52 <= exp_Result < 0x0fc01 -> Possible underflow
+// exp_Result < 0x0fc01 - 52 -> Certain underflow
FR_Big = f6
FR_NBig = f7
FR_Floating_X = f8
FR_Result = f8
FR_Result2 = f9
-FR_Result3 = f11
-FR_Norm_X = f12
-FR_Two_N = f14
-FR_Two_to_Big = f15
+FR_Result3 = f10
+FR_Norm_X = f11
+FR_Two_N = f12
+GR_neg_ov_limit= r14
GR_N_Biased = r15
GR_Big = r16
GR_NBig = r17
-GR_Scratch = r18
-GR_Scratch1 = r19
+GR_exp_Result = r18
+GR_pos_ov_limit= r19
GR_Bias = r20
GR_N_as_int = r21
+GR_signexp_X = r22
+GR_exp_X = r23
+GR_exp_mask = r24
+GR_max_exp = r25
+GR_min_exp = r26
+GR_min_den_exp = r27
GR_SAVE_B0 = r32
GR_SAVE_GP = r33
@@ -93,243 +108,298 @@ GLOBAL_LIBM_ENTRY(__libm_ldexp)
// Build the exponent Bias
//
{ .mfi
- alloc r32=ar.pfs,3,0,4,0
- fclass.m p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
- addl GR_Bias = 0x0FFFF,r0
+ getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x
+ fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero
+ mov GR_Bias = 0x0ffff
}
-
//
-// Is N zero?
// Normalize x
// Is integer type 32 bits?
//
{ .mfi
- cmp.eq p6,p0 = r33,r0
- fnorm.s1 FR_Norm_X = FR_Floating_X
- cmp.eq p8,p9 = r34,r0
+ mov GR_Big = 35000 // If N this big then certain overflow
+ fnorm.s1 FR_Norm_X = FR_Floating_X
+ cmp.eq p8,p9 = r34,r0
}
;;
// Sign extend N if int is 32 bits
{ .mfi
(p9) mov GR_N_as_int = r33 // Copy N if int is 64 bits
- nop.f 0
+ fclass.m p9,p0 = FR_Floating_X, 0x0b // Test for x=unorm
(p8) sxt4 GR_N_as_int = r33 // Sign extend N if int is 32 bits
}
+{ .mfi
+ mov GR_NBig = -35000 // If N this small then certain underflow
+ nop.f 0
+ mov GR_max_exp = 0x103fe // Exponent of maximum double
+}
+;;
+
+// Create biased exponent for 2**N
+{ .mfi
+ add GR_N_Biased = GR_Bias,GR_N_as_int
+ nop.f 0
+ cmp.ge p7, p0 = GR_N_as_int, GR_Big // Certain overflow?
+}
+{ .mib
+ cmp.le p8, p0 = GR_N_as_int, GR_NBig // Certain underflow?
+ mov GR_min_exp = 0x0fc01 // Exponent of minimum double
+(p9) br.cond.spnt LDEXP_UNORM // Branch if x=unorm
+}
+;;
+
+LDEXP_COMMON:
+// Main path continues. Also return here from x=unorm path.
+// Create 2**N
+.pred.rel "mutex",p7,p8
+{ .mfi
+ setf.exp FR_Two_N = GR_N_Biased
+ nop.f 0
+(p7) mov GR_N_as_int = GR_Big // Limit max N
+}
+{ .mfi
+(p8) mov GR_N_as_int = GR_NBig // Limit min N
+ nop.f 0
+(p8) cmp.eq p7,p0 = r0,r0 // Set p7 if |N| big
+}
;;
//
-// Branch and return special values.
-// Create -35000
-// Create 35000
+// Create biased exponent for 2**N for N big
+// Is N zero?
//
{ .mfi
- addl GR_Big = 35000,r0
+(p7) add GR_N_Biased = GR_Bias,GR_N_as_int
nop.f 0
- add GR_N_Biased = GR_Bias,GR_N_as_int
+ cmp.eq.or p6,p0 = r33,r0
}
-{ .mfb
- addl GR_NBig = -35000,r0
-(p7) fma.d.s0 FR_Result = FR_Floating_X,f1, f0
-(p7) br.ret.spnt b0
-};;
+{ .mfi
+ mov GR_pos_ov_limit = 0x103ff // Exponent for positive overflow
+ nop.f 0
+ mov GR_exp_mask = 0x1ffff // Exponent mask
+}
+;;
//
-// Build the exponent Bias
-// Return x when N = 0
+// Create 2**N for N big
+// Return x when N = 0 or X = Nan, Inf, Zero
//
{ .mfi
- setf.exp FR_Two_N = GR_N_Biased
+(p7) setf.exp FR_Two_N = GR_N_Biased
nop.f 0
- addl GR_Scratch1 = 0x063BF,r0
+ mov GR_min_den_exp = 0x0fc01 - 52 // Exponent of min denorm dble
}
{ .mfb
- addl GR_Scratch = 0x019C3F,r0
-(p6) fma.d.s0 FR_Result = FR_Floating_X,f1, f0
-(p6) br.ret.spnt b0
-};;
+ and GR_exp_X = GR_exp_mask, GR_signexp_X
+(p6) fma.d.s0 FR_Result = FR_Floating_X, f1, f0
+(p6) br.ret.spnt b0
+}
+;;
//
-// Create 2*big
-// Create 2**-big
-// Is N > 35000
-// Is N < -35000
// Raise Denormal operand flag with compare
-// Main path, create 2**N
+// Compute biased result exponent
//
{ .mfi
- setf.exp FR_NBig = GR_Scratch1
- nop.f 0
- cmp.ge p6, p0 = GR_N_as_int, GR_Big
-}
-{ .mfi
- setf.exp FR_Big = GR_Scratch
+ add GR_exp_Result = GR_exp_X, GR_N_as_int
fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
- cmp.le p8, p0 = GR_N_as_int, GR_NBig
-};;
+ mov GR_neg_ov_limit = 0x303ff // Exponent for negative overflow
+}
+;;
//
-// Adjust 2**N if N was very small or very large
+// Do final operation
//
{ .mfi
- nop.m 0
-(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
- nop.i 0
+ cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow
+ fma.d.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
+ cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
}
-{ .mlx
- nop.m 999
- movl GR_Scratch = 0x00000000000303FF
-};;
+{ .mfb
+ nop.m 0
+ nop.f 0
+(p9) br.cond.spnt LDEXP_UNDERFLOW // Branch if certain underflow
+}
+;;
+{ .mib
+(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow
+(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow
+(p7) br.ret.sptk b0 // Return from main path
+}
+;;
-{ .mfi
- nop.m 0
-(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
- nop.i 0
+{ .bbb
+(p6) br.cond.spnt LDEXP_OVERFLOW // Branch if certain overflow
+(p8) br.cond.spnt LDEXP_POSSIBLE_OVERFLOW // Branch if possible overflow
+(p9) br.cond.spnt LDEXP_POSSIBLE_UNDERFLOW // Branch if possible underflow
}
-{ .mlx
- nop.m 999
- movl GR_Scratch1= 0x00000000000103FF
-};;
+;;
-// Set up necessary status fields
+// Here if possible underflow.
+// Resulting exponent: 0x0fc01-52 <= exp_Result < 0x0fc01
+LDEXP_POSSIBLE_UNDERFLOW:
+//
+// Here if possible overflow.
+// Resulting exponent: 0x103fe = exp_Result
+LDEXP_POSSIBLE_OVERFLOW:
+
+// Set up necessary status fields
//
// S0 user supplied status
// S2 user supplied status + WRE + TD (Overflows)
// S3 user supplied status + FZ + TD (Underflows)
//
{ .mfi
- nop.m 999
+ nop.m 0
fsetc.s3 0x7F,0x41
- nop.i 999
+ nop.i 0
}
{ .mfi
- nop.m 999
+ nop.m 0
fsetc.s2 0x7F,0x42
- nop.i 999
-};;
+ nop.i 0
+}
+;;
//
-// Do final operation
+// Do final operation with s2 and s3
//
{ .mfi
- setf.exp FR_NBig = GR_Scratch
- fma.d.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
- nop.i 999
+ setf.exp FR_NBig = GR_neg_ov_limit
+ fma.d.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
+ nop.i 0
}
{ .mfi
- nop.m 999
- fma.d.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
-{ .mfi
- setf.exp FR_Big = GR_Scratch1
- fma.d.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
+ setf.exp FR_Big = GR_pos_ov_limit
+ fma.d.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
+ nop.i 0
+}
+;;
-//
// Check for overflow or underflow.
// Restore s3
// Restore s2
//
{ .mfi
- nop.m 0
+ nop.m 0
fsetc.s3 0x7F,0x40
- nop.i 999
+ nop.i 0
}
{ .mfi
- nop.m 0
+ nop.m 0
fsetc.s2 0x7F,0x40
- nop.i 999
-};;
+ nop.i 0
+}
+;;
//
// Is the result zero?
//
{ .mfi
- nop.m 999
+ nop.m 0
fclass.m p6, p0 = FR_Result3, 0x007
- nop.i 999
-}
+ nop.i 0
+}
{ .mfi
- addl GR_Tag = 146, r0
+ nop.m 0
fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big
- nop.i 0
-};;
+ nop.i 0
+}
+;;
//
// Detect masked underflow - Tiny + Inexact Only
//
{ .mfi
- nop.m 999
+ nop.m 0
(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
- nop.i 999
-};;
+ nop.i 0
+}
+;;
//
// Is result bigger the allowed range?
// Branch out for underflow
//
{ .mfb
-(p6) addl GR_Tag = 147, r0
+ nop.m 0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
-(p6) br.cond.spnt LDEXP_UNDERFLOW
-};;
+(p6) br.cond.spnt LDEXP_UNDERFLOW
+}
+;;
//
// Branch out for overflow
//
-{ .mbb
- nop.m 0
-(p7) br.cond.spnt LDEXP_OVERFLOW
-(p9) br.cond.spnt LDEXP_OVERFLOW
-};;
+{ .bbb
+(p7) br.cond.spnt LDEXP_OVERFLOW
+(p9) br.cond.spnt LDEXP_OVERFLOW
+ br.ret.sptk b0 // Return from main path.
+}
+;;
-//
-// Return from main path.
-//
-{ .mfb
- nop.m 999
- nop.f 0
- br.ret.sptk b0;;
+// Here if result overflows
+LDEXP_OVERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0
+ addl GR_Tag = 146, r0 // Set error tag for overflow
+ br.cond.sptk __libm_error_region // Call error support for overflow
+}
+;;
+
+// Here if result underflows
+LDEXP_UNDERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0
+ addl GR_Tag = 147, r0 // Set error tag for underflow
+ br.cond.sptk __libm_error_region // Call error support for underflow
}
+;;
-GLOBAL_LIBM_END(__libm_ldexp)
-__libm_error_region:
+// Here if x=unorm
+LDEXP_UNORM:
+{ .mib
+ getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x
+ nop.i 0
+ br.cond.sptk LDEXP_COMMON // Return to main path
+}
+;;
-LDEXP_OVERFLOW:
-LDEXP_UNDERFLOW:
+
+GLOBAL_LIBM_END(__libm_ldexp)
+LOCAL_LIBM_ENTRY(__libm_error_region)
//
// Get stack address of N
//
.prologue
{ .mfi
- add GR_Parameter_Y=-32,sp
+ add GR_Parameter_Y=-32,sp
nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs
+ mov GR_SAVE_PFS=ar.pfs
}
//
-// Adjust sp
+// Adjust sp
//
{ .mfi
.fframe 64
- add sp=-64,sp
+ add sp=-64,sp
nop.f 0
- mov GR_SAVE_GP=gp
+ mov GR_SAVE_GP=gp
};;
//
-// Store N on stack in correct position
+// Store N on stack in correct position
// Locate the address of x on stack
//
{ .mmi
- st8 [GR_Parameter_Y] = GR_N_as_int,16
- add GR_Parameter_X = 16,sp
+ st8 [GR_Parameter_Y] = GR_N_as_int,16
+ add GR_Parameter_X = 16,sp
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0
+ mov GR_SAVE_B0=b0
};;
//
@@ -338,42 +408,42 @@ LDEXP_UNDERFLOW:
//
.body
{ .mib
- stfd [GR_Parameter_X] = FR_Norm_X
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
+ stfd [GR_Parameter_X] = FR_Norm_X
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0
}
{ .mib
- stfd [GR_Parameter_Y] = FR_Result
+ stfd [GR_Parameter_Y] = FR_Result
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support#
+ br.call.sptk b0=__libm_error_support#
};;
//
// Get location of result on stack
//
{ .mmi
+ add GR_Parameter_RESULT = 48,sp
nop.m 0
- nop.m 0
- add GR_Parameter_RESULT = 48,sp
+ nop.i 0
};;
//
-// Get the new result
+// Get the new result
//
{ .mmi
- ldfd FR_Result = [GR_Parameter_RESULT]
+ ldfd FR_Result = [GR_Parameter_RESULT]
.restore sp
- add sp = 64,sp
- mov b0 = GR_SAVE_B0
+ add sp = 64,sp
+ mov b0 = GR_SAVE_B0
};;
//
// Restore gp, ar.pfs and return
//
{ .mib
- mov gp = GR_SAVE_GP
- mov ar.pfs = GR_SAVE_PFS
- br.ret.sptk b0
+ mov gp = GR_SAVE_GP
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
};;
LOCAL_LIBM_END(__libm_error_region)
diff --git a/sysdeps/ia64/fpu/s_libm_ldexpf.S b/sysdeps/ia64/fpu/s_libm_ldexpf.S
index d7f161c..1326a14 100644
--- a/sysdeps/ia64/fpu/s_libm_ldexpf.S
+++ b/sysdeps/ia64/fpu/s_libm_ldexpf.S
@@ -21,33 +21,34 @@
// products derived from this software without specific prior written
// permission.
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
+// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/02/00 Initial version
-// 01/26/01 ldexpf completely reworked and now standalone version
+// 01/26/01 ldexpf completely reworked and now standalone version
// 01/04/02 Added handling for int 32 or 64 bits
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 08/04/03 Improved performance
//
// API
//==============================================================
-// float __libm_ldexpf (float x, int n, int int_type)
+// float __libm_ldexpf (float x, int n, int int_type)
// input floating point f8 and int n (r33), int int_type (r34)
// output floating point f8
//
@@ -55,27 +56,41 @@
// int_type = 1 if int is 64 bits
//
// Returns x* 2**n using an fma and detects overflow
-// and underflow.
+// and underflow.
//
//
+// Strategy:
+// Compute biased exponent of result exp_Result = N + exp_X
+// Break into ranges:
+// exp_Result > 0x1007e -> Certain overflow
+// exp_Result = 0x1007e -> Possible overflow
+// 0x0ff81 <= exp_Result < 0x1007e -> No over/underflow (main path)
+// 0x0ff81 - 23 <= exp_Result < 0x0ff81 -> Possible underflow
+// exp_Result < 0x0ff81 - 23 -> Certain underflow
FR_Big = f6
FR_NBig = f7
FR_Floating_X = f8
FR_Result = f8
FR_Result2 = f9
-FR_Result3 = f11
-FR_Norm_X = f12
-FR_Two_N = f14
-FR_Two_to_Big = f15
+FR_Result3 = f10
+FR_Norm_X = f11
+FR_Two_N = f12
+GR_neg_ov_limit= r14
GR_N_Biased = r15
GR_Big = r16
GR_NBig = r17
-GR_Scratch = r18
-GR_Scratch1 = r19
+GR_exp_Result = r18
+GR_pos_ov_limit= r19
GR_Bias = r20
GR_N_as_int = r21
+GR_signexp_X = r22
+GR_exp_X = r23
+GR_exp_mask = r24
+GR_max_exp = r25
+GR_min_exp = r26
+GR_min_den_exp = r27
GR_SAVE_B0 = r32
GR_SAVE_GP = r33
@@ -93,241 +108,298 @@ GLOBAL_LIBM_ENTRY(__libm_ldexpf)
// Build the exponent Bias
//
{ .mfi
- alloc r32=ar.pfs,3,0,4,0
- fclass.m p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
- addl GR_Bias = 0x0FFFF,r0
+ getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x
+ fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero
+ mov GR_Bias = 0x0ffff
}
//
-// Is N zero?
// Normalize x
// Is integer type 32 bits?
//
{ .mfi
- cmp.eq p6,p0 = r33,r0
- fnorm.s1 FR_Norm_X = FR_Floating_X
- cmp.eq p8,p9 = r34,r0
+ mov GR_Big = 35000 // If N this big then certain overflow
+ fnorm.s1 FR_Norm_X = FR_Floating_X
+ cmp.eq p8,p9 = r34,r0
}
;;
// Sign extend N if int is 32 bits
{ .mfi
(p9) mov GR_N_as_int = r33 // Copy N if int is 64 bits
- nop.f 0
+ fclass.m p9,p0 = FR_Floating_X, 0x0b // Test for x=unorm
(p8) sxt4 GR_N_as_int = r33 // Sign extend N if int is 32 bits
}
+{ .mfi
+ mov GR_NBig = -35000 // If N this small then certain underflow
+ nop.f 0
+ mov GR_max_exp = 0x1007e // Exponent of maximum float
+}
+;;
+
+// Create biased exponent for 2**N
+{ .mfi
+ add GR_N_Biased = GR_Bias,GR_N_as_int
+ nop.f 0
+ cmp.ge p7, p0 = GR_N_as_int, GR_Big // Certain overflow?
+}
+{ .mib
+ cmp.le p8, p0 = GR_N_as_int, GR_NBig // Certain underflow?
+ mov GR_min_exp = 0x0ff81 // Exponent of minimum float
+(p9) br.cond.spnt LDEXPF_UNORM // Branch if x=unorm
+}
+;;
+
+LDEXPF_COMMON:
+// Main path continues. Also return here from x=unorm path.
+// Create 2**N
+.pred.rel "mutex",p7,p8
+{ .mfi
+ setf.exp FR_Two_N = GR_N_Biased
+ nop.f 0
+(p7) mov GR_N_as_int = GR_Big // Limit max N
+}
+{ .mfi
+(p8) mov GR_N_as_int = GR_NBig // Limit min N
+ nop.f 0
+(p8) cmp.eq p7,p0 = r0,r0 // Set p7 if |N| big
+}
;;
//
-// Branch and return special values.
-// Create -35000
-// Create 35000
+// Create biased exponent for 2**N for N big
+// Is N zero?
//
{ .mfi
- addl GR_Big = 35000,r0
+(p7) add GR_N_Biased = GR_Bias,GR_N_as_int
nop.f 0
- add GR_N_Biased = GR_Bias,GR_N_as_int
+ cmp.eq.or p6,p0 = r33,r0
}
-{ .mfb
- addl GR_NBig = -35000,r0
-(p7) fma.s.s0 FR_Result = FR_Floating_X,f1, f0
-(p7) br.ret.spnt b0
-};;
+{ .mfi
+ mov GR_pos_ov_limit = 0x1007f // Exponent for positive overflow
+ nop.f 0
+ mov GR_exp_mask = 0x1ffff // Exponent mask
+}
+;;
//
-// Build the exponent Bias
-// Return x when N = 0
+// Create 2**N for N big
+// Return x when N = 0 or X = Nan, Inf, Zero
//
{ .mfi
- setf.exp FR_Two_N = GR_N_Biased
+(p7) setf.exp FR_Two_N = GR_N_Biased
nop.f 0
- addl GR_Scratch1 = 0x063BF,r0
+ mov GR_min_den_exp = 0x0ff81 - 23 // Exponent of min denorm float
}
{ .mfb
- addl GR_Scratch = 0x019C3F,r0
-(p6) fma.s.s0 FR_Result = FR_Floating_X,f1, f0
-(p6) br.ret.spnt b0
-};;
+ and GR_exp_X = GR_exp_mask, GR_signexp_X
+(p6) fma.s.s0 FR_Result = FR_Floating_X, f1, f0
+(p6) br.ret.spnt b0
+}
+;;
//
-// Create 2*big
-// Create 2**-big
-// Is N > 35000
-// Is N < -35000
// Raise Denormal operand flag with compare
-// Main path, create 2**N
+// Compute biased result exponent
//
{ .mfi
- setf.exp FR_NBig = GR_Scratch1
- nop.f 0
- cmp.ge p6, p0 = GR_N_as_int, GR_Big
-}
-{ .mfi
- setf.exp FR_Big = GR_Scratch
+ add GR_exp_Result = GR_exp_X, GR_N_as_int
fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
- cmp.le p8, p0 = GR_N_as_int, GR_NBig
-};;
+ mov GR_neg_ov_limit = 0x3007f // Exponent for negative overflow
+}
+;;
//
-// Adjust 2**N if N was very small or very large
+// Do final operation
//
{ .mfi
- nop.m 0
-(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
- nop.i 0
+ cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow
+ fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
+ cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
}
-{ .mlx
- nop.m 999
- movl GR_Scratch = 0x000000000003007F
-};;
+{ .mfb
+ nop.m 0
+ nop.f 0
+(p9) br.cond.spnt LDEXPF_UNDERFLOW // Branch if certain underflow
+}
+;;
+{ .mib
+(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow
+(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow
+(p7) br.ret.sptk b0 // Return from main path
+}
+;;
-{ .mfi
- nop.m 0
-(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
- nop.i 0
+{ .bbb
+(p6) br.cond.spnt LDEXPF_OVERFLOW // Branch if certain overflow
+(p8) br.cond.spnt LDEXPF_POSSIBLE_OVERFLOW // Branch if possible overflow
+(p9) br.cond.spnt LDEXPF_POSSIBLE_UNDERFLOW // Branch if possible underflow
}
-{ .mlx
- nop.m 999
- movl GR_Scratch1= 0x000000000001007F
-};;
+;;
-// Set up necessary status fields
+// Here if possible underflow.
+// Resulting exponent: 0x0ff81-23 <= exp_Result < 0x0ff81
+LDEXPF_POSSIBLE_UNDERFLOW:
+//
+// Here if possible overflow.
+// Resulting exponent: 0x1007e = exp_Result
+LDEXPF_POSSIBLE_OVERFLOW:
+
+// Set up necessary status fields
//
// S0 user supplied status
// S2 user supplied status + WRE + TD (Overflows)
// S3 user supplied status + FZ + TD (Underflows)
//
{ .mfi
- nop.m 999
+ nop.m 0
fsetc.s3 0x7F,0x41
- nop.i 999
+ nop.i 0
}
{ .mfi
- nop.m 999
+ nop.m 0
fsetc.s2 0x7F,0x42
- nop.i 999
-};;
+ nop.i 0
+}
+;;
//
-// Do final operation
+// Do final operation with s2 and s3
//
{ .mfi
- setf.exp FR_NBig = GR_Scratch
- fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
- nop.i 999
+ setf.exp FR_NBig = GR_neg_ov_limit
+ fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
+ nop.i 0
}
{ .mfi
- nop.m 999
- fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
-{ .mfi
- setf.exp FR_Big = GR_Scratch1
- fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
+ setf.exp FR_Big = GR_pos_ov_limit
+ fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
+ nop.i 0
+}
+;;
// Check for overflow or underflow.
// Restore s3
// Restore s2
//
{ .mfi
- nop.m 0
+ nop.m 0
fsetc.s3 0x7F,0x40
- nop.i 999
+ nop.i 0
}
{ .mfi
- nop.m 0
+ nop.m 0
fsetc.s2 0x7F,0x40
- nop.i 999
-};;
+ nop.i 0
+}
+;;
//
// Is the result zero?
//
{ .mfi
- nop.m 999
+ nop.m 0
fclass.m p6, p0 = FR_Result3, 0x007
- nop.i 999
-}
+ nop.i 0
+}
{ .mfi
- addl GR_Tag = 148, r0
+ nop.m 0
fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big
- nop.i 0
-};;
+ nop.i 0
+}
+;;
//
// Detect masked underflow - Tiny + Inexact Only
//
{ .mfi
- nop.m 999
+ nop.m 0
(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
- nop.i 999
-};;
+ nop.i 0
+}
+;;
//
// Is result bigger the allowed range?
// Branch out for underflow
//
{ .mfb
-(p6) addl GR_Tag = 149, r0
+ nop.m 0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
-(p6) br.cond.spnt LDEXPF_UNDERFLOW
-};;
+(p6) br.cond.spnt LDEXPF_UNDERFLOW
+}
+;;
//
// Branch out for overflow
//
-{ .mbb
- nop.m 0
-(p7) br.cond.spnt LDEXPF_OVERFLOW
-(p9) br.cond.spnt LDEXPF_OVERFLOW
-};;
+{ .bbb
+(p7) br.cond.spnt LDEXPF_OVERFLOW
+(p9) br.cond.spnt LDEXPF_OVERFLOW
+ br.ret.sptk b0 // Return from main path.
+}
+;;
-//
-// Return from main path.
-//
-{ .mfb
- nop.m 999
- nop.f 0
- br.ret.sptk b0;;
+// Here if result overflows
+LDEXPF_OVERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0
+ addl GR_Tag = 148, r0 // Set error tag for overflow
+ br.cond.sptk __libm_error_region // Call error support for overflow
}
+;;
-GLOBAL_LIBM_END(__libm_ldexpf)
-__libm_error_region:
+// Here if result underflows
+LDEXPF_UNDERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0
+ addl GR_Tag = 149, r0 // Set error tag for underflow
+ br.cond.sptk __libm_error_region // Call error support for underflow
+}
+;;
+
+// Here if x=unorm
+LDEXPF_UNORM:
+{ .mib
+ getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x
+ nop.i 0
+ br.cond.sptk LDEXPF_COMMON // Return to main path
+}
+;;
-LDEXPF_OVERFLOW:
-LDEXPF_UNDERFLOW:
+
+GLOBAL_LIBM_END(__libm_ldexpf)
+LOCAL_LIBM_ENTRY(__libm_error_region)
//
// Get stack address of N
//
.prologue
{ .mfi
- add GR_Parameter_Y=-32,sp
+ add GR_Parameter_Y=-32,sp
nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs
+ mov GR_SAVE_PFS=ar.pfs
}
//
-// Adjust sp
+// Adjust sp
//
{ .mfi
.fframe 64
- add sp=-64,sp
+ add sp=-64,sp
nop.f 0
- mov GR_SAVE_GP=gp
+ mov GR_SAVE_GP=gp
};;
//
-// Store N on stack in correct position
+// Store N on stack in correct position
// Locate the address of x on stack
//
{ .mmi
- st8 [GR_Parameter_Y] = GR_N_as_int,16
- add GR_Parameter_X = 16,sp
+ st8 [GR_Parameter_Y] = GR_N_as_int,16
+ add GR_Parameter_X = 16,sp
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0
+ mov GR_SAVE_B0=b0
};;
//
@@ -336,42 +408,42 @@ LDEXPF_UNDERFLOW:
//
.body
{ .mib
- stfs [GR_Parameter_X] = FR_Norm_X
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
+ stfs [GR_Parameter_X] = FR_Norm_X
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0
}
{ .mib
- stfs [GR_Parameter_Y] = FR_Result
+ stfs [GR_Parameter_Y] = FR_Result
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support#
+ br.call.sptk b0=__libm_error_support#
};;
//
// Get location of result on stack
//
{ .mmi
+ add GR_Parameter_RESULT = 48,sp
nop.m 0
- nop.m 0
- add GR_Parameter_RESULT = 48,sp
+ nop.i 0
};;
//
-// Get the new result
+// Get the new result
//
{ .mmi
- ldfs FR_Result = [GR_Parameter_RESULT]
+ ldfs FR_Result = [GR_Parameter_RESULT]
.restore sp
- add sp = 64,sp
- mov b0 = GR_SAVE_B0
+ add sp = 64,sp
+ mov b0 = GR_SAVE_B0
};;
//
// Restore gp, ar.pfs and return
//
{ .mib
- mov gp = GR_SAVE_GP
- mov ar.pfs = GR_SAVE_PFS
- br.ret.sptk b0
+ mov gp = GR_SAVE_GP
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
};;
LOCAL_LIBM_END(__libm_error_region)
diff --git a/sysdeps/ia64/fpu/s_libm_ldexpl.S b/sysdeps/ia64/fpu/s_libm_ldexpl.S
index 72d4560..fffda9e 100644
--- a/sysdeps/ia64/fpu/s_libm_ldexpl.S
+++ b/sysdeps/ia64/fpu/s_libm_ldexpl.S
@@ -21,33 +21,34 @@
// products derived from this software without specific prior written
// permission.
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
+// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/02/00 Initial version
-// 01/26/01 ldexpl completely reworked and now standalone version
+// 01/26/01 ldexpl completely reworked and now standalone version
// 01/04/02 Added handling for int 32 or 64 bits
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 08/04/03 Improved performance
//
// API
//==============================================================
-// long double __libm_ldexpl (long double x, int n, int int_type)
+// long double __libm_ldexpl (long double x, int n, int int_type)
// input floating point f8 and int n (r34), int int_type (r35)
// output floating point f8
//
@@ -55,27 +56,41 @@
// int_type = 1 if int is 64 bits
//
// Returns x* 2**n using an fma and detects overflow
-// and underflow.
+// and underflow.
//
//
+// Strategy:
+// Compute biased exponent of result exp_Result = N + exp_X
+// Break into ranges:
+// exp_Result > 0x13ffe -> Certain overflow
+// exp_Result = 0x13ffe -> Possible overflow
+// 0x0c001 <= exp_Result < 0x13ffe -> No over/underflow (main path)
+// 0x0c001 - 63 <= exp_Result < 0x0c001 -> Possible underflow
+// exp_Result < 0x0c001 - 63 -> Certain underflow
FR_Big = f6
FR_NBig = f7
FR_Floating_X = f8
FR_Result = f8
FR_Result2 = f9
-FR_Result3 = f11
-FR_Norm_X = f12
-FR_Two_N = f14
-FR_Two_to_Big = f15
+FR_Result3 = f10
+FR_Norm_X = f11
+FR_Two_N = f12
+GR_neg_ov_limit= r14
GR_N_Biased = r15
GR_Big = r16
GR_NBig = r17
-GR_Scratch = r18
-GR_Scratch1 = r19
+GR_exp_Result = r18
+GR_pos_ov_limit= r19
GR_Bias = r20
GR_N_as_int = r21
+GR_signexp_X = r22
+GR_exp_X = r23
+GR_exp_mask = r24
+GR_max_exp = r25
+GR_min_exp = r26
+GR_min_den_exp = r27
GR_SAVE_B0 = r32
GR_SAVE_GP = r33
@@ -93,242 +108,298 @@ GLOBAL_LIBM_ENTRY(__libm_ldexpl)
// Build the exponent Bias
//
{ .mfi
- alloc r32=ar.pfs,3,0,4,0
- fclass.m p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
- addl GR_Bias = 0x0FFFF,r0
+ getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x
+ fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero
+ mov GR_Bias = 0x0ffff
}
-
//
-// Is N zero?
// Normalize x
// Is integer type 32 bits?
//
{ .mfi
- cmp.eq p6,p0 = r34,r0
- fnorm.s1 FR_Norm_X = FR_Floating_X
- cmp.eq p8,p9 = r35,r0
+ mov GR_Big = 35000 // If N this big then certain overflow
+ fnorm.s1 FR_Norm_X = FR_Floating_X
+ cmp.eq p8,p9 = r35,r0
}
;;
// Sign extend N if int is 32 bits
{ .mfi
(p9) mov GR_N_as_int = r34 // Copy N if int is 64 bits
- nop.f 0
+ fclass.m p9,p0 = FR_Floating_X, 0x0b // Test for x=unorm
(p8) sxt4 GR_N_as_int = r34 // Sign extend N if int is 32 bits
}
+{ .mfi
+ mov GR_NBig = -35000 // If N this small then certain underflow
+ nop.f 0
+ mov GR_max_exp = 0x13ffe // Exponent of maximum long double
+}
+;;
+
+// Create biased exponent for 2**N
+{ .mfi
+ add GR_N_Biased = GR_Bias,GR_N_as_int
+ nop.f 0
+ cmp.ge p7, p0 = GR_N_as_int, GR_Big // Certain overflow?
+}
+{ .mib
+ cmp.le p8, p0 = GR_N_as_int, GR_NBig // Certain underflow?
+ mov GR_min_exp = 0x0c001 // Exponent of minimum long double
+(p9) br.cond.spnt LDEXPL_UNORM // Branch if x=unorm
+}
+;;
+
+LDEXPL_COMMON:
+// Main path continues. Also return here from x=unorm path.
+// Create 2**N
+.pred.rel "mutex",p7,p8
+{ .mfi
+ setf.exp FR_Two_N = GR_N_Biased
+ nop.f 0
+(p7) mov GR_N_as_int = GR_Big // Limit max N
+}
+{ .mfi
+(p8) mov GR_N_as_int = GR_NBig // Limit min N
+ nop.f 0
+(p8) cmp.eq p7,p0 = r0,r0 // Set p7 if |N| big
+}
;;
//
-// Branch and return special values.
-// Create -35000
-// Create 35000
+// Create biased exponent for 2**N for N big
+// Is N zero?
//
{ .mfi
- addl GR_Big = 35000,r0
+(p7) add GR_N_Biased = GR_Bias,GR_N_as_int
nop.f 0
- add GR_N_Biased = GR_Bias,GR_N_as_int
+ cmp.eq.or p6,p0 = r34,r0
}
-{ .mfb
- addl GR_NBig = -35000,r0
-(p7) fma.s0 FR_Result = FR_Floating_X,f1, f0
-(p7) br.ret.spnt b0
-};;
+{ .mfi
+ mov GR_pos_ov_limit = 0x13fff // Exponent for positive overflow
+ nop.f 0
+ mov GR_exp_mask = 0x1ffff // Exponent mask
+}
+;;
//
-// Build the exponent Bias
-// Return x when N = 0
+// Create 2**N for N big
+// Return x when N = 0 or X = Nan, Inf, Zero
//
{ .mfi
- setf.exp FR_Two_N = GR_N_Biased
+(p7) setf.exp FR_Two_N = GR_N_Biased
nop.f 0
- addl GR_Scratch1 = 0x063BF,r0
+ mov GR_min_den_exp = 0x0c001 - 63 // Exp of min denorm long dble
}
{ .mfb
- addl GR_Scratch = 0x019C3F,r0
-(p6) fma.s0 FR_Result = FR_Floating_X,f1, f0
-(p6) br.ret.spnt b0
-};;
+ and GR_exp_X = GR_exp_mask, GR_signexp_X
+(p6) fma.s0 FR_Result = FR_Floating_X, f1, f0
+(p6) br.ret.spnt b0
+}
+;;
//
-// Create 2*big
-// Create 2**-big
-// Is N > 35000
-// Is N < -35000
// Raise Denormal operand flag with compare
-// Main path, create 2**N
+// Compute biased result exponent
//
{ .mfi
- setf.exp FR_NBig = GR_Scratch1
- nop.f 0
- cmp.ge p6, p0 = GR_N_as_int, GR_Big
-}
-{ .mfi
- setf.exp FR_Big = GR_Scratch
+ add GR_exp_Result = GR_exp_X, GR_N_as_int
fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
- cmp.le p8, p0 = GR_N_as_int, GR_NBig
-};;
+ mov GR_neg_ov_limit = 0x33fff // Exponent for negative overflow
+}
+;;
//
-// Adjust 2**N if N was very small or very large
+// Do final operation
//
{ .mfi
- nop.m 0
-(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
- nop.i 0
+ cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow
+ fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
+ cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
}
-{ .mlx
- nop.m 999
- movl GR_Scratch = 0x0000000000033FFF
-};;
+{ .mfb
+ nop.m 0
+ nop.f 0
+(p9) br.cond.spnt LDEXPL_UNDERFLOW // Branch if certain underflow
+}
+;;
+{ .mib
+(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow
+(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow
+(p7) br.ret.sptk b0 // Return from main path
+}
+;;
-{ .mfi
- nop.m 0
-(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
- nop.i 0
+{ .bbb
+(p6) br.cond.spnt LDEXPL_OVERFLOW // Branch if certain overflow
+(p8) br.cond.spnt LDEXPL_POSSIBLE_OVERFLOW // Branch if possible overflow
+(p9) br.cond.spnt LDEXPL_POSSIBLE_UNDERFLOW // Branch if possible underflow
}
-{ .mlx
- nop.m 999
- movl GR_Scratch1= 0x0000000000013FFF
-};;
+;;
-// Set up necessary status fields
+// Here if possible underflow.
+// Resulting exponent: 0x0c001-63 <= exp_Result < 0x0c001
+LDEXPL_POSSIBLE_UNDERFLOW:
+//
+// Here if possible overflow.
+// Resulting exponent: 0x13ffe = exp_Result
+LDEXPL_POSSIBLE_OVERFLOW:
+
+// Set up necessary status fields
//
// S0 user supplied status
// S2 user supplied status + WRE + TD (Overflows)
// S3 user supplied status + FZ + TD (Underflows)
//
{ .mfi
- nop.m 999
+ nop.m 0
fsetc.s3 0x7F,0x41
- nop.i 999
+ nop.i 0
}
{ .mfi
- nop.m 999
+ nop.m 0
fsetc.s2 0x7F,0x42
- nop.i 999
-};;
+ nop.i 0
+}
+;;
//
-// Do final operation
+// Do final operation with s2 and s3
//
{ .mfi
- setf.exp FR_NBig = GR_Scratch
- fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
- nop.i 999
+ setf.exp FR_NBig = GR_neg_ov_limit
+ fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
+ nop.i 0
}
{ .mfi
- nop.m 999
- fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
-{ .mfi
- setf.exp FR_Big = GR_Scratch1
- fma.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
+ setf.exp FR_Big = GR_pos_ov_limit
+ fma.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
+ nop.i 0
+}
+;;
// Check for overflow or underflow.
// Restore s3
// Restore s2
//
{ .mfi
- nop.m 0
+ nop.m 0
fsetc.s3 0x7F,0x40
- nop.i 999
+ nop.i 0
}
{ .mfi
- nop.m 0
+ nop.m 0
fsetc.s2 0x7F,0x40
- nop.i 999
-};;
+ nop.i 0
+}
+;;
//
// Is the result zero?
//
{ .mfi
- nop.m 999
+ nop.m 0
fclass.m p6, p0 = FR_Result3, 0x007
- nop.i 999
-}
+ nop.i 0
+}
{ .mfi
- addl GR_Tag = 144, r0
+ nop.m 0
fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big
- nop.i 0
-};;
+ nop.i 0
+}
+;;
//
// Detect masked underflow - Tiny + Inexact Only
//
{ .mfi
- nop.m 999
+ nop.m 0
(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
- nop.i 999
-};;
+ nop.i 0
+}
+;;
//
// Is result bigger than the allowed range?
// Branch out for underflow
//
{ .mfb
-(p6) addl GR_Tag = 145, r0
+ nop.m 0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
-(p6) br.cond.spnt LDEXPL_UNDERFLOW
-};;
+(p6) br.cond.spnt LDEXPL_UNDERFLOW
+}
+;;
//
// Branch out for overflow
//
-{ .mbb
- nop.m 0
-(p7) br.cond.spnt LDEXPL_OVERFLOW
-(p9) br.cond.spnt LDEXPL_OVERFLOW
-};;
+{ .bbb
+(p7) br.cond.spnt LDEXPL_OVERFLOW
+(p9) br.cond.spnt LDEXPL_OVERFLOW
+ br.ret.sptk b0 // Return from main path.
+}
+;;
-//
-// Return from main path.
-//
-{ .mfb
- nop.m 999
- nop.f 0
- br.ret.sptk b0;;
+// Here if result overflows
+LDEXPL_OVERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0
+ addl GR_Tag = 144, r0 // Set error tag for overflow
+ br.cond.sptk __libm_error_region // Call error support for overflow
}
+;;
-GLOBAL_LIBM_END(__libm_ldexpl)
-__libm_error_region:
+// Here if result underflows
+LDEXPL_UNDERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0
+ addl GR_Tag = 145, r0 // Set error tag for underflow
+ br.cond.sptk __libm_error_region // Call error support for underflow
+}
+;;
+
+// Here if x=unorm
+LDEXPL_UNORM:
+{ .mib
+ getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x
+ nop.i 0
+ br.cond.sptk LDEXPL_COMMON // Return to main path
+}
+;;
-LDEXPL_OVERFLOW:
-LDEXPL_UNDERFLOW:
+
+GLOBAL_LIBM_END(__libm_ldexpl)
+LOCAL_LIBM_ENTRY(__libm_error_region)
//
// Get stack address of N
//
.prologue
{ .mfi
- add GR_Parameter_Y=-32,sp
+ add GR_Parameter_Y=-32,sp
nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs
+ mov GR_SAVE_PFS=ar.pfs
}
//
-// Adjust sp
+// Adjust sp
//
{ .mfi
.fframe 64
- add sp=-64,sp
+ add sp=-64,sp
nop.f 0
- mov GR_SAVE_GP=gp
+ mov GR_SAVE_GP=gp
};;
//
-// Store N on stack in correct position
+// Store N on stack in correct position
// Locate the address of x on stack
//
{ .mmi
- st8 [GR_Parameter_Y] = GR_N_as_int,16
- add GR_Parameter_X = 16,sp
+ st8 [GR_Parameter_Y] = GR_N_as_int,16
+ add GR_Parameter_X = 16,sp
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0
+ mov GR_SAVE_B0=b0
};;
//
@@ -337,42 +408,42 @@ LDEXPL_UNDERFLOW:
//
.body
{ .mib
- stfe [GR_Parameter_X] = FR_Norm_X
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
+ stfe [GR_Parameter_X] = FR_Norm_X
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0
}
{ .mib
- stfe [GR_Parameter_Y] = FR_Result
+ stfe [GR_Parameter_Y] = FR_Result
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support#
+ br.call.sptk b0=__libm_error_support#
};;
//
// Get location of result on stack
//
{ .mmi
+ add GR_Parameter_RESULT = 48,sp
nop.m 0
- nop.m 0
- add GR_Parameter_RESULT = 48,sp
+ nop.i 0
};;
//
-// Get the new result
+// Get the new result
//
{ .mmi
- ldfe FR_Result = [GR_Parameter_RESULT]
+ ldfe FR_Result = [GR_Parameter_RESULT]
.restore sp
- add sp = 64,sp
- mov b0 = GR_SAVE_B0
+ add sp = 64,sp
+ mov b0 = GR_SAVE_B0
};;
//
// Restore gp, ar.pfs and return
//
{ .mib
- mov gp = GR_SAVE_GP
- mov ar.pfs = GR_SAVE_PFS
- br.ret.sptk b0
+ mov gp = GR_SAVE_GP
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
};;
LOCAL_LIBM_END(__libm_error_region)
diff --git a/sysdeps/ia64/fpu/s_libm_scalbn.S b/sysdeps/ia64/fpu/s_libm_scalbn.S
index fb7ab93..eaccb7d 100644
--- a/sysdeps/ia64/fpu/s_libm_scalbn.S
+++ b/sysdeps/ia64/fpu/s_libm_scalbn.S
@@ -21,33 +21,34 @@
// products derived from this software without specific prior written
// permission.
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
+// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/02/00 Initial version
-// 01/26/01 Scalbn completely reworked and now standalone version
+// 01/26/01 Scalbn completely reworked and now standalone version
// 01/04/02 Added handling for int 32 or 64 bits
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 08/04/03 Improved performance
//
// API
//==============================================================
-// double __libm_scalbn (double x, int n, int int_type)
+// double __libm_scalbn (double x, int n, int int_type)
// input floating point f8 and int n (r33), int int_type (r34)
// output floating point f8
//
@@ -55,27 +56,41 @@
// int_type = 1 if int is 64 bits
//
// Returns x* 2**n using an fma and detects overflow
-// and underflow.
+// and underflow.
//
//
+// Strategy:
+// Compute biased exponent of result exp_Result = N + exp_X
+// Break into ranges:
+// exp_Result > 0x103fe -> Certain overflow
+// exp_Result = 0x103fe -> Possible overflow
+// 0x0fc01 <= exp_Result < 0x103fe -> No over/underflow (main path)
+// 0x0fc01 - 52 <= exp_Result < 0x0fc01 -> Possible underflow
+// exp_Result < 0x0fc01 - 52 -> Certain underflow
FR_Big = f6
FR_NBig = f7
FR_Floating_X = f8
FR_Result = f8
FR_Result2 = f9
-FR_Result3 = f11
-FR_Norm_X = f12
-FR_Two_N = f14
-FR_Two_to_Big = f15
+FR_Result3 = f10
+FR_Norm_X = f11
+FR_Two_N = f12
+GR_neg_ov_limit= r14
GR_N_Biased = r15
GR_Big = r16
GR_NBig = r17
-GR_Scratch = r18
-GR_Scratch1 = r19
+GR_exp_Result = r18
+GR_pos_ov_limit= r19
GR_Bias = r20
GR_N_as_int = r21
+GR_signexp_X = r22
+GR_exp_X = r23
+GR_exp_mask = r24
+GR_max_exp = r25
+GR_min_exp = r26
+GR_min_den_exp = r27
GR_SAVE_B0 = r32
GR_SAVE_GP = r33
@@ -93,242 +108,298 @@ GLOBAL_LIBM_ENTRY(__libm_scalbn)
// Build the exponent Bias
//
{ .mfi
- alloc r32=ar.pfs,3,0,4,0
- fclass.m p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
- addl GR_Bias = 0x0FFFF,r0
+ getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x
+ fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero
+ mov GR_Bias = 0x0ffff
}
-
//
-// Is N zero?
// Normalize x
// Is integer type 32 bits?
//
{ .mfi
- cmp.eq p6,p0 = r33,r0
- fnorm.s1 FR_Norm_X = FR_Floating_X
- cmp.eq p8,p9 = r34,r0
+ mov GR_Big = 35000 // If N this big then certain overflow
+ fnorm.s1 FR_Norm_X = FR_Floating_X
+ cmp.eq p8,p9 = r34,r0
}
;;
// Sign extend N if int is 32 bits
{ .mfi
(p9) mov GR_N_as_int = r33 // Copy N if int is 64 bits
- nop.f 0
+ fclass.m p9,p0 = FR_Floating_X, 0x0b // Test for x=unorm
(p8) sxt4 GR_N_as_int = r33 // Sign extend N if int is 32 bits
}
+{ .mfi
+ mov GR_NBig = -35000 // If N this small then certain underflow
+ nop.f 0
+ mov GR_max_exp = 0x103fe // Exponent of maximum double
+}
+;;
+
+// Create biased exponent for 2**N
+{ .mfi
+ add GR_N_Biased = GR_Bias,GR_N_as_int
+ nop.f 0
+ cmp.ge p7, p0 = GR_N_as_int, GR_Big // Certain overflow?
+}
+{ .mib
+ cmp.le p8, p0 = GR_N_as_int, GR_NBig // Certain underflow?
+ mov GR_min_exp = 0x0fc01 // Exponent of minimum double
+(p9) br.cond.spnt SCALBN_UNORM // Branch if x=unorm
+}
+;;
+
+SCALBN_COMMON:
+// Main path continues. Also return here from x=unorm path.
+// Create 2**N
+.pred.rel "mutex",p7,p8
+{ .mfi
+ setf.exp FR_Two_N = GR_N_Biased
+ nop.f 0
+(p7) mov GR_N_as_int = GR_Big // Limit max N
+}
+{ .mfi
+(p8) mov GR_N_as_int = GR_NBig // Limit min N
+ nop.f 0
+(p8) cmp.eq p7,p0 = r0,r0 // Set p7 if |N| big
+}
;;
//
-// Branch and return special values.
-// Create -35000
-// Create 35000
+// Create biased exponent for 2**N for N big
+// Is N zero?
//
{ .mfi
- addl GR_Big = 35000,r0
+(p7) add GR_N_Biased = GR_Bias,GR_N_as_int
nop.f 0
- add GR_N_Biased = GR_Bias,GR_N_as_int
+ cmp.eq.or p6,p0 = r33,r0
}
-{ .mfb
- addl GR_NBig = -35000,r0
-(p7) fma.d.s0 FR_Result = FR_Floating_X,f1, f0
-(p7) br.ret.spnt b0
-};;
+{ .mfi
+ mov GR_pos_ov_limit = 0x103ff // Exponent for positive overflow
+ nop.f 0
+ mov GR_exp_mask = 0x1ffff // Exponent mask
+}
+;;
//
-// Build the exponent Bias
-// Return x when N = 0
+// Create 2**N for N big
+// Return x when N = 0 or X = Nan, Inf, Zero
//
{ .mfi
- setf.exp FR_Two_N = GR_N_Biased
+(p7) setf.exp FR_Two_N = GR_N_Biased
nop.f 0
- addl GR_Scratch1 = 0x063BF,r0
+ mov GR_min_den_exp = 0x0fc01 - 52 // Exponent of min denorm dble
}
{ .mfb
- addl GR_Scratch = 0x019C3F,r0
-(p6) fma.d.s0 FR_Result = FR_Floating_X,f1, f0
-(p6) br.ret.spnt b0
-};;
+ and GR_exp_X = GR_exp_mask, GR_signexp_X
+(p6) fma.d.s0 FR_Result = FR_Floating_X, f1, f0
+(p6) br.ret.spnt b0
+}
+;;
//
-// Create 2*big
-// Create 2**-big
-// Is N > 35000
-// Is N < -35000
// Raise Denormal operand flag with compare
-// Main path, create 2**N
+// Compute biased result exponent
//
{ .mfi
- setf.exp FR_NBig = GR_Scratch1
- nop.f 0
- cmp.ge p6, p0 = GR_N_as_int, GR_Big
-}
-{ .mfi
- setf.exp FR_Big = GR_Scratch
+ add GR_exp_Result = GR_exp_X, GR_N_as_int
fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
- cmp.le p8, p0 = GR_N_as_int, GR_NBig
-};;
+ mov GR_neg_ov_limit = 0x303ff // Exponent for negative overflow
+}
+;;
//
-// Adjust 2**N if N was very small or very large
+// Do final operation
//
{ .mfi
- nop.m 0
-(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
- nop.i 0
+ cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow
+ fma.d.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
+ cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
}
-{ .mlx
- nop.m 999
- movl GR_Scratch = 0x00000000000303FF
-};;
+{ .mfb
+ nop.m 0
+ nop.f 0
+(p9) br.cond.spnt SCALBN_UNDERFLOW // Branch if certain underflow
+}
+;;
+{ .mib
+(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow
+(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow
+(p7) br.ret.sptk b0 // Return from main path
+}
+;;
-{ .mfi
- nop.m 0
-(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
- nop.i 0
+{ .bbb
+(p6) br.cond.spnt SCALBN_OVERFLOW // Branch if certain overflow
+(p8) br.cond.spnt SCALBN_POSSIBLE_OVERFLOW // Branch if possible overflow
+(p9) br.cond.spnt SCALBN_POSSIBLE_UNDERFLOW // Branch if possible underflow
}
-{ .mlx
- nop.m 999
- movl GR_Scratch1= 0x00000000000103FF
-};;
+;;
-// Set up necessary status fields
+// Here if possible underflow.
+// Resulting exponent: 0x0fc01-52 <= exp_Result < 0x0fc01
+SCALBN_POSSIBLE_UNDERFLOW:
+//
+// Here if possible overflow.
+// Resulting exponent: 0x103fe = exp_Result
+SCALBN_POSSIBLE_OVERFLOW:
+
+// Set up necessary status fields
//
// S0 user supplied status
// S2 user supplied status + WRE + TD (Overflows)
// S3 user supplied status + FZ + TD (Underflows)
//
{ .mfi
- nop.m 999
+ nop.m 0
fsetc.s3 0x7F,0x41
- nop.i 999
+ nop.i 0
}
{ .mfi
- nop.m 999
+ nop.m 0
fsetc.s2 0x7F,0x42
- nop.i 999
-};;
+ nop.i 0
+}
+;;
//
-// Do final operation
+// Do final operation with s2 and s3
//
{ .mfi
- setf.exp FR_NBig = GR_Scratch
- fma.d.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
- nop.i 999
+ setf.exp FR_NBig = GR_neg_ov_limit
+ fma.d.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
+ nop.i 0
}
{ .mfi
- nop.m 999
- fma.d.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
-{ .mfi
- setf.exp FR_Big = GR_Scratch1
- fma.d.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
+ setf.exp FR_Big = GR_pos_ov_limit
+ fma.d.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
+ nop.i 0
+}
+;;
// Check for overflow or underflow.
// Restore s3
// Restore s2
//
{ .mfi
- nop.m 0
+ nop.m 0
fsetc.s3 0x7F,0x40
- nop.i 999
+ nop.i 0
}
{ .mfi
- nop.m 0
+ nop.m 0
fsetc.s2 0x7F,0x40
- nop.i 999
-};;
+ nop.i 0
+}
+;;
//
// Is the result zero?
//
{ .mfi
- nop.m 999
+ nop.m 0
fclass.m p6, p0 = FR_Result3, 0x007
- nop.i 999
-}
+ nop.i 0
+}
{ .mfi
- addl GR_Tag = 176, r0
+ nop.m 0
fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big
- nop.i 0
-};;
+ nop.i 0
+}
+;;
//
// Detect masked underflow - Tiny + Inexact Only
//
{ .mfi
- nop.m 999
+ nop.m 0
(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
- nop.i 999
-};;
+ nop.i 0
+}
+;;
//
// Is result bigger than the allowed range?
// Branch out for underflow
//
{ .mfb
-(p6) addl GR_Tag = 177, r0
+ nop.m 0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
-(p6) br.cond.spnt SCALBN_UNDERFLOW
-};;
+(p6) br.cond.spnt SCALBN_UNDERFLOW
+}
+;;
//
// Branch out for overflow
//
-{ .mbb
- nop.m 0
-(p7) br.cond.spnt SCALBN_OVERFLOW
-(p9) br.cond.spnt SCALBN_OVERFLOW
-};;
+{ .bbb
+(p7) br.cond.spnt SCALBN_OVERFLOW
+(p9) br.cond.spnt SCALBN_OVERFLOW
+ br.ret.sptk b0 // Return from main path.
+}
+;;
-//
-// Return from main path.
-//
-{ .mfb
- nop.m 999
- nop.f 0
- br.ret.sptk b0;;
+// Here if result overflows
+SCALBN_OVERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0
+ addl GR_Tag = 176, r0 // Set error tag for overflow
+ br.cond.sptk __libm_error_region // Call error support for overflow
}
+;;
-GLOBAL_LIBM_END(__libm_scalbn)
-__libm_error_region:
+// Here if result underflows
+SCALBN_UNDERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0
+ addl GR_Tag = 177, r0 // Set error tag for underflow
+ br.cond.sptk __libm_error_region // Call error support for underflow
+}
+;;
+
+// Here if x=unorm
+SCALBN_UNORM:
+{ .mib
+ getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x
+ nop.i 0
+ br.cond.sptk SCALBN_COMMON // Return to main path
+}
+;;
-SCALBN_OVERFLOW:
-SCALBN_UNDERFLOW:
+
+GLOBAL_LIBM_END(__libm_scalbn)
+LOCAL_LIBM_ENTRY(__libm_error_region)
//
// Get stack address of N
//
.prologue
{ .mfi
- add GR_Parameter_Y=-32,sp
+ add GR_Parameter_Y=-32,sp
nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs
+ mov GR_SAVE_PFS=ar.pfs
}
//
-// Adjust sp
+// Adjust sp
//
{ .mfi
.fframe 64
- add sp=-64,sp
+ add sp=-64,sp
nop.f 0
- mov GR_SAVE_GP=gp
+ mov GR_SAVE_GP=gp
};;
//
-// Store N on stack in correct position
+// Store N on stack in correct position
// Locate the address of x on stack
//
{ .mmi
- st8 [GR_Parameter_Y] = GR_N_as_int,16
- add GR_Parameter_X = 16,sp
+ st8 [GR_Parameter_Y] = GR_N_as_int,16
+ add GR_Parameter_X = 16,sp
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0
+ mov GR_SAVE_B0=b0
};;
//
@@ -337,42 +408,42 @@ SCALBN_UNDERFLOW:
//
.body
{ .mib
- stfd [GR_Parameter_X] = FR_Norm_X
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
+ stfd [GR_Parameter_X] = FR_Norm_X
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0
}
{ .mib
- stfd [GR_Parameter_Y] = FR_Result
+ stfd [GR_Parameter_Y] = FR_Result
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support#
+ br.call.sptk b0=__libm_error_support#
};;
//
// Get location of result on stack
//
{ .mmi
+ add GR_Parameter_RESULT = 48,sp
nop.m 0
- nop.m 0
- add GR_Parameter_RESULT = 48,sp
+ nop.i 0
};;
//
-// Get the new result
+// Get the new result
//
{ .mmi
- ldfd FR_Result = [GR_Parameter_RESULT]
+ ldfd FR_Result = [GR_Parameter_RESULT]
.restore sp
- add sp = 64,sp
- mov b0 = GR_SAVE_B0
+ add sp = 64,sp
+ mov b0 = GR_SAVE_B0
};;
//
// Restore gp, ar.pfs and return
//
{ .mib
- mov gp = GR_SAVE_GP
- mov ar.pfs = GR_SAVE_PFS
- br.ret.sptk b0
+ mov gp = GR_SAVE_GP
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
};;
LOCAL_LIBM_END(__libm_error_region)
diff --git a/sysdeps/ia64/fpu/s_libm_scalbnf.S b/sysdeps/ia64/fpu/s_libm_scalbnf.S
index 57ab2cc..e00997a 100644
--- a/sysdeps/ia64/fpu/s_libm_scalbnf.S
+++ b/sysdeps/ia64/fpu/s_libm_scalbnf.S
@@ -21,33 +21,34 @@
// products derived from this software without specific prior written
// permission.
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
+// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/02/00 Initial version
-// 01/26/01 scalbnf completely reworked and now standalone version
+// 01/26/01 Scalbnf completely reworked and now standalone version
// 01/04/02 Added handling for int 32 or 64 bits
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 08/04/03 Improved performance
//
// API
//==============================================================
-// float __libm_scalbnf (float x, int n, int int_type)
+// float __libm_scalbnf (float x, int n, int int_type)
// input floating point f8 and int n (r33), int int_type (r34)
// output floating point f8
//
@@ -55,27 +56,41 @@
// int_type = 1 if int is 64 bits
//
// Returns x* 2**n using an fma and detects overflow
-// and underflow.
+// and underflow.
//
//
+// Strategy:
+// Compute biased exponent of result exp_Result = N + exp_X
+// Break into ranges:
+// exp_Result > 0x1007e -> Certain overflow
+// exp_Result = 0x1007e -> Possible overflow
+// 0x0ff81 <= exp_Result < 0x1007e -> No over/underflow (main path)
+// 0x0ff81 - 23 <= exp_Result < 0x0ff81 -> Possible underflow
+// exp_Result < 0x0ff81 - 23 -> Certain underflow
FR_Big = f6
FR_NBig = f7
FR_Floating_X = f8
FR_Result = f8
FR_Result2 = f9
-FR_Result3 = f11
-FR_Norm_X = f12
-FR_Two_N = f14
-FR_Two_to_Big = f15
+FR_Result3 = f10
+FR_Norm_X = f11
+FR_Two_N = f12
+GR_neg_ov_limit= r14
GR_N_Biased = r15
GR_Big = r16
GR_NBig = r17
-GR_Scratch = r18
-GR_Scratch1 = r19
+GR_exp_Result = r18
+GR_pos_ov_limit= r19
GR_Bias = r20
GR_N_as_int = r21
+GR_signexp_X = r22
+GR_exp_X = r23
+GR_exp_mask = r24
+GR_max_exp = r25
+GR_min_exp = r26
+GR_min_den_exp = r27
GR_SAVE_B0 = r32
GR_SAVE_GP = r33
@@ -93,242 +108,298 @@ GLOBAL_LIBM_ENTRY(__libm_scalbnf)
// Build the exponent Bias
//
{ .mfi
- alloc r32=ar.pfs,3,0,4,0
- fclass.m p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
- addl GR_Bias = 0x0FFFF,r0
+ getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x
+ fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero
+ mov GR_Bias = 0x0ffff
}
-
//
-// Is N zero?
// Normalize x
// Is integer type 32 bits?
//
{ .mfi
- cmp.eq p6,p0 = r33,r0
- fnorm.s1 FR_Norm_X = FR_Floating_X
- cmp.eq p8,p9 = r34,r0
+ mov GR_Big = 35000 // If N this big then certain overflow
+ fnorm.s1 FR_Norm_X = FR_Floating_X
+ cmp.eq p8,p9 = r34,r0
}
;;
// Sign extend N if int is 32 bits
{ .mfi
(p9) mov GR_N_as_int = r33 // Copy N if int is 64 bits
- nop.f 0
+ fclass.m p9,p0 = FR_Floating_X, 0x0b // Test for x=unorm
(p8) sxt4 GR_N_as_int = r33 // Sign extend N if int is 32 bits
}
+{ .mfi
+ mov GR_NBig = -35000 // If N this small then certain underflow
+ nop.f 0
+ mov GR_max_exp = 0x1007e // Exponent of maximum float
+}
+;;
+
+// Create biased exponent for 2**N
+{ .mfi
+ add GR_N_Biased = GR_Bias,GR_N_as_int
+ nop.f 0
+ cmp.ge p7, p0 = GR_N_as_int, GR_Big // Certain overflow?
+}
+{ .mib
+ cmp.le p8, p0 = GR_N_as_int, GR_NBig // Certain underflow?
+ mov GR_min_exp = 0x0ff81 // Exponent of minimum float
+(p9) br.cond.spnt SCALBNF_UNORM // Branch if x=unorm
+}
+;;
+
+SCALBNF_COMMON:
+// Main path continues. Also return here from x=unorm path.
+// Create 2**N
+.pred.rel "mutex",p7,p8
+{ .mfi
+ setf.exp FR_Two_N = GR_N_Biased
+ nop.f 0
+(p7) mov GR_N_as_int = GR_Big // Limit max N
+}
+{ .mfi
+(p8) mov GR_N_as_int = GR_NBig // Limit min N
+ nop.f 0
+(p8) cmp.eq p7,p0 = r0,r0 // Set p7 if |N| big
+}
;;
//
-// Branch and return special values.
-// Create -35000
-// Create 35000
+// Create biased exponent for 2**N for N big
+// Is N zero?
//
{ .mfi
- addl GR_Big = 35000,r0
+(p7) add GR_N_Biased = GR_Bias,GR_N_as_int
nop.f 0
- add GR_N_Biased = GR_Bias,GR_N_as_int
+ cmp.eq.or p6,p0 = r33,r0
}
-{ .mfb
- addl GR_NBig = -35000,r0
-(p7) fma.s.s0 FR_Result = FR_Floating_X,f1, f0
-(p7) br.ret.spnt b0
-};;
+{ .mfi
+ mov GR_pos_ov_limit = 0x1007f // Exponent for positive overflow
+ nop.f 0
+ mov GR_exp_mask = 0x1ffff // Exponent mask
+}
+;;
//
-// Build the exponent Bias
-// Return x when N = 0
+// Create 2**N for N big
+// Return x when N = 0 or X = Nan, Inf, Zero
//
{ .mfi
- setf.exp FR_Two_N = GR_N_Biased
+(p7) setf.exp FR_Two_N = GR_N_Biased
nop.f 0
- addl GR_Scratch1 = 0x063BF,r0
+ mov GR_min_den_exp = 0x0ff81 - 23 // Exponent of min denorm float
}
{ .mfb
- addl GR_Scratch = 0x019C3F,r0
-(p6) fma.s.s0 FR_Result = FR_Floating_X,f1, f0
-(p6) br.ret.spnt b0
-};;
+ and GR_exp_X = GR_exp_mask, GR_signexp_X
+(p6) fma.s.s0 FR_Result = FR_Floating_X, f1, f0
+(p6) br.ret.spnt b0
+}
+;;
//
-// Create 2*big
-// Create 2**-big
-// Is N > 35000
-// Is N < -35000
// Raise Denormal operand flag with compare
-// Main path, create 2**N
+// Compute biased result exponent
//
{ .mfi
- setf.exp FR_NBig = GR_Scratch1
- nop.f 0
- cmp.ge p6, p0 = GR_N_as_int, GR_Big
-}
-{ .mfi
- setf.exp FR_Big = GR_Scratch
+ add GR_exp_Result = GR_exp_X, GR_N_as_int
fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
- cmp.le p8, p0 = GR_N_as_int, GR_NBig
-};;
+ mov GR_neg_ov_limit = 0x3007f // Exponent for negative overflow
+}
+;;
//
-// Adjust 2**N if N was very small or very large
+// Do final operation
//
{ .mfi
- nop.m 0
-(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
- nop.i 0
+ cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow
+ fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
+ cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
}
-{ .mlx
- nop.m 999
- movl GR_Scratch = 0x000000000003007F
-};;
+{ .mfb
+ nop.m 0
+ nop.f 0
+(p9) br.cond.spnt SCALBNF_UNDERFLOW // Branch if certain underflow
+}
+;;
+{ .mib
+(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow
+(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow
+(p7) br.ret.sptk b0 // Return from main path
+}
+;;
-{ .mfi
- nop.m 0
-(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
- nop.i 0
+{ .bbb
+(p6) br.cond.spnt SCALBNF_OVERFLOW // Branch if certain overflow
+(p8) br.cond.spnt SCALBNF_POSSIBLE_OVERFLOW // Branch if possible overflow
+(p9) br.cond.spnt SCALBNF_POSSIBLE_UNDERFLOW // Branch if possible underflow
}
-{ .mlx
- nop.m 999
- movl GR_Scratch1= 0x000000000001007F
-};;
+;;
-// Set up necessary status fields
+// Here if possible underflow.
+// Resulting exponent: 0x0ff81-23 <= exp_Result < 0x0ff81
+SCALBNF_POSSIBLE_UNDERFLOW:
+//
+// Here if possible overflow.
+// Resulting exponent: 0x1007e = exp_Result
+SCALBNF_POSSIBLE_OVERFLOW:
+
+// Set up necessary status fields
//
// S0 user supplied status
// S2 user supplied status + WRE + TD (Overflows)
// S3 user supplied status + FZ + TD (Underflows)
//
{ .mfi
- nop.m 999
+ nop.m 0
fsetc.s3 0x7F,0x41
- nop.i 999
+ nop.i 0
}
{ .mfi
- nop.m 999
+ nop.m 0
fsetc.s2 0x7F,0x42
- nop.i 999
-};;
+ nop.i 0
+}
+;;
//
-// Do final operation
+// Do final operation with s2 and s3
//
{ .mfi
- setf.exp FR_NBig = GR_Scratch
- fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
- nop.i 999
+ setf.exp FR_NBig = GR_neg_ov_limit
+ fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
+ nop.i 0
}
{ .mfi
- nop.m 999
- fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
-{ .mfi
- setf.exp FR_Big = GR_Scratch1
- fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
+ setf.exp FR_Big = GR_pos_ov_limit
+ fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
+ nop.i 0
+}
+;;
// Check for overflow or underflow.
// Restore s3
// Restore s2
//
{ .mfi
- nop.m 0
+ nop.m 0
fsetc.s3 0x7F,0x40
- nop.i 999
+ nop.i 0
}
{ .mfi
- nop.m 0
+ nop.m 0
fsetc.s2 0x7F,0x40
- nop.i 999
-};;
+ nop.i 0
+}
+;;
//
// Is the result zero?
//
{ .mfi
- nop.m 999
+ nop.m 0
fclass.m p6, p0 = FR_Result3, 0x007
- nop.i 999
-}
+ nop.i 0
+}
{ .mfi
- addl GR_Tag = 178, r0
+ nop.m 0
fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big
- nop.i 0
-};;
+ nop.i 0
+}
+;;
//
// Detect masked underflow - Tiny + Inexact Only
//
{ .mfi
- nop.m 999
+ nop.m 0
(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
- nop.i 999
-};;
+ nop.i 0
+}
+;;
//
// Is the result bigger than the allowed range?
// Branch out for underflow
//
{ .mfb
-(p6) addl GR_Tag = 179, r0
+ nop.m 0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
-(p6) br.cond.spnt SCALBNF_UNDERFLOW
-};;
+(p6) br.cond.spnt SCALBNF_UNDERFLOW
+}
+;;
//
// Branch out for overflow
//
-{ .mbb
- nop.m 0
-(p7) br.cond.spnt SCALBNF_OVERFLOW
-(p9) br.cond.spnt SCALBNF_OVERFLOW
-};;
+{ .bbb
+(p7) br.cond.spnt SCALBNF_OVERFLOW
+(p9) br.cond.spnt SCALBNF_OVERFLOW
+ br.ret.sptk b0 // Return from main path.
+}
+;;
-//
-// Return from main path.
-//
-{ .mfb
- nop.m 999
- nop.f 0
- br.ret.sptk b0;;
+// Here if result overflows
+SCALBNF_OVERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0
+ addl GR_Tag = 178, r0 // Set error tag for overflow
+ br.cond.sptk __libm_error_region // Call error support for overflow
}
+;;
-GLOBAL_LIBM_END(__libm_scalbnf)
-__libm_error_region:
+// Here if result underflows
+SCALBNF_UNDERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0
+ addl GR_Tag = 179, r0 // Set error tag for underflow
+ br.cond.sptk __libm_error_region // Call error support for underflow
+}
+;;
+
+// Here if x=unorm
+SCALBNF_UNORM:
+{ .mib
+ getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x
+ nop.i 0
+ br.cond.sptk SCALBNF_COMMON // Return to main path
+}
+;;
-SCALBNF_OVERFLOW:
-SCALBNF_UNDERFLOW:
+
+GLOBAL_LIBM_END(__libm_scalbnf)
+LOCAL_LIBM_ENTRY(__libm_error_region)
//
// Get stack address of N
//
.prologue
{ .mfi
- add GR_Parameter_Y=-32,sp
+ add GR_Parameter_Y=-32,sp
nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs
+ mov GR_SAVE_PFS=ar.pfs
}
//
-// Adjust sp
+// Adjust sp
//
{ .mfi
.fframe 64
- add sp=-64,sp
+ add sp=-64,sp
nop.f 0
- mov GR_SAVE_GP=gp
+ mov GR_SAVE_GP=gp
};;
//
-// Store N on stack in correct position
+// Store N on stack in correct position
// Locate the address of x on stack
//
{ .mmi
- st8 [GR_Parameter_Y] = GR_N_as_int,16
- add GR_Parameter_X = 16,sp
+ st8 [GR_Parameter_Y] = GR_N_as_int,16
+ add GR_Parameter_X = 16,sp
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0
+ mov GR_SAVE_B0=b0
};;
//
@@ -337,42 +408,42 @@ SCALBNF_UNDERFLOW:
//
.body
{ .mib
- stfs [GR_Parameter_X] = FR_Norm_X
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
+ stfs [GR_Parameter_X] = FR_Norm_X
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0
}
{ .mib
- stfs [GR_Parameter_Y] = FR_Result
+ stfs [GR_Parameter_Y] = FR_Result
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support#
+ br.call.sptk b0=__libm_error_support#
};;
//
// Get location of result on stack
//
{ .mmi
+ add GR_Parameter_RESULT = 48,sp
nop.m 0
- nop.m 0
- add GR_Parameter_RESULT = 48,sp
+ nop.i 0
};;
//
-// Get the new result
+// Get the new result
//
{ .mmi
- ldfs FR_Result = [GR_Parameter_RESULT]
+ ldfs FR_Result = [GR_Parameter_RESULT]
.restore sp
- add sp = 64,sp
- mov b0 = GR_SAVE_B0
+ add sp = 64,sp
+ mov b0 = GR_SAVE_B0
};;
//
// Restore gp, ar.pfs and return
//
{ .mib
- mov gp = GR_SAVE_GP
- mov ar.pfs = GR_SAVE_PFS
- br.ret.sptk b0
+ mov gp = GR_SAVE_GP
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
};;
LOCAL_LIBM_END(__libm_error_region)
diff --git a/sysdeps/ia64/fpu/s_libm_scalbnl.S b/sysdeps/ia64/fpu/s_libm_scalbnl.S
index 6eb6e17..1edf9a0 100644
--- a/sysdeps/ia64/fpu/s_libm_scalbnl.S
+++ b/sysdeps/ia64/fpu/s_libm_scalbnl.S
@@ -21,33 +21,34 @@
// products derived from this software without specific prior written
// permission.
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
+// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
//==============================================================
// 02/02/00 Initial version
-// 01/26/01 scalbnl completely reworked and now standalone version
+// 01/26/01 Scalbnl completely reworked and now standalone version
// 01/04/02 Added handling for int 32 or 64 bits
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 08/04/03 Improved performance
//
// API
//==============================================================
-// long double __libm_scalbnl (long double x, int n, int int_type)
+// long double __libm_scalbnl (long double x, int n, int int_type)
// input floating point f8 and int n (r34), int int_type (r35)
// output floating point f8
//
@@ -55,27 +56,41 @@
// int_type = 1 if int is 64 bits
//
// Returns x* 2**n using an fma and detects overflow
-// and underflow.
+// and underflow.
//
//
+// Strategy:
+// Compute biased exponent of result exp_Result = N + exp_X
+// Break into ranges:
+// exp_Result > 0x13ffe -> Certain overflow
+// exp_Result = 0x13ffe -> Possible overflow
+// 0x0c001 <= exp_Result < 0x13ffe -> No over/underflow (main path)
+// 0x0c001 - 63 <= exp_Result < 0x0c001 -> Possible underflow
+// exp_Result < 0x0c001 - 63 -> Certain underflow
FR_Big = f6
FR_NBig = f7
FR_Floating_X = f8
FR_Result = f8
FR_Result2 = f9
-FR_Result3 = f11
-FR_Norm_X = f12
-FR_Two_N = f14
-FR_Two_to_Big = f15
+FR_Result3 = f10
+FR_Norm_X = f11
+FR_Two_N = f12
+GR_neg_ov_limit= r14
GR_N_Biased = r15
GR_Big = r16
GR_NBig = r17
-GR_Scratch = r18
-GR_Scratch1 = r19
+GR_exp_Result = r18
+GR_pos_ov_limit= r19
GR_Bias = r20
GR_N_as_int = r21
+GR_signexp_X = r22
+GR_exp_X = r23
+GR_exp_mask = r24
+GR_max_exp = r25
+GR_min_exp = r26
+GR_min_den_exp = r27
GR_SAVE_B0 = r32
GR_SAVE_GP = r33
@@ -93,242 +108,298 @@ GLOBAL_LIBM_ENTRY(__libm_scalbnl)
// Build the exponent Bias
//
{ .mfi
- alloc r32=ar.pfs,3,0,4,0
- fclass.m p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
- addl GR_Bias = 0x0FFFF,r0
+ getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x
+ fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero
+ mov GR_Bias = 0x0ffff
}
-
//
-// Is N zero?
// Normalize x
// Is integer type 32 bits?
//
{ .mfi
- cmp.eq p6,p0 = r34,r0
- fnorm.s1 FR_Norm_X = FR_Floating_X
- cmp.eq p8,p9 = r35,r0
+ mov GR_Big = 35000 // If N this big then certain overflow
+ fnorm.s1 FR_Norm_X = FR_Floating_X
+ cmp.eq p8,p9 = r35,r0
}
;;
// Sign extend N if int is 32 bits
{ .mfi
(p9) mov GR_N_as_int = r34 // Copy N if int is 64 bits
- nop.f 0
+ fclass.m p9,p0 = FR_Floating_X, 0x0b // Test for x=unorm
(p8) sxt4 GR_N_as_int = r34 // Sign extend N if int is 32 bits
}
+{ .mfi
+ mov GR_NBig = -35000 // If N this small then certain underflow
+ nop.f 0
+ mov GR_max_exp = 0x13ffe // Exponent of maximum long double
+}
+;;
+
+// Create biased exponent for 2**N
+{ .mfi
+ add GR_N_Biased = GR_Bias,GR_N_as_int
+ nop.f 0
+ cmp.ge p7, p0 = GR_N_as_int, GR_Big // Certain overflow?
+}
+{ .mib
+ cmp.le p8, p0 = GR_N_as_int, GR_NBig // Certain underflow?
+ mov GR_min_exp = 0x0c001 // Exponent of minimum long double
+(p9) br.cond.spnt SCALBNL_UNORM // Branch if x=unorm
+}
+;;
+
+SCALBNL_COMMON:
+// Main path continues. Also return here from x=unorm path.
+// Create 2**N
+.pred.rel "mutex",p7,p8
+{ .mfi
+ setf.exp FR_Two_N = GR_N_Biased
+ nop.f 0
+(p7) mov GR_N_as_int = GR_Big // Limit max N
+}
+{ .mfi
+(p8) mov GR_N_as_int = GR_NBig // Limit min N
+ nop.f 0
+(p8) cmp.eq p7,p0 = r0,r0 // Set p7 if |N| big
+}
;;
//
-// Branch and return special values.
-// Create -35000
-// Create 35000
+// Create biased exponent for 2**N for N big
+// Is N zero?
//
{ .mfi
- addl GR_Big = 35000,r0
+(p7) add GR_N_Biased = GR_Bias,GR_N_as_int
nop.f 0
- add GR_N_Biased = GR_Bias,GR_N_as_int
+ cmp.eq.or p6,p0 = r34,r0
}
-{ .mfb
- addl GR_NBig = -35000,r0
-(p7) fma.s0 FR_Result = FR_Floating_X,f1, f0
-(p7) br.ret.spnt b0
-};;
+{ .mfi
+ mov GR_pos_ov_limit = 0x13fff // Exponent for positive overflow
+ nop.f 0
+ mov GR_exp_mask = 0x1ffff // Exponent mask
+}
+;;
//
-// Build the exponent Bias
-// Return x when N = 0
+// Create 2**N for N big
+// Return x when N = 0 or X = NaN, Inf, Zero
//
{ .mfi
- setf.exp FR_Two_N = GR_N_Biased
+(p7) setf.exp FR_Two_N = GR_N_Biased
nop.f 0
- addl GR_Scratch1 = 0x063BF,r0
+ mov GR_min_den_exp = 0x0c001 - 63 // Exp of min denorm long dble
}
{ .mfb
- addl GR_Scratch = 0x019C3F,r0
-(p6) fma.s0 FR_Result = FR_Floating_X,f1, f0
-(p6) br.ret.spnt b0
-};;
+ and GR_exp_X = GR_exp_mask, GR_signexp_X
+(p6) fma.s0 FR_Result = FR_Floating_X, f1, f0
+(p6) br.ret.spnt b0
+}
+;;
//
-// Create 2*big
-// Create 2**-big
-// Is N > 35000
-// Is N < -35000
// Raise Denormal operand flag with compare
-// Main path, create 2**N
+// Compute biased result exponent
//
{ .mfi
- setf.exp FR_NBig = GR_Scratch1
- nop.f 0
- cmp.ge p6, p0 = GR_N_as_int, GR_Big
-}
-{ .mfi
- setf.exp FR_Big = GR_Scratch
+ add GR_exp_Result = GR_exp_X, GR_N_as_int
fcmp.ge.s0 p0,p11 = FR_Floating_X,f0
- cmp.le p8, p0 = GR_N_as_int, GR_NBig
-};;
+ mov GR_neg_ov_limit = 0x33fff // Exponent for negative overflow
+}
+;;
//
-// Adjust 2**N if N was very small or very large
+// Do final operation
//
{ .mfi
- nop.m 0
-(p6) fma.s1 FR_Two_N = FR_Big,f1,f0
- nop.i 0
+ cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow
+ fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
+ cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
}
-{ .mlx
- nop.m 999
- movl GR_Scratch = 0x0000000000033FFF
-};;
+{ .mfb
+ nop.m 0
+ nop.f 0
+(p9) br.cond.spnt SCALBNL_UNDERFLOW // Branch if certain underflow
+}
+;;
+{ .mib
+(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow
+(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow
+(p7) br.ret.sptk b0 // Return from main path
+}
+;;
-{ .mfi
- nop.m 0
-(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0
- nop.i 0
+{ .bbb
+(p6) br.cond.spnt SCALBNL_OVERFLOW // Branch if certain overflow
+(p8) br.cond.spnt SCALBNL_POSSIBLE_OVERFLOW // Branch if possible overflow
+(p9) br.cond.spnt SCALBNL_POSSIBLE_UNDERFLOW // Branch if possible underflow
}
-{ .mlx
- nop.m 999
- movl GR_Scratch1= 0x0000000000013FFF
-};;
+;;
-// Set up necessary status fields
+// Here if possible underflow.
+// Resulting exponent: 0x0c001-63 <= exp_Result < 0x0c001
+SCALBNL_POSSIBLE_UNDERFLOW:
+//
+// Here if possible overflow.
+// Resulting exponent: 0x13ffe = exp_Result
+SCALBNL_POSSIBLE_OVERFLOW:
+
+// Set up necessary status fields
//
// S0 user supplied status
// S2 user supplied status + WRE + TD (Overflows)
// S3 user supplied status + FZ + TD (Underflows)
//
{ .mfi
- nop.m 999
+ nop.m 0
fsetc.s3 0x7F,0x41
- nop.i 999
+ nop.i 0
}
{ .mfi
- nop.m 999
+ nop.m 0
fsetc.s2 0x7F,0x42
- nop.i 999
-};;
+ nop.i 0
+}
+;;
//
-// Do final operation
+// Do final operation with s2 and s3
//
{ .mfi
- setf.exp FR_NBig = GR_Scratch
- fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0
- nop.i 999
+ setf.exp FR_NBig = GR_neg_ov_limit
+ fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
+ nop.i 0
}
{ .mfi
- nop.m 999
- fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
-{ .mfi
- setf.exp FR_Big = GR_Scratch1
- fma.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
- nop.i 999
-};;
+ setf.exp FR_Big = GR_pos_ov_limit
+ fma.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
+ nop.i 0
+}
+;;
// Check for overflow or underflow.
// Restore s3
// Restore s2
//
{ .mfi
- nop.m 0
+ nop.m 0
fsetc.s3 0x7F,0x40
- nop.i 999
+ nop.i 0
}
{ .mfi
- nop.m 0
+ nop.m 0
fsetc.s2 0x7F,0x40
- nop.i 999
-};;
+ nop.i 0
+}
+;;
//
// Is the result zero?
//
{ .mfi
- nop.m 999
+ nop.m 0
fclass.m p6, p0 = FR_Result3, 0x007
- nop.i 999
-}
+ nop.i 0
+}
{ .mfi
- addl GR_Tag = 174, r0
+ nop.m 0
fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big
- nop.i 0
-};;
+ nop.i 0
+}
+;;
//
// Detect masked underflow - Tiny + Inexact Only
//
{ .mfi
- nop.m 999
+ nop.m 0
(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
- nop.i 999
-};;
+ nop.i 0
+}
+;;
//
// Is the result bigger than the allowed range?
// Branch out for underflow
//
{ .mfb
-(p6) addl GR_Tag = 175, r0
+ nop.m 0
(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
-(p6) br.cond.spnt SCALBNL_UNDERFLOW
-};;
+(p6) br.cond.spnt SCALBNL_UNDERFLOW
+}
+;;
//
// Branch out for overflow
//
-{ .mbb
- nop.m 0
-(p7) br.cond.spnt SCALBNL_OVERFLOW
-(p9) br.cond.spnt SCALBNL_OVERFLOW
-};;
+{ .bbb
+(p7) br.cond.spnt SCALBNL_OVERFLOW
+(p9) br.cond.spnt SCALBNL_OVERFLOW
+ br.ret.sptk b0 // Return from main path.
+}
+;;
-//
-// Return from main path.
-//
-{ .mfb
- nop.m 999
- nop.f 0
- br.ret.sptk b0;;
+// Here if result overflows
+SCALBNL_OVERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0
+ addl GR_Tag = 174, r0 // Set error tag for overflow
+ br.cond.sptk __libm_error_region // Call error support for overflow
}
+;;
-GLOBAL_LIBM_END(__libm_scalbnl)
-__libm_error_region:
+// Here if result underflows
+SCALBNL_UNDERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0
+ addl GR_Tag = 175, r0 // Set error tag for underflow
+ br.cond.sptk __libm_error_region // Call error support for underflow
+}
+;;
+
+// Here if x=unorm
+SCALBNL_UNORM:
+{ .mib
+ getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x
+ nop.i 0
+ br.cond.sptk SCALBNL_COMMON // Return to main path
+}
+;;
-SCALBNL_OVERFLOW:
-SCALBNL_UNDERFLOW:
+
+GLOBAL_LIBM_END(__libm_scalbnl)
+LOCAL_LIBM_ENTRY(__libm_error_region)
//
// Get stack address of N
//
.prologue
{ .mfi
- add GR_Parameter_Y=-32,sp
+ add GR_Parameter_Y=-32,sp
nop.f 0
.save ar.pfs,GR_SAVE_PFS
- mov GR_SAVE_PFS=ar.pfs
+ mov GR_SAVE_PFS=ar.pfs
}
//
-// Adjust sp
+// Adjust sp
//
{ .mfi
.fframe 64
- add sp=-64,sp
+ add sp=-64,sp
nop.f 0
- mov GR_SAVE_GP=gp
+ mov GR_SAVE_GP=gp
};;
//
-// Store N on stack in correct position
+// Store N on stack in correct position
// Locate the address of x on stack
//
{ .mmi
- st8 [GR_Parameter_Y] = GR_N_as_int,16
- add GR_Parameter_X = 16,sp
+ st8 [GR_Parameter_Y] = GR_N_as_int,16
+ add GR_Parameter_X = 16,sp
.save b0, GR_SAVE_B0
- mov GR_SAVE_B0=b0
+ mov GR_SAVE_B0=b0
};;
//
@@ -337,42 +408,42 @@ SCALBNL_UNDERFLOW:
//
.body
{ .mib
- stfe [GR_Parameter_X] = FR_Norm_X
- add GR_Parameter_RESULT = 0,GR_Parameter_Y
+ stfe [GR_Parameter_X] = FR_Norm_X
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y
nop.b 0
}
{ .mib
- stfe [GR_Parameter_Y] = FR_Result
+ stfe [GR_Parameter_Y] = FR_Result
add GR_Parameter_Y = -16,GR_Parameter_Y
- br.call.sptk b0=__libm_error_support#
+ br.call.sptk b0=__libm_error_support#
};;
//
// Get location of result on stack
//
{ .mmi
+ add GR_Parameter_RESULT = 48,sp
nop.m 0
- nop.m 0
- add GR_Parameter_RESULT = 48,sp
+ nop.i 0
};;
//
-// Get the new result
+// Get the new result
//
{ .mmi
- ldfe FR_Result = [GR_Parameter_RESULT]
+ ldfe FR_Result = [GR_Parameter_RESULT]
.restore sp
- add sp = 64,sp
- mov b0 = GR_SAVE_B0
+ add sp = 64,sp
+ mov b0 = GR_SAVE_B0
};;
//
// Restore gp, ar.pfs and return
//
{ .mib
- mov gp = GR_SAVE_GP
- mov ar.pfs = GR_SAVE_PFS
- br.ret.sptk b0
+ mov gp = GR_SAVE_GP
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
};;
LOCAL_LIBM_END(__libm_error_region)
diff --git a/sysdeps/ia64/fpu/s_log1p.S b/sysdeps/ia64/fpu/s_log1p.S
index cd35519..ccf0c31 100644
--- a/sysdeps/ia64/fpu/s_log1p.S
+++ b/sysdeps/ia64/fpu/s_log1p.S
@@ -1047,6 +1047,7 @@ log_libm_err:
GLOBAL_IEEE754_END(log1p)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/s_log1pf.S b/sysdeps/ia64/fpu/s_log1pf.S
index a148d4b..77e79c3 100644
--- a/sysdeps/ia64/fpu/s_log1pf.S
+++ b/sysdeps/ia64/fpu/s_log1pf.S
@@ -48,6 +48,7 @@
// 10/02/02 Improved performance by basing on log algorithm
// 02/10/03 Reordered header: .section, .global, .proc, .align
// 04/18/03 Eliminate possible WAW dependency warning
+// 12/16/03 Fixed parameter passing to/from error handling routine
//
// API
//==============================================================
@@ -733,6 +734,7 @@ log_libm_err:
GLOBAL_IEEE754_END(log1pf)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/s_log1pl.S b/sysdeps/ia64/fpu/s_log1pl.S
index d392a58..9654265 100644
--- a/sysdeps/ia64/fpu/s_log1pl.S
+++ b/sysdeps/ia64/fpu/s_log1pl.S
@@ -1145,6 +1145,7 @@ LOG1P_LT_Minus_1:
GLOBAL_IEEE754_END(log1pl)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/s_logb.S b/sysdeps/ia64/fpu/s_logb.S
index dfe581a..7ee8987 100644
--- a/sysdeps/ia64/fpu/s_logb.S
+++ b/sysdeps/ia64/fpu/s_logb.S
@@ -219,6 +219,7 @@ LOGB_ZERO:
GLOBAL_LIBM_END(logb)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
diff --git a/sysdeps/ia64/fpu/s_logbf.S b/sysdeps/ia64/fpu/s_logbf.S
index 1d605cd..eefa270 100644
--- a/sysdeps/ia64/fpu/s_logbf.S
+++ b/sysdeps/ia64/fpu/s_logbf.S
@@ -219,6 +219,7 @@ LOGB_ZERO:
GLOBAL_LIBM_END(logbf)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
diff --git a/sysdeps/ia64/fpu/s_logbl.S b/sysdeps/ia64/fpu/s_logbl.S
index 6a08e94..e312c1b 100644
--- a/sysdeps/ia64/fpu/s_logbl.S
+++ b/sysdeps/ia64/fpu/s_logbl.S
@@ -219,6 +219,7 @@ LOGB_ZERO:
GLOBAL_LIBM_END(logbl)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
diff --git a/sysdeps/ia64/fpu/s_nearbyint.S b/sysdeps/ia64/fpu/s_nearbyint.S
index cba74e6..ec1ff22 100644
--- a/sysdeps/ia64/fpu/s_nearbyint.S
+++ b/sysdeps/ia64/fpu/s_nearbyint.S
@@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
+// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
@@ -43,48 +43,44 @@
// 02/08/01 Corrected behavior for all rounding modes.
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 07/25/03 Improved performance
//==============================================================
-//
+
// API
//==============================================================
// double nearbyint(double x)
-//
-// general registers used:
-//
+//==============================================================
-nearbyint_GR_signexp = r14
-nearbyint_GR_exponent = r15
-nearbyint_GR_17ones = r16
-nearbyint_GR_10033 = r17
-nearbyint_GR_fpsr = r18
-nearbyint_GR_rcs0 = r19
-nearbyint_GR_rcs0_mask = r20
+// general input registers:
+// r14 - r21
+rSignexp = r14
+rExp = r15
+rExpMask = r16
+rBigexp = r17
+rFpsr = r19
+rRcs0 = r20
+rRcs0Mask = r21
-// predicate registers used:
-// p6-11
+// floating-point registers:
+// f8 - f10
-// floating-point registers used:
+fXInt = f9
+fNormX = f10
-NEARBYINT_NORM_f8 = f9
-NEARBYINT_FLOAT_INT_f8 = f10
-NEARBYINT_INT_f8 = f11
+// predicate registers used:
+// p6 - p10
// Overview of operation
//==============================================================
-
// double nearbyint(double x)
-// Return an integer value (represented as a double) that is x rounded to integer in current
-// rounding mode
-// *******************************************************************************
-
-// Set denormal flag for denormal input and
-// and take denormal fault if necessary.
-
-// Is the input an integer value already?
+// Return an integer value (represented as a double) that is x
+// rounded to integer in current rounding mode
+// Inexact is not set; otherwise the result is identical to that of rint.
+//==============================================================
// double_extended
-// if the exponent is >= 1003e => 3F(true) = 63(decimal)
+// if the exponent is > 1003e => 3F(true) = 63(decimal)
// we have a significand of 64 bits 1.63-bits.
// If we multiply by 2^63, we no longer have a fractional part
// So input is an integer value already.
@@ -97,128 +93,125 @@ NEARBYINT_INT_f8 = f11
// So input is an integer value already.
// single
-// if the exponent is >= 10016 => 17(true) = 23(decimal)
-// we have a significand of 53 bits 1.52-bits. (implicit 1)
-// If we multiply by 2^52, we no longer have a fractional part
+// if the exponent is > 10016 => 17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
// So input is an integer value already.
-// If x is NAN, ZERO, or INFINITY, then return
-
-// qnan snan inf norm unorm 0 -+
-// 1 1 1 0 0 1 11 0xe7
-
-
.section .text
GLOBAL_LIBM_ENTRY(nearbyint)
{ .mfi
- mov nearbyint_GR_fpsr = ar40 // Read the fpsr--need to check rc.s0
- fcvt.fx.s1 NEARBYINT_INT_f8 = f8
- addl nearbyint_GR_10033 = 0x10033, r0
+ getf.exp rSignexp = f8 // Get signexp, recompute if unorm
+ fclass.m p7,p0 = f8, 0x0b // Test x unorm
+ addl rBigexp = 0x10033, r0 // Set exponent at which is integer
}
{ .mfi
- nop.m 999
- fnorm.s1 NEARBYINT_NORM_f8 = f8
- mov nearbyint_GR_17ones = 0x1FFFF
-;;
+ nop.m 0
+ fcvt.fx.s1 fXInt = f8 // Convert to int in significand
+ mov rExpMask = 0x1FFFF // Form exponent mask
}
+;;
{ .mfi
- nop.m 999
- fclass.m.unc p6,p0 = f8, 0xe7
- mov nearbyint_GR_rcs0_mask = 0x0c00
-;;
+ mov rFpsr = ar40 // Read fpsr -- check rc.s0
+ fclass.m p6,p0 = f8, 0x1e3 // Test x natval, nan, inf
+ nop.i 0
}
-
{ .mfb
- nop.m 999
-(p6) fnorm.d.s0 f8 = f8
-(p6) br.ret.spnt b0 // Exit if x nan, inf, zero
+ nop.m 0
+ fnorm.s1 fNormX = f8 // Normalize input
+(p7) br.cond.spnt RINT_UNORM // Branch if x unorm
+}
;;
+
+
+RINT_COMMON:
+// Return here from RINT_UNORM
+{ .mfb
+ and rExp = rSignexp, rExpMask // Get biased exponent
+(p6) fma.d.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf
+(p6) br.ret.spnt b0 // Exit if x natval, nan, inf
}
+;;
{ .mfi
- nop.m 999
- fcvt.xf NEARBYINT_FLOAT_INT_f8 = NEARBYINT_INT_f8
- nop.i 999
-;;
+ mov rRcs0Mask = 0x0c00 // Mask for rc.s0
+ fcvt.xf f8 = fXInt // Result assume |x| < 2^52
+ cmp.ge p7,p8 = rExp, rBigexp // Is |x| >= 2^52?
}
+;;
+// We must correct result if |x| >= 2^52
{ .mfi
- getf.exp nearbyint_GR_signexp = NEARBYINT_NORM_f8
- fcmp.eq.s0 p8,p0 = f8,f0 // Dummy op to set denormal
- nop.i 999
-;;
+ nop.m 0
+(p7) fma.d.s0 f8 = fNormX, f1, f0 // If |x| >= 2^52, result x
+ nop.i 0
}
-
-
-{ .mii
- nop.m 999
- nop.i 999
- and nearbyint_GR_exponent = nearbyint_GR_signexp, nearbyint_GR_17ones
;;
-}
-{ .mmi
- cmp.ge.unc p7,p6 = nearbyint_GR_exponent, nearbyint_GR_10033
- and nearbyint_GR_rcs0 = nearbyint_GR_rcs0_mask, nearbyint_GR_fpsr
- nop.i 999
-;;
+{ .mfi
+ nop.m 0
+(p8) fmerge.s f8 = fNormX, f8 // Make sign nearbyint(x) = sign x
+ nop.i 0
}
-
-// Check to see if s0 rounding mode is round to nearest. If not then set s2
-// rounding mode to that of s0 and repeat conversions.
-NEARBYINT_COMMON:
-{ .mfb
- cmp.ne p11,p0 = nearbyint_GR_rcs0, r0
-(p6) fclass.m.unc p9,p10 = NEARBYINT_FLOAT_INT_f8, 0x07 // Test for result=0
-(p11) br.cond.spnt NEARBYINT_NOT_ROUND_NEAREST // Branch if not round to nearest
;;
-}
{ .mfi
- nop.m 999
-(p7) fnorm.d.s0 f8 = f8
- nop.i 999
-;;
+(p8) and rRcs0 = rFpsr, rRcs0Mask // Get rounding mode for sf0
+ nop.f 0
+ nop.i 0
}
+;;
-// If result is zero, merge sign of input
-{ .mfi
- nop.m 999
-(p9) fmerge.s f8 = f8, NEARBYINT_FLOAT_INT_f8
- nop.i 999
+// If |x| < 2^52 we must test for other rounding modes
+{ .mbb
+(p8) cmp.ne.unc p10,p0 = rRcs0, r0 // Test for other rounding modes
+(p10) br.cond.spnt RINT_NOT_ROUND_NEAREST // Branch if not round nearest
+ br.ret.sptk b0 // Exit main path if round nearest
}
+;;
+
+
+RINT_UNORM:
+// Here if x unorm
{ .mfb
- nop.m 999
-(p10) fnorm.d.s0 f8 = NEARBYINT_FLOAT_INT_f8
- br.ret.sptk b0
-;;
+ getf.exp rSignexp = fNormX // Get signexp, recompute if unorm
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ br.cond.sptk RINT_COMMON // Return to main path
}
+;;
-
-NEARBYINT_NOT_ROUND_NEAREST:
-// Set rounding mode of s2 to that of s0
+RINT_NOT_ROUND_NEAREST:
+// Here if not round to nearest, and |x| < 2^52
+// Set rounding mode of s2 to that of s0, and repeat the conversion using s2
{ .mfi
- mov nearbyint_GR_rcs0 = r0 // Clear so we don't come back here
- fsetc.s2 0x7f, 0x40
- nop.i 999
-;;
+ nop.m 0
+ fsetc.s2 0x7f, 0x40
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
- fcvt.fx.s2 NEARBYINT_INT_f8 = f8
- nop.i 999
+ nop.m 0
+ fcvt.fx.s2 fXInt = fNormX // Convert to int in significand
+ nop.i 0
+}
;;
+
+{ .mfi
+ nop.m 0
+ fcvt.xf f8 = fXInt // Expected result
+ nop.i 0
}
+;;
+// Be sure sign of result = sign of input. Fixes cases where result is 0.
{ .mfb
- nop.m 999
- fcvt.xf NEARBYINT_FLOAT_INT_f8 = NEARBYINT_INT_f8
- br.cond.sptk NEARBYINT_COMMON
-;;
+ nop.m 0
+ fmerge.s f8 = fNormX, f8
+ br.ret.sptk b0 // Exit main path
}
-
+;;
GLOBAL_LIBM_END(nearbyint)
diff --git a/sysdeps/ia64/fpu/s_nearbyintf.S b/sysdeps/ia64/fpu/s_nearbyintf.S
index 6471232..aac7b5c 100644
--- a/sysdeps/ia64/fpu/s_nearbyintf.S
+++ b/sysdeps/ia64/fpu/s_nearbyintf.S
@@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
+// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
@@ -43,48 +43,44 @@
// 02/08/01 Corrected behavior for all rounding modes.
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 07/25/03 Improved performance
//==============================================================
-//
+
// API
//==============================================================
// float nearbyintf(float x)
-//
-// general registers used:
-//
+//==============================================================
-nearbyint_GR_signexp = r14
-nearbyint_GR_exponent = r15
-nearbyint_GR_17ones = r16
-nearbyint_GR_10033 = r17
-nearbyint_GR_fpsr = r18
-nearbyint_GR_rcs0 = r19
-nearbyint_GR_rcs0_mask = r20
+// general input registers:
+// r14 - r21
+rSignexp = r14
+rExp = r15
+rExpMask = r16
+rBigexp = r17
+rFpsr = r19
+rRcs0 = r20
+rRcs0Mask = r21
-// predicate registers used:
-// p6-11
+// floating-point registers:
+// f8 - f10
-// floating-point registers used:
+fXInt = f9
+fNormX = f10
-NEARBYINT_NORM_f8 = f9
-NEARBYINT_FLOAT_INT_f8 = f10
-NEARBYINT_INT_f8 = f11
+// predicate registers used:
+// p6 - p10
// Overview of operation
//==============================================================
-
// float nearbyintf(float x)
-// Return an integer value (represented as a float) that is x rounded to integer in current
-// rounding mode
-// *******************************************************************************
-
-// Set denormal flag for denormal input and
-// and take denormal fault if necessary.
-
-// Is the input an integer value already?
+// Return an integer value (represented as a float) that is x
+// rounded to integer in current rounding mode
+// Inexact is not set, otherwise result identical with rint.
+//==============================================================
// double_extended
-// if the exponent is >= 1003e => 3F(true) = 63(decimal)
+// if the exponent is >= 1003e => 3F(true) = 63(decimal)
// we have a significand of 64 bits 1.63-bits.
// If we multiply by 2^63, we no longer have a fractional part
// So input is an integer value already.
@@ -97,128 +93,125 @@ NEARBYINT_INT_f8 = f11
// So input is an integer value already.
// single
-// if the exponent is >= 10016 => 17(true) = 23(decimal)
-// we have a significand of 53 bits 1.52-bits. (implicit 1)
-// If we multiply by 2^52, we no longer have a fractional part
+// if the exponent is >= 10016 => 17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
// So input is an integer value already.
-// If x is NAN, ZERO, or INFINITY, then return
-
-// qnan snan inf norm unorm 0 -+
-// 1 1 1 0 0 1 11 0xe7
-
-
.section .text
GLOBAL_LIBM_ENTRY(nearbyintf)
{ .mfi
- mov nearbyint_GR_fpsr = ar40 // Read the fpsr--need to check rc.s0
- fcvt.fx.s1 NEARBYINT_INT_f8 = f8
- addl nearbyint_GR_10033 = 0x10016, r0
+ getf.exp rSignexp = f8 // Get signexp of x; RINT_UNORM recomputes it if x is unorm
+ fclass.m p7,p0 = f8, 0x0b // Test x unorm
+ addl rBigexp = 0x10016, r0 // Biased exponent of 2^23; at or above it x is already an integer
}
{ .mfi
- nop.m 999
- fnorm.s1 NEARBYINT_NORM_f8 = f8
- mov nearbyint_GR_17ones = 0x1FFFF
-;;
+ nop.m 0
+ fcvt.fx.s1 fXInt = f8 // Convert x to an integer held in the significand, using s1
+ mov rExpMask = 0x1FFFF // Mask that keeps the 17 exponent bits of signexp
}
+;;
{ .mfi
- nop.m 999
- fclass.m.unc p6,p0 = f8, 0xe7
- mov nearbyint_GR_rcs0_mask = 0x0c00
-;;
+ mov rFpsr = ar40 // Read fpsr; rc.s0 is checked later on the main path
+ fclass.m p6,p0 = f8, 0x1e3 // Test x natval, nan, inf
+ nop.i 0
}
-
{ .mfb
- nop.m 999
-(p6) fnorm.s.s0 f8 = f8
-(p6) br.ret.spnt b0 // Exit if x nan, inf, zero
+ nop.m 0
+ fnorm.s1 fNormX = f8 // Normalized copy of input (source of sign, and of result when |x| >= 2^23)
+(p7) br.cond.spnt RINT_UNORM // Branch if x unorm
+}
;;
+
+
+RINT_COMMON:
+// Main path resumes here; RINT_UNORM branches back after recomputing rSignexp
+{ .mfb
+ and rExp = rSignexp, rExpMask // Drop sign bit, leaving biased exponent
+(p6) fma.s.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf
+(p6) br.ret.spnt b0 // Exit if x natval, nan, inf
}
+;;
{ .mfi
- nop.m 999
- fcvt.xf NEARBYINT_FLOAT_INT_f8 = NEARBYINT_INT_f8
- nop.i 999
-;;
+ mov rRcs0Mask = 0x0c00 // Mask for rc.s0
+ fcvt.xf f8 = fXInt // Result, assuming |x| < 2^23
+ cmp.ge p7,p8 = rExp, rBigexp // Is |x| >= 2^23?
}
+;;
+// If |x| >= 2^23 x is already an integer, so return the normalized input instead
{ .mfi
- getf.exp nearbyint_GR_signexp = NEARBYINT_NORM_f8
- fcmp.eq.s0 p8,p0 = f8,f0 // Dummy op to set denormal
- nop.i 999
-;;
+ nop.m 0
+(p7) fma.s.s0 f8 = fNormX, f1, f0 // If |x| >= 2^23, result x
+ nop.i 0
}
-
-
-{ .mii
- nop.m 999
- nop.i 999
- and nearbyint_GR_exponent = nearbyint_GR_signexp, nearbyint_GR_17ones
;;
+
+{ .mfi
+ nop.m 0
+(p8) fmerge.s f8 = fNormX, f8 // Give result the sign of x (matters when the result is zero)
+ nop.i 0
}
+;;
-{ .mmi
- cmp.ge.unc p7,p6 = nearbyint_GR_exponent, nearbyint_GR_10033
- and nearbyint_GR_rcs0 = nearbyint_GR_rcs0_mask, nearbyint_GR_fpsr
- nop.i 999
+{ .mfi
+(p8) and rRcs0 = rFpsr, rRcs0Mask // Extract rc.s0, the rounding mode of sf0
+ nop.f 0
+ nop.i 0
+}
;;
+
+// If |x| < 2^23 we must test for other rounding modes
+{ .mbb
+(p8) cmp.ne.unc p10,p0 = rRcs0, r0 // rc.s0 != 0 means not round to nearest
+(p10) br.cond.spnt RINT_NOT_ROUND_NEAREST // Branch if not round nearest
+ br.ret.sptk b0 // Exit main path if round nearest
}
+;;
-// Check to see if s0 rounding mode is round to nearest. If not then set s2
-// rounding mode to that of s0 and repeat conversions.
-NEARBYINT_COMMON:
+
+RINT_UNORM:
+// Here if x unorm
{ .mfb
- cmp.ne p11,p0 = nearbyint_GR_rcs0, r0
-(p6) fclass.m.unc p9,p10 = NEARBYINT_FLOAT_INT_f8, 0x07 // Test for result=0
-(p11) br.cond.spnt NEARBYINT_NOT_ROUND_NEAREST // Branch if not round to nearest
-;;
+ getf.exp rSignexp = fNormX // Recompute signexp from the normalized input
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ br.cond.sptk RINT_COMMON // Return to main path
}
-
-{ .mfi
- nop.m 999
-(p7) fnorm.s.s0 f8 = f8
- nop.i 999
;;
-}
-// If result is zero, merge sign of input
+RINT_NOT_ROUND_NEAREST:
+// Here if not round to nearest, and |x| < 2^23
+// Set rounding mode of s2 to that of s0, and repeat the conversion using s2
{ .mfi
- nop.m 999
-(p9) fmerge.s f8 = f8, NEARBYINT_FLOAT_INT_f8
- nop.i 999
+ nop.m 0
+ fsetc.s2 0x7f, 0x40
+ nop.i 0
}
-{ .mfb
- nop.m 999
-(p10) fnorm.s.s0 f8 = NEARBYINT_FLOAT_INT_f8
- br.ret.sptk b0
;;
-}
-
-NEARBYINT_NOT_ROUND_NEAREST:
-// Set rounding mode of s2 to that of s0
{ .mfi
- mov nearbyint_GR_rcs0 = r0 // Clear so we don't come back here
- fsetc.s2 0x7f, 0x40
- nop.i 999
-;;
+ nop.m 0
+ fcvt.fx.s2 fXInt = fNormX // Redo conversion under s2, which now has s0's rounding mode
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
- fcvt.fx.s2 NEARBYINT_INT_f8 = f8
- nop.i 999
-;;
+ nop.m 0
+ fcvt.xf f8 = fXInt // Integer back to floating point: the expected result
+ nop.i 0
}
+;;
+// Be sure sign of result = sign of input. Fixes cases where result is 0.
{ .mfb
- nop.m 999
- fcvt.xf NEARBYINT_FLOAT_INT_f8 = NEARBYINT_INT_f8
- br.cond.sptk NEARBYINT_COMMON
-;;
+ nop.m 0
+ fmerge.s f8 = fNormX, f8
+ br.ret.sptk b0 // Return from the non-round-to-nearest path
}
-
+;;
GLOBAL_LIBM_END(nearbyintf)
diff --git a/sysdeps/ia64/fpu/s_nearbyintl.S b/sysdeps/ia64/fpu/s_nearbyintl.S
index 9c4c2e4..ee6159c 100644
--- a/sysdeps/ia64/fpu/s_nearbyintl.S
+++ b/sysdeps/ia64/fpu/s_nearbyintl.S
@@ -21,20 +21,20 @@
// products derived from this software without specific prior written
// permission.
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
+// problem reports or change requests be submitted to it directly at
// http://www.intel.com/software/products/opensource/libraries/num.htm.
//
// History
@@ -43,49 +43,44 @@
// 02/08/01 Corrected behavior for all rounding modes.
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 07/25/03 Improved performance
//==============================================================
-//
+
// API
//==============================================================
// long double nearbyintl(long double x)
-//
-// general registers used:
-//
+//==============================================================
-nearbyint_GR_signexp = r14
-nearbyint_GR_exponent = r15
-nearbyint_GR_17ones = r16
-nearbyint_GR_10033 = r17
-nearbyint_GR_fpsr = r18
-nearbyint_GR_rcs0 = r19
-nearbyint_GR_rcs0_mask = r20
+// general registers used:
+// r14 - r17, r19 - r21
+rSignexp = r14 // sign and exponent of x, from getf.exp
+rExp = r15 // biased exponent: rSignexp masked with rExpMask
+rExpMask = r16 // 0x1FFFF exponent mask
+rBigexp = r17 // 0x1003e, biased exponent of 2^63
+rFpsr = r19 // copy of fpsr (ar40)
+rRcs0 = r20 // rc.s0 field extracted from rFpsr
+rRcs0Mask = r21 // 0x0c00, mask for rc.s0
-// predicate registers used:
-// p6-11
+// floating-point registers used:
+// f8 (input and result), f9, f10
-// floating-point registers used:
+fXInt = f9 // x converted to integer (fcvt.fx)
+fNormX = f10 // normalized copy of x (fnorm)
-NEARBYINT_NORM_f8 = f9
-NEARBYINT_FLOAT_INT_f8 = f10
-NEARBYINT_INT_f8 = f11
-NEARBYINT_SIGNED_FLOAT_INT_f8 = f12
+// predicate registers used:
+// p6 - p8, p10
// Overview of operation
//==============================================================
-
// long double nearbyintl(long double x)
-// Return an integer value (represented as a long double) that is
-// x rounded to integer in current rounding mode
-// *******************************************************************************
-
-// Set denormal flag for denormal input and
-// and take denormal fault if necessary.
-
-// Is the input an integer value already?
+// Return an integer value (represented as a long double) that is x
+// rounded to integer in current rounding mode
+// Inexact is not set, otherwise result identical with rint.
+//==============================================================
// double_extended
-// if the exponent is >= 1003e => 3F(true) = 63(decimal)
+// if the exponent is >= 1003e => 3F(true) = 63(decimal)
// we have a significand of 64 bits 1.63-bits.
// If we multiply by 2^63, we no longer have a fractional part
// So input is an integer value already.
@@ -98,124 +93,125 @@ NEARBYINT_SIGNED_FLOAT_INT_f8 = f12
// So input is an integer value already.
// single
-// if the exponent is >= 10016 => 17(true) = 23(decimal)
-// we have a significand of 53 bits 1.52-bits. (implicit 1)
-// If we multiply by 2^52, we no longer have a fractional part
+// if the exponent is >= 10016 => 17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
// So input is an integer value already.
-// If x is NAN, ZERO, or INFINITY, then return
-
-// qnan snan inf norm unorm 0 -+
-// 1 1 1 0 0 1 11 0xe7
-
-
.section .text
GLOBAL_LIBM_ENTRY(nearbyintl)
{ .mfi
- mov nearbyint_GR_fpsr = ar40 // Read the fpsr--need to check rc.s0
- fcvt.fx.s1 NEARBYINT_INT_f8 = f8
- addl nearbyint_GR_10033 = 0x1003e, r0
+ getf.exp rSignexp = f8 // Get signexp of x; RINT_UNORM recomputes it if x is unorm
+ fclass.m p7,p0 = f8, 0x0b // Test x unorm
+ addl rBigexp = 0x1003e, r0 // Biased exponent of 2^63; at or above it x is already an integer
}
{ .mfi
- nop.m 999
- fnorm.s1 NEARBYINT_NORM_f8 = f8
- mov nearbyint_GR_17ones = 0x1FFFF
-;;
+ nop.m 0
+ fcvt.fx.s1 fXInt = f8 // Convert x to an integer held in the significand, using s1
+ mov rExpMask = 0x1FFFF // Mask that keeps the 17 exponent bits of signexp
}
+;;
{ .mfi
- nop.m 999
- fclass.m.unc p6,p0 = f8, 0xe7
- mov nearbyint_GR_rcs0_mask = 0x0c00
-;;
+ mov rFpsr = ar40 // Read fpsr; rc.s0 is checked later on the main path
+ fclass.m p6,p0 = f8, 0x1e3 // Test x natval, nan, inf
+ nop.i 0
}
-
{ .mfb
- nop.m 999
-(p6) fnorm.s0 f8 = f8
-(p6) br.ret.spnt b0 // Exit if x nan, inf, zero
+ nop.m 0
+ fnorm.s1 fNormX = f8 // Normalized copy of input (source of sign, and of result when |x| >= 2^63)
+(p7) br.cond.spnt RINT_UNORM // Branch if x unorm
+}
;;
+
+
+RINT_COMMON:
+// Main path resumes here; RINT_UNORM branches back after recomputing rSignexp
+{ .mfb
+ and rExp = rSignexp, rExpMask // Drop sign bit, leaving biased exponent
+(p6) fma.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf
+(p6) br.ret.spnt b0 // Exit if x natval, nan, inf
}
+;;
{ .mfi
- nop.m 999
- fcvt.xf NEARBYINT_FLOAT_INT_f8 = NEARBYINT_INT_f8
- nop.i 999
-;;
+ mov rRcs0Mask = 0x0c00 // Mask for rc.s0
+ fcvt.xf f8 = fXInt // Result, assuming |x| < 2^63
+ cmp.ge p7,p8 = rExp, rBigexp // Is |x| >= 2^63?
}
+;;
+// If |x| >= 2^63 x is already an integer, so return the normalized input instead
{ .mfi
- getf.exp nearbyint_GR_signexp = NEARBYINT_NORM_f8
- fcmp.eq.s0 p8,p0 = f8,f0 // Dummy op to set denormal
- nop.i 999
-;;
+ nop.m 0
+(p7) fma.s0 f8 = fNormX, f1, f0 // If |x| >= 2^63, result x
+ nop.i 0
}
-
-
-{ .mii
- nop.m 999
- nop.i 999
- and nearbyint_GR_exponent = nearbyint_GR_signexp, nearbyint_GR_17ones
;;
-}
-{ .mmi
- cmp.ge.unc p7,p6 = nearbyint_GR_exponent, nearbyint_GR_10033
- and nearbyint_GR_rcs0 = nearbyint_GR_rcs0_mask, nearbyint_GR_fpsr
- nop.i 999
-;;
+{ .mfi
+ nop.m 0
+(p8) fmerge.s f8 = fNormX, f8 // Give result the sign of x (matters when the result is zero)
+ nop.i 0
}
-
-// Check to see if s0 rounding mode is round to nearest. If not then set s2
-// rounding mode to that of s0 and repeat conversions.
-// Must merge the original sign for cases where the result is zero or the input
-// is the largest that still has a fraction (0x1007dfffffffffff)
-NEARBYINT_COMMON:
-{ .mfb
- cmp.ne p11,p0 = nearbyint_GR_rcs0, r0
-(p6) fmerge.s NEARBYINT_SIGNED_FLOAT_INT_f8 = f8, NEARBYINT_FLOAT_INT_f8
-(p11) br.cond.spnt NEARBYINT_NOT_ROUND_NEAREST // Branch if not round to nearest
;;
-}
{ .mfi
- nop.m 999
-(p7) fnorm.s0 f8 = f8
- nop.i 999
+(p8) and rRcs0 = rFpsr, rRcs0Mask // Extract rc.s0, the rounding mode of sf0
+ nop.f 0
+ nop.i 0
+}
;;
+
+// If |x| < 2^63 we must test for other rounding modes
+{ .mbb
+(p8) cmp.ne.unc p10,p0 = rRcs0, r0 // rc.s0 != 0 means not round to nearest
+(p10) br.cond.spnt RINT_NOT_ROUND_NEAREST // Branch if not round nearest
+ br.ret.sptk b0 // Exit main path if round nearest
}
+;;
+
+RINT_UNORM:
+// Here if x unorm
{ .mfb
- nop.m 999
-(p6) fnorm.s0 f8 = NEARBYINT_SIGNED_FLOAT_INT_f8
- br.ret.sptk b0
-;;
+ getf.exp rSignexp = fNormX // Recompute signexp from the normalized input
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ br.cond.sptk RINT_COMMON // Return to main path
}
+;;
-
-NEARBYINT_NOT_ROUND_NEAREST:
-// Set rounding mode of s2 to that of s0
+RINT_NOT_ROUND_NEAREST:
+// Here if not round to nearest, and |x| < 2^63
+// Set rounding mode of s2 to that of s0, and repeat the conversion using s2
{ .mfi
- mov nearbyint_GR_rcs0 = r0 // Clear so we don't come back here
- fsetc.s2 0x7f, 0x40
- nop.i 999
-;;
+ nop.m 0
+ fsetc.s2 0x7f, 0x40
+ nop.i 0
}
+;;
{ .mfi
- nop.m 999
- fcvt.fx.s2 NEARBYINT_INT_f8 = f8
- nop.i 999
+ nop.m 0
+ fcvt.fx.s2 fXInt = fNormX // Redo conversion under s2, which now has s0's rounding mode
+ nop.i 0
+}
;;
+
+{ .mfi
+ nop.m 0
+ fcvt.xf f8 = fXInt // Integer back to floating point: the expected result
+ nop.i 0
}
+;;
+// Be sure sign of result = sign of input. Fixes cases where result is 0.
{ .mfb
- nop.m 999
- fcvt.xf NEARBYINT_FLOAT_INT_f8 = NEARBYINT_INT_f8
- br.cond.sptk NEARBYINT_COMMON
-;;
+ nop.m 0
+ fmerge.s f8 = fNormX, f8
+ br.ret.sptk b0 // Return from the non-round-to-nearest path
}
-
+;;
GLOBAL_LIBM_END(nearbyintl)
diff --git a/sysdeps/ia64/fpu/s_nextafter.S b/sysdeps/ia64/fpu/s_nextafter.S
index 8c77aa4..6635a31 100644
--- a/sysdeps/ia64/fpu/s_nextafter.S
+++ b/sysdeps/ia64/fpu/s_nextafter.S
@@ -1,7 +1,7 @@
.file "nextafter.s"
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (c) 2000 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@@ -51,6 +51,7 @@
// fixed flag settings for several cases
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 12/14/04 Added error handling on underflow.
//
// API
//==============================================================
@@ -60,21 +61,21 @@
//
// Registers used
//==============================================================
-nextafter_GR_max_pexp = r14
-nextafter_GR_min_pexp = r15
-nextafter_GR_exp = r16
-nextafter_GR_sig = r17
-nextafter_GR_lnorm_sig = r18
-nextafter_GR_sign_mask = r19
-nextafter_GR_exp_mask = r20
-nextafter_GR_sden_sig = r21
-nextafter_GR_new_sig = r22
-nextafter_GR_new_exp = r23
-nextafter_GR_lden_sig = r24
-nextafter_GR_snorm_sig = r25
-nextafter_GR_exp1 = r26
-nextafter_GR_x_exp = r27
-nextafter_GR_min_den_rexp = r28
+GR_max_pexp = r14
+GR_min_pexp = r15
+GR_exp = r16
+GR_sig = r17
+GR_lnorm_sig = r18
+GR_sign_mask = r19
+GR_exp_mask = r20
+GR_sden_sig = r21
+GR_new_sig = r22
+GR_new_exp = r23
+GR_lden_sig = r24
+GR_snorm_sig = r25
+GR_exp1 = r26
+GR_x_exp = r27
+GR_min_den_rexp = r28
// r36-39 parameters for libm_error_support
GR_SAVE_B0 = r34
@@ -84,20 +85,21 @@ GR_SAVE_PFS = r32
GR_Parameter_X = r36
GR_Parameter_Y = r37
GR_Parameter_RESULT = r38
-
-NEXTAFTER_lnorm_sig = f10
-NEXTAFTER_lnorm_exp = f11
-NEXTAFTER_lnorm = f12
-NEXTAFTER_sden_sig = f13
-NEXTAFTER_sden_exp = f14
-NEXTAFTER_sden = f15
-NEXTAFTER_save_f8 = f33
-NEXTAFTER_new_exp = f34
-NEXTAFTER_new_sig = f35
-NEXTAFTER_lden_sig = f36
-NEXTAFTER_snorm_sig = f37
-NEXTAFTER_exp1 = f38
-NEXTAFTER_tmp = f39
+GR_Parameter_TAG = r39
+
+FR_lnorm_sig = f10
+FR_lnorm_exp = f11
+FR_lnorm = f12
+FR_sden_sig = f13
+FR_sden_exp = f14
+FR_sden = f15
+FR_save_f8 = f33
+FR_new_exp = f34
+FR_new_sig = f35
+FR_lden_sig = f36
+FR_snorm_sig = f37
+FR_exp1 = f38
+FR_tmp = f39
//
// Overview of operation
@@ -113,30 +115,30 @@ GLOBAL_LIBM_ENTRY(nextafter)
// Is x < y ? p10 if yes, p11 if no
// Form smallest denormal significand = ulp size
{ .mfi
- getf.exp nextafter_GR_exp = f8
+ getf.exp GR_exp = f8
fcmp.lt.s1 p10,p11 = f8, f9
- addl nextafter_GR_sden_sig = 0x800, r0
+ addl GR_sden_sig = 0x800, r0
}
// Form largest normal significand 0xfffffffffffff800
// Form smallest normal exponent
{ .mfi
- addl nextafter_GR_lnorm_sig = -0x800,r0
+ addl GR_lnorm_sig = -0x800,r0
nop.f 999
- addl nextafter_GR_min_pexp = 0x0fc01, r0 ;;
+ addl GR_min_pexp = 0x0fc01, r0 ;;
}
// Extract significand from x
// Is x=y?
// Form largest normal exponent
{ .mfi
- getf.sig nextafter_GR_sig = f8
+ getf.sig GR_sig = f8
fcmp.eq.s0 p6,p0 = f8, f9
- addl nextafter_GR_max_pexp = 0x103fe, r0
+ addl GR_max_pexp = 0x103fe, r0
}
// Move largest normal significand to fp reg for special cases
{ .mfi
- setf.sig NEXTAFTER_lnorm_sig = nextafter_GR_lnorm_sig
+ setf.sig FR_lnorm_sig = GR_lnorm_sig
nop.f 999
- addl nextafter_GR_sign_mask = 0x20000, r0 ;;
+ addl GR_sign_mask = 0x20000, r0 ;;
}
// Move smallest denormal significand and signexp to fp regs
@@ -145,13 +147,13 @@ GLOBAL_LIBM_ENTRY(nextafter)
// It increases (p12 set) if x<y and x>=0 or if x>y and x<0
// It decreases (p13 set) if x<y and x<0 or if x>y and x>=0
{ .mfi
- setf.sig NEXTAFTER_sden_sig = nextafter_GR_sden_sig
+ setf.sig FR_sden_sig = GR_sden_sig
fclass.m p8,p0 = f8, 0xc3
-(p10) cmp.lt p12,p13 = nextafter_GR_exp, nextafter_GR_sign_mask
+(p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask
}
{ .mfi
- setf.exp NEXTAFTER_sden_exp = nextafter_GR_min_pexp
-(p11) cmp.ge p12,p13 = nextafter_GR_exp, nextafter_GR_sign_mask ;;
+ setf.exp FR_sden_exp = GR_min_pexp
+(p11) cmp.ge p12,p13 = GR_exp, GR_sign_mask ;;
}
.pred.rel "mutex",p12,p13
@@ -160,33 +162,33 @@ GLOBAL_LIBM_ENTRY(nextafter)
// If x=y set result to y
// Form smallest normal significand and largest denormal significand
{ .mfi
-(p12) add nextafter_GR_new_sig = nextafter_GR_sig, nextafter_GR_sden_sig
+(p12) add GR_new_sig = GR_sig, GR_sden_sig
(p6) fmerge.s f8=f9,f9
- dep.z nextafter_GR_snorm_sig = 1,63,1 // 0x8000000000000000
+ dep.z GR_snorm_sig = 1,63,1 // 0x8000000000000000
}
{ .mlx
-(p13) sub nextafter_GR_new_sig = nextafter_GR_sig, nextafter_GR_sden_sig
- movl nextafter_GR_lden_sig = 0x7ffffffffffff800 ;;
+(p13) sub GR_new_sig = GR_sig, GR_sden_sig
+ movl GR_lden_sig = 0x7ffffffffffff800 ;;
}
// Move expected result significand and signexp to fp regs
// Is y=nan?
// Form new exponent in case result exponent needs incrementing or decrementing
{ .mfi
- setf.exp NEXTAFTER_new_exp = nextafter_GR_exp
+ setf.exp FR_new_exp = GR_exp
fclass.m p9,p0 = f9, 0xc3
-(p12) add nextafter_GR_exp1 = 1, nextafter_GR_exp
+(p12) add GR_exp1 = 1, GR_exp
}
{ .mib
- setf.sig NEXTAFTER_new_sig = nextafter_GR_new_sig
-(p13) add nextafter_GR_exp1 = -1, nextafter_GR_exp
+ setf.sig FR_new_sig = GR_new_sig
+(p13) add GR_exp1 = -1, GR_exp
(p6) br.ret.spnt b0 ;; // Exit if x=y
}
// Move largest normal signexp to fp reg for special cases
// Is x=zero?
{ .mfi
- setf.exp NEXTAFTER_lnorm_exp = nextafter_GR_max_pexp
+ setf.exp FR_lnorm_exp = GR_max_pexp
fclass.m p7,p0 = f8, 0x7
nop.i 999
}
@@ -199,12 +201,12 @@ GLOBAL_LIBM_ENTRY(nextafter)
// Move exp+-1 and smallest normal significand to fp regs for special cases
// Is x=inf?
{ .mfi
- setf.exp NEXTAFTER_exp1 = nextafter_GR_exp1
+ setf.exp FR_exp1 = GR_exp1
fclass.m p6,p0 = f8, 0x23
- addl nextafter_GR_exp_mask = 0x1ffff, r0
+ addl GR_exp_mask = 0x1ffff, r0
}
{ .mfb
- setf.sig NEXTAFTER_snorm_sig = nextafter_GR_snorm_sig
+ setf.sig FR_snorm_sig = GR_snorm_sig
(p9) fma.s0 f8 = f8,f1,f9
(p9) br.ret.spnt b0 ;; // Exit if y=nan
}
@@ -212,16 +214,16 @@ GLOBAL_LIBM_ENTRY(nextafter)
// Move largest denormal significand to fp regs for special cases
// Save x
{ .mfb
- setf.sig NEXTAFTER_lden_sig = nextafter_GR_lden_sig
- mov NEXTAFTER_save_f8 = f8
-(p7) br.cond.spnt NEXTAFTER_ZERO ;; // Exit if x=0
+ setf.sig FR_lden_sig = GR_lden_sig
+ mov FR_save_f8 = f8
+(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0
}
// Mask off the sign to get x_exp
{ .mfb
- and nextafter_GR_x_exp = nextafter_GR_exp_mask, nextafter_GR_exp
+ and GR_x_exp = GR_exp_mask, GR_exp
nop.f 999
-(p6) br.cond.spnt NEXTAFTER_INF ;; // Exit if x=inf
+(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf
}
// Check 6 special cases when significand rolls over:
@@ -240,35 +242,35 @@ GLOBAL_LIBM_ENTRY(nextafter)
//
// Form exponent of smallest double denormal (if normalized register format)
{ .mmi
- adds nextafter_GR_min_den_rexp = -52, nextafter_GR_min_pexp
-(p12) cmp.eq.unc p6,p0 = nextafter_GR_new_sig, r0
-(p13) cmp.eq.unc p8,p10 = nextafter_GR_new_sig, nextafter_GR_lden_sig ;;
+ adds GR_min_den_rexp = -52, GR_min_pexp
+(p12) cmp.eq.unc p6,p0 = GR_new_sig, r0
+(p13) cmp.eq.unc p8,p10 = GR_new_sig, GR_lden_sig ;;
}
{ .mmi
-(p6) cmp.lt.unc p6,p7 = nextafter_GR_x_exp, nextafter_GR_max_pexp
-(p8) cmp.gt.unc p8,p9 = nextafter_GR_x_exp, nextafter_GR_min_pexp
-(p10) cmp.eq.unc p10,p0 = nextafter_GR_new_sig, r0 ;;
+(p6) cmp.lt.unc p6,p7 = GR_x_exp, GR_max_pexp
+(p8) cmp.gt.unc p8,p9 = GR_x_exp, GR_min_pexp
+(p10) cmp.eq.unc p10,p0 = GR_new_sig, r0 ;;
}
// Create small normal in case need to generate underflow flag
{ .mfi
-(p10) cmp.le.unc p10,p0 = nextafter_GR_x_exp, nextafter_GR_min_pexp
- fmerge.se NEXTAFTER_tmp = NEXTAFTER_sden_exp, NEXTAFTER_lnorm_sig
-(p9) cmp.gt.unc p9,p14 = nextafter_GR_x_exp, nextafter_GR_min_den_rexp
+(p10) cmp.le.unc p10,p0 = GR_x_exp, GR_min_pexp
+ fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig
+(p9) cmp.gt.unc p9,p14 = GR_x_exp, GR_min_den_rexp
}
// Branch if cases 1, 2, 3
{ .bbb
-(p6) br.cond.spnt NEXTAFTER_EXPUP
-(p7) br.cond.spnt NEXTAFTER_OVERFLOW
-(p8) br.cond.spnt NEXTAFTER_EXPDOWN ;;
+(p6) br.cond.spnt NEXT_EXPUP
+(p7) br.cond.spnt NEXT_OVERFLOW
+(p8) br.cond.spnt NEXT_EXPDOWN ;;
}
// Branch if cases 4, 5, 6
{ .bbb
-(p9) br.cond.spnt NEXTAFTER_NORM_TO_DENORM
-(p10) br.cond.spnt NEXTAFTER_UNDERFLOW_TO_ZERO
-(p14) br.cond.spnt NEXTAFTER_UNDERFLOW_TO_ZERO ;;
+(p9) br.cond.spnt NEXT_NORM_TO_DENORM
+(p10) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO
+(p14) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO ;;
}
// Here if no special cases
@@ -276,68 +278,72 @@ GLOBAL_LIBM_ENTRY(nextafter)
// Case 1: x_exp=min_exp, x_sig=unnormalized
// Case 2: x_exp<min_exp
{ .mfi
- cmp.lt p6,p7 = nextafter_GR_x_exp, nextafter_GR_min_pexp
- fmerge.se f8 = NEXTAFTER_new_exp, NEXTAFTER_new_sig
+ cmp.lt p6,p7 = GR_x_exp, GR_min_pexp
+ fmerge.se f8 = FR_new_exp, FR_new_sig
nop.i 999 ;;
}
{ .mfi
nop.m 999
nop.f 999
-(p7) tbit.z p6,p0 = nextafter_GR_new_sig, 63 ;;
+(p7) tbit.z p6,p0 = GR_new_sig, 63 ;;
}
-NEXTAFTER_COMMON_FINISH:
+NEXT_COMMON_FINISH:
// Force underflow and inexact if denormal result
{ .mfi
nop.m 999
-(p6) fma.d.s0 NEXTAFTER_tmp = NEXTAFTER_tmp,NEXTAFTER_tmp,f0
- nop.i 999 ;;
+(p6) fma.d.s0 FR_tmp = FR_tmp,FR_tmp,f0
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fnorm.d.s0 f8 = f8 // Final normalization to result precision
+(p6) br.cond.spnt NEXT_UNDERFLOW ;;
}
-// Final normalization to result precision and exit
{ .mfb
nop.m 999
- fnorm.d.s0 f8 = f8
+ nop.f 999
br.ret.sptk b0;;
}
//Special cases
-NEXTAFTER_EXPUP:
+NEXT_EXPUP:
{ .mfb
- cmp.lt p6,p7 = nextafter_GR_x_exp, nextafter_GR_min_pexp
- fmerge.se f8 = NEXTAFTER_exp1, NEXTAFTER_snorm_sig
- br.cond.sptk NEXTAFTER_COMMON_FINISH ;;
+ cmp.lt p6,p7 = GR_x_exp, GR_min_pexp
+ fmerge.se f8 = FR_exp1, FR_snorm_sig
+ br.cond.sptk NEXT_COMMON_FINISH ;;
}
-NEXTAFTER_EXPDOWN:
+NEXT_EXPDOWN:
{ .mfb
- cmp.lt p6,p7 = nextafter_GR_x_exp, nextafter_GR_min_pexp
- fmerge.se f8 = NEXTAFTER_exp1, NEXTAFTER_lnorm_sig
- br.cond.sptk NEXTAFTER_COMMON_FINISH ;;
+ cmp.lt p6,p7 = GR_x_exp, GR_min_pexp
+ fmerge.se f8 = FR_exp1, FR_lnorm_sig
+ br.cond.sptk NEXT_COMMON_FINISH ;;
}
-NEXTAFTER_NORM_TO_DENORM:
+NEXT_NORM_TO_DENORM:
{ .mfi
nop.m 999
- fmerge.se f8 = NEXTAFTER_new_exp, NEXTAFTER_lden_sig
+ fmerge.se f8 = FR_new_exp, FR_lden_sig
nop.i 999
}
// Force underflow and inexact if denormal result
{ .mfb
nop.m 999
- fma.d.s0 NEXTAFTER_tmp = NEXTAFTER_tmp,NEXTAFTER_tmp,f0
- br.ret.sptk b0 ;;
+ fma.d.s0 FR_tmp = FR_tmp,FR_tmp,f0
+ br.cond.sptk NEXT_UNDERFLOW ;;
}
-NEXTAFTER_UNDERFLOW_TO_ZERO:
+NEXT_UNDERFLOW_TO_ZERO:
{ .mfb
cmp.eq p6,p0 = r0,r0
- fmerge.s f8 = NEXTAFTER_save_f8,f0
- br.cond.sptk NEXTAFTER_COMMON_FINISH ;;
+ fmerge.s f8 = FR_save_f8,f0
+ br.cond.sptk NEXT_COMMON_FINISH ;;
}
-NEXTAFTER_INF:
+NEXT_INF:
// Here if f8 is +- infinity
// INF
// if f8 is +inf, no matter what y is return largest double
@@ -345,17 +351,17 @@ NEXTAFTER_INF:
{ .mfi
nop.m 999
- fmerge.se NEXTAFTER_lnorm = NEXTAFTER_lnorm_exp,NEXTAFTER_lnorm_sig
+ fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig
nop.i 999 ;;
}
{ .mfb
nop.m 999
- fmerge.s f8 = f8,NEXTAFTER_lnorm
+ fmerge.s f8 = f8,FR_lnorm
br.ret.sptk b0 ;;
}
-NEXTAFTER_ZERO:
+NEXT_ZERO:
// Here if f8 is +- zero
// ZERO
@@ -364,75 +370,72 @@ NEXTAFTER_ZERO:
{ .mfi
nop.m 999
- fmerge.se NEXTAFTER_sden = NEXTAFTER_sden_exp,NEXTAFTER_sden_sig
+ fmerge.se FR_sden = FR_sden_exp,FR_sden_sig
nop.i 999 ;;
}
// Create small normal to generate underflow flag
{ .mfi
nop.m 999
- fmerge.se NEXTAFTER_tmp = NEXTAFTER_sden_exp, NEXTAFTER_lnorm_sig
+ fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig
nop.i 999 ;;
}
// Add correct sign from direction arg
{ .mfi
nop.m 999
- fmerge.s f8 = f9,NEXTAFTER_sden
+ fmerge.s f8 = f9,FR_sden
nop.i 999 ;;
}
+// Force underflow and inexact flags
{ .mfb
nop.m 999
- fma.d.s0 NEXTAFTER_tmp = NEXTAFTER_tmp,NEXTAFTER_tmp,f0
- br.ret.sptk b0 ;;
+ fma.d.s0 FR_tmp = FR_tmp,FR_tmp,f0
+ br.cond.sptk NEXT_UNDERFLOW ;;
}
-GLOBAL_LIBM_END(nextafter)
-// Stack operations when calling error support.
-// (1) (2) (3) (call) (4)
-// sp -> + psp -> + psp -> + sp -> +
-// | | | |
-// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
-// | | | |
-// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
-// | | | |
-// | | <- GR_X X1 ->| |
-// | | | |
-// sp-64 -> + sp -> + sp -> + +
-// save ar.pfs save b0 restore gp
-// save gp restore ar.pfs
-
-
+NEXT_UNDERFLOW:
+// Here if result is a denorm, or input is finite and result is zero
+// Call error support to report possible range error
+{ .mib
+ alloc r32=ar.pfs,2,2,4,0
+ mov GR_Parameter_TAG = 268 // Error code
+ br.cond.sptk __libm_error_region // Branch to error call
+}
+;;
-LOCAL_LIBM_ENTRY(__libm_error_region)
-NEXTAFTER_OVERFLOW:
-// Here if f8 is finite, but result will be infinite
+NEXT_OVERFLOW:
+// Here if input is finite, but result will be infinite
// Use frcpa to generate infinity of correct sign
// Call error support to report possible range error
-.prologue
-
{ .mfi
alloc r32=ar.pfs,2,2,4,0
- frcpa.s1 f8,p6 = NEXTAFTER_save_f8, f0
+ frcpa.s1 f8,p6 = FR_save_f8, f0
nop.i 999 ;;
}
// Create largest double
{ .mfi
nop.m 999
- fmerge.se NEXTAFTER_lnorm = NEXTAFTER_lnorm_exp,NEXTAFTER_lnorm_sig
+ fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig
nop.i 999 ;;
}
// Force overflow and inexact flags to be set
-{ .mfi
- mov r39 = 154 // Error code
- fma.d.s0 NEXTAFTER_tmp = NEXTAFTER_lnorm,NEXTAFTER_lnorm,f0
- nop.i 999
+{ .mfb
+ mov GR_Parameter_TAG = 154 // Error code
+ fma.d.s0 FR_tmp = FR_lnorm,FR_lnorm,f0
+ br.cond.sptk __libm_error_region // Branch to error call
}
;;
+GLOBAL_LIBM_END(nextafter)
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+
// (1)
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@@ -459,7 +462,7 @@ NEXTAFTER_OVERFLOW:
.body
// (3)
{ .mib
- stfd [GR_Parameter_X] = NEXTAFTER_save_f8 // STORE Parameter 1 on stack
+ stfd [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
}
diff --git a/sysdeps/ia64/fpu/s_nextafterf.S b/sysdeps/ia64/fpu/s_nextafterf.S
index 6d2a927..0c269ec 100644
--- a/sysdeps/ia64/fpu/s_nextafterf.S
+++ b/sysdeps/ia64/fpu/s_nextafterf.S
@@ -1,7 +1,7 @@
.file "nextafterf.s"
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (c) 2000 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@@ -51,6 +51,7 @@
// fixed flag settings for several cases
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 12/14/04 Added error handling on underflow.
//
// API
//==============================================================
@@ -60,21 +61,21 @@
//
// Registers used
//==============================================================
-nextafter_GR_max_pexp = r14
-nextafter_GR_min_pexp = r15
-nextafter_GR_exp = r16
-nextafter_GR_sig = r17
-nextafter_GR_lnorm_sig = r18
-nextafter_GR_sign_mask = r19
-nextafter_GR_exp_mask = r20
-nextafter_GR_sden_sig = r21
-nextafter_GR_new_sig = r22
-nextafter_GR_new_exp = r23
-nextafter_GR_lden_sig = r24
-nextafter_GR_snorm_sig = r25
-nextafter_GR_exp1 = r26
-nextafter_GR_x_exp = r27
-nextafter_GR_min_den_rexp = r28
+GR_max_pexp = r14
+GR_min_pexp = r15
+GR_exp = r16
+GR_sig = r17
+GR_lnorm_sig = r18
+GR_sign_mask = r19
+GR_exp_mask = r20
+GR_sden_sig = r21
+GR_new_sig = r22
+GR_new_exp = r23
+GR_lden_sig = r24
+GR_snorm_sig = r25
+GR_exp1 = r26
+GR_x_exp = r27
+GR_min_den_rexp = r28
// r36-39 parameters for libm_error_support
GR_SAVE_B0 = r34
@@ -84,20 +85,21 @@ GR_SAVE_PFS = r32
GR_Parameter_X = r36
GR_Parameter_Y = r37
GR_Parameter_RESULT = r38
-
-NEXTAFTER_lnorm_sig = f10
-NEXTAFTER_lnorm_exp = f11
-NEXTAFTER_lnorm = f12
-NEXTAFTER_sden_sig = f13
-NEXTAFTER_sden_exp = f14
-NEXTAFTER_sden = f15
-NEXTAFTER_save_f8 = f33
-NEXTAFTER_new_exp = f34
-NEXTAFTER_new_sig = f35
-NEXTAFTER_lden_sig = f36
-NEXTAFTER_snorm_sig = f37
-NEXTAFTER_exp1 = f38
-NEXTAFTER_tmp = f39
+GR_Parameter_TAG = r39
+
+FR_lnorm_sig = f10
+FR_lnorm_exp = f11
+FR_lnorm = f12
+FR_sden_sig = f13
+FR_sden_exp = f14
+FR_sden = f15
+FR_save_f8 = f33
+FR_new_exp = f34
+FR_new_sig = f35
+FR_lden_sig = f36
+FR_snorm_sig = f37
+FR_exp1 = f38
+FR_tmp = f39
//
// Overview of operation
@@ -112,21 +114,21 @@ GLOBAL_LIBM_ENTRY(nextafterf)
// Extract signexp from x
// Form smallest denormal significand = ulp size
{ .mlx
- getf.exp nextafter_GR_exp = f8
- movl nextafter_GR_sden_sig = 0x0000010000000000
+ getf.exp GR_exp = f8
+ movl GR_sden_sig = 0x0000010000000000
}
// Form largest normal exponent
// Is x < y ? p10 if yes, p11 if no
// Form smallest normal exponent
{ .mfi
- addl nextafter_GR_max_pexp = 0x1007e, r0
+ addl GR_max_pexp = 0x1007e, r0
fcmp.lt.s1 p10,p11 = f8, f9
- addl nextafter_GR_min_pexp = 0x0ff81, r0 ;;
+ addl GR_min_pexp = 0x0ff81, r0 ;;
}
// Is x=y?
{ .mfi
- getf.sig nextafter_GR_sig = f8
+ getf.sig GR_sig = f8
fcmp.eq.s0 p6,p0 = f8, f9
nop.i 0
}
@@ -134,14 +136,14 @@ GLOBAL_LIBM_ENTRY(nextafterf)
// Form largest normal significand
{ .mlx
nop.m 0
- movl nextafter_GR_lnorm_sig = 0xffffff0000000000 ;;
+ movl GR_lnorm_sig = 0xffffff0000000000 ;;
}
// Move largest normal significand to fp reg for special cases
{ .mfi
- setf.sig NEXTAFTER_lnorm_sig = nextafter_GR_lnorm_sig
+ setf.sig FR_lnorm_sig = GR_lnorm_sig
nop.f 0
- addl nextafter_GR_sign_mask = 0x20000, r0 ;;
+ addl GR_sign_mask = 0x20000, r0 ;;
}
// Move smallest denormal significand and signexp to fp regs
@@ -150,14 +152,14 @@ GLOBAL_LIBM_ENTRY(nextafterf)
// It increases (p12 set) if x<y and x>=0 or if x>y and x<0
// It decreases (p13 set) if x<y and x<0 or if x>y and x>=0
{ .mfi
- setf.sig NEXTAFTER_sden_sig = nextafter_GR_sden_sig
+ setf.sig FR_sden_sig = GR_sden_sig
fclass.m p8,p0 = f8, 0xc3
-(p10) cmp.lt p12,p13 = nextafter_GR_exp, nextafter_GR_sign_mask
+(p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask
}
{ .mfi
- setf.exp NEXTAFTER_sden_exp = nextafter_GR_min_pexp
+ setf.exp FR_sden_exp = GR_min_pexp
nop.f 999
-(p11) cmp.ge p12,p13 = nextafter_GR_exp, nextafter_GR_sign_mask ;;
+(p11) cmp.ge p12,p13 = GR_exp, GR_sign_mask ;;
}
.pred.rel "mutex",p12,p13
@@ -166,33 +168,33 @@ GLOBAL_LIBM_ENTRY(nextafterf)
// If x=y set result to y
// Form smallest normal significand and largest denormal significand
{ .mfi
-(p12) add nextafter_GR_new_sig = nextafter_GR_sig, nextafter_GR_sden_sig
+(p12) add GR_new_sig = GR_sig, GR_sden_sig
(p6) fmerge.s f8=f9,f9
- dep.z nextafter_GR_snorm_sig = 1,63,1 // 0x8000000000000000
+ dep.z GR_snorm_sig = 1,63,1 // 0x8000000000000000
}
{ .mlx
-(p13) sub nextafter_GR_new_sig = nextafter_GR_sig, nextafter_GR_sden_sig
- movl nextafter_GR_lden_sig = 0x7fffff0000000000 ;;
+(p13) sub GR_new_sig = GR_sig, GR_sden_sig
+ movl GR_lden_sig = 0x7fffff0000000000 ;;
}
// Move expected result significand and signexp to fp regs
// Is y=nan?
// Form new exponent in case result exponent needs incrementing or decrementing
{ .mfi
- setf.exp NEXTAFTER_new_exp = nextafter_GR_exp
+ setf.exp FR_new_exp = GR_exp
fclass.m p9,p0 = f9, 0xc3
-(p12) add nextafter_GR_exp1 = 1, nextafter_GR_exp
+(p12) add GR_exp1 = 1, GR_exp
}
{ .mib
- setf.sig NEXTAFTER_new_sig = nextafter_GR_new_sig
-(p13) add nextafter_GR_exp1 = -1, nextafter_GR_exp
+ setf.sig FR_new_sig = GR_new_sig
+(p13) add GR_exp1 = -1, GR_exp
(p6) br.ret.spnt b0 ;; // Exit if x=y
}
// Move largest normal signexp to fp reg for special cases
// Is x=zero?
{ .mfi
- setf.exp NEXTAFTER_lnorm_exp = nextafter_GR_max_pexp
+ setf.exp FR_lnorm_exp = GR_max_pexp
fclass.m p7,p0 = f8, 0x7
nop.i 999
}
@@ -205,12 +207,12 @@ GLOBAL_LIBM_ENTRY(nextafterf)
// Move exp+-1 and smallest normal significand to fp regs for special cases
// Is x=inf?
{ .mfi
- setf.exp NEXTAFTER_exp1 = nextafter_GR_exp1
+ setf.exp FR_exp1 = GR_exp1
fclass.m p6,p0 = f8, 0x23
- addl nextafter_GR_exp_mask = 0x1ffff, r0
+ addl GR_exp_mask = 0x1ffff, r0
}
{ .mfb
- setf.sig NEXTAFTER_snorm_sig = nextafter_GR_snorm_sig
+ setf.sig FR_snorm_sig = GR_snorm_sig
(p9) fma.s0 f8 = f8,f1,f9
(p9) br.ret.spnt b0 ;; // Exit if y=nan
}
@@ -218,16 +220,16 @@ GLOBAL_LIBM_ENTRY(nextafterf)
// Move largest denormal significand to fp regs for special cases
// Save x
{ .mfb
- setf.sig NEXTAFTER_lden_sig = nextafter_GR_lden_sig
- mov NEXTAFTER_save_f8 = f8
-(p7) br.cond.spnt NEXTAFTER_ZERO ;; // Exit if x=0
+ setf.sig FR_lden_sig = GR_lden_sig
+ mov FR_save_f8 = f8
+(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0
}
// Mask off the sign to get x_exp
{ .mfb
- and nextafter_GR_x_exp = nextafter_GR_exp_mask, nextafter_GR_exp
+ and GR_x_exp = GR_exp_mask, GR_exp
nop.f 999
-(p6) br.cond.spnt NEXTAFTER_INF ;; // Exit if x=inf
+(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf
}
// Check 6 special cases when significand rolls over:
@@ -246,35 +248,35 @@ GLOBAL_LIBM_ENTRY(nextafterf)
//
// Form exponent of smallest float denormal (if normalized register format)
{ .mmi
- adds nextafter_GR_min_den_rexp = -23, nextafter_GR_min_pexp
-(p12) cmp.eq.unc p6,p0 = nextafter_GR_new_sig, r0
-(p13) cmp.eq.unc p8,p10 = nextafter_GR_new_sig, nextafter_GR_lden_sig ;;
+ adds GR_min_den_rexp = -23, GR_min_pexp
+(p12) cmp.eq.unc p6,p0 = GR_new_sig, r0
+(p13) cmp.eq.unc p8,p10 = GR_new_sig, GR_lden_sig ;;
}
{ .mmi
-(p6) cmp.lt.unc p6,p7 = nextafter_GR_x_exp, nextafter_GR_max_pexp
-(p8) cmp.gt.unc p8,p9 = nextafter_GR_x_exp, nextafter_GR_min_pexp
-(p10) cmp.eq.unc p10,p0 = nextafter_GR_new_sig, r0 ;;
+(p6) cmp.lt.unc p6,p7 = GR_x_exp, GR_max_pexp
+(p8) cmp.gt.unc p8,p9 = GR_x_exp, GR_min_pexp
+(p10) cmp.eq.unc p10,p0 = GR_new_sig, r0 ;;
}
// Create small normal in case need to generate underflow flag
{ .mfi
-(p10) cmp.le.unc p10,p0 = nextafter_GR_x_exp, nextafter_GR_min_pexp
- fmerge.se NEXTAFTER_tmp = NEXTAFTER_sden_exp, NEXTAFTER_lnorm_sig
-(p9) cmp.gt.unc p9,p14 = nextafter_GR_x_exp, nextafter_GR_min_den_rexp
+(p10) cmp.le.unc p10,p0 = GR_x_exp, GR_min_pexp
+ fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig
+(p9) cmp.gt.unc p9,p14 = GR_x_exp, GR_min_den_rexp
}
// Branch if cases 1, 2, 3
{ .bbb
-(p6) br.cond.spnt NEXTAFTER_EXPUP
-(p7) br.cond.spnt NEXTAFTER_OVERFLOW
-(p8) br.cond.spnt NEXTAFTER_EXPDOWN ;;
+(p6) br.cond.spnt NEXT_EXPUP
+(p7) br.cond.spnt NEXT_OVERFLOW
+(p8) br.cond.spnt NEXT_EXPDOWN ;;
}
// Branch if cases 4, 5, 6
{ .bbb
-(p9) br.cond.spnt NEXTAFTER_NORM_TO_DENORM
-(p10) br.cond.spnt NEXTAFTER_UNDERFLOW_TO_ZERO
-(p14) br.cond.spnt NEXTAFTER_UNDERFLOW_TO_ZERO ;;
+(p9) br.cond.spnt NEXT_NORM_TO_DENORM
+(p10) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO
+(p14) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO ;;
}
// Here if no special cases
@@ -282,68 +284,72 @@ GLOBAL_LIBM_ENTRY(nextafterf)
// Case 1: x_exp=min_exp, x_sig=unnormalized
// Case 2: x_exp<min_exp
{ .mfi
- cmp.lt p6,p7 = nextafter_GR_x_exp, nextafter_GR_min_pexp
- fmerge.se f8 = NEXTAFTER_new_exp, NEXTAFTER_new_sig
+ cmp.lt p6,p7 = GR_x_exp, GR_min_pexp
+ fmerge.se f8 = FR_new_exp, FR_new_sig
nop.i 999 ;;
}
{ .mfi
nop.m 999
nop.f 999
-(p7) tbit.z p6,p0 = nextafter_GR_new_sig, 63 ;;
+(p7) tbit.z p6,p0 = GR_new_sig, 63 ;;
}
-NEXTAFTER_COMMON_FINISH:
+NEXT_COMMON_FINISH:
// Force underflow and inexact if denormal result
{ .mfi
nop.m 999
-(p6) fma.s.s0 NEXTAFTER_tmp = NEXTAFTER_tmp,NEXTAFTER_tmp,f0
- nop.i 999 ;;
+(p6) fma.s.s0 FR_tmp = FR_tmp,FR_tmp,f0
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fnorm.s.s0 f8 = f8 // Final normalization to result precision
+(p6) br.cond.spnt NEXT_UNDERFLOW ;;
}
-// Final normalization to result precision and exit
{ .mfb
nop.m 999
- fnorm.s.s0 f8 = f8
+ nop.f 999
br.ret.sptk b0;;
}
//Special cases
-NEXTAFTER_EXPUP:
+NEXT_EXPUP:
{ .mfb
- cmp.lt p6,p7 = nextafter_GR_x_exp, nextafter_GR_min_pexp
- fmerge.se f8 = NEXTAFTER_exp1, NEXTAFTER_snorm_sig
- br.cond.sptk NEXTAFTER_COMMON_FINISH ;;
+ cmp.lt p6,p7 = GR_x_exp, GR_min_pexp
+ fmerge.se f8 = FR_exp1, FR_snorm_sig
+ br.cond.sptk NEXT_COMMON_FINISH ;;
}
-NEXTAFTER_EXPDOWN:
+NEXT_EXPDOWN:
{ .mfb
- cmp.lt p6,p7 = nextafter_GR_x_exp, nextafter_GR_min_pexp
- fmerge.se f8 = NEXTAFTER_exp1, NEXTAFTER_lnorm_sig
- br.cond.sptk NEXTAFTER_COMMON_FINISH ;;
+ cmp.lt p6,p7 = GR_x_exp, GR_min_pexp
+ fmerge.se f8 = FR_exp1, FR_lnorm_sig
+ br.cond.sptk NEXT_COMMON_FINISH ;;
}
-NEXTAFTER_NORM_TO_DENORM:
+NEXT_NORM_TO_DENORM:
{ .mfi
nop.m 999
- fmerge.se f8 = NEXTAFTER_new_exp, NEXTAFTER_lden_sig
+ fmerge.se f8 = FR_new_exp, FR_lden_sig
nop.i 999
}
// Force underflow and inexact
{ .mfb
nop.m 999
- fma.s.s0 NEXTAFTER_tmp = NEXTAFTER_tmp,NEXTAFTER_tmp,f0
- br.ret.sptk b0 ;;
+ fma.s.s0 FR_tmp = FR_tmp,FR_tmp,f0
+ br.cond.sptk NEXT_UNDERFLOW ;;
}
-NEXTAFTER_UNDERFLOW_TO_ZERO:
+NEXT_UNDERFLOW_TO_ZERO:
{ .mfb
cmp.eq p6,p0 = r0,r0
- fmerge.s f8 = NEXTAFTER_save_f8,f0
- br.cond.sptk NEXTAFTER_COMMON_FINISH ;;
+ fmerge.s f8 = FR_save_f8,f0
+ br.cond.sptk NEXT_COMMON_FINISH ;;
}
-NEXTAFTER_INF:
+NEXT_INF:
// Here if f8 is +- infinity
// INF
// if f8 is +inf, no matter what y is return largest float
@@ -351,17 +357,17 @@ NEXTAFTER_INF:
{ .mfi
nop.m 999
- fmerge.se NEXTAFTER_lnorm = NEXTAFTER_lnorm_exp,NEXTAFTER_lnorm_sig
+ fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig
nop.i 999 ;;
}
{ .mfb
nop.m 999
- fmerge.s f8 = f8,NEXTAFTER_lnorm
+ fmerge.s f8 = f8,FR_lnorm
br.ret.sptk b0 ;;
}
-NEXTAFTER_ZERO:
+NEXT_ZERO:
// Here if f8 is +- zero
// ZERO
@@ -370,76 +376,72 @@ NEXTAFTER_ZERO:
{ .mfi
nop.m 999
- fmerge.se NEXTAFTER_sden = NEXTAFTER_sden_exp,NEXTAFTER_sden_sig
+ fmerge.se FR_sden = FR_sden_exp,FR_sden_sig
nop.i 999 ;;
}
// Create small normal to generate underflow flag
{ .mfi
nop.m 999
- fmerge.se NEXTAFTER_tmp = NEXTAFTER_sden_exp, NEXTAFTER_lnorm_sig
+ fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig
nop.i 999 ;;
}
// Add correct sign from direction arg
{ .mfi
nop.m 999
- fmerge.s f8 = f9,NEXTAFTER_sden
+ fmerge.s f8 = f9,FR_sden
nop.i 999 ;;
}
// Force underflow and inexact flags
{ .mfb
nop.m 999
- fma.s.s0 NEXTAFTER_tmp = NEXTAFTER_tmp,NEXTAFTER_tmp,f0
- br.ret.sptk b0 ;;
+ fma.s.s0 FR_tmp = FR_tmp,FR_tmp,f0
+ br.cond.sptk NEXT_UNDERFLOW ;;
}
-GLOBAL_LIBM_END(nextafterf)
-// Stack operations when calling error support.
-// (1) (2) (3) (call) (4)
-// sp -> + psp -> + psp -> + sp -> +
-// | | | |
-// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
-// | | | |
-// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
-// | | | |
-// | | <- GR_X X1 ->| |
-// | | | |
-// sp-64 -> + sp -> + sp -> + +
-// save ar.pfs save b0 restore gp
-// save gp restore ar.pfs
-
-
+NEXT_UNDERFLOW:
+// Here if result is a denorm, or input is finite and result is zero
+// Call error support to report possible range error
+{ .mib
+ alloc r32=ar.pfs,2,2,4,0
+ mov GR_Parameter_TAG = 269 // Error code
+ br.cond.sptk __libm_error_region // Branch to error call
+}
+;;
-LOCAL_LIBM_ENTRY(__libm_error_region)
-NEXTAFTER_OVERFLOW:
-// Here if f8 is finite, but result will be infinite
+NEXT_OVERFLOW:
+// Here if input is finite, but result will be infinite
// Use frcpa to generate infinity of correct sign
// Call error support to report possible range error
-.prologue
-
{ .mfi
alloc r32=ar.pfs,2,2,4,0
- frcpa.s1 f8,p6 = NEXTAFTER_save_f8, f0
- nop.i 999
+ frcpa.s1 f8,p6 = FR_save_f8, f0
+ nop.i 999 ;;
}
-// Create largest float
+// Create largest float
{ .mfi
nop.m 999
- fmerge.se NEXTAFTER_lnorm = NEXTAFTER_lnorm_exp,NEXTAFTER_lnorm_sig
+ fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig
nop.i 999 ;;
}
// Force overflow and inexact flags to be set
-{ .mfi
- mov r39 = 155 // Error code
- fma.s.s0 NEXTAFTER_tmp = NEXTAFTER_lnorm,NEXTAFTER_lnorm,f0
- nop.i 999
+{ .mfb
+ mov GR_Parameter_TAG = 155 // Error code
+ fma.s.s0 FR_tmp = FR_lnorm,FR_lnorm,f0
+ br.cond.sptk __libm_error_region // Branch to error call
}
;;
+GLOBAL_LIBM_END(nextafterf)
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+
// (1)
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@@ -466,7 +468,7 @@ NEXTAFTER_OVERFLOW:
.body
// (3)
{ .mib
- stfs [GR_Parameter_X] = NEXTAFTER_save_f8 // STORE Parameter 1 on stack
+ stfs [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
}
diff --git a/sysdeps/ia64/fpu/s_nextafterl.S b/sysdeps/ia64/fpu/s_nextafterl.S
index 05bdd9c..20c927b 100644
--- a/sysdeps/ia64/fpu/s_nextafterl.S
+++ b/sysdeps/ia64/fpu/s_nextafterl.S
@@ -1,7 +1,7 @@
.file "nextafterl.s"
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (c) 2000 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@@ -52,6 +52,7 @@
// for several cases
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 12/14/04 Added error handling on underflow.
//
// API
//==============================================================
@@ -61,20 +62,20 @@
//
// Registers used
//==============================================================
-nextafter_GR_max_pexp = r14
-nextafter_GR_min_pexp = r15
-nextafter_GR_exp = r16
-nextafter_GR_sig = r17
-nextafter_GR_lnorm_sig = r18
-nextafter_GR_sign_mask = r19
-nextafter_GR_exp_mask = r20
-nextafter_GR_sden_sig = r21
-nextafter_GR_new_sig = r22
-nextafter_GR_new_exp = r23
-nextafter_GR_lden_sig = r24
-nextafter_GR_snorm_sig = r25
-nextafter_GR_exp1 = r26
-nextafter_GR_x_exp = r27
+GR_max_pexp = r14
+GR_min_pexp = r15
+GR_exp = r16
+GR_sig = r17
+GR_lnorm_sig = r18
+GR_sign_mask = r19
+GR_exp_mask = r20
+GR_sden_sig = r21
+GR_new_sig = r22
+GR_new_exp = r23
+GR_lden_sig = r24
+GR_snorm_sig = r25
+GR_exp1 = r26
+GR_x_exp = r27
// r36-39 parameters for libm_error_support
GR_SAVE_B0 = r34
@@ -84,21 +85,22 @@ GR_SAVE_PFS = r32
GR_Parameter_X = r36
GR_Parameter_Y = r37
GR_Parameter_RESULT = r38
-
-NEXTAFTER_lnorm_sig = f10
-NEXTAFTER_lnorm_exp = f11
-NEXTAFTER_lnorm = f12
-NEXTAFTER_sden_sig = f13
-NEXTAFTER_den_exp = f14
-NEXTAFTER_sden = f15
-NEXTAFTER_snorm_exp = f32
-NEXTAFTER_save_f8 = f33
-NEXTAFTER_new_exp = f34
-NEXTAFTER_new_sig = f35
-NEXTAFTER_lden_sig = f36
-NEXTAFTER_snorm_sig = f37
-NEXTAFTER_exp1 = f38
-NEXTAFTER_tmp = f39
+GR_Parameter_TAG = r39
+
+FR_lnorm_sig = f10
+FR_lnorm_exp = f11
+FR_lnorm = f12
+FR_sden_sig = f13
+FR_den_exp = f14
+FR_sden = f15
+FR_snorm_exp = f32
+FR_save_f8 = f33
+FR_new_exp = f34
+FR_new_sig = f35
+FR_lden_sig = f36
+FR_snorm_sig = f37
+FR_exp1 = f38
+FR_tmp = f39
//
// Overview of operation
@@ -114,31 +116,31 @@ GLOBAL_LIBM_ENTRY(nextafterl)
// Is x < y ? p10 if yes, p11 if no
// Form smallest denormal significand = ulp size
{ .mfi
- getf.exp nextafter_GR_exp = f8
+ getf.exp GR_exp = f8
fcmp.lt.s1 p10,p11 = f8, f9
- addl nextafter_GR_sden_sig = 0x1, r0
+ addl GR_sden_sig = 0x1, r0
}
// Form largest normal significand 0xffffffffffffffff
// Form smallest normal exponent
{ .mfi
- addl nextafter_GR_lnorm_sig = -0x1,r0
+ addl GR_lnorm_sig = -0x1,r0
nop.f 999
- addl nextafter_GR_min_pexp = 0x0c001, r0 ;;
+ addl GR_min_pexp = 0x0c001, r0 ;;
}
// Extract significand from x
// Is x=y? This fcmp also sets Invalid and Denormal if required
// Form largest normal exponent
{ .mfi
- getf.sig nextafter_GR_sig = f8
+ getf.sig GR_sig = f8
fcmp.eq.s0 p6,p0 = f8, f9
- addl nextafter_GR_max_pexp = 0x13ffe, r0
+ addl GR_max_pexp = 0x13ffe, r0
}
// Move largest normal significand to fp reg for special cases
{ .mfi
- setf.sig NEXTAFTER_lnorm_sig = nextafter_GR_lnorm_sig
+ setf.sig FR_lnorm_sig = GR_lnorm_sig
nop.f 999
- addl nextafter_GR_sign_mask = 0x20000, r0 ;;
+ addl GR_sign_mask = 0x20000, r0 ;;
}
// Move smallest denormal significand and exp to fp regs
@@ -147,15 +149,15 @@ GLOBAL_LIBM_ENTRY(nextafterl)
// It increases (p12 set) if x<y and x>=0 or if x>y and x<0
// It decreases (p13 set) if x<y and x<0 or if x>y and x>=0
{ .mfi
- setf.sig NEXTAFTER_sden_sig = nextafter_GR_sden_sig
+ setf.sig FR_sden_sig = GR_sden_sig
fclass.m p8,p0 = f8, 0xc3
-(p10) cmp.lt p12,p13 = nextafter_GR_exp, nextafter_GR_sign_mask
+(p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask
}
// Move smallest normal exp to fp regs
{ .mfi
- setf.exp NEXTAFTER_snorm_exp = nextafter_GR_min_pexp
+ setf.exp FR_snorm_exp = GR_min_pexp
nop.f 999
-(p11) cmp.ge p12,p13 = nextafter_GR_exp, nextafter_GR_sign_mask ;;
+(p11) cmp.ge p12,p13 = GR_exp, GR_sign_mask ;;
}
.pred.rel "mutex",p12,p13
@@ -164,38 +166,38 @@ GLOBAL_LIBM_ENTRY(nextafterl)
// If x=y set result to y
// Form smallest normal significand and largest denormal significand
{ .mfi
-(p12) add nextafter_GR_new_sig = nextafter_GR_sig, nextafter_GR_sden_sig
+(p12) add GR_new_sig = GR_sig, GR_sden_sig
(p6) fmerge.s f8=f9,f9
- dep.z nextafter_GR_snorm_sig = 1,63,1 // 0x8000000000000000
+ dep.z GR_snorm_sig = 1,63,1 // 0x8000000000000000
}
{ .mlx
-(p13) sub nextafter_GR_new_sig = nextafter_GR_sig, nextafter_GR_sden_sig
- movl nextafter_GR_lden_sig = 0x7fffffffffffffff ;;
+(p13) sub GR_new_sig = GR_sig, GR_sden_sig
+ movl GR_lden_sig = 0x7fffffffffffffff ;;
}
// Move expected result significand and signexp to fp regs
// Is y=nan?
// Form new exponent in case result exponent needs incrementing or decrementing
{ .mfi
- setf.exp NEXTAFTER_new_exp = nextafter_GR_exp
+ setf.exp FR_new_exp = GR_exp
fclass.m p9,p0 = f9, 0xc3
-(p12) add nextafter_GR_exp1 = 1, nextafter_GR_exp
+(p12) add GR_exp1 = 1, GR_exp
}
{ .mib
- setf.sig NEXTAFTER_new_sig = nextafter_GR_new_sig
-(p13) add nextafter_GR_exp1 = -1, nextafter_GR_exp
+ setf.sig FR_new_sig = GR_new_sig
+(p13) add GR_exp1 = -1, GR_exp
(p6) br.ret.spnt b0 ;; // Exit if x=y
}
// Move largest normal signexp to fp reg for special cases
// Is x=zero?
{ .mfi
- setf.exp NEXTAFTER_lnorm_exp = nextafter_GR_max_pexp
+ setf.exp FR_lnorm_exp = GR_max_pexp
fclass.m p7,p0 = f8, 0x7
nop.i 999
}
{ .mfb
- setf.exp NEXTAFTER_den_exp = nextafter_GR_min_pexp
+ setf.exp FR_den_exp = GR_min_pexp
(p8) fma.s0 f8 = f8,f1,f9
(p8) br.ret.spnt b0 ;; // Exit if x=nan
}
@@ -203,12 +205,12 @@ GLOBAL_LIBM_ENTRY(nextafterl)
// Move exp+-1 and smallest normal significand to fp regs for special cases
// Is x=inf?
{ .mfi
- setf.exp NEXTAFTER_exp1 = nextafter_GR_exp1
+ setf.exp FR_exp1 = GR_exp1
fclass.m p6,p0 = f8, 0x23
- addl nextafter_GR_exp_mask = 0x1ffff, r0
+ addl GR_exp_mask = 0x1ffff, r0
}
{ .mfb
- setf.sig NEXTAFTER_snorm_sig = nextafter_GR_snorm_sig
+ setf.sig FR_snorm_sig = GR_snorm_sig
(p9) fma.s0 f8 = f8,f1,f9
(p9) br.ret.spnt b0 ;; // Exit if y=nan
}
@@ -216,16 +218,16 @@ GLOBAL_LIBM_ENTRY(nextafterl)
// Move largest denormal significand to fp regs for special cases
// Save x
{ .mfb
- setf.sig NEXTAFTER_lden_sig = nextafter_GR_lden_sig
- mov NEXTAFTER_save_f8 = f8
-(p7) br.cond.spnt NEXTAFTER_ZERO ;; // Exit if x=0
+ setf.sig FR_lden_sig = GR_lden_sig
+ mov FR_save_f8 = f8
+(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0
}
// Mask off the sign to get x_exp
{ .mfb
- and nextafter_GR_x_exp = nextafter_GR_exp_mask, nextafter_GR_exp
+ and GR_x_exp = GR_exp_mask, GR_exp
nop.f 999
-(p6) br.cond.spnt NEXTAFTER_INF ;; // Exit if x=inf
+(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf
}
// Check 5 special cases when significand rolls over:
@@ -241,37 +243,37 @@ GLOBAL_LIBM_ENTRY(nextafterl)
// Set p10, result is zero, sign of x, signal underflow and inexact
//
{ .mmi
-(p12) cmp.eq.unc p6,p0 = nextafter_GR_new_sig, r0
-(p13) cmp.eq.unc p9,p10 = nextafter_GR_new_sig, nextafter_GR_lden_sig
+(p12) cmp.eq.unc p6,p0 = GR_new_sig, r0
+(p13) cmp.eq.unc p9,p10 = GR_new_sig, GR_lden_sig
nop.i 999
;;
}
{ .mmi
-(p6) cmp.lt.unc p6,p7 = nextafter_GR_x_exp, nextafter_GR_max_pexp
-(p10) cmp.eq.unc p10,p0 = nextafter_GR_new_sig, r0
-(p9) cmp.le.unc p9,p8 = nextafter_GR_x_exp, nextafter_GR_min_pexp
+(p6) cmp.lt.unc p6,p7 = GR_x_exp, GR_max_pexp
+(p10) cmp.eq.unc p10,p0 = GR_new_sig, r0
+(p9) cmp.le.unc p9,p8 = GR_x_exp, GR_min_pexp
;;
}
// Create small normal in case need to generate underflow flag
{ .mfi
nop.m 999
- fmerge.se NEXTAFTER_tmp = NEXTAFTER_snorm_exp, NEXTAFTER_lnorm_sig
+ fmerge.se FR_tmp = FR_snorm_exp, FR_lnorm_sig
nop.i 999
}
// Branch if cases 1, 2, 3
{ .bbb
-(p6) br.cond.spnt NEXTAFTER_EXPUP
-(p7) br.cond.spnt NEXTAFTER_OVERFLOW
-(p8) br.cond.spnt NEXTAFTER_EXPDOWN ;;
+(p6) br.cond.spnt NEXT_EXPUP
+(p7) br.cond.spnt NEXT_OVERFLOW
+(p8) br.cond.spnt NEXT_EXPDOWN ;;
}
// Branch if cases 4, 5
{ .mbb
nop.m 999
-(p9) br.cond.spnt NEXTAFTER_NORM_TO_DENORM
-(p10) br.cond.spnt NEXTAFTER_UNDERFLOW_TO_ZERO
+(p9) br.cond.spnt NEXT_NORM_TO_DENORM
+(p10) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO
;;
}
@@ -280,68 +282,72 @@ GLOBAL_LIBM_ENTRY(nextafterl)
// Case 1: x_exp=min_exp, x_sig=unnormalized
// Case 2: x_exp<min_exp
{ .mfi
- cmp.lt p6,p7 = nextafter_GR_x_exp, nextafter_GR_min_pexp
- fmerge.se f8 = NEXTAFTER_new_exp, NEXTAFTER_new_sig
+ cmp.lt p6,p7 = GR_x_exp, GR_min_pexp
+ fmerge.se f8 = FR_new_exp, FR_new_sig
nop.i 999 ;;
}
{ .mfi
nop.m 999
nop.f 999
-(p6) tbit.z p6,p0 = nextafter_GR_new_sig, 63 ;;
+(p6) tbit.z p6,p0 = GR_new_sig, 63 ;;
}
-NEXTAFTER_COMMON_FINISH:
+NEXT_COMMON_FINISH:
// Force underflow and inexact if denormal result
{ .mfi
nop.m 999
-(p6) fma.s0 NEXTAFTER_tmp = NEXTAFTER_tmp,NEXTAFTER_tmp,f0
- nop.i 999 ;;
+(p6) fma.s0 FR_tmp = FR_tmp,FR_tmp,f0
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fnorm.s0 f8 = f8 // Final normalization to result precision
+(p6) br.cond.spnt NEXT_UNDERFLOW ;;
}
-// Final normalization to result precision and exit
{ .mfb
nop.m 999
- fnorm.s0 f8 = f8
+ nop.f 999
br.ret.sptk b0;;
}
//Special cases
-NEXTAFTER_EXPUP:
+NEXT_EXPUP:
{ .mfb
- cmp.lt p6,p7 = nextafter_GR_x_exp, nextafter_GR_min_pexp
- fmerge.se f8 = NEXTAFTER_exp1, NEXTAFTER_snorm_sig
- br.cond.sptk NEXTAFTER_COMMON_FINISH ;;
+ cmp.lt p6,p7 = GR_x_exp, GR_min_pexp
+ fmerge.se f8 = FR_exp1, FR_snorm_sig
+ br.cond.sptk NEXT_COMMON_FINISH ;;
}
-NEXTAFTER_EXPDOWN:
+NEXT_EXPDOWN:
{ .mfb
- cmp.lt p6,p7 = nextafter_GR_x_exp, nextafter_GR_min_pexp
- fmerge.se f8 = NEXTAFTER_exp1, NEXTAFTER_lnorm_sig
- br.cond.sptk NEXTAFTER_COMMON_FINISH ;;
+ cmp.lt p6,p7 = GR_x_exp, GR_min_pexp
+ fmerge.se f8 = FR_exp1, FR_lnorm_sig
+ br.cond.sptk NEXT_COMMON_FINISH ;;
}
-NEXTAFTER_NORM_TO_DENORM:
+NEXT_NORM_TO_DENORM:
{ .mfi
nop.m 999
- fmerge.se f8 = NEXTAFTER_exp1, NEXTAFTER_lden_sig
+ fmerge.se f8 = FR_exp1, FR_lden_sig
nop.i 999
}
// Force underflow and inexact
{ .mfb
nop.m 999
- fma.s0 NEXTAFTER_tmp = NEXTAFTER_tmp,NEXTAFTER_tmp,f0
- br.ret.sptk b0 ;;
+ fma.s0 FR_tmp = FR_tmp,FR_tmp,f0
+ br.cond.sptk NEXT_UNDERFLOW ;;
}
-NEXTAFTER_UNDERFLOW_TO_ZERO:
+NEXT_UNDERFLOW_TO_ZERO:
{ .mfb
cmp.eq p6,p0 = r0,r0
- fmerge.s f8 = NEXTAFTER_save_f8,f0
- br.cond.sptk NEXTAFTER_COMMON_FINISH ;;
+ fmerge.s f8 = FR_save_f8,f0
+ br.cond.sptk NEXT_COMMON_FINISH ;;
}
-NEXTAFTER_INF:
+NEXT_INF:
// Here if f8 is +- infinity
// INF
// if f8 is +inf, no matter what y is return largest long double
@@ -350,17 +356,17 @@ NEXTAFTER_INF:
// Create largest long double
{ .mfi
nop.m 999
- fmerge.se NEXTAFTER_lnorm = NEXTAFTER_lnorm_exp,NEXTAFTER_lnorm_sig
+ fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig
nop.i 999 ;;
}
{ .mfb
nop.m 999
- fmerge.s f8 = f8,NEXTAFTER_lnorm
+ fmerge.s f8 = f8,FR_lnorm
br.ret.sptk b0 ;;
}
-NEXTAFTER_ZERO:
+NEXT_ZERO:
// Here if f8 is +- zero
// ZERO
@@ -369,76 +375,72 @@ NEXTAFTER_ZERO:
{ .mfi
nop.m 999
- fmerge.se NEXTAFTER_sden = f0,NEXTAFTER_sden_sig
+ fmerge.se FR_sden = f0,FR_sden_sig
nop.i 999 ;;
}
// Create small normal to generate underflow flag
{ .mfi
nop.m 999
- fmerge.se NEXTAFTER_tmp = NEXTAFTER_snorm_exp, NEXTAFTER_lnorm_sig
+ fmerge.se FR_tmp = FR_snorm_exp, FR_lnorm_sig
nop.i 999 ;;
}
// Add correct sign from direction arg
{ .mfi
nop.m 999
- fmerge.s f8 = f9,NEXTAFTER_sden
+ fmerge.s f8 = f9,FR_sden
nop.i 999 ;;
}
// Force underflow and inexact flags
{ .mfb
nop.m 999
- fma.s0 NEXTAFTER_tmp = NEXTAFTER_tmp,NEXTAFTER_tmp,f0
- br.ret.sptk b0 ;;
+ fma.s0 FR_tmp = FR_tmp,FR_tmp,f0
+ br.cond.sptk NEXT_UNDERFLOW ;;
}
-GLOBAL_LIBM_END(nextafterl)
-// Stack operations when calling error support.
-// (1) (2) (3) (call) (4)
-// sp -> + psp -> + psp -> + sp -> +
-// | | | |
-// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
-// | | | |
-// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
-// | | | |
-// | | <- GR_X X1 ->| |
-// | | | |
-// sp-64 -> + sp -> + sp -> + +
-// save ar.pfs save b0 restore gp
-// save gp restore ar.pfs
-
-
+NEXT_UNDERFLOW:
+// Here if result is a denorm, or input is finite and result is zero
+// Call error support to report possible range error
+{ .mib
+ alloc r32=ar.pfs,2,2,4,0
+ mov GR_Parameter_TAG = 267 // Error code
+ br.cond.sptk __libm_error_region // Branch to error call
+}
+;;
-LOCAL_LIBM_ENTRY(__libm_error_region)
-NEXTAFTER_OVERFLOW:
-// Here if f8 is finite, but result will be infinite
+NEXT_OVERFLOW:
+// Here if input is finite, but result will be infinite
// Use frcpa to generate infinity of correct sign
// Call error support to report possible range error
-.prologue
-
{ .mfi
alloc r32=ar.pfs,2,2,4,0
- frcpa.s1 f8,p6 = NEXTAFTER_save_f8, f0
+ frcpa.s1 f8,p6 = FR_save_f8, f0
nop.i 999 ;;
}
-// Create largest long double
+// Create largest long double
{ .mfi
nop.m 999
- fmerge.se NEXTAFTER_lnorm = NEXTAFTER_lnorm_exp,NEXTAFTER_lnorm_sig
+ fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig
nop.i 999 ;;
}
// Force overflow and inexact flags to be set
-{ .mfi
- mov r39 = 153 // Error code
- fma.s0 NEXTAFTER_tmp = NEXTAFTER_lnorm,NEXTAFTER_lnorm,f0
- nop.i 999
+{ .mfb
+ mov GR_Parameter_TAG = 153 // Error code
+ fma.s0 FR_tmp = FR_lnorm,FR_lnorm,f0
+ br.cond.sptk __libm_error_region // Branch to error call
}
;;
+GLOBAL_LIBM_END(nextafterl)
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+
// (1)
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@@ -465,7 +467,7 @@ NEXTAFTER_OVERFLOW:
.body
// (3)
{ .mib
- stfe [GR_Parameter_X] = NEXTAFTER_save_f8 // STORE Parameter 1 on stack
+ stfe [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
}
diff --git a/sysdeps/ia64/fpu/s_nexttoward.S b/sysdeps/ia64/fpu/s_nexttoward.S
index f8fac1e..741fea0 100644
--- a/sysdeps/ia64/fpu/s_nexttoward.S
+++ b/sysdeps/ia64/fpu/s_nexttoward.S
@@ -1,7 +1,7 @@
.file "nexttoward.s"
-// Copyright (c) 2001 - 2003, Intel Corporation
+// Copyright (c) 2001 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2001 by the Intel Numerics Group, Intel Corporation
@@ -43,6 +43,7 @@
// 08/23/01 Corrected error tag number
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 12/14/04 Added error handling on underflow.
//
// API
//==============================================================
@@ -52,21 +53,21 @@
//
// Registers used
//==============================================================
-nexttoward_GR_max_pexp = r14
-nexttoward_GR_min_pexp = r15
-nexttoward_GR_exp = r16
-nexttoward_GR_sig = r17
-nexttoward_GR_lnorm_sig = r18
-nexttoward_GR_sign_mask = r19
-nexttoward_GR_exp_mask = r20
-nexttoward_GR_sden_sig = r21
-nexttoward_GR_new_sig = r22
-nexttoward_GR_new_exp = r23
-nexttoward_GR_lden_sig = r24
-nexttoward_GR_snorm_sig = r25
-nexttoward_GR_exp1 = r26
-nexttoward_GR_x_exp = r27
-nexttoward_GR_min_den_rexp = r28
+GR_max_pexp = r14
+GR_min_pexp = r15
+GR_exp = r16
+GR_sig = r17
+GR_lnorm_sig = r18
+GR_sign_mask = r19
+GR_exp_mask = r20
+GR_sden_sig = r21
+GR_new_sig = r22
+GR_new_exp = r23
+GR_lden_sig = r24
+GR_snorm_sig = r25
+GR_exp1 = r26
+GR_x_exp = r27
+GR_min_den_rexp = r28
// r36-39 parameters for libm_error_support
GR_SAVE_B0 = r34
@@ -76,20 +77,21 @@ GR_SAVE_PFS = r32
GR_Parameter_X = r36
GR_Parameter_Y = r37
GR_Parameter_RESULT = r38
-
-NEXTTOWARD_lnorm_sig = f10
-NEXTTOWARD_lnorm_exp = f11
-NEXTTOWARD_lnorm = f12
-NEXTTOWARD_sden_sig = f13
-NEXTTOWARD_sden_exp = f14
-NEXTTOWARD_sden = f15
-NEXTTOWARD_save_f8 = f33
-NEXTTOWARD_new_exp = f34
-NEXTTOWARD_new_sig = f35
-NEXTTOWARD_lden_sig = f36
-NEXTTOWARD_snorm_sig = f37
-NEXTTOWARD_exp1 = f38
-NEXTTOWARD_tmp = f39
+GR_Parameter_TAG = r39
+
+FR_lnorm_sig = f10
+FR_lnorm_exp = f11
+FR_lnorm = f12
+FR_sden_sig = f13
+FR_sden_exp = f14
+FR_sden = f15
+FR_save_f8 = f33
+FR_new_exp = f34
+FR_new_sig = f35
+FR_lden_sig = f36
+FR_snorm_sig = f37
+FR_exp1 = f38
+FR_tmp = f39
//
// Overview of operation
@@ -105,30 +107,30 @@ GLOBAL_LIBM_ENTRY(nexttoward)
// Is x < y ? p10 if yes, p11 if no
// Form smallest denormal significand = ulp size
{ .mfi
- getf.exp nexttoward_GR_exp = f8
+ getf.exp GR_exp = f8
fcmp.lt.s1 p10,p11 = f8, f9
- addl nexttoward_GR_sden_sig = 0x800, r0
+ addl GR_sden_sig = 0x800, r0
}
// Form largest normal significand 0xfffffffffffff800
// Form smallest normal exponent
{ .mfi
- addl nexttoward_GR_lnorm_sig = -0x800,r0
+ addl GR_lnorm_sig = -0x800,r0
nop.f 999
- addl nexttoward_GR_min_pexp = 0x0fc01, r0 ;;
+ addl GR_min_pexp = 0x0fc01, r0 ;;
}
// Extract significand from x
// Is x=y?
// Form largest normal exponent
{ .mfi
- getf.sig nexttoward_GR_sig = f8
+ getf.sig GR_sig = f8
fcmp.eq.s0 p6,p0 = f8, f9
- addl nexttoward_GR_max_pexp = 0x103fe, r0
+ addl GR_max_pexp = 0x103fe, r0
}
// Move largest normal significand to fp reg for special cases
{ .mfi
- setf.sig NEXTTOWARD_lnorm_sig = nexttoward_GR_lnorm_sig
+ setf.sig FR_lnorm_sig = GR_lnorm_sig
nop.f 999
- addl nexttoward_GR_sign_mask = 0x20000, r0 ;;
+ addl GR_sign_mask = 0x20000, r0 ;;
}
// Move smallest denormal significand and signexp to fp regs
@@ -137,13 +139,13 @@ GLOBAL_LIBM_ENTRY(nexttoward)
// It increases (p12 set) if x<y and x>=0 or if x>y and x<0
// It decreases (p13 set) if x<y and x<0 or if x>y and x>=0
{ .mfi
- setf.sig NEXTTOWARD_sden_sig = nexttoward_GR_sden_sig
+ setf.sig FR_sden_sig = GR_sden_sig
fclass.m p8,p0 = f8, 0xc3
-(p10) cmp.lt p12,p13 = nexttoward_GR_exp, nexttoward_GR_sign_mask
+(p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask
}
{ .mfi
- setf.exp NEXTTOWARD_sden_exp = nexttoward_GR_min_pexp
-(p11) cmp.ge p12,p13 = nexttoward_GR_exp, nexttoward_GR_sign_mask ;;
+ setf.exp FR_sden_exp = GR_min_pexp
+(p11) cmp.ge p12,p13 = GR_exp, GR_sign_mask ;;
}
.pred.rel "mutex",p12,p13
@@ -152,33 +154,33 @@ GLOBAL_LIBM_ENTRY(nexttoward)
// If x=y set result to y
// Form smallest normal significand and largest denormal significand
{ .mfi
-(p12) add nexttoward_GR_new_sig = nexttoward_GR_sig, nexttoward_GR_sden_sig
+(p12) add GR_new_sig = GR_sig, GR_sden_sig
(p6) fnorm.d.s0 f8=f9 //Normalise
- dep.z nexttoward_GR_snorm_sig = 1,63,1 // 0x8000000000000000
+ dep.z GR_snorm_sig = 1,63,1 // 0x8000000000000000
}
{ .mlx
-(p13) sub nexttoward_GR_new_sig = nexttoward_GR_sig, nexttoward_GR_sden_sig
- movl nexttoward_GR_lden_sig = 0x7ffffffffffff800 ;;
+(p13) sub GR_new_sig = GR_sig, GR_sden_sig
+ movl GR_lden_sig = 0x7ffffffffffff800 ;;
}
// Move expected result significand and signexp to fp regs
// Is y=nan?
// Form new exponent in case result exponent needs incrementing or decrementing
{ .mfi
- setf.exp NEXTTOWARD_new_exp = nexttoward_GR_exp
+ setf.exp FR_new_exp = GR_exp
fclass.m p9,p0 = f9, 0xc3
-(p12) add nexttoward_GR_exp1 = 1, nexttoward_GR_exp
+(p12) add GR_exp1 = 1, GR_exp
}
{ .mib
- setf.sig NEXTTOWARD_new_sig = nexttoward_GR_new_sig
-(p13) add nexttoward_GR_exp1 = -1, nexttoward_GR_exp
+ setf.sig FR_new_sig = GR_new_sig
+(p13) add GR_exp1 = -1, GR_exp
(p6) br.ret.spnt b0 ;; // Exit if x=y
}
// Move largest normal signexp to fp reg for special cases
// Is x=zero?
{ .mfi
- setf.exp NEXTTOWARD_lnorm_exp = nexttoward_GR_max_pexp
+ setf.exp FR_lnorm_exp = GR_max_pexp
fclass.m p7,p0 = f8, 0x7
nop.i 999
}
@@ -191,12 +193,12 @@ GLOBAL_LIBM_ENTRY(nexttoward)
// Move exp+-1 and smallest normal significand to fp regs for special cases
// Is x=inf?
{ .mfi
- setf.exp NEXTTOWARD_exp1 = nexttoward_GR_exp1
+ setf.exp FR_exp1 = GR_exp1
fclass.m p6,p0 = f8, 0x23
- addl nexttoward_GR_exp_mask = 0x1ffff, r0
+ addl GR_exp_mask = 0x1ffff, r0
}
{ .mfb
- setf.sig NEXTTOWARD_snorm_sig = nexttoward_GR_snorm_sig
+ setf.sig FR_snorm_sig = GR_snorm_sig
(p9) fma.s0 f8 = f8,f1,f9
(p9) br.ret.spnt b0 ;; // Exit if y=nan
}
@@ -204,16 +206,16 @@ GLOBAL_LIBM_ENTRY(nexttoward)
// Move largest denormal significand to fp regs for special cases
// Save x
{ .mfb
- setf.sig NEXTTOWARD_lden_sig = nexttoward_GR_lden_sig
- mov NEXTTOWARD_save_f8 = f8
-(p7) br.cond.spnt NEXTTOWARD_ZERO ;; // Exit if x=0
+ setf.sig FR_lden_sig = GR_lden_sig
+ mov FR_save_f8 = f8
+(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0
}
// Mask off the sign to get x_exp
{ .mfb
- and nexttoward_GR_x_exp = nexttoward_GR_exp_mask, nexttoward_GR_exp
+ and GR_x_exp = GR_exp_mask, GR_exp
nop.f 999
-(p6) br.cond.spnt NEXTTOWARD_INF ;; // Exit if x=inf
+(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf
}
// Check 6 special cases when significand rolls over:
@@ -232,35 +234,35 @@ GLOBAL_LIBM_ENTRY(nexttoward)
//
// Form exponent of smallest double denormal (if normalized register format)
{ .mmi
- adds nexttoward_GR_min_den_rexp = -52, nexttoward_GR_min_pexp
-(p12) cmp.eq.unc p6,p0 = nexttoward_GR_new_sig, r0
-(p13) cmp.eq.unc p8,p10 = nexttoward_GR_new_sig, nexttoward_GR_lden_sig ;;
+ adds GR_min_den_rexp = -52, GR_min_pexp
+(p12) cmp.eq.unc p6,p0 = GR_new_sig, r0
+(p13) cmp.eq.unc p8,p10 = GR_new_sig, GR_lden_sig ;;
}
{ .mmi
-(p6) cmp.lt.unc p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_max_pexp
-(p8) cmp.gt.unc p8,p9 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp
-(p10) cmp.eq.unc p10,p0 = nexttoward_GR_new_sig, r0 ;;
+(p6) cmp.lt.unc p6,p7 = GR_x_exp, GR_max_pexp
+(p8) cmp.gt.unc p8,p9 = GR_x_exp, GR_min_pexp
+(p10) cmp.eq.unc p10,p0 = GR_new_sig, r0 ;;
}
// Create small normal in case need to generate underflow flag
{ .mfi
-(p10) cmp.le.unc p10,p0 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp
- fmerge.se NEXTTOWARD_tmp = NEXTTOWARD_sden_exp, NEXTTOWARD_lnorm_sig
-(p9) cmp.gt.unc p9,p14 = nexttoward_GR_x_exp, nexttoward_GR_min_den_rexp
+(p10) cmp.le.unc p10,p0 = GR_x_exp, GR_min_pexp
+ fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig
+(p9) cmp.gt.unc p9,p14 = GR_x_exp, GR_min_den_rexp
}
// Branch if cases 1, 2, 3
{ .bbb
-(p6) br.cond.spnt NEXTTOWARD_EXPUP
-(p7) br.cond.spnt NEXTTOWARD_OVERFLOW
-(p8) br.cond.spnt NEXTTOWARD_EXPDOWN ;;
+(p6) br.cond.spnt NEXT_EXPUP
+(p7) br.cond.spnt NEXT_OVERFLOW
+(p8) br.cond.spnt NEXT_EXPDOWN ;;
}
// Branch if cases 4, 5, 6
{ .bbb
-(p9) br.cond.spnt NEXTTOWARD_NORM_TO_DENORM
-(p10) br.cond.spnt NEXTTOWARD_UNDERFLOW_TO_ZERO
-(p14) br.cond.spnt NEXTTOWARD_UNDERFLOW_TO_ZERO ;;
+(p9) br.cond.spnt NEXT_NORM_TO_DENORM
+(p10) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO
+(p14) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO ;;
}
// Here if no special cases
@@ -268,68 +270,72 @@ GLOBAL_LIBM_ENTRY(nexttoward)
// Case 1: x_exp=min_exp, x_sig=unnormalized
// Case 2: x_exp<min_exp
{ .mfi
- cmp.lt p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp
- fmerge.se f8 = NEXTTOWARD_new_exp, NEXTTOWARD_new_sig
+ cmp.lt p6,p7 = GR_x_exp, GR_min_pexp
+ fmerge.se f8 = FR_new_exp, FR_new_sig
nop.i 999 ;;
}
{ .mfi
nop.m 999
nop.f 999
-(p7) tbit.z p6,p0 = nexttoward_GR_new_sig, 63 ;;
+(p7) tbit.z p6,p0 = GR_new_sig, 63 ;;
}
-NEXTTOWARD_COMMON_FINISH:
+NEXT_COMMON_FINISH:
// Force underflow and inexact if denormal result
{ .mfi
nop.m 999
-(p6) fma.d.s0 NEXTTOWARD_tmp = NEXTTOWARD_tmp,NEXTTOWARD_tmp,f0
- nop.i 999 ;;
+(p6) fma.d.s0 FR_tmp = FR_tmp,FR_tmp,f0
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fnorm.d.s0 f8 = f8 // Final normalization to result precision
+(p6) br.cond.spnt NEXT_UNDERFLOW ;;
}
-// Final normalization to result precision and exit
{ .mfb
nop.m 999
- fnorm.d.s0 f8 = f8
+ nop.f 999
br.ret.sptk b0;;
}
//Special cases
-NEXTTOWARD_EXPUP:
+NEXT_EXPUP:
{ .mfb
- cmp.lt p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp
- fmerge.se f8 = NEXTTOWARD_exp1, NEXTTOWARD_snorm_sig
- br.cond.sptk NEXTTOWARD_COMMON_FINISH ;;
+ cmp.lt p6,p7 = GR_x_exp, GR_min_pexp
+ fmerge.se f8 = FR_exp1, FR_snorm_sig
+ br.cond.sptk NEXT_COMMON_FINISH ;;
}
-NEXTTOWARD_EXPDOWN:
+NEXT_EXPDOWN:
{ .mfb
- cmp.lt p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp
- fmerge.se f8 = NEXTTOWARD_exp1, NEXTTOWARD_lnorm_sig
- br.cond.sptk NEXTTOWARD_COMMON_FINISH ;;
+ cmp.lt p6,p7 = GR_x_exp, GR_min_pexp
+ fmerge.se f8 = FR_exp1, FR_lnorm_sig
+ br.cond.sptk NEXT_COMMON_FINISH ;;
}
-NEXTTOWARD_NORM_TO_DENORM:
+NEXT_NORM_TO_DENORM:
{ .mfi
nop.m 999
- fmerge.se f8 = NEXTTOWARD_new_exp, NEXTTOWARD_lden_sig
+ fmerge.se f8 = FR_new_exp, FR_lden_sig
nop.i 999
}
// Force underflow and inexact if denormal result
{ .mfb
nop.m 999
- fma.d.s0 NEXTTOWARD_tmp = NEXTTOWARD_tmp,NEXTTOWARD_tmp,f0
- br.ret.sptk b0 ;;
+ fma.d.s0 FR_tmp = FR_tmp,FR_tmp,f0
+ br.cond.sptk NEXT_UNDERFLOW ;;
}
-NEXTTOWARD_UNDERFLOW_TO_ZERO:
+NEXT_UNDERFLOW_TO_ZERO:
{ .mfb
cmp.eq p6,p0 = r0,r0
- fmerge.s f8 = NEXTTOWARD_save_f8,f0
- br.cond.sptk NEXTTOWARD_COMMON_FINISH ;;
+ fmerge.s f8 = FR_save_f8,f0
+ br.cond.sptk NEXT_COMMON_FINISH ;;
}
-NEXTTOWARD_INF:
+NEXT_INF:
// Here if f8 is +- infinity
// INF
// if f8 is +inf, no matter what y is return largest double
@@ -337,17 +343,17 @@ NEXTTOWARD_INF:
{ .mfi
nop.m 999
- fmerge.se NEXTTOWARD_lnorm = NEXTTOWARD_lnorm_exp,NEXTTOWARD_lnorm_sig
+ fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig
nop.i 999 ;;
}
{ .mfb
nop.m 999
- fmerge.s f8 = f8,NEXTTOWARD_lnorm
+ fmerge.s f8 = f8,FR_lnorm
br.ret.sptk b0 ;;
}
-NEXTTOWARD_ZERO:
+NEXT_ZERO:
// Here if f8 is +- zero
// ZERO
@@ -356,76 +362,72 @@ NEXTTOWARD_ZERO:
{ .mfi
nop.m 999
- fmerge.se NEXTTOWARD_sden = NEXTTOWARD_sden_exp,NEXTTOWARD_sden_sig
+ fmerge.se FR_sden = FR_sden_exp,FR_sden_sig
nop.i 999 ;;
}
// Create small normal to generate underflow flag
{ .mfi
nop.m 999
- fmerge.se NEXTTOWARD_tmp = NEXTTOWARD_sden_exp, NEXTTOWARD_lnorm_sig
+ fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig
nop.i 999 ;;
}
// Add correct sign from direction arg
{ .mfi
nop.m 999
- fmerge.s f8 = f9,NEXTTOWARD_sden
+ fmerge.s f8 = f9,FR_sden
nop.i 999 ;;
}
// Force underflow and inexact flags
{ .mfb
nop.m 999
- fma.d.s0 NEXTTOWARD_tmp = NEXTTOWARD_tmp,NEXTTOWARD_tmp,f0
- br.ret.sptk b0 ;;
+ fma.d.s0 FR_tmp = FR_tmp,FR_tmp,f0
+ br.cond.sptk NEXT_UNDERFLOW ;;
}
-GLOBAL_LIBM_END(nexttoward)
-// Stack operations when calling error support.
-// (1) (2) (3) (call) (4)
-// sp -> + psp -> + psp -> + sp -> +
-// | | | |
-// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
-// | | | |
-// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
-// | | | |
-// | | <- GR_X X1 ->| |
-// | | | |
-// sp-64 -> + sp -> + sp -> + +
-// save ar.pfs save b0 restore gp
-// save gp restore ar.pfs
-
-
+NEXT_UNDERFLOW:
+// Here if result is a denorm, or input is finite and result is zero
+// Call error support to report possible range error
+{ .mib
+ alloc r32=ar.pfs,2,2,4,0
+ mov GR_Parameter_TAG = 271 // Error code
+ br.cond.sptk __libm_error_region // Branch to error call
+}
+;;
-LOCAL_LIBM_ENTRY(__libm_error_region)
-NEXTTOWARD_OVERFLOW:
-// Here if f8 is finite, but result will be infinite
+NEXT_OVERFLOW:
+// Here if input is finite, but result will be infinite
// Use frcpa to generate infinity of correct sign
// Call error support to report possible range error
-.prologue
-
{ .mfi
alloc r32=ar.pfs,2,2,4,0
- frcpa.s1 f8,p6 = NEXTTOWARD_save_f8, f0
+ frcpa.s1 f8,p6 = FR_save_f8, f0
nop.i 999 ;;
}
// Create largest double
{ .mfi
nop.m 999
- fmerge.se NEXTTOWARD_lnorm = NEXTTOWARD_lnorm_exp,NEXTTOWARD_lnorm_sig
+ fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig
nop.i 999 ;;
}
// Force overflow and inexact flags to be set
-{ .mfi
- mov r39 = 199 // Error code
- fma.d.s0 NEXTTOWARD_tmp = NEXTTOWARD_lnorm,NEXTTOWARD_lnorm,f0
- nop.i 999
+{ .mfb
+ mov GR_Parameter_TAG = 199 // Error code
+ fma.d.s0 FR_tmp = FR_lnorm,FR_lnorm,f0
+ br.cond.sptk __libm_error_region // Branch to error call
}
;;
+GLOBAL_LIBM_END(nexttoward)
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+
// (1)
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@@ -452,7 +454,7 @@ NEXTTOWARD_OVERFLOW:
.body
// (3)
{ .mib
- stfd [GR_Parameter_X] = NEXTTOWARD_save_f8 // STORE Parameter 1 on stack
+ stfd [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
}
diff --git a/sysdeps/ia64/fpu/s_nexttowardf.S b/sysdeps/ia64/fpu/s_nexttowardf.S
index fb1adae..b8b9762 100644
--- a/sysdeps/ia64/fpu/s_nexttowardf.S
+++ b/sysdeps/ia64/fpu/s_nexttowardf.S
@@ -1,7 +1,7 @@
.file "nexttowardf.s"
-// Copyright (c) 2001 - 2003, Intel Corporation
+// Copyright (c) 2001 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2001 by the Intel Numerics Group, Intel Corporation
@@ -43,6 +43,7 @@
// 08/23/01 Corrected error tag number
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 12/14/04 Added error handling on underflow.
//
// API
//==============================================================
@@ -52,21 +53,21 @@
//
// Registers used
//==============================================================
-nexttoward_GR_max_pexp = r14
-nexttoward_GR_min_pexp = r15
-nexttoward_GR_exp = r16
-nexttoward_GR_sig = r17
-nexttoward_GR_lnorm_sig = r18
-nexttoward_GR_sign_mask = r19
-nexttoward_GR_exp_mask = r20
-nexttoward_GR_sden_sig = r21
-nexttoward_GR_new_sig = r22
-nexttoward_GR_new_exp = r23
-nexttoward_GR_lden_sig = r24
-nexttoward_GR_snorm_sig = r25
-nexttoward_GR_exp1 = r26
-nexttoward_GR_x_exp = r27
-nexttoward_GR_min_den_rexp = r28
+GR_max_pexp = r14
+GR_min_pexp = r15
+GR_exp = r16
+GR_sig = r17
+GR_lnorm_sig = r18
+GR_sign_mask = r19
+GR_exp_mask = r20
+GR_sden_sig = r21
+GR_new_sig = r22
+GR_new_exp = r23
+GR_lden_sig = r24
+GR_snorm_sig = r25
+GR_exp1 = r26
+GR_x_exp = r27
+GR_min_den_rexp = r28
// r36-39 parameters for libm_error_support
GR_SAVE_B0 = r34
@@ -76,20 +77,21 @@ GR_SAVE_PFS = r32
GR_Parameter_X = r36
GR_Parameter_Y = r37
GR_Parameter_RESULT = r38
-
-NEXTTOWARD_lnorm_sig = f10
-NEXTTOWARD_lnorm_exp = f11
-NEXTTOWARD_lnorm = f12
-NEXTTOWARD_sden_sig = f13
-NEXTTOWARD_sden_exp = f14
-NEXTTOWARD_sden = f15
-NEXTTOWARD_save_f8 = f33
-NEXTTOWARD_new_exp = f34
-NEXTTOWARD_new_sig = f35
-NEXTTOWARD_lden_sig = f36
-NEXTTOWARD_snorm_sig = f37
-NEXTTOWARD_exp1 = f38
-NEXTTOWARD_tmp = f39
+GR_Parameter_TAG = r39
+
+FR_lnorm_sig = f10
+FR_lnorm_exp = f11
+FR_lnorm = f12
+FR_sden_sig = f13
+FR_sden_exp = f14
+FR_sden = f15
+FR_save_f8 = f33
+FR_new_exp = f34
+FR_new_sig = f35
+FR_lden_sig = f36
+FR_snorm_sig = f37
+FR_exp1 = f38
+FR_tmp = f39
//
// Overview of operation
@@ -104,21 +106,21 @@ GLOBAL_LIBM_ENTRY(nexttowardf)
// Extract signexp from x
// Form smallest denormal significand = ulp size
{ .mlx
- getf.exp nexttoward_GR_exp = f8
- movl nexttoward_GR_sden_sig = 0x0000010000000000
+ getf.exp GR_exp = f8
+ movl GR_sden_sig = 0x0000010000000000
}
// Form largest normal exponent
// Is x < y ? p10 if yes, p11 if no
// Form smallest normal exponent
{ .mfi
- addl nexttoward_GR_max_pexp = 0x1007e, r0
+ addl GR_max_pexp = 0x1007e, r0
fcmp.lt.s1 p10,p11 = f8, f9
- addl nexttoward_GR_min_pexp = 0x0ff81, r0 ;;
+ addl GR_min_pexp = 0x0ff81, r0 ;;
}
// Is x=y?
{ .mfi
- getf.sig nexttoward_GR_sig = f8
+ getf.sig GR_sig = f8
fcmp.eq.s0 p6,p0 = f8, f9
nop.i 0
}
@@ -126,14 +128,14 @@ GLOBAL_LIBM_ENTRY(nexttowardf)
// Form largest normal significand
{ .mlx
nop.m 0
- movl nexttoward_GR_lnorm_sig = 0xffffff0000000000 ;;
+ movl GR_lnorm_sig = 0xffffff0000000000 ;;
}
// Move largest normal significand to fp reg for special cases
{ .mfi
- setf.sig NEXTTOWARD_lnorm_sig = nexttoward_GR_lnorm_sig
+ setf.sig FR_lnorm_sig = GR_lnorm_sig
nop.f 0
- addl nexttoward_GR_sign_mask = 0x20000, r0 ;;
+ addl GR_sign_mask = 0x20000, r0 ;;
}
// Move smallest denormal significand and signexp to fp regs
@@ -142,14 +144,14 @@ GLOBAL_LIBM_ENTRY(nexttowardf)
// It increases (p12 set) if x<y and x>=0 or if x>y and x<0
// It decreases (p13 set) if x<y and x<0 or if x>y and x>=0
{ .mfi
- setf.sig NEXTTOWARD_sden_sig = nexttoward_GR_sden_sig
+ setf.sig FR_sden_sig = GR_sden_sig
fclass.m p8,p0 = f8, 0xc3
-(p10) cmp.lt p12,p13 = nexttoward_GR_exp, nexttoward_GR_sign_mask
+(p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask
}
{ .mfi
- setf.exp NEXTTOWARD_sden_exp = nexttoward_GR_min_pexp
+ setf.exp FR_sden_exp = GR_min_pexp
nop.f 999
-(p11) cmp.ge p12,p13 = nexttoward_GR_exp, nexttoward_GR_sign_mask ;;
+(p11) cmp.ge p12,p13 = GR_exp, GR_sign_mask ;;
}
.pred.rel "mutex",p12,p13
@@ -158,33 +160,33 @@ GLOBAL_LIBM_ENTRY(nexttowardf)
// If x=y set result to y
// Form smallest normal significand and largest denormal significand
{ .mfi
-(p12) add nexttoward_GR_new_sig = nexttoward_GR_sig, nexttoward_GR_sden_sig
+(p12) add GR_new_sig = GR_sig, GR_sden_sig
(p6) fnorm.s.s0 f8=f9 //Normalise
- dep.z nexttoward_GR_snorm_sig = 1,63,1 // 0x8000000000000000
+ dep.z GR_snorm_sig = 1,63,1 // 0x8000000000000000
}
{ .mlx
-(p13) sub nexttoward_GR_new_sig = nexttoward_GR_sig, nexttoward_GR_sden_sig
- movl nexttoward_GR_lden_sig = 0x7fffff0000000000 ;;
+(p13) sub GR_new_sig = GR_sig, GR_sden_sig
+ movl GR_lden_sig = 0x7fffff0000000000 ;;
}
// Move expected result significand and signexp to fp regs
// Is y=nan?
// Form new exponent in case result exponent needs incrementing or decrementing
{ .mfi
- setf.exp NEXTTOWARD_new_exp = nexttoward_GR_exp
+ setf.exp FR_new_exp = GR_exp
fclass.m p9,p0 = f9, 0xc3
-(p12) add nexttoward_GR_exp1 = 1, nexttoward_GR_exp
+(p12) add GR_exp1 = 1, GR_exp
}
{ .mib
- setf.sig NEXTTOWARD_new_sig = nexttoward_GR_new_sig
-(p13) add nexttoward_GR_exp1 = -1, nexttoward_GR_exp
+ setf.sig FR_new_sig = GR_new_sig
+(p13) add GR_exp1 = -1, GR_exp
(p6) br.ret.spnt b0 ;; // Exit if x=y
}
// Move largest normal signexp to fp reg for special cases
// Is x=zero?
{ .mfi
- setf.exp NEXTTOWARD_lnorm_exp = nexttoward_GR_max_pexp
+ setf.exp FR_lnorm_exp = GR_max_pexp
fclass.m p7,p0 = f8, 0x7
nop.i 999
}
@@ -197,12 +199,12 @@ GLOBAL_LIBM_ENTRY(nexttowardf)
// Move exp+-1 and smallest normal significand to fp regs for special cases
// Is x=inf?
{ .mfi
- setf.exp NEXTTOWARD_exp1 = nexttoward_GR_exp1
+ setf.exp FR_exp1 = GR_exp1
fclass.m p6,p0 = f8, 0x23
- addl nexttoward_GR_exp_mask = 0x1ffff, r0
+ addl GR_exp_mask = 0x1ffff, r0
}
{ .mfb
- setf.sig NEXTTOWARD_snorm_sig = nexttoward_GR_snorm_sig
+ setf.sig FR_snorm_sig = GR_snorm_sig
(p9) fma.s0 f8 = f8,f1,f9
(p9) br.ret.spnt b0 ;; // Exit if y=nan
}
@@ -210,16 +212,16 @@ GLOBAL_LIBM_ENTRY(nexttowardf)
// Move largest denormal significand to fp regs for special cases
// Save x
{ .mfb
- setf.sig NEXTTOWARD_lden_sig = nexttoward_GR_lden_sig
- mov NEXTTOWARD_save_f8 = f8
-(p7) br.cond.spnt NEXTTOWARD_ZERO ;; // Exit if x=0
+ setf.sig FR_lden_sig = GR_lden_sig
+ mov FR_save_f8 = f8
+(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0
}
// Mask off the sign to get x_exp
{ .mfb
- and nexttoward_GR_x_exp = nexttoward_GR_exp_mask, nexttoward_GR_exp
+ and GR_x_exp = GR_exp_mask, GR_exp
nop.f 999
-(p6) br.cond.spnt NEXTTOWARD_INF ;; // Exit if x=inf
+(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf
}
// Check 6 special cases when significand rolls over:
@@ -238,35 +240,35 @@ GLOBAL_LIBM_ENTRY(nexttowardf)
//
// Form exponent of smallest float denormal (if normalized register format)
{ .mmi
- adds nexttoward_GR_min_den_rexp = -23, nexttoward_GR_min_pexp
-(p12) cmp.eq.unc p6,p0 = nexttoward_GR_new_sig, r0
-(p13) cmp.eq.unc p8,p10 = nexttoward_GR_new_sig, nexttoward_GR_lden_sig ;;
+ adds GR_min_den_rexp = -23, GR_min_pexp
+(p12) cmp.eq.unc p6,p0 = GR_new_sig, r0
+(p13) cmp.eq.unc p8,p10 = GR_new_sig, GR_lden_sig ;;
}
{ .mmi
-(p6) cmp.lt.unc p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_max_pexp
-(p8) cmp.gt.unc p8,p9 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp
-(p10) cmp.eq.unc p10,p0 = nexttoward_GR_new_sig, r0 ;;
+(p6) cmp.lt.unc p6,p7 = GR_x_exp, GR_max_pexp
+(p8) cmp.gt.unc p8,p9 = GR_x_exp, GR_min_pexp
+(p10) cmp.eq.unc p10,p0 = GR_new_sig, r0 ;;
}
// Create small normal in case need to generate underflow flag
{ .mfi
-(p10) cmp.le.unc p10,p0 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp
- fmerge.se NEXTTOWARD_tmp = NEXTTOWARD_sden_exp, NEXTTOWARD_lnorm_sig
-(p9) cmp.gt.unc p9,p14 = nexttoward_GR_x_exp, nexttoward_GR_min_den_rexp
+(p10) cmp.le.unc p10,p0 = GR_x_exp, GR_min_pexp
+ fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig
+(p9) cmp.gt.unc p9,p14 = GR_x_exp, GR_min_den_rexp
}
// Branch if cases 1, 2, 3
{ .bbb
-(p6) br.cond.spnt NEXTTOWARD_EXPUP
-(p7) br.cond.spnt NEXTTOWARD_OVERFLOW
-(p8) br.cond.spnt NEXTTOWARD_EXPDOWN ;;
+(p6) br.cond.spnt NEXT_EXPUP
+(p7) br.cond.spnt NEXT_OVERFLOW
+(p8) br.cond.spnt NEXT_EXPDOWN ;;
}
// Branch if cases 4, 5, 6
{ .bbb
-(p9) br.cond.spnt NEXTTOWARD_NORM_TO_DENORM
-(p10) br.cond.spnt NEXTTOWARD_UNDERFLOW_TO_ZERO
-(p14) br.cond.spnt NEXTTOWARD_UNDERFLOW_TO_ZERO ;;
+(p9) br.cond.spnt NEXT_NORM_TO_DENORM
+(p10) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO
+(p14) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO ;;
}
// Here if no special cases
@@ -274,68 +276,72 @@ GLOBAL_LIBM_ENTRY(nexttowardf)
// Case 1: x_exp=min_exp, x_sig=unnormalized
// Case 2: x_exp<min_exp
{ .mfi
- cmp.lt p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp
- fmerge.se f8 = NEXTTOWARD_new_exp, NEXTTOWARD_new_sig
+ cmp.lt p6,p7 = GR_x_exp, GR_min_pexp
+ fmerge.se f8 = FR_new_exp, FR_new_sig
nop.i 999 ;;
}
{ .mfi
nop.m 999
nop.f 999
-(p7) tbit.z p6,p0 = nexttoward_GR_new_sig, 63 ;;
+(p7) tbit.z p6,p0 = GR_new_sig, 63 ;;
}
-NEXTTOWARD_COMMON_FINISH:
+NEXT_COMMON_FINISH:
// Force underflow and inexact if denormal result
{ .mfi
nop.m 999
-(p6) fma.s.s0 NEXTTOWARD_tmp = NEXTTOWARD_tmp,NEXTTOWARD_tmp,f0
- nop.i 999 ;;
+(p6) fma.s.s0 FR_tmp = FR_tmp,FR_tmp,f0
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fnorm.s.s0 f8 = f8 // Final normalization to result precision
+(p6) br.cond.spnt NEXT_UNDERFLOW ;;
}
-// Final normalization to result precision and exit
{ .mfb
nop.m 999
- fnorm.s.s0 f8 = f8
+ nop.f 999
br.ret.sptk b0;;
}
//Special cases
-NEXTTOWARD_EXPUP:
+NEXT_EXPUP:
{ .mfb
- cmp.lt p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp
- fmerge.se f8 = NEXTTOWARD_exp1, NEXTTOWARD_snorm_sig
- br.cond.sptk NEXTTOWARD_COMMON_FINISH ;;
+ cmp.lt p6,p7 = GR_x_exp, GR_min_pexp
+ fmerge.se f8 = FR_exp1, FR_snorm_sig
+ br.cond.sptk NEXT_COMMON_FINISH ;;
}
-NEXTTOWARD_EXPDOWN:
+NEXT_EXPDOWN:
{ .mfb
- cmp.lt p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp
- fmerge.se f8 = NEXTTOWARD_exp1, NEXTTOWARD_lnorm_sig
- br.cond.sptk NEXTTOWARD_COMMON_FINISH ;;
+ cmp.lt p6,p7 = GR_x_exp, GR_min_pexp
+ fmerge.se f8 = FR_exp1, FR_lnorm_sig
+ br.cond.sptk NEXT_COMMON_FINISH ;;
}
-NEXTTOWARD_NORM_TO_DENORM:
+NEXT_NORM_TO_DENORM:
{ .mfi
nop.m 999
- fmerge.se f8 = NEXTTOWARD_new_exp, NEXTTOWARD_lden_sig
+ fmerge.se f8 = FR_new_exp, FR_lden_sig
nop.i 999
}
// Force underflow and inexact
{ .mfb
nop.m 999
- fma.s.s0 NEXTTOWARD_tmp = NEXTTOWARD_tmp,NEXTTOWARD_tmp,f0
- br.ret.sptk b0 ;;
+ fma.s.s0 FR_tmp = FR_tmp,FR_tmp,f0
+ br.cond.sptk NEXT_UNDERFLOW ;;
}
-NEXTTOWARD_UNDERFLOW_TO_ZERO:
+NEXT_UNDERFLOW_TO_ZERO:
{ .mfb
cmp.eq p6,p0 = r0,r0
- fmerge.s f8 = NEXTTOWARD_save_f8,f0
- br.cond.sptk NEXTTOWARD_COMMON_FINISH ;;
+ fmerge.s f8 = FR_save_f8,f0
+ br.cond.sptk NEXT_COMMON_FINISH ;;
}
-NEXTTOWARD_INF:
+NEXT_INF:
// Here if f8 is +- infinity
// INF
// if f8 is +inf, no matter what y is return largest float
@@ -343,17 +349,17 @@ NEXTTOWARD_INF:
{ .mfi
nop.m 999
- fmerge.se NEXTTOWARD_lnorm = NEXTTOWARD_lnorm_exp,NEXTTOWARD_lnorm_sig
+ fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig
nop.i 999 ;;
}
{ .mfb
nop.m 999
- fmerge.s f8 = f8,NEXTTOWARD_lnorm
+ fmerge.s f8 = f8,FR_lnorm
br.ret.sptk b0 ;;
}
-NEXTTOWARD_ZERO:
+NEXT_ZERO:
// Here if f8 is +- zero
// ZERO
@@ -362,76 +368,72 @@ NEXTTOWARD_ZERO:
{ .mfi
nop.m 999
- fmerge.se NEXTTOWARD_sden = NEXTTOWARD_sden_exp,NEXTTOWARD_sden_sig
+ fmerge.se FR_sden = FR_sden_exp,FR_sden_sig
nop.i 999 ;;
}
// Create small normal to generate underflow flag
{ .mfi
nop.m 999
- fmerge.se NEXTTOWARD_tmp = NEXTTOWARD_sden_exp, NEXTTOWARD_lnorm_sig
+ fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig
nop.i 999 ;;
}
// Add correct sign from direction arg
{ .mfi
nop.m 999
- fmerge.s f8 = f9,NEXTTOWARD_sden
- nop.i 999;;
+ fmerge.s f8 = f9,FR_sden
+ nop.i 999 ;;
}
// Force underflow and inexact flags
{ .mfb
nop.m 999
- fma.s.s0 NEXTTOWARD_tmp = NEXTTOWARD_tmp,NEXTTOWARD_tmp,f0
- br.ret.sptk b0 ;;
+ fma.s.s0 FR_tmp = FR_tmp,FR_tmp,f0
+ br.cond.sptk NEXT_UNDERFLOW ;;
}
-GLOBAL_LIBM_END(nexttowardf)
-// Stack operations when calling error support.
-// (1) (2) (3) (call) (4)
-// sp -> + psp -> + psp -> + sp -> +
-// | | | |
-// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
-// | | | |
-// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
-// | | | |
-// | | <- GR_X X1 ->| |
-// | | | |
-// sp-64 -> + sp -> + sp -> + +
-// save ar.pfs save b0 restore gp
-// save gp restore ar.pfs
-
-
+NEXT_UNDERFLOW:
+// Here if result is a denorm, or input is finite and result is zero
+// Call error support to report possible range error
+{ .mib
+ alloc r32=ar.pfs,2,2,4,0
+ mov GR_Parameter_TAG = 272 // Error code
+ br.cond.sptk __libm_error_region // Branch to error call
+}
+;;
-LOCAL_LIBM_ENTRY(__libm_error_region)
-NEXTTOWARD_OVERFLOW:
-// Here if f8 is finite, but result will be infinite
+NEXT_OVERFLOW:
+// Here if input is finite, but result will be infinite
// Use frcpa to generate infinity of correct sign
// Call error support to report possible range error
-.prologue
-
{ .mfi
alloc r32=ar.pfs,2,2,4,0
- frcpa.s1 f8,p6 = NEXTTOWARD_save_f8, f0
- nop.i 999
+ frcpa.s1 f8,p6 = FR_save_f8, f0
+ nop.i 999 ;;
}
-// Create largest float
+// Create largest float
{ .mfi
nop.m 999
- fmerge.se NEXTTOWARD_lnorm = NEXTTOWARD_lnorm_exp,NEXTTOWARD_lnorm_sig
+ fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig
nop.i 999 ;;
}
// Force overflow and inexact flags to be set
-{ .mfi
- mov r39 = 200 // Error code
- fma.s.s0 NEXTTOWARD_tmp = NEXTTOWARD_lnorm,NEXTTOWARD_lnorm,f0
- nop.i 999
+{ .mfb
+ mov GR_Parameter_TAG = 200 // Error code
+ fma.s.s0 FR_tmp = FR_lnorm,FR_lnorm,f0
+ br.cond.sptk __libm_error_region // Branch to error call
}
;;
+GLOBAL_LIBM_END(nexttowardf)
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+
// (1)
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@@ -458,7 +460,7 @@ NEXTTOWARD_OVERFLOW:
.body
// (3)
{ .mib
- stfs [GR_Parameter_X] = NEXTTOWARD_save_f8 // STORE Parameter 1 on stack
+ stfs [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
}
diff --git a/sysdeps/ia64/fpu/s_nexttowardl.S b/sysdeps/ia64/fpu/s_nexttowardl.S
index 9c79f2c..fa2db12 100644
--- a/sysdeps/ia64/fpu/s_nexttowardl.S
+++ b/sysdeps/ia64/fpu/s_nexttowardl.S
@@ -1,7 +1,7 @@
.file "nexttowardl.s"
-// Copyright (c) 2001 - 2003, Intel Corporation
+// Copyright (c) 2001 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2001 by the Intel Numerics Group, Intel Corporation
@@ -43,6 +43,7 @@
// 08/23/01 Corrected error tag number
// 05/20/02 Cleaned up namespace and sf0 syntax
// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 12/14/04 Added error handling on underflow.
//
// API
//==============================================================
@@ -52,20 +53,20 @@
//
// Registers used
//==============================================================
-nexttoward_GR_max_pexp = r14
-nexttoward_GR_min_pexp = r15
-nexttoward_GR_exp = r16
-nexttoward_GR_sig = r17
-nexttoward_GR_lnorm_sig = r18
-nexttoward_GR_sign_mask = r19
-nexttoward_GR_exp_mask = r20
-nexttoward_GR_sden_sig = r21
-nexttoward_GR_new_sig = r22
-nexttoward_GR_new_exp = r23
-nexttoward_GR_lden_sig = r24
-nexttoward_GR_snorm_sig = r25
-nexttoward_GR_exp1 = r26
-nexttoward_GR_x_exp = r27
+GR_max_pexp = r14
+GR_min_pexp = r15
+GR_exp = r16
+GR_sig = r17
+GR_lnorm_sig = r18
+GR_sign_mask = r19
+GR_exp_mask = r20
+GR_sden_sig = r21
+GR_new_sig = r22
+GR_new_exp = r23
+GR_lden_sig = r24
+GR_snorm_sig = r25
+GR_exp1 = r26
+GR_x_exp = r27
// r36-39 parameters for libm_error_support
GR_SAVE_B0 = r34
@@ -75,21 +76,22 @@ GR_SAVE_PFS = r32
GR_Parameter_X = r36
GR_Parameter_Y = r37
GR_Parameter_RESULT = r38
-
-NEXTTOWARD_lnorm_sig = f10
-NEXTTOWARD_lnorm_exp = f11
-NEXTTOWARD_lnorm = f12
-NEXTTOWARD_sden_sig = f13
-NEXTTOWARD_den_exp = f14
-NEXTTOWARD_sden = f15
-NEXTTOWARD_snorm_exp = f32
-NEXTTOWARD_save_f8 = f33
-NEXTTOWARD_new_exp = f34
-NEXTTOWARD_new_sig = f35
-NEXTTOWARD_lden_sig = f36
-NEXTTOWARD_snorm_sig = f37
-NEXTTOWARD_exp1 = f38
-NEXTTOWARD_tmp = f39
+GR_Parameter_TAG = r39
+
+FR_lnorm_sig = f10
+FR_lnorm_exp = f11
+FR_lnorm = f12
+FR_sden_sig = f13
+FR_den_exp = f14
+FR_sden = f15
+FR_snorm_exp = f32
+FR_save_f8 = f33
+FR_new_exp = f34
+FR_new_sig = f35
+FR_lden_sig = f36
+FR_snorm_sig = f37
+FR_exp1 = f38
+FR_tmp = f39
//
// Overview of operation
@@ -105,31 +107,31 @@ GLOBAL_LIBM_ENTRY(nexttowardl)
// Is x < y ? p10 if yes, p11 if no
// Form smallest denormal significand = ulp size
{ .mfi
- getf.exp nexttoward_GR_exp = f8
+ getf.exp GR_exp = f8
fcmp.lt.s1 p10,p11 = f8, f9
- addl nexttoward_GR_sden_sig = 0x1, r0
+ addl GR_sden_sig = 0x1, r0
}
// Form largest normal significand 0xffffffffffffffff
// Form smallest normal exponent
{ .mfi
- addl nexttoward_GR_lnorm_sig = -0x1,r0
+ addl GR_lnorm_sig = -0x1,r0
nop.f 999
- addl nexttoward_GR_min_pexp = 0x0c001, r0 ;;
+ addl GR_min_pexp = 0x0c001, r0 ;;
}
// Extract significand from x
// Is x=y? This fcmp also sets Invalid and Denormal if required
// Form largest normal exponent
{ .mfi
- getf.sig nexttoward_GR_sig = f8
+ getf.sig GR_sig = f8
fcmp.eq.s0 p6,p0 = f8, f9
- addl nexttoward_GR_max_pexp = 0x13ffe, r0
+ addl GR_max_pexp = 0x13ffe, r0
}
// Move largest normal significand to fp reg for special cases
{ .mfi
- setf.sig NEXTTOWARD_lnorm_sig = nexttoward_GR_lnorm_sig
+ setf.sig FR_lnorm_sig = GR_lnorm_sig
nop.f 999
- addl nexttoward_GR_sign_mask = 0x20000, r0 ;;
+ addl GR_sign_mask = 0x20000, r0 ;;
}
// Move smallest denormal significand and exp to fp regs
@@ -138,15 +140,15 @@ GLOBAL_LIBM_ENTRY(nexttowardl)
// It increases (p12 set) if x<y and x>=0 or if x>y and x<0
// It decreases (p13 set) if x<y and x<0 or if x>y and x>=0
{ .mfi
- setf.sig NEXTTOWARD_sden_sig = nexttoward_GR_sden_sig
+ setf.sig FR_sden_sig = GR_sden_sig
fclass.m p8,p0 = f8, 0xc3
-(p10) cmp.lt p12,p13 = nexttoward_GR_exp, nexttoward_GR_sign_mask
+(p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask
}
// Move smallest normal exp to fp regs
{ .mfi
- setf.exp NEXTTOWARD_snorm_exp = nexttoward_GR_min_pexp
+ setf.exp FR_snorm_exp = GR_min_pexp
nop.f 999
-(p11) cmp.ge p12,p13 = nexttoward_GR_exp, nexttoward_GR_sign_mask ;;
+(p11) cmp.ge p12,p13 = GR_exp, GR_sign_mask ;;
}
.pred.rel "mutex",p12,p13
@@ -155,38 +157,38 @@ GLOBAL_LIBM_ENTRY(nexttowardl)
// If x=y set result to y
// Form smallest normal significand and largest denormal significand
{ .mfi
-(p12) add nexttoward_GR_new_sig = nexttoward_GR_sig, nexttoward_GR_sden_sig
+(p12) add GR_new_sig = GR_sig, GR_sden_sig
(p6) fmerge.s f8=f9,f9
- dep.z nexttoward_GR_snorm_sig = 1,63,1 // 0x8000000000000000
+ dep.z GR_snorm_sig = 1,63,1 // 0x8000000000000000
}
{ .mlx
-(p13) sub nexttoward_GR_new_sig = nexttoward_GR_sig, nexttoward_GR_sden_sig
- movl nexttoward_GR_lden_sig = 0x7fffffffffffffff ;;
+(p13) sub GR_new_sig = GR_sig, GR_sden_sig
+ movl GR_lden_sig = 0x7fffffffffffffff ;;
}
// Move expected result significand and signexp to fp regs
// Is y=nan?
// Form new exponent in case result exponent needs incrementing or decrementing
{ .mfi
- setf.exp NEXTTOWARD_new_exp = nexttoward_GR_exp
+ setf.exp FR_new_exp = GR_exp
fclass.m p9,p0 = f9, 0xc3
-(p12) add nexttoward_GR_exp1 = 1, nexttoward_GR_exp
+(p12) add GR_exp1 = 1, GR_exp
}
{ .mib
- setf.sig NEXTTOWARD_new_sig = nexttoward_GR_new_sig
-(p13) add nexttoward_GR_exp1 = -1, nexttoward_GR_exp
+ setf.sig FR_new_sig = GR_new_sig
+(p13) add GR_exp1 = -1, GR_exp
(p6) br.ret.spnt b0 ;; // Exit if x=y
}
// Move largest normal signexp to fp reg for special cases
// Is x=zero?
{ .mfi
- setf.exp NEXTTOWARD_lnorm_exp = nexttoward_GR_max_pexp
+ setf.exp FR_lnorm_exp = GR_max_pexp
fclass.m p7,p0 = f8, 0x7
nop.i 999
}
{ .mfb
- setf.exp NEXTTOWARD_den_exp = nexttoward_GR_min_pexp
+ setf.exp FR_den_exp = GR_min_pexp
(p8) fma.s0 f8 = f8,f1,f9
(p8) br.ret.spnt b0 ;; // Exit if x=nan
}
@@ -194,12 +196,12 @@ GLOBAL_LIBM_ENTRY(nexttowardl)
// Move exp+-1 and smallest normal significand to fp regs for special cases
// Is x=inf?
{ .mfi
- setf.exp NEXTTOWARD_exp1 = nexttoward_GR_exp1
+ setf.exp FR_exp1 = GR_exp1
fclass.m p6,p0 = f8, 0x23
- addl nexttoward_GR_exp_mask = 0x1ffff, r0
+ addl GR_exp_mask = 0x1ffff, r0
}
{ .mfb
- setf.sig NEXTTOWARD_snorm_sig = nexttoward_GR_snorm_sig
+ setf.sig FR_snorm_sig = GR_snorm_sig
(p9) fma.s0 f8 = f8,f1,f9
(p9) br.ret.spnt b0 ;; // Exit if y=nan
}
@@ -207,16 +209,16 @@ GLOBAL_LIBM_ENTRY(nexttowardl)
// Move largest denormal significand to fp regs for special cases
// Save x
{ .mfb
- setf.sig NEXTTOWARD_lden_sig = nexttoward_GR_lden_sig
- mov NEXTTOWARD_save_f8 = f8
-(p7) br.cond.spnt NEXTTOWARD_ZERO ;; // Exit if x=0
+ setf.sig FR_lden_sig = GR_lden_sig
+ mov FR_save_f8 = f8
+(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0
}
// Mask off the sign to get x_exp
{ .mfb
- and nexttoward_GR_x_exp = nexttoward_GR_exp_mask, nexttoward_GR_exp
+ and GR_x_exp = GR_exp_mask, GR_exp
nop.f 999
-(p6) br.cond.spnt NEXTTOWARD_INF ;; // Exit if x=inf
+(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf
}
// Check 5 special cases when significand rolls over:
@@ -232,37 +234,37 @@ GLOBAL_LIBM_ENTRY(nexttowardl)
// Set p10, result is zero, sign of x, signal underflow and inexact
//
{ .mmi
-(p12) cmp.eq.unc p6,p0 = nexttoward_GR_new_sig, r0
-(p13) cmp.eq.unc p9,p10 = nexttoward_GR_new_sig, nexttoward_GR_lden_sig
+(p12) cmp.eq.unc p6,p0 = GR_new_sig, r0
+(p13) cmp.eq.unc p9,p10 = GR_new_sig, GR_lden_sig
nop.i 999
;;
}
{ .mmi
-(p6) cmp.lt.unc p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_max_pexp
-(p10) cmp.eq.unc p10,p0 = nexttoward_GR_new_sig, r0
-(p9) cmp.le.unc p9,p8 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp
+(p6) cmp.lt.unc p6,p7 = GR_x_exp, GR_max_pexp
+(p10) cmp.eq.unc p10,p0 = GR_new_sig, r0
+(p9) cmp.le.unc p9,p8 = GR_x_exp, GR_min_pexp
;;
}
// Create small normal in case need to generate underflow flag
{ .mfi
nop.m 999
- fmerge.se NEXTTOWARD_tmp = NEXTTOWARD_snorm_exp, NEXTTOWARD_lnorm_sig
+ fmerge.se FR_tmp = FR_snorm_exp, FR_lnorm_sig
nop.i 999
}
// Branch if cases 1, 2, 3
{ .bbb
-(p6) br.cond.spnt NEXTTOWARD_EXPUP
-(p7) br.cond.spnt NEXTTOWARD_OVERFLOW
-(p8) br.cond.spnt NEXTTOWARD_EXPDOWN ;;
+(p6) br.cond.spnt NEXT_EXPUP
+(p7) br.cond.spnt NEXT_OVERFLOW
+(p8) br.cond.spnt NEXT_EXPDOWN ;;
}
// Branch if cases 4, 5
{ .mbb
nop.m 999
-(p9) br.cond.spnt NEXTTOWARD_NORM_TO_DENORM
-(p10) br.cond.spnt NEXTTOWARD_UNDERFLOW_TO_ZERO
+(p9) br.cond.spnt NEXT_NORM_TO_DENORM
+(p10) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO
;;
}
@@ -271,68 +273,72 @@ GLOBAL_LIBM_ENTRY(nexttowardl)
// Case 1: x_exp=min_exp, x_sig=unnormalized
// Case 2: x_exp<min_exp
{ .mfi
- cmp.lt p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp
- fmerge.se f8 = NEXTTOWARD_new_exp, NEXTTOWARD_new_sig
+ cmp.lt p6,p7 = GR_x_exp, GR_min_pexp
+ fmerge.se f8 = FR_new_exp, FR_new_sig
nop.i 999 ;;
}
{ .mfi
nop.m 999
nop.f 999
-(p6) tbit.z p6,p0 = nexttoward_GR_new_sig, 63 ;;
+(p6) tbit.z p6,p0 = GR_new_sig, 63 ;;
}
-NEXTTOWARD_COMMON_FINISH:
+NEXT_COMMON_FINISH:
// Force underflow and inexact if denormal result
{ .mfi
nop.m 999
-(p6) fma.s0 NEXTTOWARD_tmp = NEXTTOWARD_tmp,NEXTTOWARD_tmp,f0
- nop.i 999 ;;
+(p6) fma.s0 FR_tmp = FR_tmp,FR_tmp,f0
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fnorm.s0 f8 = f8 // Final normalization to result precision
+(p6) br.cond.spnt NEXT_UNDERFLOW ;;
}
-// Final normalization to result precision and exit
{ .mfb
nop.m 999
- fnorm.s0 f8 = f8
+ nop.f 999
br.ret.sptk b0;;
}
//Special cases
-NEXTTOWARD_EXPUP:
+NEXT_EXPUP:
{ .mfb
- cmp.lt p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp
- fmerge.se f8 = NEXTTOWARD_exp1, NEXTTOWARD_snorm_sig
- br.cond.sptk NEXTTOWARD_COMMON_FINISH ;;
+ cmp.lt p6,p7 = GR_x_exp, GR_min_pexp
+ fmerge.se f8 = FR_exp1, FR_snorm_sig
+ br.cond.sptk NEXT_COMMON_FINISH ;;
}
-NEXTTOWARD_EXPDOWN:
+NEXT_EXPDOWN:
{ .mfb
- cmp.lt p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp
- fmerge.se f8 = NEXTTOWARD_exp1, NEXTTOWARD_lnorm_sig
- br.cond.sptk NEXTTOWARD_COMMON_FINISH ;;
+ cmp.lt p6,p7 = GR_x_exp, GR_min_pexp
+ fmerge.se f8 = FR_exp1, FR_lnorm_sig
+ br.cond.sptk NEXT_COMMON_FINISH ;;
}
-NEXTTOWARD_NORM_TO_DENORM:
+NEXT_NORM_TO_DENORM:
{ .mfi
nop.m 999
- fmerge.se f8 = NEXTTOWARD_exp1, NEXTTOWARD_lden_sig
+ fmerge.se f8 = FR_exp1, FR_lden_sig
nop.i 999
}
// Force underflow and inexact
{ .mfb
nop.m 999
- fma.s0 NEXTTOWARD_tmp = NEXTTOWARD_tmp,NEXTTOWARD_tmp,f0
- br.ret.sptk b0 ;;
+ fma.s0 FR_tmp = FR_tmp,FR_tmp,f0
+ br.cond.sptk NEXT_UNDERFLOW ;;
}
-NEXTTOWARD_UNDERFLOW_TO_ZERO:
+NEXT_UNDERFLOW_TO_ZERO:
{ .mfb
cmp.eq p6,p0 = r0,r0
- fmerge.s f8 = NEXTTOWARD_save_f8,f0
- br.cond.sptk NEXTTOWARD_COMMON_FINISH ;;
+ fmerge.s f8 = FR_save_f8,f0
+ br.cond.sptk NEXT_COMMON_FINISH ;;
}
-NEXTTOWARD_INF:
+NEXT_INF:
// Here if f8 is +- infinity
// INF
// if f8 is +inf, no matter what y is return largest long double
@@ -341,17 +347,17 @@ NEXTTOWARD_INF:
// Create largest long double
{ .mfi
nop.m 999
- fmerge.se NEXTTOWARD_lnorm = NEXTTOWARD_lnorm_exp,NEXTTOWARD_lnorm_sig
+ fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig
nop.i 999 ;;
}
{ .mfb
nop.m 999
- fmerge.s f8 = f8,NEXTTOWARD_lnorm
+ fmerge.s f8 = f8,FR_lnorm
br.ret.sptk b0 ;;
}
-NEXTTOWARD_ZERO:
+NEXT_ZERO:
// Here if f8 is +- zero
// ZERO
@@ -360,76 +366,72 @@ NEXTTOWARD_ZERO:
{ .mfi
nop.m 999
- fmerge.se NEXTTOWARD_sden = f0,NEXTTOWARD_sden_sig
+ fmerge.se FR_sden = f0,FR_sden_sig
nop.i 999 ;;
}
// Create small normal to generate underflow flag
{ .mfi
nop.m 999
- fmerge.se NEXTTOWARD_tmp = NEXTTOWARD_snorm_exp, NEXTTOWARD_lnorm_sig
+ fmerge.se FR_tmp = FR_snorm_exp, FR_lnorm_sig
nop.i 999 ;;
}
// Add correct sign from direction arg
{ .mfi
nop.m 999
- fmerge.s f8 = f9,NEXTTOWARD_sden
+ fmerge.s f8 = f9,FR_sden
nop.i 999 ;;
}
// Force underflow and inexact flags
{ .mfb
nop.m 999
- fma.s0 NEXTTOWARD_tmp = NEXTTOWARD_tmp,NEXTTOWARD_tmp,f0
- br.ret.sptk b0 ;;
+ fma.s0 FR_tmp = FR_tmp,FR_tmp,f0
+ br.cond.sptk NEXT_UNDERFLOW ;;
}
-GLOBAL_LIBM_END(nexttowardl)
-// Stack operations when calling error support.
-// (1) (2) (3) (call) (4)
-// sp -> + psp -> + psp -> + sp -> +
-// | | | |
-// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
-// | | | |
-// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
-// | | | |
-// | | <- GR_X X1 ->| |
-// | | | |
-// sp-64 -> + sp -> + sp -> + +
-// save ar.pfs save b0 restore gp
-// save gp restore ar.pfs
-
-
+NEXT_UNDERFLOW:
+// Here if result is a denorm, or input is finite and result is zero
+// Call error support to report possible range error
+{ .mib
+ alloc r32=ar.pfs,2,2,4,0
+ mov GR_Parameter_TAG = 270 // Error code
+ br.cond.sptk __libm_error_region // Branch to error call
+}
+;;
-LOCAL_LIBM_ENTRY(__libm_error_region)
-NEXTTOWARD_OVERFLOW:
-// Here if f8 is finite, but result will be infinite
+NEXT_OVERFLOW:
+// Here if input is finite, but result will be infinite
// Use frcpa to generate infinity of correct sign
// Call error support to report possible range error
-.prologue
-
{ .mfi
alloc r32=ar.pfs,2,2,4,0
- frcpa.s1 f8,p6 = NEXTTOWARD_save_f8, f0
+ frcpa.s1 f8,p6 = FR_save_f8, f0
nop.i 999 ;;
}
-// Create largest long double
+// Create largest long double
{ .mfi
nop.m 999
- fmerge.se NEXTTOWARD_lnorm = NEXTTOWARD_lnorm_exp,NEXTTOWARD_lnorm_sig
+ fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig
nop.i 999 ;;
}
// Force overflow and inexact flags to be set
-{ .mfi
- mov r39 = 198 // Error code
- fma.s0 NEXTTOWARD_tmp = NEXTTOWARD_lnorm,NEXTTOWARD_lnorm,f0
- nop.i 999
+{ .mfb
+ mov GR_Parameter_TAG = 198 // Error code
+ fma.s0 FR_tmp = FR_lnorm,FR_lnorm,f0
+ br.cond.sptk __libm_error_region // Branch to error call
}
;;
+GLOBAL_LIBM_END(nexttowardl)
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+
// (1)
{ .mfi
add GR_Parameter_Y=-32,sp // Parameter 2 value
@@ -456,7 +458,7 @@ NEXTTOWARD_OVERFLOW:
.body
// (3)
{ .mib
- stfe [GR_Parameter_X] = NEXTTOWARD_save_f8 // STORE Parameter 1 on stack
+ stfe [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
}
diff --git a/sysdeps/ia64/fpu/s_round.S b/sysdeps/ia64/fpu/s_round.S
index 04033b4..ed5ffae 100644
--- a/sysdeps/ia64/fpu/s_round.S
+++ b/sysdeps/ia64/fpu/s_round.S
@@ -44,6 +44,7 @@
// 05/20/02 Cleaned up namespace and sf0 syntax
// 01/20/03 Improved performance and reduced code size
// 04/18/03 Eliminate possible WAW dependency warning
+// 09/03/03 Improved performance
//==============================================================
// API
@@ -52,14 +53,13 @@
//==============================================================
// general input registers:
-// r14 - r19
+// r14 - r18
rSignexp = r14
rExp = r15
rExpMask = r16
rBigexp = r17
rExpHalf = r18
-rExpMHalf = r19
// floating-point registers:
// f8 - f13
@@ -67,7 +67,7 @@ rExpMHalf = r19
fXtruncInt = f9
fNormX = f10
fHalf = f11
-fMHalf = f12
+fInc = f12
fRem = f13
// predicate registers used:
@@ -119,15 +119,15 @@ GLOBAL_LIBM_ENTRY(round)
}
;;
-{ .mmf
+{ .mfi
setf.exp fHalf = rExpHalf // Form 0.5
- mov rExpMHalf = 0x2FFFE // Form sign and exponent of -0.5
fclass.m p7,p0 = f8, 0x0b // Test x unorm
+ nop.i 0
}
;;
{ .mfb
- setf.exp fMHalf = rExpMHalf // Form -0.5
+ nop.m 0
fclass.m p6,p0 = f8, 0x1e3 // Test x natval, nan, inf
(p7) br.cond.spnt ROUND_UNORM // Branch if x unorm
}
@@ -135,27 +135,31 @@ GLOBAL_LIBM_ENTRY(round)
ROUND_COMMON:
// Return here from ROUND_UNORM
-{ .mfi
+{ .mfb
nop.m 0
fcmp.lt.s1 p8,p9 = f8, f0 // Test if x < 0
+(p6) br.cond.spnt ROUND_SPECIAL // Exit if x natval, nan, inf
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fcvt.xf f8 = fXtruncInt // Pre-Result if 0.5 <= |x| < 2^52
nop.i 0
}
-{ .mfb
+;;
+
+{ .mfi
and rExp = rSignexp, rExpMask // Get biased exponent
-(p6) fma.d.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf
-(p6) br.ret.spnt b0 // Exit if x natval, nan, inf
+ fmerge.s fInc = fNormX, f1 // Form increment if |rem| >= 0.5
+ nop.i 0
}
;;
-{ .mfi
+{ .mmi
cmp.lt p6,p0 = rExp, rExpHalf // Is |x| < 0.5?
- fcvt.xf f8 = fXtruncInt // Pre-Result if 0.5 <= |x| < 2^52
cmp.ge p7,p0 = rExp, rBigexp // Is |x| >= 2^52?
-}
-{ .mfi
cmp.lt p10,p0 = rExp, rExpHalf // Is |x| < 0.5?
- nop.f 0
- nop.i 0
}
;;
@@ -176,44 +180,52 @@ ROUND_COMMON:
// Here if 0.5 <= |x| < 2^52
{ .mfi
nop.m 0
- fms.s1 fRem = fNormX, f1, f8 // Get remainder = x - trunc(x)
+(p9) fms.s1 fRem = fNormX, f1, f8 // Get remainder = x - trunc(x)
nop.i 0
}
-;;
-
{ .mfi
nop.m 0
-(p8) fcmp.le.s1 p8,p0 = fRem, fMHalf
+(p8) fms.s1 fRem = f8, f1, fNormX // Get remainder = trunc(x) - x
nop.i 0
}
+;;
+
{ .mfi
nop.m 0
-(p9) fcmp.ge.s1 p9,p0 = fRem, fHalf
+ fcmp.ge.s1 p9,p0 = fRem, fHalf // Test |rem| >= 0.5
nop.i 0
}
;;
// If x < 0 and remainder <= -0.5, then subtract 1 from result
// If x > 0 and remainder >= +0.5, then add 1 to result
-.pred.rel "mutex",p8,p9
-{ .mfi
+{ .mfb
nop.m 0
-(p8) fms.d.s0 f8 = f8, f1, f1
- nop.i 0
+(p9) fma.d.s0 f8 = f8, f1, fInc
+ br.ret.sptk b0
}
+;;
+
+
+ROUND_SPECIAL:
+// Here if x natval, nan, inf
{ .mfb
nop.m 0
-(p9) fma.d.s0 f8 = f8, f1, f1
+ fma.d.s0 f8 = f8, f1, f0
br.ret.sptk b0
}
;;
-
ROUND_UNORM:
// Here if x unorm
-{ .mfb
+{ .mfi
getf.exp rSignexp = fNormX // Get signexp, recompute if unorm
fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ fcvt.fx.trunc.s1 fXtruncInt = fNormX // Convert to int in significand
br.cond.sptk ROUND_COMMON // Return to main path
}
;;
diff --git a/sysdeps/ia64/fpu/s_roundf.S b/sysdeps/ia64/fpu/s_roundf.S
index 1e8dc78..7cec860 100644
--- a/sysdeps/ia64/fpu/s_roundf.S
+++ b/sysdeps/ia64/fpu/s_roundf.S
@@ -44,6 +44,7 @@
// 05/20/02 Cleaned up namespace and sf0 syntax
// 01/20/03 Improved performance and reduced code size
// 04/18/03 Eliminate possible WAW dependency warning
+// 09/03/03 Improved performance
//==============================================================
// API
@@ -52,14 +53,13 @@
//==============================================================
// general input registers:
-// r14 - r19
+// r14 - r18
rSignexp = r14
rExp = r15
rExpMask = r16
rBigexp = r17
rExpHalf = r18
-rExpMHalf = r19
// floating-point registers:
// f8 - f13
@@ -67,7 +67,7 @@ rExpMHalf = r19
fXtruncInt = f9
fNormX = f10
fHalf = f11
-fMHalf = f12
+fInc = f12
fRem = f13
// predicate registers used:
@@ -119,15 +119,15 @@ GLOBAL_LIBM_ENTRY(roundf)
}
;;
-{ .mmf
+{ .mfi
setf.exp fHalf = rExpHalf // Form 0.5
- mov rExpMHalf = 0x2FFFE // Form sign and exponent of -0.5
fclass.m p7,p0 = f8, 0x0b // Test x unorm
+ nop.i 0
}
;;
{ .mfb
- setf.exp fMHalf = rExpMHalf // Form -0.5
+ nop.m 0
fclass.m p6,p0 = f8, 0x1e3 // Test x natval, nan, inf
(p7) br.cond.spnt ROUND_UNORM // Branch if x unorm
}
@@ -135,27 +135,31 @@ GLOBAL_LIBM_ENTRY(roundf)
ROUND_COMMON:
// Return here from ROUND_UNORM
-{ .mfi
+{ .mfb
nop.m 0
fcmp.lt.s1 p8,p9 = f8, f0 // Test if x < 0
+(p6) br.cond.spnt ROUND_SPECIAL // Exit if x natval, nan, inf
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fcvt.xf f8 = fXtruncInt // Pre-Result if 0.5 <= |x| < 2^23
nop.i 0
}
-{ .mfb
+;;
+
+{ .mfi
and rExp = rSignexp, rExpMask // Get biased exponent
-(p6) fma.s.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf
-(p6) br.ret.spnt b0 // Exit if x natval, nan, inf
+ fmerge.s fInc = fNormX, f1 // Form increment if |rem| >= 0.5
+ nop.i 0
}
;;
-{ .mfi
+{ .mmi
cmp.lt p6,p0 = rExp, rExpHalf // Is |x| < 0.5?
- fcvt.xf f8 = fXtruncInt // Pre-Result if 0.5 <= |x| < 2^23
cmp.ge p7,p0 = rExp, rBigexp // Is |x| >= 2^23?
-}
-{ .mfi
cmp.lt p10,p0 = rExp, rExpHalf // Is |x| < 0.5?
- nop.f 0
- nop.i 0
}
;;
@@ -176,44 +180,52 @@ ROUND_COMMON:
// Here if 0.5 <= |x| < 2^23
{ .mfi
nop.m 0
- fms.s1 fRem = fNormX, f1, f8 // Get remainder = x - trunc(x)
+(p9) fms.s1 fRem = fNormX, f1, f8 // Get remainder = x - trunc(x)
nop.i 0
}
-;;
-
{ .mfi
nop.m 0
-(p8) fcmp.le.s1 p8,p0 = fRem, fMHalf
+(p8) fms.s1 fRem = f8, f1, fNormX // Get remainder = trunc(x) - x
nop.i 0
}
+;;
+
{ .mfi
nop.m 0
-(p9) fcmp.ge.s1 p9,p0 = fRem, fHalf
+ fcmp.ge.s1 p9,p0 = fRem, fHalf // Test |rem| >= 0.5
nop.i 0
}
;;
// If x < 0 and remainder <= -0.5, then subtract 1 from result
// If x > 0 and remainder >= +0.5, then add 1 to result
-.pred.rel "mutex",p8,p9
-{ .mfi
+{ .mfb
nop.m 0
-(p8) fms.s.s0 f8 = f8, f1, f1
- nop.i 0
+(p9) fma.s.s0 f8 = f8, f1, fInc
+ br.ret.sptk b0
}
+;;
+
+
+ROUND_SPECIAL:
+// Here if x natval, nan, inf
{ .mfb
nop.m 0
-(p9) fma.s.s0 f8 = f8, f1, f1
+ fma.s.s0 f8 = f8, f1, f0
br.ret.sptk b0
}
;;
-
ROUND_UNORM:
// Here if x unorm
-{ .mfb
+{ .mfi
getf.exp rSignexp = fNormX // Get signexp, recompute if unorm
fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ fcvt.fx.trunc.s1 fXtruncInt = fNormX // Convert to int in significand
br.cond.sptk ROUND_COMMON // Return to main path
}
;;
diff --git a/sysdeps/ia64/fpu/s_roundl.S b/sysdeps/ia64/fpu/s_roundl.S
index 79dff00..da6cbfe 100644
--- a/sysdeps/ia64/fpu/s_roundl.S
+++ b/sysdeps/ia64/fpu/s_roundl.S
@@ -44,6 +44,7 @@
// 05/20/02 Cleaned up namespace and sf0 syntax
// 01/20/03 Improved performance and reduced code size
// 04/18/03 Eliminate possible WAW dependency warning
+// 09/03/03 Improved performance
//==============================================================
// API
@@ -52,14 +53,13 @@
//==============================================================
// general input registers:
-// r14 - r19
+// r14 - r18
rSignexp = r14
rExp = r15
rExpMask = r16
rBigexp = r17
rExpHalf = r18
-rExpMHalf = r19
// floating-point registers:
// f8 - f13
@@ -67,7 +67,7 @@ rExpMHalf = r19
fXtruncInt = f9
fNormX = f10
fHalf = f11
-fMHalf = f12
+fInc = f12
fRem = f13
// predicate registers used:
@@ -119,15 +119,15 @@ GLOBAL_LIBM_ENTRY(roundl)
}
;;
-{ .mmf
+{ .mfi
setf.exp fHalf = rExpHalf // Form 0.5
- mov rExpMHalf = 0x2FFFE // Form sign and exponent of -0.5
fclass.m p7,p0 = f8, 0x0b // Test x unorm
+ nop.i 0
}
;;
{ .mfb
- setf.exp fMHalf = rExpMHalf // Form -0.5
+ nop.m 0
fclass.m p6,p0 = f8, 0x1e3 // Test x natval, nan, inf
(p7) br.cond.spnt ROUND_UNORM // Branch if x unorm
}
@@ -135,27 +135,31 @@ GLOBAL_LIBM_ENTRY(roundl)
ROUND_COMMON:
// Return here from ROUND_UNORM
-{ .mfi
+{ .mfb
nop.m 0
fcmp.lt.s1 p8,p9 = f8, f0 // Test if x < 0
+(p6) br.cond.spnt ROUND_SPECIAL // Exit if x natval, nan, inf
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fcvt.xf f8 = fXtruncInt // Pre-Result if 0.5 <= |x| < 2^63
nop.i 0
}
-{ .mfb
+;;
+
+{ .mfi
and rExp = rSignexp, rExpMask // Get biased exponent
-(p6) fma.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf
-(p6) br.ret.spnt b0 // Exit if x natval, nan, inf
+ fmerge.s fInc = fNormX, f1 // Form increment if |rem| >= 0.5
+ nop.i 0
}
;;
-{ .mfi
+{ .mmi
cmp.lt p6,p0 = rExp, rExpHalf // Is |x| < 0.5?
- fcvt.xf f8 = fXtruncInt // Pre-Result if 0.5 <= |x| < 2^63
cmp.ge p7,p0 = rExp, rBigexp // Is |x| >= 2^63?
-}
-{ .mfi
cmp.lt p10,p0 = rExp, rExpHalf // Is |x| < 0.5?
- nop.f 0
- nop.i 0
}
;;
@@ -176,44 +180,52 @@ ROUND_COMMON:
// Here if 0.5 <= |x| < 2^63
{ .mfi
nop.m 0
- fms.s1 fRem = fNormX, f1, f8 // Get remainder = x - trunc(x)
+(p9) fms.s1 fRem = fNormX, f1, f8 // Get remainder = x - trunc(x)
nop.i 0
}
-;;
-
{ .mfi
nop.m 0
-(p8) fcmp.le.s1 p8,p0 = fRem, fMHalf
+(p8) fms.s1 fRem = f8, f1, fNormX // Get remainder = trunc(x) - x
nop.i 0
}
+;;
+
{ .mfi
nop.m 0
-(p9) fcmp.ge.s1 p9,p0 = fRem, fHalf
+ fcmp.ge.s1 p9,p0 = fRem, fHalf // Test |rem| >= 0.5
nop.i 0
}
;;
// If x < 0 and remainder <= -0.5, then subtract 1 from result
// If x > 0 and remainder >= +0.5, then add 1 to result
-.pred.rel "mutex",p8,p9
-{ .mfi
+{ .mfb
nop.m 0
-(p8) fms.s0 f8 = f8, f1, f1
- nop.i 0
+(p9) fma.s0 f8 = f8, f1, fInc
+ br.ret.sptk b0
}
+;;
+
+
+ROUND_SPECIAL:
+// Here if x natval, nan, inf
{ .mfb
nop.m 0
-(p9) fma.s0 f8 = f8, f1, f1
+ fma.s0 f8 = f8, f1, f0
br.ret.sptk b0
}
;;
-
ROUND_UNORM:
// Here if x unorm
-{ .mfb
+{ .mfi
getf.exp rSignexp = fNormX // Get signexp, recompute if unorm
fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ fcvt.fx.trunc.s1 fXtruncInt = fNormX // Convert to int in significand
br.cond.sptk ROUND_COMMON // Return to main path
}
;;
diff --git a/sysdeps/ia64/fpu/s_scalblnf.c b/sysdeps/ia64/fpu/s_scalblnf.c
index 97de090..2fa51ba 100644
--- a/sysdeps/ia64/fpu/s_scalblnf.c
+++ b/sysdeps/ia64/fpu/s_scalblnf.c
@@ -4,8 +4,7 @@
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
diff --git a/sysdeps/ia64/fpu/s_scalbn.c b/sysdeps/ia64/fpu/s_scalbn.c
index b0bd44a..1f57141 100644
--- a/sysdeps/ia64/fpu/s_scalbn.c
+++ b/sysdeps/ia64/fpu/s_scalbn.c
@@ -4,8 +4,7 @@
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
diff --git a/sysdeps/ia64/fpu/s_scalbnf.c b/sysdeps/ia64/fpu/s_scalbnf.c
index 176c2ed..97c06da 100644
--- a/sysdeps/ia64/fpu/s_scalbnf.c
+++ b/sysdeps/ia64/fpu/s_scalbnf.c
@@ -4,8 +4,7 @@
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
diff --git a/sysdeps/ia64/fpu/s_scalbnl.c b/sysdeps/ia64/fpu/s_scalbnl.c
index d19ddd3..d7a81df 100644
--- a/sysdeps/ia64/fpu/s_scalbnl.c
+++ b/sysdeps/ia64/fpu/s_scalbnl.c
@@ -4,8 +4,7 @@
// Copyright (c) 2000, 2001, Intel Corporation
// All rights reserved.
//
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
diff --git a/sysdeps/ia64/fpu/s_tan.S b/sysdeps/ia64/fpu/s_tan.S
index 3000f5e..a2f80c8 100644
--- a/sysdeps/ia64/fpu/s_tan.S
+++ b/sysdeps/ia64/fpu/s_tan.S
@@ -282,6 +282,7 @@ LOCAL_LIBM_ENTRY(cot)
LOCAL_LIBM_END(cot)
+
GLOBAL_IEEE754_ENTRY(tan)
// The initial fnorm will take any unmasked faults and
// normalize any single/double unorms
@@ -737,6 +738,7 @@ COMMON_PATH:
}
GLOBAL_IEEE754_END(tan)
+
LOCAL_LIBM_ENTRY(__libm_callout)
TAN_DBX:
.prologue
diff --git a/sysdeps/ia64/fpu/s_tanf.S b/sysdeps/ia64/fpu/s_tanf.S
index 48f8234..98e3f76 100644
--- a/sysdeps/ia64/fpu/s_tanf.S
+++ b/sysdeps/ia64/fpu/s_tanf.S
@@ -247,6 +247,7 @@ LOCAL_LIBM_ENTRY(cotf)
LOCAL_LIBM_END(cotf)
+
GLOBAL_IEEE754_ENTRY(tanf)
{ .mlx
@@ -549,6 +550,7 @@ Return_From_Huges:
GLOBAL_IEEE754_END(tanf)
+
LOCAL_LIBM_ENTRY(__libm_callout)
Huge_Argument:
.prologue
diff --git a/sysdeps/ia64/fpu/s_tanh.S b/sysdeps/ia64/fpu/s_tanh.S
index c858398..5e0c407 100644
--- a/sysdeps/ia64/fpu/s_tanh.S
+++ b/sysdeps/ia64/fpu/s_tanh.S
@@ -985,3 +985,4 @@ _tanh_spec:
GLOBAL_LIBM_END(tanh)
+
diff --git a/sysdeps/ia64/fpu/s_tanhl.S b/sysdeps/ia64/fpu/s_tanhl.S
index ab00994..3435f43 100644
--- a/sysdeps/ia64/fpu/s_tanhl.S
+++ b/sysdeps/ia64/fpu/s_tanhl.S
@@ -1345,3 +1345,4 @@ GLOBAL_LIBM_END(tanhl)
+
diff --git a/sysdeps/ia64/fpu/s_tanl.S b/sysdeps/ia64/fpu/s_tanl.S
index 345a059..607a271 100644
--- a/sysdeps/ia64/fpu/s_tanl.S
+++ b/sysdeps/ia64/fpu/s_tanl.S
@@ -1,7 +1,7 @@
.file "tancotl.s"
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (c) 2000 - 2004, Intel Corporation
// All rights reserved.
//
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
@@ -50,6 +50,7 @@
// 02/10/03 Reordered header: .section, .global, .proc, .align;
// used data8 for long double table values
// 05/15/03 Reformatted data tables
+// 10/26/04 Avoided using r14-31 as scratch so not clobbered by dynamic loader
//
//*********************************************************************
//
@@ -65,7 +66,7 @@
// f32-f121
//
// General Purpose Registers:
-// r14-r26,r32-r57
+// r32-r70
//
// Predicate Registers: p6-p15
//
@@ -1171,20 +1172,6 @@ TWO_TO_NEG65 = f119
fp_tmp = f120
mOne = f121
-GR_sig_inv_pi = r14
-GR_rshf_2to64 = r15
-GR_exp_2tom64 = r16
-GR_rshf = r17
-GR_exp_2_to_63 = r18
-GR_exp_2_to_24 = r19
-GR_signexp_x = r20
-GR_exp_x = r21
-GR_exp_mask = r22
-GR_exp_2tom14 = r23
-GR_exp_m2tom14 = r24
-GR_exp_2tom33 = r25
-GR_exp_m2tom33 = r26
-
GR_SAVE_B0 = r33
GR_SAVE_GP = r34
GR_SAVE_PFS = r35
@@ -1204,13 +1191,28 @@ bmask2 = r48
gr_tmp = r49
cot_flag = r50
-GR_SAVE_B0 = r51
-GR_SAVE_PFS = r52
-GR_SAVE_GP = r53
-GR_Parameter_X = r54
-GR_Parameter_Y = r55
-GR_Parameter_RESULT = r56
-GR_Parameter_Tag = r57
+GR_sig_inv_pi = r51
+GR_rshf_2to64 = r52
+GR_exp_2tom64 = r53
+GR_rshf = r54
+GR_exp_2_to_63 = r55
+GR_exp_2_to_24 = r56
+GR_signexp_x = r57
+GR_exp_x = r58
+GR_exp_mask = r59
+GR_exp_2tom14 = r60
+GR_exp_m2tom14 = r61
+GR_exp_2tom33 = r62
+GR_exp_m2tom33 = r63
+
+GR_SAVE_B0 = r64
+GR_SAVE_PFS = r65
+GR_SAVE_GP = r66
+
+GR_Parameter_X = r67
+GR_Parameter_Y = r68
+GR_Parameter_RESULT = r69
+GR_Parameter_Tag = r70
.section .text
@@ -1223,7 +1225,7 @@ __libm_cotl:
LOCAL_LIBM_ENTRY(cotl)
{ .mlx
- alloc r32 = ar.pfs, 0,22,4,0
+ alloc r32 = ar.pfs, 0,35,4,0
movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
}
{ .mlx
@@ -1246,13 +1248,14 @@ LOCAL_LIBM_ENTRY(cotl)
LOCAL_LIBM_END(cotl)
+
.proc __libm_tanl#
__libm_tanl:
.endp __libm_tanl#
GLOBAL_IEEE754_ENTRY(tanl)
{ .mlx
- alloc r32 = ar.pfs, 0,22,4,0
+ alloc r32 = ar.pfs, 0,35,4,0
movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
}
{ .mlx
@@ -3089,6 +3092,7 @@ TANL_UNSUPPORTED:
GLOBAL_IEEE754_END(tanl)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
diff --git a/sysdeps/ia64/fpu/w_lgamma.c b/sysdeps/ia64/fpu/w_lgamma.c
index fb799df..f16256e 100644
--- a/sysdeps/ia64/fpu/w_lgamma.c
+++ b/sysdeps/ia64/fpu/w_lgamma.c
@@ -1,5 +1,6 @@
/* file: lgamma.c */
+
// Copyright (c) 2002 Intel Corporation
// All rights reserved.
//
@@ -20,7 +21,6 @@
// products derived from this software without specific prior written
// permission.
-// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
diff --git a/sysdeps/ia64/fpu/w_lgammaf.c b/sysdeps/ia64/fpu/w_lgammaf.c
index bda3741..5ac3b82 100644
--- a/sysdeps/ia64/fpu/w_lgammaf.c
+++ b/sysdeps/ia64/fpu/w_lgammaf.c
@@ -1,5 +1,6 @@
/* file: lgammaf.c */
+
// Copyright (c) 2002 Intel Corporation
// All rights reserved.
//
@@ -20,7 +21,6 @@
// products derived from this software without specific prior written
// permission.
-// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
diff --git a/sysdeps/ia64/fpu/w_lgammal.c b/sysdeps/ia64/fpu/w_lgammal.c
index 9f9f356..8ddbb74 100644
--- a/sysdeps/ia64/fpu/w_lgammal.c
+++ b/sysdeps/ia64/fpu/w_lgammal.c
@@ -1,5 +1,6 @@
/* file: lgammal.c */
+
// Copyright (c) 2002 Intel Corporation
// All rights reserved.
//
@@ -20,7 +21,6 @@
// products derived from this software without specific prior written
// permission.
-// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
diff --git a/sysdeps/ia64/fpu/w_tgamma.S b/sysdeps/ia64/fpu/w_tgamma.S
index 7d654d0..e55e4e3 100644
--- a/sysdeps/ia64/fpu/w_tgamma.S
+++ b/sysdeps/ia64/fpu/w_tgamma.S
@@ -1781,6 +1781,7 @@ tgamma_libm_err:
};;
GLOBAL_LIBM_END(tgamma)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
diff --git a/sysdeps/ia64/fpu/w_tgammaf.S b/sysdeps/ia64/fpu/w_tgammaf.S
index 4363ca2..64421ab 100644
--- a/sysdeps/ia64/fpu/w_tgammaf.S
+++ b/sysdeps/ia64/fpu/w_tgammaf.S
@@ -45,6 +45,7 @@
// 02/10/03 Reordered header: .section, .global, .proc, .align
// 04/04/03 Changed error codes for overflow and negative integers
// 04/10/03 Changed code for overflow near zero handling
+// 12/16/03 Fixed parameter passing to/from error handling routine
//
//*********************************************************************
//
@@ -1274,6 +1275,7 @@ tgammaf_libm_err:
};;
GLOBAL_LIBM_END(tgammaf)
+
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
@@ -1289,19 +1291,19 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
mov GR_SAVE_GP=gp // Save gp
};;
{ .mmi
- stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
+ stfs [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
add GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
mov GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
- stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
+ stfs [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
nop.b 0
}
{ .mib
- stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
+ stfs [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
add GR_Parameter_Y = -16,GR_Parameter_Y
br.call.sptk b0=__libm_error_support# // Call error handling function
};;
@@ -1311,7 +1313,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
add GR_Parameter_RESULT = 48,sp
};;
{ .mmi
- ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
add sp = 64,sp // Restore stack pointer
mov b0 = GR_SAVE_B0 // Restore return address
diff --git a/sysdeps/ia64/fpu/w_tgammal.S b/sysdeps/ia64/fpu/w_tgammal.S
index 75b1069..d801ba0 100644
--- a/sysdeps/ia64/fpu/w_tgammal.S
+++ b/sysdeps/ia64/fpu/w_tgammal.S
@@ -4428,6 +4428,7 @@ GLOBAL_LIBM_END(tgammal)
+
////////////////// Tgammal error handler ///////////////////////////////////////
//------------------------------------------------------------------------------
LOCAL_LIBM_ENTRY(__libm_error_region)
diff --git a/sysdeps/unix/sysv/linux/x86_64/clone.S b/sysdeps/unix/sysv/linux/x86_64/clone.S
index 9695e1e..8a12b09 100644
--- a/sysdeps/unix/sysv/linux/x86_64/clone.S
+++ b/sysdeps/unix/sysv/linux/x86_64/clone.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+/* Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -74,7 +74,7 @@ ENTRY (BP_SYM (__clone))
movq %r8, %rdx
movq %r9, %r8
movq 8(%rsp), %r10
- movq $SYS_ify(clone),%rax
+ movl $SYS_ify(clone),%eax
/* End FDE now, because in the child the unwind info will be
wrong. */
@@ -91,7 +91,7 @@ L(pseudo_end):
L(thread_start):
/* Clear the frame pointer. The ABI suggests this be done, to mark
the outermost frame obviously. */
- xorq %rbp, %rbp
+ xorl %ebp, %ebp
#ifdef RESET_PID
testq $CLONE_THREAD, %rdi
@@ -99,7 +99,7 @@ L(thread_start):
testq $CLONE_VM, %rdi
movl $-1, %eax
jne 2f
- movq $SYS_ify(getpid), %rax
+ movl $SYS_ify(getpid), %eax
syscall
2: movl %eax, %fs:PID
movl %eax, %fs:TID
diff --git a/sysdeps/unix/sysv/linux/x86_64/getcontext.S b/sysdeps/unix/sysv/linux/x86_64/getcontext.S
index 2f2c710..d19c9f0 100644
--- a/sysdeps/unix/sysv/linux/x86_64/getcontext.S
+++ b/sysdeps/unix/sysv/linux/x86_64/getcontext.S
@@ -1,5 +1,5 @@
/* Save current context.
- Copyright (C) 2002 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2002.
@@ -67,16 +67,20 @@ ENTRY(__getcontext)
/* Save the current signal mask with
rt_sigprocmask (SIG_BLOCK, NULL, set,_NSIG/8). */
leaq oSIGMASK(%rdi), %rdx
- xorq %rsi,%rsi
- movq $SIG_BLOCK, %rdi
- movq $_NSIG8,%r10
- movq $__NR_rt_sigprocmask, %rax
+ xorl %esi,%esi
+#if SIG_BLOCK == 0
+ xorl %edi, %edi
+#else
+ movl $SIG_BLOCK, %edi
+#endif
+ movl $_NSIG8,%r10d
+ movl $__NR_rt_sigprocmask, %eax
syscall
cmpq $-4095, %rax /* Check %rax for error. */
jae SYSCALL_ERROR_LABEL /* Jump to error handler if error. */
/* All done, return 0 for success. */
- xorq %rax, %rax
+ xorl %eax, %eax
L(pseudo_end):
ret
PSEUDO_END(__getcontext)
diff --git a/sysdeps/unix/sysv/linux/x86_64/setcontext.S b/sysdeps/unix/sysv/linux/x86_64/setcontext.S
index 65c03d8..51e4a50 100644
--- a/sysdeps/unix/sysv/linux/x86_64/setcontext.S
+++ b/sysdeps/unix/sysv/linux/x86_64/setcontext.S
@@ -1,5 +1,5 @@
/* Install given context.
- Copyright (C) 2002, 2004 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2002.
@@ -40,10 +40,10 @@ ENTRY(__setcontext)
/* Set the signal mask with
rt_sigprocmask (SIG_SETMASK, mask, NULL, _NSIG/8). */
leaq oSIGMASK(%rdi), %rsi
- xorq %rdx, %rdx
- movq $SIG_SETMASK, %rdi
- movq $_NSIG8,%r10
- movq $__NR_rt_sigprocmask, %rax
+ xorl %edx, %edx
+ movl $SIG_SETMASK, %edi
+ movl $_NSIG8,%r10d
+ movl $__NR_rt_sigprocmask, %eax
syscall
popq %rdi /* Reload %rdi, adjust stack. */
cfi_adjust_cfa_offset(-8)
@@ -96,7 +96,7 @@ ENTRY(__setcontext)
cfi_startproc
/* Clear rax to indicate success. */
- xorq %rax, %rax
+ xorl %eax, %eax
L(pseudo_end):
ret
diff --git a/sysdeps/unix/sysv/linux/x86_64/swapcontext.S b/sysdeps/unix/sysv/linux/x86_64/swapcontext.S
index 4394e2a..8d48d3b 100644
--- a/sysdeps/unix/sysv/linux/x86_64/swapcontext.S
+++ b/sysdeps/unix/sysv/linux/x86_64/swapcontext.S
@@ -1,5 +1,5 @@
/* Save current context and install the given one.
- Copyright (C) 2002 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2002.
@@ -73,9 +73,9 @@ ENTRY(__swapcontext)
rt_sigprocmask (SIG_BLOCK, newset, oldset,_NSIG/8). */
leaq oSIGMASK(%rdi), %rdx
leaq oSIGMASK(%rsi), %rsi
- movq $SIG_SETMASK, %rdi
- movq $_NSIG8,%r10
- movq $__NR_rt_sigprocmask, %rax
+ movl $SIG_SETMASK, %edi
+ movl $_NSIG8,%r10d
+ movl $__NR_rt_sigprocmask, %eax
syscall
cmpq $-4095, %rax /* Check %rax for error. */
jae SYSCALL_ERROR_LABEL /* Jump to error handler if error. */
@@ -114,7 +114,7 @@ ENTRY(__swapcontext)
movq oRSI(%rsi), %rsi
/* Clear rax to indicate success. */
- xorq %rax, %rax
+ xorl %eax, %eax
L(pseudo_end):
ret
diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
index fd92d7a..0dc2f27 100644
--- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h
+++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2001,02,03,04 Free Software Foundation, Inc.
+/* Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -119,7 +119,7 @@
# define SYSCALL_ERROR_HANDLER \
0: \
leaq rtld_errno(%rip), %rcx; \
- xorq %rdx, %rdx; \
+ xorl %edx, %edx; \
subq %rax, %rdx; \
movl %edx, (%rcx); \
orq $-1, %rax; \
@@ -133,7 +133,7 @@
# define SYSCALL_ERROR_HANDLER \
0: \
movq SYSCALL_ERROR_ERRNO@GOTTPOFF(%rip), %rcx;\
- xorq %rdx, %rdx; \
+ xorl %edx, %edx; \
subq %rax, %rdx; \
movl %edx, %fs:(%rcx); \
orq $-1, %rax; \
@@ -143,7 +143,7 @@
Note that errno occupies only 4 bytes. */
# define SYSCALL_ERROR_HANDLER \
0: \
- xorq %rdx, %rdx; \
+ xorl %edx, %edx; \
subq %rax, %rdx; \
pushq %rdx; \
cfi_adjust_cfa_offset(8); \
@@ -161,7 +161,7 @@
#else /* Not _LIBC_REENTRANT. */
# define SYSCALL_ERROR_HANDLER \
0:movq errno@GOTPCREL(%RIP), %rcx; \
- xorq %rdx, %rdx; \
+ xorl %edx, %edx; \
subq %rax, %rdx; \
movl %edx, (%rcx); \
orq $-1, %rax; \
@@ -208,7 +208,7 @@
#undef DO_CALL
#define DO_CALL(syscall_name, args) \
DOARGS_##args \
- movq $SYS_ify (syscall_name), %rax; \
+ movl $SYS_ify (syscall_name), %eax; \
syscall;
#define DOARGS_0 /* nothing */
diff --git a/sysdeps/unix/x86_64/sysdep.S b/sysdeps/unix/x86_64/sysdep.S
index 0d0d715..aca81fe 100644
--- a/sysdeps/unix/x86_64/sysdep.S
+++ b/sysdeps/unix/x86_64/sysdep.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2001, 2002, 2004 Free Software Foundation, Inc.
+/* Copyright (C) 2001, 2002, 2004, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -42,7 +42,7 @@ syscall_error:
EWOULDBLOCK_sys is the original number. */
cmpq $EWOULDBLOCK_sys, %rax /* Is it the old EWOULDBLOCK? */
jne notb /* Branch if not. */
- movq $EAGAIN, %rax /* Yes; translate it to EAGAIN. */
+ movl $EAGAIN, %eax /* Yes; translate it to EAGAIN. */
notb:
#endif
#if USE___THREAD
diff --git a/sysdeps/x86_64/bsd-_setjmp.S b/sysdeps/x86_64/bsd-_setjmp.S
index 22b6eb4..838fae7 100644
--- a/sysdeps/x86_64/bsd-_setjmp.S
+++ b/sysdeps/x86_64/bsd-_setjmp.S
@@ -1,5 +1,6 @@
/* BSD `_setjmp' entry point to `sigsetjmp (..., 0)'. x86-64 version.
- Copyright (C) 1994-1997,2000,2001,2002,2003 Free Software Foundation, Inc.
+ Copyright (C) 1994-1997, 2000, 2001, 2002, 2003, 2005
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -30,7 +31,7 @@
ENTRY (BP_SYM (_setjmp))
/* Set up arguments, we only need to set the second arg. */
- xorq %rsi, %rsi
+ xorl %esi, %esi
#ifdef PIC
jmp HIDDEN_JUMPTARGET (__sigsetjmp)
#else
diff --git a/sysdeps/x86_64/bsd-setjmp.S b/sysdeps/x86_64/bsd-setjmp.S
index c168f81..fdf4833 100644
--- a/sysdeps/x86_64/bsd-setjmp.S
+++ b/sysdeps/x86_64/bsd-setjmp.S
@@ -1,5 +1,6 @@
/* BSD `setjmp' entry point to `sigsetjmp (..., 1)'. x86-64 version.
- Copyright (C) 1994,1995,1996,1997,2000,2001 Free Software Foundation, Inc.
+ Copyright (C) 1994, 1995, 1996, 1997, 2000, 2001, 2005
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -30,7 +31,7 @@
ENTRY (BP_SYM (setjmp))
/* Set up arguments, we only need to set the 2nd arg. */
- movq $1, %rsi
+ movl $1, %esi
#ifdef PIC
#else
jmp BP_SYM (__sigsetjmp)
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index bb0c77f..335b38a 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -170,7 +170,7 @@ _dl_start_user:\n\
# argv -> rdx\n\
leaq 8(%r13), %rdx\n\
# Clear %rbp to mark outermost frame obviously even for constructors.\n\
- xorq %rbp, %rbp\n\
+ xorl %ebp, %ebp\n\
# Call the function to run the initializers.\n\
call _dl_init_internal@PLT\n\
# Pass our finalizer function to the user in %rdx, as per ELF ABI.\n\
diff --git a/sysdeps/x86_64/elf/start.S b/sysdeps/x86_64/elf/start.S
index 0ef2523..3c2caf9d 100644
--- a/sysdeps/x86_64/elf/start.S
+++ b/sysdeps/x86_64/elf/start.S
@@ -1,5 +1,5 @@
/* Startup code compliant to the ELF x86-64 ABI.
- Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc.
+ Copyright (C) 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Andreas Jaeger <aj@suse.de>, 2001.
@@ -62,7 +62,7 @@
_start:
/* Clear the frame pointer. The ABI suggests this be done, to mark
the outermost frame obviously. */
- xorq %rbp, %rbp
+ xorl %ebp, %ebp
/* Extract the arguments as encoded on the stack and set up
the arguments for __libc_start_main (int (*main) (int, char **, char **),
diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
index 6b718b7..1c421c7 100644
--- a/sysdeps/x86_64/memset.S
+++ b/sysdeps/x86_64/memset.S
@@ -41,7 +41,7 @@ END (__memset_chk)
ENTRY (memset)
#if BZERO_P
mov %rsi,%rdx /* Adjust parameter. */
- xorq %rsi,%rsi /* Fill with 0s. */
+ xorl %esi,%esi /* Fill with 0s. */
#endif
cmp $0x7,%rdx /* Check for small length. */
mov %rdi,%rcx /* Save ptr as return value. */
diff --git a/sysdeps/x86_64/strchr.S b/sysdeps/x86_64/strchr.S
index a657796..8934697 100644
--- a/sysdeps/x86_64/strchr.S
+++ b/sysdeps/x86_64/strchr.S
@@ -1,6 +1,6 @@
/* strchr (str, ch) -- Return pointer to first occurrence of CH in STR.
For AMD x86-64.
- Copyright (C) 2002 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -221,7 +221,7 @@ ENTRY (BP_SYM (strchr))
7: /* Return NULL. */
- xorq %rax, %rax
+ xorl %eax, %eax
retq
diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S
index ed6710b..119b88e 100644
--- a/sysdeps/x86_64/strcmp.S
+++ b/sysdeps/x86_64/strcmp.S
@@ -1,5 +1,5 @@
/* Highly optimized version for x86-64.
- Copyright (C) 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
+ Copyright (C) 1999, 2000, 2002, 2003, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Based on i686 version contributed by Ulrich Drepper
<drepper@cygnus.com>, 1999.
@@ -34,7 +34,7 @@ L(oop): movb (%rdi), %al
testb %al, %al
jnz L(oop)
- xorq %rax, %rax
+ xorl %eax, %eax
ret
L(neq): movl $1, %eax
diff --git a/sysdeps/x86_64/strcspn.S b/sysdeps/x86_64/strcspn.S
index 63af04a..4672013 100644
--- a/sysdeps/x86_64/strcspn.S
+++ b/sysdeps/x86_64/strcspn.S
@@ -1,7 +1,8 @@
/* strcspn (str, ss) -- Return the length of the initial segment of STR
which contains no characters from SS.
For AMD x86-64.
- Copyright (C) 1994-1997,2000,2002,2003,2004 Free Software Foundation, Inc.
+ Copyright (C) 1994-1997, 2000, 2002, 2003, 2004, 2005
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>.
Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>.
@@ -41,9 +42,9 @@ ENTRY (strcspn)
movq %rdi, %r8 /* Save value. */
subq $256, %rsp /* Make space for 256 bytes. */
cfi_adjust_cfa_offset(256)
- movq $32, %rcx /* 32*8 bytes = 256 bytes. */
+ movl $32, %ecx /* 32*8 bytes = 256 bytes. */
movq %rsp, %rdi
- xorq %rax, %rax /* We store 0s. */
+ xorl %eax, %eax /* We store 0s. */
cld
rep
stosq
@@ -113,7 +114,7 @@ L(5): incq %rax
L(4): addq $256, %rsp /* remove skipset */
cfi_adjust_cfa_offset(-256)
#if STRPBRK_P
- xorq %rdx,%rdx
+ xorl %edx,%edx
orb %cl, %cl /* was last character NUL? */
cmovzq %rdx, %rax /* Yes: return NULL */
#else
diff --git a/sysdeps/x86_64/strspn.S b/sysdeps/x86_64/strspn.S
index fa4abd1..54aac18 100644
--- a/sysdeps/x86_64/strspn.S
+++ b/sysdeps/x86_64/strspn.S
@@ -1,7 +1,8 @@
/* strspn (str, ss) -- Return the length of the initial segment of STR
which contains only characters from SS.
For AMD x86-64.
- Copyright (C) 1994-1997,2000,2002,2003,2004 Free Software Foundation, Inc.
+ Copyright (C) 1994-1997, 2000, 2002, 2003, 2004, 2005
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>.
Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>.
@@ -37,9 +38,9 @@ ENTRY (strspn)
movq %rdi, %r8 /* Save value. */
subq $256, %rsp /* Make space for 256 bytes. */
cfi_adjust_cfa_offset(256)
- movq $32, %rcx /* 32*8 bytes = 256 bytes. */
+ movl $32, %ecx /* 32*8 bytes = 256 bytes. */
movq %rsp, %rdi
- xorq %rax, %rax /* We store 0s. */
+ xorl %eax, %eax /* We store 0s. */
cld
rep
stosq
diff --git a/sysdeps/x86_64/strtok.S b/sysdeps/x86_64/strtok.S
index 5d0cbef..de427dc 100644
--- a/sysdeps/x86_64/strtok.S
+++ b/sysdeps/x86_64/strtok.S
@@ -1,6 +1,6 @@
/* strtok (str, delim) -- Return next DELIM separated token from STR.
For AMD x86-64.
- Copyright (C) 1998,2000,2001,2002,2003 Free Software Foundation, Inc.
+ Copyright (C) 1998,2000,2001,2002,2003,2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Based on i686 version contributed by Ulrich Drepper
<drepper@cygnus.com>, 1998.
@@ -69,9 +69,9 @@ ENTRY (BP_SYM (FUNCTION))
movq %rdi, %r8 /* Save value. */
subq $256, %rsp /* Make space for 256 bytes. */
cfi_adjust_cfa_offset(256)
- movq $32, %rcx /* 32*8 bytes = 256 bytes. */
+ movl $32, %ecx /* 32*8 bytes = 256 bytes. */
movq %rsp, %rdi
- xorq %rax, %rax /* We store 0s. */
+ xorl %eax, %eax /* We store 0s. */
cld
rep
stosq
@@ -204,7 +204,7 @@ L(epilogue):
retq
L(returnNULL):
- xorq %rax, %rax
+ xorl %eax, %eax
jmp L(epilogue)
END (BP_SYM (FUNCTION))