diff options
author | Ulrich Drepper <drepper@redhat.com> | 2005-03-31 10:02:53 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2005-03-31 10:02:53 +0000 |
commit | ee6189855aab3a9be8f3c2d95ce2b2cd17db4ec2 (patch) | |
tree | cf3e2fe1f9be5b358033fd927a0bd922542e04a1 /sysdeps/ia64 | |
parent | 4d6302cf51b16a129addf7687c91490c40a7225c (diff) | |
download | glibc-ee6189855aab3a9be8f3c2d95ce2b2cd17db4ec2.zip glibc-ee6189855aab3a9be8f3c2d95ce2b2cd17db4ec2.tar.gz glibc-ee6189855aab3a9be8f3c2d95ce2b2cd17db4ec2.tar.bz2 |
* sysdeps/unix/sysv/linux/x86_64/getcontext.S: Use functionally
equivalent, but shorter instructions.
* sysdeps/unix/sysv/linux/x86_64/sysdep.h: Likewise.
* sysdeps/unix/sysv/linux/x86_64/setcontext.S: Likewise.
* sysdeps/unix/sysv/linux/x86_64/clone.S: Likewise.
* sysdeps/unix/sysv/linux/x86_64/swapcontext.S: Likewise.
* sysdeps/unix/x86_64/sysdep.S: Likewise.
* sysdeps/x86_64/strchr.S: Likewise.
* sysdeps/x86_64/memset.S: Likewise.
* sysdeps/x86_64/strcspn.S: Likewise.
* sysdeps/x86_64/strcmp.S: Likewise.
* sysdeps/x86_64/elf/start.S: Likewise.
* sysdeps/x86_64/strspn.S: Likewise.
* sysdeps/x86_64/dl-machine.h: Likewise.
* sysdeps/x86_64/bsd-_setjmp.S: Likewise.
* sysdeps/x86_64/bsd-setjmp.S: Likewise.
* sysdeps/x86_64/strtok.S: Likewise.
Diffstat (limited to 'sysdeps/ia64')
138 files changed, 7642 insertions, 6383 deletions
diff --git a/sysdeps/ia64/fpu/Makefile b/sysdeps/ia64/fpu/Makefile index 7ec30c4..384fc83 100644 --- a/sysdeps/ia64/fpu/Makefile +++ b/sysdeps/ia64/fpu/Makefile @@ -27,7 +27,8 @@ sysdep_routines += libc_libm_error libm_frexp libm_frexpf libm_frexpl \ $(duplicated-routines) sysdep-CPPFLAGS += -include libm-symbols.h \ - -D__POSIX__ \ + -D__POSIX__ -Dopensource \ -D_LIB_VERSIONIMF=_LIB_VERSION \ - -DSIZE_INT_32 -DSIZE_LONG_INT_64 -DSIZE_LONG_LONG_INT_64 + -DSIZE_INT_32 -DSIZE_LONG_INT_64 -DSIZE_LONG_LONG_INT_64 \ + -DSIZE_LONG_64 -DIA64 endif diff --git a/sysdeps/ia64/fpu/e_acos.S b/sysdeps/ia64/fpu/e_acos.S index b515f01..c2b31ab 100644 --- a/sysdeps/ia64/fpu/e_acos.S +++ b/sysdeps/ia64/fpu/e_acos.S @@ -824,6 +824,7 @@ acos_abs_gt_1: GLOBAL_LIBM_END(acos) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_acosf.S b/sysdeps/ia64/fpu/e_acosf.S index 417f5b7..68b0b2e 100644 --- a/sysdeps/ia64/fpu/e_acosf.S +++ b/sysdeps/ia64/fpu/e_acosf.S @@ -601,6 +601,7 @@ ACOSF_ABS_ONE: GLOBAL_LIBM_END(acosf) + // Stack operations when calling error support. 
// (1) (2) // sp -> + psp -> + diff --git a/sysdeps/ia64/fpu/e_acosh.S b/sysdeps/ia64/fpu/e_acosh.S index 675d5fe..b55a6ab 100644 --- a/sysdeps/ia64/fpu/e_acosh.S +++ b/sysdeps/ia64/fpu/e_acosh.S @@ -1139,6 +1139,7 @@ ACOSH_LESS_ONE: GLOBAL_LIBM_END(acosh) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/e_acoshf.S b/sysdeps/ia64/fpu/e_acoshf.S index 4a54c26..58ef5f2 100644 --- a/sysdeps/ia64/fpu/e_acoshf.S +++ b/sysdeps/ia64/fpu/e_acoshf.S @@ -968,6 +968,7 @@ ACOSH_LESS_ONE: GLOBAL_LIBM_END(acoshf) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/e_acoshl.S b/sysdeps/ia64/fpu/e_acoshl.S index 85282d1..5eb2b34 100644 --- a/sysdeps/ia64/fpu/e_acoshl.S +++ b/sysdeps/ia64/fpu/e_acoshl.S @@ -1650,6 +1650,7 @@ acoshl_lt_pone: GLOBAL_LIBM_END(acoshl) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_acosl.S b/sysdeps/ia64/fpu/e_acosl.S index daa75b1..4fd345b 100644 --- a/sysdeps/ia64/fpu/e_acosl.S +++ b/sysdeps/ia64/fpu/e_acosl.S @@ -35,7 +35,7 @@ // // Intel Corporation is the author of this code, and requests that all // problem reports or change requests be submitted to it directly at -// http: //www.intel.com/software/products/opensource/libraries/num.htm. +// http://www.intel.com/software/products/opensource/libraries/num.htm. 
// // History //============================================================== @@ -2482,6 +2482,7 @@ acosl_SPECIAL_CASES: GLOBAL_LIBM_END(acosl) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue // (1) diff --git a/sysdeps/ia64/fpu/e_asin.S b/sysdeps/ia64/fpu/e_asin.S index 398079e..f995c59 100644 --- a/sysdeps/ia64/fpu/e_asin.S +++ b/sysdeps/ia64/fpu/e_asin.S @@ -800,6 +800,7 @@ asin_abs_gt_1: GLOBAL_LIBM_END(asin) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_asinf.S b/sysdeps/ia64/fpu/e_asinf.S index f9a1312..af24165 100644 --- a/sysdeps/ia64/fpu/e_asinf.S +++ b/sysdeps/ia64/fpu/e_asinf.S @@ -583,6 +583,7 @@ ASINF_ABS_ONE: ;; GLOBAL_LIBM_END(asinf) + // Stack operations when calling error support. // (1) (2) // sp -> + psp -> + diff --git a/sysdeps/ia64/fpu/e_asinl.S b/sysdeps/ia64/fpu/e_asinl.S index bf5feba..ad65a73 100644 --- a/sysdeps/ia64/fpu/e_asinl.S +++ b/sysdeps/ia64/fpu/e_asinl.S @@ -35,7 +35,7 @@ // // Intel Corporation is the author of this code, and requests that all // problem reports or change requests be submitted to it directly at -// http: //www.intel.com/software/products/opensource/libraries/num.htm. +// http://www.intel.com/software/products/opensource/libraries/num.htm. 
// // History //============================================================== @@ -2459,6 +2459,7 @@ SMALL_S: GLOBAL_LIBM_END(asinl) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue // (1) diff --git a/sysdeps/ia64/fpu/e_atan2.S b/sysdeps/ia64/fpu/e_atan2.S index 8be7c6c..7a17fbf 100644 --- a/sysdeps/ia64/fpu/e_atan2.S +++ b/sysdeps/ia64/fpu/e_atan2.S @@ -52,6 +52,7 @@ // 08/20/02 Corrected inexact flag and directed rounding symmetry bugs // 02/06/03 Reordered header: .section, .global, .proc, .align // 04/17/03 Added missing mutex directive +// 12/23/03 atan2(NaN1,NaN2) now QNaN1, for consistency with atan2f, atan2l // // API //============================================================== @@ -142,7 +143,7 @@ // -0 -0 -pi // // Nan anything quiet Y -// anything NaN quiet X +// Not NaN NaN quiet X // atan2(+-0/+-0) sets double error tag to 37 @@ -388,7 +389,7 @@ GLOBAL_IEEE754_ENTRY(atan2) } { .mfb ldfe atan2_P21 = [EXP_AD_P2],16 -(p10) fma.d.s0 f8 = atan2_Y,atan2_X,f0 // If y=nan, result quietized y +(p10) fma.d.s0 f8 = atan2_X,atan2_Y,f0 // If y=nan, result quietized y (p10) br.ret.spnt b0 // Exit if y=nan ;; } @@ -985,6 +986,7 @@ ATAN2_ERROR: } GLOBAL_IEEE754_END(atan2) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue // (1) diff --git a/sysdeps/ia64/fpu/e_atan2f.S b/sysdeps/ia64/fpu/e_atan2f.S index c483a7a..67618f0 100644 --- a/sysdeps/ia64/fpu/e_atan2f.S +++ b/sysdeps/ia64/fpu/e_atan2f.S @@ -827,6 +827,7 @@ ATAN2F_XY_INF_NAN_ZERO: GLOBAL_IEEE754_END(atan2f) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue mov GR_Parameter_TAG = 38 diff --git a/sysdeps/ia64/fpu/e_atanh.S b/sysdeps/ia64/fpu/e_atanh.S index 7ddc3e3..5ae96dc 100644 --- a/sysdeps/ia64/fpu/e_atanh.S +++ b/sysdeps/ia64/fpu/e_atanh.S @@ -1008,6 +1008,7 @@ atanh_ge_one: GLOBAL_LIBM_END(atanh) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/e_atanhf.S b/sysdeps/ia64/fpu/e_atanhf.S index 3675c5f..1ec1408 100644 --- a/sysdeps/ia64/fpu/e_atanhf.S +++ 
b/sysdeps/ia64/fpu/e_atanhf.S @@ -782,6 +782,7 @@ atanhf_ge_one: GLOBAL_LIBM_END(atanhf) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/e_atanhl.S b/sysdeps/ia64/fpu/e_atanhl.S index 8266bd5..cee1ba1 100644 --- a/sysdeps/ia64/fpu/e_atanhl.S +++ b/sysdeps/ia64/fpu/e_atanhl.S @@ -1101,6 +1101,7 @@ atanhl_gt_one: };; GLOBAL_LIBM_END(atanhl) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_cosh.S b/sysdeps/ia64/fpu/e_cosh.S index 0c6c5b4..38bd80e 100644 --- a/sysdeps/ia64/fpu/e_cosh.S +++ b/sysdeps/ia64/fpu/e_cosh.S @@ -811,6 +811,7 @@ COSH_UNORM: GLOBAL_IEEE754_END(cosh) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_coshf.S b/sysdeps/ia64/fpu/e_coshf.S index 91846e4..6d30064 100644 --- a/sysdeps/ia64/fpu/e_coshf.S +++ b/sysdeps/ia64/fpu/e_coshf.S @@ -652,6 +652,7 @@ COSH_UNORM: GLOBAL_IEEE754_END(coshf) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_coshl.S b/sysdeps/ia64/fpu/e_coshl.S index cef8be0..b5872d0 100644 --- a/sysdeps/ia64/fpu/e_coshl.S +++ b/sysdeps/ia64/fpu/e_coshl.S @@ -1033,6 +1033,7 @@ COSH_HUGE: GLOBAL_IEEE754_END(coshl) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/e_exp.S b/sysdeps/ia64/fpu/e_exp.S index 5ae8afe..d22fd18 100644 --- a/sysdeps/ia64/fpu/e_exp.S +++ b/sysdeps/ia64/fpu/e_exp.S @@ -1,7 +1,7 @@ .file "exp.s" -// Copyright (c) 2000 - 2002, Intel Corporation +// Copyright (c) 2000 - 2003, Intel Corporation // All rights reserved. 
// // Contributed 2000 by the Intel Numerics Group, Intel Corporation @@ -52,6 +52,7 @@ // 05/20/02 Cleaned up namespace and sf0 syntax // 09/07/02 Force inexact flag // 11/15/02 Split underflow path into zero/nonzero; eliminated fma in main path +// 05/30/03 Set inexact flag on unmasked overflow/underflow // API //============================================================== @@ -602,7 +603,7 @@ EXP_CERTAIN_OVERFLOW: } { .mfb mov GR_Parameter_TAG = 14 - fma.d.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result + fma.d.s0 FR_RESULT = fTmp, fTmp, fTmp // Set I,O and +INF result br.cond.sptk __libm_error_region } ;; @@ -685,6 +686,13 @@ EXP_CERTAIN_UNDERFLOW: } ;; +{ .mfi + nop.m 0 + fmerge.se fTmp = fTmp, fLn2_by_128_lo // Small with signif lsb 1 + nop.i 0 +} +;; + { .mfb nop.m 0 fma.d.s0 f8 = fTmp, fTmp, f0 // Set I,U, tiny (+0.0) result @@ -730,6 +738,7 @@ EXP_UNDERFLOW_ZERO: GLOBAL_IEEE754_END(exp) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_exp10.S b/sysdeps/ia64/fpu/e_exp10.S index 1cc5bef..6bfc218 100644 --- a/sysdeps/ia64/fpu/e_exp10.S +++ b/sysdeps/ia64/fpu/e_exp10.S @@ -1,7 +1,7 @@ .file "exp10.s" -// Copyright (c) 2000 - 2003, Intel Corporation +// Copyright (c) 2000 - 2004, Intel Corporation // All rights reserved. // // Contributed 2000 by the Intel Numerics Group, Intel Corporation @@ -35,7 +35,7 @@ // // Intel Corporation is the author of this code, and requests that all // problem reports or change requests be submitted to it directly at -// http: //www.intel.com/software/products/opensource/libraries/num.htm. +// http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== @@ -43,6 +43,7 @@ // 05/20/02 Cleaned up namespace and sf0 syntax // 09/06/02 Improved performance; no inexact flags on exact cases // 01/29/03 Added missing } to bundle templates +// 12/16/04 Call error handling on underflow. 
// // API //============================================================== @@ -81,8 +82,8 @@ // Registers used //============================================================== // r2-r3, r14-r40 -// f6-f15, f32-f51 -// p6-p9, p12 +// f6-f15, f32-f52 +// p6-p12 // @@ -104,6 +105,7 @@ GR_EXPMAX = r24 GR_BIAS53 = r25 GR_ROUNDVAL = r26 +GR_SNORM_LIMIT = r26 GR_MASK = r27 GR_KF0 = r28 GR_MASK_low = r29 @@ -161,6 +163,7 @@ FR_E = f49 FR_exact_limit = f50 FR_int_x = f51 +FR_SNORM_LIMIT = f52 // Data tables @@ -256,8 +259,12 @@ GLOBAL_IEEE754_ENTRY(exp10) } ;; -{.mib +{.mlx ldfe FR_LOG2_10= [ GR_COEFF_START ], 16 // load log2(10)*2^(10-63) + movl GR_SNORM_LIMIT= 0xc0733a7146f72a41 // Smallest normal threshold +} +{.mib + nop.m 0 nop.i 0 (p12) br.cond.spnt SPECIAL_exp10 // Branch if nan, inf, zero } @@ -284,7 +291,7 @@ GLOBAL_IEEE754_ENTRY(exp10) ;; {.mfi - nop.m 0 + setf.d FR_SNORM_LIMIT= GR_SNORM_LIMIT // Set smallest normal limit fma.s1 FR_L2_10_high= FR_LOG2_10, FR_2P53, f0 // FR_LOG2_10= log2(10)_hi nop.i 0 } @@ -390,6 +397,13 @@ GLOBAL_IEEE754_ENTRY(exp10) {.mfi nop.m 0 + fcmp.ge.s1 p11,p0= f8, FR_SNORM_LIMIT // Test x for normal range + nop.i 0 +} +;; + +{.mfi + nop.m 0 fma.s1 FR_E= FR_E0, FR_COEFF1, f0 // E= C_1*e nop.i 0 } @@ -431,10 +445,17 @@ GLOBAL_IEEE754_ENTRY(exp10) {.mfb nop.m 0 (p9) fma.d.s1 f8= FR_P, FR_T, FR_T // result= T+T*P, exact use s1 - br.ret.sptk b0 // return + (p11) br.ret.sptk b0 // return, if result normal } ;; +// Here if result in denormal range (and not zero) +{.mib + nop.m 0 + mov GR_Parameter_TAG= 265 + br.cond.sptk __libm_error_region // Branch to error handling +} +;; SPECIAL_exp10: {.mfi @@ -487,53 +508,35 @@ SPECIAL_exp10: OUT_RANGE_exp10: +// underflow: p6= 1 // overflow: p8= 1 -{.mii +.pred.rel "mutex",p6,p8 +{.mmi (p8) mov GR_EXPMAX= 0x1fffe - nop.i 0 - nop.i 0 -} -;; - - -{.mmb - (p8) mov GR_Parameter_TAG= 166 - (p8) setf.exp FR_R= GR_EXPMAX - nop.b 999 -} -;; - -{.mfi - nop.m 999 - (p8) fma.d.s0 f8= FR_R, FR_R, f0 // Create 
overflow - nop.i 999 -} -// underflow: p6= 1 -{.mii - nop.m 0 (p6) mov GR_EXPMAX= 1 nop.i 0 } ;; -{.mmb - nop.m 0 - (p6) setf.exp FR_R= GR_EXPMAX - nop.b 999 +{.mii + setf.exp FR_R= GR_EXPMAX + (p8) mov GR_Parameter_TAG= 166 + (p6) mov GR_Parameter_TAG= 265 } ;; {.mfb - nop.m 999 - (p6) fma.d.s0 f8= FR_R, FR_R, f0 // Create underflow - (p6) br.ret.sptk b0 // will not call libm_error for underflow + nop.m 0 + fma.d.s0 f8= FR_R, FR_R, f0 // Create overflow/underflow + br.cond.sptk __libm_error_region // Branch to error handling } ;; GLOBAL_IEEE754_END(exp10) weak_alias (exp10, pow10) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/e_exp10f.S b/sysdeps/ia64/fpu/e_exp10f.S index f069b3a..46615e9 100644 --- a/sysdeps/ia64/fpu/e_exp10f.S +++ b/sysdeps/ia64/fpu/e_exp10f.S @@ -1,7 +1,7 @@ .file "exp10f.s" -// Copyright (c) 2000 - 2003, Intel Corporation +// Copyright (c) 2000 - 2004, Intel Corporation // All rights reserved. // // Contributed 2000 by the Intel Numerics Group, Intel Corporation @@ -35,7 +35,7 @@ // // Intel Corporation is the author of this code, and requests that all // problem reports or change requests be submitted to it directly at -// http: //www.intel.com/software/products/opensource/libraries/num.htm. +// http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== @@ -43,6 +43,7 @@ // 05/20/02 Cleaned up namespace and sf0 syntax // 09/06/02 Improved performance and accuracy; no inexact flags on exact cases // 01/29/03 Added missing } to bundle templates +// 12/16/04 Call error handling on underflow. 
// // API //============================================================== @@ -80,8 +81,8 @@ // Registers used //============================================================== // r2-r3, r14-r40 -// f6-f15, f32-f51 -// p6-p9, p12 +// f6-f15, f32-f52 +// p6-p12 // @@ -102,6 +103,7 @@ GR_Fh_ADDR = r23 GR_EXPMAX = r24 GR_ROUNDVAL = r26 +GR_SNORM_LIMIT = r26 GR_MASK = r27 GR_KF0 = r28 GR_MASK_low = r29 @@ -153,6 +155,7 @@ FR_E = f49 FR_exact_limit = f50 FR_int_x = f51 +FR_SNORM_LIMIT = f52 // Data tables @@ -246,8 +249,12 @@ GLOBAL_IEEE754_ENTRY(exp10f) } ;; -{.mib +{.mlx ldfe FR_LOG2_10= [ GR_COEFF_START ], 16 // load log2(10)*2^(10-63) + movl GR_SNORM_LIMIT= 0xc217b818 // Smallest normal threshold +} +{.mib + nop.m 0 nop.i 0 (p12) br.cond.spnt SPECIAL_exp10 // Branch if nan, inf, zero } @@ -261,7 +268,7 @@ GLOBAL_IEEE754_ENTRY(exp10f) ;; {.mfi - nop.m 0 + setf.s FR_SNORM_LIMIT= GR_SNORM_LIMIT // Set smallest normal limit (p8) fcvt.fx.s1 FR_int_x = f8 // Convert x to integer nop.i 0 } @@ -335,7 +342,7 @@ GLOBAL_IEEE754_ENTRY(exp10f) {.mfb ldf8 FR_T_high= [ GR_Fh_ADDR ] // load T_high= 2^{f_high} - nop.f 0 + fcmp.ge.s1 p11, p0= f8, FR_SNORM_LIMIT // Test x for normal range (p12) br.cond.spnt OUT_RANGE_exp10 } ;; @@ -390,10 +397,17 @@ GLOBAL_IEEE754_ENTRY(exp10f) {.mfb nop.m 0 (p9) fma.s.s1 f8= FR_P, FR_T, FR_T // result= T+T*P, exact use s1 - br.ret.sptk b0 // return + (p11) br.ret.sptk b0 // return, if result normal } ;; +// Here if result in denormal range (and not zero) +{.mib + nop.m 0 + mov GR_Parameter_TAG= 266 + br.cond.sptk __libm_error_region // Branch to error handling +} +;; SPECIAL_exp10: {.mfi @@ -446,53 +460,35 @@ SPECIAL_exp10: OUT_RANGE_exp10: +// underflow: p6= 1 // overflow: p8= 1 -{.mii +.pred.rel "mutex",p6,p8 +{.mmi (p8) mov GR_EXPMAX= 0x1fffe - nop.i 0 - nop.i 0 -} -;; - - -{.mmb - (p8) mov GR_Parameter_TAG= 167 - (p8) setf.exp FR_R= GR_EXPMAX - nop.b 999 -} -;; - -{.mfi - nop.m 999 - (p8) fma.s.s0 f8= FR_R, FR_R, f0 // Create overflow - nop.i 999 
-} -// underflow: p6= 1 -{.mii - nop.m 0 (p6) mov GR_EXPMAX= 1 nop.i 0 } ;; -{.mmb - nop.m 0 - (p6) setf.exp FR_R= GR_EXPMAX - nop.b 999 +{.mii + setf.exp FR_R= GR_EXPMAX + (p8) mov GR_Parameter_TAG= 167 + (p6) mov GR_Parameter_TAG= 266 } ;; {.mfb - nop.m 999 - (p6) fma.s.s0 f8= FR_R, FR_R, f0 // Create underflow - (p6) br.ret.sptk b0 // will not call libm_error for underflow + nop.m 0 + fma.s.s0 f8= FR_R, FR_R, f0 // Create overflow/underflow + br.cond.sptk __libm_error_region // Branch to error handling } ;; GLOBAL_IEEE754_END(exp10f) weak_alias (exp10f, pow10f) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/e_exp10l.S b/sysdeps/ia64/fpu/e_exp10l.S index 1b47258..a2e84b3 100644 --- a/sysdeps/ia64/fpu/e_exp10l.S +++ b/sysdeps/ia64/fpu/e_exp10l.S @@ -1,7 +1,7 @@ .file "exp10l.s" -// Copyright (c) 2000 - 2003, Intel Corporation +// Copyright (c) 2000 - 2004, Intel Corporation // All rights reserved. // // Contributed 2000 by the Intel Numerics Group, Intel Corporation @@ -44,6 +44,7 @@ // 02/06/03 Reordered header: .section, .global, .proc, .align // 05/08/03 Reformatted assembly source; corrected overflow result for round to // -inf and round to zero; exact results now don't set inexact flag +// 12/16/04 Call error handling on underflow. 
// // API //============================================================== @@ -79,9 +80,9 @@ // Registers used //============================================================== -// f6-f15, f32-f62 +// f6-f15, f32-f63 // r14-r30, r32-r40 -// p6-p8, p12-p14 +// p6-p8, p11-p14 // @@ -129,6 +130,7 @@ FR_4 = f60 FR_28 = f61 FR_32 = f62 + FR_SNORM_LIMIT = f63 GR_ADDR0 = r14 @@ -178,6 +180,7 @@ LOCAL_OBJECT_START(poly_coeffs) data8 0x3f55d87fe78a6731 // C_5 data8 0x3f2430912f86c787 // C_6 data8 0x9257edfe9b5fb698, 0x00003fbf // log2(10)_low (bits 64...127) + data8 0x9a1bc98027a81918, 0x0000c00b // Smallest normal threshold LOCAL_OBJECT_END(poly_coeffs) @@ -435,7 +438,7 @@ GLOBAL_IEEE754_ENTRY(exp10l) {.mmf // GR_D_ADDR = pointer to D table - add GR_D_ADDR = 2048-64+96+16, GR_ADDR0 + add GR_D_ADDR = 2048-64+96+32, GR_ADDR0 // load C_3, C_4 ldfpd FR_COEFF3, FR_COEFF4 = [ GR_ADDR0 ], 16 // y = x*log2(10)*2^8 @@ -471,7 +474,8 @@ GLOBAL_IEEE754_ENTRY(exp10l) } {.mfi - nop.m 0 + // load smallest normal limit + ldfe FR_SNORM_LIMIT = [ GR_ADDR0 ], 16 // x>overflow threshold ? 
fcmp.gt.s1 p12, p7 = f8, FR_OF_TEST nop.i 0 ;; @@ -598,6 +602,13 @@ GLOBAL_IEEE754_ENTRY(exp10l) {.mfi nop.m 0 + // test if x >= smallest normal limit + fcmp.ge.s1 p11, p0 = f8, FR_SNORM_LIMIT + nop.i 0 ;; +} + +{.mfi + nop.m 0 // P36 = P34+r2*P56 fma.s1 FR_COEFF4 = FR_COEFF5, FR_COEFF3, FR_COEFF4 nop.i 0 @@ -646,9 +657,16 @@ GLOBAL_IEEE754_ENTRY(exp10l) // result = T+T*P (p14) fma.s0 f8 = FR_COEFF3, FR_UF_TEST, FR_UF_TEST // return - br.ret.sptk b0 ;; + (p11) br.ret.sptk b0 ;; // return, if result normal } +// Here if result in denormal range (and not zero) +{.mib + nop.m 0 + mov GR_Parameter_TAG= 264 + br.cond.sptk __libm_error_region // Branch to error handling +} +;; SPECIAL_EXP10: @@ -703,47 +721,35 @@ SPECIAL_EXP10: OUT_RANGE_EXP10: -{.mii - // overflow: p8 = 1 +// underflow: p6 = 1 +// overflow: p8 = 1 + +.pred.rel "mutex",p6,p8 +{.mmi (p8) mov GR_CONST1 = 0x1fffe + (p6) mov GR_CONST1 = 1 nop.i 0 - nop.i 0 ;; } +;; -{.mmb - (p8) mov GR_Parameter_TAG = 165 - (p8) setf.exp FR_KF0 = GR_CONST1 - nop.b 999 ;; -} - -{.mfi - nop.m 999 - (p8) fma.s0 f8 = FR_KF0, FR_KF0, f0 - nop.i 999 -} {.mii - nop.m 0 - // underflow: p6 = 1 - (p6) mov GR_CONST1 = 1 - nop.i 0 ;; -} - -{.mmb - nop.m 0 - (p6) setf.exp FR_KF0 = GR_CONST1 - nop.b 999 ;; + setf.exp FR_KF0 = GR_CONST1 + (p8) mov GR_Parameter_TAG = 165 + (p6) mov GR_Parameter_TAG = 264 } +;; {.mfb nop.m 999 - (p6) fma.s0 f8 = FR_KF0, FR_KF0, f0 - // will not call libm_error for underflow - (p6) br.ret.sptk b0 ;; + fma.s0 f8 = FR_KF0, FR_KF0, f0 // Create overflow/underflow + br.cond.sptk __libm_error_region // Branch to error handling } +;; GLOBAL_IEEE754_END(exp10l) weak_alias (exp10l, pow10l) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue {.mfi diff --git a/sysdeps/ia64/fpu/e_exp2.S b/sysdeps/ia64/fpu/e_exp2.S index e4a1dad..46fca2d 100644 --- a/sysdeps/ia64/fpu/e_exp2.S +++ b/sysdeps/ia64/fpu/e_exp2.S @@ -35,7 +35,7 @@ // // Intel Corporation is the author of this code, and requests that all // problem reports or 
change requests be submitted to it directly at -// http: //www.intel.com/software/products/opensource/libraries/num.htm. +// http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== @@ -495,6 +495,7 @@ OUT_RANGE_exp2: GLOBAL_LIBM_END(exp2) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/e_exp2f.S b/sysdeps/ia64/fpu/e_exp2f.S index f785b70..8ee600c 100644 --- a/sysdeps/ia64/fpu/e_exp2f.S +++ b/sysdeps/ia64/fpu/e_exp2f.S @@ -35,7 +35,7 @@ // // Intel Corporation is the author of this code, and requests that all // problem reports or change requests be submitted to it directly at -// http: //www.intel.com/software/products/opensource/libraries/num.htm. +// http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== @@ -470,6 +470,7 @@ OUT_RANGE_exp2: GLOBAL_LIBM_END(exp2f) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/e_exp2l.S b/sysdeps/ia64/fpu/e_exp2l.S index 6e2a62a..743ed35 100644 --- a/sysdeps/ia64/fpu/e_exp2l.S +++ b/sysdeps/ia64/fpu/e_exp2l.S @@ -747,6 +747,7 @@ OUT_RANGE_exp2l: GLOBAL_LIBM_END(exp2l) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue {.mfi diff --git a/sysdeps/ia64/fpu/e_expf.S b/sysdeps/ia64/fpu/e_expf.S index 8d620b6..3dc0ba9 100644 --- a/sysdeps/ia64/fpu/e_expf.S +++ b/sysdeps/ia64/fpu/e_expf.S @@ -1,7 +1,7 @@ .file "expf.s" -// Copyright (c) 2000 - 2002, Intel Corporation +// Copyright (c) 2000 - 2003, Intel Corporation // All rights reserved. 
// // Contributed 2000 by the Intel Numerics Group, Intel Corporation @@ -52,6 +52,7 @@ // 09/26/02 support of higher precision inputs added, underflow threshold // corrected // 11/15/02 Improved performance on Itanium 2, added possible over/under paths +// 05/30/03 Set inexact flag on unmasked overflow/underflow // // // API @@ -521,7 +522,7 @@ EXP_CERTAIN_OVERFLOW: } { .mfb mov GR_Parameter_TAG = 16 - fma.s.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result + fma.s.s0 FR_RESULT = fTmp, fTmp, fTmp // Set I,O and +INF result br.cond.sptk __libm_error_region } ;; @@ -604,6 +605,13 @@ EXP_CERTAIN_UNDERFLOW: } ;; +{ .mfi + nop.m 0 + fmerge.se fTmp = fTmp, f64DivLn2 // Small with non-trivial signif + nop.i 0 +} +;; + { .mfb nop.m 0 fma.s.s0 f8 = fTmp, fTmp, f0 // Set I,U, tiny (+0.0) result @@ -649,6 +657,7 @@ EXP_UNDERFLOW_ZERO: GLOBAL_IEEE754_END(expf) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_fmod.S b/sysdeps/ia64/fpu/e_fmod.S index d801e0c..dbd0a29 100644 --- a/sysdeps/ia64/fpu/e_fmod.S +++ b/sysdeps/ia64/fpu/e_fmod.S @@ -499,6 +499,7 @@ FMOD_Y_ZERO: } GLOBAL_IEEE754_END(fmod) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_fmodf.S b/sysdeps/ia64/fpu/e_fmodf.S index fe1ec03..36e5807 100644 --- a/sysdeps/ia64/fpu/e_fmodf.S +++ b/sysdeps/ia64/fpu/e_fmodf.S @@ -514,6 +514,7 @@ EXP_ERROR_RETURN: } GLOBAL_IEEE754_END(fmodf) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_fmodl.S b/sysdeps/ia64/fpu/e_fmodl.S index da08ae3..3e87eb0 100644 --- a/sysdeps/ia64/fpu/e_fmodl.S +++ b/sysdeps/ia64/fpu/e_fmodl.S @@ -1,7 +1,7 @@ .file "fmodl.s" -// Copyright (c) 2000 - 2003, Intel Corporation +// Copyright (c) 2000 - 2004, Intel Corporation // All rights reserved. 
// // Contributed 2000 by the Intel Numerics Group, Intel Corporation @@ -43,56 +43,88 @@ // 03/02/00 New Algorithm // 04/04/00 Unwind support added // 08/15/00 Bundle added after call to __libm_error_support to properly -// set [the previously overwritten] GR_Parameter_RESULT. +// set [ the previously overwritten ] GR_Parameter_RESULT. // 11/28/00 Set FR_Y to f9 -// 03/11/02 Fixed flags for fmodl(qnan,zero) +// 03/11/02 Fixed flags for fmodl(qnan, zero) // 05/20/02 Cleaned up namespace and sf0 syntax -// 02/10/03 Reordered header: .section, .global, .proc, .align -// 04/28/03 Fix: fmod(sNaN,0) no longer sets errno +// 02/10/03 Reordered header:.section,.global,.proc,.align +// 04/28/03 Fix: fmod(sNaN, 0) no longer sets errno +// 11/23/04 Reformatted routine and improved speed // // API //==================================================================== -// long double fmodl(long double,long double); +// long double fmodl(long double, long double); // // Overview of operation //==================================================================== -// fmod(a,b)=a-i*b, -// where i is an integer such that, if b!=0, -// |i|<|a/b| and |a/b-i|<1 +// fmod(a, b)= a-i*b, +// where i is an integer such that, if b!= 0, +// |i|<|a/b| and |a/b-i|<1 // // Algorithm //==================================================================== // a). if |a|<|b|, return a // b). get quotient and reciprocal overestimates accurate to -// 33 bits (q2,y2) +// 33 bits (q2, y2) // c). if the exponent difference (exponent(a)-exponent(b)) -// is less than 32, truncate quotient to integer and -// finish in one iteration -// d). if exponent(a)-exponent(b)>=32 (q2>=2^32) -// round quotient estimate to single precision (k=RN(q2)), -// calculate partial remainder (a'=a-k*b), -// get quotient estimate (a'*y2), and repeat from c). +// is less than 32, truncate quotient to integer and +// finish in one iteration +// d). 
if exponent(a)-exponent(b)>= 32 (q2>= 2^32) +// round quotient estimate to single precision (k= RN(q2)), +// calculate partial remainder (a'= a-k*b), +// get quotient estimate (a'*y2), and repeat from c). // // Registers used //==================================================================== -// Predicate registers: p6-p11 -// General registers: r2,r29,r32 (ar.pfs), r33-r39 -// Floating point registers: f6-f15 - -GR_SAVE_B0 = r33 -GR_SAVE_PFS = r34 -GR_SAVE_GP = r35 -GR_SAVE_SP = r36 - -GR_Parameter_X = r37 -GR_Parameter_Y = r38 -GR_Parameter_RESULT = r39 -GR_Parameter_TAG = r40 - -FR_X = f10 -FR_Y = f9 -FR_RESULT = f8 +GR_SMALLBIASEXP = r2 +GR_2P32 = r3 +GR_SMALLBIASEXP = r20 +GR_ROUNDCONST = r21 +GR_SIG_B = r22 +GR_ARPFS = r23 +GR_TMP1 = r24 +GR_TMP2 = r25 +GR_TMP3 = r26 + +GR_SAVE_B0 = r33 +GR_SAVE_PFS = r34 +GR_SAVE_GP = r35 +GR_SAVE_SP = r36 + +GR_Parameter_X = r37 +GR_Parameter_Y = r38 +GR_Parameter_RESULT = r39 +GR_Parameter_TAG = r40 + +FR_X = f10 +FR_Y = f9 +FR_RESULT = f8 + +FR_ABS_A = f6 +FR_ABS_B = f7 +FR_Y_INV = f10 +FR_SMALLBIAS = f11 +FR_E0 = f12 +FR_Q = f13 +FR_E1 = f14 +FR_2P32 = f15 +FR_TMPX = f32 +FR_TMPY = f33 +FR_ROUNDCONST = f34 +FR_QINT = f35 +FR_QRND24 = f36 +FR_NORM_B = f37 +FR_TMP = f38 +FR_TMP2 = f39 +FR_DFLAG = f40 +FR_Y_INV0 = f41 +FR_Y_INV1 = f42 +FR_Q0 = f43 +FR_Q1 = f44 +FR_QINT_Z = f45 +FR_QREM = f46 +FR_B_SGN_A = f47 .section .text GLOBAL_IEEE754_ENTRY(fmodl) @@ -101,495 +133,540 @@ GLOBAL_IEEE754_ENTRY(fmodl) // result in f8 { .mfi - alloc r32=ar.pfs,1,4,4,0 - // f6=|a| - fmerge.s f6=f0,f8 - mov r2 = 0x0ffdd + getf.sig GR_SIG_B = f9 + // FR_ABS_A = |a| + fmerge.s FR_ABS_A = f0, f8 + mov GR_SMALLBIASEXP = 0x0ffdd } - {.mfi - getf.sig r29=f9 - // f7=|b| - fmerge.s f7=f0,f9 - nop.i 0;; +{ .mfi + nop.m 0 + // FR_ABS_B = |b| + fmerge.s FR_ABS_B = f0, f9 + nop.i 0 } +;; { .mfi - setf.exp f11 = r2 - // (1) y0 - frcpa.s1 f10,p6=f6,f7 - nop.i 0;; + setf.exp FR_SMALLBIAS = GR_SMALLBIASEXP + // (1) y0 + frcpa.s1 FR_Y_INV0, p6 = FR_ABS_A, 
FR_ABS_B + nop.i 0 +} +;; + +{ .mlx + nop.m 0 + movl GR_ROUNDCONST = 0x33a00000 } +;; // eliminate special cases -{.mmi -nop.m 0 -nop.m 0 -// y pseudo-zero ? -cmp.eq p7,p10=r29,r0;; +{ .mmi + nop.m 0 + nop.m 0 + // y pseudo-zero ? + cmp.eq p7, p10 = GR_SIG_B, r0 } +;; -// Y +-NAN, +-inf, +-0? p7 +// set p7 if b +/-NAN, +/-inf, +/-0 { .mfi - nop.m 999 -(p10) fclass.m p7,p10 = f9, 0xe7 - nop.i 999;; + nop.m 0 + (p10) fclass.m p7, p10 = f9, 0xe7 + nop.i 0 } +;; -// qnan snan inf norm unorm 0 -+ -// 1 1 1 0 0 0 11 -// e 3 -// X +-NAN, +-inf, ? p9 - { .mfi - nop.m 999 - fclass.m.unc p9,p11 = f8, 0xe3 - nop.i 999 + mov GR_2P32 = 0x1001f + // (2) q0 = a*y0 + (p6) fma.s1 FR_Q0 = FR_ABS_A, FR_Y_INV0, f0 + nop.i 0 +} +{ .mfi + nop.m 0 + // (3) e0 = 1 - b * y0 + (p6) fnma.s1 FR_E0 = FR_ABS_B, FR_Y_INV0, f1 + nop.i 0 } +;; -// |x| < |y|? Return x p8 +// set p9 if a +/-NAN, +/-inf +{ .mfi + nop.m 0 + fclass.m.unc p9, p11 = f8, 0xe3 + nop.i 0 +} + // |a| < |b|? Return a, p8=1 { .mfi - nop.m 999 -(p10) fcmp.lt.unc.s1 p8,p0 = f6,f7 - nop.i 999 ;; + nop.m 0 + (p10) fcmp.lt.unc.s1 p8, p0 = FR_ABS_A, FR_ABS_B + nop.i 0 } +;; - { .mfi - mov r2=0x1001f - // (2) q0=a*y0 - (p6) fma.s1 f13=f6,f10,f0 - nop.i 0 -} { .mfi - nop.m 0 - // (3) e0 = 1 - b * y0 - (p6) fnma.s1 f12=f7,f10,f1 - nop.i 0;; +// set p7 if b +/-NAN, +/-inf, +/-0 +{ .mfi + nop.m 0 + // pseudo-NaN ? + (p10) fclass.nm p7, p0 = f9, 0xff + nop.i 0 } +;; -// Y +-NAN, +-inf, +-0? p7 +// set p9 if a is +/-NaN, +/-Inf +{ .mfi + nop.m 0 + (p11) fclass.nm p9, p0 = f8, 0xff + nop.i 0 +} { .mfi - nop.m 999 - // pseudo-NaN ? -(p10) fclass.nm p7,p0 = f9, 0xff - nop.i 999 + nop.m 0 + // b denormal ? set D flag (if |a|<|b|) + (p8) fnma.s0 FR_DFLAG = f9, f1, f9 + nop.i 0 } +;; -// qnan snan inf norm unorm 0 -+ -// 1 1 1 0 0 0 11 -// e 3 -// X +-NAN, +-inf, ? 
p9 +{ .mfi + // FR_2P32 = 2^32 + setf.exp FR_2P32 = GR_2P32 + // (4) q1 = q0+e0*q0 + (p6) fma.s1 FR_Q1 = FR_E0, FR_Q0, FR_Q0 + nop.i 0 +} +{ .mfi + nop.m 0 + // (5) e1 = e0 * e0 + 2^-34 + (p6) fma.s1 FR_E1 = FR_E0, FR_E0, FR_SMALLBIAS + nop.i 0 +} +;; { .mfi - nop.m 999 -(p11) fclass.nm p9,p0 = f8, 0xff - nop.i 999;; + nop.m 0 + // normalize a (if |a|<|b|) + (p8) fma.s0 f8 = f8, f1, f0 + nop.i 0 +} +{ .bbb + (p9) br.cond.spnt FMOD_A_NAN_INF + (p7) br.cond.spnt FMOD_B_NAN_INF_ZERO + // if |a|<|b|, return + (p8) br.ret.spnt b0 } +;; + { .mfi - nop.m 0 - // y denormal ? set D flag (if |x|<|y|) - (p8) fnma.s0 f10=f9,f1,f9 - nop.i 0;; + nop.m 0 + // (6) y1 = y0 + e0 * y0 + (p6) fma.s1 FR_Y_INV1 = FR_E0, FR_Y_INV0, FR_Y_INV0 + nop.i 0 } +;; +{ .mfi + nop.m 0 + // a denormal ? set D flag + // b denormal ? set D flag + fcmp.eq.s0 p12,p0 = FR_ABS_A, FR_ABS_B + nop.i 0 +} +{ .mfi + // set FR_ROUNDCONST = 1.25*2^{-24} + setf.s FR_ROUNDCONST = GR_ROUNDCONST + // (7) q2 = q1+e1*q1 + (p6) fma.s1 FR_Q = FR_Q1, FR_E1, FR_Q1 + nop.i 0 +} +;; -{.mfi - nop.m 0 - // normalize x (if |x|<|y|) - (p8) fma.s0 f8=f8,f1,f0 - nop.i 0 +{ .mfi + nop.m 0 + fmerge.s FR_B_SGN_A = f8, f9 + nop.i 0 } -{.bbb - (p9) br.cond.spnt FMOD_X_NAN_INF - (p7) br.cond.spnt FMOD_Y_NAN_INF_ZERO - // if |x|<|y|, return - (p8) br.ret.spnt b0;; +{ .mfi + nop.m 0 + // (8) y2 = y1 + e1 * y1 + (p6) fma.s1 FR_Y_INV = FR_E1, FR_Y_INV1, FR_Y_INV1 + // set p6 = 0, p10 = 0 + cmp.ne.and p6, p10 = r0, r0 } +;; - {.mfi - nop.m 0 - // x denormal ? set D flag - fnma.s0 f32=f6,f1,f6 - nop.i 0 +// will compute integer quotient bits (24 bits per iteration) +.align 32 +loop64: +{ .mfi + nop.m 0 + // compare q2, 2^32 + fcmp.lt.unc.s1 p8, p7 = FR_Q, FR_2P32 + nop.i 0 } -{.mfi - nop.m 0 - // y denormal ? 
set D flag - fnma.s0 f33=f7,f1,f7 - nop.i 0;; +{ .mfi + nop.m 0 + // will truncate quotient to integer, if exponent<32 (in advance) + fcvt.fx.trunc.s1 FR_QINT = FR_Q + nop.i 0 } +;; - {.mfi - // f15=2^32 - setf.exp f15=r2 - // (4) q1=q0+e0*q0 - (p6) fma.s1 f13=f12,f13,f13 - nop.i 0 +{ .mfi + nop.m 0 + // if exponent>32 round quotient to single precision (perform in advance) + fma.s.s1 FR_QRND24 = FR_Q, f1, f0 + nop.i 0 } +;; + { .mfi - nop.m 0 - // (5) e1 = e0 * e0 + 2^-34 - (p6) fma.s1 f14=f12,f12,f11 - nop.i 0;; + nop.m 0 + // set FR_ROUNDCONST = sgn(a) + (p8) fmerge.s FR_ROUNDCONST = f8, f1 + nop.i 0 } -{.mlx - nop.m 0 - movl r2=0x33a00000;; +{ .mfi + nop.m 0 + // normalize truncated quotient + (p8) fcvt.xf FR_QRND24 = FR_QINT + nop.i 0 } +;; + { .mfi - nop.m 0 - // (6) y1 = y0 + e0 * y0 - (p6) fma.s1 f10=f12,f10,f10 - nop.i 0;; + nop.m 0 + // calculate remainder (assuming FR_QRND24 = RZ(Q)) + (p7) fnma.s1 FR_E1 = FR_QRND24, FR_ABS_B, FR_ABS_A + nop.i 0 } -{.mfi - // set f12=1.25*2^{-24} - setf.s f12=r2 - // (7) q2=q1+e1*q1 - (p6) fma.s1 f13=f13,f14,f13 - nop.i 0;; +{ .mfi + nop.m 0 + // also if exponent>32, round quotient to single precision + // and subtract 1 ulp: q = q-q*(1.25*2^{-24}) + (p7) fnma.s.s1 FR_QINT_Z = FR_QRND24, FR_ROUNDCONST, FR_QRND24 + nop.i 0 } -{.mfi - nop.m 0 - fmerge.s f9=f8,f9 - nop.i 0 +;; + +{ .mfi + nop.m 0 + // (p8) calculate remainder (82-bit format) + (p8) fnma.s1 FR_QREM = FR_QRND24, FR_ABS_B, FR_ABS_A + nop.i 0 } { .mfi - nop.m 0 - // (8) y2 = y1 + e1 * y1 - (p6) fma.s1 f10=f14,f10,f10 - // set p6=0, p10=0 - cmp.ne.and p6,p10=r0,r0;; + nop.m 0 + // (p7) calculate remainder (assuming FR_QINT_Z = RZ(Q)) + (p7) fnma.s1 FR_ABS_A = FR_QINT_Z, FR_ABS_B, FR_ABS_A + nop.i 0 } +;; +{ .mfi + nop.m 0 + // Final iteration (p8): is FR_ABS_A the correct remainder + // (quotient was not overestimated) ? 
+ (p8) fcmp.lt.unc.s1 p6, p10 = FR_QREM, f0 + nop.i 0 +} +;; -.align 32 -loop64: - {.mfi - nop.m 0 - // compare q2, 2^32 - fcmp.lt.unc.s1 p8,p7=f13,f15 - nop.i 0 -} - {.mfi - nop.m 0 - // will truncate quotient to integer, if exponent<32 (in advance) - fcvt.fx.trunc.s1 f11=f13 - nop.i 0;; -} - {.mfi - nop.m 0 - // if exponent>32, round quotient to single precision (perform in advance) - fma.s.s1 f13=f13,f1,f0 - nop.i 0;; -} - - - {.mfi - nop.m 0 - // set f12=sgn(a) - (p8) fmerge.s f12=f8,f1 - nop.i 0 -} - {.mfi - nop.m 0 - // normalize truncated quotient - (p8) fcvt.xf f13=f11 - nop.i 0;; -} - { .mfi - nop.m 0 - // calculate remainder (assuming f13=RZ(Q)) - (p7) fnma.s1 f14=f13,f7,f6 - nop.i 0 -} - {.mfi - nop.m 0 - // also if exponent>32, round quotient to single precision - // and subtract 1 ulp: q=q-q*(1.25*2^{-24}) - (p7) fnma.s.s1 f11=f13,f12,f13 - nop.i 0;; -} - - {.mfi - nop.m 0 - // (p8) calculate remainder (82-bit format) - (p8) fnma.s1 f11=f13,f7,f6 - nop.i 0 -} - {.mfi - nop.m 0 - // (p7) calculate remainder (assuming f11=RZ(Q)) - (p7) fnma.s1 f6=f11,f7,f6 - nop.i 0;; -} - - - {.mfi - nop.m 0 - // Final iteration (p8): is f6 the correct remainder (quotient was not overestimated) ? - (p8) fcmp.lt.unc.s1 p6,p10=f11,f0 - nop.i 0;; -} - {.mfi - nop.m 0 - // get new quotient estimation: a'*y2 - (p7) fma.s1 f13=f14,f10,f0 - nop.i 0 -} - {.mfb - nop.m 0 - // was f13=RZ(Q) ? 
(then new remainder f14>=0) - (p7) fcmp.lt.unc.s1 p7,p9=f14,f0 - nop.b 0;; -} - - -.pred.rel "mutex",p6,p10 - {.mfb - nop.m 0 - // add b to estimated remainder (to cover the case when the quotient was overestimated) - // also set correct sign by using f9=|b|*sgn(a), f12=sgn(a) - (p6) fma.s0 f8=f11,f12,f9 - nop.b 0 -} - {.mfb - nop.m 0 - // set correct sign of result before returning: f12=sgn(a) - (p10) fma.s0 f8=f11,f12,f0 - (p8) br.ret.sptk b0;; -} - {.mfi - nop.m 0 - // if f13!=RZ(Q), get alternative quotient estimation: a''*y2 - (p7) fma.s1 f13=f6,f10,f0 - nop.i 0 -} - {.mfb - nop.m 0 - // if f14 was RZ(Q), set remainder to f14 - (p9) mov f6=f14 - br.cond.sptk loop64;; +{ .mfi + nop.m 0 + // get new quotient estimation: a'*y2 + (p7) fma.s1 FR_Q = FR_E1, FR_Y_INV, f0 + nop.i 0 } +{ .mfb + nop.m 0 + // was FR_Q = RZ(Q) ? (then new remainder FR_E1> = 0) + (p7) fcmp.lt.unc.s1 p7, p9 = FR_E1, f0 + nop.b 0 +} +;; +.pred.rel "mutex", p6, p10 +{ .mfb + nop.m 0 + // add b to estimated remainder (to cover the case when the quotient was + // overestimated) + // also set correct sign by using + // FR_B_SGN_A = |b|*sgn(a), FR_ROUNDCONST = sgn(a) + (p6) fma.s0 f8 = FR_QREM, FR_ROUNDCONST, FR_B_SGN_A + nop.b 0 +} +{ .mfb + nop.m 0 + // set correct sign of result before returning: FR_ROUNDCONST = sgn(a) + (p10) fma.s0 f8 = FR_QREM, FR_ROUNDCONST, f0 + (p8) br.ret.sptk b0 +} +;; +{ .mfi + nop.m 0 + // if f13! = RZ(Q), get alternative quotient estimation: a''*y2 + (p7) fma.s1 FR_Q = FR_ABS_A, FR_Y_INV, f0 + nop.i 0 +} +{ .mfb + nop.m 0 + // if FR_E1 was RZ(Q), set remainder to FR_E1 + (p9) fma.s1 FR_ABS_A = FR_E1, f1, f0 + br.cond.sptk loop64 +} +;; -FMOD_X_NAN_INF: +FMOD_A_NAN_INF: -// Y zero ? -{.mfi - nop.m 0 - fclass.m p10,p0=f8,0xc3 // Test x=nan - nop.i 0 +// b zero ? 
+{ .mfi + nop.m 0 + fclass.m p10, p0 = f8, 0xc3 // Test a = nan + nop.i 0 } -{.mfi - nop.m 0 - fma.s1 f10=f9,f1,f0 - nop.i 0;; +{ .mfi + nop.m 0 + fma.s1 FR_NORM_B = f9, f1, f0 + nop.i 0 } +;; -{.mfi - nop.m 0 - fma.s0 f8=f8,f1,f0 - nop.i 0 +{ .mfi + nop.m 0 + fma.s0 f8 = f8, f1, f0 + nop.i 0 } -{.mfi - nop.m 0 -(p10) fclass.m p10,p0=f9,0x07 // Test x=nan, and y=zero - nop.i 0;; +{ .mfi + nop.m 0 + (p10) fclass.m p10, p0 = f9, 0x07 // Test x = nan, and y = zero + nop.i 0 } -{.mfb - nop.m 0 - fcmp.eq.unc.s1 p11,p0=f10,f0 -(p10) br.ret.spnt b0;; // Exit with result=x if x=nan and y=zero +;; + +{ .mfb + nop.m 0 + fcmp.eq.unc.s1 p11, p0 = FR_NORM_B, f0 + (p10) br.ret.spnt b0 // Exit with result = a if a = nan and b = zero } -{.mib - nop.m 0 - nop.i 0 - // if Y zero - (p11) br.cond.spnt FMOD_Y_ZERO;; +;; + +{ .mib + nop.m 0 + nop.i 0 + // if Y zero + (p11) br.cond.spnt FMOD_B_ZERO } +;; -// X infinity? Return QNAN indefinite +// a= infinity? Return QNAN indefinite { .mfi - // set p7 t0 0 - cmp.ne p7,p0=r0,r0 - fclass.m.unc p8,p9 = f8, 0x23 - nop.i 999;; + // set p7 t0 0 + cmp.ne p7, p0 = r0, r0 + fclass.m.unc p8, p9 = f8, 0x23 + nop.i 0 } -// Y NaN ? -{.mfi - nop.m 999 -(p8) fclass.m p9,p8=f9,0xc3 - nop.i 0;; +;; + +// b NaN ? +{ .mfi + nop.m 0 + (p8) fclass.m p9, p8 = f9, 0xc3 + nop.i 0 } -// Y not pseudo-zero ? (r29 holds significand) -{.mii - nop.m 999 -(p8) cmp.ne p7,p0=r29,r0 - nop.i 0;; +;; + +// b not pseudo-zero ? 
(GR_SIG_B holds significand) +{ .mii + nop.m 0 + (p8) cmp.ne p7, p0 = GR_SIG_B, r0 + nop.i 0 } -{.mfi - nop.m 999 -(p8) frcpa.s0 f8,p0 = f8,f8 - nop.i 0 +;; + +{ .mfi + nop.m 0 + (p8) frcpa.s0 f8, p0 = f8, f8 + nop.i 0 } { .mfi - nop.m 999 - // also set Denormal flag if necessary -(p7) fnma.s0 f9=f9,f1,f9 - nop.i 999 ;; + nop.m 0 + // also set Denormal flag if necessary + (p7) fnma.s0 f9 = f9, f1, f9 + nop.i 0 } +;; { .mfb - nop.m 999 -(p8) fma.s0 f8=f8,f1,f0 - nop.b 999 ;; + nop.m 0 + (p8) fma.s0 f8 = f8, f1, f0 + nop.b 0 } +;; { .mfb - nop.m 999 -(p9) frcpa.s0 f8,p7=f8,f9 - br.ret.sptk b0 ;; + nop.m 0 + (p9) frcpa.s0 f8, p7 = f8, f9 + br.ret.sptk b0 } +;; - -FMOD_Y_NAN_INF_ZERO: -// Y INF +FMOD_B_NAN_INF_ZERO: +// b INF { .mfi - nop.m 999 - fclass.m.unc p7,p0 = f9, 0x23 - nop.i 999 ;; + nop.m 0 + fclass.m.unc p7, p0 = f9, 0x23 + nop.i 0 } +;; { .mfb - nop.m 999 -(p7) fma.s0 f8=f8,f1,f0 -(p7) br.ret.spnt b0 ;; + nop.m 0 + (p7) fma.s0 f8 = f8, f1, f0 + (p7) br.ret.spnt b0 } +;; -// Y NAN? +// b NAN? { .mfi - nop.m 999 - fclass.m.unc p9,p10 = f9, 0xc3 - nop.i 999 ;; + nop.m 0 + fclass.m.unc p9, p10 = f9, 0xc3 + nop.i 0 } +;; + { .mfi - nop.m 999 -(p10) fclass.nm p9,p0 = f9, 0xff - nop.i 999 ;; + nop.m 0 + (p10) fclass.nm p9, p0 = f9, 0xff + nop.i 0 } +;; { .mfb - nop.m 999 -(p9) fma.s0 f8=f9,f1,f0 -(p9) br.ret.spnt b0 ;; + nop.m 0 + (p9) fma.s0 f8 = f9, f1, f0 + (p9) br.ret.spnt b0 } +;; -FMOD_Y_ZERO: +FMOD_B_ZERO: // Y zero? Must be zero at this point // because it is the only choice left. // Return QNAN indefinite -{.mfi - nop.m 0 - // set Invalid - frcpa.s0 f12,p0=f0,f0 - nop.i 0 -} -// X NAN? { .mfi - nop.m 999 - fclass.m.unc p9,p10 = f8, 0xc3 - nop.i 999 ;; + nop.m 0 + // set Invalid + frcpa.s0 FR_TMP, p0 = f0, f0 + nop.i 0 } +;; + +// a NAN? 
{ .mfi - nop.m 999 -(p10) fclass.nm p9,p10 = f8, 0xff - nop.i 999 ;; + nop.m 0 + fclass.m.unc p9, p10 = f8, 0xc3 + nop.i 0 } +;; -{.mfi - nop.m 999 - (p9) frcpa.s0 f11,p7=f8,f0 - nop.i 0;; +{ .mfi + alloc GR_ARPFS = ar.pfs, 1, 4, 4, 0 + (p10) fclass.nm p9, p10 = f8, 0xff + nop.i 0 } - +;; { .mfi - nop.m 999 -(p10) frcpa.s0 f11,p7 = f9,f9 - mov GR_Parameter_TAG = 120 ;; + nop.m 0 + (p9) frcpa.s0 FR_TMP2, p7 = f8, f0 + nop.i 0 } +;; { .mfi - nop.m 999 - fmerge.s f10 = f8, f8 - nop.i 999 + nop.m 0 + (p10) frcpa.s0 FR_TMP2, p7 = f9, f9 + mov GR_Parameter_TAG = 120 } +;; +{ .mfi + nop.m 0 + fmerge.s FR_X = f8, f8 + nop.i 0 +} { .mfb - nop.m 999 - fma.s0 f8=f11,f1,f0 - br.sptk __libm_error_region;; + nop.m 0 + fma.s0 f8 = FR_TMP2, f1, f0 + br.sptk __libm_error_region } +;; GLOBAL_IEEE754_END(fmodl) - LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi - add GR_Parameter_Y=-32,sp // Parameter 2 value - nop.f 0 -.save ar.pfs,GR_SAVE_PFS - mov GR_SAVE_PFS=ar.pfs // Save ar.pfs + add GR_Parameter_Y = -32, sp // Parameter 2 value + nop.f 0 +.save ar.pfs, GR_SAVE_PFS + mov GR_SAVE_PFS = ar.pfs // Save ar.pfs } { .mfi .fframe 64 - add sp=-64,sp // Create new stack - nop.f 0 - mov GR_SAVE_GP=gp // Save gp -};; + add sp = -64, sp // Create new stack + nop.f 0 + mov GR_SAVE_GP = gp // Save gp +} +;; + { .mmi - stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack - add GR_Parameter_X = 16,sp // Parameter 1 address -.save b0, GR_SAVE_B0 - mov GR_SAVE_B0=b0 // Save b0 -};; + stfe [ GR_Parameter_Y ] = FR_Y, 16 // Save Parameter 2 on stack + add GR_Parameter_X = 16, sp // Parameter 1 address +.save b0, GR_SAVE_B0 + mov GR_SAVE_B0 = b0 // Save b0 +} +;; + .body { .mib - stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack - add GR_Parameter_RESULT = 0,GR_Parameter_Y - nop.b 0 // Parameter 3 address + stfe [ GR_Parameter_X ] = FR_X // Store Parameter 1 on stack + add GR_Parameter_RESULT = 0, GR_Parameter_Y + nop.b 0 // Parameter 3 address } { .mib - stfe [GR_Parameter_Y] 
= FR_RESULT // Store Parameter 3 on stack - add GR_Parameter_Y = -16,GR_Parameter_Y - br.call.sptk b0=__libm_error_support# // Call error handling function -};; + stfe [ GR_Parameter_Y ] = FR_RESULT // Store Parameter 3 on stack + add GR_Parameter_Y = -16, GR_Parameter_Y + br.call.sptk b0 = __libm_error_support# // Call error handling function +} +;; + { .mmi - nop.m 0 - nop.m 0 - add GR_Parameter_RESULT = 48,sp -};; + nop.m 0 + nop.m 0 + add GR_Parameter_RESULT = 48, sp +} +;; + { .mmi - ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack + ldfe f8 = [ GR_Parameter_RESULT ] // Get return result off stack .restore sp - add sp = 64,sp // Restore stack pointer - mov b0 = GR_SAVE_B0 // Restore return address -};; + add sp = 64, sp // Restore stack pointer + mov b0 = GR_SAVE_B0 // Restore return address +} +;; + { .mib - mov gp = GR_SAVE_GP // Restore gp - mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs - br.ret.sptk b0 // Return -};; + mov gp = GR_SAVE_GP // Restore gp + mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs + br.ret.sptk b0 // Return +} +;; LOCAL_LIBM_END(__libm_error_region) - - - -.type __libm_error_support#,@function +.type __libm_error_support#, @function .global __libm_error_support# - - diff --git a/sysdeps/ia64/fpu/e_hypot.S b/sysdeps/ia64/fpu/e_hypot.S index 885c819..36cfd1e 100644 --- a/sysdeps/ia64/fpu/e_hypot.S +++ b/sysdeps/ia64/fpu/e_hypot.S @@ -106,6 +106,7 @@ FR_RESULT = f8 LOCAL_LIBM_ENTRY(cabs) LOCAL_LIBM_END(cabs) + GLOBAL_IEEE754_ENTRY(hypot) {.mfi @@ -384,6 +385,7 @@ GLOBAL_IEEE754_ENTRY(hypot) (p9) br.ret.sptk b0;; } GLOBAL_IEEE754_END(hypot) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_hypotf.S b/sysdeps/ia64/fpu/e_hypotf.S index 633bb67..d6fcbd1 100644 --- a/sysdeps/ia64/fpu/e_hypotf.S +++ b/sysdeps/ia64/fpu/e_hypotf.S @@ -106,6 +106,7 @@ FR_RESULT = f8 LOCAL_LIBM_ENTRY(cabsf) LOCAL_LIBM_END(cabsf) + GLOBAL_IEEE754_ENTRY(hypotf) {.mfi alloc r32= ar.pfs,0,4,4,0 @@ -337,6 +338,7 @@ 
GLOBAL_IEEE754_ENTRY(hypotf) (p9) br.ret.sptk b0;; } GLOBAL_IEEE754_END(hypotf) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mii diff --git a/sysdeps/ia64/fpu/e_hypotl.S b/sysdeps/ia64/fpu/e_hypotl.S index 0aa94b6..988b86e 100644 --- a/sysdeps/ia64/fpu/e_hypotl.S +++ b/sysdeps/ia64/fpu/e_hypotl.S @@ -105,6 +105,7 @@ FR_RESULT = f8 LOCAL_LIBM_ENTRY(cabsl) LOCAL_LIBM_END(cabsl) + GLOBAL_IEEE754_ENTRY(hypotl) {.mfi alloc r32= ar.pfs,0,4,4,0 @@ -421,6 +422,7 @@ GLOBAL_IEEE754_ENTRY(hypotl) (p9) br.ret.sptk b0;; } GLOBAL_IEEE754_END(hypotl) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_lgamma_r.c b/sysdeps/ia64/fpu/e_lgamma_r.c index e892635..ebc90fc 100644 --- a/sysdeps/ia64/fpu/e_lgamma_r.c +++ b/sysdeps/ia64/fpu/e_lgamma_r.c @@ -1,5 +1,6 @@ /* file: lgamma_r.c */ + // Copyright (c) 2002 Intel Corporation // All rights reserved. // @@ -20,7 +21,6 @@ // products derived from this software without specific prior written // permission. -// WARRANTY DISCLAIMER // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT diff --git a/sysdeps/ia64/fpu/e_lgammaf_r.c b/sysdeps/ia64/fpu/e_lgammaf_r.c index e5d4d2e..4efa840 100644 --- a/sysdeps/ia64/fpu/e_lgammaf_r.c +++ b/sysdeps/ia64/fpu/e_lgammaf_r.c @@ -1,5 +1,6 @@ /* file: lgammaf_r.c */ + // Copyright (c) 2002 Intel Corporation // All rights reserved. // @@ -20,7 +21,6 @@ // products derived from this software without specific prior written // permission. 
-// WARRANTY DISCLAIMER // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT diff --git a/sysdeps/ia64/fpu/e_lgammal_r.c b/sysdeps/ia64/fpu/e_lgammal_r.c index a2b36d6..3fbea70 100644 --- a/sysdeps/ia64/fpu/e_lgammal_r.c +++ b/sysdeps/ia64/fpu/e_lgammal_r.c @@ -1,5 +1,6 @@ /* file: lgammal_r.c */ + // Copyright (c) 2002 Intel Corporation // All rights reserved. // @@ -20,7 +21,6 @@ // products derived from this software without specific prior written // permission. -// WARRANTY DISCLAIMER // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT diff --git a/sysdeps/ia64/fpu/e_log.S b/sysdeps/ia64/fpu/e_log.S index f80f153..7b277f8 100644 --- a/sysdeps/ia64/fpu/e_log.S +++ b/sysdeps/ia64/fpu/e_log.S @@ -1386,6 +1386,7 @@ GLOBAL_IEEE754_ENTRY(log10) };; GLOBAL_IEEE754_END(log10) + GLOBAL_IEEE754_ENTRY(log) { .mfi getf.exp GR_Exp = f8 // if x is unorm then must recompute @@ -1667,6 +1668,7 @@ log_libm_err: };; GLOBAL_IEEE754_END(log) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_log2.S b/sysdeps/ia64/fpu/e_log2.S index 7679357..660a952 100644 --- a/sysdeps/ia64/fpu/e_log2.S +++ b/sysdeps/ia64/fpu/e_log2.S @@ -655,6 +655,7 @@ SPECIAL_LOG2: GLOBAL_LIBM_END(log2) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_log2f.S b/sysdeps/ia64/fpu/e_log2f.S index 6de2f38..17d710a 100644 --- a/sysdeps/ia64/fpu/e_log2f.S +++ b/sysdeps/ia64/fpu/e_log2f.S @@ -493,6 +493,7 @@ SPECIAL_log2f: GLOBAL_LIBM_END(log2f) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_log2l.S b/sysdeps/ia64/fpu/e_log2l.S index 37af2f2..b3fe63f 100644 --- a/sysdeps/ia64/fpu/e_log2l.S +++ b/sysdeps/ia64/fpu/e_log2l.S @@ -761,6 +761,7 @@ LOG2_PSEUDO_ZERO: GLOBAL_IEEE754_END(log2l) + 
LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_logf.S b/sysdeps/ia64/fpu/e_logf.S index 0ca6d3f..186edab 100644 --- a/sysdeps/ia64/fpu/e_logf.S +++ b/sysdeps/ia64/fpu/e_logf.S @@ -841,6 +841,7 @@ GLOBAL_IEEE754_ENTRY(log10f) br.cond.sptk logf_log10f_common };; GLOBAL_IEEE754_END(log10f) + GLOBAL_IEEE754_ENTRY(logf) { .mfi getf.exp GR_Exp = f8 // if x is unorm then must recompute @@ -1087,6 +1088,7 @@ logf_libm_err: };; GLOBAL_IEEE754_END(logf) + // Stack operations when calling error support. // (1) (2) (3) (call) (4) // sp -> + psp -> + psp -> + sp -> + diff --git a/sysdeps/ia64/fpu/e_logl.S b/sysdeps/ia64/fpu/e_logl.S index ba6b55b..3ebb20a 100644 --- a/sysdeps/ia64/fpu/e_logl.S +++ b/sysdeps/ia64/fpu/e_logl.S @@ -634,6 +634,7 @@ GLOBAL_IEEE754_ENTRY(logl) GLOBAL_IEEE754_END(logl) + GLOBAL_IEEE754_ENTRY(log10l) { .mfi alloc r32 = ar.pfs,0,21,4,0 @@ -1144,6 +1145,7 @@ LOGL_64_negative: GLOBAL_IEEE754_END(log10l) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_pow.S b/sysdeps/ia64/fpu/e_pow.S index 11fae53..86005f2 100644 --- a/sysdeps/ia64/fpu/e_pow.S +++ b/sysdeps/ia64/fpu/e_pow.S @@ -2234,6 +2234,7 @@ POW_OVER_UNDER_ERROR: GLOBAL_LIBM_END(pow) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/e_powf.S b/sysdeps/ia64/fpu/e_powf.S index 275843f..4c839cb 100644 --- a/sysdeps/ia64/fpu/e_powf.S +++ b/sysdeps/ia64/fpu/e_powf.S @@ -64,6 +64,8 @@ // 05/20/02 Cleaned up namespace and sf0 syntax // 08/29/02 Improved Itanium 2 performance // 02/10/03 Reordered header: .section, .global, .proc, .align +// 10/09/03 Modified algorithm to improve performance, reduce table size, and +// fix boundary case powf(2.0,-150.0) // // API //============================================================== @@ -106,37 +108,33 @@ // // Log(1/Cm) = log(1/frcpa(1+m/256)) where m goes from 0 to 255. // -// We tabluate as two doubles, T and t, where T +t is the value itself. 
+// We tabluate as one double, T for single precision power // -// Log(x) = (K Log(2)_hi + T) + (Log(2)_hi + t) + Log( 1 + (Bx-1)) -// Log(x) = G + delta + Log( 1 + (Bx-1)) +// Log(x) = (K Log(2)_hi + T) + (K Log(2)_lo) + Log( 1 + (Bx-1)) +// Log(x) = G + delta + Log( 1 + (Bx-1)) // // The Log( 1 + (Bx-1)) can be calculated as a series in r = Bx-1. // // Log( 1 + (Bx-1)) = r - rsq/2 + p +// where p = r^3(P0 + P1*r + P2*r^2) // // Then, // // yLog(x) = yG + y delta + y(r-rsq/2) + yp -// yLog(x) = Z1 + e3 + Z2 + Z3 + (e2 + e3) +// yLog(x) = Z1 + e3 + Z2 + Z3 // // -// exp(yLog(x)) = exp(Z1 + Z2 + Z3) exp(e1 + e2 + e3) +// exp(yLog(x)) = exp(Z1 + Z2) exp(Z3) exp(e3) // // // exp(Z3) is another series. -// exp(e1 + e2 + e3) is approximated as f3 = 1 + (e1 + e2 + e3) +// exp(e3) is approximated as f3 = 1 + e3 // -// Z1 (128/log2) = number of log2/128 in Z1 is N1 -// Z2 (128/log2) = number of log2/128 in Z2 is N2 -// -// s1 = Z1 - N1 log2/128 -// s2 = Z2 - N2 log2/128 +// exp(Z1 + Z2) = exp(Z) +// Z (128/log2) = number of log2/128 in Z is N // -// s = s1 + s2 -// N = N1 + N2 +// s = Z - N log2/128 // -// exp(Z1 + Z2) = exp(Z) // exp(Z) = exp(s) exp(N log2/128) // // exp(r) = exp(Z - N log2/128) @@ -161,13 +159,11 @@ // N log2/128 = M log2 + I2 log2/8 + I1 log2/128 // // exp(Z) = exp(s) (1+d) exp(log(2^M) + log(2^I2/8) + log(2^I1/128)) -// exp(Z) = exp(s) (1+d1) (1+d2)(2^M) 2^I2/8 2^I1/128 -// exp(Z) = exp(s) f1 f2 (2^M) 2^I2/8 2^I1/128 +// exp(Z) = exp(s) f12 (2^M) 2^I2/8 2^I1/128 // // I1, I2 are table indices. Use a series for exp(s). 
// Then get exp(Z) // -// exp(yLog(x)) = exp(Z1 + Z2 + Z3) exp(e1 + e2 + e3) // exp(yLog(x)) = exp(Z) exp(Z3) f3 // exp(yLog(x)) = exp(Z)f3 exp(Z3) // exp(yLog(x)) = A exp(Z3) @@ -331,6 +327,8 @@ // +------------+----------------+-+ // | 13 bits | 50 bits | | // +------------+----------------+-+ +// +// Note: For powf only the table of T is needed // Special Cases @@ -402,10 +400,17 @@ // integer registers used +pow_GR_exp_half = r10 +pow_GR_signexp_Xm1 = r11 +pow_GR_tmp = r11 + pow_GR_signexp_X = r14 pow_GR_17ones = r15 +pow_GR_Fpsr = r15 pow_AD_P = r16 +pow_GR_rcs0_mask = r16 pow_GR_exp_2tom8 = r17 +pow_GR_rcs0 = r17 pow_GR_sig_X = r18 pow_GR_10033 = r19 pow_GR_16ones = r20 @@ -423,9 +428,6 @@ pow_GR_offset = r29 pow_GR_exp_Xm1 = r30 pow_GR_xneg_yodd = r31 -pow_GR_signexp_Xm1 = r35 -pow_GR_int_W1 = r36 -pow_GR_int_W2 = r37 pow_GR_int_N = r38 pow_GR_index1 = r39 pow_GR_index2 = r40 @@ -465,24 +467,20 @@ POW_B = f32 POW_NORM_X = f33 POW_Xm1 = f34 POW_r1 = f34 -POW_P4 = f35 -POW_P5 = f36 POW_NORM_Y = f37 POW_Q2 = f38 -POW_Q3 = f39 +POW_eps = f39 POW_P2 = f40 -POW_P3 = f41 POW_P0 = f42 POW_log2_lo = f43 POW_r = f44 POW_Q0_half = f45 -POW_Q1 = f46 POW_tmp = f47 POW_log2_hi = f48 -POW_Q4 = f49 +POW_Q1 = f49 POW_P1 = f50 POW_log2_by_128_hi = f51 @@ -491,54 +489,33 @@ POW_rsq = f53 POW_Yrcub = f54 POW_log2_by_128_lo = f55 -POW_v6 = f56 POW_xsq = f57 -POW_v4 = f58 POW_v2 = f59 POW_T = f60 -POW_Tt = f61 POW_RSHF = f62 -POW_v21ps = f63 -POW_s4 = f64 +POW_v210 = f63 POW_twoV = f65 POW_U = f66 POW_G = f67 POW_delta = f68 -POW_v3 = f69 POW_V = f70 POW_p = f71 -POW_Z1 = f72 +POW_Z = f72 POW_e3 = f73 -POW_e2 = f74 POW_Z2 = f75 -POW_e1 = f76 POW_W1 = f77 -POW_UmZ2 = f78 -POW_W2 = f79 POW_Z3 = f80 -POW_int_W1 = f81 -POW_e12 = f82 -POW_int_W2 = f83 -POW_UmZ2pV = f84 POW_Z3sq = f85 -POW_e123 = f86 -POW_N1float = f87 -POW_N2float = f88 +POW_Nfloat = f87 POW_f3 = f89 POW_q = f90 -POW_s1 = f91 -POW_Nfloat = f92 -POW_s2 = f93 -POW_f2 = f94 -POW_f1 = f95 - POW_T1 = f96 POW_T2 = f97 
POW_2M = f98 @@ -575,25 +552,18 @@ RODATA .align 16 LOCAL_OBJECT_START(pow_table_P) -data8 0x8000F7B249FF332D, 0x0000BFFC // P_5 -data8 0xAAAAAAA9E7902C7F, 0x0000BFFC // P_3 data8 0x80000000000018E5, 0x0000BFFD // P_1 data8 0xb8aa3b295c17f0bc, 0x00004006 // inv_ln2_by_128 // // data8 0x3FA5555555554A9E // Q_2 -data8 0x3F8111124F4DD9F9 // Q_3 -data8 0x3FE0000000000000 // Q_0 +data8 0x0000000000000000 // Pad data8 0x3FC5555555554733 // Q_1 -data8 0x3F56C16D9360FFA0 // Q_4 data8 0x43e8000000000000 // Right shift constant for exp data8 0xc9e3b39803f2f6af, 0x00003fb7 // ln2_by_128_lo -data8 0x0000000000000000 // pad to eliminate bank conflicts with pow_table_Q -data8 0x0000000000000000 // pad to eliminate bank conflicts with pow_table_Q LOCAL_OBJECT_END(pow_table_P) LOCAL_OBJECT_START(pow_table_Q) -data8 0x9249FE7F0DC423CF, 0x00003FFC // P_4 data8 0xCCCCCCCC4ED2BA7F, 0x00003FFC // P_2 data8 0xAAAAAAAAAAAAB505, 0x00003FFD // P_0 data8 0x3fe62e42fefa39e8, 0x3cccd5e4f1d9cc02 // log2 hi lo = +6.93147e-001 @@ -602,262 +572,262 @@ LOCAL_OBJECT_END(pow_table_Q) LOCAL_OBJECT_START(pow_Tt) -data8 0x3f60040155d58800, 0x3c93bce0ce3ddd81 // log(1/frcpa(1+0/256))= +1.95503e-003 -data8 0x3f78121214586a00, 0x3cb540e0a5cfc9bc // log(1/frcpa(1+1/256))= +5.87661e-003 -data8 0x3f841929f9683200, 0x3cbdf1d57404da1f // log(1/frcpa(1+2/256))= +9.81362e-003 -data8 0x3f8c317384c75f00, 0x3c69806208c04c22 // log(1/frcpa(1+3/256))= +1.37662e-002 -data8 0x3f91a6b91ac73380, 0x3c7874daa716eb32 // log(1/frcpa(1+4/256))= +1.72376e-002 -data8 0x3f95ba9a5d9ac000, 0x3cacbb84e08d78ac // log(1/frcpa(1+5/256))= +2.12196e-002 -data8 0x3f99d2a807432580, 0x3cbcf80538b441e1 // log(1/frcpa(1+6/256))= +2.52177e-002 -data8 0x3f9d6b2725979800, 0x3c6095e5c8f8f359 // log(1/frcpa(1+7/256))= +2.87291e-002 -data8 0x3fa0c58fa19dfa80, 0x3cb4c5d4e9d0dda2 // log(1/frcpa(1+8/256))= +3.27573e-002 -data8 0x3fa2954c78cbce00, 0x3caa932b860ab8d6 // log(1/frcpa(1+9/256))= +3.62953e-002 -data8 0x3fa4a94d2da96c40, 0x3ca670452b76bbd5 
// log(1/frcpa(1+10/256))= +4.03542e-002 -data8 0x3fa67c94f2d4bb40, 0x3ca84104f9941798 // log(1/frcpa(1+11/256))= +4.39192e-002 -data8 0x3fa85188b630f040, 0x3cb40a882cbf0153 // log(1/frcpa(1+12/256))= +4.74971e-002 -data8 0x3faa6b8abe73af40, 0x3c988d46e25c9059 // log(1/frcpa(1+13/256))= +5.16017e-002 -data8 0x3fac441e06f72a80, 0x3cae3e930a1a2a96 // log(1/frcpa(1+14/256))= +5.52072e-002 -data8 0x3fae1e6713606d00, 0x3c8a796f6283b580 // log(1/frcpa(1+15/256))= +5.88257e-002 -data8 0x3faffa6911ab9300, 0x3c5193070351e88a // log(1/frcpa(1+16/256))= +6.24574e-002 -data8 0x3fb0ec139c5da600, 0x3c623f2a75eb992d // log(1/frcpa(1+17/256))= +6.61022e-002 -data8 0x3fb1dbd2643d1900, 0x3ca649b2ef8927f0 // log(1/frcpa(1+18/256))= +6.97605e-002 -data8 0x3fb2cc7284fe5f00, 0x3cbc5e86599513e2 // log(1/frcpa(1+19/256))= +7.34321e-002 -data8 0x3fb3bdf5a7d1ee60, 0x3c90bd4bb69dada3 // log(1/frcpa(1+20/256))= +7.71173e-002 -data8 0x3fb4b05d7aa012e0, 0x3c54e377c9b8a54f // log(1/frcpa(1+21/256))= +8.08161e-002 -data8 0x3fb580db7ceb5700, 0x3c7fdb2f98354cde // log(1/frcpa(1+22/256))= +8.39975e-002 -data8 0x3fb674f089365a60, 0x3cb9994c9d3301c1 // log(1/frcpa(1+23/256))= +8.77219e-002 -data8 0x3fb769ef2c6b5680, 0x3caaec639db52a79 // log(1/frcpa(1+24/256))= +9.14602e-002 -data8 0x3fb85fd927506a40, 0x3c9f9f99a3cf8e25 // log(1/frcpa(1+25/256))= +9.52125e-002 -data8 0x3fb9335e5d594980, 0x3ca15c3abd47d99a // log(1/frcpa(1+26/256))= +9.84401e-002 -data8 0x3fba2b0220c8e5e0, 0x3cb4ca639adf6fc3 // log(1/frcpa(1+27/256))= +1.02219e-001 -data8 0x3fbb0004ac1a86a0, 0x3ca7cb81bf959a59 // log(1/frcpa(1+28/256))= +1.05469e-001 -data8 0x3fbbf968769fca00, 0x3cb0c646c121418e // log(1/frcpa(1+29/256))= +1.09274e-001 -data8 0x3fbccfedbfee13a0, 0x3ca0465fce24ab4b // log(1/frcpa(1+30/256))= +1.12548e-001 -data8 0x3fbda727638446a0, 0x3c82803f4e2e6603 // log(1/frcpa(1+31/256))= +1.15832e-001 -data8 0x3fbea3257fe10f60, 0x3cb986a3f2313d1a // log(1/frcpa(1+32/256))= +1.19677e-001 -data8 0x3fbf7be9fedbfde0, 
0x3c97d16a6a621cf4 // log(1/frcpa(1+33/256))= +1.22985e-001 -data8 0x3fc02ab352ff25f0, 0x3c9cc6baad365600 // log(1/frcpa(1+34/256))= +1.26303e-001 -data8 0x3fc097ce579d2040, 0x3cb9ba16d329440b // log(1/frcpa(1+35/256))= +1.29633e-001 -data8 0x3fc1178e8227e470, 0x3cb7bc671683f8e6 // log(1/frcpa(1+36/256))= +1.33531e-001 -data8 0x3fc185747dbecf30, 0x3c9d1116f66d2345 // log(1/frcpa(1+37/256))= +1.36885e-001 -data8 0x3fc1f3b925f25d40, 0x3c8162c9ef939ac6 // log(1/frcpa(1+38/256))= +1.40250e-001 -data8 0x3fc2625d1e6ddf50, 0x3caad3a1ec384fc3 // log(1/frcpa(1+39/256))= +1.43627e-001 -data8 0x3fc2d1610c868130, 0x3cb3ad997036941b // log(1/frcpa(1+40/256))= +1.47015e-001 -data8 0x3fc340c597411420, 0x3cbc2308262c7998 // log(1/frcpa(1+41/256))= +1.50414e-001 -data8 0x3fc3b08b6757f2a0, 0x3cb2170d6cdf0526 // log(1/frcpa(1+42/256))= +1.53825e-001 -data8 0x3fc40dfb08378000, 0x3c9bb453c4f7b685 // log(1/frcpa(1+43/256))= +1.56677e-001 -data8 0x3fc47e74e8ca5f70, 0x3cb836a48fdfce9d // log(1/frcpa(1+44/256))= +1.60109e-001 -data8 0x3fc4ef51f6466de0, 0x3ca07a43919aa64b // log(1/frcpa(1+45/256))= +1.63553e-001 -data8 0x3fc56092e02ba510, 0x3ca85006899d97b0 // log(1/frcpa(1+46/256))= +1.67010e-001 -data8 0x3fc5d23857cd74d0, 0x3ca30a5ba6e7abbe // log(1/frcpa(1+47/256))= +1.70478e-001 -data8 0x3fc6313a37335d70, 0x3ca905586f0ac97e // log(1/frcpa(1+48/256))= +1.73377e-001 -data8 0x3fc6a399dabbd380, 0x3c9b2c6657a96684 // log(1/frcpa(1+49/256))= +1.76868e-001 -data8 0x3fc70337dd3ce410, 0x3cb50bc52f55cdd8 // log(1/frcpa(1+50/256))= +1.79786e-001 -data8 0x3fc77654128f6120, 0x3cad2eb7c9a39efe // log(1/frcpa(1+51/256))= +1.83299e-001 -data8 0x3fc7e9d82a0b0220, 0x3cba127e90393c01 // log(1/frcpa(1+52/256))= +1.86824e-001 -data8 0x3fc84a6b759f5120, 0x3cbd7fd52079f706 // log(1/frcpa(1+53/256))= +1.89771e-001 -data8 0x3fc8ab47d5f5a300, 0x3cbfae141751a3de // log(1/frcpa(1+54/256))= +1.92727e-001 -data8 0x3fc91fe490965810, 0x3cb69cf30a1c319e // log(1/frcpa(1+55/256))= +1.96286e-001 -data8 
0x3fc981634011aa70, 0x3ca5bb3d208bc42a // log(1/frcpa(1+56/256))= +1.99261e-001 -data8 0x3fc9f6c407089660, 0x3ca04d68658179a0 // log(1/frcpa(1+57/256))= +2.02843e-001 -data8 0x3fca58e729348f40, 0x3c99f5411546c286 // log(1/frcpa(1+58/256))= +2.05838e-001 -data8 0x3fcabb55c31693a0, 0x3cb9a5350eb327d5 // log(1/frcpa(1+59/256))= +2.08842e-001 -data8 0x3fcb1e104919efd0, 0x3c18965fcce7c406 // log(1/frcpa(1+60/256))= +2.11855e-001 -data8 0x3fcb94ee93e367c0, 0x3cb503716da45184 // log(1/frcpa(1+61/256))= +2.15483e-001 -data8 0x3fcbf851c0675550, 0x3cbdf1b3f7ab5378 // log(1/frcpa(1+62/256))= +2.18516e-001 -data8 0x3fcc5c0254bf23a0, 0x3ca7aab9ed0b1d7b // log(1/frcpa(1+63/256))= +2.21558e-001 -data8 0x3fccc000c9db3c50, 0x3c92a7a2a850072a // log(1/frcpa(1+64/256))= +2.24609e-001 -data8 0x3fcd244d99c85670, 0x3c9f6019120edf4c // log(1/frcpa(1+65/256))= +2.27670e-001 -data8 0x3fcd88e93fb2f450, 0x3c6affb96815e081 // log(1/frcpa(1+66/256))= +2.30741e-001 -data8 0x3fcdedd437eaef00, 0x3c72553595897976 // log(1/frcpa(1+67/256))= +2.33820e-001 -data8 0x3fce530effe71010, 0x3c90913b020fa182 // log(1/frcpa(1+68/256))= +2.36910e-001 -data8 0x3fceb89a1648b970, 0x3c837ba4045bfd25 // log(1/frcpa(1+69/256))= +2.40009e-001 -data8 0x3fcf1e75fadf9bd0, 0x3cbcea6d13e0498d // log(1/frcpa(1+70/256))= +2.43117e-001 -data8 0x3fcf84a32ead7c30, 0x3ca5e3a67b3c6d77 // log(1/frcpa(1+71/256))= +2.46235e-001 -data8 0x3fcfeb2233ea07c0, 0x3cba0c6f0049c5a6 // log(1/frcpa(1+72/256))= +2.49363e-001 -data8 0x3fd028f9c7035c18, 0x3cb0a30b06677ff6 // log(1/frcpa(1+73/256))= +2.52501e-001 -data8 0x3fd05c8be0d96358, 0x3ca0f1c77ccb5865 // log(1/frcpa(1+74/256))= +2.55649e-001 -data8 0x3fd085eb8f8ae790, 0x3cbd513f45fe7a97 // log(1/frcpa(1+75/256))= +2.58174e-001 -data8 0x3fd0b9c8e32d1910, 0x3c927449047ca006 // log(1/frcpa(1+76/256))= +2.61339e-001 -data8 0x3fd0edd060b78080, 0x3c89b52d8435f53e // log(1/frcpa(1+77/256))= +2.64515e-001 -data8 0x3fd122024cf00638, 0x3cbdd976fabda4bd // log(1/frcpa(1+78/256))= +2.67701e-001 
-data8 0x3fd14be2927aecd0, 0x3cb02f90ad0bc471 // log(1/frcpa(1+79/256))= +2.70257e-001 -data8 0x3fd180618ef18ad8, 0x3cbd003792c71a98 // log(1/frcpa(1+80/256))= +2.73461e-001 -data8 0x3fd1b50bbe2fc638, 0x3ca9ae64c6403ead // log(1/frcpa(1+81/256))= +2.76675e-001 -data8 0x3fd1df4cc7cf2428, 0x3cb43f0455f7e395 // log(1/frcpa(1+82/256))= +2.79254e-001 -data8 0x3fd214456d0eb8d0, 0x3cb0fbd748d75d30 // log(1/frcpa(1+83/256))= +2.82487e-001 -data8 0x3fd23ec5991eba48, 0x3c906edd746b77e2 // log(1/frcpa(1+84/256))= +2.85081e-001 -data8 0x3fd2740d9f870af8, 0x3ca9802e6a00a670 // log(1/frcpa(1+85/256))= +2.88333e-001 -data8 0x3fd29ecdabcdfa00, 0x3cacecef70890cfa // log(1/frcpa(1+86/256))= +2.90943e-001 -data8 0x3fd2d46602adcce8, 0x3cb97911955f3521 // log(1/frcpa(1+87/256))= +2.94214e-001 -data8 0x3fd2ff66b04ea9d0, 0x3cb12dabe191d1c9 // log(1/frcpa(1+88/256))= +2.96838e-001 -data8 0x3fd335504b355a30, 0x3cbdf9139df924ec // log(1/frcpa(1+89/256))= +3.00129e-001 -data8 0x3fd360925ec44f58, 0x3cb253e68977a1e3 // log(1/frcpa(1+90/256))= +3.02769e-001 -data8 0x3fd38bf1c3337e70, 0x3cb3d283d2a2da21 // log(1/frcpa(1+91/256))= +3.05417e-001 -data8 0x3fd3c25277333180, 0x3cadaa5b035eae27 // log(1/frcpa(1+92/256))= +3.08735e-001 -data8 0x3fd3edf463c16838, 0x3cb983d680d3c108 // log(1/frcpa(1+93/256))= +3.11399e-001 -data8 0x3fd419b423d5e8c0, 0x3cbc86dd921c139d // log(1/frcpa(1+94/256))= +3.14069e-001 -data8 0x3fd44591e0539f48, 0x3c86a76d6dc2782e // log(1/frcpa(1+95/256))= +3.16746e-001 -data8 0x3fd47c9175b6f0a8, 0x3cb59a2e013c6b5f // log(1/frcpa(1+96/256))= +3.20103e-001 -data8 0x3fd4a8b341552b08, 0x3c93f1e86e468694 // log(1/frcpa(1+97/256))= +3.22797e-001 -data8 0x3fd4d4f390890198, 0x3cbf5e4ea7c5105a // log(1/frcpa(1+98/256))= +3.25498e-001 -data8 0x3fd501528da1f960, 0x3cbf58da53e9ad10 // log(1/frcpa(1+99/256))= +3.28206e-001 -data8 0x3fd52dd06347d4f0, 0x3cb98a28cebf6eef // log(1/frcpa(1+100/256))= +3.30921e-001 -data8 0x3fd55a6d3c7b8a88, 0x3c9c76b67c2d1fd4 // log(1/frcpa(1+101/256))= 
+3.33644e-001 -data8 0x3fd5925d2b112a58, 0x3c9029616a4331b8 // log(1/frcpa(1+102/256))= +3.37058e-001 -data8 0x3fd5bf406b543db0, 0x3c9fb8292ecfc820 // log(1/frcpa(1+103/256))= +3.39798e-001 -data8 0x3fd5ec433d5c35a8, 0x3cb71a1229d17eec // log(1/frcpa(1+104/256))= +3.42545e-001 -data8 0x3fd61965cdb02c18, 0x3cbba94fe1dbb8d2 // log(1/frcpa(1+105/256))= +3.45300e-001 -data8 0x3fd646a84935b2a0, 0x3c9ee496d2c9ae57 // log(1/frcpa(1+106/256))= +3.48063e-001 -data8 0x3fd6740add31de90, 0x3cb1da3a6c7a9dfd // log(1/frcpa(1+107/256))= +3.50833e-001 -data8 0x3fd6a18db74a58c0, 0x3cb494c257add8dc // log(1/frcpa(1+108/256))= +3.53610e-001 -data8 0x3fd6cf31058670e8, 0x3cb0b244a70a8da9 // log(1/frcpa(1+109/256))= +3.56396e-001 -data8 0x3fd6f180e852f0b8, 0x3c9db7aefa866720 // log(1/frcpa(1+110/256))= +3.58490e-001 -data8 0x3fd71f5d71b894e8, 0x3cbe91c4bf324957 // log(1/frcpa(1+111/256))= +3.61289e-001 -data8 0x3fd74d5aefd66d58, 0x3cb06b3d9bfac023 // log(1/frcpa(1+112/256))= +3.64096e-001 -data8 0x3fd77b79922bd378, 0x3cb727d8804491f4 // log(1/frcpa(1+113/256))= +3.66911e-001 -data8 0x3fd7a9b9889f19e0, 0x3ca2ef22df5bc543 // log(1/frcpa(1+114/256))= +3.69734e-001 -data8 0x3fd7d81b037eb6a0, 0x3cb8fd3ba07a7ece // log(1/frcpa(1+115/256))= +3.72565e-001 -data8 0x3fd8069e33827230, 0x3c8bd1e25866e61a // log(1/frcpa(1+116/256))= +3.75404e-001 -data8 0x3fd82996d3ef8bc8, 0x3ca5aab9f5928928 // log(1/frcpa(1+117/256))= +3.77538e-001 -data8 0x3fd85855776dcbf8, 0x3ca56f33337789d6 // log(1/frcpa(1+118/256))= +3.80391e-001 -data8 0x3fd8873658327cc8, 0x3cbb8ef0401db49d // log(1/frcpa(1+119/256))= +3.83253e-001 -data8 0x3fd8aa75973ab8c8, 0x3cbb9961f509a680 // log(1/frcpa(1+120/256))= +3.85404e-001 -data8 0x3fd8d992dc8824e0, 0x3cb220512a53732d // log(1/frcpa(1+121/256))= +3.88280e-001 -data8 0x3fd908d2ea7d9510, 0x3c985f0e513bfb5c // log(1/frcpa(1+122/256))= +3.91164e-001 -data8 0x3fd92c59e79c0e50, 0x3cb82e073fd30d63 // log(1/frcpa(1+123/256))= +3.93332e-001 -data8 0x3fd95bd750ee3ed0, 0x3ca4aa7cdb6dd8a8 // 
log(1/frcpa(1+124/256))= +3.96231e-001 -data8 0x3fd98b7811a3ee58, 0x3caa93a5b660893e // log(1/frcpa(1+125/256))= +3.99138e-001 -data8 0x3fd9af47f33d4068, 0x3cac294b3b3190ba // log(1/frcpa(1+126/256))= +4.01323e-001 -data8 0x3fd9df270c1914a0, 0x3cbe1a58fd0cd67e // log(1/frcpa(1+127/256))= +4.04245e-001 -data8 0x3fda0325ed14fda0, 0x3cb1efa7950fb57e // log(1/frcpa(1+128/256))= +4.06442e-001 -data8 0x3fda33440224fa78, 0x3c8915fe75e7d477 // log(1/frcpa(1+129/256))= +4.09379e-001 -data8 0x3fda57725e80c380, 0x3ca72bd1062b1b7f // log(1/frcpa(1+130/256))= +4.11587e-001 -data8 0x3fda87d0165dd198, 0x3c91f7845f58dbad // log(1/frcpa(1+131/256))= +4.14539e-001 -data8 0x3fdaac2e6c03f890, 0x3cb6f237a911c509 // log(1/frcpa(1+132/256))= +4.16759e-001 -data8 0x3fdadccc6fdf6a80, 0x3c90ddc4b7687169 // log(1/frcpa(1+133/256))= +4.19726e-001 -data8 0x3fdb015b3eb1e790, 0x3c692dd7d90e1e8e // log(1/frcpa(1+134/256))= +4.21958e-001 -data8 0x3fdb323a3a635948, 0x3c6f85655cbe14de // log(1/frcpa(1+135/256))= +4.24941e-001 -data8 0x3fdb56fa04462908, 0x3c95252d841994de // log(1/frcpa(1+136/256))= +4.27184e-001 -data8 0x3fdb881aa659bc90, 0x3caa53a745a3642f // log(1/frcpa(1+137/256))= +4.30182e-001 -data8 0x3fdbad0bef3db160, 0x3cb32f2540dcc16a // log(1/frcpa(1+138/256))= +4.32437e-001 -data8 0x3fdbd21297781c28, 0x3cbd8e891e106f1d // log(1/frcpa(1+139/256))= +4.34697e-001 -data8 0x3fdc039236f08818, 0x3c809435af522ba7 // log(1/frcpa(1+140/256))= +4.37718e-001 -data8 0x3fdc28cb1e4d32f8, 0x3cb3944752fbd81e // log(1/frcpa(1+141/256))= +4.39990e-001 -data8 0x3fdc4e19b84723c0, 0x3c9a465260cd3fe5 // log(1/frcpa(1+142/256))= +4.42267e-001 -data8 0x3fdc7ff9c74554c8, 0x3c92447d5b6ca369 // log(1/frcpa(1+143/256))= +4.45311e-001 -data8 0x3fdca57b64e9db00, 0x3cb44344a8a00c82 // log(1/frcpa(1+144/256))= +4.47600e-001 -data8 0x3fdccb130a5ceba8, 0x3cbefaddfb97b73f // log(1/frcpa(1+145/256))= +4.49895e-001 -data8 0x3fdcf0c0d18f3268, 0x3cbd3e7bfee57898 // log(1/frcpa(1+146/256))= +4.52194e-001 -data8 
0x3fdd232075b5a200, 0x3c9222599987447c // log(1/frcpa(1+147/256))= +4.55269e-001 -data8 0x3fdd490246defa68, 0x3cabafe9a767a80d // log(1/frcpa(1+148/256))= +4.57581e-001 -data8 0x3fdd6efa918d25c8, 0x3cb58a2624e1c6fd // log(1/frcpa(1+149/256))= +4.59899e-001 -data8 0x3fdd9509707ae528, 0x3cbdc3babce578e7 // log(1/frcpa(1+150/256))= +4.62221e-001 -data8 0x3fddbb2efe92c550, 0x3cb0ac0943c434a4 // log(1/frcpa(1+151/256))= +4.64550e-001 -data8 0x3fddee2f3445e4a8, 0x3cbba9d07ce820e8 // log(1/frcpa(1+152/256))= +4.67663e-001 -data8 0x3fde148a1a2726c8, 0x3cb6537e3375b205 // log(1/frcpa(1+153/256))= +4.70004e-001 -data8 0x3fde3afc0a49ff38, 0x3cbfed5518dbc20e // log(1/frcpa(1+154/256))= +4.72350e-001 -data8 0x3fde6185206d5168, 0x3cb6572601f73d5c // log(1/frcpa(1+155/256))= +4.74702e-001 -data8 0x3fde882578823d50, 0x3c9b24abd4584d1a // log(1/frcpa(1+156/256))= +4.77060e-001 -data8 0x3fdeaedd2eac9908, 0x3cb0ceb5e4d2c8f7 // log(1/frcpa(1+157/256))= +4.79423e-001 -data8 0x3fded5ac5f436be0, 0x3ca72f21f1f5238e // log(1/frcpa(1+158/256))= +4.81792e-001 -data8 0x3fdefc9326d16ab8, 0x3c85081a1639a45c // log(1/frcpa(1+159/256))= +4.84166e-001 -data8 0x3fdf2391a21575f8, 0x3cbf11015bdd297a // log(1/frcpa(1+160/256))= +4.86546e-001 -data8 0x3fdf4aa7ee031928, 0x3cb3795bc052a2d1 // log(1/frcpa(1+161/256))= +4.88932e-001 -data8 0x3fdf71d627c30bb0, 0x3c35c61f0f5a88f3 // log(1/frcpa(1+162/256))= +4.91323e-001 -data8 0x3fdf991c6cb3b378, 0x3c97d99419be6028 // log(1/frcpa(1+163/256))= +4.93720e-001 -data8 0x3fdfc07ada69a908, 0x3cbfe9341ded70b1 // log(1/frcpa(1+164/256))= +4.96123e-001 -data8 0x3fdfe7f18eb03d38, 0x3cb85718a640c33f // log(1/frcpa(1+165/256))= +4.98532e-001 -data8 0x3fe007c053c5002c, 0x3cb3addc9c065f09 // log(1/frcpa(1+166/256))= +5.00946e-001 -data8 0x3fe01b942198a5a0, 0x3c9d5aa4c77da6ac // log(1/frcpa(1+167/256))= +5.03367e-001 -data8 0x3fe02f74400c64e8, 0x3cb5a0ee4450ef52 // log(1/frcpa(1+168/256))= +5.05793e-001 -data8 0x3fe04360be7603ac, 0x3c9dd00c35630fe0 // 
log(1/frcpa(1+169/256))= +5.08225e-001 -data8 0x3fe05759ac47fe30, 0x3cbd063e1f0bd82c // log(1/frcpa(1+170/256))= +5.10663e-001 -data8 0x3fe06b5f1911cf50, 0x3cae8da674af5289 // log(1/frcpa(1+171/256))= +5.13107e-001 -data8 0x3fe078bf0533c568, 0x3c62241edf5fd1f7 // log(1/frcpa(1+172/256))= +5.14740e-001 -data8 0x3fe08cd9687e7b0c, 0x3cb3007febcca227 // log(1/frcpa(1+173/256))= +5.17194e-001 -data8 0x3fe0a10074cf9018, 0x3ca496e84603816b // log(1/frcpa(1+174/256))= +5.19654e-001 -data8 0x3fe0b5343a234474, 0x3cb46098d14fc90a // log(1/frcpa(1+175/256))= +5.22120e-001 -data8 0x3fe0c974c89431cc, 0x3cac0a7cdcbb86c6 // log(1/frcpa(1+176/256))= +5.24592e-001 -data8 0x3fe0ddc2305b9884, 0x3cb2f753210410ff // log(1/frcpa(1+177/256))= +5.27070e-001 -data8 0x3fe0eb524bafc918, 0x3c88affd6682229e // log(1/frcpa(1+178/256))= +5.28726e-001 -data8 0x3fe0ffb54213a474, 0x3cadeefbab9af993 // log(1/frcpa(1+179/256))= +5.31214e-001 -data8 0x3fe114253da97d9c, 0x3cbaf1c2b8bc160a // log(1/frcpa(1+180/256))= +5.33709e-001 -data8 0x3fe128a24f1d9afc, 0x3cb9cf4df375e650 // log(1/frcpa(1+181/256))= +5.36210e-001 -data8 0x3fe1365252bf0864, 0x3c985a621d4be111 // log(1/frcpa(1+182/256))= +5.37881e-001 -data8 0x3fe14ae558b4a92c, 0x3ca104c4aa8977d1 // log(1/frcpa(1+183/256))= +5.40393e-001 -data8 0x3fe15f85a19c7658, 0x3cbadf26e540f375 // log(1/frcpa(1+184/256))= +5.42910e-001 -data8 0x3fe16d4d38c119f8, 0x3cb3aea11caec416 // log(1/frcpa(1+185/256))= +5.44592e-001 -data8 0x3fe18203c20dd130, 0x3cba82d1211d1d6d // log(1/frcpa(1+186/256))= +5.47121e-001 -data8 0x3fe196c7bc4b1f38, 0x3cb6267acc4f4f4a // log(1/frcpa(1+187/256))= +5.49656e-001 -data8 0x3fe1a4a738b7a33c, 0x3c858930213c987d // log(1/frcpa(1+188/256))= +5.51349e-001 -data8 0x3fe1b981c0c9653c, 0x3c9bc2a4a30f697b // log(1/frcpa(1+189/256))= +5.53895e-001 -data8 0x3fe1ce69e8bb1068, 0x3cb7ae6199cf2a00 // log(1/frcpa(1+190/256))= +5.56447e-001 -data8 0x3fe1dc619de06944, 0x3c6b50bb38388177 // log(1/frcpa(1+191/256))= +5.58152e-001 -data8 
0x3fe1f160a2ad0da0, 0x3cbd05b2778a5e1d // log(1/frcpa(1+192/256))= +5.60715e-001 -data8 0x3fe2066d7740737c, 0x3cb32e828f9c6bd6 // log(1/frcpa(1+193/256))= +5.63285e-001 -data8 0x3fe2147dba47a390, 0x3cbd579851b8b672 // log(1/frcpa(1+194/256))= +5.65001e-001 -data8 0x3fe229a1bc5ebac0, 0x3cbb321be5237ce8 // log(1/frcpa(1+195/256))= +5.67582e-001 -data8 0x3fe237c1841a502c, 0x3cb3b56e0915ea64 // log(1/frcpa(1+196/256))= +5.69306e-001 -data8 0x3fe24cfce6f80d98, 0x3cb34a4d1a422919 // log(1/frcpa(1+197/256))= +5.71898e-001 -data8 0x3fe25b2c55cd5760, 0x3cb237401ea5015e // log(1/frcpa(1+198/256))= +5.73630e-001 -data8 0x3fe2707f4d5f7c40, 0x3c9d30f20acc8341 // log(1/frcpa(1+199/256))= +5.76233e-001 -data8 0x3fe285e0842ca380, 0x3cbc4d866d5f21c0 // log(1/frcpa(1+200/256))= +5.78842e-001 -data8 0x3fe294294708b770, 0x3cb85e14d5dc54fa // log(1/frcpa(1+201/256))= +5.80586e-001 -data8 0x3fe2a9a2670aff0c, 0x3c7e6f8f468bbf91 // log(1/frcpa(1+202/256))= +5.83207e-001 -data8 0x3fe2b7fb2c8d1cc0, 0x3c930ffcf63c8b65 // log(1/frcpa(1+203/256))= +5.84959e-001 -data8 0x3fe2c65a6395f5f4, 0x3ca0afe20b53d2d2 // log(1/frcpa(1+204/256))= +5.86713e-001 -data8 0x3fe2dbf557b0df40, 0x3cb646be1188fbc9 // log(1/frcpa(1+205/256))= +5.89350e-001 -data8 0x3fe2ea64c3f97654, 0x3c96516fa8df33b2 // log(1/frcpa(1+206/256))= +5.91113e-001 -data8 0x3fe3001823684d70, 0x3cb96d64e16d1360 // log(1/frcpa(1+207/256))= +5.93762e-001 -data8 0x3fe30e97e9a8b5cc, 0x3c98ef96bc97cca0 // log(1/frcpa(1+208/256))= +5.95531e-001 -data8 0x3fe32463ebdd34e8, 0x3caef1dc9a56c1bf // log(1/frcpa(1+209/256))= +5.98192e-001 -data8 0x3fe332f4314ad794, 0x3caa4f0ac5d5fa11 // log(1/frcpa(1+210/256))= +5.99970e-001 -data8 0x3fe348d90e7464cc, 0x3cbe7889f0516acd // log(1/frcpa(1+211/256))= +6.02643e-001 -data8 0x3fe35779f8c43d6c, 0x3ca96bbab7245411 // log(1/frcpa(1+212/256))= +6.04428e-001 -data8 0x3fe36621961a6a98, 0x3ca31f32262db9fb // log(1/frcpa(1+213/256))= +6.06217e-001 -data8 0x3fe37c299f3c3668, 0x3cb15c72c107ee29 // 
log(1/frcpa(1+214/256))= +6.08907e-001 -data8 0x3fe38ae2171976e4, 0x3cba42a2554b2dd4 // log(1/frcpa(1+215/256))= +6.10704e-001 -data8 0x3fe399a157a603e4, 0x3cb99c62286d8919 // log(1/frcpa(1+216/256))= +6.12504e-001 -data8 0x3fe3afccfe77b9d0, 0x3ca11048f96a43bd // log(1/frcpa(1+217/256))= +6.15210e-001 -data8 0x3fe3be9d503533b4, 0x3ca4022f47588c3e // log(1/frcpa(1+218/256))= +6.17018e-001 -data8 0x3fe3cd7480b4a8a0, 0x3cb4ba7afc2dc56a // log(1/frcpa(1+219/256))= +6.18830e-001 -data8 0x3fe3e3c43918f76c, 0x3c859673d064b8ba // log(1/frcpa(1+220/256))= +6.21554e-001 -data8 0x3fe3f2acb27ed6c4, 0x3cb55c6b452a16a8 // log(1/frcpa(1+221/256))= +6.23373e-001 -data8 0x3fe4019c2125ca90, 0x3cb8c367879c5a31 // log(1/frcpa(1+222/256))= +6.25197e-001 -data8 0x3fe4181061389720, 0x3cb2c17a79c5cc6c // log(1/frcpa(1+223/256))= +6.27937e-001 -data8 0x3fe42711518df544, 0x3ca5f38d47012fc5 // log(1/frcpa(1+224/256))= +6.29769e-001 -data8 0x3fe436194e12b6bc, 0x3cb9854d65a9b426 // log(1/frcpa(1+225/256))= +6.31604e-001 -data8 0x3fe445285d68ea68, 0x3ca3ff9b3a81cd81 // log(1/frcpa(1+226/256))= +6.33442e-001 -data8 0x3fe45bcc464c8938, 0x3cb0a2d8011a6c05 // log(1/frcpa(1+227/256))= +6.36206e-001 -data8 0x3fe46aed21f117fc, 0x3c8a2be41f8e9f3d // log(1/frcpa(1+228/256))= +6.38053e-001 -data8 0x3fe47a1527e8a2d0, 0x3cba4a83594fab09 // log(1/frcpa(1+229/256))= +6.39903e-001 -data8 0x3fe489445efffcc8, 0x3cbf306a23dcbcde // log(1/frcpa(1+230/256))= +6.41756e-001 -data8 0x3fe4a018bcb69834, 0x3ca46c9285029fd1 // log(1/frcpa(1+231/256))= +6.44543e-001 -data8 0x3fe4af5a0c9d65d4, 0x3cbbc1db897580e3 // log(1/frcpa(1+232/256))= +6.46405e-001 -data8 0x3fe4bea2a5bdbe84, 0x3cb84d880d7ef775 // log(1/frcpa(1+233/256))= +6.48271e-001 -data8 0x3fe4cdf28f10ac44, 0x3cb3ec4b7893ce1f // log(1/frcpa(1+234/256))= +6.50140e-001 -data8 0x3fe4dd49cf994058, 0x3c897224d59d3408 // log(1/frcpa(1+235/256))= +6.52013e-001 -data8 0x3fe4eca86e64a680, 0x3cbccf620f24f0cd // log(1/frcpa(1+236/256))= +6.53889e-001 -data8 
0x3fe503c43cd8eb68, 0x3c3f872c65971084 // log(1/frcpa(1+237/256))= +6.56710e-001 -data8 0x3fe513356667fc54, 0x3cb9ca64cc3d52c8 // log(1/frcpa(1+238/256))= +6.58595e-001 -data8 0x3fe522ae0738a3d4, 0x3cbe708164c75968 // log(1/frcpa(1+239/256))= +6.60483e-001 -data8 0x3fe5322e26867854, 0x3cb9988ba4aea615 // log(1/frcpa(1+240/256))= +6.62376e-001 -data8 0x3fe541b5cb979808, 0x3ca1662e3a6b95f5 // log(1/frcpa(1+241/256))= +6.64271e-001 -data8 0x3fe55144fdbcbd60, 0x3cb3acd4ca45c1e0 // log(1/frcpa(1+242/256))= +6.66171e-001 -data8 0x3fe560dbc45153c4, 0x3cb4988947959fed // log(1/frcpa(1+243/256))= +6.68074e-001 -data8 0x3fe5707a26bb8c64, 0x3cb3017fe6607ba9 // log(1/frcpa(1+244/256))= +6.69980e-001 -data8 0x3fe587f60ed5b8fc, 0x3cbe7a3266366ed4 // log(1/frcpa(1+245/256))= +6.72847e-001 -data8 0x3fe597a7977c8f30, 0x3ca1e12b9959a90e // log(1/frcpa(1+246/256))= +6.74763e-001 -data8 0x3fe5a760d634bb88, 0x3cb7c365e53d9602 // log(1/frcpa(1+247/256))= +6.76682e-001 -data8 0x3fe5b721d295f10c, 0x3cb716c2551ccbf0 // log(1/frcpa(1+248/256))= +6.78605e-001 -data8 0x3fe5c6ea94431ef8, 0x3ca02b2ed0e28261 // log(1/frcpa(1+249/256))= +6.80532e-001 -data8 0x3fe5d6bb22ea86f4, 0x3caf43a8bbb2f974 // log(1/frcpa(1+250/256))= +6.82462e-001 -data8 0x3fe5e6938645d38c, 0x3cbcedc98821b333 // log(1/frcpa(1+251/256))= +6.84397e-001 -data8 0x3fe5f673c61a2ed0, 0x3caa385eef5f2789 // log(1/frcpa(1+252/256))= +6.86335e-001 -data8 0x3fe6065bea385924, 0x3cb11624f165c5b4 // log(1/frcpa(1+253/256))= +6.88276e-001 -data8 0x3fe6164bfa7cc068, 0x3cbad884f87073fa // log(1/frcpa(1+254/256))= +6.90222e-001 -data8 0x3fe62643fecf9740, 0x3cb78c51da12f4df // log(1/frcpa(1+255/256))= +6.92171e-001 +data8 0x3f60040155d58800 // log(1/frcpa(1+0/256))= +1.95503e-003 +data8 0x3f78121214586a00 // log(1/frcpa(1+1/256))= +5.87661e-003 +data8 0x3f841929f9683200 // log(1/frcpa(1+2/256))= +9.81362e-003 +data8 0x3f8c317384c75f00 // log(1/frcpa(1+3/256))= +1.37662e-002 +data8 0x3f91a6b91ac73380 // log(1/frcpa(1+4/256))= +1.72376e-002 
+data8 0x3f95ba9a5d9ac000 // log(1/frcpa(1+5/256))= +2.12196e-002 +data8 0x3f99d2a807432580 // log(1/frcpa(1+6/256))= +2.52177e-002 +data8 0x3f9d6b2725979800 // log(1/frcpa(1+7/256))= +2.87291e-002 +data8 0x3fa0c58fa19dfa80 // log(1/frcpa(1+8/256))= +3.27573e-002 +data8 0x3fa2954c78cbce00 // log(1/frcpa(1+9/256))= +3.62953e-002 +data8 0x3fa4a94d2da96c40 // log(1/frcpa(1+10/256))= +4.03542e-002 +data8 0x3fa67c94f2d4bb40 // log(1/frcpa(1+11/256))= +4.39192e-002 +data8 0x3fa85188b630f040 // log(1/frcpa(1+12/256))= +4.74971e-002 +data8 0x3faa6b8abe73af40 // log(1/frcpa(1+13/256))= +5.16017e-002 +data8 0x3fac441e06f72a80 // log(1/frcpa(1+14/256))= +5.52072e-002 +data8 0x3fae1e6713606d00 // log(1/frcpa(1+15/256))= +5.88257e-002 +data8 0x3faffa6911ab9300 // log(1/frcpa(1+16/256))= +6.24574e-002 +data8 0x3fb0ec139c5da600 // log(1/frcpa(1+17/256))= +6.61022e-002 +data8 0x3fb1dbd2643d1900 // log(1/frcpa(1+18/256))= +6.97605e-002 +data8 0x3fb2cc7284fe5f00 // log(1/frcpa(1+19/256))= +7.34321e-002 +data8 0x3fb3bdf5a7d1ee60 // log(1/frcpa(1+20/256))= +7.71173e-002 +data8 0x3fb4b05d7aa012e0 // log(1/frcpa(1+21/256))= +8.08161e-002 +data8 0x3fb580db7ceb5700 // log(1/frcpa(1+22/256))= +8.39975e-002 +data8 0x3fb674f089365a60 // log(1/frcpa(1+23/256))= +8.77219e-002 +data8 0x3fb769ef2c6b5680 // log(1/frcpa(1+24/256))= +9.14602e-002 +data8 0x3fb85fd927506a40 // log(1/frcpa(1+25/256))= +9.52125e-002 +data8 0x3fb9335e5d594980 // log(1/frcpa(1+26/256))= +9.84401e-002 +data8 0x3fba2b0220c8e5e0 // log(1/frcpa(1+27/256))= +1.02219e-001 +data8 0x3fbb0004ac1a86a0 // log(1/frcpa(1+28/256))= +1.05469e-001 +data8 0x3fbbf968769fca00 // log(1/frcpa(1+29/256))= +1.09274e-001 +data8 0x3fbccfedbfee13a0 // log(1/frcpa(1+30/256))= +1.12548e-001 +data8 0x3fbda727638446a0 // log(1/frcpa(1+31/256))= +1.15832e-001 +data8 0x3fbea3257fe10f60 // log(1/frcpa(1+32/256))= +1.19677e-001 +data8 0x3fbf7be9fedbfde0 // log(1/frcpa(1+33/256))= +1.22985e-001 +data8 0x3fc02ab352ff25f0 // log(1/frcpa(1+34/256))= 
+1.26303e-001 +data8 0x3fc097ce579d2040 // log(1/frcpa(1+35/256))= +1.29633e-001 +data8 0x3fc1178e8227e470 // log(1/frcpa(1+36/256))= +1.33531e-001 +data8 0x3fc185747dbecf30 // log(1/frcpa(1+37/256))= +1.36885e-001 +data8 0x3fc1f3b925f25d40 // log(1/frcpa(1+38/256))= +1.40250e-001 +data8 0x3fc2625d1e6ddf50 // log(1/frcpa(1+39/256))= +1.43627e-001 +data8 0x3fc2d1610c868130 // log(1/frcpa(1+40/256))= +1.47015e-001 +data8 0x3fc340c597411420 // log(1/frcpa(1+41/256))= +1.50414e-001 +data8 0x3fc3b08b6757f2a0 // log(1/frcpa(1+42/256))= +1.53825e-001 +data8 0x3fc40dfb08378000 // log(1/frcpa(1+43/256))= +1.56677e-001 +data8 0x3fc47e74e8ca5f70 // log(1/frcpa(1+44/256))= +1.60109e-001 +data8 0x3fc4ef51f6466de0 // log(1/frcpa(1+45/256))= +1.63553e-001 +data8 0x3fc56092e02ba510 // log(1/frcpa(1+46/256))= +1.67010e-001 +data8 0x3fc5d23857cd74d0 // log(1/frcpa(1+47/256))= +1.70478e-001 +data8 0x3fc6313a37335d70 // log(1/frcpa(1+48/256))= +1.73377e-001 +data8 0x3fc6a399dabbd380 // log(1/frcpa(1+49/256))= +1.76868e-001 +data8 0x3fc70337dd3ce410 // log(1/frcpa(1+50/256))= +1.79786e-001 +data8 0x3fc77654128f6120 // log(1/frcpa(1+51/256))= +1.83299e-001 +data8 0x3fc7e9d82a0b0220 // log(1/frcpa(1+52/256))= +1.86824e-001 +data8 0x3fc84a6b759f5120 // log(1/frcpa(1+53/256))= +1.89771e-001 +data8 0x3fc8ab47d5f5a300 // log(1/frcpa(1+54/256))= +1.92727e-001 +data8 0x3fc91fe490965810 // log(1/frcpa(1+55/256))= +1.96286e-001 +data8 0x3fc981634011aa70 // log(1/frcpa(1+56/256))= +1.99261e-001 +data8 0x3fc9f6c407089660 // log(1/frcpa(1+57/256))= +2.02843e-001 +data8 0x3fca58e729348f40 // log(1/frcpa(1+58/256))= +2.05838e-001 +data8 0x3fcabb55c31693a0 // log(1/frcpa(1+59/256))= +2.08842e-001 +data8 0x3fcb1e104919efd0 // log(1/frcpa(1+60/256))= +2.11855e-001 +data8 0x3fcb94ee93e367c0 // log(1/frcpa(1+61/256))= +2.15483e-001 +data8 0x3fcbf851c0675550 // log(1/frcpa(1+62/256))= +2.18516e-001 +data8 0x3fcc5c0254bf23a0 // log(1/frcpa(1+63/256))= +2.21558e-001 +data8 0x3fccc000c9db3c50 // 
log(1/frcpa(1+64/256))= +2.24609e-001 +data8 0x3fcd244d99c85670 // log(1/frcpa(1+65/256))= +2.27670e-001 +data8 0x3fcd88e93fb2f450 // log(1/frcpa(1+66/256))= +2.30741e-001 +data8 0x3fcdedd437eaef00 // log(1/frcpa(1+67/256))= +2.33820e-001 +data8 0x3fce530effe71010 // log(1/frcpa(1+68/256))= +2.36910e-001 +data8 0x3fceb89a1648b970 // log(1/frcpa(1+69/256))= +2.40009e-001 +data8 0x3fcf1e75fadf9bd0 // log(1/frcpa(1+70/256))= +2.43117e-001 +data8 0x3fcf84a32ead7c30 // log(1/frcpa(1+71/256))= +2.46235e-001 +data8 0x3fcfeb2233ea07c0 // log(1/frcpa(1+72/256))= +2.49363e-001 +data8 0x3fd028f9c7035c18 // log(1/frcpa(1+73/256))= +2.52501e-001 +data8 0x3fd05c8be0d96358 // log(1/frcpa(1+74/256))= +2.55649e-001 +data8 0x3fd085eb8f8ae790 // log(1/frcpa(1+75/256))= +2.58174e-001 +data8 0x3fd0b9c8e32d1910 // log(1/frcpa(1+76/256))= +2.61339e-001 +data8 0x3fd0edd060b78080 // log(1/frcpa(1+77/256))= +2.64515e-001 +data8 0x3fd122024cf00638 // log(1/frcpa(1+78/256))= +2.67701e-001 +data8 0x3fd14be2927aecd0 // log(1/frcpa(1+79/256))= +2.70257e-001 +data8 0x3fd180618ef18ad8 // log(1/frcpa(1+80/256))= +2.73461e-001 +data8 0x3fd1b50bbe2fc638 // log(1/frcpa(1+81/256))= +2.76675e-001 +data8 0x3fd1df4cc7cf2428 // log(1/frcpa(1+82/256))= +2.79254e-001 +data8 0x3fd214456d0eb8d0 // log(1/frcpa(1+83/256))= +2.82487e-001 +data8 0x3fd23ec5991eba48 // log(1/frcpa(1+84/256))= +2.85081e-001 +data8 0x3fd2740d9f870af8 // log(1/frcpa(1+85/256))= +2.88333e-001 +data8 0x3fd29ecdabcdfa00 // log(1/frcpa(1+86/256))= +2.90943e-001 +data8 0x3fd2d46602adcce8 // log(1/frcpa(1+87/256))= +2.94214e-001 +data8 0x3fd2ff66b04ea9d0 // log(1/frcpa(1+88/256))= +2.96838e-001 +data8 0x3fd335504b355a30 // log(1/frcpa(1+89/256))= +3.00129e-001 +data8 0x3fd360925ec44f58 // log(1/frcpa(1+90/256))= +3.02769e-001 +data8 0x3fd38bf1c3337e70 // log(1/frcpa(1+91/256))= +3.05417e-001 +data8 0x3fd3c25277333180 // log(1/frcpa(1+92/256))= +3.08735e-001 +data8 0x3fd3edf463c16838 // log(1/frcpa(1+93/256))= +3.11399e-001 +data8 
0x3fd419b423d5e8c0 // log(1/frcpa(1+94/256))= +3.14069e-001 +data8 0x3fd44591e0539f48 // log(1/frcpa(1+95/256))= +3.16746e-001 +data8 0x3fd47c9175b6f0a8 // log(1/frcpa(1+96/256))= +3.20103e-001 +data8 0x3fd4a8b341552b08 // log(1/frcpa(1+97/256))= +3.22797e-001 +data8 0x3fd4d4f390890198 // log(1/frcpa(1+98/256))= +3.25498e-001 +data8 0x3fd501528da1f960 // log(1/frcpa(1+99/256))= +3.28206e-001 +data8 0x3fd52dd06347d4f0 // log(1/frcpa(1+100/256))= +3.30921e-001 +data8 0x3fd55a6d3c7b8a88 // log(1/frcpa(1+101/256))= +3.33644e-001 +data8 0x3fd5925d2b112a58 // log(1/frcpa(1+102/256))= +3.37058e-001 +data8 0x3fd5bf406b543db0 // log(1/frcpa(1+103/256))= +3.39798e-001 +data8 0x3fd5ec433d5c35a8 // log(1/frcpa(1+104/256))= +3.42545e-001 +data8 0x3fd61965cdb02c18 // log(1/frcpa(1+105/256))= +3.45300e-001 +data8 0x3fd646a84935b2a0 // log(1/frcpa(1+106/256))= +3.48063e-001 +data8 0x3fd6740add31de90 // log(1/frcpa(1+107/256))= +3.50833e-001 +data8 0x3fd6a18db74a58c0 // log(1/frcpa(1+108/256))= +3.53610e-001 +data8 0x3fd6cf31058670e8 // log(1/frcpa(1+109/256))= +3.56396e-001 +data8 0x3fd6f180e852f0b8 // log(1/frcpa(1+110/256))= +3.58490e-001 +data8 0x3fd71f5d71b894e8 // log(1/frcpa(1+111/256))= +3.61289e-001 +data8 0x3fd74d5aefd66d58 // log(1/frcpa(1+112/256))= +3.64096e-001 +data8 0x3fd77b79922bd378 // log(1/frcpa(1+113/256))= +3.66911e-001 +data8 0x3fd7a9b9889f19e0 // log(1/frcpa(1+114/256))= +3.69734e-001 +data8 0x3fd7d81b037eb6a0 // log(1/frcpa(1+115/256))= +3.72565e-001 +data8 0x3fd8069e33827230 // log(1/frcpa(1+116/256))= +3.75404e-001 +data8 0x3fd82996d3ef8bc8 // log(1/frcpa(1+117/256))= +3.77538e-001 +data8 0x3fd85855776dcbf8 // log(1/frcpa(1+118/256))= +3.80391e-001 +data8 0x3fd8873658327cc8 // log(1/frcpa(1+119/256))= +3.83253e-001 +data8 0x3fd8aa75973ab8c8 // log(1/frcpa(1+120/256))= +3.85404e-001 +data8 0x3fd8d992dc8824e0 // log(1/frcpa(1+121/256))= +3.88280e-001 +data8 0x3fd908d2ea7d9510 // log(1/frcpa(1+122/256))= +3.91164e-001 +data8 0x3fd92c59e79c0e50 // 
log(1/frcpa(1+123/256))= +3.93332e-001 +data8 0x3fd95bd750ee3ed0 // log(1/frcpa(1+124/256))= +3.96231e-001 +data8 0x3fd98b7811a3ee58 // log(1/frcpa(1+125/256))= +3.99138e-001 +data8 0x3fd9af47f33d4068 // log(1/frcpa(1+126/256))= +4.01323e-001 +data8 0x3fd9df270c1914a0 // log(1/frcpa(1+127/256))= +4.04245e-001 +data8 0x3fda0325ed14fda0 // log(1/frcpa(1+128/256))= +4.06442e-001 +data8 0x3fda33440224fa78 // log(1/frcpa(1+129/256))= +4.09379e-001 +data8 0x3fda57725e80c380 // log(1/frcpa(1+130/256))= +4.11587e-001 +data8 0x3fda87d0165dd198 // log(1/frcpa(1+131/256))= +4.14539e-001 +data8 0x3fdaac2e6c03f890 // log(1/frcpa(1+132/256))= +4.16759e-001 +data8 0x3fdadccc6fdf6a80 // log(1/frcpa(1+133/256))= +4.19726e-001 +data8 0x3fdb015b3eb1e790 // log(1/frcpa(1+134/256))= +4.21958e-001 +data8 0x3fdb323a3a635948 // log(1/frcpa(1+135/256))= +4.24941e-001 +data8 0x3fdb56fa04462908 // log(1/frcpa(1+136/256))= +4.27184e-001 +data8 0x3fdb881aa659bc90 // log(1/frcpa(1+137/256))= +4.30182e-001 +data8 0x3fdbad0bef3db160 // log(1/frcpa(1+138/256))= +4.32437e-001 +data8 0x3fdbd21297781c28 // log(1/frcpa(1+139/256))= +4.34697e-001 +data8 0x3fdc039236f08818 // log(1/frcpa(1+140/256))= +4.37718e-001 +data8 0x3fdc28cb1e4d32f8 // log(1/frcpa(1+141/256))= +4.39990e-001 +data8 0x3fdc4e19b84723c0 // log(1/frcpa(1+142/256))= +4.42267e-001 +data8 0x3fdc7ff9c74554c8 // log(1/frcpa(1+143/256))= +4.45311e-001 +data8 0x3fdca57b64e9db00 // log(1/frcpa(1+144/256))= +4.47600e-001 +data8 0x3fdccb130a5ceba8 // log(1/frcpa(1+145/256))= +4.49895e-001 +data8 0x3fdcf0c0d18f3268 // log(1/frcpa(1+146/256))= +4.52194e-001 +data8 0x3fdd232075b5a200 // log(1/frcpa(1+147/256))= +4.55269e-001 +data8 0x3fdd490246defa68 // log(1/frcpa(1+148/256))= +4.57581e-001 +data8 0x3fdd6efa918d25c8 // log(1/frcpa(1+149/256))= +4.59899e-001 +data8 0x3fdd9509707ae528 // log(1/frcpa(1+150/256))= +4.62221e-001 +data8 0x3fddbb2efe92c550 // log(1/frcpa(1+151/256))= +4.64550e-001 +data8 0x3fddee2f3445e4a8 // log(1/frcpa(1+152/256))= 
+4.67663e-001 +data8 0x3fde148a1a2726c8 // log(1/frcpa(1+153/256))= +4.70004e-001 +data8 0x3fde3afc0a49ff38 // log(1/frcpa(1+154/256))= +4.72350e-001 +data8 0x3fde6185206d5168 // log(1/frcpa(1+155/256))= +4.74702e-001 +data8 0x3fde882578823d50 // log(1/frcpa(1+156/256))= +4.77060e-001 +data8 0x3fdeaedd2eac9908 // log(1/frcpa(1+157/256))= +4.79423e-001 +data8 0x3fded5ac5f436be0 // log(1/frcpa(1+158/256))= +4.81792e-001 +data8 0x3fdefc9326d16ab8 // log(1/frcpa(1+159/256))= +4.84166e-001 +data8 0x3fdf2391a21575f8 // log(1/frcpa(1+160/256))= +4.86546e-001 +data8 0x3fdf4aa7ee031928 // log(1/frcpa(1+161/256))= +4.88932e-001 +data8 0x3fdf71d627c30bb0 // log(1/frcpa(1+162/256))= +4.91323e-001 +data8 0x3fdf991c6cb3b378 // log(1/frcpa(1+163/256))= +4.93720e-001 +data8 0x3fdfc07ada69a908 // log(1/frcpa(1+164/256))= +4.96123e-001 +data8 0x3fdfe7f18eb03d38 // log(1/frcpa(1+165/256))= +4.98532e-001 +data8 0x3fe007c053c5002c // log(1/frcpa(1+166/256))= +5.00946e-001 +data8 0x3fe01b942198a5a0 // log(1/frcpa(1+167/256))= +5.03367e-001 +data8 0x3fe02f74400c64e8 // log(1/frcpa(1+168/256))= +5.05793e-001 +data8 0x3fe04360be7603ac // log(1/frcpa(1+169/256))= +5.08225e-001 +data8 0x3fe05759ac47fe30 // log(1/frcpa(1+170/256))= +5.10663e-001 +data8 0x3fe06b5f1911cf50 // log(1/frcpa(1+171/256))= +5.13107e-001 +data8 0x3fe078bf0533c568 // log(1/frcpa(1+172/256))= +5.14740e-001 +data8 0x3fe08cd9687e7b0c // log(1/frcpa(1+173/256))= +5.17194e-001 +data8 0x3fe0a10074cf9018 // log(1/frcpa(1+174/256))= +5.19654e-001 +data8 0x3fe0b5343a234474 // log(1/frcpa(1+175/256))= +5.22120e-001 +data8 0x3fe0c974c89431cc // log(1/frcpa(1+176/256))= +5.24592e-001 +data8 0x3fe0ddc2305b9884 // log(1/frcpa(1+177/256))= +5.27070e-001 +data8 0x3fe0eb524bafc918 // log(1/frcpa(1+178/256))= +5.28726e-001 +data8 0x3fe0ffb54213a474 // log(1/frcpa(1+179/256))= +5.31214e-001 +data8 0x3fe114253da97d9c // log(1/frcpa(1+180/256))= +5.33709e-001 +data8 0x3fe128a24f1d9afc // log(1/frcpa(1+181/256))= +5.36210e-001 +data8 
0x3fe1365252bf0864 // log(1/frcpa(1+182/256))= +5.37881e-001 +data8 0x3fe14ae558b4a92c // log(1/frcpa(1+183/256))= +5.40393e-001 +data8 0x3fe15f85a19c7658 // log(1/frcpa(1+184/256))= +5.42910e-001 +data8 0x3fe16d4d38c119f8 // log(1/frcpa(1+185/256))= +5.44592e-001 +data8 0x3fe18203c20dd130 // log(1/frcpa(1+186/256))= +5.47121e-001 +data8 0x3fe196c7bc4b1f38 // log(1/frcpa(1+187/256))= +5.49656e-001 +data8 0x3fe1a4a738b7a33c // log(1/frcpa(1+188/256))= +5.51349e-001 +data8 0x3fe1b981c0c9653c // log(1/frcpa(1+189/256))= +5.53895e-001 +data8 0x3fe1ce69e8bb1068 // log(1/frcpa(1+190/256))= +5.56447e-001 +data8 0x3fe1dc619de06944 // log(1/frcpa(1+191/256))= +5.58152e-001 +data8 0x3fe1f160a2ad0da0 // log(1/frcpa(1+192/256))= +5.60715e-001 +data8 0x3fe2066d7740737c // log(1/frcpa(1+193/256))= +5.63285e-001 +data8 0x3fe2147dba47a390 // log(1/frcpa(1+194/256))= +5.65001e-001 +data8 0x3fe229a1bc5ebac0 // log(1/frcpa(1+195/256))= +5.67582e-001 +data8 0x3fe237c1841a502c // log(1/frcpa(1+196/256))= +5.69306e-001 +data8 0x3fe24cfce6f80d98 // log(1/frcpa(1+197/256))= +5.71898e-001 +data8 0x3fe25b2c55cd5760 // log(1/frcpa(1+198/256))= +5.73630e-001 +data8 0x3fe2707f4d5f7c40 // log(1/frcpa(1+199/256))= +5.76233e-001 +data8 0x3fe285e0842ca380 // log(1/frcpa(1+200/256))= +5.78842e-001 +data8 0x3fe294294708b770 // log(1/frcpa(1+201/256))= +5.80586e-001 +data8 0x3fe2a9a2670aff0c // log(1/frcpa(1+202/256))= +5.83207e-001 +data8 0x3fe2b7fb2c8d1cc0 // log(1/frcpa(1+203/256))= +5.84959e-001 +data8 0x3fe2c65a6395f5f4 // log(1/frcpa(1+204/256))= +5.86713e-001 +data8 0x3fe2dbf557b0df40 // log(1/frcpa(1+205/256))= +5.89350e-001 +data8 0x3fe2ea64c3f97654 // log(1/frcpa(1+206/256))= +5.91113e-001 +data8 0x3fe3001823684d70 // log(1/frcpa(1+207/256))= +5.93762e-001 +data8 0x3fe30e97e9a8b5cc // log(1/frcpa(1+208/256))= +5.95531e-001 +data8 0x3fe32463ebdd34e8 // log(1/frcpa(1+209/256))= +5.98192e-001 +data8 0x3fe332f4314ad794 // log(1/frcpa(1+210/256))= +5.99970e-001 +data8 0x3fe348d90e7464cc // 
log(1/frcpa(1+211/256))= +6.02643e-001 +data8 0x3fe35779f8c43d6c // log(1/frcpa(1+212/256))= +6.04428e-001 +data8 0x3fe36621961a6a98 // log(1/frcpa(1+213/256))= +6.06217e-001 +data8 0x3fe37c299f3c3668 // log(1/frcpa(1+214/256))= +6.08907e-001 +data8 0x3fe38ae2171976e4 // log(1/frcpa(1+215/256))= +6.10704e-001 +data8 0x3fe399a157a603e4 // log(1/frcpa(1+216/256))= +6.12504e-001 +data8 0x3fe3afccfe77b9d0 // log(1/frcpa(1+217/256))= +6.15210e-001 +data8 0x3fe3be9d503533b4 // log(1/frcpa(1+218/256))= +6.17018e-001 +data8 0x3fe3cd7480b4a8a0 // log(1/frcpa(1+219/256))= +6.18830e-001 +data8 0x3fe3e3c43918f76c // log(1/frcpa(1+220/256))= +6.21554e-001 +data8 0x3fe3f2acb27ed6c4 // log(1/frcpa(1+221/256))= +6.23373e-001 +data8 0x3fe4019c2125ca90 // log(1/frcpa(1+222/256))= +6.25197e-001 +data8 0x3fe4181061389720 // log(1/frcpa(1+223/256))= +6.27937e-001 +data8 0x3fe42711518df544 // log(1/frcpa(1+224/256))= +6.29769e-001 +data8 0x3fe436194e12b6bc // log(1/frcpa(1+225/256))= +6.31604e-001 +data8 0x3fe445285d68ea68 // log(1/frcpa(1+226/256))= +6.33442e-001 +data8 0x3fe45bcc464c8938 // log(1/frcpa(1+227/256))= +6.36206e-001 +data8 0x3fe46aed21f117fc // log(1/frcpa(1+228/256))= +6.38053e-001 +data8 0x3fe47a1527e8a2d0 // log(1/frcpa(1+229/256))= +6.39903e-001 +data8 0x3fe489445efffcc8 // log(1/frcpa(1+230/256))= +6.41756e-001 +data8 0x3fe4a018bcb69834 // log(1/frcpa(1+231/256))= +6.44543e-001 +data8 0x3fe4af5a0c9d65d4 // log(1/frcpa(1+232/256))= +6.46405e-001 +data8 0x3fe4bea2a5bdbe84 // log(1/frcpa(1+233/256))= +6.48271e-001 +data8 0x3fe4cdf28f10ac44 // log(1/frcpa(1+234/256))= +6.50140e-001 +data8 0x3fe4dd49cf994058 // log(1/frcpa(1+235/256))= +6.52013e-001 +data8 0x3fe4eca86e64a680 // log(1/frcpa(1+236/256))= +6.53889e-001 +data8 0x3fe503c43cd8eb68 // log(1/frcpa(1+237/256))= +6.56710e-001 +data8 0x3fe513356667fc54 // log(1/frcpa(1+238/256))= +6.58595e-001 +data8 0x3fe522ae0738a3d4 // log(1/frcpa(1+239/256))= +6.60483e-001 +data8 0x3fe5322e26867854 // log(1/frcpa(1+240/256))= 
+6.62376e-001 +data8 0x3fe541b5cb979808 // log(1/frcpa(1+241/256))= +6.64271e-001 +data8 0x3fe55144fdbcbd60 // log(1/frcpa(1+242/256))= +6.66171e-001 +data8 0x3fe560dbc45153c4 // log(1/frcpa(1+243/256))= +6.68074e-001 +data8 0x3fe5707a26bb8c64 // log(1/frcpa(1+244/256))= +6.69980e-001 +data8 0x3fe587f60ed5b8fc // log(1/frcpa(1+245/256))= +6.72847e-001 +data8 0x3fe597a7977c8f30 // log(1/frcpa(1+246/256))= +6.74763e-001 +data8 0x3fe5a760d634bb88 // log(1/frcpa(1+247/256))= +6.76682e-001 +data8 0x3fe5b721d295f10c // log(1/frcpa(1+248/256))= +6.78605e-001 +data8 0x3fe5c6ea94431ef8 // log(1/frcpa(1+249/256))= +6.80532e-001 +data8 0x3fe5d6bb22ea86f4 // log(1/frcpa(1+250/256))= +6.82462e-001 +data8 0x3fe5e6938645d38c // log(1/frcpa(1+251/256))= +6.84397e-001 +data8 0x3fe5f673c61a2ed0 // log(1/frcpa(1+252/256))= +6.86335e-001 +data8 0x3fe6065bea385924 // log(1/frcpa(1+253/256))= +6.88276e-001 +data8 0x3fe6164bfa7cc068 // log(1/frcpa(1+254/256))= +6.90222e-001 +data8 0x3fe62643fecf9740 // log(1/frcpa(1+255/256))= +6.92171e-001 LOCAL_OBJECT_END(pow_Tt) @@ -909,14 +879,14 @@ GLOBAL_LIBM_ENTRY(powf) addl pow_AD_P = @ltoff(pow_table_P), gp fma.s1 POW_Xp1 = f8,f1,f1 // Will be used for r1 if x<0 nop.i 999 -;; } +;; // Get significand of x. Will be used to get index to fetch T, Tt. 
{ .mfi getf.sig pow_GR_sig_X = f8 frcpa.s1 POW_B, p6 = f1,f8 - nop.i 999 + mov pow_GR_exp_half = 0xFFFE // Exponent for 0.5 } { .mfi ld8 pow_AD_P = [pow_AD_P] @@ -925,11 +895,10 @@ GLOBAL_LIBM_ENTRY(powf) } ;; -// p13 = TRUE ==> X is unorm // DOUBLE 0x10033 exponent limit at which y is an integer { .mfi nop.m 999 - fclass.m p13,p0 = f8, 0x0b // Test for x unorm + fcmp.lt.s1 p8,p9 = f8, f0 // Test for x<0 addl pow_GR_10033 = 0x10033, r0 } { .mfi @@ -939,11 +908,11 @@ GLOBAL_LIBM_ENTRY(powf) } ;; -// p14 = TRUE ==> X is ZERO +// p13 = TRUE ==> X is unorm { .mfi + setf.exp POW_Q0_half = pow_GR_exp_half // Form 0.5 + fclass.m p13,p0 = f8, 0x0b // Test for x unorm adds pow_AD_Tt = pow_Tt - pow_table_P, pow_AD_P - fclass.m p14,p0 = f8, 0x07 - and pow_GR_exp_X = pow_GR_signexp_X, pow_GR_17ones } { .mfi adds pow_AD_Q = pow_table_Q - pow_table_P, pow_AD_P @@ -952,14 +921,16 @@ GLOBAL_LIBM_ENTRY(powf) } ;; +// p14 = TRUE ==> X is ZERO { .mfi - ldfe POW_P5 = [pow_AD_P], 16 - fcmp.lt.s1 p8,p9 = f8, f0 // Test for x<0 + ldfe POW_P2 = [pow_AD_Q], 16 + fclass.m p14,p0 = f8, 0x07 nop.i 999 } -{ .mib - ldfe POW_P4 = [pow_AD_Q], 16 - sub pow_GR_true_exp_X = pow_GR_exp_X, pow_GR_16ones +// Note POW_Xm1 and POW_r1 are used interchangably +{ .mfb + nop.m 999 +(p8) fnma.s1 POW_Xm1 = POW_Xp1,f1,f0 (p13) br.cond.spnt POW_X_DENORM } ;; @@ -968,26 +939,33 @@ GLOBAL_LIBM_ENTRY(powf) POW_COMMON: // p11 = TRUE ==> Y is a NAN { .mfi - ldfe POW_P3 = [pow_AD_P], 16 + and pow_GR_exp_X = pow_GR_signexp_X, pow_GR_17ones fclass.m p11,p0 = f9, 0xc3 nop.i 999 } { .mfi - ldfe POW_P2 = [pow_AD_Q], 16 - nop.f 999 + nop.m 999 + fms.s1 POW_r = POW_B, POW_NORM_X,f1 mov pow_GR_y_zero = 0 } ;; -// Note POW_Xm1 and POW_r1 are used interchangably +// Get exponent of |x|-1 to use in comparison to 2^-8 +{ .mmi + getf.exp pow_GR_signexp_Xm1 = POW_Xm1 + sub pow_GR_true_exp_X = pow_GR_exp_X, pow_GR_16ones + extr.u pow_GR_offset = pow_GR_sig_X, 55, 8 +} +;; + { .mfi alloc r32=ar.pfs,2,19,4,0 - fms.s1 POW_r = POW_B, 
POW_NORM_X,f1 - nop.i 999 + fcvt.fx.s1 POW_int_Y = POW_NORM_Y + shladd pow_AD_Tt = pow_GR_offset, 3, pow_AD_Tt } { .mfi setf.sig POW_int_K = pow_GR_true_exp_X -(p8) fnma.s1 POW_Xm1 = POW_Xp1,f1,f0 + nop.f 999 nop.i 999 } ;; @@ -997,7 +975,7 @@ POW_COMMON: { .mfi ldfe POW_P1 = [pow_AD_P], 16 fclass.m p12,p0 = f9, 0x07 - shl pow_GR_offset = pow_GR_sig_X, 1 + nop.i 999 } { .mfb ldfe POW_P0 = [pow_AD_Q], 16 @@ -1006,19 +984,18 @@ POW_COMMON: } ;; -// Get exponent of |x|-1 to use in comparison to 2^-8 -{ .mfi - getf.exp pow_GR_signexp_Xm1 = POW_Xm1 - fcvt.fx.s1 POW_int_Y = POW_NORM_Y - shr.u pow_GR_offset = pow_GR_offset,56 +{ .mmf + getf.exp pow_GR_signexp_Y = POW_NORM_Y + ldfd POW_T = [pow_AD_Tt] + fma.s1 POW_rsq = POW_r, POW_r,f0 } ;; // p11 = TRUE ==> X is a NAN { .mfi ldfpd POW_log2_hi, POW_log2_lo = [pow_AD_Q], 16 - fclass.m p11,p0 = f8, 0xc3 - shladd pow_AD_Tt = pow_GR_offset, 4, pow_AD_Tt + fclass.m p11,p0 = POW_NORM_X, 0xc3 + nop.i 999 } { .mfi ldfe POW_inv_log2_by_128 = [pow_AD_P], 16 @@ -1028,28 +1005,33 @@ POW_COMMON: ;; { .mfi - ldfpd POW_Q2, POW_Q3 = [pow_AD_P], 16 - fma.s1 POW_G = f0,f0,f0 // G=0 in case |x| near 1 + ldfd POW_Q2 = [pow_AD_P], 16 + fnma.s1 POW_twoV = POW_r, POW_Q0_half,f1 and pow_GR_exp_Xm1 = pow_GR_signexp_Xm1, pow_GR_17ones } +{ .mfi + nop.m 999 + fma.s1 POW_U = POW_NORM_Y,POW_r,f0 + nop.i 999 +} ;; // Determine if we will use the |x| near 1 path (p6) or normal path (p7) { .mfi - getf.exp pow_GR_signexp_Y = POW_NORM_Y - nop.f 999 + nop.m 999 + fcvt.xf POW_K = POW_int_K cmp.lt p6,p7 = pow_GR_exp_Xm1, pow_GR_exp_2tom8 } { .mfb - ldfpd POW_T, POW_Tt = [pow_AD_Tt], 16 - fma.s1 POW_rsq = POW_r, POW_r,f0 + nop.m 999 + fma.s1 POW_G = f0,f0,f0 // G=0 in case |x| near 1 (p11) br.cond.spnt POW_X_NAN // Branch if x=nan and y not nan } ;; -// If on the x near 1 path, assign r1 to r and r1*r1 to rsq +// If on the x near 1 path, assign r1 to r { .mfi - ldfpd POW_Q0_half, POW_Q1 = [pow_AD_P], 16 + ldfpd POW_Q1, POW_RSHF = [pow_AD_P], 16 (p6) fma.s1 
POW_r = POW_r1, f1, f0 nop.i 999 } @@ -1061,57 +1043,25 @@ POW_COMMON: ;; { .mfi - ldfpd POW_Q4, POW_RSHF = [pow_AD_P], 16 -(p7) fma.s1 POW_v6 = POW_r, POW_P5, POW_P4 - nop.i 999 -} -{ .mfi - nop.m 999 -(p6) fma.s1 POW_v6 = POW_r1, POW_P5, POW_P4 - nop.i 999 -} -;; - -{ .mfi - nop.m 999 -(p7) fma.s1 POW_v4 = POW_P3, POW_r, POW_P2 - nop.i 999 -} -{ .mfi - nop.m 999 -(p6) fma.s1 POW_v4 = POW_P3, POW_r1, POW_P2 - nop.i 999 -} -;; - -{ .mfi - nop.m 999 - fcvt.xf POW_K = POW_int_K - nop.i 999 -} -;; - -{ .mfi getf.sig pow_GR_sig_int_Y = POW_int_Y - fnma.s1 POW_twoV = POW_NORM_Y, POW_rsq,f0 +(p6) fnma.s1 POW_twoV = POW_r1, POW_Q0_half,f1 and pow_GR_exp_Y = pow_GR_signexp_Y, pow_GR_17ones } { .mfb andcm pow_GR_sign_Y = pow_GR_signexp_Y, pow_GR_17ones - fma.s1 POW_U = POW_NORM_Y,POW_r,f0 +(p6) fma.s1 POW_U = POW_NORM_Y,POW_r1,f0 (p12) br.cond.spnt POW_Y_0 // Branch if y=zero, x not zero or nan } ;; -// p11 = TRUE ==> X is NEGATIVE but not inf { .mfi ldfe POW_log2_by_128_lo = [pow_AD_P], 16 - fclass.m p11,p0 = f8, 0x1a +(p7) fma.s1 POW_Z2 = POW_twoV, POW_U, f0 nop.i 999 } { .mfi ldfe POW_log2_by_128_hi = [pow_AD_Q], 16 - fma.s1 POW_v2 = POW_P1, POW_r, POW_P0 + nop.f 999 nop.i 999 } ;; @@ -1123,43 +1073,32 @@ POW_COMMON: } { .mfi nop.m 999 - fma.s1 POW_v3 = POW_v6, POW_rsq, POW_v4 +(p7) fma.s1 POW_G = POW_K, POW_log2_hi, POW_T adds pow_AD_tbl1 = pow_tbl1 - pow_Tt, pow_AD_Q } ;; +// p11 = TRUE ==> X is NEGATIVE but not inf { .mfi nop.m 999 -(p7) fma.s1 POW_delta = POW_K, POW_log2_lo, POW_Tt + fclass.m p11,p0 = POW_NORM_X, 0x1a nop.i 999 } { .mfi nop.m 999 -(p7) fma.s1 POW_G = POW_K, POW_log2_hi, POW_T +(p7) fma.s1 POW_delta = POW_K, POW_log2_lo, f0 adds pow_AD_tbl2 = pow_tbl2 - pow_tbl1, pow_AD_tbl1 } ;; { .mfi nop.m 999 - fms.s1 POW_e2 = POW_NORM_Y, POW_r, POW_U +(p6) fma.s1 POW_Z = POW_twoV, POW_U, f0 nop.i 999 } { .mfi nop.m 999 - fma.s1 POW_Z2 = POW_twoV, POW_Q0_half, POW_U - nop.i 999 -} -;; - -{ .mfi - nop.m 999 - fma.s1 POW_Yrcub = POW_rsq, POW_U, f0 - nop.i 999 -} -{ 
.mfi - nop.m 999 - fma.s1 POW_p = POW_rsq, POW_v3, POW_v2 + fma.s1 POW_v2 = POW_P1, POW_r, POW_P0 nop.i 999 } ;; @@ -1169,7 +1108,7 @@ POW_COMMON: // p13 = TRUE ==> X is NEGATIVE AND Y possible int { .mfi nop.m 999 - fma.s1 POW_Z1 = POW_NORM_Y, POW_G, f0 +(p7) fma.s1 POW_Z = POW_NORM_Y, POW_G, POW_Z2 (p11) cmp.gt.unc p12,p13 = pow_GR_exp_Y, pow_GR_10033 } { .mfi @@ -1179,35 +1118,28 @@ POW_COMMON: } ;; -// By adding RSHF (1.1000...*2^63) we put integer part in rightmost significand { .mfi nop.m 999 - fma.s1 POW_W2 = POW_Z2, POW_inv_log2_by_128, POW_RSHF + fma.s1 POW_Yrcub = POW_rsq, POW_U, f0 nop.i 999 } { .mfi nop.m 999 - fms.s1 POW_UmZ2 = POW_U, f1, POW_Z2 + fma.s1 POW_p = POW_rsq, POW_P2, POW_v2 nop.i 999 } ;; +// Test if x inf { .mfi nop.m 999 - fma.s1 POW_Z3 = POW_p, POW_Yrcub, f0 + fclass.m p15,p0 = POW_NORM_X, 0x23 nop.i 999 } -;; - // By adding RSHF (1.1000...*2^63) we put integer part in rightmost significand { .mfi nop.m 999 - fms.s1 POW_e1 = POW_NORM_Y, POW_G, POW_Z1 - nop.i 999 -} -{ .mfi - nop.m 999 - fma.s1 POW_W1 = POW_Z1, POW_inv_log2_by_128, POW_RSHF + fma.s1 POW_W1 = POW_Z, POW_inv_log2_by_128, POW_RSHF nop.i 999 } ;; @@ -1227,93 +1159,38 @@ POW_COMMON: } ;; -// By subtracting RSHF we get rounded integer POW_N2float -{ .mfi - nop.m 999 - fms.s1 POW_N2float = POW_W2, f1, POW_RSHF - nop.i 999 -} -{ .mfi - nop.m 999 - fma.s1 POW_UmZ2pV = POW_twoV,POW_Q0_half,POW_UmZ2 - nop.i 999 -} -;; - -{ .mfi - nop.m 999 - fma.s1 POW_Z3sq = POW_Z3, POW_Z3, f0 - nop.i 999 -} -{ .mfi - nop.m 999 - fma.s1 POW_v4 = POW_Z3, POW_Q3, POW_Q2 - nop.i 999 -} -;; - -// Extract rounded integer from rightmost significand of POW_W2 -// By subtracting RSHF we get rounded integer POW_N1float -{ .mfi - getf.sig pow_GR_int_W2 = POW_W2 - fms.s1 POW_N1float = POW_W1, f1, POW_RSHF - nop.i 999 -} -{ .mfi - nop.m 999 - fma.s1 POW_v2 = POW_Z3, POW_Q1, POW_Q0_half - nop.i 999 -} -;; - -{ .mfi - nop.m 999 - fnma.s1 POW_s2 = POW_N2float, POW_log2_by_128_hi, POW_Z2 - nop.i 999 -} +// p11 = 
TRUE ==> X is +1.0 { .mfi nop.m 999 - fma.s1 POW_e2 = POW_e2,f1,POW_UmZ2pV + fcmp.eq.s1 p11,p0 = POW_NORM_X, f1 nop.i 999 } ;; // Extract rounded integer from rightmost significand of POW_W1 -// Test if x inf +// By subtracting RSHF we get rounded integer POW_Nfloat { .mfi - getf.sig pow_GR_int_W1 = POW_W1 - fclass.m p15,p0 = POW_NORM_X, 0x23 + getf.sig pow_GR_int_N = POW_W1 + fms.s1 POW_Nfloat = POW_W1, f1, POW_RSHF nop.i 999 } { .mfb nop.m 999 - fnma.s1 POW_f2 = POW_N2float, POW_log2_by_128_lo, f1 + fma.s1 POW_Z3 = POW_p, POW_Yrcub, f0 (p12) br.cond.spnt POW_X_NEG_Y_NONINT // Branch if x neg, y not integer } ;; -// p11 = TRUE ==> X is +1.0 +// p7 = TRUE ==> Y is +1.0 // p12 = TRUE ==> X is NEGATIVE AND Y is an odd integer { .mfi getf.exp pow_GR_signexp_Y_Gpr = POW_Y_Gpr - fcmp.eq.s1 p11,p0 = POW_NORM_X, f1 + fcmp.eq.s1 p7,p0 = POW_NORM_Y, f1 // Test for y=1.0 (p10) tbit.nz.unc p12,p0 = pow_GR_sig_int_Y,0 } -{ .mfi - nop.m 999 - fma.s1 POW_v3 = POW_Z3sq, POW_Q4, POW_v4 - nop.i 999 -} -;; - -{ .mfi - nop.m 999 - fnma.s1 POW_f1 = POW_N1float, POW_log2_by_128_lo, f1 - nop.i 999 -} { .mfb nop.m 999 - fnma.s1 POW_s1 = POW_N1float, POW_log2_by_128_hi, POW_Z1 +(p11) fma.s.s0 f8 = f1,f1,f0 // If x=1, result is +1 (p15) br.cond.spnt POW_X_INF } ;; @@ -1324,77 +1201,73 @@ POW_COMMON: fcmp.eq.s0 p15,p0 = f8,f9 nop.i 999 } -{ .mfi +{ .mfb nop.m 999 fma.s1 POW_e3 = POW_NORM_Y, POW_delta, f0 - nop.i 999 +(p11) br.ret.spnt b0 // Early exit if x=1.0, result is +1 } ;; { .mfi - nop.m 999 - fcmp.eq.s1 p7,p0 = POW_NORM_Y, f1 // Test for y=1.0 +(p12) mov pow_GR_xneg_yodd = 1 + fnma.s1 POW_f12 = POW_Nfloat, POW_log2_by_128_lo, f1 nop.i 999 } -{ .mfi +{ .mfb nop.m 999 - fma.s1 POW_e12 = POW_e1,f1,POW_e2 - nop.i 999 -} -;; - -{ .mfi - add pow_GR_int_N = pow_GR_int_W1, pow_GR_int_W2 -(p11) fma.s.s0 f8 = f1,f1,f0 // If x=1, result is +1 - nop.i 999 -} -{ .mib -(p12) mov pow_GR_xneg_yodd = 1 - nop.i 999 -(p11) br.ret.spnt b0 // Early exit if x=1.0, result is +1 + fnma.s1 POW_s = 
POW_Nfloat, POW_log2_by_128_hi, POW_Z +(p7) br.ret.spnt b0 // Early exit if y=1.0, result is x } ;; -{ .mfi +{ .mmi and pow_GR_index1 = 0x0f, pow_GR_int_N - fma.s1 POW_q = POW_Z3sq, POW_v3, POW_v2 - shr pow_int_GR_M = pow_GR_int_N, 7 // M = N/128 -} -{ .mib and pow_GR_index2 = 0x70, pow_GR_int_N - nop.i 999 -(p7) br.ret.spnt b0 // Early exit if y=1.0, result is x + shr pow_int_GR_M = pow_GR_int_N, 7 // M = N/128 } ;; { .mfi shladd pow_AD_T1 = pow_GR_index1, 4, pow_AD_tbl1 - fma.s1 POW_s = POW_s1, f1, POW_s2 + fma.s1 POW_q = POW_Z3, POW_Q1, POW_Q0_half add pow_int_GR_M = pow_GR_16ones, pow_int_GR_M } { .mfi add pow_AD_T2 = pow_AD_tbl2, pow_GR_index2 - fma.s1 POW_f12 = POW_f1, POW_f2,f0 + fma.s1 POW_Z3sq = POW_Z3, POW_Z3, f0 nop.i 999 } ;; -{ .mmf +{ .mmi ldfe POW_T1 = [pow_AD_T1] ldfe POW_T2 = [pow_AD_T2] - nop.f 999 + nop.i 999 } ;; +// f123 = f12*(e3+1) = f12*e3+f12 { .mfi setf.exp POW_2M = pow_int_GR_M - fma.s1 POW_e123 = POW_e12, f1, POW_e3 - and pow_GR_exp_Y_Gpr = pow_GR_signexp_Y_Gpr, pow_GR_17ones + fma.s1 POW_f123 = POW_e3,POW_f12,POW_f12 + nop.i 999 +} +{ .mfi + nop.m 999 + fma.s1 POW_ssq = POW_s, POW_s, f0 + nop.i 999 } ;; { .mfi nop.m 999 + fma.s1 POW_v2 = POW_s, POW_Q2, POW_Q1 + and pow_GR_exp_Y_Gpr = pow_GR_signexp_Y_Gpr, pow_GR_17ones +} +;; + +{ .mfi + cmp.ne p12,p13 = pow_GR_xneg_yodd, r0 fma.s1 POW_q = POW_Z3sq, POW_q, POW_Z3 sub pow_GR_true_exp_Y_Gpr = pow_GR_exp_Y_Gpr, pow_GR_16ones } @@ -1411,88 +1284,62 @@ POW_COMMON: // Form signexp of constants to indicate overflow { .mfi mov pow_GR_big_pos = 0x1007f - fma.s1 POW_ssq = POW_s, POW_s, f0 + nop.f 999 cmp.le p8,p9 = 7, pow_GR_true_exp_Y_Gpr } { .mfi mov pow_GR_big_neg = 0x3007f - fma.s1 POW_v4 = POW_s, POW_Q3, POW_Q2 + nop.f 999 andcm pow_GR_sign_Y_Gpr = pow_GR_signexp_Y_Gpr, pow_GR_17ones } ;; // Form big positive and negative constants to test for possible overflow +// Scale both terms of the polynomial by POW_f123 { .mfi setf.exp POW_big_pos = pow_GR_big_pos - fma.s1 POW_v2 = POW_s, POW_Q1, 
POW_Q0_half + fma.s1 POW_ssq = POW_ssq, POW_f123, f0 (p9) cmp.le.unc p0,p10 = 6, pow_GR_true_exp_Y_Gpr } { .mfb setf.exp POW_big_neg = pow_GR_big_neg - fma.s1 POW_1ps = f1,f1,POW_s + fma.s1 POW_1ps = POW_s, POW_f123, POW_f123 (p8) br.cond.spnt POW_OVER_UNDER_X_NOT_INF } ;; -// f123 = f12*(e123+1) = f12*e123+f12 { .mfi nop.m 999 - fma.s1 POW_f123 = POW_e123,POW_f12,POW_f12 +(p12) fnma.s1 POW_T1T2 = POW_T1, POW_T2, f0 nop.i 999 } -;; - { .mfi nop.m 999 - fma.s1 POW_T1T2 = POW_T1, POW_T2, f0 +(p13) fma.s1 POW_T1T2 = POW_T1, POW_T2, f0 nop.i 999 } -{ .mfi - nop.m 999 - fma.s1 POW_v3 = POW_ssq, POW_Q4, POW_v4 - cmp.ne p12,p13 = pow_GR_xneg_yodd, r0 -} ;; { .mfi nop.m 999 - fma.s1 POW_2Mqp1 = POW_2M, POW_q, POW_2M - nop.i 999 -} -;; - -{ .mfi - nop.m 999 - fma.s1 POW_v21ps = POW_ssq, POW_v2, POW_1ps + fma.s1 POW_v210 = POW_s, POW_v2, POW_Q0_half nop.i 999 } { .mfi nop.m 999 - fma.s1 POW_s4 = POW_ssq, POW_ssq, f0 - nop.i 999 -} -;; - -{ .mfi - nop.m 999 -(p12) fnma.s1 POW_A = POW_T1T2, POW_f123, f0 - nop.i 999 -} -{ .mfi - nop.m 999 -(p13) fma.s1 POW_A = POW_T1T2, POW_f123, f0 + fma.s1 POW_2Mqp1 = POW_2M, POW_q, POW_2M nop.i 999 } ;; { .mfi nop.m 999 - fma.s1 POW_es = POW_s4, POW_v3, POW_v21ps + fma.s1 POW_es = POW_ssq, POW_v210, POW_1ps nop.i 999 } { .mfi nop.m 999 - fma.s1 POW_A = POW_A, POW_2Mqp1, f0 + fma.s1 POW_A = POW_T1T2, POW_2Mqp1, f0 nop.i 999 } ;; @@ -1623,16 +1470,25 @@ POW_POSSIBLE_UNDER: // 0.1...11 2^-3ffe (biased, 1) // largest dn smallest normal +// Form small constant (2^-170) to correct underflow result near region of +// smallest denormal in round-nearest. 
+ // Put in s2 (td set, ftz set) +.pred.rel "mutex",p12,p13 { .mfi - nop.m 999 + mov pow_GR_Fpsr = ar40 // Read the fpsr--need to check rc.s0 fsetc.s2 0x7F,0x41 - nop.i 999 + mov pow_GR_rcs0_mask = 0x0c00 // Set mask for rc.s0 +} +{ .mfi +(p12) mov pow_GR_tmp = 0x2ffff - 170 + nop.f 999 +(p13) mov pow_GR_tmp = 0x0ffff - 170 } ;; { .mfi - nop.m 999 + setf.exp POW_eps = pow_GR_tmp // Form 2^-170 fma.s.s2 POW_ftz_urm_f8 = POW_A, POW_es, f0 nop.i 999 } @@ -1654,6 +1510,21 @@ POW_POSSIBLE_UNDER: } ;; +{ .mmi +(p7) and pow_GR_rcs0 = pow_GR_rcs0_mask, pow_GR_Fpsr // Isolate rc.s0 +;; +(p7) cmp.eq.unc p6,p0 = pow_GR_rcs0, r0 // Test for round to nearest + nop.i 999 +} +;; + +// Tweak result slightly if underflow to get correct rounding near smallest +// denormal if round-nearest +{ .mfi + nop.m 999 +(p6) fms.s.s0 f8 = POW_A, POW_es, POW_eps + nop.i 999 +} { .mbb (p7) mov pow_GR_tag = 31 (p7) br.cond.spnt __libm_error_region // Branch if underflow @@ -1671,16 +1542,8 @@ POW_X_DENORM: } ;; -{ .mmi - getf.sig pow_GR_sig_X = POW_NORM_X -;; - and pow_GR_exp_X = pow_GR_signexp_X, pow_GR_17ones - nop.i 999 -} -;; - { .mib - sub pow_GR_true_exp_X = pow_GR_exp_X, pow_GR_16ones + getf.sig pow_GR_sig_X = POW_NORM_X nop.i 999 br.cond.sptk POW_COMMON } @@ -2140,6 +2003,7 @@ POW_OVER_UNDER_ERROR: GLOBAL_LIBM_END(powf) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/e_powl.S b/sysdeps/ia64/fpu/e_powl.S index 0896c19..3f93f60 100644 --- a/sysdeps/ia64/fpu/e_powl.S +++ b/sysdeps/ia64/fpu/e_powl.S @@ -60,6 +60,7 @@ // 02/10/03 Reordered header: .section, .global, .proc, .align; // used data8 for long double table values // 04/17/03 Added missing mutex directive +// 10/13/03 Corrected .endp names to match .proc names // //********************************************************************* // @@ -2755,6 +2756,7 @@ POWL_64_SQRT: GLOBAL_LIBM_END(powl) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi @@ -2803,6 +2805,6 @@ 
LOCAL_LIBM_ENTRY(__libm_error_region) br.ret.sptk b0 // Return };; -.endp +LOCAL_LIBM_END(__libm_error_region#) .type __libm_error_support#,@function .global __libm_error_support# diff --git a/sysdeps/ia64/fpu/e_remainder.S b/sysdeps/ia64/fpu/e_remainder.S index 2f6e90f..f655567 100644 --- a/sysdeps/ia64/fpu/e_remainder.S +++ b/sysdeps/ia64/fpu/e_remainder.S @@ -531,6 +531,7 @@ EXP_ERROR_RETURN: GLOBAL_IEEE754_END(remainder) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_remainderf.S b/sysdeps/ia64/fpu/e_remainderf.S index bbb5fd0..0e9bedd 100644 --- a/sysdeps/ia64/fpu/e_remainderf.S +++ b/sysdeps/ia64/fpu/e_remainderf.S @@ -550,6 +550,7 @@ EXP_ERROR_RETURN: GLOBAL_IEEE754_END(remainderf) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_remainderl.S b/sysdeps/ia64/fpu/e_remainderl.S index 1c1a3c3..8c1630e 100644 --- a/sysdeps/ia64/fpu/e_remainderl.S +++ b/sysdeps/ia64/fpu/e_remainderl.S @@ -557,6 +557,7 @@ EXP_ERROR_RETURN: } GLOBAL_IEEE754_END(remainderl) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_scalb.S b/sysdeps/ia64/fpu/e_scalb.S index 82e914e..3d48aab 100644 --- a/sysdeps/ia64/fpu/e_scalb.S +++ b/sysdeps/ia64/fpu/e_scalb.S @@ -21,60 +21,82 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== // 02/02/00 Initial version -// 01/26/01 Scalb completely reworked and now standalone version +// 01/26/01 Scalb completely reworked and now standalone version // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align +// 08/06/03 Improved performance // // API //============================================================== -// double = scalb (double x, double n) +// double = scalb (double x, double n) // input floating point f8 and floating point f9 // output floating point f8 // +// int_type = 0 if int is 32 bits +// int_type = 1 if int is 64 bits +// // Returns x* 2**n using an fma and detects overflow -// and underflow. +// and underflow. 
// // +// Strategy: +// Compute biased exponent of result exp_Result = N + exp_X +// Break into ranges: +// exp_Result > 0x103fe -> Certain overflow +// exp_Result = 0x103fe -> Possible overflow +// 0x0fc01 <= exp_Result < 0x103fe -> No over/underflow (main path) +// 0x0fc01 - 52 <= exp_Result < 0x0fc01 -> Possible underflow +// exp_Result < 0x0fc01 - 52 -> Certain underflow +FR_Big = f6 +FR_NBig = f7 FR_Floating_X = f8 FR_Result = f8 FR_Floating_N = f9 FR_Result2 = f9 -FR_Norm_N = f10 -FR_Result3 = f11 -FR_Norm_X = f12 +FR_Result3 = f10 +FR_Norm_X = f11 +FR_Two_N = f12 FR_N_float_int = f13 -FR_Two_N = f14 -FR_Two_to_Big = f15 -FR_Big = f6 -FR_NBig = f7 +FR_Norm_N = f14 +GR_neg_ov_limit= r14 +GR_big_exp = r14 GR_N_Biased = r15 GR_Big = r16 -GR_NBig = r17 -GR_Scratch = r18 -GR_Scratch1 = r19 +GR_exp_Result = r18 +GR_pos_ov_limit= r19 +GR_exp_sure_ou = r19 GR_Bias = r20 GR_N_as_int = r21 +GR_signexp_X = r22 +GR_exp_X = r23 +GR_exp_mask = r24 +GR_max_exp = r25 +GR_min_exp = r26 +GR_min_den_exp = r27 +GR_Scratch = r28 +GR_signexp_N = r29 +GR_exp_N = r30 GR_SAVE_B0 = r32 GR_SAVE_GP = r33 @@ -89,412 +111,442 @@ GLOBAL_IEEE754_ENTRY(scalb) // // Is x NAN, INF, ZERO, +-? +// Build the exponent Bias // { .mfi - alloc r32=ar.pfs,0,3,4,0 - fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero - addl GR_Scratch = 0x019C3F,r0 + getf.exp GR_signexp_N = FR_Floating_N // Get signexp of n + fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero + mov GR_Bias = 0x0ffff +} +{ .mfi + mov GR_Big = 35000 // If N this big then certain overflow + fcvt.fx.trunc.s1 FR_N_float_int = FR_Floating_N // Get N in significand + nop.i 0 +} +;; + +{ .mfi + getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x + fclass.m p7,p0 = FR_Floating_N, 0x0b // Test for n=unorm + nop.i 0 } // -// Is y a NAN, INF, ZERO, +-? 
+// Normalize n // { .mfi - nop.m 999 - fclass.m.unc p6,p0 = FR_Floating_N, 0xe7 //@snan | @qnan | @inf | @zero - addl GR_Scratch1 = 0x063BF,r0 + mov GR_exp_mask = 0x1ffff // Exponent mask + fnorm.s1 FR_Norm_N = FR_Floating_N + nop.i 0 } ;; // -// Convert N to a fp integer -// Normalize x +// Is n NAN, INF, ZERO, +-? // { .mfi - nop.m 0 - fnorm.s1 FR_Norm_N = FR_Floating_N - nop.i 999 + mov GR_big_exp = 0x1003e // Exponent at which n is integer + fclass.m p9,p0 = FR_Floating_N, 0xe7 // @snan | @qnan | @inf | @zero + mov GR_max_exp = 0x103fe // Exponent of maximum double } -{ .mfi - nop.m 999 - fnorm.s1 FR_Norm_X = FR_Floating_X - nop.i 999 -};; - // -// Create 2*big -// Create 2**-big // Normalize x -// Branch on special values. // -{ .mib - setf.exp FR_Big = GR_Scratch - nop.i 0 -(p6) br.cond.spnt SCALB_NAN_INF_ZERO +{ .mfb + nop.m 0 + fnorm.s1 FR_Norm_X = FR_Floating_X +(p7) br.cond.spnt SCALB_N_UNORM // Branch if n=unorm } -{ .mib - setf.exp FR_NBig = GR_Scratch1 - nop.i 0 -(p7) br.cond.spnt SCALB_NAN_INF_ZERO -};; +;; -// -// Convert N to a fp integer -// Create -35000 -// +SCALB_COMMON1: +// Main path continues. Also return here from u=unorm path. 
+// Handle special cases if x = Nan, Inf, Zero +{ .mfb + nop.m 0 + fcmp.lt.s1 p7,p0 = FR_Floating_N, f0 // Test N negative +(p6) br.cond.spnt SCALB_NAN_INF_ZERO +} +;; + +// Handle special cases if n = Nan, Inf, Zero { .mfi - addl GR_Scratch = 1,r0 - fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N - addl GR_NBig = -35000,r0 + getf.sig GR_N_as_int = FR_N_float_int // Get n from significand + fclass.m p8,p0 = FR_Floating_X, 0x0b // Test for x=unorm + mov GR_exp_sure_ou = 0x1000e // Exp_N where x*2^N sure over/under +} +{ .mfb + mov GR_min_exp = 0x0fc01 // Exponent of minimum double + fcvt.xf FR_N_float_int = FR_N_float_int // Convert N to FP integer +(p9) br.cond.spnt SCALB_NAN_INF_ZERO } ;; -// -// Put N if a GP register -// Convert N_float_int to floating point value -// Create 35000 -// Build the exponent Bias -// -{ .mii - getf.sig GR_N_as_int = FR_N_float_int - shl GR_Scratch = GR_Scratch,63 - addl GR_Big = 35000,r0 +{ .mmi + and GR_exp_N = GR_exp_mask, GR_signexp_N // Get exponent of N +(p7) sub GR_Big = r0, GR_Big // Limit for N + nop.i 0 } -{ .mfi - addl GR_Bias = 0x0FFFF,r0 - fcvt.xf FR_N_float_int = FR_N_float_int - nop.i 0 -};; +;; -// -// Catch those fp values that are beyond 2**64-1 -// Is N > 35000 -// Is N < -35000 -// -{ .mfi - cmp.ne.unc p9,p10 = GR_N_as_int,GR_Scratch - nop.f 0 - nop.i 0 +{ .mib + cmp.lt p9,p0 = GR_exp_N, GR_big_exp // N possible non-integer? + cmp.ge p6,p0 = GR_exp_N, GR_exp_sure_ou // N certain over/under? +(p8) br.cond.spnt SCALB_X_UNORM // Branch if x=unorm } -{ .mmi - cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big - cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig - nop.i 0 -};; +;; -// -// Is N really an int, only for those non-int indefinites? -// Create exp bias. -// -{ .mfi - add GR_N_Biased = GR_Bias,GR_N_as_int -(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int - nop.i 0 -};; +SCALB_COMMON2: +// Main path continues. Also return here from x=unorm path. 
+// Create biased exponent for 2**N +{ .mmi +(p6) mov GR_N_as_int = GR_Big // Limit N +;; + add GR_N_Biased = GR_Bias,GR_N_as_int + nop.i 0 +} +;; -// -// Branch and return if N is not an int. -// Main path, create 2**N -// { .mfi - setf.exp FR_Two_N = GR_N_Biased - nop.i 999 + setf.exp FR_Two_N = GR_N_Biased // Form 2**N +(p9) fcmp.neq.unc.s1 p9,p0 = FR_Norm_N, FR_N_float_int // Test if N an integer + and GR_exp_X = GR_exp_mask, GR_signexp_X // Get exponent of X } -{ .mfb - nop.m 0 -(p7) frcpa.s0 f8,p11 = f0,f0 -(p7) br.ret.spnt b0 -};; +;; // -// Set denormal on denormal input x and denormal input N +// Compute biased result exponent +// Branch if N is not an integer // -{ .mfi - nop.m 999 -(p10)fcmp.ge.s1 p6,p8 = FR_Norm_N,f0 - nop.i 0 -};; -{ .mfi - nop.m 999 - fcmp.ge.s0 p0,p11 = FR_Floating_X,f0 - nop.i 999 +{ .mib + add GR_exp_Result = GR_exp_X, GR_N_as_int + mov GR_min_den_exp = 0x0fc01 - 52 // Exponent of min denorm dble +(p9) br.cond.spnt SCALB_N_NOT_INT } -{ .mfi - nop.m 999 - fcmp.ge.s0 p12,p13 = FR_Floating_N,f0 - nop.i 0 -};; +;; // -// Adjust 2**N if N was very small or very large +// Raise Denormal operand flag with compare +// Do final operation // - { .mfi - nop.m 0 -(p6) fma.s1 FR_Two_N = FR_Big,f1,f0 - nop.i 0 + cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow + fcmp.ge.s0 p0,p11 = FR_Floating_X,FR_Floating_N // Dummy to set denorm + cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow } -{ .mlx - nop.m 999 - movl GR_Scratch = 0x00000000000303FF -};; -{ .mfi - nop.m 0 -(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0 - nop.i 0 +{ .mfb + nop.m 0 + fma.d.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 +(p9) br.cond.spnt SCALB_UNDERFLOW // Branch if certain underflow } -{ .mlx - nop.m 999 - movl GR_Scratch1= 0x00000000000103FF -};; +;; + +{ .mib +(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow +(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow +(p7) br.ret.sptk b0 // Return from main path +} +;; 
-// Set up necessary status fields +{ .bbb +(p6) br.cond.spnt SCALB_OVERFLOW // Branch if certain overflow +(p8) br.cond.spnt SCALB_POSSIBLE_OVERFLOW // Branch if possible overflow +(p9) br.cond.spnt SCALB_POSSIBLE_UNDERFLOW // Branch if possible underflow +} +;; + +// Here if possible underflow. +// Resulting exponent: 0x0fc01-52 <= exp_Result < 0x0fc01 +SCALB_POSSIBLE_UNDERFLOW: +// +// Here if possible overflow. +// Resulting exponent: 0x103fe = exp_Result +SCALB_POSSIBLE_OVERFLOW: + +// Set up necessary status fields // // S0 user supplied status // S2 user supplied status + WRE + TD (Overflows) // S3 user supplied status + FZ + TD (Underflows) // { .mfi - nop.m 999 - fsetc.s3 0x7F,0x41 - nop.i 999 + mov GR_pos_ov_limit = 0x103ff // Exponent for positive overflow + fsetc.s3 0x7F,0x41 + nop.i 0 } { .mfi - nop.m 999 - fsetc.s2 0x7F,0x42 - nop.i 999 -};; + mov GR_neg_ov_limit = 0x303ff // Exponent for negative overflow + fsetc.s2 0x7F,0x42 + nop.i 0 +} +;; // -// Do final operation +// Do final operation with s2 and s3 // { .mfi - setf.exp FR_NBig = GR_Scratch - fma.d.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 - nop.i 999 + setf.exp FR_NBig = GR_neg_ov_limit + fma.d.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 + nop.i 0 } { .mfi - nop.m 999 - fma.d.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 - nop.i 999 -};; -{ .mfi - setf.exp FR_Big = GR_Scratch1 - fma.d.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 - nop.i 999 -};; + setf.exp FR_Big = GR_pos_ov_limit + fma.d.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 + nop.i 0 +} +;; // Check for overflow or underflow. -// -// S0 user supplied status -// S2 user supplied status + WRE + TD (Overflow) -// S3 user supplied status + FZ + TD (Underflow) -// -// // Restore s3 // Restore s2 // { .mfi - nop.m 0 - fsetc.s3 0x7F,0x40 - nop.i 999 + nop.m 0 + fsetc.s3 0x7F,0x40 + nop.i 0 } { .mfi - nop.m 0 - fsetc.s2 0x7F,0x40 - nop.i 999 -};; + nop.m 0 + fsetc.s2 0x7F,0x40 + nop.i 0 +} +;; // // Is the result zero? 
// { .mfi - nop.m 999 - fclass.m.unc p6, p0 = FR_Result3, 0x007 - nop.i 999 -} + nop.m 0 + fclass.m p6, p0 = FR_Result3, 0x007 + nop.i 0 +} { .mfi - addl GR_Tag = 53, r0 - fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big - nop.i 0 -};; + nop.m 0 + fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big + nop.i 0 +} +;; // // Detect masked underflow - Tiny + Inexact Only // { .mfi - nop.m 999 + nop.m 0 (p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2 - nop.i 999 -};; + nop.i 0 +} +;; // // Is result bigger the allowed range? // Branch out for underflow // { .mfb -(p6) addl GR_Tag = 54, r0 + nop.m 0 (p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig -(p6) br.cond.spnt SCALB_UNDERFLOW -};; +(p6) br.cond.spnt SCALB_UNDERFLOW +} +;; // // Branch out for overflow // -{ .mbb - nop.m 0 -(p7) br.cond.spnt SCALB_OVERFLOW -(p9) br.cond.spnt SCALB_OVERFLOW -};; +{ .bbb +(p7) br.cond.spnt SCALB_OVERFLOW +(p9) br.cond.spnt SCALB_OVERFLOW + br.ret.sptk b0 // Return from main path. +} +;; -// -// Return from main path. 
-// -{ .mfb - nop.m 999 - nop.f 0 - br.ret.sptk b0;; +// Here if result overflows +SCALB_OVERFLOW: +{ .mib + alloc r32=ar.pfs,3,0,4,0 + addl GR_Tag = 53, r0 // Set error tag for overflow + br.cond.sptk __libm_error_region // Call error support for overflow +} +;; + +// Here if result underflows +SCALB_UNDERFLOW: +{ .mib + alloc r32=ar.pfs,3,0,4,0 + addl GR_Tag = 54, r0 // Set error tag for underflow + br.cond.sptk __libm_error_region // Call error support for underflow } +;; -SCALB_NAN_INF_ZERO: +SCALB_NAN_INF_ZERO: // -// Convert N to a fp integer -// +// Before entry, N has been converted to a fp integer in significand of +// FR_N_float_int +// +// Convert N_float_int to floating point value +// { .mfi - addl GR_Scratch = 1,r0 - fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N - nop.i 999 + getf.sig GR_N_as_int = FR_N_float_int + fclass.m p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan + nop.i 0 } { .mfi - nop.m 0 - fclass.m.unc p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan - nop.i 0 -};; + addl GR_Scratch = 1,r0 + fcvt.xf FR_N_float_int = FR_N_float_int + nop.i 0 +} +;; + { .mfi - nop.m 0 - fclass.m.unc p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan - shl GR_Scratch = GR_Scratch,63 -};; + nop.m 0 + fclass.m p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan + shl GR_Scratch = GR_Scratch,63 +} +;; + { .mfi - nop.m 0 - fclass.m.unc p8,p0 = FR_Floating_N, 0x21 // @inf - nop.i 0 -} - { .mfi - nop.m 0 - fclass.m.unc p9,p0 = FR_Floating_N, 0x22 // @-inf - nop.i 0 -};; + nop.m 0 + fclass.m p8,p0 = FR_Floating_N, 0x21 // @inf + nop.i 0 +} +{ .mfi + nop.m 0 + fclass.m p9,p0 = FR_Floating_N, 0x22 // @-inf + nop.i 0 +} +;; // // Either X or N is a Nan, return result and possible raise invalid. 
// { .mfb - nop.m 0 -(p6) fma.d.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0 + nop.m 0 +(p6) fma.d.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0 (p6) br.ret.spnt b0 -};; +} +;; + { .mfb - getf.sig GR_N_as_int = FR_N_float_int -(p7) fma.d.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0 + nop.m 0 +(p7) fma.d.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0 (p7) br.ret.spnt b0 -};; +} +;; // // If N + Inf do something special // For N = -Inf, create Int // { .mfb - nop.m 0 -(p8) fma.d.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0 -(p8) br.ret.spnt b0 + nop.m 0 +(p8) fma.d.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0 +(p8) br.ret.spnt b0 } { .mfi - nop.m 0 -(p9) fnma.d.s0 FR_Floating_N = FR_Floating_N, f1, f0 - nop.i 0 -};; + nop.m 0 +(p9) fnma.d.s0 FR_Floating_N = FR_Floating_N, f1, f0 + nop.i 0 +} +;; // // If N==-Inf,return x/(-N) // { .mfb - nop.m 0 -(p9) frcpa.s0 FR_Result,p6 = FR_Floating_X,FR_Floating_N -(p9) br.ret.spnt b0 -};; - -// -// Convert N_float_int to floating point value -// -{ .mfi - cmp.ne.unc p9,p0 = GR_N_as_int,GR_Scratch - fcvt.xf FR_N_float_int = FR_N_float_int - nop.i 0 -};; + cmp.ne p7,p0 = GR_N_as_int,GR_Scratch +(p9) frcpa.s0 FR_Result,p0 = FR_Floating_X,FR_Floating_N +(p9) br.ret.spnt b0 +} +;; // // Is N an integer. // { .mfi - nop.m 0 -(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int - nop.i 0 -};; + nop.m 0 +(p7) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int + nop.i 0 +} +;; // // If N not an int, return NaN and raise invalid. // { .mfb - nop.m 0 -(p7) frcpa.s0 FR_Result,p6 = f0,f0 -(p7) br.ret.spnt b0 -};; + nop.m 0 +(p7) frcpa.s0 FR_Result,p0 = f0,f0 +(p7) br.ret.spnt b0 +} +;; // -// Always return x in other path. +// Always return x in other path. // { .mfb - nop.m 0 - fma.d.s0 FR_Result = FR_Floating_X,f1,f0 - br.ret.sptk b0 -};; + nop.m 0 + fma.d.s0 FR_Result = FR_Floating_X,f1,f0 + br.ret.sptk b0 +} +;; -GLOBAL_IEEE754_END(scalb) -__libm_error_region: +// Here if n not int +// Return NaN and raise invalid. 
+SCALB_N_NOT_INT: +{ .mfb + nop.m 0 + frcpa.s0 FR_Result,p0 = f0,f0 + br.ret.sptk b0 +} +;; + +// Here if n=unorm +SCALB_N_UNORM: +{ .mfb + getf.exp GR_signexp_N = FR_Norm_N // Get signexp of normalized n + fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N // Get N in significand + br.cond.sptk SCALB_COMMON1 // Return to main path +} +;; + +// Here if x=unorm +SCALB_X_UNORM: +{ .mib + getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x + nop.i 0 + br.cond.sptk SCALB_COMMON2 // Return to main path +} +;; -SCALB_OVERFLOW: -SCALB_UNDERFLOW: +GLOBAL_IEEE754_END(scalb) +LOCAL_LIBM_ENTRY(__libm_error_region) // // Get stack address of N // .prologue { .mfi - add GR_Parameter_Y=-32,sp + add GR_Parameter_Y=-32,sp nop.f 0 .save ar.pfs,GR_SAVE_PFS - mov GR_SAVE_PFS=ar.pfs + mov GR_SAVE_PFS=ar.pfs } // -// Adjust sp +// Adjust sp // { .mfi .fframe 64 - add sp=-64,sp + add sp=-64,sp nop.f 0 - mov GR_SAVE_GP=gp + mov GR_SAVE_GP=gp };; // -// Store N on stack in correct position +// Store N on stack in correct position // Locate the address of x on stack // { .mmi - stfd [GR_Parameter_Y] = FR_Norm_N,16 - add GR_Parameter_X = 16,sp + stfd [GR_Parameter_Y] = FR_Norm_N,16 + add GR_Parameter_X = 16,sp .save b0, GR_SAVE_B0 - mov GR_SAVE_B0=b0 + mov GR_SAVE_B0=b0 };; // @@ -503,42 +555,42 @@ SCALB_UNDERFLOW: // .body { .mib - stfd [GR_Parameter_X] = FR_Norm_X - add GR_Parameter_RESULT = 0,GR_Parameter_Y + stfd [GR_Parameter_X] = FR_Norm_X + add GR_Parameter_RESULT = 0,GR_Parameter_Y nop.b 0 } { .mib - stfd [GR_Parameter_Y] = FR_Result + stfd [GR_Parameter_Y] = FR_Result add GR_Parameter_Y = -16,GR_Parameter_Y - br.call.sptk b0=__libm_error_support# + br.call.sptk b0=__libm_error_support# };; // // Get location of result on stack // { .mmi + add GR_Parameter_RESULT = 48,sp nop.m 0 - nop.m 0 - add GR_Parameter_RESULT = 48,sp + nop.i 0 };; // -// Get the new result +// Get the new result // { .mmi - ldfd FR_Result = [GR_Parameter_RESULT] + ldfd FR_Result = [GR_Parameter_RESULT] 
.restore sp - add sp = 64,sp - mov b0 = GR_SAVE_B0 + add sp = 64,sp + mov b0 = GR_SAVE_B0 };; // // Restore gp, ar.pfs and return // { .mib - mov gp = GR_SAVE_GP - mov ar.pfs = GR_SAVE_PFS - br.ret.sptk b0 + mov gp = GR_SAVE_GP + mov ar.pfs = GR_SAVE_PFS + br.ret.sptk b0 };; LOCAL_LIBM_END(__libm_error_region) diff --git a/sysdeps/ia64/fpu/e_scalbf.S b/sysdeps/ia64/fpu/e_scalbf.S index 07acb32..e965667 100644 --- a/sysdeps/ia64/fpu/e_scalbf.S +++ b/sysdeps/ia64/fpu/e_scalbf.S @@ -21,60 +21,82 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== // 02/02/00 Initial version -// 01/26/01 Scalb completely reworked and now standalone version +// 01/26/01 Scalb completely reworked and now standalone version // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align +// 08/06/03 Improved performance // // API //============================================================== -// float = scalbf (float x, float n) +// float = scalbf (float x, float n) // input floating point f8 and floating point f9 // output floating point f8 // +// int_type = 0 if int is 32 bits +// int_type = 1 if int is 64 bits +// // Returns x* 2**n using an fma and detects overflow -// and underflow. +// and underflow. 
// // +// Strategy: +// Compute biased exponent of result exp_Result = N + exp_X +// Break into ranges: +// exp_Result > 0x1007e -> Certain overflow +// exp_Result = 0x1007e -> Possible overflow +// 0x0ff81 <= exp_Result < 0x1007e -> No over/underflow (main path) +// 0x0ff81 - 23 <= exp_Result < 0x0ff81 -> Possible underflow +// exp_Result < 0x0ff81 - 23 -> Certain underflow +FR_Big = f6 +FR_NBig = f7 FR_Floating_X = f8 FR_Result = f8 FR_Floating_N = f9 FR_Result2 = f9 -FR_Norm_N = f10 -FR_Result3 = f11 -FR_Norm_X = f12 +FR_Result3 = f10 +FR_Norm_X = f11 +FR_Two_N = f12 FR_N_float_int = f13 -FR_Two_N = f14 -FR_Two_to_Big = f15 -FR_Big = f6 -FR_NBig = f7 +FR_Norm_N = f14 +GR_neg_ov_limit= r14 +GR_big_exp = r14 GR_N_Biased = r15 GR_Big = r16 -GR_NBig = r17 -GR_Scratch = r18 -GR_Scratch1 = r19 +GR_exp_Result = r18 +GR_pos_ov_limit= r19 +GR_exp_sure_ou = r19 GR_Bias = r20 GR_N_as_int = r21 +GR_signexp_X = r22 +GR_exp_X = r23 +GR_exp_mask = r24 +GR_max_exp = r25 +GR_min_exp = r26 +GR_min_den_exp = r27 +GR_Scratch = r28 +GR_signexp_N = r29 +GR_exp_N = r30 GR_SAVE_B0 = r32 GR_SAVE_GP = r33 @@ -89,412 +111,442 @@ GLOBAL_IEEE754_ENTRY(scalbf) // // Is x NAN, INF, ZERO, +-? +// Build the exponent Bias // { .mfi - alloc r32=ar.pfs,0,3,4,0 - fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero - addl GR_Scratch = 0x019C3F,r0 + getf.exp GR_signexp_N = FR_Floating_N // Get signexp of n + fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero + mov GR_Bias = 0x0ffff +} +{ .mfi + mov GR_Big = 35000 // If N this big then certain overflow + fcvt.fx.trunc.s1 FR_N_float_int = FR_Floating_N // Get N in significand + nop.i 0 +} +;; + +{ .mfi + getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x + fclass.m p7,p0 = FR_Floating_N, 0x0b // Test for n=unorm + nop.i 0 } // -// Is y a NAN, INF, ZERO, +-? 
+// Normalize n // { .mfi - nop.m 999 - fclass.m.unc p6,p0 = FR_Floating_N, 0xe7 //@snan | @qnan | @inf | @zero - addl GR_Scratch1 = 0x063BF,r0 + mov GR_exp_mask = 0x1ffff // Exponent mask + fnorm.s1 FR_Norm_N = FR_Floating_N + nop.i 0 } ;; // -// Convert N to a fp integer -// Normalize x +// Is n NAN, INF, ZERO, +-? // { .mfi - nop.m 0 - fnorm.s1 FR_Norm_N = FR_Floating_N - nop.i 999 + mov GR_big_exp = 0x1003e // Exponent at which n is integer + fclass.m p9,p0 = FR_Floating_N, 0xe7 // @snan | @qnan | @inf | @zero + mov GR_max_exp = 0x1007e // Exponent of maximum float } -{ .mfi - nop.m 999 - fnorm.s1 FR_Norm_X = FR_Floating_X - nop.i 999 -};; - // -// Create 2*big -// Create 2**-big // Normalize x -// Branch on special values. // -{ .mib - setf.exp FR_Big = GR_Scratch - nop.i 0 -(p6) br.cond.spnt SCALBF_NAN_INF_ZERO +{ .mfb + nop.m 0 + fnorm.s1 FR_Norm_X = FR_Floating_X +(p7) br.cond.spnt SCALBF_N_UNORM // Branch if n=unorm } -{ .mib - setf.exp FR_NBig = GR_Scratch1 - nop.i 0 -(p7) br.cond.spnt SCALBF_NAN_INF_ZERO -};; +;; -// -// Convert N to a fp integer -// Create -35000 -// +SCALBF_COMMON1: +// Main path continues. Also return here from u=unorm path. 
+// Handle special cases if x = Nan, Inf, Zero +{ .mfb + nop.m 0 + fcmp.lt.s1 p7,p0 = FR_Floating_N, f0 // Test N negative +(p6) br.cond.spnt SCALBF_NAN_INF_ZERO +} +;; + +// Handle special cases if n = Nan, Inf, Zero { .mfi - addl GR_Scratch = 1,r0 - fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N - addl GR_NBig = -35000,r0 + getf.sig GR_N_as_int = FR_N_float_int // Get n from significand + fclass.m p8,p0 = FR_Floating_X, 0x0b // Test for x=unorm + mov GR_exp_sure_ou = 0x1000e // Exp_N where x*2^N sure over/under +} +{ .mfb + mov GR_min_exp = 0x0ff81 // Exponent of minimum float + fcvt.xf FR_N_float_int = FR_N_float_int // Convert N to FP integer +(p9) br.cond.spnt SCALBF_NAN_INF_ZERO } ;; -// -// Put N if a GP register -// Convert N_float_int to floating point value -// Create 35000 -// Build the exponent Bias -// -{ .mii - getf.sig GR_N_as_int = FR_N_float_int - shl GR_Scratch = GR_Scratch,63 - addl GR_Big = 35000,r0 +{ .mmi + and GR_exp_N = GR_exp_mask, GR_signexp_N // Get exponent of N +(p7) sub GR_Big = r0, GR_Big // Limit for N + nop.i 0 } -{ .mfi - addl GR_Bias = 0x0FFFF,r0 - fcvt.xf FR_N_float_int = FR_N_float_int - nop.i 0 -};; +;; -// -// Catch those fp values that are beyond 2**64-1 -// Is N > 35000 -// Is N < -35000 -// -{ .mfi - cmp.ne.unc p9,p10 = GR_N_as_int,GR_Scratch - nop.f 0 - nop.i 0 +{ .mib + cmp.lt p9,p0 = GR_exp_N, GR_big_exp // N possible non-integer? + cmp.ge p6,p0 = GR_exp_N, GR_exp_sure_ou // N certain over/under? +(p8) br.cond.spnt SCALBF_X_UNORM // Branch if x=unorm } -{ .mmi - cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big - cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig - nop.i 0 -};; +;; -// -// Is N really an int, only for those non-int indefinites? -// Create exp bias. -// -{ .mfi - add GR_N_Biased = GR_Bias,GR_N_as_int -(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int - nop.i 0 -};; +SCALBF_COMMON2: +// Main path continues. Also return here from x=unorm path. 
+// Create biased exponent for 2**N +{ .mmi +(p6) mov GR_N_as_int = GR_Big // Limit N +;; + add GR_N_Biased = GR_Bias,GR_N_as_int + nop.i 0 +} +;; -// -// Branch and return if N is not an int. -// Main path, create 2**N -// { .mfi - setf.exp FR_Two_N = GR_N_Biased - nop.i 999 + setf.exp FR_Two_N = GR_N_Biased // Form 2**N +(p9) fcmp.neq.unc.s1 p9,p0 = FR_Norm_N, FR_N_float_int // Test if N an integer + and GR_exp_X = GR_exp_mask, GR_signexp_X // Get exponent of X } -{ .mfb - nop.m 0 -(p7) frcpa.s0 f8,p11 = f0,f0 -(p7) br.ret.spnt b0 -};; +;; // -// Set denormal on denormal input x and denormal input N +// Compute biased result exponent +// Branch if N is not an integer // -{ .mfi - nop.m 999 -(p10)fcmp.ge.s1 p6,p8 = FR_Norm_N,f0 - nop.i 0 -};; -{ .mfi - nop.m 999 - fcmp.ge.s0 p0,p11 = FR_Floating_X,f0 - nop.i 999 +{ .mib + add GR_exp_Result = GR_exp_X, GR_N_as_int + mov GR_min_den_exp = 0x0ff81 - 23 // Exponent of min denorm float +(p9) br.cond.spnt SCALBF_N_NOT_INT } -{ .mfi - nop.m 999 - fcmp.ge.s0 p12,p13 = FR_Floating_N,f0 - nop.i 0 -};; +;; // -// Adjust 2**N if N was very small or very large +// Raise Denormal operand flag with compare +// Do final operation // - { .mfi - nop.m 0 -(p6) fma.s1 FR_Two_N = FR_Big,f1,f0 - nop.i 0 + cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow + fcmp.ge.s0 p0,p11 = FR_Floating_X,FR_Floating_N // Dummy to set denorm + cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow } -{ .mlx - nop.m 999 - movl GR_Scratch = 0x000000000003007F -};; -{ .mfi - nop.m 0 -(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0 - nop.i 0 +{ .mfb + nop.m 0 + fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 +(p9) br.cond.spnt SCALBF_UNDERFLOW // Branch if certain underflow } -{ .mlx - nop.m 999 - movl GR_Scratch1= 0x000000000001007F -};; +;; + +{ .mib +(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow +(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow +(p7) br.ret.sptk b0 // Return from main path +} 
+;; -// Set up necessary status fields +{ .bbb +(p6) br.cond.spnt SCALBF_OVERFLOW // Branch if certain overflow +(p8) br.cond.spnt SCALBF_POSSIBLE_OVERFLOW // Branch if possible overflow +(p9) br.cond.spnt SCALBF_POSSIBLE_UNDERFLOW // Branch if possible underflow +} +;; + +// Here if possible underflow. +// Resulting exponent: 0x0ff81-23 <= exp_Result < 0x0ff81 +SCALBF_POSSIBLE_UNDERFLOW: +// +// Here if possible overflow. +// Resulting exponent: 0x1007e = exp_Result +SCALBF_POSSIBLE_OVERFLOW: + +// Set up necessary status fields // // S0 user supplied status // S2 user supplied status + WRE + TD (Overflows) // S3 user supplied status + FZ + TD (Underflows) // { .mfi - nop.m 999 - fsetc.s3 0x7F,0x41 - nop.i 999 + mov GR_pos_ov_limit = 0x1007f // Exponent for positive overflow + fsetc.s3 0x7F,0x41 + nop.i 0 } { .mfi - nop.m 999 - fsetc.s2 0x7F,0x42 - nop.i 999 -};; + mov GR_neg_ov_limit = 0x3007f // Exponent for negative overflow + fsetc.s2 0x7F,0x42 + nop.i 0 +} +;; // -// Do final operation +// Do final operation with s2 and s3 // { .mfi - setf.exp FR_NBig = GR_Scratch - fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 - nop.i 999 + setf.exp FR_NBig = GR_neg_ov_limit + fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 + nop.i 0 } { .mfi - nop.m 999 - fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 - nop.i 999 -};; -{ .mfi - setf.exp FR_Big = GR_Scratch1 - fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 - nop.i 999 -};; + setf.exp FR_Big = GR_pos_ov_limit + fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 + nop.i 0 +} +;; // Check for overflow or underflow. -// -// S0 user supplied status -// S2 user supplied status + WRE + TD (Overflow) -// S3 user supplied status + FZ + TD (Underflow) -// -// // Restore s3 // Restore s2 // { .mfi - nop.m 0 - fsetc.s3 0x7F,0x40 - nop.i 999 + nop.m 0 + fsetc.s3 0x7F,0x40 + nop.i 0 } { .mfi - nop.m 0 - fsetc.s2 0x7F,0x40 - nop.i 999 -};; + nop.m 0 + fsetc.s2 0x7F,0x40 + nop.i 0 +} +;; // // Is the result zero? 
// { .mfi - nop.m 999 - fclass.m.unc p6, p0 = FR_Result3, 0x007 - nop.i 999 -} + nop.m 0 + fclass.m p6, p0 = FR_Result3, 0x007 + nop.i 0 +} { .mfi - addl GR_Tag = 55, r0 - fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big - nop.i 0 -};; + nop.m 0 + fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big + nop.i 0 +} +;; // // Detect masked underflow - Tiny + Inexact Only // { .mfi - nop.m 999 + nop.m 0 (p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2 - nop.i 999 -};; + nop.i 0 +} +;; // // Is result bigger the allowed range? // Branch out for underflow // { .mfb -(p6) addl GR_Tag = 56, r0 + nop.m 0 (p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig -(p6) br.cond.spnt SCALBF_UNDERFLOW -};; +(p6) br.cond.spnt SCALBF_UNDERFLOW +} +;; // // Branch out for overflow // -{ .mbb - nop.m 0 -(p7) br.cond.spnt SCALBF_OVERFLOW -(p9) br.cond.spnt SCALBF_OVERFLOW -};; +{ .bbb +(p7) br.cond.spnt SCALBF_OVERFLOW +(p9) br.cond.spnt SCALBF_OVERFLOW + br.ret.sptk b0 // Return from main path. +} +;; -// -// Return from main path. 
-// -{ .mfb - nop.m 999 - nop.f 0 - br.ret.sptk b0;; +// Here if result overflows +SCALBF_OVERFLOW: +{ .mib + alloc r32=ar.pfs,3,0,4,0 + addl GR_Tag = 55, r0 // Set error tag for overflow + br.cond.sptk __libm_error_region // Call error support for overflow +} +;; + +// Here if result underflows +SCALBF_UNDERFLOW: +{ .mib + alloc r32=ar.pfs,3,0,4,0 + addl GR_Tag = 56, r0 // Set error tag for underflow + br.cond.sptk __libm_error_region // Call error support for underflow } +;; -SCALBF_NAN_INF_ZERO: +SCALBF_NAN_INF_ZERO: // -// Convert N to a fp integer -// +// Before entry, N has been converted to a fp integer in significand of +// FR_N_float_int +// +// Convert N_float_int to floating point value +// { .mfi - addl GR_Scratch = 1,r0 - fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N - nop.i 999 + getf.sig GR_N_as_int = FR_N_float_int + fclass.m p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan + nop.i 0 } { .mfi - nop.m 0 - fclass.m.unc p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan - nop.i 0 -};; + addl GR_Scratch = 1,r0 + fcvt.xf FR_N_float_int = FR_N_float_int + nop.i 0 +} +;; + { .mfi - nop.m 0 - fclass.m.unc p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan - shl GR_Scratch = GR_Scratch,63 -};; + nop.m 0 + fclass.m p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan + shl GR_Scratch = GR_Scratch,63 +} +;; + { .mfi - nop.m 0 - fclass.m.unc p8,p0 = FR_Floating_N, 0x21 // @inf - nop.i 0 -} - { .mfi - nop.m 0 - fclass.m.unc p9,p0 = FR_Floating_N, 0x22 // @-inf - nop.i 0 -};; + nop.m 0 + fclass.m p8,p0 = FR_Floating_N, 0x21 // @inf + nop.i 0 +} +{ .mfi + nop.m 0 + fclass.m p9,p0 = FR_Floating_N, 0x22 // @-inf + nop.i 0 +} +;; // // Either X or N is a Nan, return result and possible raise invalid. 
// { .mfb - nop.m 0 -(p6) fma.s.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0 + nop.m 0 +(p6) fma.s.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0 (p6) br.ret.spnt b0 -};; +} +;; + { .mfb - getf.sig GR_N_as_int = FR_N_float_int -(p7) fma.s.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0 + nop.m 0 +(p7) fma.s.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0 (p7) br.ret.spnt b0 -};; +} +;; // // If N + Inf do something special // For N = -Inf, create Int // { .mfb - nop.m 0 -(p8) fma.s.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0 -(p8) br.ret.spnt b0 + nop.m 0 +(p8) fma.s.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0 +(p8) br.ret.spnt b0 } { .mfi - nop.m 0 -(p9) fnma.s.s0 FR_Floating_N = FR_Floating_N, f1, f0 - nop.i 0 -};; + nop.m 0 +(p9) fnma.s.s0 FR_Floating_N = FR_Floating_N, f1, f0 + nop.i 0 +} +;; // // If N==-Inf,return x/(-N) // { .mfb - nop.m 0 -(p9) frcpa.s0 FR_Result,p6 = FR_Floating_X,FR_Floating_N -(p9) br.ret.spnt b0 -};; - -// -// Convert N_float_int to floating point value -// -{ .mfi - cmp.ne.unc p9,p0 = GR_N_as_int,GR_Scratch - fcvt.xf FR_N_float_int = FR_N_float_int - nop.i 0 -};; + cmp.ne p7,p0 = GR_N_as_int,GR_Scratch +(p9) frcpa.s0 FR_Result,p0 = FR_Floating_X,FR_Floating_N +(p9) br.ret.spnt b0 +} +;; // // Is N an integer. // { .mfi - nop.m 0 -(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int - nop.i 0 -};; + nop.m 0 +(p7) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int + nop.i 0 +} +;; // // If N not an int, return NaN and raise invalid. // { .mfb - nop.m 0 -(p7) frcpa.s0 FR_Result,p6 = f0,f0 -(p7) br.ret.spnt b0 -};; + nop.m 0 +(p7) frcpa.s0 FR_Result,p0 = f0,f0 +(p7) br.ret.spnt b0 +} +;; // -// Always return x in other path. +// Always return x in other path. // { .mfb - nop.m 0 - fma.s.s0 FR_Result = FR_Floating_X,f1,f0 - br.ret.sptk b0 -};; + nop.m 0 + fma.s.s0 FR_Result = FR_Floating_X,f1,f0 + br.ret.sptk b0 +} +;; -GLOBAL_IEEE754_END(scalbf) -__libm_error_region: +// Here if n not int +// Return NaN and raise invalid. 
+SCALBF_N_NOT_INT: +{ .mfb + nop.m 0 + frcpa.s0 FR_Result,p0 = f0,f0 + br.ret.sptk b0 +} +;; + +// Here if n=unorm +SCALBF_N_UNORM: +{ .mfb + getf.exp GR_signexp_N = FR_Norm_N // Get signexp of normalized n + fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N // Get N in significand + br.cond.sptk SCALBF_COMMON1 // Return to main path +} +;; + +// Here if x=unorm +SCALBF_X_UNORM: +{ .mib + getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x + nop.i 0 + br.cond.sptk SCALBF_COMMON2 // Return to main path +} +;; -SCALBF_OVERFLOW: -SCALBF_UNDERFLOW: +GLOBAL_IEEE754_END(scalbf) +LOCAL_LIBM_ENTRY(__libm_error_region) // // Get stack address of N // .prologue { .mfi - add GR_Parameter_Y=-32,sp + add GR_Parameter_Y=-32,sp nop.f 0 .save ar.pfs,GR_SAVE_PFS - mov GR_SAVE_PFS=ar.pfs + mov GR_SAVE_PFS=ar.pfs } // -// Adjust sp +// Adjust sp // { .mfi .fframe 64 - add sp=-64,sp + add sp=-64,sp nop.f 0 - mov GR_SAVE_GP=gp + mov GR_SAVE_GP=gp };; // -// Store N on stack in correct position +// Store N on stack in correct position // Locate the address of x on stack // { .mmi - stfs [GR_Parameter_Y] = FR_Norm_N,16 - add GR_Parameter_X = 16,sp + stfs [GR_Parameter_Y] = FR_Norm_N,16 + add GR_Parameter_X = 16,sp .save b0, GR_SAVE_B0 - mov GR_SAVE_B0=b0 + mov GR_SAVE_B0=b0 };; // @@ -503,42 +555,42 @@ SCALBF_UNDERFLOW: // .body { .mib - stfs [GR_Parameter_X] = FR_Norm_X - add GR_Parameter_RESULT = 0,GR_Parameter_Y + stfs [GR_Parameter_X] = FR_Norm_X + add GR_Parameter_RESULT = 0,GR_Parameter_Y nop.b 0 } { .mib - stfs [GR_Parameter_Y] = FR_Result + stfs [GR_Parameter_Y] = FR_Result add GR_Parameter_Y = -16,GR_Parameter_Y - br.call.sptk b0=__libm_error_support# + br.call.sptk b0=__libm_error_support# };; // // Get location of result on stack // { .mmi + add GR_Parameter_RESULT = 48,sp nop.m 0 - nop.m 0 - add GR_Parameter_RESULT = 48,sp + nop.i 0 };; // -// Get the new result +// Get the new result // { .mmi - ldfs FR_Result = [GR_Parameter_RESULT] + ldfs FR_Result = 
[GR_Parameter_RESULT] .restore sp - add sp = 64,sp - mov b0 = GR_SAVE_B0 + add sp = 64,sp + mov b0 = GR_SAVE_B0 };; // // Restore gp, ar.pfs and return // { .mib - mov gp = GR_SAVE_GP - mov ar.pfs = GR_SAVE_PFS - br.ret.sptk b0 + mov gp = GR_SAVE_GP + mov ar.pfs = GR_SAVE_PFS + br.ret.sptk b0 };; LOCAL_LIBM_END(__libm_error_region) diff --git a/sysdeps/ia64/fpu/e_scalbl.S b/sysdeps/ia64/fpu/e_scalbl.S index d22d029..9b6467f 100644 --- a/sysdeps/ia64/fpu/e_scalbl.S +++ b/sysdeps/ia64/fpu/e_scalbl.S @@ -21,60 +21,82 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== // 02/02/00 Initial version -// 01/26/01 Scalb completely reworked and now standalone version +// 01/26/01 Scalb completely reworked and now standalone version // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align +// 08/06/03 Improved performance // // API //============================================================== -// double-extended = scalbl (double-extended x, double-extended n) +// long double = scalbl (long double x, long double n) // input floating point f8 and floating point f9 // output floating point f8 // +// int_type = 0 if int is 32 bits +// int_type = 1 if int is 64 bits +// // Returns x* 2**n using an fma and detects overflow -// and underflow. +// and underflow. 
// // +// Strategy: +// Compute biased exponent of result exp_Result = N + exp_X +// Break into ranges: +// exp_Result > 0x13ffe -> Certain overflow +// exp_Result = 0x13ffe -> Possible overflow +// 0x0c001 <= exp_Result < 0x13ffe -> No over/underflow (main path) +// 0x0c001 - 63 <= exp_Result < 0x0c001 -> Possible underflow +// exp_Result < 0x0c001 - 63 -> Certain underflow +FR_Big = f6 +FR_NBig = f7 FR_Floating_X = f8 FR_Result = f8 FR_Floating_N = f9 FR_Result2 = f9 -FR_Norm_N = f10 -FR_Result3 = f11 -FR_Norm_X = f12 +FR_Result3 = f10 +FR_Norm_X = f11 +FR_Two_N = f12 FR_N_float_int = f13 -FR_Two_N = f14 -FR_Two_to_Big = f15 -FR_Big = f6 -FR_NBig = f7 +FR_Norm_N = f14 +GR_neg_ov_limit= r14 +GR_big_exp = r14 GR_N_Biased = r15 GR_Big = r16 -GR_NBig = r17 -GR_Scratch = r18 -GR_Scratch1 = r19 +GR_exp_Result = r18 +GR_pos_ov_limit= r19 +GR_exp_sure_ou = r19 GR_Bias = r20 GR_N_as_int = r21 +GR_signexp_X = r22 +GR_exp_X = r23 +GR_exp_mask = r24 +GR_max_exp = r25 +GR_min_exp = r26 +GR_min_den_exp = r27 +GR_Scratch = r28 +GR_signexp_N = r29 +GR_exp_N = r30 GR_SAVE_B0 = r32 GR_SAVE_GP = r33 @@ -89,412 +111,442 @@ GLOBAL_IEEE754_ENTRY(scalbl) // // Is x NAN, INF, ZERO, +-? +// Build the exponent Bias // { .mfi - alloc r32=ar.pfs,0,3,4,0 - fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero - addl GR_Scratch = 0x019C3F,r0 + getf.exp GR_signexp_N = FR_Floating_N // Get signexp of n + fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero + mov GR_Bias = 0x0ffff +} +{ .mfi + mov GR_Big = 35000 // If N this big then certain overflow + fcvt.fx.trunc.s1 FR_N_float_int = FR_Floating_N // Get N in significand + nop.i 0 +} +;; + +{ .mfi + getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x + fclass.m p7,p0 = FR_Floating_N, 0x0b // Test for n=unorm + nop.i 0 } // -// Is y a NAN, INF, ZERO, +-? 
+// Normalize n // { .mfi - nop.m 999 - fclass.m.unc p6,p0 = FR_Floating_N, 0xe7 //@snan | @qnan | @inf | @zero - addl GR_Scratch1 = 0x063BF,r0 + mov GR_exp_mask = 0x1ffff // Exponent mask + fnorm.s1 FR_Norm_N = FR_Floating_N + nop.i 0 } ;; // -// Convert N to a fp integer -// Normalize x +// Is n NAN, INF, ZERO, +-? // { .mfi - nop.m 0 - fnorm.s1 FR_Norm_N = FR_Floating_N - nop.i 999 + mov GR_big_exp = 0x1003e // Exponent at which n is integer + fclass.m p9,p0 = FR_Floating_N, 0xe7 // @snan | @qnan | @inf | @zero + mov GR_max_exp = 0x13ffe // Exponent of maximum long double } -{ .mfi - nop.m 999 - fnorm.s1 FR_Norm_X = FR_Floating_X - nop.i 999 -};; - // -// Create 2*big -// Create 2**-big // Normalize x -// Branch on special values. // -{ .mib - setf.exp FR_Big = GR_Scratch - nop.i 0 -(p6) br.cond.spnt SCALBL_NAN_INF_ZERO +{ .mfb + nop.m 0 + fnorm.s1 FR_Norm_X = FR_Floating_X +(p7) br.cond.spnt SCALBL_N_UNORM // Branch if n=unorm } -{ .mib - setf.exp FR_NBig = GR_Scratch1 - nop.i 0 -(p7) br.cond.spnt SCALBL_NAN_INF_ZERO -};; +;; -// -// Convert N to a fp integer -// Create -35000 -// +SCALBL_COMMON1: +// Main path continues. Also return here from u=unorm path. 
+// Handle special cases if x = Nan, Inf, Zero +{ .mfb + nop.m 0 + fcmp.lt.s1 p7,p0 = FR_Floating_N, f0 // Test N negative +(p6) br.cond.spnt SCALBL_NAN_INF_ZERO +} +;; + +// Handle special cases if n = Nan, Inf, Zero { .mfi - addl GR_Scratch = 1,r0 - fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N - addl GR_NBig = -35000,r0 + getf.sig GR_N_as_int = FR_N_float_int // Get n from significand + fclass.m p8,p0 = FR_Floating_X, 0x0b // Test for x=unorm + mov GR_exp_sure_ou = 0x1000e // Exp_N where x*2^N sure over/under +} +{ .mfb + mov GR_min_exp = 0x0c001 // Exponent of minimum long double + fcvt.xf FR_N_float_int = FR_N_float_int // Convert N to FP integer +(p9) br.cond.spnt SCALBL_NAN_INF_ZERO } ;; -// -// Put N if a GP register -// Convert N_float_int to floating point value -// Create 35000 -// Build the exponent Bias -// -{ .mii - getf.sig GR_N_as_int = FR_N_float_int - shl GR_Scratch = GR_Scratch,63 - addl GR_Big = 35000,r0 +{ .mmi + and GR_exp_N = GR_exp_mask, GR_signexp_N // Get exponent of N +(p7) sub GR_Big = r0, GR_Big // Limit for N + nop.i 0 } -{ .mfi - addl GR_Bias = 0x0FFFF,r0 - fcvt.xf FR_N_float_int = FR_N_float_int - nop.i 0 -};; +;; -// -// Catch those fp values that are beyond 2**64-1 -// Is N > 35000 -// Is N < -35000 -// -{ .mfi - cmp.ne.unc p9,p10 = GR_N_as_int,GR_Scratch - nop.f 0 - nop.i 0 +{ .mib + cmp.lt p9,p0 = GR_exp_N, GR_big_exp // N possible non-integer? + cmp.ge p6,p0 = GR_exp_N, GR_exp_sure_ou // N certain over/under? +(p8) br.cond.spnt SCALBL_X_UNORM // Branch if x=unorm } -{ .mmi - cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big - cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig - nop.i 0 -};; +;; -// -// Is N really an int, only for those non-int indefinites? -// Create exp bias. -// -{ .mfi - add GR_N_Biased = GR_Bias,GR_N_as_int -(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int - nop.i 0 -};; +SCALBL_COMMON2: +// Main path continues. Also return here from x=unorm path. 
+// Create biased exponent for 2**N +{ .mmi +(p6) mov GR_N_as_int = GR_Big // Limit N +;; + add GR_N_Biased = GR_Bias,GR_N_as_int + nop.i 0 +} +;; -// -// Branch and return if N is not an int. -// Main path, create 2**N -// { .mfi - setf.exp FR_Two_N = GR_N_Biased - nop.i 999 + setf.exp FR_Two_N = GR_N_Biased // Form 2**N +(p9) fcmp.neq.unc.s1 p9,p0 = FR_Norm_N, FR_N_float_int // Test if N an integer + and GR_exp_X = GR_exp_mask, GR_signexp_X // Get exponent of X } -{ .mfb - nop.m 0 -(p7) frcpa.s0 f8,p11 = f0,f0 -(p7) br.ret.spnt b0 -};; +;; // -// Set denormal on denormal input x and denormal input N +// Compute biased result exponent +// Branch if N is not an integer // -{ .mfi - nop.m 999 -(p10)fcmp.ge.s1 p6,p8 = FR_Norm_N,f0 - nop.i 0 -};; -{ .mfi - nop.m 999 - fcmp.ge.s0 p0,p11 = FR_Floating_X,f0 - nop.i 999 +{ .mib + add GR_exp_Result = GR_exp_X, GR_N_as_int + mov GR_min_den_exp = 0x0c001 - 63 // Exp of min denorm long dble +(p9) br.cond.spnt SCALBL_N_NOT_INT } -{ .mfi - nop.m 999 - fcmp.ge.s0 p12,p13 = FR_Floating_N,f0 - nop.i 0 -};; +;; // -// Adjust 2**N if N was very small or very large +// Raise Denormal operand flag with compare +// Do final operation // - { .mfi - nop.m 0 -(p6) fma.s1 FR_Two_N = FR_Big,f1,f0 - nop.i 0 + cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow + fcmp.ge.s0 p0,p11 = FR_Floating_X,FR_Floating_N // Dummy to set denorm + cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow } -{ .mlx - nop.m 999 - movl GR_Scratch = 0x0000000000033FFF -};; -{ .mfi - nop.m 0 -(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0 - nop.i 0 +{ .mfb + nop.m 0 + fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 +(p9) br.cond.spnt SCALBL_UNDERFLOW // Branch if certain underflow } -{ .mlx - nop.m 999 - movl GR_Scratch1= 0x0000000000013FFF -};; +;; + +{ .mib +(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow +(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow +(p7) br.ret.sptk b0 // Return from main path +} +;; 
-// Set up necessary status fields +{ .bbb +(p6) br.cond.spnt SCALBL_OVERFLOW // Branch if certain overflow +(p8) br.cond.spnt SCALBL_POSSIBLE_OVERFLOW // Branch if possible overflow +(p9) br.cond.spnt SCALBL_POSSIBLE_UNDERFLOW // Branch if possible underflow +} +;; + +// Here if possible underflow. +// Resulting exponent: 0x0c001-63 <= exp_Result < 0x0c001 +SCALBL_POSSIBLE_UNDERFLOW: +// +// Here if possible overflow. +// Resulting exponent: 0x13ffe = exp_Result +SCALBL_POSSIBLE_OVERFLOW: + +// Set up necessary status fields // // S0 user supplied status // S2 user supplied status + WRE + TD (Overflows) // S3 user supplied status + FZ + TD (Underflows) // { .mfi - nop.m 999 - fsetc.s3 0x7F,0x41 - nop.i 999 + mov GR_pos_ov_limit = 0x13fff // Exponent for positive overflow + fsetc.s3 0x7F,0x41 + nop.i 0 } { .mfi - nop.m 999 - fsetc.s2 0x7F,0x42 - nop.i 999 -};; + mov GR_neg_ov_limit = 0x33fff // Exponent for negative overflow + fsetc.s2 0x7F,0x42 + nop.i 0 +} +;; // -// Do final operation +// Do final operation with s2 and s3 // { .mfi - setf.exp FR_NBig = GR_Scratch - fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 - nop.i 999 + setf.exp FR_NBig = GR_neg_ov_limit + fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 + nop.i 0 } { .mfi - nop.m 999 - fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 - nop.i 999 -};; -{ .mfi - setf.exp FR_Big = GR_Scratch1 - fma.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 - nop.i 999 -};; + setf.exp FR_Big = GR_pos_ov_limit + fma.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 + nop.i 0 +} +;; // Check for overflow or underflow. -// -// S0 user supplied status -// S2 user supplied status + WRE + TD (Overflow) -// S3 user supplied status + FZ + TD (Underflow) -// -// // Restore s3 // Restore s2 // { .mfi - nop.m 0 - fsetc.s3 0x7F,0x40 - nop.i 999 + nop.m 0 + fsetc.s3 0x7F,0x40 + nop.i 0 } { .mfi - nop.m 0 - fsetc.s2 0x7F,0x40 - nop.i 999 -};; + nop.m 0 + fsetc.s2 0x7F,0x40 + nop.i 0 +} +;; // // Is the result zero? 
// { .mfi - nop.m 999 - fclass.m.unc p6, p0 = FR_Result3, 0x007 - nop.i 999 -} + nop.m 0 + fclass.m p6, p0 = FR_Result3, 0x007 + nop.i 0 +} { .mfi - addl GR_Tag = 51, r0 - fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big - nop.i 0 -};; + nop.m 0 + fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big + nop.i 0 +} +;; // // Detect masked underflow - Tiny + Inexact Only // { .mfi - nop.m 999 + nop.m 0 (p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2 - nop.i 999 -};; + nop.i 0 +} +;; // // Is result bigger the allowed range? // Branch out for underflow // { .mfb -(p6) addl GR_Tag = 52, r0 + nop.m 0 (p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig -(p6) br.cond.spnt SCALBL_UNDERFLOW -};; +(p6) br.cond.spnt SCALBL_UNDERFLOW +} +;; // // Branch out for overflow // -{ .mbb - nop.m 0 -(p7) br.cond.spnt SCALBL_OVERFLOW -(p9) br.cond.spnt SCALBL_OVERFLOW -};; +{ .bbb +(p7) br.cond.spnt SCALBL_OVERFLOW +(p9) br.cond.spnt SCALBL_OVERFLOW + br.ret.sptk b0 // Return from main path. +} +;; -// -// Return from main path. 
-// -{ .mfb - nop.m 999 - nop.f 0 - br.ret.sptk b0;; +// Here if result overflows +SCALBL_OVERFLOW: +{ .mib + alloc r32=ar.pfs,3,0,4,0 + addl GR_Tag = 51, r0 // Set error tag for overflow + br.cond.sptk __libm_error_region // Call error support for overflow +} +;; + +// Here if result underflows +SCALBL_UNDERFLOW: +{ .mib + alloc r32=ar.pfs,3,0,4,0 + addl GR_Tag = 52, r0 // Set error tag for underflow + br.cond.sptk __libm_error_region // Call error support for underflow } +;; -SCALBL_NAN_INF_ZERO: +SCALBL_NAN_INF_ZERO: // -// Convert N to a fp integer -// +// Before entry, N has been converted to a fp integer in significand of +// FR_N_float_int +// +// Convert N_float_int to floating point value +// { .mfi - addl GR_Scratch = 1,r0 - fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N - nop.i 999 + getf.sig GR_N_as_int = FR_N_float_int + fclass.m p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan + nop.i 0 } { .mfi - nop.m 0 - fclass.m.unc p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan - nop.i 0 -};; + addl GR_Scratch = 1,r0 + fcvt.xf FR_N_float_int = FR_N_float_int + nop.i 0 +} +;; + { .mfi - nop.m 0 - fclass.m.unc p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan - shl GR_Scratch = GR_Scratch,63 -};; + nop.m 0 + fclass.m p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan + shl GR_Scratch = GR_Scratch,63 +} +;; + { .mfi - nop.m 0 - fclass.m.unc p8,p0 = FR_Floating_N, 0x21 // @inf - nop.i 0 -} - { .mfi - nop.m 0 - fclass.m.unc p9,p0 = FR_Floating_N, 0x22 // @-inf - nop.i 0 -};; + nop.m 0 + fclass.m p8,p0 = FR_Floating_N, 0x21 // @inf + nop.i 0 +} +{ .mfi + nop.m 0 + fclass.m p9,p0 = FR_Floating_N, 0x22 // @-inf + nop.i 0 +} +;; // // Either X or N is a Nan, return result and possible raise invalid. 
// { .mfb - nop.m 0 -(p6) fma.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0 + nop.m 0 +(p6) fma.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0 (p6) br.ret.spnt b0 -};; +} +;; + { .mfb - getf.sig GR_N_as_int = FR_N_float_int -(p7) fma.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0 + nop.m 0 +(p7) fma.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0 (p7) br.ret.spnt b0 -};; +} +;; // // If N + Inf do something special // For N = -Inf, create Int // { .mfb - nop.m 0 -(p8) fma.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0 -(p8) br.ret.spnt b0 + nop.m 0 +(p8) fma.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0 +(p8) br.ret.spnt b0 } { .mfi - nop.m 0 -(p9) fnma.s0 FR_Floating_N = FR_Floating_N, f1, f0 - nop.i 0 -};; + nop.m 0 +(p9) fnma.s0 FR_Floating_N = FR_Floating_N, f1, f0 + nop.i 0 +} +;; // // If N==-Inf,return x/(-N) // { .mfb - nop.m 0 -(p9) frcpa.s0 FR_Result,p6 = FR_Floating_X,FR_Floating_N -(p9) br.ret.spnt b0 -};; - -// -// Convert N_float_int to floating point value -// -{ .mfi - cmp.ne.unc p9,p0 = GR_N_as_int,GR_Scratch - fcvt.xf FR_N_float_int = FR_N_float_int - nop.i 0 -};; + cmp.ne p7,p0 = GR_N_as_int,GR_Scratch +(p9) frcpa.s0 FR_Result,p0 = FR_Floating_X,FR_Floating_N +(p9) br.ret.spnt b0 +} +;; // // Is N an integer. // { .mfi - nop.m 0 -(p9) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int - nop.i 0 -};; + nop.m 0 +(p7) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int + nop.i 0 +} +;; // // If N not an int, return NaN and raise invalid. // { .mfb - nop.m 0 -(p7) frcpa.s0 FR_Result,p6 = f0,f0 -(p7) br.ret.spnt b0 -};; + nop.m 0 +(p7) frcpa.s0 FR_Result,p0 = f0,f0 +(p7) br.ret.spnt b0 +} +;; // -// Always return x in other path. +// Always return x in other path. // { .mfb - nop.m 0 - fma.s0 FR_Result = FR_Floating_X,f1,f0 - br.ret.sptk b0 -};; + nop.m 0 + fma.s0 FR_Result = FR_Floating_X,f1,f0 + br.ret.sptk b0 +} +;; -GLOBAL_IEEE754_END(scalbl) -__libm_error_region: +// Here if n not int +// Return NaN and raise invalid. 
+SCALBL_N_NOT_INT: +{ .mfb + nop.m 0 + frcpa.s0 FR_Result,p0 = f0,f0 + br.ret.sptk b0 +} +;; + +// Here if n=unorm +SCALBL_N_UNORM: +{ .mfb + getf.exp GR_signexp_N = FR_Norm_N // Get signexp of normalized n + fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N // Get N in significand + br.cond.sptk SCALBL_COMMON1 // Return to main path +} +;; + +// Here if x=unorm +SCALBL_X_UNORM: +{ .mib + getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x + nop.i 0 + br.cond.sptk SCALBL_COMMON2 // Return to main path +} +;; -SCALBL_OVERFLOW: -SCALBL_UNDERFLOW: +GLOBAL_IEEE754_END(scalbl) +LOCAL_LIBM_ENTRY(__libm_error_region) // // Get stack address of N // .prologue { .mfi - add GR_Parameter_Y=-32,sp + add GR_Parameter_Y=-32,sp nop.f 0 .save ar.pfs,GR_SAVE_PFS - mov GR_SAVE_PFS=ar.pfs + mov GR_SAVE_PFS=ar.pfs } // -// Adjust sp +// Adjust sp // { .mfi .fframe 64 - add sp=-64,sp + add sp=-64,sp nop.f 0 - mov GR_SAVE_GP=gp + mov GR_SAVE_GP=gp };; // -// Store N on stack in correct position +// Store N on stack in correct position // Locate the address of x on stack // { .mmi - stfe [GR_Parameter_Y] = FR_Norm_N,16 - add GR_Parameter_X = 16,sp + stfe [GR_Parameter_Y] = FR_Norm_N,16 + add GR_Parameter_X = 16,sp .save b0, GR_SAVE_B0 - mov GR_SAVE_B0=b0 + mov GR_SAVE_B0=b0 };; // @@ -503,42 +555,42 @@ SCALBL_UNDERFLOW: // .body { .mib - stfe [GR_Parameter_X] = FR_Norm_X - add GR_Parameter_RESULT = 0,GR_Parameter_Y + stfe [GR_Parameter_X] = FR_Norm_X + add GR_Parameter_RESULT = 0,GR_Parameter_Y nop.b 0 } { .mib - stfe [GR_Parameter_Y] = FR_Result + stfe [GR_Parameter_Y] = FR_Result add GR_Parameter_Y = -16,GR_Parameter_Y - br.call.sptk b0=__libm_error_support# + br.call.sptk b0=__libm_error_support# };; // // Get location of result on stack // { .mmi + add GR_Parameter_RESULT = 48,sp nop.m 0 - nop.m 0 - add GR_Parameter_RESULT = 48,sp + nop.i 0 };; // -// Get the new result +// Get the new result // { .mmi - ldfe FR_Result = [GR_Parameter_RESULT] + ldfe FR_Result = 
[GR_Parameter_RESULT] .restore sp - add sp = 64,sp - mov b0 = GR_SAVE_B0 + add sp = 64,sp + mov b0 = GR_SAVE_B0 };; // // Restore gp, ar.pfs and return // { .mib - mov gp = GR_SAVE_GP - mov ar.pfs = GR_SAVE_PFS - br.ret.sptk b0 + mov gp = GR_SAVE_GP + mov ar.pfs = GR_SAVE_PFS + br.ret.sptk b0 };; LOCAL_LIBM_END(__libm_error_region) diff --git a/sysdeps/ia64/fpu/e_sinh.S b/sysdeps/ia64/fpu/e_sinh.S index 84c312c..5910d4a 100644 --- a/sysdeps/ia64/fpu/e_sinh.S +++ b/sysdeps/ia64/fpu/e_sinh.S @@ -850,6 +850,7 @@ SINH_UNORM: GLOBAL_IEEE754_END(sinh) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_sinhf.S b/sysdeps/ia64/fpu/e_sinhf.S index 4a407b7..d01d830 100644 --- a/sysdeps/ia64/fpu/e_sinhf.S +++ b/sysdeps/ia64/fpu/e_sinhf.S @@ -689,6 +689,7 @@ SINH_UNORM: GLOBAL_IEEE754_END(sinhf) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/e_sinhl.S b/sysdeps/ia64/fpu/e_sinhl.S index ccc996a..5b4a4ad 100644 --- a/sysdeps/ia64/fpu/e_sinhl.S +++ b/sysdeps/ia64/fpu/e_sinhl.S @@ -1055,6 +1055,7 @@ SINH_HUGE: GLOBAL_IEEE754_END(sinhl) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/e_sqrt.S b/sysdeps/ia64/fpu/e_sqrt.S index 0e208b3..53e60ef 100644 --- a/sysdeps/ia64/fpu/e_sqrt.S +++ b/sysdeps/ia64/fpu/e_sqrt.S @@ -252,6 +252,7 @@ GLOBAL_IEEE754_ENTRY(sqrt) } // END DOUBLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM GLOBAL_IEEE754_END(sqrt) + // Stack operations when calling error support. 
// (1) (2) (3) (call) (4) // sp -> + psp -> + psp -> + sp -> + diff --git a/sysdeps/ia64/fpu/e_sqrtf.S b/sysdeps/ia64/fpu/e_sqrtf.S index bee0df7..daa2045 100644 --- a/sysdeps/ia64/fpu/e_sqrtf.S +++ b/sysdeps/ia64/fpu/e_sqrtf.S @@ -204,6 +204,7 @@ GLOBAL_IEEE754_ENTRY(sqrtf) // GLOBAL_IEEE754_END(sqrtf) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mii diff --git a/sysdeps/ia64/fpu/e_sqrtl.S b/sysdeps/ia64/fpu/e_sqrtl.S index ec14756..6a5735d 100644 --- a/sysdeps/ia64/fpu/e_sqrtl.S +++ b/sysdeps/ia64/fpu/e_sqrtl.S @@ -221,6 +221,7 @@ alloc r32= ar.pfs,0,5,4,0 // END DOUBLE EXTENDED PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM GLOBAL_IEEE754_END(sqrtl) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/gen_import_file_list b/sysdeps/ia64/fpu/gen_import_file_list index a02bb31..b8bd6a5 100644 --- a/sysdeps/ia64/fpu/gen_import_file_list +++ b/sysdeps/ia64/fpu/gen_import_file_list @@ -16,8 +16,18 @@ import_c() { echo "$1 $libm_dir/$2 $3" } -import_c DUMMY libm_support.h libm_support.h -import_c DUMMY libm_error.c libm_error.c +dummy_files=" +libm_cpu_defs.h +libm_error_codes.h +libm_support.h +libm_error.c +" + +for f in $dummy_files +do + import_c DUMMY $f $f +done + import_c scalblnf scalblnf.c s_scalblnf.c for f in acos acosh asin atanh cosh exp2 exp10 fmod log2 pow remainder \ diff --git a/sysdeps/ia64/fpu/import_file.awk b/sysdeps/ia64/fpu/import_file.awk index c6335dc..97fe77e 100644 --- a/sysdeps/ia64/fpu/import_file.awk +++ b/sysdeps/ia64/fpu/import_file.awk @@ -7,10 +7,13 @@ BEGIN { getline; while (!match($0, "^// WARRANTY DISCLAIMER")) { print; - getline; + if (!getline) { + break; + } } - getline; - printf \ + if (getline) + { + printf \ "// Redistribution and use in source and binary forms, with or without\n" \ "// modification, are permitted provided that the following conditions are\n" \ "// met:\n" \ @@ -25,10 +28,11 @@ BEGIN { "// * The name of Intel Corporation may not be used to endorse or promote\n" \ "// 
products derived from this software without specific prior written\n" \ "// permission.\n\n"; - if (LICENSE_ONLY == "y") { - do { - print; - } while (getline); + if (LICENSE_ONLY == "y") { + do { + print; + } while (getline); + } } } @@ -115,7 +119,6 @@ BEGIN { print getline; } - getline; printf "%s_END(%s)\n", type, name; if (match(name, "^exp10[fl]?$")) { t=substr(name,6) diff --git a/sysdeps/ia64/fpu/import_intel_libm b/sysdeps/ia64/fpu/import_intel_libm index 752ba37..1aaa646 100644 --- a/sysdeps/ia64/fpu/import_intel_libm +++ b/sysdeps/ia64/fpu/import_intel_libm @@ -16,6 +16,7 @@ import_s() { # $2 = source file-name # $3 = destination file-name echo "Importing $1 from $2 -> $3" + rm -f $3 awk -f import_file.awk FUNC=$1 $2 > $3 } @@ -24,19 +25,82 @@ import_c() { # $2 = source file-name # $3 = destination file-name echo "Importing $1 from $2 -> $3" + rm -f $3 awk -f import_file.awk LICENSE_ONLY=y $2 > $3 } do_imports() { while read func_pattern src_file dst_file; do - if [ "$(expr $src_file : '.*\(c\)$')" = "c" ]; then + case $src_file in + *.[ch]) import_c "$func_pattern" "$src_file" "$dst_file" - else + ;; + *) import_s "$func_pattern" "$src_file" "$dst_file" - fi + ;; + esac done } ./gen_import_file_list $libm_dir > import_file_list do_imports < import_file_list + +emptyfiles=" +e_gamma_r.c +e_gammaf_r.c +e_gammal_r.c +s_sincos.c +s_sincosf.c +s_sincosl.c +t_exp.c +w_acosh.c +w_acoshf.c +w_acoshl.c +w_atanh.c +w_atanhf.c +w_atanhl.c +w_exp10.c +w_exp10f.c +w_exp10l.c +w_exp2.c +w_exp2f.c +w_exp2l.c +w_expl.c +w_lgamma_r.c +w_lgammaf_r.c +w_lgammal_r.c +w_log2.c +w_log2f.c +w_log2l.c +w_sinh.c +w_sinhf.c +w_sinhl.c +" +for f in $emptyfiles +do + rm -f $f + echo "/* Not needed. 
*/" > $f +done + +removedfiles=" +libm_atan2_reg.S +s_ldexp.S +s_ldexpf.S +s_ldexpl.S +s_scalbn.S +s_scalbnf.S +s_scalbnl.S +" + +rm -f $removedfiles + +for f in lgammaf_r.c lgammal_r.c lgamma_r.c +do + import_c $f $libm_dir/$f e_$f +done + +for f in lgamma.c lgammaf.c lgammal.c +do + import_c $f $libm_dir/$f w_$f +done diff --git a/sysdeps/ia64/fpu/libm_error.c b/sysdeps/ia64/fpu/libm_error.c index 42ca36d..a7f9dae 100644 --- a/sysdeps/ia64/fpu/libm_error.c +++ b/sysdeps/ia64/fpu/libm_error.c @@ -1,7 +1,7 @@ /* file: libm_error.c */ -// Copyright (c) 2000 - 2003, Intel Corporation +// Copyright (c) 2000 - 2004, Intel Corporation // All rights reserved. // // Contributed 2000 by the Intel Numerics Group, Intel Corporation @@ -23,12 +23,12 @@ // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS @@ -41,8 +41,8 @@ // History //============================================================== // 2/02/00: Initial version -// 3/22/00: Updated to support flexible and dynamic error handling. 
-// 8/16/00: Changed all matherr function-calls to use the pmatherr +// 3/22/00: Updated to support flexible and dynamic error handling. +// 8/16/00: Changed all matherr function-calls to use the pmatherr // function-pointers. // 10/03/00: Corrected a scalb type. // 11/28/00: Changed INPUT_XL to INPUT_XD for scalb_underflow case. @@ -61,15 +61,30 @@ // 01/28/02: Corrected SVID/XOPEN stderr message for log2 // 05/20/02: Added code for cot // 07/01/02: Added code for sinhcosh -// 10/04/02: Underflow detection in ISOC path redefined to -// be zero rather than tiny and inexact +// 10/04/02: Underflow detection in ISOC path redefined to +// be zero rather than tiny and inexact // 12/06/02: Added code for annuity and compound // 01/30/03: Corrected test for underflow in ISOC path to not set denormal // 04/10/03: Corrected ISOC branch for gamma/lgamma to return ERANGE for neg ints. // Added code for tgamma -// 04/11/03: Corrected POSIX/SVID/XOPEN branches for gamma/lgamma +// 04/11/03: Corrected POSIX/SVID/XOPEN branches for gamma/lgamma // to return EDOM for neg ints. -// +// 09/08/03: Corrected XOPEN/SVID result for pow overflow with neg x, pos y. +// 10/14/03: Added ILP32 ifdef +// 12/12/03: Corrected XOPEN/SVID results for powf_zero_to_negative, +// powl_neg_to_non_integer, atan2f_zero, atan2df_zero, +// acoshf_lt_one, acosh_lt_one. +// 12/07/04: Cast name strings as char *. +// 12/08/04: Corrected POSIX behavior for atan2_zero, acos_gt_one, asin_gt_one, +// log_negative, log10_negative, log1p_negative, and log2_negative. +// Added SVID and XOPEN case log2l_zero. +// 12/13/04: Corrected POSIX behavior for exp2_overflow, exp2_underflow, +// exp10_overflow, exp10_underflow. Added ISOC to set errno for +// exp10_underflow. +// 12/14/04: Corrected POSIX behavior for nextafter_overflow, +// nextafter_underflow, nexttoward_overflow, nexttoward_underflow. +// Added ISOC to set errno for nextafter and nexttoward underflow. 
+// 12/15/04: Corrected POSIX behavior for exp, exp2, and exp10 underflow. #include <errno.h> #include <stdio.h> @@ -102,13 +117,13 @@ int (*pmatherr)(struct EXC_DECL_D*) = MATHERR_D; int (*pmatherrl)(struct exceptionl*) = matherrl; void __libm_setusermatherrf( int(*user_merrf)(struct exceptionf*) ) -{ pmatherrf = ( (user_merrf==NULL)? (MATHERR_F) : (user_merrf) ); } +{ pmatherrf = ( (user_merrf==NULL)? (MATHERR_F) : (user_merrf) ); } void __libm_setusermatherr( int(*user_merr)(struct EXC_DECL_D*) ) -{ pmatherr = ( (user_merr==NULL)? (MATHERR_D) : (user_merr) ); } +{ pmatherr = ( (user_merr==NULL)? (MATHERR_D) : (user_merr) ); } void __libm_setusermatherrl( int(*user_merrl)(struct exceptionl*) ) -{ pmatherrl = ( (user_merrl==NULL)? (matherrl) : (user_merrl) ); } +{ pmatherrl = ( (user_merrl==NULL)? (matherrl) : (user_merrl) ); } #endif /* !_LIBC */ @@ -120,14 +135,14 @@ void __libm_error_support(void *arg1,void *arg2,void *retval,error_types input_t # ifdef __cplusplus struct __exception exc; -# else +# else struct exception exc; -# endif +# endif struct exceptionf excf; struct exceptionl excl; -# if defined(__GNUC__) +# ifdef __GNUC__ #define ALIGNIT __attribute__ ((__aligned__ (16))) # elif defined opensource #define ALIGNIT @@ -137,75 +152,87 @@ struct exceptionl excl; # ifdef SIZE_LONG_INT_64 #define __INT_64__ signed long -# else +# else +# if ILP32 +#define __INT_64__ signed long long +# else #define __INT_64__ __int64 # endif +# endif + -const char float_inf[4] = {0x00,0x00,0x80,0x7F}; -const char float_huge[4] = {0xFF,0xFF,0x7F,0x7F}; -const char float_zero[4] = {0x00,0x00,0x00,0x00}; -const char float_neg_inf[4] = {0x00,0x00,0x80,0xFF}; -const char float_neg_huge[4] = {0xFF,0xFF,0x7F,0xFF}; -const char float_neg_zero[4] = {0x00,0x00,0x00,0x80}; +#define STATIC static + +STATIC const char float_inf[4] = {0x00,0x00,0x80,0x7F}; +STATIC const char float_huge[4] = {0xFF,0xFF,0x7F,0x7F}; +STATIC const char float_zero[4] = {0x00,0x00,0x00,0x00}; +STATIC const 
char float_neg_inf[4] = {0x00,0x00,0x80,0xFF}; +STATIC const char float_neg_huge[4] = {0xFF,0xFF,0x7F,0xFF}; +STATIC const char float_neg_zero[4] = {0x00,0x00,0x00,0x80}; ALIGNIT -const char double_inf[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0x7F}; -#if 0 /* unused */ +STATIC const char double_inf[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0x7F}; +#ifndef _LIBC ALIGNIT -const char double_huge[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0x7F}; +STATIC const char double_huge[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0x7F}; #endif ALIGNIT -const char double_zero[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; +STATIC const char double_zero[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; ALIGNIT -const char double_neg_inf[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0xFF}; -#if 0 /* unused */ +STATIC const char double_neg_inf[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0xFF}; +#ifndef _LIBC ALIGNIT -const char double_neg_huge[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0xFF}; +STATIC const char double_neg_huge[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0xFF}; #endif ALIGNIT -const char double_neg_zero[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80}; +STATIC const char double_neg_zero[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80}; ALIGNIT -const char long_double_inf[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0x7F,0x00,0x00,0x00,0x00,0x00,0x00}; -#if 0 /* unused */ +STATIC const char long_double_inf[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0x7F,0x00,0x00,0x00,0x00,0x00,0x00}; ALIGNIT -const char long_double_huge[16] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0x7F,0x00,0x00,0x00,0x00,0x00,0x00}; +#ifndef _LIBC +STATIC const char long_double_huge[16] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0x7F,0x00,0x00,0x00,0x00,0x00,0x00}; #endif ALIGNIT -const char long_double_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; +STATIC const char long_double_zero[16] = 
{0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; ALIGNIT -const char long_double_neg_inf[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0xFF,0x00,0x00,0x00,0x00,0x00,0x00}; -#if 0 /* unused */ +STATIC const char long_double_neg_inf[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0xFF,0x00,0x00,0x00,0x00,0x00,0x00}; ALIGNIT -const char long_double_neg_huge[16] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0xFF,0x00,0x00,0x00,0x00,0x00,0x00}; +#ifndef _LIBC +STATIC const char long_double_neg_huge[16] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0xFF,0x00,0x00,0x00,0x00,0x00,0x00}; #endif ALIGNIT -const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00,0x00}; +STATIC const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00,0x00}; + -#define RETVAL_HUGE_VALL *(long double *)retval = *(long double *)long_double_inf -#define RETVAL_NEG_HUGE_VALL *(long double *)retval = *(long double *)long_double_neg_inf -#define RETVAL_HUGEL *(long double *)retval = (long double)*(float *)float_huge -#define RETVAL_NEG_HUGEL *(long double *)retval =(long double)*(float*)float_neg_huge +#define RETVAL_HUGE_VALL *(long double *)retval = *(long double *)long_double_inf +#define RETVAL_NEG_HUGE_VALL *(long double *)retval = *(long double *)long_double_neg_inf +#define RETVAL_HUGEL *(long double *)retval = (long double)*(float *)float_huge +#define RETVAL_NEG_HUGEL *(long double *)retval =(long double)*(float*)float_neg_huge #define RETVAL_HUGE_VALD *(double *)retval = *(double *) double_inf #define RETVAL_NEG_HUGE_VALD *(double *)retval = *(double *) double_neg_inf #define RETVAL_HUGED *(double *)retval = (double) *(float *)float_huge -#define RETVAL_NEG_HUGED *(double *)retval = (double) *(float *) float_neg_huge +#define RETVAL_NEG_HUGED *(double *)retval = (double) *(float *) float_neg_huge #define RETVAL_HUGE_VALF *(float 
*)retval = *(float *) float_inf #define RETVAL_NEG_HUGE_VALF *(float *)retval = *(float *) float_neg_inf #define RETVAL_HUGEF *(float *)retval = *(float *) float_huge -#define RETVAL_NEG_HUGEF *(float *)retval = *(float *) float_neg_huge +#define RETVAL_NEG_HUGEF *(float *)retval = *(float *) float_neg_huge -#define RETVAL_ZEROL *(long double *)retval = *(long double *)long_double_zero -#define RETVAL_ZEROD *(double *)retval = *(double *)double_zero -#define RETVAL_ZEROF *(float *)retval = *(float *)float_zero +#define ZEROL_VALUE *(long double *)long_double_zero +#define ZEROD_VALUE *(double *)double_zero +#define ZEROF_VALUE *(float *)float_zero -#define RETVAL_NEG_ZEROL *(long double *)retval = *(long double *)long_double_neg_zero -#define RETVAL_NEG_ZEROD *(double *)retval = *(double *)double_neg_zero -#define RETVAL_NEG_ZEROF *(float *)retval = *(float *)float_neg_zero +#define RETVAL_ZEROL *(long double *)retval = *(long double *)long_double_zero +#define RETVAL_ZEROD *(double *)retval = *(double *)double_zero +#define RETVAL_ZEROF *(float *)retval = *(float *)float_zero -#define RETVAL_ONEL *(long double *)retval = (long double) 1.0 -#define RETVAL_ONED *(double *)retval = 1.0 -#define RETVAL_ONEF *(float *)retval = 1.0f +#define RETVAL_NEG_ZEROL *(long double *)retval = *(long double *)long_double_neg_zero +#define RETVAL_NEG_ZEROD *(double *)retval = *(double *)double_neg_zero +#define RETVAL_NEG_ZEROF *(float *)retval = *(float *)float_neg_zero + +#define RETVAL_ONEL *(long double *)retval = (long double) 1.0 +#define RETVAL_ONED *(double *)retval = 1.0 +#define RETVAL_ONEF *(float *)retval = 1.0f #define NOT_MATHERRL excl.arg1=*(long double *)arg1;excl.arg2=*(long double *)arg2;excl.retval=*(long double *)retval;if(!pmatherrl(&excl)) #define NOT_MATHERRD exc.arg1=*(double *)arg1;exc.arg2=*(double *)arg2;exc.retval=*(double *)retval;if(!pmatherr(&exc)) @@ -213,9 +240,9 @@ const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0 
#define ifSVID if(_LIB_VERSIONIMF==_SVID_) -#define NAMEL excl.name -#define NAMED exc.name -#define NAMEF excf.name +#define NAMEL excl.name +#define NAMED exc.name +#define NAMEF excf.name // // These should work OK for MS because they are ints - @@ -230,20 +257,20 @@ const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0 #define PLOSS 6 #define SINGL excl.type = SING -#define DOMAINL excl.type = DOMAIN -#define OVERFLOWL excl.type = OVERFLOW -#define UNDERFLOWL excl.type = UNDERFLOW -#define TLOSSL excl.type = TLOSS +#define DOMAINL excl.type = DOMAIN +#define OVERFLOWL excl.type = OVERFLOW +#define UNDERFLOWL excl.type = UNDERFLOW +#define TLOSSL excl.type = TLOSS #define SINGD exc.type = SING -#define DOMAIND exc.type = DOMAIN -#define OVERFLOWD exc.type = OVERFLOW -#define UNDERFLOWD exc.type = UNDERFLOW -#define TLOSSD exc.type = TLOSS +#define DOMAIND exc.type = DOMAIN +#define OVERFLOWD exc.type = OVERFLOW +#define UNDERFLOWD exc.type = UNDERFLOW +#define TLOSSD exc.type = TLOSS #define SINGF excf.type = SING -#define DOMAINF excf.type = DOMAIN -#define OVERFLOWF excf.type = OVERFLOW -#define UNDERFLOWF excf.type = UNDERFLOW -#define TLOSSF excf.type = TLOSS +#define DOMAINF excf.type = DOMAIN +#define OVERFLOWF excf.type = OVERFLOW +#define UNDERFLOWF excf.type = UNDERFLOW +#define TLOSSF excf.type = TLOSS #define INPUT_XL (excl.arg1=*(long double*)arg1) #define INPUT_XD (exc.arg1=*(double*)arg1) @@ -251,9 +278,10 @@ const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0 #define INPUT_YL (excl.arg2=*(long double*)arg2) #define INPUT_YD (exc.arg2=*(double*)arg2) #define INPUT_YF (excf.arg2=*(float*)arg2) -#define INPUT_RESL (*(long double *)retval) +#define INPUT_RESL (*(long double *)retval) #define INPUT_RESD (*(double *)retval) #define INPUT_RESF (*(float *)retval) +#define INPUT_RESI64 (*(__INT_64__ *)retval) #define WRITEL_LOG_ZERO fputs("logl: SING error\n",stderr) #define WRITED_LOG_ZERO 
fputs("log: SING error\n",stderr) @@ -271,7 +299,7 @@ const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0 #define WRITED_Y1_ZERO fputs("y1: DOMAIN error\n",stderr) #define WRITEF_Y1_ZERO fputs("y1f: DOMAIN error\n",stderr) #define WRITEL_Y1_NEGATIVE fputs("y1l: DOMAIN error\n",stderr) -#define WRITED_Y1_NEGATIUE fputs("y1: DOMAIN error\n",stderr) +#define WRITED_Y1_NEGATIVE fputs("y1: DOMAIN error\n",stderr) #define WRITEF_Y1_NEGATIVE fputs("y1f: DOMAIN error\n",stderr) #define WRITEL_YN_ZERO fputs("ynl: DOMAIN error\n",stderr) #define WRITED_YN_ZERO fputs("yn: DOMAIN error\n",stderr) @@ -286,13 +314,13 @@ const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0 #define WRITED_LOG1P_NEGATIVE fputs("log1p: DOMAIN error\n",stderr) #define WRITEF_LOG1P_NEGATIVE fputs("log1pf: DOMAIN error\n",stderr) #define WRITEL_LOG10_ZERO fputs("log10l: SING error\n",stderr) -#define WRITED_LOG10_ZERO fputs("log10: SING error\n",stderr) +#define WRITED_LOG10_ZERO fputs("log10: SING error\n",stderr) #define WRITEF_LOG10_ZERO fputs("log10f: SING error\n",stderr) #define WRITEL_LOG10_NEGATIVE fputs("log10l: DOMAIN error\n",stderr) #define WRITED_LOG10_NEGATIVE fputs("log10: DOMAIN error\n",stderr) #define WRITEF_LOG10_NEGATIVE fputs("log10f: DOMAIN error\n",stderr) #define WRITEL_LOG2_ZERO fputs("log2l: SING error\n",stderr) -#define WRITED_LOG2_ZERO fputs("log2: SING error\n",stderr) +#define WRITED_LOG2_ZERO fputs("log2: SING error\n",stderr) #define WRITEF_LOG2_ZERO fputs("log2f: SING error\n",stderr) #define WRITEL_LOG2_NEGATIVE fputs("log2l: DOMAIN error\n",stderr) #define WRITED_LOG2_NEGATIVE fputs("log2: DOMAIN error\n",stderr) @@ -339,9 +367,9 @@ const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0 #define WRITEL_GAMMA_NEGATIVE fputs("gammal: SING error\n",stderr) #define WRITED_GAMMA_NEGATIVE fputs("gamma: SING error\n",stderr) #define WRITEF_GAMMA_NEGATIVE fputs("gammaf: SING error\n",stderr) 
-#define WRITEL_TGAMMA_NEGATIVE fputs("tgammal: DOMAIN error\n",stderr) -#define WRITED_TGAMMA_NEGATIVE fputs("tgamma: DOMAIN error\n",stderr) -#define WRITEF_TGAMMA_NEGATIVE fputs("tgammaf: DOMAIN error\n",stderr) +#define WRITEL_TGAMMA_NEGATIVE fputs("tgammal: SING error\n",stderr) +#define WRITED_TGAMMA_NEGATIVE fputs("tgamma: SING error\n",stderr) +#define WRITEF_TGAMMA_NEGATIVE fputs("tgammaf: SING error\n",stderr) #define WRITEL_J0_TLOSS fputs("j0l: TLOSS error\n",stderr) #define WRITEL_Y0_TLOSS fputs("y0l: TLOSS error\n",stderr) #define WRITEL_J1_TLOSS fputs("j1l: TLOSS error\n",stderr) @@ -379,7 +407,7 @@ if(_LIB_VERSIONIMF==_IEEE_) return; /***********************/ /* C9X Path */ /***********************/ -else if(_LIB_VERSIONIMF==_ISOC_) +else if(_LIB_VERSIONIMF==_ISOC_) { switch(input_tag) { @@ -396,29 +424,29 @@ else if(_LIB_VERSIONIMF==_ISOC_) case log1p_zero: case log1pf_zero: case powl_overflow: - case pow_overflow: - case powf_overflow: + case pow_overflow: + case powf_overflow: case expl_overflow: - case exp_overflow: - case expf_overflow: + case exp_overflow: + case expf_overflow: case exp2l_overflow: - case exp2_overflow: - case exp2f_overflow: + case exp2_overflow: + case exp2f_overflow: case exp10l_overflow: - case exp10_overflow: - case exp10f_overflow: + case exp10_overflow: + case exp10f_overflow: case expm1l_overflow: - case expm1_overflow: - case expm1f_overflow: + case expm1_overflow: + case expm1f_overflow: case hypotl_overflow: case hypot_overflow: case hypotf_overflow: - case sinhl_overflow: - case sinh_overflow: - case sinhf_overflow: - case atanhl_eq_one: - case atanh_eq_one: - case atanhf_eq_one: + case sinhl_overflow: + case sinh_overflow: + case sinhf_overflow: + case atanhl_eq_one: + case atanh_eq_one: + case atanhf_eq_one: case scalbl_overflow: case scalb_overflow: case scalbf_overflow: @@ -428,9 +456,15 @@ else if(_LIB_VERSIONIMF==_ISOC_) case nextafterl_overflow: case nextafter_overflow: case nextafterf_overflow: + case 
nextafterl_underflow: + case nextafter_underflow: + case nextafterf_underflow: case nexttowardl_overflow: case nexttoward_overflow: case nexttowardf_overflow: + case nexttowardl_underflow: + case nexttoward_underflow: + case nexttowardf_underflow: case scalbnl_overflow: case scalbn_overflow: case scalbnf_overflow: @@ -453,35 +487,35 @@ else if(_LIB_VERSIONIMF==_ISOC_) case gamma_negative: case gammaf_negative: case ilogbl_zero: - case ilogb_zero: + case ilogb_zero: case ilogbf_zero: case fdiml_overflow: - case fdim_overflow: + case fdim_overflow: case fdimf_overflow: case llrintl_large: - case llrint_large: + case llrint_large: case llrintf_large: case llroundl_large: - case llround_large: + case llround_large: case llroundf_large: case lrintl_large: - case lrint_large: + case lrint_large: case lrintf_large: case lroundl_large: - case lround_large: + case lround_large: case lroundf_large: case tandl_overflow: - case tand_overflow: + case tand_overflow: case tandf_overflow: case cotdl_overflow: - case cotd_overflow: + case cotd_overflow: case cotdf_overflow: case cotl_overflow: - case cot_overflow: + case cot_overflow: case cotf_overflow: - case sinhcoshl_overflow: - case sinhcosh_overflow: - case sinhcoshf_overflow: + case sinhcoshl_overflow: + case sinhcosh_overflow: + case sinhcoshf_overflow: case annuityl_overflow: case annuity_overflow: case annuityf_overflow: @@ -496,6 +530,7 @@ else if(_LIB_VERSIONIMF==_ISOC_) } case powl_underflow: case expl_underflow: + case exp10l_underflow: case exp2l_underflow: case scalbl_underflow: case scalbnl_underflow: @@ -505,35 +540,43 @@ else if(_LIB_VERSIONIMF==_ISOC_) case annuityl_underflow: case compoundl_underflow: { - if ( *(__INT_64__*)retval == 0 ) ERRNO_RANGE; + /* Test for zero by testing 64 significand bits for zero. 
An integer + test is needed so denormal flag is not set by a floating-point test */ + if ( INPUT_RESI64 == 0 ) ERRNO_RANGE; break; } - case pow_underflow: - case exp_underflow: - case exp2_underflow: + case pow_underflow: + case exp_underflow: + case exp10_underflow: + case exp2_underflow: case scalb_underflow: case scalbn_underflow: case scalbln_underflow: case ldexp_underflow: - case erfc_underflow: + case erfc_underflow: case annuity_underflow: case compound_underflow: { - if ( ((*(__INT_64__*)retval)<<1) == 0 ) ERRNO_RANGE; + /* Test for zero by testing exp and significand bits for zero. An integer + test is needed so denormal flag is not set by a floating-point test */ + if ( (INPUT_RESI64 << 1) == 0 ) ERRNO_RANGE; break; } - case powf_underflow: - case expf_underflow: - case exp2f_underflow: + case powf_underflow: + case expf_underflow: + case exp10f_underflow: + case exp2f_underflow: case scalbf_underflow: case scalbnf_underflow: case scalblnf_underflow: case ldexpf_underflow: - case erfcf_underflow: + case erfcf_underflow: case annuityf_underflow: case compoundf_underflow: { - if ( ((*(__INT_64__*)retval)<<33) == 0 ) ERRNO_RANGE; + /* Test for zero by testing exp and significand bits for zero. 
An integer + test is needed so denormal flag is not set by a floating-point test */ + if ( (INPUT_RESI64 << 33) == 0 ) ERRNO_RANGE; break; } case logl_negative: @@ -563,17 +606,17 @@ else if(_LIB_VERSIONIMF==_ISOC_) case fmodl_by_zero: case fmod_by_zero: case fmodf_by_zero: - case atanhl_gt_one: - case atanh_gt_one: - case atanhf_gt_one: - case acosl_gt_one: - case acos_gt_one: - case acosf_gt_one: - case asinl_gt_one: - case asin_gt_one: - case asinf_gt_one: + case atanhl_gt_one: + case atanh_gt_one: + case atanhf_gt_one: + case acosl_gt_one: + case acos_gt_one: + case acosf_gt_one: + case asinl_gt_one: + case asin_gt_one: + case asinf_gt_one: case logbl_zero: - case logb_zero: + case logb_zero: case logbf_zero: case acoshl_lt_one: case acosh_lt_one: @@ -596,12 +639,12 @@ else if(_LIB_VERSIONIMF==_ISOC_) case ynl_negative: case yn_negative: case ynf_negative: - case acosdl_gt_one: - case acosd_gt_one: - case acosdf_gt_one: - case asindl_gt_one: - case asind_gt_one: - case asindf_gt_one: + case acosdl_gt_one: + case acosd_gt_one: + case acosdf_gt_one: + case asindl_gt_one: + case asind_gt_one: + case asindf_gt_one: case atan2dl_zero: case atan2d_zero: case atan2df_zero: @@ -656,10 +699,10 @@ switch(input_tag) RETVAL_HUGE_VALF; ERRNO_RANGE; break; } case gammal_negative: - case lgammal_negative: case gamma_negative: - case lgamma_negative: case gammaf_negative: + case lgammal_negative: + case lgamma_negative: case lgammaf_negative: case tgammal_negative: case tgamma_negative: @@ -686,49 +729,61 @@ switch(input_tag) case scalblnf_overflow: case scalblnf_underflow: case tandl_overflow: - case tand_overflow: + case tand_overflow: case tandf_overflow: case cotdl_overflow: - case cotd_overflow: + case cotd_overflow: case cotdf_overflow: case cotl_overflow: - case cot_overflow: + case cot_overflow: case cotf_overflow: - case sinhcoshl_overflow: - case sinhcosh_overflow: - case sinhcoshf_overflow: + case sinhcoshl_overflow: + case sinhcosh_overflow: + case 
sinhcoshf_overflow: + case nextafterl_overflow: + case nextafter_overflow: + case nextafterf_overflow: + case nextafterl_underflow: + case nextafter_underflow: + case nextafterf_underflow: + case nexttowardl_overflow: + case nexttoward_overflow: + case nexttowardf_overflow: + case nexttowardl_underflow: + case nexttoward_underflow: + case nexttowardf_underflow: { ERRNO_RANGE; break; } - case atanhl_gt_one: - case atanhl_eq_one: + case atanhl_gt_one: + case atanhl_eq_one: /* atanhl(|x| >= 1) */ { ERRNO_DOMAIN; break; } - case atanh_gt_one: - case atanh_eq_one: + case atanh_gt_one: + case atanh_eq_one: /* atanh(|x| >= 1) */ { ERRNO_DOMAIN; break; } - case atanhf_gt_one: - case atanhf_eq_one: + case atanhf_gt_one: + case atanhf_eq_one: /* atanhf(|x| >= 1) */ { ERRNO_DOMAIN; break; } - case sqrtl_negative: + case sqrtl_negative: /* sqrtl(x < 0) */ { ERRNO_DOMAIN; break; } - case sqrt_negative: + case sqrt_negative: /* sqrt(x < 0) */ { ERRNO_DOMAIN; break; } - case sqrtf_negative: + case sqrtf_negative: /* sqrtf(x < 0) */ { ERRNO_DOMAIN; break; @@ -767,7 +822,10 @@ switch(input_tag) /* y1l(x < 0) */ /* ynl(x < 0) */ { - RETVAL_NEG_HUGE_VALL; ERRNO_DOMAIN; break; +#ifndef _LIBC + RETVAL_NEG_HUGE_VALL; +#endif + ERRNO_DOMAIN; break; } case y0_negative: case y1_negative: @@ -777,7 +835,7 @@ switch(input_tag) /* yn(x < 0) */ { RETVAL_NEG_HUGE_VALD; ERRNO_DOMAIN; break; - } + } case y0f_negative: case y1f_negative: case ynf_negative: @@ -786,14 +844,15 @@ switch(input_tag) /* ynf(x < 0) */ { RETVAL_NEG_HUGE_VALF; ERRNO_DOMAIN; break; - } + } case logl_zero: case log1pl_zero: case log10l_zero: case log2l_zero: /* logl(0) */ - /* log1pl(0) */ + /* log1pl(-1) */ /* log10l(0) */ + /* log2l(0) */ { RETVAL_NEG_HUGE_VALL; ERRNO_RANGE; break; } @@ -802,8 +861,9 @@ switch(input_tag) case log10_zero: case log2_zero: /* log(0) */ - /* log1p(0) */ + /* log1p(-1) */ /* log10(0) */ + /* log2(0) */ { RETVAL_NEG_HUGE_VALD; ERRNO_RANGE; break; } @@ -812,8 +872,9 @@ switch(input_tag) case 
log10f_zero: case log2f_zero: /* logf(0) */ - /* log1pf(0) */ + /* log1pf(-1) */ /* log10f(0) */ + /* log2f(0) */ { RETVAL_NEG_HUGE_VALF; ERRNO_RANGE; break; } @@ -822,12 +883,10 @@ switch(input_tag) case log10l_negative: case log2l_negative: /* logl(x < 0) */ - /* log1pl(x < 0) */ + /* log1pl(x < -1) */ /* log10l(x < 0) */ + /* log2l(x < 0) */ { -#ifndef _LIBC - RETVAL_NEG_HUGE_VALL; -#endif ERRNO_DOMAIN; break; } case log_negative: @@ -835,65 +894,74 @@ switch(input_tag) case log10_negative: case log2_negative: /* log(x < 0) */ - /* log1p(x < 0) */ + /* log1p(x < -1) */ /* log10(x < 0) */ + /* log2(x < 0) */ { -#ifndef _LIBC - RETVAL_NEG_HUGE_VALD; -#endif ERRNO_DOMAIN; break; - } + } case logf_negative: case log1pf_negative: case log10f_negative: case log2f_negative: /* logf(x < 0) */ - /* log1pf(x < 0) */ + /* log1pf(x < -1) */ /* log10f(x < 0) */ + /* log2f(x < 0) */ { -#ifndef _LIBC - RETVAL_NEG_HUGE_VALF; -#endif ERRNO_DOMAIN; break; - } + } case expl_overflow: - case exp2l_overflow: case exp10l_overflow: + case exp2l_overflow: /* expl overflow */ + /* exp10l overflow */ + /* exp2l overflow */ { RETVAL_HUGE_VALL; ERRNO_RANGE; break; } case exp_overflow: - case exp2_overflow: case exp10_overflow: + case exp2_overflow: /* exp overflow */ + /* exp10 overflow */ + /* exp2 overflow */ { RETVAL_HUGE_VALD; ERRNO_RANGE; break; } case expf_overflow: - case exp2f_overflow: case exp10f_overflow: + case exp2f_overflow: /* expf overflow */ { RETVAL_HUGE_VALF; ERRNO_RANGE; break; } case expl_underflow: + case exp10l_underflow: case exp2l_underflow: /* expl underflow */ + /* exp10l underflow */ + /* exp2l underflow */ { - RETVAL_ZEROL; ERRNO_RANGE; break; + ERRNO_RANGE; break; } case exp_underflow: + case exp10_underflow: case exp2_underflow: /* exp underflow */ + /* exp10 underflow */ + /* exp2 underflow */ { - RETVAL_ZEROD; ERRNO_RANGE; break; + ERRNO_RANGE; break; } case expf_underflow: + case exp10f_underflow: case exp2f_underflow: /* expf underflow */ + /* exp10f 
underflow */ + /* exp2f underflow */ { - RETVAL_ZEROF; ERRNO_RANGE; break; + ERRNO_RANGE; break; } case j0l_gt_loss: case y0l_gt_loss: @@ -945,16 +1013,16 @@ switch(input_tag) case compoundl_overflow: /* powl(x,y) overflow */ { - if (INPUT_RESL < 0) RETVAL_NEG_HUGE_VALL; + if (INPUT_RESL < ZEROL_VALUE /*0*/) RETVAL_NEG_HUGE_VALL; else RETVAL_HUGE_VALL; - ERRNO_RANGE; break; + ERRNO_RANGE; break; } case pow_overflow: case annuity_overflow: case compound_overflow: /* pow(x,y) overflow */ { - if (INPUT_RESD < 0) RETVAL_NEG_HUGE_VALD; + if (INPUT_RESD < ZEROD_VALUE /*0*/) RETVAL_NEG_HUGE_VALD; else RETVAL_HUGE_VALD; ERRNO_RANGE; break; } @@ -963,7 +1031,7 @@ switch(input_tag) case compoundf_overflow: /* powf(x,y) overflow */ { - if (INPUT_RESF < 0) RETVAL_NEG_HUGE_VALF; + if (INPUT_RESF < ZEROF_VALUE /*0*/) RETVAL_NEG_HUGE_VALF; else RETVAL_HUGE_VALF; ERRNO_RANGE; break; } @@ -1038,7 +1106,7 @@ switch(input_tag) /* Special Error */ { break; - } + } case pow_nan_to_zero: /* pow(NaN,0.0) */ { @@ -1051,51 +1119,24 @@ switch(input_tag) } case atan2l_zero: case atan2dl_zero: + /* atan2l(0,0) */ /* atan2dl(0,0) */ { -#ifndef _LIBC - RETVAL_ZEROL; -#else - /* XXX arg1 and arg2 are switched!!!! */ - if (signbit (*(long double *) arg1)) - /* y == -0 */ - *(long double *) retval = __libm_copysignl (M_PIl, *(long double *) arg2); - else - *(long double *) retval = *(long double *) arg2; -#endif - ERRNO_DOMAIN; break; + break; } case atan2_zero: case atan2d_zero: + /* atan2(0,0) */ /* atan2d(0,0) */ { -#ifndef _LIBC - RETVAL_ZEROD; -#else - /* XXX arg1 and arg2 are switched!!!! 
*/ - if (signbit (*(double *) arg1)) - /* y == -0 */ - *(double *) retval = __libm_copysign (M_PI, *(double *) arg2); - else - *(double *) retval = *(double *) arg2; -#endif - ERRNO_DOMAIN; break; + break; } case atan2f_zero: case atan2df_zero: /* atan2f(0,0) */ /* atan2df(0,0) */ { -#ifndef _LIBC - RETVAL_ZEROF; -#else - if (signbit (*(float *) arg2)) - /* y == -0 */ - *(float *) retval = __libm_copysignf (M_PI, *(float *) arg1); - else - *(float *) retval = *(float *) arg1; -#endif - ERRNO_DOMAIN; break; + break; } case expm1l_overflow: /* expm1 overflow */ @@ -1145,42 +1186,42 @@ switch(input_tag) case scalbl_underflow: /* scalbl underflow */ { - if (INPUT_XL < 0) RETVAL_NEG_ZEROL; + if (INPUT_XL < ZEROL_VALUE /*0*/) RETVAL_NEG_ZEROL; else RETVAL_ZEROL; ERRNO_RANGE; break; } case scalb_underflow: /* scalb underflow */ { - if (INPUT_XD < 0) RETVAL_NEG_ZEROD; + if (INPUT_XD < ZEROD_VALUE /*0*/) RETVAL_NEG_ZEROD; else RETVAL_ZEROD; ERRNO_RANGE; break; } case scalbf_underflow: /* scalbf underflow */ { - if (INPUT_XF < 0) RETVAL_NEG_ZEROF; + if (INPUT_XF < ZEROF_VALUE /*0*/) RETVAL_NEG_ZEROF; else RETVAL_ZEROF; ERRNO_RANGE; break; } case scalbl_overflow: /* scalbl overflow */ { - if (INPUT_XL < 0) RETVAL_NEG_HUGE_VALL; + if (INPUT_XL < ZEROL_VALUE /*0*/) RETVAL_NEG_HUGE_VALL; else RETVAL_HUGE_VALL; ERRNO_RANGE; break; } case scalb_overflow: /* scalb overflow */ { - if (INPUT_XD < 0) RETVAL_NEG_HUGE_VALD; + if (INPUT_XD < ZEROD_VALUE /*0*/) RETVAL_NEG_HUGE_VALD; else RETVAL_HUGE_VALD; ERRNO_RANGE; break; } case scalbf_overflow: /* scalbf overflow */ { - if (INPUT_XF < 0) RETVAL_NEG_HUGE_VALF; + if (INPUT_XF < ZEROF_VALUE /*0*/) RETVAL_NEG_HUGE_VALF; else RETVAL_HUGE_VALF; ERRNO_RANGE; break; } @@ -1204,9 +1245,6 @@ switch(input_tag) /* acosl(x > 1) */ /* acosdl(x > 1) */ { -#ifndef _LIBC - RETVAL_ZEROL; -#endif ERRNO_DOMAIN; break; } case acos_gt_one: @@ -1214,9 +1252,6 @@ switch(input_tag) /* acos(x > 1) */ /* acosd(x > 1) */ { -#ifndef _LIBC - RETVAL_ZEROD; -#endif 
ERRNO_DOMAIN; break; } case acosf_gt_one: @@ -1224,9 +1259,6 @@ switch(input_tag) /* acosf(x > 1) */ /* acosdf(x > 1) */ { -#ifndef _LIBC - RETVAL_ZEROF; -#endif ERRNO_DOMAIN; break; } case asinl_gt_one: @@ -1234,9 +1266,6 @@ switch(input_tag) /* asinl(x > 1) */ /* asindl(x > 1) */ { -#ifndef _LIBC - RETVAL_ZEROL; -#endif ERRNO_DOMAIN; break; } case asin_gt_one: @@ -1244,18 +1273,13 @@ switch(input_tag) /* asin(x > 1) */ /* asind(x > 1) */ { -#ifndef _LIBC - RETVAL_ZEROD; -#endif ERRNO_DOMAIN; break; } case asinf_gt_one: case asindf_gt_one: + /* asinf(x > 1) */ /* asindf(x > 1) */ { -#ifndef _LIBC - RETVAL_ZEROF; -#endif ERRNO_DOMAIN; break; } case remainderl_by_zero: @@ -1291,33 +1315,24 @@ switch(input_tag) { RETVAL_HUGE_VALF; ERRNO_RANGE; break; } - case nextafterl_overflow: - case nextafter_overflow: - case nextafterf_overflow: - case nexttowardl_overflow: - case nexttoward_overflow: - case nexttowardf_overflow: - { - ERRNO_RANGE; break; - } case sinhl_overflow: /* sinhl overflows */ { - if (INPUT_XL > 0) RETVAL_HUGE_VALL; + if (INPUT_XL > ZEROL_VALUE /*0*/) RETVAL_HUGE_VALL; else RETVAL_NEG_HUGE_VALL; ERRNO_RANGE; break; } case sinh_overflow: /* sinh overflows */ { - if (INPUT_XD > 0) RETVAL_HUGE_VALD; + if (INPUT_XD > ZEROD_VALUE /*0*/) RETVAL_HUGE_VALD; else RETVAL_NEG_HUGE_VALD; ERRNO_RANGE; break; } case sinhf_overflow: /* sinhf overflows */ { - if (INPUT_XF > 0) RETVAL_HUGE_VALF; + if (INPUT_XF > ZEROF_VALUE /*0*/) RETVAL_HUGE_VALF; else RETVAL_NEG_HUGE_VALF; ERRNO_RANGE; break; } @@ -1361,7 +1376,7 @@ return; /*******************************/ /* __SVID__ and __XOPEN__ Path */ /*******************************/ -else +else { switch(input_tag) { @@ -1384,13 +1399,13 @@ else case scalblnf_overflow: case scalblnf_underflow: case tandl_overflow: - case tand_overflow: + case tand_overflow: case tandf_overflow: case cotdl_overflow: - case cotd_overflow: + case cotd_overflow: case cotdf_overflow: case cotl_overflow: - case cot_overflow: + case cot_overflow: case 
cotf_overflow: case annuityl_overflow: case annuityl_underflow: @@ -1422,12 +1437,12 @@ else { ERRNO_DOMAIN; break; } - case sqrtl_negative: + case sqrtl_negative: /* sqrtl(x < 0) */ { DOMAINL; NAMEL = (char *) "sqrtl"; - ifSVID - { + ifSVID + { RETVAL_ZEROL; NOT_MATHERRL { @@ -1435,22 +1450,22 @@ else ERRNO_DOMAIN; } } - else + else { /* NaN already computed */ NOT_MATHERRL {ERRNO_DOMAIN;} } - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; break; } - case sqrt_negative: + case sqrt_negative: /* sqrt(x < 0) */ { DOMAIND; NAMED = (char *) "sqrt"; - ifSVID + ifSVID { - + RETVAL_ZEROD; - NOT_MATHERRD + NOT_MATHERRD { WRITED_SQRT; ERRNO_DOMAIN; @@ -1459,18 +1474,18 @@ else else { /* NaN already computed */ NOT_MATHERRD {ERRNO_DOMAIN;} - } - *(double *)retval = exc.retval; + } + *(double *)retval = exc.retval; break; } - case sqrtf_negative: + case sqrtf_negative: /* sqrtf(x < 0) */ { DOMAINF; NAMEF = (char *) "sqrtf"; - ifSVID + ifSVID { RETVAL_ZEROF; - NOT_MATHERRF + NOT_MATHERRF { WRITEF_SQRT; ERRNO_DOMAIN; @@ -1479,59 +1494,59 @@ else else { NOT_MATHERRF {ERRNO_DOMAIN;} - } - *(float *)retval = excf.retval; + } + *(float *)retval = excf.retval; break; } case logl_zero: /* logl(0) */ { SINGL; NAMEL = (char *) "logl"; - ifSVID + ifSVID { RETVAL_NEG_HUGEL; - NOT_MATHERRL + NOT_MATHERRL { WRITEL_LOG_ZERO; ERRNO_DOMAIN; - } + } } else { RETVAL_NEG_HUGE_VALL; NOT_MATHERRL {ERRNO_DOMAIN;} - } - *(long double *)retval = excl.retval; + } + *(long double *)retval = excl.retval; break; } case log_zero: /* log(0) */ { SINGD; NAMED = (char *) "log"; - ifSVID + ifSVID { RETVAL_NEG_HUGED; - NOT_MATHERRD + NOT_MATHERRD { WRITED_LOG_ZERO; ERRNO_DOMAIN; - } + } } else { RETVAL_NEG_HUGE_VALD; NOT_MATHERRD {ERRNO_DOMAIN;} } - *(double *)retval = exc.retval; + *(double *)retval = exc.retval; break; } case logf_zero: /* logf(0) */ { SINGF; NAMEF = (char *) "logf"; - ifSVID + ifSVID { - RETVAL_NEG_HUGEF; + RETVAL_NEG_HUGEF; NOT_MATHERRF { WRITEF_LOG_ZERO; @@ 
-1540,10 +1555,10 @@ else } else { - RETVAL_NEG_HUGE_VALF; + RETVAL_NEG_HUGE_VALF; NOT_MATHERRF {ERRNO_DOMAIN;} } - *(float *)retval = excf.retval; + *(float *)retval = excf.retval; break; } @@ -1551,10 +1566,10 @@ else /* logl(x < 0) */ { DOMAINL; NAMEL = (char *) "logl"; - ifSVID + ifSVID { RETVAL_NEG_HUGEL; - NOT_MATHERRL + NOT_MATHERRL { WRITEL_LOG_NEGATIVE; ERRNO_DOMAIN; @@ -1562,20 +1577,20 @@ else } else { - RETVAL_NEG_HUGE_VALL; + RETVAL_NEG_HUGE_VALL; NOT_MATHERRL {ERRNO_DOMAIN;} } - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; break; } case log_negative: /* log(x < 0) */ { DOMAIND; NAMED = (char *) "log"; - ifSVID + ifSVID { RETVAL_NEG_HUGED; - NOT_MATHERRD + NOT_MATHERRD { WRITED_LOG_NEGATIVE; ERRNO_DOMAIN; @@ -1583,38 +1598,38 @@ else } else { - RETVAL_NEG_HUGE_VALD; + RETVAL_NEG_HUGE_VALD; NOT_MATHERRD {ERRNO_DOMAIN;} } - *(double *)retval = exc.retval; + *(double *)retval = exc.retval; break; - } + } case logf_negative: /* logf(x < 0) */ { DOMAINF; NAMEF = (char *) "logf"; - ifSVID + ifSVID { RETVAL_NEG_HUGEF; - NOT_MATHERRF + NOT_MATHERRF { WRITEF_LOG_NEGATIVE; ERRNO_DOMAIN; } - } + } else { - RETVAL_NEG_HUGE_VALF; + RETVAL_NEG_HUGE_VALF; NOT_MATHERRF{ERRNO_DOMAIN;} } - *(float *)retval = excf.retval; + *(float *)retval = excf.retval; break; } case log1pl_zero: /* log1pl(-1) */ { SINGL; NAMEL = (char *) "log1pl"; - ifSVID + ifSVID { RETVAL_NEG_HUGEL; NOT_MATHERRL @@ -1635,7 +1650,7 @@ else /* log1p(-1) */ { SINGD; NAMED = (char *) "log1p"; - ifSVID + ifSVID { RETVAL_NEG_HUGED; NOT_MATHERRD @@ -1656,7 +1671,7 @@ else /* log1pf(-1) */ { SINGF; NAMEF = (char *) "log1pf"; - ifSVID + ifSVID { RETVAL_NEG_HUGEF; NOT_MATHERRF @@ -1672,7 +1687,7 @@ else } *(float *)retval = excf.retval; break; - } + } case log1pl_negative: /* log1pl(x < -1) */ { @@ -1686,7 +1701,7 @@ else ERRNO_DOMAIN; } } - else + else { RETVAL_NEG_HUGE_VALL; NOT_MATHERRL {ERRNO_DOMAIN;} @@ -1707,7 +1722,7 @@ else ERRNO_DOMAIN; } } - else + else { 
RETVAL_NEG_HUGE_VALD; NOT_MATHERRD {ERRNO_DOMAIN;} @@ -1728,7 +1743,7 @@ else ERRNO_DOMAIN; } } - else + else { RETVAL_NEG_HUGE_VALF; NOT_MATHERRF {ERRNO_DOMAIN;} @@ -1740,7 +1755,7 @@ else /* log10l(0) */ { SINGL; NAMEL = (char *) "log10l"; - ifSVID + ifSVID { RETVAL_NEG_HUGEL; NOT_MATHERRL @@ -1754,14 +1769,14 @@ else RETVAL_NEG_HUGE_VALL; NOT_MATHERRL {ERRNO_DOMAIN;} } - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; break; } case log10_zero: /* log10(0) */ { SINGD; NAMED = (char *) "log10"; - ifSVID + ifSVID { RETVAL_NEG_HUGED; NOT_MATHERRD @@ -1775,14 +1790,14 @@ else RETVAL_NEG_HUGE_VALD; NOT_MATHERRD {ERRNO_DOMAIN;} } - *(double *)retval = exc.retval; + *(double *)retval = exc.retval; break; } case log10f_zero: /* log10f(0) */ { SINGF; NAMEF = (char *) "log10f"; - ifSVID + ifSVID { RETVAL_NEG_HUGEF; NOT_MATHERRF @@ -1796,17 +1811,17 @@ else RETVAL_NEG_HUGE_VALF; NOT_MATHERRF {ERRNO_DOMAIN;} } - *(float *)retval = excf.retval; + *(float *)retval = excf.retval; break; } case log10l_negative: /* log10l(x < 0) */ { DOMAINL; NAMEL = (char *) "log10l"; - ifSVID + ifSVID { RETVAL_NEG_HUGEL; - NOT_MATHERRL + NOT_MATHERRL { WRITEL_LOG10_NEGATIVE; ERRNO_DOMAIN; @@ -1817,38 +1832,38 @@ else RETVAL_NEG_HUGE_VALL; NOT_MATHERRL {ERRNO_DOMAIN;} } - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; break; } case log10_negative: /* log10(x < 0) */ { DOMAIND; NAMED = (char *) "log10"; - ifSVID + ifSVID { RETVAL_NEG_HUGED; - NOT_MATHERRD + NOT_MATHERRD { WRITED_LOG10_NEGATIVE; ERRNO_DOMAIN; } - } + } else { RETVAL_NEG_HUGE_VALD; NOT_MATHERRD {ERRNO_DOMAIN;} } - *(double *)retval = exc.retval; + *(double *)retval = exc.retval; break; } case log10f_negative: /* log10f(x < 0) */ { DOMAINF; NAMEF = (char *) "log10f"; - ifSVID + ifSVID { RETVAL_NEG_HUGEF; - NOT_MATHERRF + NOT_MATHERRF { WRITEF_LOG10_NEGATIVE; ERRNO_DOMAIN; @@ -1859,14 +1874,35 @@ else RETVAL_NEG_HUGE_VALF; NOT_MATHERRF {ERRNO_DOMAIN;} } - *(float *)retval = 
excf.retval; + *(float *)retval = excf.retval; + break; + } + case log2l_zero: + /* log2l(0) */ + { + SINGL; NAMEL = (char *) "log2l"; + ifSVID + { + RETVAL_NEG_HUGEL; + NOT_MATHERRL + { + WRITEL_LOG2_ZERO; + ERRNO_DOMAIN; + } + } + else + { + RETVAL_NEG_HUGE_VALL; + NOT_MATHERRL {ERRNO_DOMAIN;} + } + *(long double *)retval = excl.retval; break; } case log2_zero: /* log2(0) */ { SINGD; NAMED = (char *) "log2"; - ifSVID + ifSVID { RETVAL_NEG_HUGED; NOT_MATHERRD @@ -1880,14 +1916,14 @@ else RETVAL_NEG_HUGE_VALD; NOT_MATHERRD {ERRNO_DOMAIN;} } - *(double *)retval = exc.retval; + *(double *)retval = exc.retval; break; } case log2f_zero: /* log2f(0) */ { SINGF; NAMEF = (char *) "log2f"; - ifSVID + ifSVID { RETVAL_NEG_HUGEF; NOT_MATHERRF @@ -1901,17 +1937,17 @@ else RETVAL_NEG_HUGE_VALF; NOT_MATHERRF {ERRNO_DOMAIN;} } - *(float *)retval = excf.retval; + *(float *)retval = excf.retval; break; } case log2l_negative: /* log2l(x < 0) */ { DOMAINL; NAMEL = (char *) "log2l"; - ifSVID + ifSVID { RETVAL_NEG_HUGEL; - NOT_MATHERRL + NOT_MATHERRL { WRITEL_LOG2_NEGATIVE; ERRNO_DOMAIN; @@ -1922,38 +1958,38 @@ else RETVAL_NEG_HUGE_VALL; NOT_MATHERRL {ERRNO_DOMAIN;} } - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; break; } case log2_negative: /* log2(x < 0) */ { DOMAIND; NAMED = (char *) "log2"; - ifSVID + ifSVID { RETVAL_NEG_HUGED; - NOT_MATHERRD + NOT_MATHERRD { WRITED_LOG2_NEGATIVE; ERRNO_DOMAIN; } - } + } else { RETVAL_NEG_HUGE_VALD; NOT_MATHERRD {ERRNO_DOMAIN;} } - *(double *)retval = exc.retval; + *(double *)retval = exc.retval; break; } case log2f_negative: /* log2f(x < 0) */ { DOMAINF; NAMEF = (char *) "log2f"; - ifSVID + ifSVID { RETVAL_NEG_HUGEF; - NOT_MATHERRF + NOT_MATHERRF { WRITEF_LOG2_NEGATIVE; ERRNO_DOMAIN; @@ -1964,14 +2000,14 @@ else RETVAL_NEG_HUGE_VALF; NOT_MATHERRF {ERRNO_DOMAIN;} } - *(float *)retval = excf.retval; + *(float *)retval = excf.retval; break; } case expl_overflow: /* expl overflow */ { OVERFLOWL; NAMEL = (char *) 
"expl"; - ifSVID + ifSVID { RETVAL_HUGEL; } @@ -1980,14 +2016,14 @@ else RETVAL_HUGE_VALL; } NOT_MATHERRL {ERRNO_RANGE;} - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; break; } case exp_overflow: /* exp overflow */ { OVERFLOWD; NAMED = (char *) "exp"; - ifSVID + ifSVID { RETVAL_HUGED; } @@ -1996,14 +2032,14 @@ else RETVAL_HUGE_VALD; } NOT_MATHERRD {ERRNO_RANGE;} - *(double *)retval = exc.retval; + *(double *)retval = exc.retval; break; } case expf_overflow: /* expf overflow */ { OVERFLOWF; NAMEF = (char *) "expf"; - ifSVID + ifSVID { RETVAL_HUGEF; } @@ -2012,7 +2048,7 @@ else RETVAL_HUGE_VALF; } NOT_MATHERRF {ERRNO_RANGE;} - *(float *)retval = excf.retval; + *(float *)retval = excf.retval; break; } case expl_underflow: @@ -2020,7 +2056,7 @@ else { UNDERFLOWL; NAMEL = (char *) "expl"; RETVAL_ZEROL; NOT_MATHERRL {ERRNO_RANGE;} - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; break; } case exp_underflow: @@ -2028,7 +2064,7 @@ else { UNDERFLOWD; NAMED = (char *) "exp"; RETVAL_ZEROD; NOT_MATHERRD {ERRNO_RANGE;} - *(double *)retval = exc.retval; + *(double *)retval = exc.retval; break; } case expf_underflow: @@ -2036,22 +2072,22 @@ else { UNDERFLOWF; NAMEF = (char *) "expf"; RETVAL_ZEROF; NOT_MATHERRF {ERRNO_RANGE;} - *(float *)retval = excf.retval; + *(float *)retval = excf.retval; break; } case powl_zero_to_zero: /* powl 0**0 */ { DOMAINL; NAMEL = (char *) "powl"; - ifSVID + ifSVID { RETVAL_ZEROL; - NOT_MATHERRL + NOT_MATHERRL { WRITEL_POW_ZERO_TO_ZERO; ERRNO_DOMAIN; } - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; } else RETVAL_ONEL; break; @@ -2060,15 +2096,15 @@ else /* pow 0**0 */ { DOMAIND; NAMED = (char *) "pow"; - ifSVID + ifSVID { RETVAL_ZEROD; - NOT_MATHERRD + NOT_MATHERRD { WRITED_POW_ZERO_TO_ZERO; ERRNO_DOMAIN; } - *(double *)retval = exc.retval; + *(double *)retval = exc.retval; } else RETVAL_ONED; break; @@ -2077,15 +2113,15 @@ else /* powf 0**0 */ { DOMAINF; 
NAMEF = (char *) "powf"; - ifSVID + ifSVID { RETVAL_ZEROF; - NOT_MATHERRF + NOT_MATHERRF { WRITEF_POW_ZERO_TO_ZERO; ERRNO_DOMAIN; } - *(float *)retval = excf.retval; + *(float *)retval = excf.retval; } else RETVAL_ONEF; break; @@ -2094,54 +2130,54 @@ else /* powl(x,y) overflow */ { OVERFLOWL; NAMEL = (char *) "powl"; - ifSVID + ifSVID { - if (INPUT_XL < 0) RETVAL_NEG_HUGEL; + if (INPUT_RESL < ZEROL_VALUE /*0*/) RETVAL_NEG_HUGEL; else RETVAL_HUGEL; } else - { - if (INPUT_XL < 0) RETVAL_NEG_HUGE_VALL; + { + if (INPUT_RESL < ZEROL_VALUE /*0*/) RETVAL_NEG_HUGE_VALL; else RETVAL_HUGE_VALL; } NOT_MATHERRL {ERRNO_RANGE;} - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; break; } case pow_overflow: /* pow(x,y) overflow */ { OVERFLOWD; NAMED = (char *) "pow"; - ifSVID + ifSVID { - if (INPUT_XD < 0) RETVAL_NEG_HUGED; + if (INPUT_RESD < ZEROD_VALUE /*0*/) RETVAL_NEG_HUGED; else RETVAL_HUGED; } else - { - if (INPUT_XD < 0) RETVAL_NEG_HUGE_VALD; + { + if (INPUT_RESD < ZEROD_VALUE /*0*/) RETVAL_NEG_HUGE_VALD; else RETVAL_HUGE_VALD; } NOT_MATHERRD {ERRNO_RANGE;} - *(double *)retval = exc.retval; + *(double *)retval = exc.retval; break; } case powf_overflow: /* powf(x,y) overflow */ { OVERFLOWF; NAMEF = (char *) "powf"; - ifSVID + ifSVID { - if (INPUT_XF < 0) RETVAL_NEG_HUGEF; - else RETVAL_HUGEF; + if (INPUT_RESF < ZEROF_VALUE /*0*/) RETVAL_NEG_HUGEF; + else RETVAL_HUGEF; } else - { - if (INPUT_XF < 0) RETVAL_NEG_HUGE_VALF; + { + if (INPUT_RESF < ZEROF_VALUE /*0*/) RETVAL_NEG_HUGE_VALF; else RETVAL_HUGE_VALF; } NOT_MATHERRF {ERRNO_RANGE;} - *(float *)retval = excf.retval; + *(float *)retval = excf.retval; break; } case powl_underflow: @@ -2149,7 +2185,7 @@ else { UNDERFLOWL; NAMEL = (char *) "powl"; RETVAL_ZEROL; NOT_MATHERRL {ERRNO_RANGE;} - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; break; } case pow_underflow: @@ -2157,7 +2193,7 @@ else { UNDERFLOWD; NAMED = (char *) "pow"; RETVAL_ZEROD; NOT_MATHERRD 
{ERRNO_RANGE;} - *(double *)retval = exc.retval; + *(double *)retval = exc.retval; break; } case powf_underflow: @@ -2165,17 +2201,17 @@ else { UNDERFLOWF; NAMEF = (char *) "powf"; RETVAL_ZEROF; NOT_MATHERRF {ERRNO_RANGE;} - *(float *)retval = excf.retval; + *(float *)retval = excf.retval; break; } case powl_zero_to_negative: /* 0 to neg */ { DOMAINL; NAMEL = (char *) "powl"; - ifSVID - { + ifSVID + { RETVAL_ZEROL; - NOT_MATHERRL + NOT_MATHERRL { WRITEL_POW_ZERO_TO_NEGATIVE; ERRNO_DOMAIN; @@ -2186,17 +2222,17 @@ else RETVAL_NEG_HUGE_VALL; NOT_MATHERRL {ERRNO_DOMAIN;} } - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; break; } case pow_zero_to_negative: /* 0**neg */ { DOMAIND; NAMED = (char *) "pow"; - ifSVID - { + ifSVID + { RETVAL_ZEROD; - NOT_MATHERRD + NOT_MATHERRD { WRITED_POW_ZERO_TO_NEGATIVE; ERRNO_DOMAIN; @@ -2207,18 +2243,17 @@ else RETVAL_NEG_HUGE_VALD; NOT_MATHERRD {ERRNO_DOMAIN;} } - *(double *)retval = exc.retval; + *(double *)retval = exc.retval; break; } case powf_zero_to_negative: /* 0**neg */ { DOMAINF; NAMEF = (char *) "powf"; - RETVAL_NEG_HUGE_VALF; - ifSVID - { + ifSVID + { RETVAL_ZEROF; - NOT_MATHERRF + NOT_MATHERRF { WRITEF_POW_ZERO_TO_NEGATIVE; ERRNO_DOMAIN; @@ -2229,17 +2264,17 @@ else RETVAL_NEG_HUGE_VALF; NOT_MATHERRF {ERRNO_DOMAIN;} } - *(float *)retval = excf.retval; + *(float *)retval = excf.retval; break; } case powl_neg_to_non_integer: /* neg**non_integral */ { DOMAINL; NAMEL = (char *) "powl"; - ifSVID - { - RETVAL_ZEROF; - NOT_MATHERRL + ifSVID + { + RETVAL_ZEROL; + NOT_MATHERRL { WRITEL_POW_NEG_TO_NON_INTEGER; ERRNO_DOMAIN; @@ -2249,17 +2284,17 @@ else { NOT_MATHERRL {ERRNO_DOMAIN;} } - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; break; } case pow_neg_to_non_integer: /* neg**non_integral */ { DOMAIND; NAMED = (char *) "pow"; - ifSVID - { + ifSVID + { RETVAL_ZEROD; - NOT_MATHERRD + NOT_MATHERRD { WRITED_POW_NEG_TO_NON_INTEGER; ERRNO_DOMAIN; @@ -2269,17 +2304,17 @@ else 
{ NOT_MATHERRD {ERRNO_DOMAIN;} } - *(double *)retval = exc.retval; + *(double *)retval = exc.retval; break; } case powf_neg_to_non_integer: /* neg**non-integral */ { DOMAINF; NAMEF = (char *) "powf"; - ifSVID - { + ifSVID + { RETVAL_ZEROF; - NOT_MATHERRF + NOT_MATHERRF { WRITEF_POW_NEG_TO_NON_INTEGER; ERRNO_DOMAIN; @@ -2289,7 +2324,7 @@ else { NOT_MATHERRF {ERRNO_DOMAIN;} } - *(float *)retval = excf.retval; + *(float *)retval = excf.retval; break; } case powl_nan_to_zero: @@ -2299,9 +2334,9 @@ else DOMAINL; NAMEL = (char *) "powl"; *(long double *)retval = *(long double *)arg1; NOT_MATHERRL {ERRNO_DOMAIN;} - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; break; - } + } case pow_nan_to_zero: /* pow(NaN,0.0) */ /* Special Error */ @@ -2309,7 +2344,7 @@ else DOMAIND; NAMED = (char *) "pow"; *(double *)retval = *(double *)arg1; NOT_MATHERRD {ERRNO_DOMAIN;} - *(double *)retval = exc.retval; + *(double *)retval = exc.retval; break; } case powf_nan_to_zero: @@ -2319,7 +2354,7 @@ else DOMAINF; NAMEF = (char *) "powf"; *(float *)retval = *(float *)arg1; NOT_MATHERRF {ERRNO_DOMAIN;} - *(float *)retval = excf.retval; + *(float *)retval = excf.retval; break; } case atan2l_zero: @@ -2327,15 +2362,15 @@ else { DOMAINL; NAMEL = (char *) "atan2l"; RETVAL_ZEROL; - NOT_MATHERRL + NOT_MATHERRL { - ifSVID + ifSVID { WRITEL_ATAN2_ZERO_BY_ZERO; } ERRNO_DOMAIN; } - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; break; } case atan2_zero: @@ -2343,15 +2378,15 @@ else { DOMAIND; NAMED = (char *) "atan2"; RETVAL_ZEROD; - NOT_MATHERRD + NOT_MATHERRD { - ifSVID - { + ifSVID + { WRITED_ATAN2_ZERO_BY_ZERO; } ERRNO_DOMAIN; } - *(double *)retval = exc.retval; + *(double *)retval = exc.retval; break; } case atan2f_zero: @@ -2359,13 +2394,15 @@ else { DOMAINF; NAMEF = (char *) "atan2f"; RETVAL_ZEROF; - NOT_MATHERRF - ifSVID + NOT_MATHERRF + { + ifSVID { WRITEF_ATAN2_ZERO_BY_ZERO; } - ERRNO_DOMAIN; - *(float *)retval = excf.retval; + 
ERRNO_DOMAIN; + } + *(float *)retval = excf.retval; break; } case atan2dl_zero: @@ -2373,15 +2410,15 @@ else { DOMAINL; NAMEL = (char *) "atan2dl"; RETVAL_ZEROL; - NOT_MATHERRL + NOT_MATHERRL { - ifSVID + ifSVID { WRITEL_ATAN2D_ZERO_BY_ZERO; } ERRNO_DOMAIN; } - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; break; } case atan2d_zero: @@ -2389,15 +2426,15 @@ else { DOMAIND; NAMED = (char *) "atan2d"; RETVAL_ZEROD; - NOT_MATHERRD + NOT_MATHERRD { - ifSVID - { + ifSVID + { WRITED_ATAN2D_ZERO_BY_ZERO; } ERRNO_DOMAIN; } - *(double *)retval = exc.retval; + *(double *)retval = exc.retval; break; } case atan2df_zero: @@ -2405,13 +2442,15 @@ else { DOMAINF; NAMEF = (char *) "atan2df"; RETVAL_ZEROF; - NOT_MATHERRF - ifSVID + NOT_MATHERRF + { + ifSVID { WRITEF_ATAN2D_ZERO_BY_ZERO; } - ERRNO_DOMAIN; - *(float *)retval = excf.retval; + ERRNO_DOMAIN; + } + *(float *)retval = excf.retval; break; } case expm1_overflow: @@ -2446,60 +2485,60 @@ else /* scalbl underflow */ { UNDERFLOWL; NAMEL = (char *) "scalbl"; - if (INPUT_XL < 0.0L) RETVAL_NEG_ZEROL; + if (INPUT_XL < ZEROL_VALUE /*0.0L*/) RETVAL_NEG_ZEROL; else RETVAL_ZEROL; - NOT_MATHERRL {ERRNO_RANGE;} - *(long double *)retval = excl.retval; + NOT_MATHERRL {ERRNO_RANGE;} + *(long double *)retval = excl.retval; break; } case scalb_underflow: /* scalb underflow */ { UNDERFLOWD; NAMED = (char *) "scalb"; - if (INPUT_XD < 0.0) RETVAL_NEG_ZEROD; + if (INPUT_XD < ZEROD_VALUE /*0.0*/) RETVAL_NEG_ZEROD; else RETVAL_ZEROD; - NOT_MATHERRD {ERRNO_RANGE;} - *(double *)retval = exc.retval; + NOT_MATHERRD {ERRNO_RANGE;} + *(double *)retval = exc.retval; break; } case scalbf_underflow: /* scalbf underflow */ { UNDERFLOWF; NAMEF = (char *) "scalbf"; - if (INPUT_XF < 0.0) RETVAL_NEG_ZEROF; + if (INPUT_XF < ZEROF_VALUE /*0.0*/) RETVAL_NEG_ZEROF; else RETVAL_ZEROF; - NOT_MATHERRF {ERRNO_RANGE;} - *(float *)retval = excf.retval; + NOT_MATHERRF {ERRNO_RANGE;} + *(float *)retval = excf.retval; break; } case 
scalbl_overflow: /* scalbl overflow */ { OVERFLOWL; NAMEL = (char *) "scalbl"; - if (INPUT_XL < 0) RETVAL_NEG_HUGE_VALL; + if (INPUT_XL < ZEROL_VALUE /*0*/) RETVAL_NEG_HUGE_VALL; else RETVAL_HUGE_VALL; - NOT_MATHERRL {ERRNO_RANGE;} - *(long double *)retval = excl.retval; + NOT_MATHERRL {ERRNO_RANGE;} + *(long double *)retval = excl.retval; break; } case scalb_overflow: /* scalb overflow */ { OVERFLOWD; NAMED = (char *) "scalb"; - if (INPUT_XD < 0) RETVAL_NEG_HUGE_VALD; + if (INPUT_XD < ZEROD_VALUE /*0*/) RETVAL_NEG_HUGE_VALD; else RETVAL_HUGE_VALD; - NOT_MATHERRD {ERRNO_RANGE;} - *(double *)retval = exc.retval; + NOT_MATHERRD {ERRNO_RANGE;} + *(double *)retval = exc.retval; break; } case scalbf_overflow: /* scalbf overflow */ { OVERFLOWF; NAMEF = (char *) "scalbf"; - if (INPUT_XF < 0) RETVAL_NEG_HUGE_VALF; + if (INPUT_XF < ZEROF_VALUE /*0*/) RETVAL_NEG_HUGE_VALF; else RETVAL_HUGE_VALF; - NOT_MATHERRF {ERRNO_RANGE;} - *(float *)retval = excf.retval; + NOT_MATHERRF {ERRNO_RANGE;} + *(float *)retval = excf.retval; break; } case hypotl_overflow: @@ -2507,7 +2546,7 @@ else { OVERFLOWL; NAMEL = (char *) "hypotl"; ifSVID - { + { RETVAL_HUGEL; } else @@ -2515,7 +2554,7 @@ else RETVAL_HUGE_VALL; } NOT_MATHERRL {ERRNO_RANGE;} - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; break; } case hypot_overflow: @@ -2523,7 +2562,7 @@ else { OVERFLOWD; NAMED = (char *) "hypot"; ifSVID - { + { RETVAL_HUGED; } else @@ -2531,14 +2570,14 @@ else RETVAL_HUGE_VALD; } NOT_MATHERRD {ERRNO_RANGE;} - *(double *)retval = exc.retval; + *(double *)retval = exc.retval; break; } case hypotf_overflow: /* hypotf overflow */ - { + { OVERFLOWF; NAMEF = (char *) "hypotf"; - ifSVID + ifSVID { RETVAL_HUGEF; } @@ -2547,7 +2586,7 @@ else RETVAL_HUGE_VALF; } NOT_MATHERRF {ERRNO_RANGE;} - *(float *)retval = excf.retval; + *(float *)retval = excf.retval; break; } case acosl_gt_one: @@ -2555,7 +2594,7 @@ else { DOMAINL; NAMEL = (char *) "acosl"; RETVAL_ZEROL; - ifSVID + ifSVID { 
NOT_MATHERRL { @@ -2575,7 +2614,7 @@ else { DOMAIND; NAMED = (char *) "acos"; RETVAL_ZEROD; - ifSVID + ifSVID { NOT_MATHERRD { @@ -2595,9 +2634,9 @@ else { DOMAINF; NAMEF = (char *) "acosf"; RETVAL_ZEROF; - ifSVID + ifSVID { - NOT_MATHERRF + NOT_MATHERRF { WRITEF_ACOS; ERRNO_DOMAIN; @@ -2606,8 +2645,8 @@ else else { NOT_MATHERRF {ERRNO_DOMAIN;} - } - *(float *)retval = excf.retval; + } + *(float *)retval = excf.retval; break; } case asinl_gt_one: @@ -2615,7 +2654,7 @@ else { DOMAINL; NAMEL = (char *) "asinl"; RETVAL_ZEROL; - ifSVID + ifSVID { NOT_MATHERRL { @@ -2635,7 +2674,7 @@ else { DOMAIND; NAMED = (char *) "asin"; RETVAL_ZEROD; - ifSVID + ifSVID { NOT_MATHERRD { @@ -2655,9 +2694,9 @@ else { DOMAINF; NAMEF = (char *) "asinf"; RETVAL_ZEROF; - ifSVID + ifSVID { - NOT_MATHERRF + NOT_MATHERRF { WRITEF_ASIN; ERRNO_DOMAIN; @@ -2666,8 +2705,8 @@ else else { NOT_MATHERRF {ERRNO_DOMAIN;} - } - *(float *)retval = excf.retval; + } + *(float *)retval = excf.retval; break; } case acosdl_gt_one: @@ -2675,7 +2714,7 @@ else { DOMAINL; NAMEL = (char *) "acosdl"; RETVAL_ZEROL; - ifSVID + ifSVID { NOT_MATHERRL { @@ -2695,7 +2734,7 @@ else { DOMAIND; NAMED = (char *) "acosd"; RETVAL_ZEROD; - ifSVID + ifSVID { NOT_MATHERRD { @@ -2715,9 +2754,9 @@ else { DOMAINF; NAMEF = (char *) "acosdf"; RETVAL_ZEROF; - ifSVID + ifSVID { - NOT_MATHERRF + NOT_MATHERRF { WRITEF_ACOSD; ERRNO_DOMAIN; @@ -2726,8 +2765,8 @@ else else { NOT_MATHERRF {ERRNO_DOMAIN;} - } - *(float *)retval = excf.retval; + } + *(float *)retval = excf.retval; break; } case asindl_gt_one: @@ -2735,7 +2774,7 @@ else { DOMAINL; NAMEL = (char *) "asindl"; RETVAL_ZEROL; - ifSVID + ifSVID { NOT_MATHERRL { @@ -2755,7 +2794,7 @@ else { DOMAIND; NAMED = (char *) "asind"; RETVAL_ZEROD; - ifSVID + ifSVID { NOT_MATHERRD { @@ -2775,9 +2814,9 @@ else { DOMAINF; NAMEF = (char *) "asindf"; RETVAL_ZEROF; - ifSVID + ifSVID { - NOT_MATHERRF + NOT_MATHERRF { WRITEF_ASIND; ERRNO_DOMAIN; @@ -2786,8 +2825,8 @@ else else { NOT_MATHERRF 
{ERRNO_DOMAIN;} - } - *(float *)retval = excf.retval; + } + *(float *)retval = excf.retval; break; } case coshl_overflow: @@ -2798,7 +2837,7 @@ else { RETVAL_HUGEL; } - else + else { RETVAL_HUGE_VALL; } @@ -2814,7 +2853,7 @@ else { RETVAL_HUGED; } - else + else { RETVAL_HUGE_VALD; } @@ -2830,7 +2869,7 @@ else { RETVAL_HUGEF; } - else + else { RETVAL_HUGE_VALF; } @@ -2844,12 +2883,12 @@ else OVERFLOWL; NAMEL = (char *) "sinhl"; ifSVID { - if (INPUT_XL > 0.0) RETVAL_HUGEL; + if (INPUT_XL > ZEROL_VALUE /*0.0*/) RETVAL_HUGEL; else RETVAL_NEG_HUGEL; } - else + else { - if (INPUT_XL > 0.0) RETVAL_HUGE_VALL; + if (INPUT_XL > ZEROL_VALUE /*0.0*/) RETVAL_HUGE_VALL; else RETVAL_NEG_HUGE_VALL; } NOT_MATHERRL {ERRNO_RANGE;} @@ -2862,12 +2901,12 @@ else OVERFLOWD; NAMED = (char *) "sinh"; ifSVID { - if (INPUT_XD > 0.0) RETVAL_HUGED; + if (INPUT_XD > ZEROD_VALUE /*0.0*/) RETVAL_HUGED; else RETVAL_NEG_HUGED; } - else + else { - if (INPUT_XD > 0.0) RETVAL_HUGE_VALD; + if (INPUT_XD > ZEROD_VALUE /*0.0*/) RETVAL_HUGE_VALD; else RETVAL_NEG_HUGE_VALD; } NOT_MATHERRD {ERRNO_RANGE;} @@ -2880,12 +2919,12 @@ else OVERFLOWF; NAMEF = (char *) "sinhf"; ifSVID { - if( INPUT_XF > 0.0) RETVAL_HUGEF; + if (INPUT_XF > ZEROF_VALUE /*0.0*/) RETVAL_HUGEF; else RETVAL_NEG_HUGEF; } - else + else { - if (INPUT_XF > 0.0) RETVAL_HUGE_VALF; + if (INPUT_XF > ZEROF_VALUE /*0.0*/) RETVAL_HUGE_VALF; else RETVAL_NEG_HUGE_VALF; } NOT_MATHERRF {ERRNO_RANGE;} @@ -2896,7 +2935,7 @@ else /* acoshl(x < 1) */ { DOMAINL; NAMEL = (char *) "acoshl"; - ifSVID + ifSVID { NOT_MATHERRL { @@ -2904,7 +2943,7 @@ else ERRNO_DOMAIN; } } - else + else { NOT_MATHERRL {ERRNO_DOMAIN;} } @@ -2915,15 +2954,15 @@ else /* acosh(x < 1) */ { DOMAIND; NAMED = (char *) "acosh"; - ifSVID + ifSVID { NOT_MATHERRD { - WRITEL_ACOSH; + WRITED_ACOSH; ERRNO_DOMAIN; } } - else + else { NOT_MATHERRD {ERRNO_DOMAIN;} } @@ -2934,7 +2973,7 @@ else /* acoshf(x < 1) */ { DOMAINF; NAMEF = (char *) "acoshf"; - ifSVID + ifSVID { NOT_MATHERRF { @@ -2947,13 
+2986,13 @@ else NOT_MATHERRF {ERRNO_DOMAIN;} } *(float *)retval = excf.retval; - ERRNO_DOMAIN; break; + break; } case atanhl_gt_one: /* atanhl(|x| > 1) */ { DOMAINL; NAMEL = (char *) "atanhl"; - ifSVID + ifSVID { NOT_MATHERRL { @@ -2971,7 +3010,7 @@ else /* atanh(|x| > 1) */ { DOMAIND; NAMED = (char *) "atanh"; - ifSVID + ifSVID { NOT_MATHERRD { @@ -2989,7 +3028,7 @@ else /* atanhf(|x| > 1) */ { DOMAINF; NAMEF = (char *) "atanhf"; - ifSVID + ifSVID { NOT_MATHERRF { @@ -3007,7 +3046,7 @@ else /* atanhl(|x| == 1) */ { SINGL; NAMEL = (char *) "atanhl"; - ifSVID + ifSVID { NOT_MATHERRL { @@ -3025,7 +3064,7 @@ else /* atanh(|x| == 1) */ { SINGD; NAMED = (char *) "atanh"; - ifSVID + ifSVID { NOT_MATHERRD { @@ -3043,7 +3082,7 @@ else /* atanhf(|x| == 1) */ { SINGF; NAMEF = (char *) "atanhf"; - ifSVID + ifSVID { NOT_MATHERRF { @@ -3061,7 +3100,7 @@ else /* gammal overflow */ { OVERFLOWL; NAMEL = (char *) "gammal"; - ifSVID + ifSVID { RETVAL_HUGEL; } @@ -3069,15 +3108,15 @@ else { RETVAL_HUGE_VALL; } - NOT_MATHERRL{ERRNO_RANGE;} - *(long double*)retval = excl.retval; + NOT_MATHERRL {ERRNO_RANGE;} + *(long double *)retval = excl.retval; break; } case gamma_overflow: /* gamma overflow */ { OVERFLOWD; NAMED = (char *) "gamma"; - ifSVID + ifSVID { RETVAL_HUGED; } @@ -3085,15 +3124,15 @@ else { RETVAL_HUGE_VALD; } - NOT_MATHERRD{ERRNO_RANGE;} - *(double*)retval = exc.retval; + NOT_MATHERRD {ERRNO_RANGE;} + *(double *)retval = exc.retval; break; } case gammaf_overflow: /* gammaf overflow */ { OVERFLOWF; NAMEF = (char *) "gammaf"; - ifSVID + ifSVID { RETVAL_HUGEF; } @@ -3101,8 +3140,8 @@ else { RETVAL_HUGE_VALF; } - NOT_MATHERRF{ERRNO_RANGE;} - *(float*)retval = excf.retval; + NOT_MATHERRF {ERRNO_RANGE;} + *(float *)retval = excf.retval; break; } case gammal_negative: @@ -3121,16 +3160,16 @@ else else { RETVAL_HUGE_VALL; - NOT_MATHERRL{ERRNO_DOMAIN;} + NOT_MATHERRL {ERRNO_DOMAIN;} } - *(long double*)retval = excl.retval; + *(long double *)retval = excl.retval; break; } case 
gamma_negative: /* gamma -int or 0 */ { SINGD; NAMED = (char *) "gamma"; - ifSVID + ifSVID { RETVAL_HUGED; NOT_MATHERRD @@ -3142,16 +3181,16 @@ else else { RETVAL_HUGE_VALD; - NOT_MATHERRD{ERRNO_DOMAIN;} + NOT_MATHERRD {ERRNO_DOMAIN;} } - *(double*)retval = exc.retval; + *(double *)retval = exc.retval; break; } case gammaf_negative: /* gammaf -int or 0 */ { SINGF; NAMEF = (char *) "gammaf"; - ifSVID + ifSVID { RETVAL_HUGEF; NOT_MATHERRF @@ -3163,16 +3202,16 @@ else else { RETVAL_HUGE_VALF; - NOT_MATHERRF{ERRNO_DOMAIN;} + NOT_MATHERRF {ERRNO_DOMAIN;} } - *(float*)retval = excf.retval; + *(float *)retval = excf.retval; break; } case lgammal_overflow: /* lgammal overflow */ { OVERFLOWL; NAMEL = (char *) "lgammal"; - ifSVID + ifSVID { RETVAL_HUGEL; } @@ -3180,15 +3219,15 @@ else { RETVAL_HUGE_VALL; } - NOT_MATHERRL{ERRNO_RANGE;} - *(long double*)retval = excl.retval; + NOT_MATHERRL {ERRNO_RANGE;} + *(long double *)retval = excl.retval; break; } case lgamma_overflow: /* lgamma overflow */ { OVERFLOWD; NAMED = (char *) "lgamma"; - ifSVID + ifSVID { RETVAL_HUGED; } @@ -3196,15 +3235,15 @@ else { RETVAL_HUGE_VALD; } - NOT_MATHERRD{ERRNO_RANGE;} - *(double*)retval = exc.retval; + NOT_MATHERRD {ERRNO_RANGE;} + *(double *)retval = exc.retval; break; } case lgammaf_overflow: /* lgammaf overflow */ { OVERFLOWF; NAMEF = (char *) "lgammaf"; - ifSVID + ifSVID { RETVAL_HUGEF; } @@ -3212,8 +3251,8 @@ else { RETVAL_HUGE_VALF; } - NOT_MATHERRF{ERRNO_RANGE;} - *(float*)retval = excf.retval; + NOT_MATHERRF {ERRNO_RANGE;} + *(float *)retval = excf.retval; break; } case lgammal_negative: @@ -3225,16 +3264,16 @@ else RETVAL_HUGEL; NOT_MATHERRL { - WRITEL_GAMMA_NEGATIVE; - ERRNO_DOMAIN; + WRITEL_LGAMMA_NEGATIVE; + ERRNO_DOMAIN; } } else { RETVAL_HUGE_VALL; - NOT_MATHERRL{ERRNO_DOMAIN;} + NOT_MATHERRL {ERRNO_DOMAIN;} } - *(long double*)retval = excl.retval; + *(long double *)retval = excl.retval; break; } case lgamma_negative: @@ -3253,16 +3292,16 @@ else else { RETVAL_HUGE_VALD; - 
NOT_MATHERRD{ERRNO_DOMAIN;} + NOT_MATHERRD {ERRNO_DOMAIN;} } - *(double*)retval = exc.retval; + *(double *)retval = exc.retval; break; } case lgammaf_negative: /* lgammaf -int or 0 */ { SINGF; NAMEF = (char *) "lgammaf"; - ifSVID + ifSVID { RETVAL_HUGEF; NOT_MATHERRF @@ -3274,16 +3313,16 @@ else else { RETVAL_HUGE_VALF; - NOT_MATHERRF{ERRNO_DOMAIN;} + NOT_MATHERRF {ERRNO_DOMAIN;} } - *(float*)retval = excf.retval; + *(float *)retval = excf.retval; break; } case tgammal_overflow: /* tgammal overflow */ { OVERFLOWL; NAMEL = (char *) "tgammal"; - ifSVID + ifSVID { RETVAL_HUGEL; } @@ -3291,15 +3330,15 @@ else { RETVAL_HUGE_VALL; } - NOT_MATHERRL{ERRNO_RANGE;} - *(long double*)retval = excl.retval; + NOT_MATHERRL {ERRNO_RANGE;} + *(long double *)retval = excl.retval; break; } case tgamma_overflow: /* tgamma overflow */ { OVERFLOWD; NAMED = (char *) "tgamma"; - ifSVID + ifSVID { RETVAL_HUGED; } @@ -3307,15 +3346,15 @@ else { RETVAL_HUGE_VALD; } - NOT_MATHERRD{ERRNO_RANGE;} - *(double*)retval = exc.retval; + NOT_MATHERRD {ERRNO_RANGE;} + *(double *)retval = exc.retval; break; } case tgammaf_overflow: /* tgammaf overflow */ { OVERFLOWF; NAMEF = (char *) "tgammaf"; - ifSVID + ifSVID { RETVAL_HUGEF; } @@ -3323,15 +3362,15 @@ else { RETVAL_HUGE_VALF; } - NOT_MATHERRF{ERRNO_RANGE;} - *(float*)retval = excf.retval; + NOT_MATHERRF {ERRNO_RANGE;} + *(float *)retval = excf.retval; break; } case tgammal_negative: /* tgammal -int or 0 */ { SINGL; NAMEL = (char *) "tgammal"; - ifSVID + ifSVID { NOT_MATHERRL { @@ -3341,16 +3380,16 @@ else } else { - NOT_MATHERRL{ERRNO_DOMAIN;} + NOT_MATHERRL {ERRNO_DOMAIN;} } - *(long double*)retval = excl.retval; + *(long double *)retval = excl.retval; break; } case tgamma_negative: /* tgamma -int or 0 */ { SINGD; NAMED = (char *) "tgamma"; - ifSVID + ifSVID { NOT_MATHERRD { @@ -3360,16 +3399,16 @@ else } else { - NOT_MATHERRD{ERRNO_DOMAIN;} + NOT_MATHERRD {ERRNO_DOMAIN;} } - *(double*)retval = exc.retval; + *(double *)retval = exc.retval; break; } 
case tgammaf_negative: /* tgammaf -int or 0 */ { SINGF; NAMEF = (char *) "tgammaf"; - ifSVID + ifSVID { NOT_MATHERRF { @@ -3379,9 +3418,9 @@ else } else { - NOT_MATHERRF{ERRNO_DOMAIN;} + NOT_MATHERRF {ERRNO_DOMAIN;} } - *(float*)retval = excf.retval; + *(float *)retval = excf.retval; break; } case j0l_gt_loss: @@ -3389,7 +3428,7 @@ else { TLOSSL; NAMEL = (char *) "j0l"; RETVAL_ZEROL; - ifSVID + ifSVID { NOT_MATHERRL { @@ -3401,7 +3440,7 @@ else { NOT_MATHERRL {ERRNO_RANGE;} } - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; break; } case j0_gt_loss: @@ -3409,7 +3448,7 @@ else { TLOSSD; NAMED = (char *) "j0"; RETVAL_ZEROD; - ifSVID + ifSVID { NOT_MATHERRD { @@ -3421,7 +3460,7 @@ else { NOT_MATHERRD {ERRNO_RANGE;} } - *(double*)retval = exc.retval; + *(double*)retval = exc.retval; break; } case j0f_gt_loss: @@ -3429,7 +3468,7 @@ else { TLOSSF; NAMEF = (char *) "j0f"; RETVAL_ZEROF; - ifSVID + ifSVID { NOT_MATHERRF { @@ -3449,7 +3488,7 @@ else { TLOSSL; NAMEL = (char *) "j1l"; RETVAL_ZEROL; - ifSVID + ifSVID { NOT_MATHERRL { @@ -3461,7 +3500,7 @@ else { NOT_MATHERRL {ERRNO_RANGE;} } - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; break; } case j1_gt_loss: @@ -3469,7 +3508,7 @@ else { TLOSSD; NAMED = (char *) "j1"; RETVAL_ZEROD; - ifSVID + ifSVID { NOT_MATHERRD { @@ -3481,7 +3520,7 @@ else { NOT_MATHERRD {ERRNO_RANGE;} } - *(double*)retval = exc.retval; + *(double*)retval = exc.retval; break; } case j1f_gt_loss: @@ -3489,7 +3528,7 @@ else { TLOSSF; NAMEF = (char *) "j1f"; RETVAL_ZEROF; - ifSVID + ifSVID { NOT_MATHERRF { @@ -3509,7 +3548,7 @@ else { TLOSSL; NAMEL = (char *) "jnl"; RETVAL_ZEROL; - ifSVID + ifSVID { NOT_MATHERRL { @@ -3521,7 +3560,7 @@ else { NOT_MATHERRL {ERRNO_RANGE;} } - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; break; } case jn_gt_loss: @@ -3529,7 +3568,7 @@ else { TLOSSD; NAMED = (char *) "jn"; RETVAL_ZEROD; - ifSVID + ifSVID { NOT_MATHERRD { @@ -3541,7 
+3580,7 @@ else { NOT_MATHERRD {ERRNO_RANGE;} } - *(double*)retval = exc.retval; + *(double*)retval = exc.retval; break; } case jnf_gt_loss: @@ -3549,7 +3588,7 @@ else { TLOSSF; NAMEF = (char *) "jnf"; RETVAL_ZEROF; - ifSVID + ifSVID { NOT_MATHERRF { @@ -3569,7 +3608,7 @@ else { TLOSSL; NAMEL = (char *) "y0l"; RETVAL_ZEROL; - ifSVID + ifSVID { NOT_MATHERRL { @@ -3589,7 +3628,7 @@ else { TLOSSD; NAMED = (char *) "y0"; RETVAL_ZEROD; - ifSVID + ifSVID { NOT_MATHERRD { @@ -3609,7 +3648,7 @@ else { TLOSSF; NAMEF = (char *) "y0f"; RETVAL_ZEROF; - ifSVID + ifSVID { NOT_MATHERRF { @@ -3628,10 +3667,10 @@ else /* y0l(0) */ { DOMAINL; NAMEL = (char *) "y0l"; - ifSVID + ifSVID { RETVAL_NEG_HUGEL; - NOT_MATHERRL + NOT_MATHERRL { WRITEL_Y0_ZERO; ERRNO_DOMAIN; @@ -3639,20 +3678,20 @@ else } else { - RETVAL_NEG_HUGE_VALL; + RETVAL_NEG_HUGE_VALL; NOT_MATHERRL {ERRNO_DOMAIN;} } - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; break; } case y0_zero: /* y0(0) */ { DOMAIND; NAMED = (char *) "y0"; - ifSVID + ifSVID { RETVAL_NEG_HUGED; - NOT_MATHERRD + NOT_MATHERRD { WRITED_Y0_ZERO; ERRNO_DOMAIN; @@ -3660,20 +3699,20 @@ else } else { - RETVAL_NEG_HUGE_VALD; + RETVAL_NEG_HUGE_VALD; NOT_MATHERRD {ERRNO_DOMAIN;} } - *(double *)retval = exc.retval; + *(double *)retval = exc.retval; break; } case y0f_zero: /* y0f(0) */ { DOMAINF; NAMEF = (char *) "y0f"; - ifSVID + ifSVID { RETVAL_NEG_HUGEF; - NOT_MATHERRF + NOT_MATHERRF { WRITEF_Y0_ZERO; ERRNO_DOMAIN; @@ -3681,10 +3720,10 @@ else } else { - RETVAL_NEG_HUGE_VALF; + RETVAL_NEG_HUGE_VALF; NOT_MATHERRF {ERRNO_DOMAIN;} } - *(float *)retval = excf.retval; + *(float *)retval = excf.retval; break; } case y1l_gt_loss: @@ -3692,7 +3731,7 @@ else { TLOSSL; NAMEL = (char *) "y1l"; RETVAL_ZEROL; - ifSVID + ifSVID { NOT_MATHERRL { @@ -3712,7 +3751,7 @@ else { TLOSSD; NAMED = (char *) "y1"; RETVAL_ZEROD; - ifSVID + ifSVID { NOT_MATHERRD { @@ -3732,7 +3771,7 @@ else { TLOSSF; NAMEF = (char *) "y1f"; RETVAL_ZEROF; - ifSVID + 
ifSVID { NOT_MATHERRF { @@ -3751,10 +3790,10 @@ else /* y1l(0) */ { DOMAINL; NAMEL = (char *) "y1l"; - ifSVID + ifSVID { RETVAL_NEG_HUGEL; - NOT_MATHERRL + NOT_MATHERRL { WRITEL_Y1_ZERO; ERRNO_DOMAIN; @@ -3762,20 +3801,20 @@ else } else { - RETVAL_NEG_HUGE_VALL; + RETVAL_NEG_HUGE_VALL; NOT_MATHERRL {ERRNO_DOMAIN;} } - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; break; } case y1_zero: /* y1(0) */ { DOMAIND; NAMED = (char *) "y1"; - ifSVID + ifSVID { RETVAL_NEG_HUGED; - NOT_MATHERRD + NOT_MATHERRD { WRITED_Y1_ZERO; ERRNO_DOMAIN; @@ -3783,30 +3822,31 @@ else } else { - RETVAL_NEG_HUGE_VALD; + RETVAL_NEG_HUGE_VALD; NOT_MATHERRD {ERRNO_DOMAIN;} } - *(double *)retval = exc.retval; + *(double *)retval = exc.retval; break; } case y1f_zero: /* y1f(0) */ { DOMAINF; NAMEF = (char *) "y1f"; - ifSVID + ifSVID { RETVAL_NEG_HUGEF; - NOT_MATHERRF + NOT_MATHERRF { WRITEF_Y1_ZERO; ERRNO_DOMAIN; } - }else + } + else { - RETVAL_NEG_HUGE_VALF; + RETVAL_NEG_HUGE_VALF; NOT_MATHERRF {ERRNO_DOMAIN;} } - *(float *)retval = excf.retval; + *(float *)retval = excf.retval; break; } case ynl_gt_loss: @@ -3814,7 +3854,7 @@ else { TLOSSL; NAMEL = (char *) "ynl"; RETVAL_ZEROL; - ifSVID + ifSVID { NOT_MATHERRL { @@ -3834,7 +3874,7 @@ else { TLOSSD; NAMED = (char *) "yn"; RETVAL_ZEROD; - ifSVID + ifSVID { NOT_MATHERRD { @@ -3854,7 +3894,7 @@ else { TLOSSF; NAMEF = (char *) "ynf"; RETVAL_ZEROF; - ifSVID + ifSVID { NOT_MATHERRF { @@ -3873,10 +3913,10 @@ else /* ynl(0) */ { DOMAINL; NAMEL = (char *) "ynl"; - ifSVID + ifSVID { RETVAL_NEG_HUGEL; - NOT_MATHERRL + NOT_MATHERRL { WRITEL_YN_ZERO; ERRNO_DOMAIN; @@ -3884,20 +3924,20 @@ else } else { - RETVAL_NEG_HUGE_VALL; + RETVAL_NEG_HUGE_VALL; NOT_MATHERRL {ERRNO_DOMAIN;} } - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; break; } case yn_zero: /* yn(0) */ { DOMAIND; NAMED = (char *) "yn"; - ifSVID + ifSVID { RETVAL_NEG_HUGED; - NOT_MATHERRD + NOT_MATHERRD { WRITED_YN_ZERO; ERRNO_DOMAIN; @@ 
-3905,20 +3945,20 @@ else } else { - RETVAL_NEG_HUGE_VALD; + RETVAL_NEG_HUGE_VALD; NOT_MATHERRD {ERRNO_DOMAIN;} } - *(double *)retval = exc.retval; + *(double *)retval = exc.retval; break; } case ynf_zero: /* ynf(0) */ { DOMAINF; NAMEF = (char *) "ynf"; - ifSVID + ifSVID { RETVAL_NEG_HUGEF; - NOT_MATHERRF + NOT_MATHERRF { WRITEF_YN_ZERO; ERRNO_DOMAIN; @@ -3926,20 +3966,20 @@ else } else { - RETVAL_NEG_HUGE_VALF; + RETVAL_NEG_HUGE_VALF; NOT_MATHERRF {ERRNO_DOMAIN;} } - *(float *)retval = excf.retval; + *(float *)retval = excf.retval; break; } case y0l_negative: /* y0l(x<0) */ { DOMAINL; NAMEL = (char *) "y0l"; - ifSVID + ifSVID { RETVAL_NEG_HUGEL; - NOT_MATHERRL + NOT_MATHERRL { WRITEL_Y0_NEGATIVE; ERRNO_DOMAIN; @@ -3947,20 +3987,20 @@ else } else { - RETVAL_NEG_HUGE_VALL; + RETVAL_NEG_HUGE_VALL; NOT_MATHERRL {ERRNO_DOMAIN;} } - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; break; } case y0_negative: /* y0(x<0) */ { DOMAIND; NAMED = (char *) "y0"; - ifSVID + ifSVID { RETVAL_NEG_HUGED; - NOT_MATHERRD + NOT_MATHERRD { WRITED_Y0_NEGATIVE; ERRNO_DOMAIN; @@ -3968,20 +4008,20 @@ else } else { - RETVAL_NEG_HUGE_VALD; + RETVAL_NEG_HUGE_VALD; NOT_MATHERRD {ERRNO_DOMAIN;} } - *(double *)retval = exc.retval; + *(double *)retval = exc.retval; break; } case y0f_negative: /* y0f(x<0) */ { DOMAINF; NAMEF = (char *) "y0f"; - ifSVID + ifSVID { RETVAL_NEG_HUGEF; - NOT_MATHERRF + NOT_MATHERRF { WRITEF_Y0_NEGATIVE; ERRNO_DOMAIN; @@ -3989,20 +4029,20 @@ else } else { - RETVAL_NEG_HUGE_VALF; + RETVAL_NEG_HUGE_VALF; NOT_MATHERRF {ERRNO_DOMAIN;} } - *(float *)retval = excf.retval; + *(float *)retval = excf.retval; break; } case y1l_negative: /* y1l(x<0) */ { DOMAINL; NAMEL = (char *) "y1l"; - ifSVID + ifSVID { RETVAL_NEG_HUGEL; - NOT_MATHERRL + NOT_MATHERRL { WRITEL_Y1_NEGATIVE; ERRNO_DOMAIN; @@ -4010,41 +4050,41 @@ else } else { - RETVAL_NEG_HUGE_VALL; + RETVAL_NEG_HUGE_VALL; NOT_MATHERRL {ERRNO_DOMAIN;} } - *(long double *)retval = excl.retval; + *(long 
double *)retval = excl.retval; break; } case y1_negative: /* y1(x<0) */ { DOMAIND; NAMED = (char *) "y1"; - ifSVID + ifSVID { RETVAL_NEG_HUGED; - NOT_MATHERRD + NOT_MATHERRD { - WRITED_Y1_NEGATIUE; + WRITED_Y1_NEGATIVE; ERRNO_DOMAIN; } } else { - RETVAL_NEG_HUGE_VALD; + RETVAL_NEG_HUGE_VALD; NOT_MATHERRD {ERRNO_DOMAIN;} } - *(double *)retval = exc.retval; + *(double *)retval = exc.retval; break; } case y1f_negative: /* y1f(x<0) */ { DOMAINF; NAMEF = (char *) "y1f"; - ifSVID + ifSVID { RETVAL_NEG_HUGEF; - NOT_MATHERRF + NOT_MATHERRF { WRITEF_Y1_NEGATIVE; ERRNO_DOMAIN; @@ -4052,20 +4092,20 @@ else } else { - RETVAL_NEG_HUGE_VALF; + RETVAL_NEG_HUGE_VALF; NOT_MATHERRF {ERRNO_DOMAIN;} } - *(float *)retval = excf.retval; + *(float *)retval = excf.retval; break; } case ynl_negative: /* ynl(x<0) */ { DOMAINL; NAMEL = (char *) "ynl"; - ifSVID + ifSVID { RETVAL_NEG_HUGEL; - NOT_MATHERRL + NOT_MATHERRL { WRITEL_YN_NEGATIVE; ERRNO_DOMAIN; @@ -4073,20 +4113,20 @@ else } else { - RETVAL_NEG_HUGE_VALL; + RETVAL_NEG_HUGE_VALL; NOT_MATHERRL {ERRNO_DOMAIN;} } - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; break; } case yn_negative: /* yn(x<0) */ { DOMAIND; NAMED = (char *) "yn"; - ifSVID + ifSVID { RETVAL_NEG_HUGED; - NOT_MATHERRD + NOT_MATHERRD { WRITED_YN_NEGATIVE; ERRNO_DOMAIN; @@ -4094,20 +4134,20 @@ else } else { - RETVAL_NEG_HUGE_VALD; + RETVAL_NEG_HUGE_VALD; NOT_MATHERRD {ERRNO_DOMAIN;} } - *(double *)retval = exc.retval; + *(double *)retval = exc.retval; break; } case ynf_negative: /* ynf(x<0) */ { DOMAINF; NAMEF = (char *) "ynf"; - ifSVID + ifSVID { RETVAL_NEG_HUGEF; - NOT_MATHERRF + NOT_MATHERRF { WRITEF_YN_NEGATIVE; ERRNO_DOMAIN; @@ -4115,18 +4155,18 @@ else } else { - RETVAL_NEG_HUGE_VALF; + RETVAL_NEG_HUGE_VALF; NOT_MATHERRF {ERRNO_DOMAIN;} } - *(float *)retval = excf.retval; + *(float *)retval = excf.retval; break; } - case fmodl_by_zero: + case fmodl_by_zero: /* fmodl(x,0) */ { DOMAINL; NAMEL = (char *) "fmodl"; - ifSVID - { + ifSVID + 
{ *(long double *)retval = *(long double *)arg1; NOT_MATHERRL { @@ -4134,21 +4174,21 @@ else ERRNO_DOMAIN; } } - else + else { /* NaN already computed */ NOT_MATHERRL {ERRNO_DOMAIN;} } - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; break; } - case fmod_by_zero: + case fmod_by_zero: /* fmod(x,0) */ { DOMAIND; NAMED = (char *) "fmod"; - ifSVID + ifSVID { *(double *)retval = *(double *)arg1; - NOT_MATHERRD + NOT_MATHERRD { WRITED_FMOD; ERRNO_DOMAIN; @@ -4157,18 +4197,18 @@ else else { /* NaN already computed */ NOT_MATHERRD {ERRNO_DOMAIN;} - } - *(double *)retval = exc.retval; + } + *(double *)retval = exc.retval; break; } - case fmodf_by_zero: + case fmodf_by_zero: /* fmodf(x,0) */ { DOMAINF; NAMEF = (char *) "fmodf"; - ifSVID + ifSVID { *(float *)retval = *(float *)arg1; - NOT_MATHERRF + NOT_MATHERRF { WRITEF_FMOD; ERRNO_DOMAIN; @@ -4177,36 +4217,36 @@ else else { NOT_MATHERRF {ERRNO_DOMAIN;} - } - *(float *)retval = excf.retval; + } + *(float *)retval = excf.retval; break; } - case remainderl_by_zero: + case remainderl_by_zero: /* remainderl(x,0) */ { DOMAINL; NAMEL = (char *) "remainderl"; - ifSVID - { + ifSVID + { NOT_MATHERRL { WRITEL_REM; ERRNO_DOMAIN; } } - else + else { /* NaN already computed */ NOT_MATHERRL {ERRNO_DOMAIN;} } - *(long double *)retval = excl.retval; + *(long double *)retval = excl.retval; break; } - case remainder_by_zero: + case remainder_by_zero: /* remainder(x,0) */ { DOMAIND; NAMED = (char *) "remainder"; - ifSVID + ifSVID { - NOT_MATHERRD + NOT_MATHERRD { WRITED_REM; ERRNO_DOMAIN; @@ -4215,17 +4255,17 @@ else else { /* NaN already computed */ NOT_MATHERRD {ERRNO_DOMAIN;} - } - *(double *)retval = exc.retval; + } + *(double *)retval = exc.retval; break; } - case remainderf_by_zero: + case remainderf_by_zero: /* remainderf(x,0) */ { DOMAINF; NAMEF = (char *) "remainderf"; - ifSVID + ifSVID { - NOT_MATHERRF + NOT_MATHERRF { WRITEF_REM; ERRNO_DOMAIN; @@ -4234,8 +4274,8 @@ else else { NOT_MATHERRF 
{ERRNO_DOMAIN;} - } - *(float *)retval = excf.retval; + } + *(float *)retval = excf.retval; break; } default: diff --git a/sysdeps/ia64/fpu/libm_lgamma.S b/sysdeps/ia64/fpu/libm_lgamma.S index 5c13fc3..0df1e4b 100644 --- a/sysdeps/ia64/fpu/libm_lgamma.S +++ b/sysdeps/ia64/fpu/libm_lgamma.S @@ -47,6 +47,7 @@ // 09/15/02 Fixed bug on the branch lgamma_negrecursion // 10/21/02 Now it returns SIGN(GAMMA(x))=-1 for negative zero // 02/10/03 Reordered header: .section, .global, .proc, .align +// 07/22/03 Reformatted some data tables // //********************************************************************* // @@ -951,19 +952,32 @@ data8 0xD28D3312983E98A0,0xBFFF //S2 // data8 0x8090F777D7942F73,0x4001 // PR01 data8 0xE5B521193CF61E63,0x4000 // PR11 -data8 0xC02C000000001939,0x0000000000000233 // (-15;-14) -data8 0xC02A000000016124,0x0000000000002BFB // (-14;-13) -data8 0xC02800000011EED9,0x0000000000025CBB // (-13;-12) -data8 0xC026000000D7322A,0x00000000001E1095 // (-12;-11) -data8 0xC0240000093F2777,0x00000000013DD3DC // (-11;-10) -data8 0xC02200005C7768FB,0x000000000C9539B9 // (-10;-9) -data8 0xC02000034028B3F9,0x000000007570C565 // (-9;-8) -data8 0xC01C0033FDEDFE1F,0x00000007357E670E // (-8;-7) -data8 0xC018016B25897C8D,0x000000346DC5D639 // (-7;-6) -data8 0xC014086A57F0B6D9,0x0000010624DD2F1B // (-6;-5) -data8 0xC010284E78599581,0x0000051EB851EB85 // (-5;-4) -data8 0xC009260DBC9E59AF,0x000028F5C28F5C29 // (-4;-3) -data8 0xC003A7FC9600F86C,0x0000666666666666 // (-3;-2) +data8 0xC02C000000001939 // (-15;-14) +data8 0x0000000000000233 // (-15;-14) +data8 0xC02A000000016124 // (-14;-13) +data8 0x0000000000002BFB // (-14;-13) +data8 0xC02800000011EED9 // (-13;-12) +data8 0x0000000000025CBB // (-13;-12) +data8 0xC026000000D7322A // (-12;-11) +data8 0x00000000001E1095 // (-12;-11) +data8 0xC0240000093F2777 // (-11;-10) +data8 0x00000000013DD3DC // (-11;-10) +data8 0xC02200005C7768FB // (-10;-9) +data8 0x000000000C9539B9 // (-10;-9) +data8 0xC02000034028B3F9 // (-9;-8) 
+data8 0x000000007570C565 // (-9;-8) +data8 0xC01C0033FDEDFE1F // (-8;-7) +data8 0x00000007357E670E // (-8;-7) +data8 0xC018016B25897C8D // (-7;-6) +data8 0x000000346DC5D639 // (-7;-6) +data8 0xC014086A57F0B6D9 // (-6;-5) +data8 0x0000010624DD2F1B // (-6;-5) +data8 0xC010284E78599581 // (-5;-4) +data8 0x0000051EB851EB85 // (-5;-4) +data8 0xC009260DBC9E59AF // (-4;-3) +data8 0x000028F5C28F5C29 // (-4;-3) +data8 0xC003A7FC9600F86C // (-3;-2) +data8 0x0000666666666666 // (-3;-2) data8 0xCC15879606130890,0x4000 // PR21 data8 0xB42FE3281465E1CC,0x4000 // PR31 // @@ -971,19 +985,32 @@ data8 0x828185F0B95C9916,0x4001 // PR00 // data8 0xD4D3C819E4E5654B,0x4000 // PR10 data8 0xA82FBBA4FCC75298,0x4000 // PR20 -data8 0xC02DFFFFFFFFFE52,0x000000000000001C // (-15;-14) -data8 0xC02BFFFFFFFFE6C7,0x00000000000001A6 // (-14;-13) -data8 0xC029FFFFFFFE9EDC,0x0000000000002BFB // (-13;-12) -data8 0xC027FFFFFFEE1127,0x000000000001EEC8 // (-12;-11) -data8 0xC025FFFFFF28CDD4,0x00000000001E1095 // (-11;-10) -data8 0xC023FFFFF6C0D7C0,0x000000000101B2B3 // (-10;-9) -data8 0xC021FFFFA3884BD0,0x000000000D6BF94D // (-9;-8) -data8 0xC01FFFF97F8159CF,0x00000000C9539B89 // (-8;-7) -data8 0xC01BFFCBF76B86F0,0x00000007357E670E // (-7;-6) -data8 0xC017FE92F591F40D,0x000000346DC5D639 // (-6;-5) -data8 0xC013F7577A6EEAFD,0x00000147AE147AE1 // (-5;-4) -data8 0xC00FA471547C2FE5,0x00000C49BA5E353F // (-4;-3) -data8 0xC005FB410A1BD901,0x000053F7CED91687 // (-3;-2) +data8 0xC02DFFFFFFFFFE52 // (-15;-14) +data8 0x000000000000001C // (-15;-14) +data8 0xC02BFFFFFFFFE6C7 // (-14;-13) +data8 0x00000000000001A6 // (-14;-13) +data8 0xC029FFFFFFFE9EDC // (-13;-12) +data8 0x0000000000002BFB // (-13;-12) +data8 0xC027FFFFFFEE1127 // (-12;-11) +data8 0x000000000001EEC8 // (-12;-11) +data8 0xC025FFFFFF28CDD4 // (-11;-10) +data8 0x00000000001E1095 // (-11;-10) +data8 0xC023FFFFF6C0D7C0 // (-10;-9) +data8 0x000000000101B2B3 // (-10;-9) +data8 0xC021FFFFA3884BD0 // (-9;-8) +data8 0x000000000D6BF94D // (-9;-8) +data8 
0xC01FFFF97F8159CF // (-8;-7) +data8 0x00000000C9539B89 // (-8;-7) +data8 0xC01BFFCBF76B86F0 // (-7;-6) +data8 0x00000007357E670E // (-7;-6) +data8 0xC017FE92F591F40D // (-6;-5) +data8 0x000000346DC5D639 // (-6;-5) +data8 0xC013F7577A6EEAFD // (-5;-4) +data8 0x00000147AE147AE1 // (-5;-4) +data8 0xC00FA471547C2FE5 // (-4;-3) +data8 0x00000C49BA5E353F // (-4;-3) +data8 0xC005FB410A1BD901 // (-3;-2) +data8 0x000053F7CED91687 // (-3;-2) data8 0x80151BB918A293AA,0x4000 // PR30 data8 0xB3C9F8F47422A314,0x400B // PRN // @@ -3538,6 +3565,7 @@ lgamma_libm_err: };; GLOBAL_LIBM_END(__libm_lgamma) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/libm_lgammaf.S b/sysdeps/ia64/fpu/libm_lgammaf.S index 83cffd6..04dcd63 100644 --- a/sysdeps/ia64/fpu/libm_lgammaf.S +++ b/sysdeps/ia64/fpu/libm_lgammaf.S @@ -47,6 +47,7 @@ // 09/16/02 Improved accuracy on intervals reduced to [1;1.25] // 10/21/02 Now it returns SIGN(GAMMA(x))=-1 for negative zero // 02/10/03 Reordered header: .section, .global, .proc, .align +// 07/22/03 Reformatted some data tables // //********************************************************************* // @@ -685,19 +686,26 @@ data8 0x3FF1029A9DD542B4,0xBFFAD37C209D3B25 // A6,A5 data8 0x405385E6FD9BE7EA // A0 data8 0x478895F1C0000000 // Overflow boundary data8 0x400062D97D26B523,0xC00A03E1529FF023 // A6,A5 -data8 0x4069204C51E566CE,0 // A0 +data8 0x4069204C51E566CE // A0 +data8 0x0000000000000000 // pad data8 0x40101476B38FD501,0xC0199DE7B387C0FC // A6,A5 -data8 0x407EB8DAEC83D759,0 // A0 +data8 0x407EB8DAEC83D759 // A0 +data8 0x0000000000000000 // pad data8 0x401FDB008D65125A,0xC0296B506E665581 // A6,A5 -data8 0x409226D93107EF66,0 // A0 +data8 0x409226D93107EF66 // A0 +data8 0x0000000000000000 // pad data8 0x402FB3EAAF3E7B2D,0xC039521142AD8E0D // A6,A5 -data8 0x40A4EFA4F072792E,0 // A0 +data8 0x40A4EFA4F072792E // A0 +data8 0x0000000000000000 // pad data8 0x403FA024C66B2563,0xC0494569F250E691 // A6,A5 -data8 
0x40B7B747C9235BB8,0 // A0 +data8 0x40B7B747C9235BB8 // A0 +data8 0x0000000000000000 // pad data8 0x404F9607D6DA512C,0xC0593F0B2EDDB4BC // A6,A5 -data8 0x40CA7E29C5F16DE2,0 // A0 +data8 0x40CA7E29C5F16DE2 // A0 +data8 0x0000000000000000 // pad data8 0x405F90C5F613D98D,0xC0693BD130E50AAF // A6,A5 -data8 0x40DD4495238B190C,0 // A0 +data8 0x40DD4495238B190C // A0 +data8 0x0000000000000000 // pad // // polynomial approximation of ln(sin(Pi*x)/(Pi*x)), |x| <= 0.5 data8 0xBFD58731A486E820,0xBFA4452CC28E15A9 // S16,S14 @@ -2133,6 +2141,7 @@ lgammaf_libm_err: };; GLOBAL_LIBM_END(__libm_lgammaf) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/libm_lgammal.S b/sysdeps/ia64/fpu/libm_lgammal.S index 056171b..844c517 100644 --- a/sysdeps/ia64/fpu/libm_lgammal.S +++ b/sysdeps/ia64/fpu/libm_lgammal.S @@ -7622,6 +7622,7 @@ lgammal_singularity: GLOBAL_LIBM_END(__libm_lgammal) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/libm_scalblnf.S b/sysdeps/ia64/fpu/libm_scalblnf.S index 362e68b..af620d4 100644 --- a/sysdeps/ia64/fpu/libm_scalblnf.S +++ b/sysdeps/ia64/fpu/libm_scalblnf.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History @@ -44,38 +44,51 @@ // 02/06/02 Corrected to handle 32- or 64-bit integers // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align +// 08/25/03 Improved performance // // API //============================================================== -// float = __libm_scalblnf (float x, long int n, int long_int_type) +// float __libm_scalblnf (float x, long int n, int long_int_type) // input floating point f8 and long int n (r33) // input long_int_type = 0 if long int defined as 32 bits, = 1 if 64 bits -// // output floating point f8 // - // Returns x* 2**n using an fma and detects overflow -// and underflow. +// and underflow. 
// // +// Strategy: +// Compute biased exponent of result exp_Result = N + exp_X +// Break into ranges: +// exp_Result > 0x1007e -> Certain overflow +// exp_Result = 0x1007e -> Possible overflow +// 0x0ff81 <= exp_Result < 0x1007e -> No over/underflow (main path) +// 0x0ff81 - 23 <= exp_Result < 0x0ff81 -> Possible underflow +// exp_Result < 0x0ff81 - 23 -> Certain underflow FR_Big = f6 FR_NBig = f7 FR_Floating_X = f8 FR_Result = f8 FR_Result2 = f9 -FR_Result3 = f11 -FR_Norm_X = f12 -FR_Two_N = f14 -FR_Two_to_Big = f15 +FR_Result3 = f10 +FR_Norm_X = f11 +FR_Two_N = f12 +GR_neg_ov_limit= r14 GR_N_Biased = r15 GR_Big = r16 GR_NBig = r17 -GR_Scratch = r18 -GR_Scratch1 = r19 +GR_exp_Result = r18 +GR_pos_ov_limit= r19 GR_Bias = r20 GR_N_as_int = r21 +GR_signexp_X = r22 +GR_exp_X = r23 +GR_exp_mask = r24 +GR_max_exp = r25 +GR_min_exp = r26 +GR_min_den_exp = r27 GR_SAVE_B0 = r32 GR_SAVE_GP = r33 @@ -93,243 +106,298 @@ GLOBAL_LIBM_ENTRY(__libm_scalblnf) // Build the exponent Bias // { .mfi - alloc r32=ar.pfs,3,0,4,0 - fclass.m.unc p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero - addl GR_Bias = 0x0FFFF,r0 + getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x + fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero + mov GR_Bias = 0x0ffff } - - // -// Is N zero? // Normalize x -// Do we need to sign extend input (long_int_type = 0)? +// Is long integer type 32 bits? 
// { .mfi - cmp.eq.unc p6,p0 = r33,r0 - fnorm.s1 FR_Norm_X = FR_Floating_X - cmp.eq.unc p8,p9 = r34,r0 + mov GR_Big = 35000 // If N this big then certain overflow + fnorm.s1 FR_Norm_X = FR_Floating_X + cmp.eq p8,p9 = r34,r0 } ;; -{ .mii -(p9) mov GR_N_as_int = r33 // Get n directly if long int 64 bits -(p8) sxt4 GR_N_as_int = r33 // Sign extend n if long int 32 bits - nop.i 0 +// Sign extend N if long int is 32 bits +{ .mfi +(p9) mov GR_N_as_int = r33 // Copy N if long int is 64 bits + fclass.m p9,p0 = FR_Floating_X, 0x0b // Test for x=unorm +(p8) sxt4 GR_N_as_int = r33 // Sign extend N if long int is 32 bits +} +{ .mfi + mov GR_NBig = -35000 // If N this small then certain underflow + nop.f 0 + mov GR_max_exp = 0x1007e // Exponent of maximum float +} +;; + +// Create biased exponent for 2**N +{ .mfi + add GR_N_Biased = GR_Bias,GR_N_as_int + nop.f 0 + cmp.ge p7, p0 = GR_N_as_int, GR_Big // Certain overflow? +} +{ .mib + cmp.le p8, p0 = GR_N_as_int, GR_NBig // Certain underflow? + mov GR_min_exp = 0x0ff81 // Exponent of minimum float +(p9) br.cond.spnt SCALBNF_UNORM // Branch if x=unorm +} +;; + +SCALBNF_COMMON: +// Main path continues. Also return here from x=unorm path. +// Create 2**N +.pred.rel "mutex",p7,p8 +{ .mfi + setf.exp FR_Two_N = GR_N_Biased + nop.f 0 +(p7) mov GR_N_as_int = GR_Big // Limit max N +} +{ .mfi +(p8) mov GR_N_as_int = GR_NBig // Limit min N + nop.f 0 +(p8) cmp.eq p7,p0 = r0,r0 // Set p7 if |N| big } ;; // -// Normalize x -// Branch and return special values. -// Create -35000 -// Create 35000 +// Create biased exponent for 2**N for N big +// Is N zero? 
// { .mfi - addl GR_Big = 35000,r0 +(p7) add GR_N_Biased = GR_Bias,GR_N_as_int nop.f 0 - add GR_N_Biased = GR_Bias,GR_N_as_int + cmp.eq.or p6,p0 = r33,r0 } -{ .mfb - addl GR_NBig = -35000,r0 -(p7) fma.s.s0 FR_Result = FR_Floating_X,f1, f0 -(p7) br.ret.spnt b0 -};; +{ .mfi + mov GR_pos_ov_limit = 0x1007f // Exponent for positive overflow + nop.f 0 + mov GR_exp_mask = 0x1ffff // Exponent mask +} +;; // -// Build the exponent Bias -// Return x when N = 0 +// Create 2**N for N big +// Return x when N = 0 or X = Nan, Inf, Zero // { .mfi - setf.exp FR_Two_N = GR_N_Biased +(p7) setf.exp FR_Two_N = GR_N_Biased nop.f 0 - addl GR_Scratch1 = 0x063BF,r0 + mov GR_min_den_exp = 0x0ff81 - 23 // Exponent of min denorm float } { .mfb - addl GR_Scratch = 0x019C3F,r0 -(p6) fma.s.s0 FR_Result = FR_Floating_X,f1, f0 -(p6) br.ret.spnt b0 -};; + and GR_exp_X = GR_exp_mask, GR_signexp_X +(p6) fma.s.s0 FR_Result = FR_Floating_X, f1, f0 +(p6) br.ret.spnt b0 +} +;; // -// Create 2*big -// Create 2**-big -// Is N > 35000 -// Is N < -35000 // Raise Denormal operand flag with compare -// Main path, create 2**N +// Compute biased result exponent // { .mfi - setf.exp FR_NBig = GR_Scratch1 - nop.f 0 - cmp.ge.unc p6, p0 = GR_N_as_int, GR_Big -} -{ .mfi - setf.exp FR_Big = GR_Scratch + add GR_exp_Result = GR_exp_X, GR_N_as_int fcmp.ge.s0 p0,p11 = FR_Floating_X,f0 - cmp.le.unc p8, p0 = GR_N_as_int, GR_NBig -};; + mov GR_neg_ov_limit = 0x3007f // Exponent for negative overflow +} +;; // -// Adjust 2**N if N was very small or very large +// Do final operation // { .mfi - nop.m 0 -(p6) fma.s1 FR_Two_N = FR_Big,f1,f0 - nop.i 0 + cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow + fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 + cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow } -{ .mlx - nop.m 999 - movl GR_Scratch = 0x000000000003007F -};; +{ .mfb + nop.m 0 + nop.f 0 +(p9) br.cond.spnt SCALBNF_UNDERFLOW // Branch if certain underflow +} +;; +{ .mib +(p6) cmp.gt.unc p6,p8 = 
GR_exp_Result, GR_max_exp // Test sure overflow +(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow +(p7) br.ret.sptk b0 // Return from main path +} +;; -{ .mfi - nop.m 0 -(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0 - nop.i 0 +{ .bbb +(p6) br.cond.spnt SCALBNF_OVERFLOW // Branch if certain overflow +(p8) br.cond.spnt SCALBNF_POSSIBLE_OVERFLOW // Branch if possible overflow +(p9) br.cond.spnt SCALBNF_POSSIBLE_UNDERFLOW // Branch if possible underflow } -{ .mlx - nop.m 999 - movl GR_Scratch1= 0x000000000001007F -};; +;; + +// Here if possible underflow. +// Resulting exponent: 0x0ff81-23 <= exp_Result < 0x0ff81 +SCALBNF_POSSIBLE_UNDERFLOW: +// +// Here if possible overflow. +// Resulting exponent: 0x1007e = exp_Result +SCALBNF_POSSIBLE_OVERFLOW: -// Set up necessary status fields +// Set up necessary status fields // // S0 user supplied status // S2 user supplied status + WRE + TD (Overflows) // S3 user supplied status + FZ + TD (Underflows) // { .mfi - nop.m 999 + nop.m 0 fsetc.s3 0x7F,0x41 - nop.i 999 + nop.i 0 } { .mfi - nop.m 999 + nop.m 0 fsetc.s2 0x7F,0x42 - nop.i 999 -};; + nop.i 0 +} +;; // -// Do final operation +// Do final operation with s2 and s3 // { .mfi - setf.exp FR_NBig = GR_Scratch - fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 - nop.i 999 + setf.exp FR_NBig = GR_neg_ov_limit + fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 + nop.i 0 } { .mfi - nop.m 999 - fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 - nop.i 999 -};; -{ .mfi - setf.exp FR_Big = GR_Scratch1 - fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 - nop.i 999 -};; + setf.exp FR_Big = GR_pos_ov_limit + fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 + nop.i 0 +} +;; // Check for overflow or underflow. // Restore s3 // Restore s2 // { .mfi - nop.m 0 + nop.m 0 fsetc.s3 0x7F,0x40 - nop.i 999 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fsetc.s2 0x7F,0x40 - nop.i 999 -};; + nop.i 0 +} +;; // // Is the result zero? 
// { .mfi - nop.m 999 - fclass.m.unc p6, p0 = FR_Result3, 0x007 - nop.i 999 -} + nop.m 0 + fclass.m p6, p0 = FR_Result3, 0x007 + nop.i 0 +} { .mfi - addl GR_Tag = 205, r0 - fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big - nop.i 0 -};; + nop.m 0 + fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big + nop.i 0 +} +;; // // Detect masked underflow - Tiny + Inexact Only // { .mfi - nop.m 999 + nop.m 0 (p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2 - nop.i 999 -};; + nop.i 0 +} +;; // // Is result bigger the allowed range? // Branch out for underflow // { .mfb -(p6) addl GR_Tag = 206, r0 + nop.m 0 (p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig -(p6) br.cond.spnt scalbnf_UNDERFLOW -};; +(p6) br.cond.spnt SCALBNF_UNDERFLOW +} +;; // // Branch out for overflow // -{ .mbb - nop.m 0 -(p7) br.cond.spnt scalbnf_OVERFLOW -(p9) br.cond.spnt scalbnf_OVERFLOW -};; +{ .bbb +(p7) br.cond.spnt SCALBNF_OVERFLOW +(p9) br.cond.spnt SCALBNF_OVERFLOW + br.ret.sptk b0 // Return from main path. +} +;; -// -// Return from main path. 
-// -{ .mfb - nop.m 999 - nop.f 0 - br.ret.sptk b0;; +// Here if result overflows +SCALBNF_OVERFLOW: +{ .mib + alloc r32=ar.pfs,3,0,4,0 + addl GR_Tag = 205, r0 // Set error tag for overflow + br.cond.sptk __libm_error_region // Call error support for overflow } +;; -GLOBAL_LIBM_END(__libm_scalblnf) -__libm_error_region: +// Here if result underflows +SCALBNF_UNDERFLOW: +{ .mib + alloc r32=ar.pfs,3,0,4,0 + addl GR_Tag = 206, r0 // Set error tag for underflow + br.cond.sptk __libm_error_region // Call error support for underflow +} +;; + +// Here if x=unorm +SCALBNF_UNORM: +{ .mib + getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x + nop.i 0 + br.cond.sptk SCALBNF_COMMON // Return to main path +} +;; -scalbnf_OVERFLOW: -scalbnf_UNDERFLOW: + +GLOBAL_LIBM_END(__libm_scalblnf) +LOCAL_LIBM_ENTRY(__libm_error_region) // // Get stack address of N // .prologue { .mfi - add GR_Parameter_Y=-32,sp + add GR_Parameter_Y=-32,sp nop.f 0 .save ar.pfs,GR_SAVE_PFS - mov GR_SAVE_PFS=ar.pfs + mov GR_SAVE_PFS=ar.pfs } // -// Adjust sp +// Adjust sp // { .mfi .fframe 64 - add sp=-64,sp + add sp=-64,sp nop.f 0 - mov GR_SAVE_GP=gp + mov GR_SAVE_GP=gp };; // -// Store N on stack in correct position +// Store N on stack in correct position // Locate the address of x on stack // { .mmi - st8 [GR_Parameter_Y] = GR_N_as_int,16 - add GR_Parameter_X = 16,sp + st8 [GR_Parameter_Y] = GR_N_as_int,16 + add GR_Parameter_X = 16,sp .save b0, GR_SAVE_B0 - mov GR_SAVE_B0=b0 + mov GR_SAVE_B0=b0 };; // @@ -338,42 +406,42 @@ scalbnf_UNDERFLOW: // .body { .mib - stfs [GR_Parameter_X] = FR_Norm_X - add GR_Parameter_RESULT = 0,GR_Parameter_Y + stfs [GR_Parameter_X] = FR_Norm_X + add GR_Parameter_RESULT = 0,GR_Parameter_Y nop.b 0 } { .mib - stfs [GR_Parameter_Y] = FR_Result + stfs [GR_Parameter_Y] = FR_Result add GR_Parameter_Y = -16,GR_Parameter_Y - br.call.sptk b0=__libm_error_support# + br.call.sptk b0=__libm_error_support# };; // // Get location of result on stack // { .mmi + add 
GR_Parameter_RESULT = 48,sp nop.m 0 - nop.m 0 - add GR_Parameter_RESULT = 48,sp + nop.i 0 };; // -// Get the new result +// Get the new result // { .mmi - ldfs FR_Result = [GR_Parameter_RESULT] + ldfs FR_Result = [GR_Parameter_RESULT] .restore sp - add sp = 64,sp - mov b0 = GR_SAVE_B0 + add sp = 64,sp + mov b0 = GR_SAVE_B0 };; // // Restore gp, ar.pfs and return // { .mib - mov gp = GR_SAVE_GP - mov ar.pfs = GR_SAVE_PFS - br.ret.sptk b0 + mov gp = GR_SAVE_GP + mov ar.pfs = GR_SAVE_PFS + br.ret.sptk b0 };; LOCAL_LIBM_END(__libm_error_region) diff --git a/sysdeps/ia64/fpu/libm_sincos.S b/sysdeps/ia64/fpu/libm_sincos.S index a3f4c72..3475b62 100644 --- a/sysdeps/ia64/fpu/libm_sincos.S +++ b/sysdeps/ia64/fpu/libm_sincos.S @@ -46,12 +46,13 @@ // 03/19/02 Added stack unwind around call to __libm_cis_large // 09/05/02 Work range is widened by reduction strengthen (3 parts of Pi/16) // 02/10/03 Reordered header: .section, .global, .proc, .align -// +// 08/08/03 Improved performance +// 02/11/04 cis is moved to the separate file. +// // API //============================================================== -// 1) double _Complex cis(double) -// 2) void sincos(double, double*s, double*c) -// 3) __libm_sincos - internal LIBM function, that accepts +// 1) void sincos(double, double*s, double*c) +// 2) __libm_sincos - internal LIBM function, that accepts // argument in f8 and returns cosine through f8, sine through f9 // // Overview of operation @@ -65,12 +66,12 @@ // nfloat = Round result to integer (round-to-nearest) // // r = x - nfloat * pi/2^k -// Do this as ((((x - nfloat * HIGH(pi/2^k))) - -// nfloat * LOW(pi/2^k)) - +// Do this as ((((x - nfloat * HIGH(pi/2^k))) - +// nfloat * LOW(pi/2^k)) - // nfloat * LOWEST(pi/2^k) for increased accuracy. // pi/2^k is stored as two numbers that when added make pi/2^k. 
// pi/2^k = HIGH(pi/2^k) + LOW(pi/2^k) -// HIGH and LOW parts are rounded to zero values, +// HIGH and LOW parts are rounded to zero values, // and LOWEST is rounded to nearest one. // // x = (nfloat * pi/2^k) + r @@ -166,15 +167,14 @@ // Registers used //============================================================== // general input registers: -// r14 -> r19 -// r32 -> r49 +// r14 -> r39 // predicate registers used: // p6 -> p14 - +// // floating-point registers used // f9 -> f15 -// f32 -> f100 +// f32 -> f67 // Assembly macros //============================================================== @@ -246,38 +246,32 @@ cis_Q = f67 cis_pResSin = r33 cis_pResCos = r34 -cis_exp_limit = r35 -cis_r_signexp = r36 -cis_AD_beta_table = r37 -cis_r_sincos = r38 - -cis_r_exp = r39 -cis_r_17_ones = r40 - cis_GR_sig_inv_pi_by_16 = r14 cis_GR_rshf_2to61 = r15 cis_GR_rshf = r16 cis_GR_exp_2tom61 = r17 cis_GR_n = r18 - cis_GR_n_sin = r19 -cis_GR_m_sin = r41 -cis_GR_32m_sin = r41 - -cis_GR_n_cos = r42 -cis_GR_m_cos = r43 -cis_GR_32m_cos = r43 - -cis_AD_2_sin = r44 -cis_AD_2_cos = r45 - -cis_gr_tmp = r46 -GR_SAVE_B0 = r47 -GR_SAVE_GP = r48 -rB0_SAVED = r49 -GR_SAVE_PFS = r50 -GR_SAVE_PR = r51 -cis_AD_1 = r52 +cis_exp_limit = r20 +cis_r_signexp = r21 +cis_AD_1 = r22 +cis_r_sincos = r23 +cis_r_exp = r24 +cis_r_17_ones = r25 +cis_GR_m_sin = r26 +cis_GR_32m_sin = r26 +cis_GR_n_cos = r27 +cis_GR_m_cos = r28 +cis_GR_32m_cos = r28 +cis_AD_2_sin = r29 +cis_AD_2_cos = r30 +cis_gr_tmp = r31 + +GR_SAVE_B0 = r35 +GR_SAVE_GP = r36 +rB0_SAVED = r37 +GR_SAVE_PFS = r38 +GR_SAVE_PR = r39 RODATA @@ -408,14 +402,14 @@ LOCAL_OBJECT_END(double_sin_cos_beta_k4) GLOBAL_IEEE754_ENTRY(sincos) // cis_GR_sig_inv_pi_by_16 = significand of 16/pi { .mlx - alloc GR_SAVE_PFS = ar.pfs, 0, 21, 0, 0 + getf.exp cis_r_signexp = cis_Arg movl cis_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A - + } // cis_GR_rshf_2to61 = 1.1000 2^(63+63-2) { .mlx addl cis_AD_1 = @ltoff(double_cis_pi), gp - movl cis_GR_rshf_2to61 = 
0x47b8000000000000 + movl cis_GR_rshf_2to61 = 0x47b8000000000000 };; { .mfi @@ -430,12 +424,11 @@ GLOBAL_IEEE754_ENTRY(sincos) br.cond.sptk _CIS_COMMON };; GLOBAL_IEEE754_END(sincos) -LOCAL_LIBM_ENTRY(cis) -LOCAL_LIBM_END(cis) + GLOBAL_LIBM_ENTRY(__libm_sincos) // cis_GR_sig_inv_pi_by_16 = significand of 16/pi { .mlx - alloc GR_SAVE_PFS = ar.pfs,0,21,0,0 + getf.exp cis_r_signexp = cis_Arg movl cis_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A } // cis_GR_rshf_2to61 = 1.1000 2^(63+63-2) @@ -443,11 +436,12 @@ GLOBAL_LIBM_ENTRY(__libm_sincos) addl cis_AD_1 = @ltoff(double_cis_pi), gp movl cis_GR_rshf_2to61 = 0x47b8000000000000 };; + // p14 set for __libm_sincos and cis { .mfi ld8 cis_AD_1 = [cis_AD_1] fnorm.s1 cis_NORM_f8 = cis_Arg - cmp.eq p14, p13 = r0, r0 + cmp.eq p14, p13 = r0, r0 } // cis_GR_exp_2tom61 = exponent of scaling factor 2^-61 { .mib @@ -476,10 +470,15 @@ _CIS_COMMON: // 2^-61 for scaling Nfloat // 0x1001a is register_bias + 27. // So if f8 >= 2^27, go to large arguments routine -{ .mmi - getf.exp cis_r_signexp = cis_Arg - setf.exp cis_2TOM61 = cis_GR_exp_2tom61 +{ .mfi + alloc GR_SAVE_PFS = ar.pfs, 3, 5, 0, 0 + fclass.m p11,p0 = cis_Arg, 0x0b // Test for x=unorm mov cis_exp_limit = 0x1001a +} +{ .mib + setf.exp cis_2TOM61 = cis_GR_exp_2tom61 + nop.i 0 +(p6) br.cond.spnt _CIS_SPECIAL_ARGS };; // Load the two pieces of pi/16 @@ -488,9 +487,11 @@ _CIS_COMMON: { .mmb ldfe cis_Pi_by_16_hi = [cis_AD_1],16 setf.d cis_RSHF = cis_GR_rshf -(p6) br.cond.spnt _CIS_SPECIAL_ARGS +(p11) br.cond.spnt _CIS_UNORM // Branch if x=unorm };; +_CIS_COMMON2: +// Return here if x=unorm // Create constant inexact set { .mmi ldfe cis_Pi_by_16_lo = [cis_AD_1],16 @@ -498,23 +499,18 @@ _CIS_COMMON: nop.i 0 };; +// Select exponent (17 lsb) { .mfi ldfe cis_Pi_by_16_lowest = [cis_AD_1],16 nop.f 0 - nop.i 0 -};; - -// Start loading P, Q coefficients -{ .mib - ldfpd cis_P4,cis_Q4 = [cis_AD_1],16 dep.z cis_r_exp = cis_r_signexp, 0, 17 - nop.b 0 };; +// Start loading P, Q coefficients // p10 
is true if we must call routines to handle larger arguments // p10 is true if f8 exp is > 0x1001a { .mmb - ldfpd cis_P3,cis_Q3 = [cis_AD_1],16 + ldfpd cis_P4,cis_Q4 = [cis_AD_1],16 cmp.ge p10, p0 = cis_r_exp, cis_exp_limit (p10) br.cond.spnt _CIS_LARGE_ARGS // go to |x| >= 2^27 path };; @@ -523,39 +519,33 @@ _CIS_COMMON: // Multiply x by scaled 16/pi and add large const to shift integer part of W to // rightmost bits of significand { .mfi - ldfpd cis_P2,cis_Q2 = [cis_AD_1],16 + ldfpd cis_P3,cis_Q3 = [cis_AD_1],16 fma.s1 cis_W_2TO61_RSH = cis_NORM_f8,cis_SIG_INV_PI_BY_16_2TO61,cis_RSHF_2TO61 nop.i 0 };; +// get N = (int)cis_int_Nfloat // cis_NFLOAT = Round_Int_Nearest(cis_W) -{ .mfi - ldfpd cis_P1,cis_Q1 = [cis_AD_1], 16 +{ .mmf + getf.sig cis_GR_n = cis_W_2TO61_RSH + ldfpd cis_P2,cis_Q2 = [cis_AD_1],16 fms.s1 cis_NFLOAT = cis_W_2TO61_RSH,cis_2TOM61,cis_RSHF - nop.i 0 -};; - -// get N = (int)cis_int_Nfloat -{ .mfi - getf.sig cis_GR_n = cis_W_2TO61_RSH - nop.f 0 - nop.i 0 };; -// Add 2^(k-1) (which is in cis_r_sincos) to N // cis_r = -cis_Nfloat * cis_Pi_by_16_hi + x -// cis_r = cis_r -cis_Nfloat * cis_Pi_by_16_lo { .mfi - add cis_GR_n_cos = 0x8, cis_GR_n + ldfpd cis_P1,cis_Q1 = [cis_AD_1], 16 fnma.s1 cis_r = cis_NFLOAT,cis_Pi_by_16_hi,cis_NORM_f8 nop.i 0 };; -//Get M (least k+1 bits of N) +// Add 2^(k-1) (which is in cis_r_sincos) to N { .mmi + add cis_GR_n_cos = 0x8, cis_GR_n +;; +//Get M (least k+1 bits of N) and cis_GR_m_sin = 0x1f,cis_GR_n and cis_GR_m_cos = 0x1f,cis_GR_n_cos - nop.i 0 };; { .mmi @@ -565,9 +555,10 @@ _CIS_COMMON: };; // Add 32*M to address of sin_cos_beta table -{ .mmi +// cis_r = cis_r -cis_Nfloat * cis_Pi_by_16_lo +{ .mfi add cis_AD_2_sin = cis_GR_32m_sin, cis_AD_1 - nop.m 0 + fnma.s1 cis_r = cis_NFLOAT, cis_Pi_by_16_lo, cis_r shl cis_GR_32m_cos = cis_GR_m_cos,5 };; @@ -580,7 +571,6 @@ _CIS_COMMON: { .mfi ldfe cis_Sm_cos = [cis_AD_2_cos], 16 - fnma.s1 cis_r = cis_NFLOAT, cis_Pi_by_16_lo, cis_r nop.i 0 };; @@ -604,7 +594,7 @@ _CIS_COMMON: { 
.mfi ldfe cis_Cm_cos = [cis_AD_2_cos] - fma.s1 cis_P_temp1 = cis_rsq, cis_P4, cis_P3 + fma.s1 cis_P_temp1 = cis_rsq, cis_P4, cis_P3 nop.i 0 } @@ -638,18 +628,18 @@ _CIS_COMMON: { .mfi nop.m 0 - fma.s1 cis_Q = cis_rsq, cis_Q_temp2, cis_Q1 + fmpy.s1 cis_rcub = cis_r_exact, cis_rsq // get r^3 nop.i 0 -} +};; + { .mfi nop.m 0 - fma.s1 cis_P = cis_rsq, cis_P_temp2, cis_P1 + fma.s1 cis_Q = cis_rsq, cis_Q_temp2, cis_Q1 nop.i 0 -};; - +} { .mfi nop.m 0 - fmpy.s1 cis_rcub = cis_r_exact, cis_rsq // get r^3 + fma.s1 cis_P = cis_rsq, cis_P_temp2, cis_P1 nop.i 0 };; @@ -717,7 +707,17 @@ _CIS_SPECIAL_ARGS: stfd [cis_pResCos] = cis_Cos_res br.ret.sptk b0 // common exit for sincos main path };; + +_CIS_UNORM: +// Here if x=unorm +{ .mfb + getf.exp cis_r_signexp = cis_NORM_f8 // Get signexp of x + fcmp.eq.s0 p11,p0 = cis_Arg, f0 // Dummy op to set denorm + br.cond.sptk _CIS_COMMON2 // Return to main path +};; + GLOBAL_LIBM_END(__libm_sincos) + //// |x| > 2^27 path /////// .proc _CIS_LARGE_ARGS _CIS_LARGE_ARGS: diff --git a/sysdeps/ia64/fpu/libm_sincos_large.S b/sysdeps/ia64/fpu/libm_sincos_large.S index 42cf094..b09d369 100644 --- a/sysdeps/ia64/fpu/libm_sincos_large.S +++ b/sysdeps/ia64/fpu/libm_sincos_large.S @@ -792,6 +792,7 @@ GLOBAL_LIBM_END(__libm_sincos_large) + GLOBAL_LIBM_ENTRY(__libm_sin_large) { .mlx @@ -821,6 +822,7 @@ alloc GR_Table_Base = ar.pfs,0,12,2,0 } GLOBAL_LIBM_END(__libm_sin_large) + GLOBAL_LIBM_ENTRY(__libm_cos_large) { .mlx @@ -2673,6 +2675,7 @@ SINCOS_SPECIAL: } GLOBAL_LIBM_END(__libm_cos_large) + // ******************************************************************* // ******************************************************************* // ******************************************************************* diff --git a/sysdeps/ia64/fpu/libm_sincosf.S b/sysdeps/ia64/fpu/libm_sincosf.S index c4783ac..fb12007 100644 --- a/sysdeps/ia64/fpu/libm_sincosf.S +++ b/sysdeps/ia64/fpu/libm_sincosf.S @@ -47,12 +47,12 @@ // 03/19/02 Added stack unwind around call to 
__libm_cisf_large // 09/05/02 Work range is widened by reduction strengthen (2 parts of Pi/16) // 02/10/03 Reordered header: .section, .global, .proc, .align +// 02/11/04 cisf is moved to the separate file. // API //============================================================== -// 1) float _Complex cisf(float) -// 2) void sincosf(float, float*s, float*c) -// 3) __libm_sincosf - internal LIBM function, that accepts +// 1) void sincosf(float, float*s, float*c) +// 2) __libm_sincosf - internal LIBM function, that accepts // argument in f8 and returns cosine through f8, sine through f9 // @@ -400,7 +400,7 @@ GLOBAL_IEEE754_ENTRY(sincosf) { .mlx alloc GR_SAVE_PFS = ar.pfs, 0, 21, 0, 0 movl cisf_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A // 16/pi signd - + } // cis_GR_rshf_2to61 = 1.1000 2^(63+63-2) { .mlx @@ -420,8 +420,7 @@ GLOBAL_IEEE754_ENTRY(sincosf) br.cond.sptk _CISF_COMMON };; GLOBAL_IEEE754_END(sincosf) -LOCAL_LIBM_ENTRY(cisf) -LOCAL_LIBM_END(cisf) + GLOBAL_LIBM_ENTRY(__libm_sincosf) { .mlx // cisf_GR_sig_inv_pi_by_16 = significand of 16/pi @@ -438,7 +437,7 @@ GLOBAL_LIBM_ENTRY(__libm_sincosf) { .mfi ld8 cisf_AD_1 = [cisf_AD_1] fnorm.s1 cisf_NORM_f8 = cisf_Arg - cmp.eq p14, p13 = r0, r0 + cmp.eq p14, p13 = r0, r0 } // cisf_GR_exp_2tom61 = exponent of scaling factor 2^-61 { .mib @@ -499,7 +498,7 @@ _CISF_COMMON: // p10 is true if f8 exp is >= 0x10017 { .mmb ldfpd cisf_P1,cisf_Q1 = [cisf_AD_1], 16 - cmp.ge p10, p0 = cisf_r_exp, cisf_exp_limit + cmp.ge p10, p0 = cisf_r_exp, cisf_exp_limit (p10) br.cond.spnt _CISF_LARGE_ARGS // go to |x| >= 2^24 path };; @@ -521,7 +520,7 @@ _CISF_COMMON: // N = (int)cisf_int_Nfloat { .mfi - getf.sig cisf_GR_n = cisf_W_2TO61_RSH + getf.sig cisf_GR_n = cisf_W_2TO61_RSH nop.f 0 nop.i 0 };; @@ -537,7 +536,7 @@ _CISF_COMMON: //Get M (least k+1 bits of N) { .mmi - and cisf_GR_m_sin = 0x1f,cisf_GR_n + and cisf_GR_m_sin = 0x1f,cisf_GR_n and cisf_GR_m_cos = 0x1f,cisf_GR_n_cos nop.i 0 };; @@ -552,7 +551,7 @@ _CISF_COMMON: { .mmf ldfpd 
cisf_Sm_sin, cisf_Cm_sin = [cisf_AD_2_sin] ldfpd cisf_Sm_cos, cisf_Cm_cos = [cisf_AD_2_cos] - fclass.m.unc p10,p0 = cisf_Arg,0x0b + fclass.m.unc p10,p0 = cisf_Arg,0x0b };; { .mfi @@ -679,6 +678,7 @@ _CISF_RETURN: br.ret.sptk b0 // exit for sincos };; GLOBAL_LIBM_END(__libm_sincosf) + //// |x| > 2^24 path /////// .proc _CISF_LARGE_ARGS _CISF_LARGE_ARGS: @@ -728,7 +728,7 @@ _CISF_LARGE_ARGS: { .mfb nop.m 0 fma.s.s0 cisf_Sin_res = cisf_Sin_res, f1, f0 -(p14) br.cond.sptk _CISF_RETURN +(p14) br.cond.sptk _CISF_RETURN };; { .mmb diff --git a/sysdeps/ia64/fpu/libm_sincosl.S b/sysdeps/ia64/fpu/libm_sincosl.S index 2a03a23..1d89ff4 100644 --- a/sysdeps/ia64/fpu/libm_sincosl.S +++ b/sysdeps/ia64/fpu/libm_sincosl.S @@ -1,7 +1,7 @@ -.file "libm_sincosl.asm" +.file "libm_sincosl.s" -// Copyright (c) 2000 - 2003, Intel Corporation +// Copyright (c) 2000 - 2004, Intel Corporation // All rights reserved. // // Contributed 2000 by the Intel Numerics Group, Intel Corporation @@ -43,6 +43,9 @@ // 05/13/02 Initial version of sincosl (based on libm's sinl and cosl) // 02/10/03 Reordered header: .section, .global, .proc, .align; // used data8 for long double table values +// 10/13/03 Corrected .file name +// 02/11/04 cisl is moved to the separate file. 
+// 10/26/04 Avoided using r14-31 as scratch so not clobbered by dynamic loader // //********************************************************************* // @@ -50,9 +53,8 @@ // // API's //============================================================== -// 1) long double _Complex cisl(long double) -// 2) void sincosl(long double, long double*s, long double*c) -// 3) __libm_sincosl - internal LIBM function, that accepts +// 1) void sincosl(long double, long double*s, long double*c) +// 2) __libm_sincosl - internal LIBM function, that accepts // argument in f8 and returns cosine through f8, sine through f9 // // @@ -65,7 +67,7 @@ // f32-f121 // // General Purpose Registers: -// r32-r47 +// r32-r61 // // Predicate Registers: p6-p15 // @@ -775,20 +777,6 @@ FR_Tmp = f94 sincos_pResSin = r34 sincos_pResCos = r35 -GR_sig_inv_pi = r14 -GR_rshf_2to64 = r15 -GR_exp_2tom64 = r16 -GR_rshf = r17 -GR_ad_p = r18 -GR_ad_d = r19 -GR_ad_pp = r20 -GR_ad_qq = r21 -GR_ad_c = r22 -GR_ad_s = r23 -GR_ad_ce = r24 -GR_ad_se = r25 -GR_ad_m14 = r26 -GR_ad_s1 = r27 GR_exp_m2_to_m3= r36 GR_N_Inc = r37 GR_Cis = r38 @@ -803,6 +791,20 @@ GR_N_SignS = r45 GR_N_SignC = r46 GR_N_SinCos = r47 +GR_sig_inv_pi = r48 +GR_rshf_2to64 = r49 +GR_exp_2tom64 = r50 +GR_rshf = r51 +GR_ad_p = r52 +GR_ad_d = r53 +GR_ad_pp = r54 +GR_ad_qq = r55 +GR_ad_c = r56 +GR_ad_s = r57 +GR_ad_ce = r58 +GR_ad_se = r59 +GR_ad_m14 = r60 +GR_ad_s1 = r61 // For unwind support GR_SAVE_B0 = r39 @@ -814,7 +816,7 @@ GR_SAVE_PFS = r41 GLOBAL_IEEE754_ENTRY(sincosl) { .mlx ///////////////////////////// 1 ///////////////// - alloc r32 = ar.pfs,3,13,2,0 + alloc r32 = ar.pfs,3,27,2,0 movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi } { .mlx @@ -834,11 +836,9 @@ GLOBAL_IEEE754_ENTRY(sincosl) };; GLOBAL_IEEE754_END(sincosl) -LOCAL_LIBM_ENTRY(cisl) -LOCAL_LIBM_END(cisl) GLOBAL_LIBM_ENTRY(__libm_sincosl) { .mlx ///////////////////////////// 1 ///////////////// - alloc r32 = ar.pfs,3,14,2,0 + alloc r32 = ar.pfs,3,27,2,0 movl 
GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi } { .mlx @@ -2447,6 +2447,7 @@ SINCOSL_SPECIAL: GLOBAL_LIBM_END(__libm_sincosl) + // ******************************************************************* // ******************************************************************* // ******************************************************************* @@ -2461,7 +2462,7 @@ GLOBAL_LIBM_END(__libm_sincosl) // c is in f9 // N is in r8 // Be sure to allocate at least 2 GP registers as output registers for -// __libm_pi_by_2_reduce. This routine uses r49-50. These are used as +// __libm_pi_by_2_reduce. This routine uses r62-63. These are used as // scratch registers within the __libm_pi_by_2_reduce routine (for speed). // // We know also that __libm_pi_by_2_reduce preserves f10-15, f71-127. We diff --git a/sysdeps/ia64/fpu/libm_support.h b/sysdeps/ia64/fpu/libm_support.h index 50dac33..dc9c0a2 100644 --- a/sysdeps/ia64/fpu/libm_support.h +++ b/sysdeps/ia64/fpu/libm_support.h @@ -1,7 +1,8 @@ /* file: libm_support.h */ -// Copyright (c) 2000 - 2002, Intel Corporation +/* +// Copyright (c) 2000 - 2004, Intel Corporation // All rights reserved. // // Contributed 2000 by the Intel Numerics Group, Intel Corporation @@ -21,13 +22,14 @@ // products derived from this software without specific prior written // permission. +// // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS @@ -38,17 +40,17 @@ // http://www.intel.com/software/products/opensource/libraries/num.htm. // -// History: 02/02/2000 Initial version +// History: 02/02/2000 Initial version // 2/28/2000 added tags for logb and nextafter // 3/22/2000 Changes to support _LIB_VERSIONIMF variable -// and filled some enum gaps. Added support for C99. +// and filled some enum gaps. Added support for C99. // 5/31/2000 added prototypes for __libm_frexp_4l/8l // 8/10/2000 Changed declaration of _LIB_VERSIONIMF to work for library // builds and other application builds (precompiler directives). // 8/11/2000 Added pointers-to-matherr-functions declarations to allow // for user-defined matherr functions in the dll build. // 12/07/2000 Added scalbn error_types values. -// 5/01/2001 Added error_types values for C99 nearest integer +// 5/01/2001 Added error_types values for C99 nearest integer // functions. // 6/07/2001 Added error_types values for fdim. // 6/18/2001 Added include of complex_support.h. @@ -65,232 +67,142 @@ // 06/27/2002 Added error_types for sinhcosh. // 12/05/2002 Added error_types for annuity and compound // 04/10/2003 Added error_types for tgammal/tgamma/tgammaf +// 05/16/2003 FP-treatment macros copied here from IA32 libm_support.h +// 06/02/2003 Added pad into struct fp80 (12/16 bytes). +// 08/01/2003 Added struct ker80 and macros for multiprecision addition, +// subtraction, multiplication, division, square root. 
+// 08/07/2003 History section updated. +// 09/03/2003 ALIGN(n) macro added. +// 10/01/2003 LDOUBLE_ALIGN and fp80 corrected on linux to 16 bytes. +// 11/24/2004 Added ifdef around definitions of INT32/64 +// 12/15/2004 Added error_types for exp10, nextafter, nexttoward +// underflow. Moved error codes into libm_error_codes.h. // +*/ -void __libm_sincos_pi4(double,double*,double*,int); -void __libm_y0y1(double , double *, double *); -void __libm_j0j1(double , double *, double *); -double __libm_j0(double); -double __libm_j1(double); -double __libm_jn(int,double); -double __libm_y0(double); -double __libm_y1(double); -double __libm_yn(int,double); -double __libm_copysign (double, double); -float __libm_copysignf (float, float); -long double __libm_copysignl (long double, long double); - -extern double sqrt(double); -extern double fabs(double); -extern double log(double); -extern double log1p(double); -extern double sqrt(double); -extern double sin(double); -extern double exp(double); -extern double modf(double, double *); -extern double asinh(double); -extern double acosh(double); -extern double atanh(double); -extern double tanh(double); -extern double erf(double); -extern double erfc(double); -extern double j0(double); -extern double j1(double); -extern double jn(int, double); -extern double y0(double); -extern double y1(double); -extern double yn(int, double); - -extern float fabsf(float); -extern float asinhf(float); -extern float acoshf(float); -extern float atanhf(float); -extern float tanhf(float); -extern float erff(float); -extern float erfcf(float); -extern float j0f(float); -extern float j1f(float); -extern float jnf(int, float); -extern float y0f(float); -extern float y1f(float); -extern float ynf(int, float); - -extern long double log1pl(long double); -extern long double logl(long double); -extern long double sqrtl(long double); -extern long double expl(long double); -extern long double fabsl(long double); +#ifndef __LIBM_SUPPORT_H_INCLUDED__ +#define 
__LIBM_SUPPORT_H_INCLUDED__ -#if !(defined(SIZE_INT_32) || defined(SIZE_INT_64)) -#error integer size not established; define SIZE_INT_32 or SIZE_INT_64 +#ifndef _LIBC +#if !(defined(_WIN32) || defined(_WIN64)) +# pragma const_seg(".rodata") /* place constant data in text (code) section */ #endif -#if (defined(SIZE_INT_32) && defined(SIZE_INT_64)) -#error multiple integer size definitions; define SIZE_INT_32 or SIZE_INT_64 +#if defined(__ICC) || defined(__ICL) || defined(__ECC) || defined(__ECL) +# pragma warning( disable : 1682 ) /* #1682: implicit conversion of a 64-bit integral type to a smaller integral type (potential portability problem) */ +# pragma warning( disable : 1683 ) /* #1683: explicit conversion of a 64-bit integral type to a smaller integral type (potential portability problem) */ #endif - -#if !(defined(SIZE_LONG_INT_32) || defined(SIZE_LONG_INT_64)) -#error long int size not established; define SIZE_LONG_INT_32 or SIZE_LONG_INT_64 #endif -#if (defined(SIZE_LONG_INT_32) && defined(SIZE_LONG_INT_64)) -#error multiple long int size definitions; define SIZE_LONG_INT_32 or SIZE_LONG_INT_64 +/* macros to form a double value in hex representation (unsigned int type) */ + +#define DOUBLE_HEX(hi,lo) 0x##lo,0x##hi /*LITTLE_ENDIAN*/ + +#include "libm_cpu_defs.h" + +#if !(defined (IA64)) +# include "libm_dll.h" +# include "libm_dispatch.h" #endif -#if !(defined(SIZE_LONG_LONG_INT_32) || defined(SIZE_LONG_LONG_INT_64)) -#error long long int size not established; define SIZE_LONG_LONG_INT_32 or SIZE_LONG_LONG_INT_64 +#include "libm_error_codes.h" + +struct exceptionf +{ + int type; + char *name; + float arg1, arg2, retval; +}; + +# ifdef __cplusplus +struct __exception +{ + int type; + char *name; + double arg1, arg2, retval; +}; +# else + +# ifndef _LIBC +struct exception +{ + int type; + char *name; + double arg1, arg2, retval; +}; +# endif +# endif + +struct exceptionl +{ + int type; + char *name; + long double arg1, arg2, retval; +}; + +#if (defined (_MS_)
&& defined (IA64)) +#define MATHERR_F _matherrf +#define MATHERR_D _matherr +#else +#define MATHERR_F matherrf +#define MATHERR_D matherr #endif -#if (defined(SIZE_LONG_LONG_INT_32) && defined(SIZE_LONG_LONG_INT_64)) -#error multiple long long int size definitions; define SIZE_LONG_LONG_INT_32 or SIZE_LONG_LONG_INT_64 +# ifdef __cplusplus +#define EXC_DECL_D __exception +#else +// exception is a reserved name in C++ +#define EXC_DECL_D exception #endif +extern int MATHERR_F(struct exceptionf*); +extern int MATHERR_D(struct EXC_DECL_D*); +extern int matherrl(struct exceptionl*); + +#ifndef _LIBC +// Add code to support _LIB_VERSIONIMF typedef enum { - logl_zero=0, logl_negative, /* 0, 1 */ - log_zero, log_negative, /* 2, 3 */ - logf_zero, logf_negative, /* 4, 5 */ - log10l_zero, log10l_negative, /* 6, 7 */ - log10_zero, log10_negative, /* 8, 9 */ - log10f_zero, log10f_negative, /* 10, 11 */ - expl_overflow, expl_underflow, /* 12, 13 */ - exp_overflow, exp_underflow, /* 14, 15 */ - expf_overflow, expf_underflow, /* 16, 17 */ - powl_overflow, powl_underflow, /* 18, 19 */ - powl_zero_to_zero, /* 20 */ - powl_zero_to_negative, /* 21 */ - powl_neg_to_non_integer, /* 22 */ - powl_nan_to_zero, /* 23 */ - pow_overflow, pow_underflow, /* 24, 25 */ - pow_zero_to_zero, /* 26 */ - pow_zero_to_negative, /* 27 */ - pow_neg_to_non_integer, /* 28 */ - pow_nan_to_zero, /* 29 */ - powf_overflow, powf_underflow, /* 30, 31 */ - powf_zero_to_zero, /* 32 */ - powf_zero_to_negative, /* 33 */ - powf_neg_to_non_integer, /* 34 */ - powf_nan_to_zero, /* 35 */ - atan2l_zero, /* 36 */ - atan2_zero, /* 37 */ - atan2f_zero, /* 38 */ - expm1l_overflow, /* 39 */ - expm1l_underflow, /* 40 */ - expm1_overflow, /* 41 */ - expm1_underflow, /* 42 */ - expm1f_overflow, /* 43 */ - expm1f_underflow, /* 44 */ - hypotl_overflow, /* 45 */ - hypot_overflow, /* 46 */ - hypotf_overflow, /* 47 */ - sqrtl_negative, /* 48 */ - sqrt_negative, /* 49 */ - sqrtf_negative, /* 50 */ - scalbl_overflow, scalbl_underflow, 
/* 51, 52 */ - scalb_overflow, scalb_underflow, /* 53, 54 */ - scalbf_overflow, scalbf_underflow, /* 55, 56 */ - acosl_gt_one, acos_gt_one, acosf_gt_one, /* 57, 58, 59 */ - asinl_gt_one, asin_gt_one, asinf_gt_one, /* 60, 61, 62 */ - coshl_overflow, cosh_overflow, coshf_overflow, /* 63, 64, 65 */ - y0l_zero, y0l_negative,y0l_gt_loss, /* 66, 67, 68 */ - y0_zero, y0_negative,y0_gt_loss, /* 69, 70, 71 */ - y0f_zero, y0f_negative,y0f_gt_loss, /* 72, 73, 74 */ - y1l_zero, y1l_negative,y1l_gt_loss, /* 75, 76, 77 */ - y1_zero, y1_negative,y1_gt_loss, /* 78, 79, 80 */ - y1f_zero, y1f_negative,y1f_gt_loss, /* 81, 82, 83 */ - ynl_zero, ynl_negative,ynl_gt_loss, /* 84, 85, 86 */ - yn_zero, yn_negative,yn_gt_loss, /* 87, 88, 89 */ - ynf_zero, ynf_negative,ynf_gt_loss, /* 90, 91, 92 */ - j0l_gt_loss, /* 93 */ - j0_gt_loss, /* 94 */ - j0f_gt_loss, /* 95 */ - j1l_gt_loss, /* 96 */ - j1_gt_loss, /* 97 */ - j1f_gt_loss, /* 98 */ - jnl_gt_loss, /* 99 */ - jn_gt_loss, /* 100 */ - jnf_gt_loss, /* 101 */ - lgammal_overflow, lgammal_negative,lgammal_reserve, /* 102, 103, 104 */ - lgamma_overflow, lgamma_negative,lgamma_reserve, /* 105, 106, 107 */ - lgammaf_overflow, lgammaf_negative, lgammaf_reserve,/* 108, 109, 110 */ - gammal_overflow,gammal_negative, gammal_reserve, /* 111, 112, 113 */ - gamma_overflow, gamma_negative, gamma_reserve, /* 114, 115, 116 */ - gammaf_overflow,gammaf_negative,gammaf_reserve, /* 117, 118, 119 */ - fmodl_by_zero, /* 120 */ - fmod_by_zero, /* 121 */ - fmodf_by_zero, /* 122 */ - remainderl_by_zero, /* 123 */ - remainder_by_zero, /* 124 */ - remainderf_by_zero, /* 125 */ - sinhl_overflow, sinh_overflow, sinhf_overflow, /* 126, 127, 128 */ - atanhl_gt_one, atanhl_eq_one, /* 129, 130 */ - atanh_gt_one, atanh_eq_one, /* 131, 132 */ - atanhf_gt_one, atanhf_eq_one, /* 133, 134 */ - acoshl_lt_one, /* 135 */ - acosh_lt_one, /* 136 */ - acoshf_lt_one, /* 137 */ - log1pl_zero, log1pl_negative, /* 138, 139 */ - log1p_zero, log1p_negative, /* 140, 141 */ - log1pf_zero, 
log1pf_negative, /* 142, 143 */ - ldexpl_overflow, ldexpl_underflow, /* 144, 145 */ - ldexp_overflow, ldexp_underflow, /* 146, 147 */ - ldexpf_overflow, ldexpf_underflow, /* 148, 149 */ - logbl_zero, logb_zero, logbf_zero, /* 150, 151, 152 */ - nextafterl_overflow, nextafter_overflow, - nextafterf_overflow, /* 153, 154, 155 */ - ilogbl_zero, ilogb_zero, ilogbf_zero, /* 156, 157, 158 */ - exp2l_overflow, exp2l_underflow, /* 159, 160 */ - exp2_overflow, exp2_underflow, /* 161, 162 */ - exp2f_overflow, exp2f_underflow, /* 163, 164 */ - exp10l_overflow, exp10_overflow, - exp10f_overflow, /* 165, 166, 167 */ - log2l_zero, log2l_negative, /* 168, 169 */ - log2_zero, log2_negative, /* 170, 171 */ - log2f_zero, log2f_negative, /* 172, 173 */ - scalbnl_overflow, scalbnl_underflow, /* 174, 175 */ - scalbn_overflow, scalbn_underflow, /* 176, 177 */ - scalbnf_overflow, scalbnf_underflow, /* 178, 179 */ - remquol_by_zero, /* 180 */ - remquo_by_zero, /* 181 */ - remquof_by_zero, /* 182 */ - lrintl_large, lrint_large, lrintf_large, /* 183, 184, 185 */ - llrintl_large, llrint_large, llrintf_large, /* 186, 187, 188 */ - lroundl_large, lround_large, lroundf_large, /* 189, 190, 191 */ - llroundl_large, llround_large, llroundf_large, /* 192, 193, 194 */ - fdiml_overflow, fdim_overflow, fdimf_overflow, /* 195, 196, 197 */ - nexttowardl_overflow, nexttoward_overflow, - nexttowardf_overflow, /* 198, 199, 200 */ - scalblnl_overflow, scalblnl_underflow, /* 201, 202 */ - scalbln_overflow, scalbln_underflow, /* 203, 204 */ - scalblnf_overflow, scalblnf_underflow, /* 205, 206 */ - erfcl_underflow, erfc_underflow, erfcf_underflow, /* 207, 208, 209 */ - acosdl_gt_one, acosd_gt_one, acosdf_gt_one, /* 210, 211, 212 */ - asindl_gt_one, asind_gt_one, asindf_gt_one, /* 213, 214, 215 */ - atan2dl_zero, atan2d_zero, atan2df_zero, /* 216, 217, 218 */ - tandl_overflow, tand_overflow, tandf_overflow, /* 219, 220, 221 */ - cotdl_overflow, cotd_overflow, cotdf_overflow, /* 222, 223, 224 */ - cotl_overflow, 
cot_overflow, cotf_overflow, /* 225, 226, 227 */ - sinhcoshl_overflow, sinhcosh_overflow, sinhcoshf_overflow, /* 228, 229, 230 */ - annuityl_by_zero, annuity_by_zero, annuityf_by_zero, /* 231, 232, 233 */ - annuityl_less_m1, annuity_less_m1, annuityf_less_m1, /* 234, 235, 236 */ - annuityl_overflow, annuity_overflow, annuityf_overflow, /* 237, 238, 239 */ - annuityl_underflow, annuity_underflow, annuityf_underflow, /* 240, 241, 242 */ - compoundl_by_zero, compound_by_zero, compoundf_by_zero, /* 243, 244, 245 */ - compoundl_less_m1, compound_less_m1, compoundf_less_m1, /* 246, 247, 248 */ - compoundl_overflow, compound_overflow, compoundf_overflow, /* 249, 250, 251 */ - compoundl_underflow, compound_underflow, compoundf_underflow, /* 252, 253, 254 */ - tgammal_overflow, tgammal_negative, tgammal_reserve, /* 255, 256, 257 */ - tgamma_overflow, tgamma_negative, tgamma_reserve, /* 258, 259, 260 */ - tgammaf_overflow, tgammaf_negative, tgammaf_reserve, /* 261, 262, 263 */ -} error_types; - -void __libm_error_support(void*,void*,void*,error_types); -#ifdef _LIBC -libc_hidden_proto(__libm_error_support) + _IEEE_ = -1, // IEEE-like behavior + _SVID_, // SysV, Rel. 4 behavior + _XOPEN_, // Unix98 + _POSIX_, // Posix + _ISOC_ // ISO C9X +} _LIB_VERSION_TYPE; #endif -#define HI_SIGNIFICAND_LESS(X, HI) ((X)->hi_significand < 0x ## HI) -#define f64abs(x) ((x) < 0.0 ? 
-(x) : (x)) +// This is a run-time variable and may affect +// floating point behavior of the libm functions + +#if !defined( LIBM_BUILD ) +#if defined( _DLL ) +extern _LIB_VERSION_TYPE __declspec(dllimport) _LIB_VERSIONIMF; +#else +extern _LIB_VERSION_TYPE _LIB_VERSIONIMF; +#endif /* _DLL */ +#else +extern int (*pmatherrf)(struct exceptionf*); +extern int (*pmatherr)(struct EXC_DECL_D*); +extern int (*pmatherrl)(struct exceptionl*); +#endif /* LIBM_BUILD */ + +/* memory format definitions (LITTLE_ENDIAN only) */ + +#if !(defined(SIZE_INT_32) || defined(SIZE_INT_64)) +# error "You need to define SIZE_INT_32 or SIZE_INT_64" +#endif + +#if (defined(SIZE_INT_32) && defined(SIZE_INT_64)) +#error multiple integer size definitions; define SIZE_INT_32 or SIZE_INT_64 +#endif + +#if !(defined(SIZE_LONG_32) || defined(SIZE_LONG_64)) +# error "You need to define SIZE_LONG_32 or SIZE_LONG_64" +#endif + +#if (defined(SIZE_LONG_32) && defined(SIZE_LONG_64)) +#error multiple integer size definitions; define SIZE_LONG_32 or SIZE_LONG_64 +#endif #if !defined(__USE_EXTERNAL_FPMEMTYP_H__) @@ -342,22 +254,519 @@ struct fp80 { /*/ sign:1 exponent:15 significand:64 (NO implied bits) */ unsigned exponent:15; unsigned sign:1; #endif + unsigned pad:16; +#if !(defined(__unix__) && defined(__i386__)) + unsigned padwin:32; +#endif }; #endif /*__USE_EXTERNAL_FPMEMTYP_H__*/ -/* macros to form a double value in hex representation (unsigned int type) */ +#if !(defined(opensource)) +typedef __int32 INT32; +typedef signed __int32 SINT32; +typedef unsigned __int32 UINT32; -#define DOUBLE_HEX(hi,lo) 0x##lo,0x##hi /*LITTLE_ENDIAN*/ +typedef __int64 INT64; +typedef signed __int64 SINT64; +typedef unsigned __int64 UINT64; +#else +typedef int INT32; +typedef signed int SINT32; +typedef unsigned int UINT32; -/* macros to form a long double value in hex representation (unsigned short type) */ +typedef long long INT64; +typedef signed long long SINT64; +typedef unsigned long long UINT64; +#endif -#if 
defined(_WIN32) || defined(_WIN64) -#define LDOUBLE_ALIGN 16 -#else -#define LDOUBLE_ALIGN 12 +#if (defined(_WIN32) || defined(_WIN64)) /* Windows */ +# define I64CONST(bits) 0x##bits##i64 +# define U64CONST(bits) 0x##bits##ui64 +#elif (defined(__linux__) && defined(_M_IA64)) /* Linux,64 */ +# define I64CONST(bits) 0x##bits##L +# define U64CONST(bits) 0x##bits##uL +#else /* Linux,32 */ +# define I64CONST(bits) 0x##bits##LL +# define U64CONST(bits) 0x##bits##uLL +#endif + +struct ker80 { + union { + long double ldhi; + struct fp80 fphi; + }; + union { + long double ldlo; + struct fp80 fplo; + }; + int ex; +}; + +/* Addition: x+y */ +/* The result is sum rhi+rlo */ +/* Temporary variables: t1 */ +/* All variables are in long double precision */ +/* Correct if no overflow (algorithm by D.Knuth) */ +#define __LIBM_ADDL1_K80( rhi,rlo,x,y, t1 ) \ + rhi = x + y; \ + rlo = rhi - x; \ + t1 = rhi - rlo; \ + rlo = y - rlo; \ + t1 = x - t1; \ + rlo = rlo + t1; + +/* Addition: (xhi+xlo) + (yhi+ylo) */ +/* The result is sum rhi+rlo */ +/* Temporary variables: t1 */ +/* All variables are in long double precision */ +/* Correct if no overflow (algorithm by T.J.Dekker) */ +#define __LIBM_ADDL2_K80( rhi,rlo,xhi,xlo,yhi,ylo, t1 ) \ + rlo = xhi+yhi; \ + if ( VALUE_GT_80(FP80(xhi),FP80(yhi)) ) { \ + t1=xhi-rlo;t1=t1+yhi;t1=t1+ylo;t1=t1+xlo; \ + } else { \ + t1=yhi-rlo;t1=t1+xhi;t1=t1+xlo;t1=t1+ylo; \ + } \ + rhi=rlo+t1; \ + rlo=rlo-rhi;rlo=rlo+t1; + +/* Addition: r=x+y */ +/* Variables r,x,y are pointers to struct ker80, */ +/* all other variables are in long double precision */ +/* Temporary variables: t1 */ +/* Correct if x and y belong to interval [2^-8000;2^8000], */ +/* or when one or both of them are zero */ +#if defined(SIZE_INT_32) +#define __LIBM_ADDL_K80(r,x,y, t1) \ + if ( ((y)->ex+(y)->fphi.exponent-134 < \ + (x)->ex+(x)->fphi.exponent) && \ + ((x)->ex+(x)->fphi.exponent < \ + (y)->ex+(y)->fphi.exponent+134) && \ + !SIGNIFICAND_ZERO_80(&((x)->fphi)) && \ + 
!SIGNIFICAND_ZERO_80(&((y)->fphi)) ) \ + { \ + /* y/2^134 < x < y*2^134, */ \ + /* and x,y are nonzero finite numbers */ \ + if ( (x)->ex != (y)->ex ) { \ + /* adjust x->ex to y->ex */ \ + /* t1 = 2^(x->ex - y->ex) */ \ + FP80(t1)->sign = 0; \ + FP80(t1)->exponent = BIAS_80 + (x)->ex-(y)->ex; \ + /* exponent is correct because */ \ + /* |x->ex - y->ex| = */ \ + /* = | (x->ex + x->fphi.exponent) - */ \ + /* -(y->ex + y->fphi.exponent) + */ \ + /* + y->fphi.exponent - */ \ + /* - x->fphi.exponent | < */ \ + /* < | (x->ex+x->fphi.exponent) - */ \ + /* -(y->ex+y->fphi.exponent) | + */ \ + /* +| y->fphi.exponent - */ \ + /* -x->fphi.exponent | < */ \ + /* < 134 + 16000 */ \ + FP80(t1)->hi_significand = 0x80000000; \ + FP80(t1)->lo_significand = 0x00000000; \ + (x)->ex = (y)->ex; \ + (x)->ldhi *= t1; \ + (x)->ldlo *= t1; \ + } \ + /* r==x+y */ \ + (r)->ex = (y)->ex; \ + __LIBM_ADDL2_K80( (r)->ldhi,(r)->ldlo, \ + (x)->ldhi,(x)->ldlo, (y)->ldhi,(y)->ldlo, t1 ); \ + } else if ( SIGNIFICAND_ZERO_80(&((x)->fphi)) || \ + ((y)->ex+(y)->fphi.exponent-BIAS_80 - 134 >= \ + (x)->ex+(x)->fphi.exponent-BIAS_80) ) \ + { \ + /* |x|<<|y| */ \ + *(r) = *(y); \ + } else { \ + /* |y|<<|x| */ \ + *(r) = *(x); \ + } +#elif defined(SIZE_INT_64) +#define __LIBM_ADDL_K80(r,x,y, t1) \ + if ( ((y)->ex+(y)->fphi.exponent-134 < \ + (x)->ex+(x)->fphi.exponent) && \ + ((x)->ex+(x)->fphi.exponent < \ + (y)->ex+(y)->fphi.exponent+134) && \ + !SIGNIFICAND_ZERO_80(&((x)->fphi)) && \ + !SIGNIFICAND_ZERO_80(&((y)->fphi)) ) \ + { \ + /* y/2^134 < x < y*2^134, */ \ + /* and x,y are nonzero finite numbers */ \ + if ( (x)->ex != (y)->ex ) { \ + /* adjust x->ex to y->ex */ \ + /* t1 = 2^(x->ex - y->ex) */ \ + FP80(t1)->sign = 0; \ + FP80(t1)->exponent = BIAS_80 + (x)->ex-(y)->ex; \ + /* exponent is correct because */ \ + /* |x->ex - y->ex| = */ \ + /* = | (x->ex + x->fphi.exponent) - */ \ + /* -(y->ex + y->fphi.exponent) + */ \ + /* + y->fphi.exponent - */ \ + /* - x->fphi.exponent | < */ \ + /* < | 
(x->ex+x->fphi.exponent) - */ \ + /* -(y->ex+y->fphi.exponent) | + */ \ + /* +| y->fphi.exponent - */ \ + /* -x->fphi.exponent | < */ \ + /* < 134 + 16000 */ \ + FP80(t1)->significand = 0x8000000000000000; \ + (x)->ex = (y)->ex; \ + (x)->ldhi *= t1; \ + (x)->ldlo *= t1; \ + } \ + /* r==x+y */ \ + (r)->ex = (y)->ex; \ + __LIBM_ADDL2_K80( (r)->ldhi,(r)->ldlo, \ + (x)->ldhi,(x)->ldlo, (y)->ldhi,(y)->ldlo, t1 ); \ + } else if ( SIGNIFICAND_ZERO_80(&((x)->fphi)) || \ + ((y)->ex+(y)->fphi.exponent-BIAS_80 - 134 >= \ + (x)->ex+(x)->fphi.exponent-BIAS_80) ) \ + { \ + /* |x|<<|y| */ \ + *(r) = *(y); \ + } else { \ + /* |y|<<|x| */ \ + *(r) = *(x); \ + } #endif +/* Addition: r=x+y */ +/* Variables r,x,y are pointers to struct ker80, */ +/* all other variables are in long double precision */ +/* Temporary variables: t1 */ +/* Correct for any finite x and y */ +#define __LIBM_ADDL_NORM_K80(r,x,y, t1) \ + if ( ((x)->fphi.exponent-BIAS_80<-8000) || \ + ((x)->fphi.exponent-BIAS_80>+8000) || \ + ((y)->fphi.exponent-BIAS_80<-8000) || \ + ((y)->fphi.exponent-BIAS_80>+8000) ) \ + { \ + __libm_normalizel_k80(x); \ + __libm_normalizel_k80(y); \ + } \ + __LIBM_ADDL_K80(r,x,y, t1) + +/* Subtraction: x-y */ +/* The result is sum rhi+rlo */ +/* Temporary variables: t1 */ +/* All variables are in long double precision */ +/* Correct if no overflow (algorithm by D.Knuth) */ +#define __LIBM_SUBL1_K80( rhi, rlo, x, y, t1 ) \ + rhi = x - y; \ + rlo = rhi - x; \ + t1 = rhi - rlo; \ + rlo = y + rlo; \ + t1 = x - t1; \ + rlo = t1 - rlo; + +/* Subtraction: (xhi+xlo) - (yhi+ylo) */ +/* The result is sum rhi+rlo */ +/* Temporary variables: t1 */ +/* All variables are in long double precision */ +/* Correct if no overflow (algorithm by T.J.Dekker) */ +#define __LIBM_SUBL2_K80( rhi,rlo,xhi,xlo,yhi,ylo, t1 ) \ + rlo = xhi-yhi; \ + if ( VALUE_GT_80(FP80(xhi),FP80(yhi)) ) { \ + t1=xhi-rlo;t1=t1-yhi;t1=t1-ylo;t1=t1+xlo; \ + } else { \ + t1=yhi+rlo;t1=xhi-t1;t1=t1+xlo;t1=t1-ylo; \ + } \ + rhi=rlo+t1; \ + 
rlo=rlo-rhi;rlo=rlo+t1; + +/* Subtraction: r=x-y */ +/* Variables r,x,y are pointers to struct ker80, */ +/* all other variables are in long double precision */ +/* Temporary variables: t1 */ +/* Correct if x and y belong to interval [2^-8000;2^8000], */ +/* or when one or both of them are zero */ +#if defined(SIZE_INT_32) +#define __LIBM_SUBL_K80(r,x,y, t1) \ + if ( ((y)->ex+(y)->fphi.exponent-134 < \ + (x)->ex+(x)->fphi.exponent) && \ + ((x)->ex+(x)->fphi.exponent < \ + (y)->ex+(y)->fphi.exponent+134) && \ + !SIGNIFICAND_ZERO_80(&((x)->fphi)) && \ + !SIGNIFICAND_ZERO_80(&((y)->fphi)) ) \ + { \ + /* y/2^134 < x < y*2^134, */ \ + /* and x,y are nonzero finite numbers */ \ + if ( (x)->ex != (y)->ex ) { \ + /* adjust x->ex to y->ex */ \ + /* t1 = 2^(x->ex - y->ex) */ \ + FP80(t1)->sign = 0; \ + FP80(t1)->exponent = BIAS_80 + (x)->ex-(y)->ex; \ + /* exponent is correct because */ \ + /* |x->ex - y->ex| = */ \ + /* = | (x->ex + x->fphi.exponent) - */ \ + /* -(y->ex + y->fphi.exponent) + */ \ + /* + y->fphi.exponent - */ \ + /* - x->fphi.exponent | < */ \ + /* < | (x->ex+x->fphi.exponent) - */ \ + /* -(y->ex+y->fphi.exponent) | + */ \ + /* +| y->fphi.exponent - */ \ + /* -x->fphi.exponent | < */ \ + /* < 134 + 16000 */ \ + FP80(t1)->hi_significand = 0x80000000; \ + FP80(t1)->lo_significand = 0x00000000; \ + (x)->ex = (y)->ex; \ + (x)->ldhi *= t1; \ + (x)->ldlo *= t1; \ + } \ + /* r==x-y */ \ + (r)->ex = (y)->ex; \ + __LIBM_SUBL2_K80( (r)->ldhi,(r)->ldlo, \ + (x)->ldhi,(x)->ldlo, (y)->ldhi,(y)->ldlo, t1 ); \ + } else if ( SIGNIFICAND_ZERO_80(&((x)->fphi)) || \ + ((y)->ex+(y)->fphi.exponent-BIAS_80 - 134 >= \ + (x)->ex+(x)->fphi.exponent-BIAS_80) ) \ + { \ + /* |x|<<|y| */ \ + (r)->ex = (y)->ex; \ + (r)->ldhi = -((y)->ldhi); \ + (r)->ldlo = -((y)->ldlo); \ + } else { \ + /* |y|<<|x| */ \ + *(r) = *(x); \ + } +#elif defined(SIZE_INT_64) +#define __LIBM_SUBL_K80(r,x,y, t1) \ + if ( ((y)->ex+(y)->fphi.exponent-134 < \ + (x)->ex+(x)->fphi.exponent) && \ + 
((x)->ex+(x)->fphi.exponent < \ + (y)->ex+(y)->fphi.exponent+134) && \ + !SIGNIFICAND_ZERO_80(&((x)->fphi)) && \ + !SIGNIFICAND_ZERO_80(&((y)->fphi)) ) \ + { \ + /* y/2^134 < x < y*2^134, */ \ + /* and x,y are nonzero finite numbers */ \ + if ( (x)->ex != (y)->ex ) { \ + /* adjust x->ex to y->ex */ \ + /* t1 = 2^(x->ex - y->ex) */ \ + FP80(t1)->sign = 0; \ + FP80(t1)->exponent = BIAS_80 + (x)->ex-(y)->ex; \ + /* exponent is correct because */ \ + /* |x->ex - y->ex| = */ \ + /* = | (x->ex + x->fphi.exponent) - */ \ + /* -(y->ex + y->fphi.exponent) + */ \ + /* + y->fphi.exponent - */ \ + /* - x->fphi.exponent | < */ \ + /* < | (x->ex+x->fphi.exponent) - */ \ + /* -(y->ex+y->fphi.exponent) | + */ \ + /* +| y->fphi.exponent - */ \ + /* -x->fphi.exponent | < */ \ + /* < 134 + 16000 */ \ + FP80(t1)->significand = 0x8000000000000000; \ + (x)->ex = (y)->ex; \ + (x)->ldhi *= t1; \ + (x)->ldlo *= t1; \ + } \ + /* r==x-y */ \ + (r)->ex = (y)->ex; \ + __LIBM_SUBL2_K80( (r)->ldhi,(r)->ldlo, \ + (x)->ldhi,(x)->ldlo, (y)->ldhi,(y)->ldlo, t1 ); \ + } else if ( SIGNIFICAND_ZERO_80(&((x)->fphi)) || \ + ((y)->ex+(y)->fphi.exponent-BIAS_80 - 134 >= \ + (x)->ex+(x)->fphi.exponent-BIAS_80) ) \ + { \ + /* |x|<<|y| */ \ + (r)->ex = (y)->ex; \ + (r)->ldhi = -((y)->ldhi); \ + (r)->ldlo = -((y)->ldlo); \ + } else { \ + /* |y|<<|x| */ \ + *(r) = *(x); \ + } +#endif + +/* Subtraction: r=x-y */ +/* Variables r,x,y are pointers to struct ker80, */ +/* all other variables are in long double precision */ +/* Temporary variables: t1 */ +/* Correct for any finite x and y */ +#define __LIBM_SUBL_NORM_K80(r,x,y, t1) \ + if ( ((x)->fphi.exponent-BIAS_80<-8000) || \ + ((x)->fphi.exponent-BIAS_80>+8000) || \ + ((y)->fphi.exponent-BIAS_80<-8000) || \ + ((y)->fphi.exponent-BIAS_80>+8000) ) \ + { \ + __libm_normalizel_k80(x); \ + __libm_normalizel_k80(y); \ + } \ + __LIBM_SUBL_K80(r,x,y, t1) + +/* Multiplication: x*y */ +/* The result is sum rhi+rlo */ +/* Here t32 is the constant 2^32+1 */ +/* Temporary 
variables: t1,t2,t3,t4,t5,t6 */ +/* All variables are in long double precision */ +/* Correct if no over/underflow (algorithm by T.J.Dekker) */ +#define __LIBM_MULL1_K80(rhi,rlo,x,y, \ + t32,t1,t2,t3,t4,t5,t6) \ + t1=(x)*(t32); t3=x-t1; t3=t3+t1; t4=x-t3; \ + t1=(y)*(t32); t5=y-t1; t5=t5+t1; t6=y-t5; \ + t1=(t3)*(t5); \ + t2=(t3)*(t6)+(t4)*(t5); \ + rhi=t1+t2; \ + rlo=t1-rhi; rlo=rlo+t2; rlo=rlo+(t4*t6); + +/* Multiplication: (xhi+xlo)*(yhi+ylo) */ +/* The result is sum rhi+rlo */ +/* Here t32 is the constant 2^32+1 */ +/* Temporary variables: t1,t2,t3,t4,t5,t6,t7,t8 */ +/* All variables are in long double precision */ +/* Correct if no over/underflow (algorithm by T.J.Dekker) */ +#define __LIBM_MULL2_K80(rhi,rlo,xhi,xlo,yhi,ylo, \ + t32,t1,t2,t3,t4,t5,t6,t7,t8) \ + __LIBM_MULL1_K80(t7,t8,xhi,yhi, t32,t1,t2,t3,t4,t5,t6) \ + t1=(xhi)*(ylo)+(xlo)*(yhi); t1=t1+t8; \ + rhi=t7+t1; \ + rlo=t7-rhi; rlo=rlo+t1; + +/* Multiplication: r=x*y */ +/* Variables r,x,y are pointers to struct ker80, */ +/* all other variables are in long double precision */ +/* Here t32 is the constant 2^32+1 */ +/* Temporary variables: t1,t2,t3,t4,t5,t6,t7,t8 */ +/* Correct if x and y belong to interval [2^-8000;2^8000] */ +#define __LIBM_MULL_K80(r,x,y, t32,t1,t2,t3,t4,t5,t6,t7,t8) \ + (r)->ex = (x)->ex + (y)->ex; \ + __LIBM_MULL2_K80((r)->ldhi,(r)->ldlo, \ + (x)->ldhi,(x)->ldlo,(y)->ldhi,(y)->ldlo, \ + t32,t1,t2,t3,t4,t5,t6,t7,t8) + +/* Multiplication: r=x*y */ +/* Variables r,x,y are pointers to struct ker80, */ +/* all other variables are in long double precision */ +/* Here t32 is the constant 2^32+1 */ +/* Temporary variables: t1,t2,t3,t4,t5,t6,t7,t8 */ +/* Correct for any finite x and y */ +#define __LIBM_MULL_NORM_K80(r,x,y, \ + t32,t1,t2,t3,t4,t5,t6,t7,t8) \ + if ( ((x)->fphi.exponent-BIAS_80<-8000) || \ + ((x)->fphi.exponent-BIAS_80>+8000) || \ + ((y)->fphi.exponent-BIAS_80<-8000) || \ + ((y)->fphi.exponent-BIAS_80>+8000) ) \ + { \ + __libm_normalizel_k80(x); \ + 
__libm_normalizel_k80(y); \ + } \ + __LIBM_MULL_K80(r,x,y, t32,t1,t2,t3,t4,t5,t6,t7,t8) + +/* Division: (xhi+xlo)/(yhi+ylo) */ +/* The result is sum rhi+rlo */ +/* Here t32 is the constant 2^32+1 */ +/* Temporary variables: t1,t2,t3,t4,t5,t6,t7,t8,t9 */ +/* All variables are in long double precision */ +/* Correct if no over/underflow (algorithm by T.J.Dekker) */ +#define __LIBM_DIVL2_K80(rhi,rlo,xhi,xlo,yhi,ylo, \ + t32,t1,t2,t3,t4,t5,t6,t7,t8,t9) \ + t7=(xhi)/(yhi); \ + __LIBM_MULL1_K80(t8,t9,t7,yhi, t32,t1,t2,t3,t4,t5,t6) \ + t1=xhi-t8; t1=t1-t9; t1=t1+xlo; t1=t1-(t7)*(ylo); \ + t1=(t1)/(yhi); \ + rhi=t7+t1; \ + rlo=t7-rhi; rlo=rlo+t1; + +/* Division: r=x/y */ +/* Variables r,x,y are pointers to struct ker80, */ +/* all other variables are in long double precision */ +/* Here t32 is the constant 2^32+1 */ +/* Temporary variables: t1,t2,t3,t4,t5,t6,t7,t8,t9 */ +/* Correct if x and y belong to interval [2^-8000;2^8000] */ +#define __LIBM_DIVL_K80(r,x,y, \ + t32,t1,t2,t3,t4,t5,t6,t7,t8,t9) \ + (r)->ex = (x)->ex - (y)->ex; \ + __LIBM_DIVL2_K80( (r)->ldhi,(r)->ldlo, \ + (x)->ldhi,(x)->ldlo,(y)->ldhi,(y)->ldlo, \ + t32,t1,t2,t3,t4,t5,t6,t7,t8,t9) + +/* Division: r=x/y */ +/* Variables r,x,y are pointers to struct ker80, */ +/* all other variables are in long double precision */ +/* Here t32 is the constant 2^32+1 */ +/* Temporary variables: t1,t2,t3,t4,t5,t6,t7,t8,t9 */ +/* Correct for any finite x and y */ +#define __LIBM_DIVL_NORM_K80(r,x,y, \ + t32,t1,t2,t3,t4,t5,t6,t7,t8,t9) \ + if ( ((x)->fphi.exponent-BIAS_80<-8000) || \ + ((x)->fphi.exponent-BIAS_80>+8000) || \ + ((y)->fphi.exponent-BIAS_80<-8000) || \ + ((y)->fphi.exponent-BIAS_80>+8000) ) \ + { \ + __libm_normalizel_k80(x); \ + __libm_normalizel_k80(y); \ + } \ + __LIBM_DIVL_K80(r,x,y, t32,t1,t2,t3,t4,t5,t6,t7,t8,t9) + +/* Square root: sqrt(xhi+xlo) */ +/* The result is sum rhi+rlo */ +/* Here t32 is the constant 2^32+1 */ +/* half is the constant 0.5 */ +/* Temporary variables: t1,t2,t3,t4,t5,t6,t7,t8,t9 */ +/* 
All variables are in long double precision */ +/* Correct for positive xhi+xlo (algorithm by T.J.Dekker) */ +#define __LIBM_SQRTL2_NORM_K80(rhi,rlo,xhi,xlo, \ + t32,half,t1,t2,t3,t4,t5,t6,t7,t8,t9) \ + t7=sqrtl(xhi); \ + __LIBM_MULL1_K80(t8,t9,t7,t7, t32,t1,t2,t3,t4,t5,t6) \ + t1=xhi-t8; t1=t1-t9; t1=t1+xlo; t1=(t1)*(half); \ + t1=(t1)/(t7); \ + rhi=t7+t1; \ + rlo=t7-rhi; rlo=rlo+t1; + +/* Square root: r=sqrt(x) */ +/* Variables r,x,y are pointers to struct ker80, */ +/* all other variables are in long double precision */ +/* Here t32 is the constant 2^32+1 */ +/* half is the constant 0.5 */ +/* Temporary variables: t1,t2,t3,t4,t5,t6,t7,t8,t9 */ +/* Correct if x belongs to interval [2^-16000;2^16000] */ +#define __LIBM_SQRTL_K80(r,x, \ + t32,half,t1,t2,t3,t4,t5,t6,t7,t8,t9) \ + if ( ((x)->ex & 1) == 1 ) { \ + (x)->ex = (x)->ex + 1; \ + (x)->ldhi *= half; \ + (x)->ldlo *= half; \ + } \ + (r)->ex = (x)->ex >> 1; \ + __LIBM_SQRTL2_NORM_K80( (r)->ldhi,(r)->ldlo, \ + (x)->ldhi,(x)->ldlo, \ + t32,half,t1,t2,t3,t4,t5,t6,t7,t8,t9) + +/* Square root: r=sqrt(x) */ +/* Variables r,x,y are pointers to struct ker80, */ +/* all other variables are in long double precision */ +/* Here t32 is the constant 2^32+1 */ +/* half is the constant 0.5 */ +/* Temporary variables: t1,t2,t3,t4,t5,t6,t7,t8,t9 */ +/* Correct for any positive x */ +#define __LIBM_SQRTL_NORM_K80(r,x, \ + t32,half,t1,t2,t3,t4,t5,t6,t7,t8,t9) \ + if ( ((x)->fphi.exponent-BIAS_80<-16000) || \ + ((x)->fphi.exponent-BIAS_80>+16000) ) \ + { \ + __libm_normalizel_k80(x); \ + } \ + __LIBM_SQRTL_K80(r,x, t32,half,t1,t2,t3,t4,t5,t6,t7,t8,t9) + + +#ifdef __INTEL_COMPILER +#define ALIGN(n) __declspec(align(n)) +#else /* __INTEL_COMPILER */ +#define ALIGN(n) +#endif /* __INTEL_COMPILER */ + +/* macros to form a long double value in hex representation (unsigned short type) */ + +#if (defined(__unix__) && defined(__i386__)) +# define LDOUBLE_ALIGN 12 /* IA32 Linux: 12-byte alignment */ +#else /*__linux__ & IA32*/ +# define 
LDOUBLE_ALIGN 16 /* EFI2/IA32 Win or IPF Win/Linux: 16-byte alignment */ +#endif /*__linux__ & IA32*/ + #if (LDOUBLE_ALIGN == 16) #define _XPD_ ,0x0000,0x0000,0x0000 #else /*12*/ @@ -451,7 +860,7 @@ struct fp80 { /*/ sign:1 exponent:15 significand:64 (NO implied bits) */ # define SIGNIFICAND_LT_HEX_64(X,HI,LO) ((X)->significand < 0x ## HI ## LO) # define SIGNIFICAND_LE_HEX_64(X,HI,LO) ((X)->significand <= 0x ## HI ## LO) #endif - + #if defined(SIZE_INT_32) # define SIGNIFICAND_EQ_HEX_80(X,HI,LO) \ (((X)->hi_significand == 0x ## HI) && ((X)->lo_significand == 0x ## LO)) @@ -514,15 +923,15 @@ struct fp80 { /*/ sign:1 exponent:15 significand:64 (NO implied bits) */ #if defined(SIZE_INT_32) # define SIGNIFICAND_EQ_64(X,Y) \ - (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand == (Y)->lo_significand)) + (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand == (Y)->lo_significand)) # define SIGNIFICAND_GT_64(X,Y) (((X)->hi_significand > (Y)->hi_significand) || \ - (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand > (Y)->lo_significand))) + (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand > (Y)->lo_significand))) # define SIGNIFICAND_GE_64(X,Y) (((X)->hi_significand > (Y)->hi_significand) || \ - (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand >= (Y)->lo_significand))) + (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand >= (Y)->lo_significand))) # define SIGNIFICAND_LT_64(X,Y) (((X)->hi_significand < (Y)->hi_significand) || \ - (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand < (Y)->lo_significand))) + (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand < (Y)->lo_significand))) # define SIGNIFICAND_LE_64(X,Y) (((X)->hi_significand < (Y)->hi_significand) || \ - (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand <= (Y)->lo_significand))) + (((X)->hi_significand == (Y)->hi_significand) && 
((X)->lo_significand <= (Y)->lo_significand))) #elif defined(SIZE_INT_64) # define SIGNIFICAND_EQ_64(X,Y) ((X)->significand == (Y)->significand) # define SIGNIFICAND_GT_64(X,Y) ((X)->significand > (Y)->significand) @@ -560,7 +969,7 @@ struct fp80 { /*/ sign:1 exponent:15 significand:64 (NO implied bits) */ (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LT_32(X, Y)))) #define VALUE_LE_32(X,Y) (((X)->exponent < (Y)->exponent) || \ (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LE_32(X, Y)))) - + #define VALUE_EQ_64(X,Y) \ (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_EQ_64(X, Y))) #define VALUE_GT_64(X,Y) (((X)->exponent > (Y)->exponent) || \ @@ -571,7 +980,7 @@ struct fp80 { /*/ sign:1 exponent:15 significand:64 (NO implied bits) */ (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LT_64(X, Y)))) #define VALUE_LE_64(X,Y) (((X)->exponent < (Y)->exponent) || \ (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LE_64(X, Y)))) - + #define VALUE_EQ_80(X,Y) \ (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_EQ_80(X, Y))) #define VALUE_GT_80(X,Y) (((X)->exponent > (Y)->exponent) || \ @@ -622,134 +1031,21 @@ struct fp80 { /*/ sign:1 exponent:15 significand:64 (NO implied bits) */ #endif +/* error codes */ -#if (defined(_WIN32) && !defined(_WIN64)) - -#define FP80_DECLARE() -#define _FPC_64 0x0300 -static unsigned short __wControlWord, __wNewControlWord; -#define FP80_SET() { \ - __asm { fnstcw word ptr [__wControlWord] } \ - __wNewControlWord = __wControlWord | _FPC_64; \ - __asm { fldcw word ptr [__wNewControlWord] } \ - } -#define FP80_RESET() { \ - __asm { fldcw word ptr [__wControlWord] } \ - } -#else /* defined(_WIN32) && !defined(_WIN64) */ - -#define FP80_DECLARE() -#define FP80_SET() -#define FP80_RESET() - -#endif /* defined(_WIN32) && !defined(_WIN64) */ - - -#ifdef _LIBC -# include <math.h> -#else - -static const unsigned INF[] = { - DOUBLE_HEX(7ff00000, 00000000), - DOUBLE_HEX(fff00000, 00000000) -}; - -static const double _zeroo = 0.0; -static const 
double _bigg = 1.0e300; -static const double _ponee = 1.0; -static const double _nonee = -1.0; - -#define INVALID (_zeroo * *((double*)&INF[0])) -#define PINF *((double*)&INF[0]) -#define NINF -PINF -#define PINF_DZ (_ponee/_zeroo) -#define X_TLOSS 1.41484755040568800000e+16 -#endif - -struct exceptionf -{ - int type; - char *name; - float arg1, arg2, retval; -}; - -# ifdef __cplusplus -struct __exception -{ - int type; - char *name; - double arg1, arg2, retval; -}; -# else - -# ifndef _LIBC -struct exception -{ - int type; - char *name; - double arg1, arg2, retval; -}; -# endif -# endif - - - -struct exceptionl -{ - int type; - char *name; - long double arg1, arg2, retval; -}; - -#ifdef _MS_ -#define MATHERR_F _matherrf -#define MATHERR_D _matherr -#else -#define MATHERR_F matherrf -#define MATHERR_D matherr -#endif - -# ifdef __cplusplus -#define EXC_DECL_D __exception -#else -// exception is a reserved name in C++ -#define EXC_DECL_D exception -#endif - -extern int MATHERR_F(struct exceptionf*); -extern int MATHERR_D(struct EXC_DECL_D*); -extern int matherrl(struct exceptionl*); - +#define DOMAIN 1 /* argument domain error */ +#define SING 2 /* argument singularity */ +#define OVERFLOW 3 /* overflow range error */ +#define UNDERFLOW 4 /* underflow range error */ +#define TLOSS 5 /* total loss of precision */ +#define PLOSS 6 /* partial loss of precision */ -/* Set these appropriately to make thread Safe */ -#define ERRNO_RANGE errno = ERANGE -#define ERRNO_DOMAIN errno = EDOM +/* */ +#define VOLATILE_32 /*volatile*/ +#define VOLATILE_64 /*volatile*/ +#define VOLATILE_80 /*volatile*/ -// Add code to support _LIB_VERSIONIMF -#ifndef _LIBC -typedef enum -{ - _IEEE_ = -1, // IEEE-like behavior - _SVID_, // SysV, Rel. 
4 behavior - _XOPEN_, // Unix98 - _POSIX_, // Posix - _ISOC_ // ISO C9X -} _LIB_VERSION_TYPE; - - -#if !defined( LIBM_BUILD ) -#if defined( _DLL ) -extern _LIB_VERSION_TYPE __declspec(dllimport) _LIB_VERSIONIMF; -#else -extern _LIB_VERSION_TYPE _LIB_VERSIONIMF; -#endif /* _DLL */ -#else -extern int (*pmatherrf)(struct exceptionf*); -extern int (*pmatherr)(struct EXC_DECL_D*); -extern int (*pmatherrl)(struct exceptionl*); -#endif /* LIBM_BUILD */ +#define QUAD_TYPE _Quad -// This is a run-time variable and may affect -// floating point behavior of the libm functions -#endif +#endif /*__LIBM_SUPPORT_H_INCLUDED__*/ diff --git a/sysdeps/ia64/fpu/s_asinh.S b/sysdeps/ia64/fpu/s_asinh.S index a9ef4e1..ab01f4f 100644 --- a/sysdeps/ia64/fpu/s_asinh.S +++ b/sysdeps/ia64/fpu/s_asinh.S @@ -1134,3 +1134,4 @@ ASINH_UNORM: ;; GLOBAL_LIBM_END(asinh) + diff --git a/sysdeps/ia64/fpu/s_asinhl.S b/sysdeps/ia64/fpu/s_asinhl.S index fcb4e6e..d3a5507 100644 --- a/sysdeps/ia64/fpu/s_asinhl.S +++ b/sysdeps/ia64/fpu/s_asinhl.S @@ -1344,3 +1344,4 @@ near_0: GLOBAL_LIBM_END(asinhl) + diff --git a/sysdeps/ia64/fpu/s_atanf.S b/sysdeps/ia64/fpu/s_atanf.S index fb7f4a3..4da68c7 100644 --- a/sysdeps/ia64/fpu/s_atanf.S +++ b/sysdeps/ia64/fpu/s_atanf.S @@ -553,3 +553,4 @@ ATANF_X_INF_NAN_ZERO: ;; GLOBAL_LIBM_END(atanf) + diff --git a/sysdeps/ia64/fpu/s_atanl.S b/sysdeps/ia64/fpu/s_atanl.S index bfd9f45..721a38c 100644 --- a/sysdeps/ia64/fpu/s_atanl.S +++ b/sysdeps/ia64/fpu/s_atanl.S @@ -812,6 +812,7 @@ GLOBAL_IEEE754_ENTRY(atanl) ;; GLOBAL_IEEE754_END(atanl) + GLOBAL_IEEE754_ENTRY(atan2l) { .mfi @@ -1951,6 +1952,7 @@ ATANL_ArgY_Not_INF: ;; GLOBAL_IEEE754_END(atan2l) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/s_cbrt.S b/sysdeps/ia64/fpu/s_cbrt.S index b7a827d..7a74ac1 100644 --- a/sysdeps/ia64/fpu/s_cbrt.S +++ b/sysdeps/ia64/fpu/s_cbrt.S @@ -35,7 +35,7 @@ // // Intel Corporation is the author of this code, and requests that all // problem reports or change 
requests be submitted to it directly at -// http: //www.intel.com/software/products/opensource/libraries/num.htm. +// http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== diff --git a/sysdeps/ia64/fpu/s_cbrtf.S b/sysdeps/ia64/fpu/s_cbrtf.S index c8c6500..612fb85 100644 --- a/sysdeps/ia64/fpu/s_cbrtf.S +++ b/sysdeps/ia64/fpu/s_cbrtf.S @@ -35,7 +35,7 @@ // // Intel Corporation is the author of this code, and requests that all // problem reports or change requests be submitted to it directly at -// http: //www.intel.com/software/products/opensource/libraries/num.htm. +// http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== @@ -762,3 +762,4 @@ GLOBAL_LIBM_END(cbrtf) + diff --git a/sysdeps/ia64/fpu/s_cbrtl.S b/sysdeps/ia64/fpu/s_cbrtl.S index 3e621e2..76ef12f 100644 --- a/sysdeps/ia64/fpu/s_cbrtl.S +++ b/sysdeps/ia64/fpu/s_cbrtl.S @@ -1,7 +1,7 @@ .file "cbrtl.s" -// Copyright (c) 2000 - 2003, Intel Corporation +// Copyright (c) 2000 - 2004, Intel Corporation // All rights reserved. // // Contributed 2000 by the Intel Numerics Group, Intel Corporation @@ -21,27 +21,28 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History //============================================================== -// 04/28/00 Initial version +// 04/28/00 Initial version // 05/20/02 Cleaned up namespace and sf0 syntax -// 02/06/03 Reordered header: .section, .global, .proc, .align +// 02/06/03 Reordered header:.section,.global,.proc,.align +// 11/23/04 Reformatted routine and improved speed // // API //============================================================== @@ -53,49 +54,93 @@ // // Implementation // -// cbrt(a) = cbrt(a y) / cbrt(y) -// = cbrt(1 - (1 - a y)) * 1/cbrt(y) +// The result is computed as +// cbrt(x)= cbrt(1 - (1 - x*y)) * (1/cbrt(y)) +// where y = frcpa(x) = (-1)^sgn_y * 2^(3*k+j) * m_y, +// m_y in [1,2), j in {0,1,2} // -// where y = frcpa(a). 
+// cbrt(1 - (1 - x*y)) is approximated by a degree-6 polynomial +// in r= 1 - x*y : +// P = 1 - c_1 r - c_2 * r^2 - c_3 * r^3 - c_4 * r^4 - c_5 * r^5 - c_6 * r^6 // -// * cbrt(1 - (1 - a y)) is approximated by a degree-6 polynomial -// -// 1 - c_1 r - c_2 * r^2 - c_3 * r^3 - c_4 * r^4 - c_5 * r^5 - c_6 * r^6 -// -// in r = 1 - a y. // -// * The values 1/cbrt(y) are stored as two tables of constants T_hi -// (double-extended precision) and D (single precision) as follows: +// The values (1/cbrt(y)) are stored as two tables of constants T_hi +// (double-extended precision) and D (single precision) as follows: +// T_hi (1 + D)= 1/cbrt(y) to about 80 bits of accuracy // -// T_hi (1 + D) = 1/cbrt(y) to about 80 bits of accuracy +// The tables are only stored for three exponent values (i.e. +// only for 2^j * m_y, where j in {0,1,2} and m_y covers the 256 +// possible mantissas for an frcpa result); the index is formed +// by the 8 leading mantissa bits of x, which is the same index used +// by the hardware to get frcpa(x). // -// The tables are only stored for three exponent values and are -// then multiplied by e/3 where e is the exponent of the input number. -// This computation is carried out in parallel with the polynomial -// evaluation: +// The table values are multiplied by 2^k where e is the exponent of +// the input number. 
This multiplication is carried out in parallel with +// the polynomial evaluation: +// T= 2^(k) * T_hi // -// T = 2^(e/3) * T_hi - - - - +//======================================================================= //=============== -// input = x -// C = frcpa(x) -// r = C * x - 1 -// -// Special values +// Special values //============================================================== - - // Registers used //============================================================== -// f6-f15 -// r2-r3, r23-r30 -// p6,p7,p12 - +// p6, p7, p12 + FR_R = f6 + FR_C1 = f7 + FR_C2 = f9 + FR_C3 = f10 + FR_C4 = f11 + FR_C5 = f12 + FR_C6 = f13 + FR_XNORM = f14 + FR_D = f15 + FR_SPECIAL = f32 + FR_RCP = f33 + FR_R2 = f34 + FR_P1 = f35 + FR_P2 = f36 + FR_P3 = f37 + FR_P4 = f38 + FR_P5 = f39 + FR_R3 = f40 + FR_T = f41 + FR_TF = f42 + FR_P = f43 + FR_SGNEXP = f44 + + GR_ADDR = r2 + GR_C_START = r2 + GR_ARGSIG = r3 + GR_NORMSIG = r15 + GR_D_ADDR = r16 + GR_D_START = r16 + GR_INDEX2 = r17 + GR_IX2 = r17 + GR_NORMEXP = r18 + GR_EXP5 = r19 + GR_EXP3 = r20 + GR_EXP6 = r20 + GR_EXP17 = r21 + GR_TMP1 = r21 + GR_SGNMASK = r22 + GR_T_INDEX = r23 + GR_IX_T = r23 + GR_IX_D = r24 + GR_D_INDEX = r24 + GR_TMP2 = r25 + GR_TMP3 = r25 + GR_TMP4 = r25 + GR_EXP_RES = r26 + GR_BIAS23 = r27 + GR_EXPBIAS = r27 + GR_EXP_MOD_3 = r28 + GR_SIGN = r29 + GR_EXPSIGNRES = r29 + GR_REMTMP = r30 + GR_NORMEXPSGN = r31 // Data tables @@ -107,601 +152,596 @@ RODATA LOCAL_OBJECT_START(poly_coeffs) -data8 0xaaaaaaaaaaaaaab1, 0x00003ffd // C_1 -data8 0xe38e38e38e38e3e0, 0x00003ffb // C_2 -data8 0x3faf9add3c0be9a6, 0x3fa511e8d2b1f749 // C_3, C_4 -data8 0x3f9ee71b2c6ebe99, 0x3f9809180fd0340c // C_5, C_6 + data8 0xaaaaaaaaaaaaaab1, 0x00003ffd // C_1 + data8 0xe38e38e38e38e3e0, 0x00003ffb // C_2 + data8 0x3faf9add3c0be9a6, 0x3fa511e8d2b1f749 // C_3, C_4 + data8 0x3f9ee71b2c6ebe99, 0x3f9809180fd0340c // C_5, C_6 LOCAL_OBJECT_END(poly_coeffs) LOCAL_OBJECT_START(T_table) - -data8 0x80155c748c374836, 0x8040404b0879f7f9 -data8 
0x806b5dce4b405c10, 0x8096b586974669b1 -data8 0x80bcd273d952a028, 0x80e898c52813f2f3 -data8 0x81149add67c2d208, 0x813b4e2c856b6e9a -data8 0x8167c1dde03de7aa, 0x818ed973b811135e -data8 0x81bbc0c33e13ec98, 0x81e33e69fbe7504a -data8 0x820aec524e3c23e9, 0x823880f78e70b805 -data8 0x826097a62a8e5200, 0x8288dfe00e9b5eaf -data8 0x82b15a10c5371624, 0x82da06a527b18937 -data8 0x8302e60b635ab394, 0x832bf8b2feec2f0e -data8 0x83553f0ce00e276b, 0x837eb98b50f8322a -data8 0x83a270f44c84f699, 0x83cc4d7cfcfac5ca -data8 0x83f65f78a8872b4c, 0x8420a75f2f7b53c8 -data8 0x844510461ff14209, 0x846fbd91b930bed2 -data8 0x84947e18234f3294, 0x84bf92755825045a -data8 0x84e4ac0ee112ba51, 0x8509ef44b86f20be -data8 0x85359d5d91768427, 0x855b3bd5b7384357 -data8 0x858104f0c415f79a, 0x85a6f90390d29864 -data8 0x85d3772fcd56a1dd, 0x85f9c982fcc002f3 -data8 0x862047e0e7ea554b, 0x8646f2a26f7f5852 -data8 0x866dca21754096b5, 0x8694ceb8dfd17a37 -data8 0x86bc00c49e9307e8, 0x86dccd74fce79610 -data8 0x870453c845acf90f, 0x872c089a1e90342c -data8 0x8753ec4a92d16c5e, 0x877bff3aca19f6b4 -data8 0x879d88b6fe1c324c, 0x87c5f346dbf98c3a -data8 0x87e7c653efacef2c, 0x881089d4e73ffefc -data8 0x88397e6a366f2a8a, 0x885bc559e5e1c081 -data8 0x887e2ee392bb7a93, 0x88a7a8587e404257 -data8 0x88ca5eda67594784, 0x88f4356166bd590e -data8 0x89173a0acf5ce026, 0x893a62a098b6a57b -data8 0x895daf637236ae2c, 0x89883b9d1c2fa9c5 -data8 0x89abd8dd374a5d7b, 0x89cf9b1dcd197fa0 -data8 0x89f382a258ea79de, 0x8a178faf06648f29 -data8 0x8a3bc288b3e1d18a, 0x8a601b74f4d1f835 -data8 0x8a849aba14274764, 0x8aa9409f16cdbc9b -data8 0x8ace0d6bbe2cb316, 0x8af301688ab33558 -data8 0x8b181cdebe6f3206, 0x8b3d60185fafcb7c -data8 0x8b62cb603bb2fad0, 0x8b80d7d6bc4104de -data8 0x8ba68bf73ac74f39, 0x8bcc68fb9f9f7335 -data8 0x8bf26f31c534fca2, 0x8c10f86e13a1a1f9 -data8 0x8c3749916cc6abb5, 0x8c5dc4c4f7706032 -data8 0x8c7cac3a8c42e3e0, 0x8ca373f1b7bf2716 -data8 0x8cc29907fb951294, 0x8ce9ae4e9492aac8 -data8 0x8d0911dddbfdad0e, 0x8d3075c4f20f04ee -data8 0x8d5018a9d4de77d5, 
0x8d77cc47dd143515 -data8 0x8d97af6352739cb7, 0x8db7af523167800f -data8 0x8ddfd80bc68c32ff, 0x8e00197e1e7c88fe -data8 0x8e207859f77e20e7, 0x8e40f4ce60c9f8e2 -data8 0x8e69ba46cf2fde4d, 0x8e8a7a00bd7ae63e -data8 0x8eab57ef1cf2f529, 0x8ecc5442cffb1dad -data8 0x8eed6f2d2a4acbfe, 0x8f0ea8dff24441ff -data8 0x8f385c95d696b817, 0x8f59dc43edd930f3 -data8 0x8f7b7b5f5ffad1c4, 0x8f9d3a1bea165f38 -data8 0x8fbf18adc34b66da, 0x8fe117499e356095 -data8 0x90033624aa685f8d, 0x9025757495f36b86 -data8 0x903f3a5dcc091203, 0x9061b2fceb2bdbab -data8 0x90844ca7211032a7, 0x90a7079403e6a15d -data8 0x90c9e3fbafd63799, 0x90ece216c8a16ee4 -data8 0x9110021e7b516f0a, 0x912a708a39be9075 -data8 0x914dcc7b31146370, 0x91714af8cfe984d5 -data8 0x918c00a6f3795e97, 0x91afbc299ed0295d -data8 0x91d39add3e958db0, 0x91ee9920a8974d92 -data8 0x9212b5fcac537c19, 0x9236f6b256923fcf -data8 0x92523ee6f90dcfc3, 0x9276bef031e6eb79 -data8 0x929236ec237a24ad, 0x92b6f70b7efe9dc3 -data8 0x92d29f61eec7dc2b, 0x92f7a05d5b8ba92f -data8 0x931379a403be5c16, 0x9338bc44de2e3f34 -data8 0x9354c71412c69486, 0x937a4c273907e262 -data8 0x93968919f6e7975d, 0x93bc516fdd4680c9 -data8 0x93d8c123d9be59b2, 0x93f546c955e60076 -data8 0x941b70a65879079f, 0x943829f337410591 -data8 0x9454f995765bc4d2, 0x947b86b57f5842ed -data8 0x94988aeb23470f86, 0x94b5a5dc9695f42a -data8 0x94d2d7a9170d8b42, 0x94f9e87dd78bf019 -data8 0x95175019a503d89e, 0x9534cefa625fcb3a -data8 0x955265405c491a25, 0x9570130c1f9bb857 -data8 0x9597ca4119525184, 0x95b5af6fb5aa4d3c -data8 0x95d3ac9273aafd7a, 0x95f1c1cafdfd3684 -data8 0x960fef3b430b8d5f, 0x962e350575b409c5 -data8 0x964c934c0dfc1708, 0x966b0a31c9c6bc7d -data8 0x968999d9ad8d264e, 0x96a8426705198795 -data8 0x96c703fd64445ee5, 0x96e5dec0a7b4268d -data8 0x9704d2d4f59f79f3, 0x9723e05ebe91b9b0 -data8 0x97430782be323831, 0x97624865fc0df8bf -data8 0x9781a32dcc640b2a, 0x97a117ffd0f48e46 -data8 0x97c0a701f9d263c9, 0x97e0505a8637a036 -data8 0x97f57a9fb0b08c6e, 0x9815503365914a9d -data8 0x98354085054fd204, 0x98554bbbf8a77902 
-data8 0x987571fffb7f94f6, 0x9895b3791dd03c23 -data8 0x98ab43a5fc65d0c8, 0x98cbb2d196bd713d -data8 0x98ec3d9ec7b6f21a, 0x990ce436db5e8344 -data8 0x9922b8218160967a, 0x99438d686f75779d -data8 0x99647eea131fa20b, 0x997a85045a47c6d0 -data8 0x999ba5f14f8add02, 0x99bce38b5465ecae -data8 0x99d31ca0887f30f9, 0x99f48a669c74c09e -data8 0x9a16154eb445c873, 0x9a2c822ec198d667 -data8 0x9a4e3e080cd91b78, 0x9a70177afe52322e -data8 0x9a86b8fa94eebe10, 0x9aa8c42866ae2958 -data8 0x9abf86f9e12fc45e, 0x9ae1c462fc05f49d -data8 0x9af8a8dc936b84d0, 0x9b1b19033be35730 -data8 0x9b3da7daf04c2892, 0x9b54c2e4c8a9012b -data8 0x9b77854e6c661200, 0x9b8ec2e678d56d2f -data8 0x9ba60e6a5ca133b6, 0x9bc919ea66a151a4 -data8 0x9be0887c09ef82bb, 0x9c03c8d5fffc3503 -data8 0x9c1b5ad21a81cbb9, 0x9c3ed09216e9ca02 -data8 0x9c568656c0423def, 0x9c7a320af242ce60 -data8 0x9c920bf7a8c01dc2, 0x9ca9f475d98b159c -data8 0x9ccdeca60e80b5f8, 0x9ce5f9d4653d4902 -data8 0x9cfe15cb38bfdd8e, 0x9d225b983f6c1f96 -data8 0x9d3a9cca32261ed7, 0x9d52ecfccebe1768 -data8 0x9d77818d95b82f86, 0x9d8ff7893fa4706c -data8 0x9da87cbef36f2a5e, 0x9dcd6140b4a35aeb -data8 0x9de60cd06dc6e2d4, 0x9dfec7d4cc43b76f -data8 0x9e17925ec9fccc4a, 0x9e3cdf6db57dc075 -data8 0x9e55d110b63637a8, 0x9e6ed27594550d2e -data8 0x9e87e3adc385d393, 0x9ead9b54b37a1055 -data8 0x9ec6d46a3d7de215, 0x9ee01d9108be3154 -data8 0x9ef976db07288d04, 0x9f12e05a4759ec25 -data8 0x9f2c5a20f4da6668, 0x9f52af78ed1733ca -data8 0x9f6c52426a39d003, 0x9f860593d42fd7f3 -data8 0x9f9fc97fdb96bd51, 0x9fb99e194f4a7037 -data8 0x9fd383731ca51db9, 0x9fed79a04fbf9423 -data8 0xa00780b413b24ee8, 0xa02eab2c4474b0cd -data8 0xa048dcd51ccfd142, 0xa0631fa894b11b8d -data8 0xa07d73ba65e680af, 0xa097d91e6aaf71b0 -data8 0xa0b24fe89e02602f, 0xa0ccd82d1bd2f68b -data8 0xa0e77200215909e6, 0xa1021d760d584855 -data8 0xa11cdaa36068a57d, 0xa137a99cbd3f880b -data8 0xa160019ed37fb4ae, 0xa1960b5966da4608 -data8 0xa1cc5dbe6dc2aab4, 0xa202f97995b69c0d -data8 0xa232fe6eb0c0577d, 0xa26a2582012f6e17 -data8 
0xa2a197e5d10465cb, 0xa2d25a532efefbc8 -data8 0xa30a5bd6e49e4ab8, 0xa33b9c9b59879e24 -data8 0xa3742fca6a3c1f21, 0xa3a5f1273887bf22 -data8 0xa3d7ef508ff11574, 0xa4115ce30548bc15 -data8 0xa443df0e53df577a, 0xa4769fa5913c0ec3 -data8 0xa4a99f303bc7def5, 0xa4dcde37779adf4b -data8 0xa5105d46152c938a, 0xa5441ce89825cb8d -data8 0xa5781dad3e54d899, 0xa5ac602406c4e68c -data8 0xa5d9601d95c2c0bc, 0xa60e1e1a2de14745 -data8 0xa6431f6e3fbd9658, 0xa67864b0d432fda4 -data8 0xa6a6444aa0243c0b, 0xa6dc094d10f25792 -data8 0xa70a574cc02bba69, 0xa7409e2af9549084 -data8 0xa76f5c64ca2cf13b, 0xa79e4f0babab5dc0 -data8 0xa7d5579ae5164b85, 0xa804bd3c6fe61cc8 -data8 0xa8345895e5250a5a, 0xa8642a122b44ef0b -data8 0xa89c38ca18f6108b, 0xa8cc81063b6e87ca -data8 0xa8fd00bfa409285e, 0xa92db8664d5516da -data8 0xa95ea86b75cc2c20, 0xa98fd141a4992deb -data8 0xa9c1335cae7446ba, 0xa9ea8686f556f645 -data8 0xaa1c52d17906bb19, 0xaa4e59b046dab887 -data8 0xaa809b9c60d1890b, 0xaab319102f3f9b33 -data8 0xaadd5a18c1e21274, 0xab1045f2ac31bdf5 -data8 0xab3ae3ab2df7231e, 0xab6e3f945d1e96fc -data8 0xaba1d953a08fa94e, 0xabcd090db7ef4c3f -data8 0xabf864602d7c323d, 0xac2ca5886ccf9b57 -data8 0xac5861d4aa441f0f, 0xac8d183fe3a2fbed -data8 0xacb93703ff51571e, 0xace5830ad0c3f14b -data8 0xad11fca5d78b3ff2, 0xad4797fddf91a798 -data8 0xad747701e559ebcb, 0xada184a47e9c7613 -data8 0xadcec13ab0dda8ff, 0xadfc2d1a5fd21ba8 -data8 0xae29c89a5053c33a, 0xae5794122b638df9 -data8 0xae858fda8137ae0a, 0xaeb3bc4ccc56d3d1 -data8 0xaee219c374c09920, 0xaf10a899d3235fe7 -data8 0xaf3f692c341fe8b4, 0xaf6e5bd7db9ae6c2 -data8 0xaf9d80fb081cd91b, 0xafc35ce063eb3787 -data8 0xaff2ddcb5f28f03d, 0xb022923b148e05c5 -data8 0xb0527a919adbf58b, 0xb078f3ab1d701c65 -data8 0xb0a93a6870649f31, 0xb0d9b624d62ec856 -data8 0xb100a5f53fb3c8e1, 0xb131821882f5540a -data8 0xb158bf8e4cb04055, 0xb189fd69d56b238f -data8 0xb1b189958e8108e4, 0xb1e32a8165b09832 -data8 0xb20b0678fc271eec, 0xb23d0bd3f7592b6e -data8 0xb26538b2db8420dc, 0xb28d89e339ceca14 -data8 0xb2c022ca12e55a16, 
0xb2e8c6852c6b03f1 -data8 0xb3118f4eda9fe40f, 0xb33a7d6268109ebe -data8 0xb36ddbc5ea70ec55, 0xb3971e9b39264023 -data8 0xb3c0877ecc18e24a, 0xb3ea16ae3a6c905f -data8 0xb413cc67aa0e4d2d, 0xb43da8e9d163e1af -data8 0xb47233773b84d425, 0xb49c6825430fe730 -data8 0xb4c6c46bcdb27dcf, 0xb4f1488c0b35d26f -data8 0xb51bf4c7c51f0168, 0xb546c9616087ab9c -data8 0xb571c69bdffd9a70, 0xb59cecbae56984c3 -data8 0xb5bd64512bb14bb7, 0xb5e8d2a4bf5ba416 -data8 0xb6146a9a1bc47819, 0xb6402c7749d621c0 -data8 0xb66c1882fb435ea2, 0xb6982f048c999a56 -data8 0xb6c47044075b4142, 0xb6e5bd6bfd02bafd -data8 0xb7124a2736ff8ef2, 0xb73f026a01e94177 -data8 0xb760a959f1d0a7a7, 0xb78dae7e06868ab0 -data8 0xb7badff8ad9e4e02, 0xb7dce25b8e17ae9f -data8 0xb80a6226904045e2, 0xb8380f1cafd73c1c -data8 0xb85a6ea8e321b4d8, 0xb8886b684ae7d2fa -data8 0xb8ab0726fa00cf5d, 0xb8d954a4d13b7cb1 -data8 0xb8fc2d4f6cd9f04a, 0xb92acc851476b1ab -data8 0xb94de2d841a184c2, 0xb97cd4c36c92693c -data8 0xb9a0297f172665e3, 0xb9cf6f21e36c3924 -data8 0xb9f3030951267208, 0xba229d6a618e7c59 -data8 0xba467144459f9855, 0xba6a60c3c48f1a4b -data8 0xba9a76056b67ee7a, 0xbabea699563ada6e -data8 0xbae2f350b262cc4b, 0xbb1385a23be24e57 -data8 0xbb3814975e17c680, 0xbb5cc031009bf467 -data8 0xbb81889680024764, 0xbbb2c0d8703ae95d -data8 0xbbd7cd09ba3c5463, 0xbbfcf68c4977718f -data8 0xbc223d88cfc88eee, 0xbc47a2284fee4ff8 -data8 0xbc79ac0916ed7b8a, 0xbc9f5670d1a13030 -data8 0xbcc51f068cb95c1d, 0xbceb05f4b30a9bc0 -data8 0xbd110b6604c7d306, 0xbd372f8598620f19 -data8 0xbd5d727edb6b3c7e, 0xbd83d47d937bbc6d -data8 0xbdaa55addf1ae47d, 0xbdd0f63c36aa73f0 -data8 0xbdf7b6556d550a15, 0xbe1e9626b1ffa96b -data8 0xbe4595dd903e5371, 0xbe6cb5a7f14bc935 -data8 0xbe93f5b41d047cf7, 0xbebb5630bae4c15f -data8 0xbee2d74cd30a430c, 0xbf0a7937cf38d981 -data8 0xbf323c217be2bc8c, 0xbf5a203a09342bbb -data8 0xbf74cad1c14ebfc4, 0xbf9ce6a497a89f78 -data8 0xbfc52428bec6e72f, 0xbfed838fddab024b -data8 0xc016050c0420981a, 0xc03ea8cfabddc330 -data8 0xc059d3cbd65ddbce, 0xc082b122a3c78c9d 
-data8 0xc0abb1499ae736c4, 0xc0d4d474c3aedaaf -data8 0xc0f054ca33eb3437, 0xc119b2c67e600ed0 -data8 0xc1433453de2033ff, 0xc15ef3e44e10032d -data8 0xc188b130431d80e6, 0xc1b2929d6067730e -data8 0xc1ce9268f31cc734, 0xc1f8b0877c1b0c08 -data8 0xc222f35a87b415ba, 0xc23f3467349e5c88 -data8 0xc269b4e40e088c01, 0xc2945aac24daaf6e -data8 0xc2b0de05e43c1d66, 0xc2dbc275e1229d09 -data8 0xc2f86fca9d80eeff, 0xc323938449a2587e -data8 0xc3406b40a538ed20, 0xc36bcee8211d15e0 -data8 0xc397593adf2ba366, 0xc3b475b6206155d5 -data8 0xc3e0410243b97383, 0xc3fd890709833d37 -data8 0xc41ae295f7e7fa06, 0xc44709f7bb8a4dd2 -data8 0xc4648fb0e0bec4c1, 0xc490f9a94695ba14 -data8 0xc4aeac0173b7d390, 0xc4db5941007aa853 -data8 0xc4f938aec206291a, 0xc52629e899dfd622 -data8 0xc54436e44043b965, 0xc562563abf9ea07f -data8 0xc58fa7d1dc42921c, 0xc5adf561b91e110a -data8 0xc5cc5591bdbd82fa, 0xc5fa08f1ff20593c -data8 0xc618980a79ce6862, 0xc6373a09e34b50fa -data8 0xc66550a6e0baaf35, 0xc6842241926342c9 -data8 0xc6a3070b7c93bb9e, 0xc6d18260bb84081b -data8 0xc6f0977c9416828b, 0xc70fc0117c641630 -data8 0xc72efc34d7e615be, 0xc75dfb441594141e -data8 0xc77d68aa019bda4c, 0xc79ce9ea478dbc4f -data8 0xc7bc7f1ae453219d, 0xc7ec0476e15e141a -data8 0xc80bcbe16f1d540f, 0xc82ba78a5d349735 -data8 0xc84b978847a06b87, 0xc86b9bf1ee817bc6 -data8 0xc88bb4de3667cdf4, 0xc8bc00e7fe9e23a3 -data8 0xc8dc4d7ff2d25232, 0xc8fcaeebcb40eb47 -data8 0xc91d25431426a663, 0xc93db09d7fdb2949 -data8 0xc95e5112e721582a, 0xc97f06bb49787677 -data8 0xc99fd1aecd6e1b06, 0xc9d12a3e27bb1625 -data8 0xc9f22ad82ba3d5f0, 0xca134113105e67b2 -data8 0xca346d07b045a876, 0xca55aecf0e94bb88 -data8 0xca77068257be9bab, 0xca98743ae1c693a8 -data8 0xcab9f8122c99a101, 0xcadb9221e268c3b5 -data8 0xcafd4283d8043dfd, 0xcb1f09520d37c6fb -data8 0xcb51ddcb9e93095e, 0xcb95f333968ad59b -data8 0xcbda64292d3ffd97, 0xcc1f3184af961596 -data8 0xcc5bb1ac954d33e2, 0xcca12e9831fc6402 -data8 0xcce70a67b64f24ad, 0xcd24794726477ea5 -data8 0xcd6b096a0b70ee87, 0xcda9177738b15a90 -data8 
0xcdf05f2247dffab9, 0xce2f0f347f96f906 -data8 0xce6e0be0cd551a61, 0xceb666b2c347d1de -data8 0xcef609b0cb874f00, 0xcf35fb5447e5c765 -data8 0xcf763c47ee869f00, 0xcfb6cd3888d71785 -data8 0xcff7aed4fbfbb447, 0xd038e1ce5167e3c6 -data8 0xd07a66d7bfa0ebba, 0xd0bc3ea6b32d1b21 -data8 0xd0f4f0e8f36c1bf8, 0xd1376458e34b037e -data8 0xd17a2ca133f78572, 0xd1bd4a80301c5715 -data8 0xd1f71682b2fa4575, 0xd23ad555f773f059 -data8 0xd2752c7039a5bf73, 0xd2b98ee008c06b59 -data8 0xd2f4735ffd700280, 0xd32f99ed6d9ac0e1 -data8 0xd374f0666c75d51c, 0xd3b0a7d13618e4a1 -data8 0xd3eca2ea53bcec0c, 0xd428e23874f13a17 -data8 0xd46f82fe293bc6d3, 0xd4ac57e9b7186420 -data8 0xd4e972becb04e8b8, 0xd526d40a7a9b43a3 -data8 0xd5647c5b73917370, 0xd5a26c4201bd6d13 -data8 0xd5e0a45015350a7e, 0xd614b539c6194104 -data8 0xd6537310e224283f, 0xd6927ab62244c917 -data8 0xd6d1ccc1fc4ef4b7, 0xd71169cea98fdded -data8 0xd746a66a5bc9f6d9, 0xd786ce8f0fae5317 -data8 0xd7bc7ff214c4e75a, 0xd7fd35467a517ed1 -data8 0xd83e38838648d815, 0xd874a1db598b8951 -data8 0xd8ab42205b80edaf, 0xd8ed1849d202f965 -data8 0xd92432bd5a173685, 0xd9669ca45b03c23e -data8 0xd99e3327cf89574e, 0xd9d602b19b100466 -data8 0xda0e0ba86c096841, 0xda5195fcdb1c3dce -data8 0xda8a1eb87a491f6c, 0xdac2e230b91c3f84 -data8 0xdafbe0d0b66aea30, 0xdb351b04a8fafced -data8 0xdb6e9139e33cdd8e, 0xdba843ded7151ea1 -data8 0xdbe2336319b61fc8, 0xdc1c60376789fa68 -data8 0xdc56cacda82d0cd5, 0xdc917398f2797814 -data8 0xdccc5b0d90a3e628, 0xdd0781a10469f0f2 -data8 0xdd42e7ca0b52838f, 0xdd729ad01c69114d -data8 0xddae749c001fbf5e, 0xddea8f50a51c69b1 -data8 0xde26eb69a0f0f111, 0xde576480262399bc -data8 0xde943789645933c8, 0xded14d58139a28af -data8 0xdf025c00bbf2b5c7, 0xdf3feb44d723a713 -data8 0xdf715bc16c159be0, 0xdfaf66240e29cda8 -data8 0xdfe139cbf6e19bdc, 0xe01fc0fe94d9fc52 -data8 0xe051f92ffcc0bd60, 0xe090feec9c9a06ac -data8 0xe0c39d0c9ff862d6, 0xe0f668eeb99f188d -data8 0xe1362890eb663139, 0xe1695c7212aecbaa -data8 0xe19cbf0391bbbbe9, 0xe1d050901c531e85 -data8 0xe2110903b4f4047a, 
0xe2450559b4d80b6d -data8 0xe27931a231554ef3, 0xe2ad8e2ac3c5b04b -data8 0xe2e21b41b9694cce, 0xe316d93615862714 -data8 0xe3590bd86a0d30f9, 0xe38e38e38e38e38e -data8 0xe3c397d1e6db7839, 0xe3f928f5953feb9e -data8 0xe42eeca17c62886c, 0xe464e32943446305 -data8 0xe49b0ce15747a8a2, 0xe4d16a1eee94e9d4 -data8 0xe4fa52107353f67d, 0xe5310a471f4d2dc3 -data8 0xe567f6f1c2b9c224, 0xe59f18689a9e4c9a -data8 0xe5d66f04b8a68ecf, 0xe60dfb2005c192e9 -data8 0xe645bd1544c7ea51, 0xe66fb21b505b20a0 -data8 0xe6a7d32af4a7c59a, 0xe6e02b129c6a5ae4 -data8 0xe70a9136a7403039, 0xe74349fb2d92a589 -data8 0xe77c3a9c86ed7d42, 0xe7a713f88151518a -data8 0xe7e067453317ed2b, 0xe819f37a81871bb5 -data8 0xe8454236bfaeca14, 0xe87f32f24c3fc90e -data8 0xe8aacd8688892ba6, 0xe8e523fd32f606f7 -data8 0xe9110b5311407927, 0xe94bc8bf0c108fa3 -data8 0xe977fdc439c2ca3c, 0xe9b3236528fc349e -data8 0xe9dfa70b745ac1b4, 0xea1b36268d0eaa38 -data8 0xea480963fd394197, 0xea84034425f27484 -data8 0xeab12713138dd1cc, 0xeade6db73a5e503b -data8 0xeb1b0268343b121b, 0xeb489b0b2bdb5f14 -data8 0xeb765721e85f03d0, 0xebb389645f222f62 -data8 0xebe198f090607e0c, 0xec0fcc9321024509 -data8 0xec3e247da8b82f61, 0xec7c27d21321c9f7 -data8 0xecaad5278824e453, 0xecd9a76d097d4e77 -data8 0xed089ed5dcd99446, 0xed37bb95add09a1c -data8 0xed76c70508f904b6, 0xeda63bb05e7f93c6 -data8 0xedd5d661daed2dc4, 0xee05974eef86b903 -data8 0xee357ead791fc670, 0xee658cb3c134a463 -data8 0xee95c1987f080211, 0xeec61d92d8c4314f -data8 0xeef6a0da64a014ac, 0xef274ba72a07c811 -data8 0xef581e31a2c91260, 0xef8918b2bc43aec6 -data8 0xefba3b63d89d7cbf, 0xefeb867ecffaa607 -data8 0xf01cfa3df1b9c9fa, 0xf04e96dc05b43e2d -data8 0xf0805c944d827454, 0xf0b24ba285c495cb -data8 0xf0e46442e76f6569, 0xf116a6b2291d7896 -data8 0xf1383fa9e9b5b381, 0xf16ac84f90083b9b -data8 0xf19d7b686dcb03d7, 0xf1d0593311db1757 -data8 0xf20361ee8f1c711e, 0xf23695da7de51d3f -data8 0xf258d095e465cc35, 0xf28c4d0bfc982b34 -data8 0xf2bff55eb3f0ea71, 0xf2f3c9cf9884636e -data8 0xf31670135ab9cc0f, 0xf34a8e9f0b54cdfb 
-data8 0xf37ed9fa6b8add3f, 0xf3a1cfe884ef6bb6 -data8 0xf3d66689dcc8e8d3, 0xf40b2ab069d5c96a -data8 0xf42e718b90c8bc16, 0xf463822a0a3b4b00 -data8 0xf498c1076015faf8, 0xf4bc5a19a33990b5 -data8 0xf4f1e6a7d6f5425f, 0xf527a232cf6be334 -data8 0xf54b8ecdcda90851, 0xf5819949c7ad87b4 -data8 0xf5a5bac9213b48a9, 0xf5dc1501f324a812 -data8 0xf6006bee86b5589e, 0xf63716b2fa067fa4 -data8 0xf66df22fb6132b9c, 0xf6929fb98225deb1 -data8 0xf6c9cd13021e3fea, 0xf6eeb177472cedae -data8 0xf713abf4cb0b3afb, 0xf74b4d5333684ef1 -data8 0xf7707f75a72f8e94, 0xf7a874b97927af44 -data8 0xf7cddf140aedf1d8, 0xf806291bacb7f7a9 -data8 0xf82bcc43b92eafef, 0xf8646bf0defb759e -data8 0xf88a487dfc3ff5f7, 0xf8b03c2b46cdc17f -data8 0xf8e95541c152ae7a, 0xf90f832c2700c160 -data8 0xf935c88e0c7f419b, 0xf96f5cd84fd86873 -data8 0xf995dd53ebdd9d6d, 0xf9bc75a034436a41 -data8 0xf9f686f26d5518de, 0xfa1d5b39b910a8c5 -data8 0xfa4447acc4ecbfd2, 0xfa7ed7e51e6fdfb4 -data8 0xfaa601394d49a1a0, 0xfacd431644ce0e40 -data8 0xfaf49d96f7a75909, 0xfb2fd3c65e562fd5 -data8 0xfb576c5762024805, 0xfb7f1debc22c4040 -data8 0xfba6e89f32d0190a, 0xfbe2c803a0894893 -data8 0xfc0ad1ff0ed9ecf0, 0xfc32f57bdfbcbe7f -data8 0xfc5b32968f99b21c, 0xfc83896bc861ab08 -data8 0xfcabfa1861ed4815, 0xfce8d3cea7d3163e -data8 0xfd118595143ee273, 0xfd3a519943d4865a -data8 0xfd6337f8e1ae5a4b, 0xfd8c38d1c8e927eb -data8 0xfdb5544205095a53, 0xfdde8a67d2613531 -data8 0xfe07db619e781611, 0xfe460768d80bf758 -data8 0xfe6f9bfb06cd32f6, 0xfe994bcd3d14fcc2 -data8 0xfec316fecaf3f2ab, 0xfeecfdaf33fadb80 -data8 0xff16fffe2fa8fad6, 0xff411e0ba9db886d -data8 0xff6b57f7c33e4e9a, 0xff95ade2d1bd7358 -data8 0xffc01fed60f86fb5, 0xffeaae3832b63956 + data8 0x80155c748c374836, 0x8040404b0879f7f9 + data8 0x806b5dce4b405c10, 0x8096b586974669b1 + data8 0x80bcd273d952a028, 0x80e898c52813f2f3 + data8 0x81149add67c2d208, 0x813b4e2c856b6e9a + data8 0x8167c1dde03de7aa, 0x818ed973b811135e + data8 0x81bbc0c33e13ec98, 0x81e33e69fbe7504a + data8 0x820aec524e3c23e9, 0x823880f78e70b805 + data8 
0x826097a62a8e5200, 0x8288dfe00e9b5eaf + data8 0x82b15a10c5371624, 0x82da06a527b18937 + data8 0x8302e60b635ab394, 0x832bf8b2feec2f0e + data8 0x83553f0ce00e276b, 0x837eb98b50f8322a + data8 0x83a270f44c84f699, 0x83cc4d7cfcfac5ca + data8 0x83f65f78a8872b4c, 0x8420a75f2f7b53c8 + data8 0x844510461ff14209, 0x846fbd91b930bed2 + data8 0x84947e18234f3294, 0x84bf92755825045a + data8 0x84e4ac0ee112ba51, 0x8509ef44b86f20be + data8 0x85359d5d91768427, 0x855b3bd5b7384357 + data8 0x858104f0c415f79a, 0x85a6f90390d29864 + data8 0x85d3772fcd56a1dd, 0x85f9c982fcc002f3 + data8 0x862047e0e7ea554b, 0x8646f2a26f7f5852 + data8 0x866dca21754096b5, 0x8694ceb8dfd17a37 + data8 0x86bc00c49e9307e8, 0x86dccd74fce79610 + data8 0x870453c845acf90f, 0x872c089a1e90342c + data8 0x8753ec4a92d16c5e, 0x877bff3aca19f6b4 + data8 0x879d88b6fe1c324c, 0x87c5f346dbf98c3a + data8 0x87e7c653efacef2c, 0x881089d4e73ffefc + data8 0x88397e6a366f2a8a, 0x885bc559e5e1c081 + data8 0x887e2ee392bb7a93, 0x88a7a8587e404257 + data8 0x88ca5eda67594784, 0x88f4356166bd590e + data8 0x89173a0acf5ce026, 0x893a62a098b6a57b + data8 0x895daf637236ae2c, 0x89883b9d1c2fa9c5 + data8 0x89abd8dd374a5d7b, 0x89cf9b1dcd197fa0 + data8 0x89f382a258ea79de, 0x8a178faf06648f29 + data8 0x8a3bc288b3e1d18a, 0x8a601b74f4d1f835 + data8 0x8a849aba14274764, 0x8aa9409f16cdbc9b + data8 0x8ace0d6bbe2cb316, 0x8af301688ab33558 + data8 0x8b181cdebe6f3206, 0x8b3d60185fafcb7c + data8 0x8b62cb603bb2fad0, 0x8b80d7d6bc4104de + data8 0x8ba68bf73ac74f39, 0x8bcc68fb9f9f7335 + data8 0x8bf26f31c534fca2, 0x8c10f86e13a1a1f9 + data8 0x8c3749916cc6abb5, 0x8c5dc4c4f7706032 + data8 0x8c7cac3a8c42e3e0, 0x8ca373f1b7bf2716 + data8 0x8cc29907fb951294, 0x8ce9ae4e9492aac8 + data8 0x8d0911dddbfdad0e, 0x8d3075c4f20f04ee + data8 0x8d5018a9d4de77d5, 0x8d77cc47dd143515 + data8 0x8d97af6352739cb7, 0x8db7af523167800f + data8 0x8ddfd80bc68c32ff, 0x8e00197e1e7c88fe + data8 0x8e207859f77e20e7, 0x8e40f4ce60c9f8e2 + data8 0x8e69ba46cf2fde4d, 0x8e8a7a00bd7ae63e + data8 0x8eab57ef1cf2f529, 
0x8ecc5442cffb1dad + data8 0x8eed6f2d2a4acbfe, 0x8f0ea8dff24441ff + data8 0x8f385c95d696b817, 0x8f59dc43edd930f3 + data8 0x8f7b7b5f5ffad1c4, 0x8f9d3a1bea165f38 + data8 0x8fbf18adc34b66da, 0x8fe117499e356095 + data8 0x90033624aa685f8d, 0x9025757495f36b86 + data8 0x903f3a5dcc091203, 0x9061b2fceb2bdbab + data8 0x90844ca7211032a7, 0x90a7079403e6a15d + data8 0x90c9e3fbafd63799, 0x90ece216c8a16ee4 + data8 0x9110021e7b516f0a, 0x912a708a39be9075 + data8 0x914dcc7b31146370, 0x91714af8cfe984d5 + data8 0x918c00a6f3795e97, 0x91afbc299ed0295d + data8 0x91d39add3e958db0, 0x91ee9920a8974d92 + data8 0x9212b5fcac537c19, 0x9236f6b256923fcf + data8 0x92523ee6f90dcfc3, 0x9276bef031e6eb79 + data8 0x929236ec237a24ad, 0x92b6f70b7efe9dc3 + data8 0x92d29f61eec7dc2b, 0x92f7a05d5b8ba92f + data8 0x931379a403be5c16, 0x9338bc44de2e3f34 + data8 0x9354c71412c69486, 0x937a4c273907e262 + data8 0x93968919f6e7975d, 0x93bc516fdd4680c9 + data8 0x93d8c123d9be59b2, 0x93f546c955e60076 + data8 0x941b70a65879079f, 0x943829f337410591 + data8 0x9454f995765bc4d2, 0x947b86b57f5842ed + data8 0x94988aeb23470f86, 0x94b5a5dc9695f42a + data8 0x94d2d7a9170d8b42, 0x94f9e87dd78bf019 + data8 0x95175019a503d89e, 0x9534cefa625fcb3a + data8 0x955265405c491a25, 0x9570130c1f9bb857 + data8 0x9597ca4119525184, 0x95b5af6fb5aa4d3c + data8 0x95d3ac9273aafd7a, 0x95f1c1cafdfd3684 + data8 0x960fef3b430b8d5f, 0x962e350575b409c5 + data8 0x964c934c0dfc1708, 0x966b0a31c9c6bc7d + data8 0x968999d9ad8d264e, 0x96a8426705198795 + data8 0x96c703fd64445ee5, 0x96e5dec0a7b4268d + data8 0x9704d2d4f59f79f3, 0x9723e05ebe91b9b0 + data8 0x97430782be323831, 0x97624865fc0df8bf + data8 0x9781a32dcc640b2a, 0x97a117ffd0f48e46 + data8 0x97c0a701f9d263c9, 0x97e0505a8637a036 + data8 0x97f57a9fb0b08c6e, 0x9815503365914a9d + data8 0x98354085054fd204, 0x98554bbbf8a77902 + data8 0x987571fffb7f94f6, 0x9895b3791dd03c23 + data8 0x98ab43a5fc65d0c8, 0x98cbb2d196bd713d + data8 0x98ec3d9ec7b6f21a, 0x990ce436db5e8344 + data8 0x9922b8218160967a, 0x99438d686f75779d + 
data8 0x99647eea131fa20b, 0x997a85045a47c6d0 + data8 0x999ba5f14f8add02, 0x99bce38b5465ecae + data8 0x99d31ca0887f30f9, 0x99f48a669c74c09e + data8 0x9a16154eb445c873, 0x9a2c822ec198d667 + data8 0x9a4e3e080cd91b78, 0x9a70177afe52322e + data8 0x9a86b8fa94eebe10, 0x9aa8c42866ae2958 + data8 0x9abf86f9e12fc45e, 0x9ae1c462fc05f49d + data8 0x9af8a8dc936b84d0, 0x9b1b19033be35730 + data8 0x9b3da7daf04c2892, 0x9b54c2e4c8a9012b + data8 0x9b77854e6c661200, 0x9b8ec2e678d56d2f + data8 0x9ba60e6a5ca133b6, 0x9bc919ea66a151a4 + data8 0x9be0887c09ef82bb, 0x9c03c8d5fffc3503 + data8 0x9c1b5ad21a81cbb9, 0x9c3ed09216e9ca02 + data8 0x9c568656c0423def, 0x9c7a320af242ce60 + data8 0x9c920bf7a8c01dc2, 0x9ca9f475d98b159c + data8 0x9ccdeca60e80b5f8, 0x9ce5f9d4653d4902 + data8 0x9cfe15cb38bfdd8e, 0x9d225b983f6c1f96 + data8 0x9d3a9cca32261ed7, 0x9d52ecfccebe1768 + data8 0x9d77818d95b82f86, 0x9d8ff7893fa4706c + data8 0x9da87cbef36f2a5e, 0x9dcd6140b4a35aeb + data8 0x9de60cd06dc6e2d4, 0x9dfec7d4cc43b76f + data8 0x9e17925ec9fccc4a, 0x9e3cdf6db57dc075 + data8 0x9e55d110b63637a8, 0x9e6ed27594550d2e + data8 0x9e87e3adc385d393, 0x9ead9b54b37a1055 + data8 0x9ec6d46a3d7de215, 0x9ee01d9108be3154 + data8 0x9ef976db07288d04, 0x9f12e05a4759ec25 + data8 0x9f2c5a20f4da6668, 0x9f52af78ed1733ca + data8 0x9f6c52426a39d003, 0x9f860593d42fd7f3 + data8 0x9f9fc97fdb96bd51, 0x9fb99e194f4a7037 + data8 0x9fd383731ca51db9, 0x9fed79a04fbf9423 + data8 0xa00780b413b24ee8, 0xa02eab2c4474b0cd + data8 0xa048dcd51ccfd142, 0xa0631fa894b11b8d + data8 0xa07d73ba65e680af, 0xa097d91e6aaf71b0 + data8 0xa0b24fe89e02602f, 0xa0ccd82d1bd2f68b + data8 0xa0e77200215909e6, 0xa1021d760d584855 + data8 0xa11cdaa36068a57d, 0xa137a99cbd3f880b + data8 0xa160019ed37fb4ae, 0xa1960b5966da4608 + data8 0xa1cc5dbe6dc2aab4, 0xa202f97995b69c0d + data8 0xa232fe6eb0c0577d, 0xa26a2582012f6e17 + data8 0xa2a197e5d10465cb, 0xa2d25a532efefbc8 + data8 0xa30a5bd6e49e4ab8, 0xa33b9c9b59879e24 + data8 0xa3742fca6a3c1f21, 0xa3a5f1273887bf22 + data8 0xa3d7ef508ff11574, 
0xa4115ce30548bc15 + data8 0xa443df0e53df577a, 0xa4769fa5913c0ec3 + data8 0xa4a99f303bc7def5, 0xa4dcde37779adf4b + data8 0xa5105d46152c938a, 0xa5441ce89825cb8d + data8 0xa5781dad3e54d899, 0xa5ac602406c4e68c + data8 0xa5d9601d95c2c0bc, 0xa60e1e1a2de14745 + data8 0xa6431f6e3fbd9658, 0xa67864b0d432fda4 + data8 0xa6a6444aa0243c0b, 0xa6dc094d10f25792 + data8 0xa70a574cc02bba69, 0xa7409e2af9549084 + data8 0xa76f5c64ca2cf13b, 0xa79e4f0babab5dc0 + data8 0xa7d5579ae5164b85, 0xa804bd3c6fe61cc8 + data8 0xa8345895e5250a5a, 0xa8642a122b44ef0b + data8 0xa89c38ca18f6108b, 0xa8cc81063b6e87ca + data8 0xa8fd00bfa409285e, 0xa92db8664d5516da + data8 0xa95ea86b75cc2c20, 0xa98fd141a4992deb + data8 0xa9c1335cae7446ba, 0xa9ea8686f556f645 + data8 0xaa1c52d17906bb19, 0xaa4e59b046dab887 + data8 0xaa809b9c60d1890b, 0xaab319102f3f9b33 + data8 0xaadd5a18c1e21274, 0xab1045f2ac31bdf5 + data8 0xab3ae3ab2df7231e, 0xab6e3f945d1e96fc + data8 0xaba1d953a08fa94e, 0xabcd090db7ef4c3f + data8 0xabf864602d7c323d, 0xac2ca5886ccf9b57 + data8 0xac5861d4aa441f0f, 0xac8d183fe3a2fbed + data8 0xacb93703ff51571e, 0xace5830ad0c3f14b + data8 0xad11fca5d78b3ff2, 0xad4797fddf91a798 + data8 0xad747701e559ebcb, 0xada184a47e9c7613 + data8 0xadcec13ab0dda8ff, 0xadfc2d1a5fd21ba8 + data8 0xae29c89a5053c33a, 0xae5794122b638df9 + data8 0xae858fda8137ae0a, 0xaeb3bc4ccc56d3d1 + data8 0xaee219c374c09920, 0xaf10a899d3235fe7 + data8 0xaf3f692c341fe8b4, 0xaf6e5bd7db9ae6c2 + data8 0xaf9d80fb081cd91b, 0xafc35ce063eb3787 + data8 0xaff2ddcb5f28f03d, 0xb022923b148e05c5 + data8 0xb0527a919adbf58b, 0xb078f3ab1d701c65 + data8 0xb0a93a6870649f31, 0xb0d9b624d62ec856 + data8 0xb100a5f53fb3c8e1, 0xb131821882f5540a + data8 0xb158bf8e4cb04055, 0xb189fd69d56b238f + data8 0xb1b189958e8108e4, 0xb1e32a8165b09832 + data8 0xb20b0678fc271eec, 0xb23d0bd3f7592b6e + data8 0xb26538b2db8420dc, 0xb28d89e339ceca14 + data8 0xb2c022ca12e55a16, 0xb2e8c6852c6b03f1 + data8 0xb3118f4eda9fe40f, 0xb33a7d6268109ebe + data8 0xb36ddbc5ea70ec55, 0xb3971e9b39264023 + 
data8 0xb3c0877ecc18e24a, 0xb3ea16ae3a6c905f + data8 0xb413cc67aa0e4d2d, 0xb43da8e9d163e1af + data8 0xb47233773b84d425, 0xb49c6825430fe730 + data8 0xb4c6c46bcdb27dcf, 0xb4f1488c0b35d26f + data8 0xb51bf4c7c51f0168, 0xb546c9616087ab9c + data8 0xb571c69bdffd9a70, 0xb59cecbae56984c3 + data8 0xb5bd64512bb14bb7, 0xb5e8d2a4bf5ba416 + data8 0xb6146a9a1bc47819, 0xb6402c7749d621c0 + data8 0xb66c1882fb435ea2, 0xb6982f048c999a56 + data8 0xb6c47044075b4142, 0xb6e5bd6bfd02bafd + data8 0xb7124a2736ff8ef2, 0xb73f026a01e94177 + data8 0xb760a959f1d0a7a7, 0xb78dae7e06868ab0 + data8 0xb7badff8ad9e4e02, 0xb7dce25b8e17ae9f + data8 0xb80a6226904045e2, 0xb8380f1cafd73c1c + data8 0xb85a6ea8e321b4d8, 0xb8886b684ae7d2fa + data8 0xb8ab0726fa00cf5d, 0xb8d954a4d13b7cb1 + data8 0xb8fc2d4f6cd9f04a, 0xb92acc851476b1ab + data8 0xb94de2d841a184c2, 0xb97cd4c36c92693c + data8 0xb9a0297f172665e3, 0xb9cf6f21e36c3924 + data8 0xb9f3030951267208, 0xba229d6a618e7c59 + data8 0xba467144459f9855, 0xba6a60c3c48f1a4b + data8 0xba9a76056b67ee7a, 0xbabea699563ada6e + data8 0xbae2f350b262cc4b, 0xbb1385a23be24e57 + data8 0xbb3814975e17c680, 0xbb5cc031009bf467 + data8 0xbb81889680024764, 0xbbb2c0d8703ae95d + data8 0xbbd7cd09ba3c5463, 0xbbfcf68c4977718f + data8 0xbc223d88cfc88eee, 0xbc47a2284fee4ff8 + data8 0xbc79ac0916ed7b8a, 0xbc9f5670d1a13030 + data8 0xbcc51f068cb95c1d, 0xbceb05f4b30a9bc0 + data8 0xbd110b6604c7d306, 0xbd372f8598620f19 + data8 0xbd5d727edb6b3c7e, 0xbd83d47d937bbc6d + data8 0xbdaa55addf1ae47d, 0xbdd0f63c36aa73f0 + data8 0xbdf7b6556d550a15, 0xbe1e9626b1ffa96b + data8 0xbe4595dd903e5371, 0xbe6cb5a7f14bc935 + data8 0xbe93f5b41d047cf7, 0xbebb5630bae4c15f + data8 0xbee2d74cd30a430c, 0xbf0a7937cf38d981 + data8 0xbf323c217be2bc8c, 0xbf5a203a09342bbb + data8 0xbf74cad1c14ebfc4, 0xbf9ce6a497a89f78 + data8 0xbfc52428bec6e72f, 0xbfed838fddab024b + data8 0xc016050c0420981a, 0xc03ea8cfabddc330 + data8 0xc059d3cbd65ddbce, 0xc082b122a3c78c9d + data8 0xc0abb1499ae736c4, 0xc0d4d474c3aedaaf + data8 0xc0f054ca33eb3437, 
0xc119b2c67e600ed0 + data8 0xc1433453de2033ff, 0xc15ef3e44e10032d + data8 0xc188b130431d80e6, 0xc1b2929d6067730e + data8 0xc1ce9268f31cc734, 0xc1f8b0877c1b0c08 + data8 0xc222f35a87b415ba, 0xc23f3467349e5c88 + data8 0xc269b4e40e088c01, 0xc2945aac24daaf6e + data8 0xc2b0de05e43c1d66, 0xc2dbc275e1229d09 + data8 0xc2f86fca9d80eeff, 0xc323938449a2587e + data8 0xc3406b40a538ed20, 0xc36bcee8211d15e0 + data8 0xc397593adf2ba366, 0xc3b475b6206155d5 + data8 0xc3e0410243b97383, 0xc3fd890709833d37 + data8 0xc41ae295f7e7fa06, 0xc44709f7bb8a4dd2 + data8 0xc4648fb0e0bec4c1, 0xc490f9a94695ba14 + data8 0xc4aeac0173b7d390, 0xc4db5941007aa853 + data8 0xc4f938aec206291a, 0xc52629e899dfd622 + data8 0xc54436e44043b965, 0xc562563abf9ea07f + data8 0xc58fa7d1dc42921c, 0xc5adf561b91e110a + data8 0xc5cc5591bdbd82fa, 0xc5fa08f1ff20593c + data8 0xc618980a79ce6862, 0xc6373a09e34b50fa + data8 0xc66550a6e0baaf35, 0xc6842241926342c9 + data8 0xc6a3070b7c93bb9e, 0xc6d18260bb84081b + data8 0xc6f0977c9416828b, 0xc70fc0117c641630 + data8 0xc72efc34d7e615be, 0xc75dfb441594141e + data8 0xc77d68aa019bda4c, 0xc79ce9ea478dbc4f + data8 0xc7bc7f1ae453219d, 0xc7ec0476e15e141a + data8 0xc80bcbe16f1d540f, 0xc82ba78a5d349735 + data8 0xc84b978847a06b87, 0xc86b9bf1ee817bc6 + data8 0xc88bb4de3667cdf4, 0xc8bc00e7fe9e23a3 + data8 0xc8dc4d7ff2d25232, 0xc8fcaeebcb40eb47 + data8 0xc91d25431426a663, 0xc93db09d7fdb2949 + data8 0xc95e5112e721582a, 0xc97f06bb49787677 + data8 0xc99fd1aecd6e1b06, 0xc9d12a3e27bb1625 + data8 0xc9f22ad82ba3d5f0, 0xca134113105e67b2 + data8 0xca346d07b045a876, 0xca55aecf0e94bb88 + data8 0xca77068257be9bab, 0xca98743ae1c693a8 + data8 0xcab9f8122c99a101, 0xcadb9221e268c3b5 + data8 0xcafd4283d8043dfd, 0xcb1f09520d37c6fb + data8 0xcb51ddcb9e93095e, 0xcb95f333968ad59b + data8 0xcbda64292d3ffd97, 0xcc1f3184af961596 + data8 0xcc5bb1ac954d33e2, 0xcca12e9831fc6402 + data8 0xcce70a67b64f24ad, 0xcd24794726477ea5 + data8 0xcd6b096a0b70ee87, 0xcda9177738b15a90 + data8 0xcdf05f2247dffab9, 0xce2f0f347f96f906 + 
data8 0xce6e0be0cd551a61, 0xceb666b2c347d1de + data8 0xcef609b0cb874f00, 0xcf35fb5447e5c765 + data8 0xcf763c47ee869f00, 0xcfb6cd3888d71785 + data8 0xcff7aed4fbfbb447, 0xd038e1ce5167e3c6 + data8 0xd07a66d7bfa0ebba, 0xd0bc3ea6b32d1b21 + data8 0xd0f4f0e8f36c1bf8, 0xd1376458e34b037e + data8 0xd17a2ca133f78572, 0xd1bd4a80301c5715 + data8 0xd1f71682b2fa4575, 0xd23ad555f773f059 + data8 0xd2752c7039a5bf73, 0xd2b98ee008c06b59 + data8 0xd2f4735ffd700280, 0xd32f99ed6d9ac0e1 + data8 0xd374f0666c75d51c, 0xd3b0a7d13618e4a1 + data8 0xd3eca2ea53bcec0c, 0xd428e23874f13a17 + data8 0xd46f82fe293bc6d3, 0xd4ac57e9b7186420 + data8 0xd4e972becb04e8b8, 0xd526d40a7a9b43a3 + data8 0xd5647c5b73917370, 0xd5a26c4201bd6d13 + data8 0xd5e0a45015350a7e, 0xd614b539c6194104 + data8 0xd6537310e224283f, 0xd6927ab62244c917 + data8 0xd6d1ccc1fc4ef4b7, 0xd71169cea98fdded + data8 0xd746a66a5bc9f6d9, 0xd786ce8f0fae5317 + data8 0xd7bc7ff214c4e75a, 0xd7fd35467a517ed1 + data8 0xd83e38838648d815, 0xd874a1db598b8951 + data8 0xd8ab42205b80edaf, 0xd8ed1849d202f965 + data8 0xd92432bd5a173685, 0xd9669ca45b03c23e + data8 0xd99e3327cf89574e, 0xd9d602b19b100466 + data8 0xda0e0ba86c096841, 0xda5195fcdb1c3dce + data8 0xda8a1eb87a491f6c, 0xdac2e230b91c3f84 + data8 0xdafbe0d0b66aea30, 0xdb351b04a8fafced + data8 0xdb6e9139e33cdd8e, 0xdba843ded7151ea1 + data8 0xdbe2336319b61fc8, 0xdc1c60376789fa68 + data8 0xdc56cacda82d0cd5, 0xdc917398f2797814 + data8 0xdccc5b0d90a3e628, 0xdd0781a10469f0f2 + data8 0xdd42e7ca0b52838f, 0xdd729ad01c69114d + data8 0xddae749c001fbf5e, 0xddea8f50a51c69b1 + data8 0xde26eb69a0f0f111, 0xde576480262399bc + data8 0xde943789645933c8, 0xded14d58139a28af + data8 0xdf025c00bbf2b5c7, 0xdf3feb44d723a713 + data8 0xdf715bc16c159be0, 0xdfaf66240e29cda8 + data8 0xdfe139cbf6e19bdc, 0xe01fc0fe94d9fc52 + data8 0xe051f92ffcc0bd60, 0xe090feec9c9a06ac + data8 0xe0c39d0c9ff862d6, 0xe0f668eeb99f188d + data8 0xe1362890eb663139, 0xe1695c7212aecbaa + data8 0xe19cbf0391bbbbe9, 0xe1d050901c531e85 + data8 0xe2110903b4f4047a, 
0xe2450559b4d80b6d + data8 0xe27931a231554ef3, 0xe2ad8e2ac3c5b04b + data8 0xe2e21b41b9694cce, 0xe316d93615862714 + data8 0xe3590bd86a0d30f9, 0xe38e38e38e38e38e + data8 0xe3c397d1e6db7839, 0xe3f928f5953feb9e + data8 0xe42eeca17c62886c, 0xe464e32943446305 + data8 0xe49b0ce15747a8a2, 0xe4d16a1eee94e9d4 + data8 0xe4fa52107353f67d, 0xe5310a471f4d2dc3 + data8 0xe567f6f1c2b9c224, 0xe59f18689a9e4c9a + data8 0xe5d66f04b8a68ecf, 0xe60dfb2005c192e9 + data8 0xe645bd1544c7ea51, 0xe66fb21b505b20a0 + data8 0xe6a7d32af4a7c59a, 0xe6e02b129c6a5ae4 + data8 0xe70a9136a7403039, 0xe74349fb2d92a589 + data8 0xe77c3a9c86ed7d42, 0xe7a713f88151518a + data8 0xe7e067453317ed2b, 0xe819f37a81871bb5 + data8 0xe8454236bfaeca14, 0xe87f32f24c3fc90e + data8 0xe8aacd8688892ba6, 0xe8e523fd32f606f7 + data8 0xe9110b5311407927, 0xe94bc8bf0c108fa3 + data8 0xe977fdc439c2ca3c, 0xe9b3236528fc349e + data8 0xe9dfa70b745ac1b4, 0xea1b36268d0eaa38 + data8 0xea480963fd394197, 0xea84034425f27484 + data8 0xeab12713138dd1cc, 0xeade6db73a5e503b + data8 0xeb1b0268343b121b, 0xeb489b0b2bdb5f14 + data8 0xeb765721e85f03d0, 0xebb389645f222f62 + data8 0xebe198f090607e0c, 0xec0fcc9321024509 + data8 0xec3e247da8b82f61, 0xec7c27d21321c9f7 + data8 0xecaad5278824e453, 0xecd9a76d097d4e77 + data8 0xed089ed5dcd99446, 0xed37bb95add09a1c + data8 0xed76c70508f904b6, 0xeda63bb05e7f93c6 + data8 0xedd5d661daed2dc4, 0xee05974eef86b903 + data8 0xee357ead791fc670, 0xee658cb3c134a463 + data8 0xee95c1987f080211, 0xeec61d92d8c4314f + data8 0xeef6a0da64a014ac, 0xef274ba72a07c811 + data8 0xef581e31a2c91260, 0xef8918b2bc43aec6 + data8 0xefba3b63d89d7cbf, 0xefeb867ecffaa607 + data8 0xf01cfa3df1b9c9fa, 0xf04e96dc05b43e2d + data8 0xf0805c944d827454, 0xf0b24ba285c495cb + data8 0xf0e46442e76f6569, 0xf116a6b2291d7896 + data8 0xf1383fa9e9b5b381, 0xf16ac84f90083b9b + data8 0xf19d7b686dcb03d7, 0xf1d0593311db1757 + data8 0xf20361ee8f1c711e, 0xf23695da7de51d3f + data8 0xf258d095e465cc35, 0xf28c4d0bfc982b34 + data8 0xf2bff55eb3f0ea71, 0xf2f3c9cf9884636e + 
data8 0xf31670135ab9cc0f, 0xf34a8e9f0b54cdfb + data8 0xf37ed9fa6b8add3f, 0xf3a1cfe884ef6bb6 + data8 0xf3d66689dcc8e8d3, 0xf40b2ab069d5c96a + data8 0xf42e718b90c8bc16, 0xf463822a0a3b4b00 + data8 0xf498c1076015faf8, 0xf4bc5a19a33990b5 + data8 0xf4f1e6a7d6f5425f, 0xf527a232cf6be334 + data8 0xf54b8ecdcda90851, 0xf5819949c7ad87b4 + data8 0xf5a5bac9213b48a9, 0xf5dc1501f324a812 + data8 0xf6006bee86b5589e, 0xf63716b2fa067fa4 + data8 0xf66df22fb6132b9c, 0xf6929fb98225deb1 + data8 0xf6c9cd13021e3fea, 0xf6eeb177472cedae + data8 0xf713abf4cb0b3afb, 0xf74b4d5333684ef1 + data8 0xf7707f75a72f8e94, 0xf7a874b97927af44 + data8 0xf7cddf140aedf1d8, 0xf806291bacb7f7a9 + data8 0xf82bcc43b92eafef, 0xf8646bf0defb759e + data8 0xf88a487dfc3ff5f7, 0xf8b03c2b46cdc17f + data8 0xf8e95541c152ae7a, 0xf90f832c2700c160 + data8 0xf935c88e0c7f419b, 0xf96f5cd84fd86873 + data8 0xf995dd53ebdd9d6d, 0xf9bc75a034436a41 + data8 0xf9f686f26d5518de, 0xfa1d5b39b910a8c5 + data8 0xfa4447acc4ecbfd2, 0xfa7ed7e51e6fdfb4 + data8 0xfaa601394d49a1a0, 0xfacd431644ce0e40 + data8 0xfaf49d96f7a75909, 0xfb2fd3c65e562fd5 + data8 0xfb576c5762024805, 0xfb7f1debc22c4040 + data8 0xfba6e89f32d0190a, 0xfbe2c803a0894893 + data8 0xfc0ad1ff0ed9ecf0, 0xfc32f57bdfbcbe7f + data8 0xfc5b32968f99b21c, 0xfc83896bc861ab08 + data8 0xfcabfa1861ed4815, 0xfce8d3cea7d3163e + data8 0xfd118595143ee273, 0xfd3a519943d4865a + data8 0xfd6337f8e1ae5a4b, 0xfd8c38d1c8e927eb + data8 0xfdb5544205095a53, 0xfdde8a67d2613531 + data8 0xfe07db619e781611, 0xfe460768d80bf758 + data8 0xfe6f9bfb06cd32f6, 0xfe994bcd3d14fcc2 + data8 0xfec316fecaf3f2ab, 0xfeecfdaf33fadb80 + data8 0xff16fffe2fa8fad6, 0xff411e0ba9db886d + data8 0xff6b57f7c33e4e9a, 0xff95ade2d1bd7358 + data8 0xffc01fed60f86fb5, 0xffeaae3832b63956 LOCAL_OBJECT_END(T_table) - - - - LOCAL_OBJECT_START(D_table) -data4 0x1e50f488, 0x1ebdc559, 0x1e649ec1, 0x9eed9b2c -data4 0x9e511c44, 0x9ec6d551, 0x9eefe248, 0x9e313854 -data4 0x9f54ff18, 0x9d231411, 0x1ee5d63c, 0x9edf6b95 -data4 0x9f332aaa, 0x1dc92a84, 
0x1f73fb7b, 0x1e32f100 -data4 0x9ea636f5, 0x9f6c3353, 0x9f405552, 0x1f33fd97 -data4 0x1e975291, 0x9e59a11e, 0x1e47b0ba, 0x9d8ad33e -data4 0x1ea51bf6, 0x1f25d782, 0x9ecf534d, 0x1f55436f -data4 0x1d0975e4, 0x9f0633a1, 0x1f3e840a, 0x1f523a4c -data4 0x9f53cbbc, 0x9c8b5661, 0x9f6bc8eb, 0x1f4f6c7b -data4 0x9ed9b376, 0x9f5b30b6, 0x1f64fa5e, 0x1cbcc3e0 -data4 0x1f343548, 0x1f62a6a2, 0x9f336abb, 0x9f1d15af -data4 0x1f476c83, 0x1ea86421, 0x1f33b2cf, 0x9e8f1348 -data4 0x1f6fa829, 0x9f30ee3a, 0x9ebd6146, 0x1f2db598 -data4 0x1ef9600d, 0x1f5b1427, 0x9edd741b, 0x1f51ef4e -data4 0x9f1aa57d, 0x9ee9b5e0, 0x9f17ecd7, 0x1ead71ff -data4 0x1f6c910e, 0x9e1837df, 0x9f0f17d9, 0x9e8350dd -data4 0x9d292f1b, 0x9e33b3ab, 0x9d6f0fe8, 0x9ed8c7cc -data4 0x9ec598c8, 0x9d56758c, 0x1e090c1e, 0x9ed4b941 -data4 0x9f1fc4cf, 0x1f63513a, 0x9edd0abc, 0x1e3924dd -data4 0x1f60d56f, 0x1ea84424, 0x9e88f4fb, 0x1f205c09 -data4 0x1ec9ae4e, 0x1d2d5738, 0x9f2c9f6d, 0x1e0765c2 -data4 0x1e8bbdd7, 0x9f16d9f1, 0x9ea62627, 0x1f13904c -data4 0x1e566ab8, 0x9dca3d1a, 0x9e91f2a1, 0x9f14641c -data4 0x9f278946, 0x1f490c1e, 0x1f575eb6, 0x1f50b3fd -data4 0x9da32efb, 0x1ea95e59, 0x9e41e058, 0x9eada15f -data4 0x9e4fe66c, 0x1f3abc98, 0x1f1b8d1e, 0x9ece97e4 -data4 0x1d188aed, 0x9e89b6ee, 0x1f287478, 0x9e8a161a -data4 0x1e4749f7, 0x9e68084a, 0x1e867f33, 0x9f462b63 -data4 0x1db30792, 0x1f59a767, 0x9d1da4ae, 0x9f472a33 -data4 0x1d1e91cd, 0x9f414824, 0x9f473d4f, 0x1f4b5783 -data4 0x9f5b04b8, 0x9f5c205b, 0x1f309617, 0x9f0d6852 -data4 0x9d96a609, 0x9f0965c2, 0x9e23f467, 0x9f089884 -data4 0x9ec71458, 0x9ed6e955, 0x1e5e8691, 0x1f5b2bbc -data4 0x9f128268, 0x1ed40f5b, 0x1dc430ce, 0x1f345986 -data4 0x1d778f72, 0x1e9b11d6, 0x9f5a40be, 0x9e07f61a -data4 0x9ed641a7, 0x9f334787, 0x1e952fd0, 0x1edeb5e2 -data4 0x9e9f3eb1, 0x9e379fd9, 0x1f13102a, 0x9e5e80e1 -data4 0x1c757944, 0x1dae2260, 0x1f183ab7, 0x1e55d576 -data4 0x9e6bb99f, 0x9f52d7cb, 0x9e73a0f5, 0x1d4e1d14 -data4 0x9dd05b53, 0x1f2261e4, 0x9d4ee73d, 0x1ede515e -data4 0x1f22a573, 0x9ecac348, 
0x1e6a2ac0, 0x1e2787d2 -data4 0x9eb64b87, 0x1f0c69c6, 0x9f470a01, 0x9d7c1686 -data4 0x1e468ebe, 0x9f21ee2f, 0x9ee52116, 0x9e20f715 -data4 0x1ed18533, 0x9f005b38, 0x9f20cb95, 0x1da72967 -data4 0x1f1ba5d7, 0x1e2f8b16, 0x9c794f96, 0x9ca74ea3 -data4 0x1f410555, 0x9eff2b96, 0x1ce8f0b1, 0x1f0cee77 -data4 0x1f191edd, 0x9ed5fcbc, 0x1f30f242, 0x9e0ad369 -data4 0x1ed8f3c8, 0x1f52bb0e, 0x9e9ce408, 0x1f18907f -data4 0x9ecdad40, 0x9e8af91d, 0x1d46698a, 0x9f4b93d6 -data4 0x9f3f5d33, 0x1e2e52f7, 0x9f13aeec, 0x9f3b1969 -data4 0x1f0996f4, 0x9f2a03df, 0x1e264767, 0x1f3ab1fb -data4 0x9f3193c9, 0x9f21ce22, 0x9eab624c, 0x9ecd8fb1 -data4 0x1eaf9a85, 0x1f0c6a2c, 0x1eecbe61, 0x1f3fead9 -data4 0x1f1d3a29, 0x1e9099ce, 0x1eadd875, 0x1e4dbfb8 -data4 0x9dc640d2, 0x1f413680, 0x9f3f57b3, 0x1dfa1553 -data4 0x1ec71c6b, 0x1e00cc00, 0x9f271e55, 0x1e5a88bb -data4 0x1f46cc2b, 0x1ee80ff9, 0x9e29c6f3, 0x1f15e229 -data4 0x9ea83d66, 0x1f37408e, 0x9dacb66e, 0x1e6f6259 -data4 0x9f106973, 0x1dd4e5ac, 0x1cbfdcc8, 0x9f231c9f -data4 0x9e8677e4, 0x9e9e695a, 0x1efd782b, 0x9dd26959 -data4 0x9e80af69, 0x1f386fb3, 0x1f022e8c, 0x9e839967 -data4 0x1ce6796f, 0x1e4c22c2, 0x1e57ef24, 0x1e919804 -data4 0x9d7ea090, 0x1e40140a, 0x1f261b46, 0x1db75be2 -data4 0x1f145019, 0x9e3102b9, 0x9e22507b, 0x1eae813c -data4 0x1f117e97, 0x1f282296, 0x1f3814b3, 0x1e17977b -data4 0x1f39d6ff, 0x9f1c81b9, 0x9eb5bcad, 0x1f0f596e -data4 0x1e757fd5, 0x9f090daa, 0x9f2532fc, 0x9eebafbb -data4 0x1f086556, 0x9eeedde8, 0x9f32e174, 0x1e33c030 -data4 0x1f1f145a, 0x1e6e556c, 0x1e419ffb, 0x9eb6019a -data4 0x9e872a2e, 0x1e113136, 0x1e93096f, 0x1f39be40 -data4 0x1f1665ad, 0x9db81d7d, 0x9cd29091, 0x1e3f4af7 -data4 0x9f23176c, 0x9eccf9b3, 0x1f34fc6c, 0x9ed36894 -data4 0x1ef08e06, 0x9f3b46bb, 0x9f2c850b, 0x1f1565a4 -data4 0x1e887bc3, 0x1e92629c, 0x9f11ac9e, 0x9e5579f3 -data4 0x1e4d5790, 0x9ee1c3d1, 0x9e916aec, 0x9eb8d9b8 -data4 0x1db46105, 0x1e168663, 0x1f26a942, 0x9f0f0383 -data4 0x9f079032, 0x9ecae1d8, 0x1ed3b34c, 0x9edc5ee6 -data4 0x9e8a75a7, 0x1f3c3de2, 
0x9ee5041e, 0x1f08c727 -data4 0x1d02d7ae, 0x9f36adda, 0x9ef9a857, 0x9ef5cb3a -data4 0x9eee73da, 0x9da5d629, 0x1e0e99be, 0x1e5159b9 -data4 0x1f2eac89, 0x9e8eedc5, 0x1dd0ec90, 0x1f229aff -data4 0x1ed9c3e6, 0x1e95c55a, 0x9f0c24e4, 0x1e8afed6 -data4 0x1e599a96, 0x1e881b21, 0x1eab84b9, 0x9ba2bb0e -data4 0x9e33ab10, 0x1f1710b5, 0x1ebfa271, 0x9e90bbc5 -data4 0x9f32515b, 0x9b32aae8, 0x1eda455c, 0x1da8186e -data4 0x9e8917ff, 0x1ec4d08e, 0x1c90069d, 0x9f2f1d29 -data4 0x9ecee86d, 0x9f234d1f, 0x1f370724, 0x1da87496 -data4 0x1e7959f0, 0x9e8ada34, 0x1f1c7f6f, 0x1edd576b -data4 0x9de91e8b, 0x1ec4ef89, 0x1f32078a, 0x1e9925e2 -data4 0x9d8eeccb, 0x9ea3d011, 0x1f231fdf, 0x9f1dbdfa -data4 0x1e7507a3, 0x1ec42614, 0x9e8693cb, 0x9ec68398 -data4 0x1d5b05fb, 0x1de32119, 0x9f003429, 0x9ec16d92 -data4 0x9f095315, 0x9f119d2c, 0x9ed0c984, 0x9f090662 -data4 0x9e59aa1f, 0x9ed4e64a, 0x9f2798a7, 0x9f23624d -data4 0x1e0467d9, 0x1f22e7e7, 0x1e915256, 0x9cb4df70 -data4 0x9e6f687c, 0x9e3c35e5, 0x9e5757ab, 0x9f031fa1 -data4 0x1f25bff7, 0x1f0e58c2, 0x1ef3ce04, 0x1f002ecb -data4 0x9ebdc836, 0x9ed657dd, 0x9f149441, 0x9e8544b2 -data4 0x1cd8ff1e, 0x1e9bb463, 0x1eaa1c5c, 0x1f200c1a -data4 0x1edbfbaf, 0x1f18724d, 0x9ed63c22, 0x9f08e045 -data4 0x1f13ad07, 0x9e949311, 0x9f0c50d4, 0x1e824516 -data4 0x1d5e52ba, 0x1d583fbd, 0x1e3b60a9, 0x9effe6d3 -data4 0x1f0d0508, 0x1f00be77, 0x9e404bfa, 0x9e1ca381 -data4 0x9f084dd8, 0x9e6db85d, 0x1db698e4, 0x9ebd1871 -data4 0x9ecc2679, 0x1ee68442, 0x1edb1050, 0x9dbc96a4 -data4 0x9f27c1f4, 0x1c99b756, 0x1eb4400a, 0x9f24390a -data4 0x1d927875, 0x9f074faa, 0x1e9dc2c3, 0x1f13c0d2 -data4 0x1e3c9685, 0x9e6b6f75, 0x9db9cb31, 0x1ea5f3aa -data4 0x9d992c61, 0x1f1015e4, 0x1f194f70, 0x9e19d2b3 -data4 0x9d89116c, 0x1f23cd35, 0x1e33d3a2, 0x1ee331b8 -data4 0x1d5ba7ec, 0x9f273788, 0x9e6907f4, 0x9ed5f912 -data4 0x9edd458d, 0x1e2ca7b2, 0x1ef81fe4, 0x1dc7ade6 -data4 0x1e876e51, 0x9f04ec89, 0x1f1da63a, 0x1ec02bd0 -data4 0x9e71326f, 0x1e7847b4, 0x1f0de618, 0x9e036cb6 -data4 0x1eec61e2, 0x1ef1758b, 
0x9ee880a3, 0x1ed269d7 -data4 0x1e27edd3, 0x9e8a81a1, 0x1eacb84d, 0x9e1aad37 -data4 0x1f1aa8f7, 0x1e9bbd90, 0x1ea1b61f, 0x9ed41c2f -data4 0x1dbb5dd6, 0x1f0ec733, 0x9df06b1b, 0x1e06fef1 -data4 0x9edede3a, 0x1edeb5e2, 0x1f0e63ee, 0x9db316bb -data4 0x9efc1ad3, 0x1f01fbb5, 0x9cc0d078, 0x1ea28b36 -data4 0x9e9dd205, 0x9e791534, 0x1da1c8d5, 0x9e8195cc -data4 0x1f0681a4, 0x1eeaf1e2, 0x9ef83b37, 0x9f22a92b -data4 0x1eabc4ce, 0x1f10eefb, 0x1e06d9aa, 0x1e7cacd5 -data4 0x1f1ea087, 0x1eb21983, 0x9f100c78, 0x1e840abe -data4 0x9efab66c, 0x1f183fa8, 0x9e84ee68, 0x9eea083d -data4 0x9ee23a74, 0x1f1351d7, 0x9ec5d42a, 0x9f071f57 -data4 0x9ef578d9, 0x9f1aa7e7, 0x1eb02044, 0x1f151a2e -data4 0x9c0dc8b2, 0x9ef4087a, 0x1ec12b93, 0x1c1a946b -data4 0x1e89946f, 0x9dafe8c3, 0x1d295288, 0x9e8497ab -data4 0x1ec000c6, 0x1e102f29, 0x1e542256, 0x1e67d44d -data4 0x1ef688d8, 0x1f0e0f29, 0x1e67861f, 0x1e869748 -data4 0x1ee6aa6e, 0x9e4d228b, 0x9e50be5b, 0x1e9fe225 -data4 0x9ea34102, 0x9e628a3b, 0x9ed9fd83, 0x1ecd7109 -data4 0x1f1864ff, 0x1ea19b76, 0x1db0d1c9, 0x9dff519b -data4 0x1e8fea71, 0x9ee82e9a, 0x9f08919b, 0x9ef5c8ae -data4 0x9ee446a4, 0x1ea59444, 0x1eb74230, 0x1ea13fbf -data4 0x9ea6a3ea, 0x1e5f2797, 0x9e0adb07, 0x9d3adadd -data4 0x1ebf2ee2, 0x1da19bfa, 0x1e8dea6d, 0x1ec4fea9 -data4 0x1e669f22, 0x1dc5f919, 0x9ed25caa, 0x1ee475b1 -data4 0x1ed0603e, 0x9eacb35c, 0x1dc00b27, 0x1e2f9991 -data4 0x1e7b0406, 0x1eaa3387, 0x9d865bde, 0x1eb78a48 -data4 0x1c40ae2e, 0x1ee9838b, 0x9f0f0d7f, 0x1e3e5d26 -data4 0x1e99e7a6, 0x9e681ccf, 0x9e93ed65, 0x9eeb6a66 -data4 0x1e29e9af, 0x9e96f923, 0x9e74f11d, 0x9f1474da -data4 0x1eec2ea7, 0x1ebf7aa3, 0x9c25dcca, 0x9f0553c2 -data4 0x9e599efd, 0x1d2ab490, 0x1e95d7cd, 0x9ee4b20e -data4 0x9d988ce5, 0x9ef9787e, 0x9dbbba5b, 0x9f12c304 -data4 0x1e3b9d70, 0x1e7bcae8, 0x9d98bb6e, 0x9e8e6b01 -data4 0x9f07d03b, 0x9d67c822, 0x9f0ef69e, 0x1c7c0fe3 -data4 0x9e9bfbb9, 0x9e83b84b, 0x1efbf15e, 0x9ecfa6a6 -data4 0x9c91158e, 0x9ecf6770, 0x1ee1e3a8, 0x9dc95ec0 -data4 0x1ef603f7, 0x1d5e52ba, 
0x1c477d1b, 0x9e955cd8 -data4 0x1ed665b0, 0x9e8376c4, 0x9c0ee88e, 0x1e8c989e -data4 0x1ea2df29, 0x9d961e5c, 0x1e101813, 0x1e7fffff -data4 0x9e5abff4, 0x1dbddd71, 0x1eb69100, 0x1e71f114 -data4 0x1e9ca798, 0x1ef62c8d, 0x9db4e55a, 0x1dbe69ce -data4 0x9ef1c01f, 0x1f044a2a, 0x9eb9e0d7, 0x9ee59745 -data4 0x9e874803, 0x1ea0b418, 0x9e13572a, 0x1ddbb3a2 -data4 0x9ec0e391, 0x1e89fba1, 0x1ee8b261, 0x9e5d25f0 -data4 0x9ef222cb, 0x9ef135ec, 0x1ea04b9a, 0x9f04291f -data4 0x9e969254, 0x9ee32f08, 0x9ed909d3, 0x9e362640 -data4 0x9ec20735, 0x1e50131b, 0x9ed4e049, 0x1ee8e817 -data4 0x1e1e09c0, 0x9ea643c5, 0x9e5a1ab6, 0x9e389059 -data4 0x1e560947, 0x1d02b877, 0x1e4475ab, 0x9ea9aaf6 -data4 0x1e95bc5e, 0x1eaf6afd, 0x1d43067d, 0x9d043821 -data4 0x9e97baa9, 0x1de5c4f9, 0x9e9a0069, 0x9e1b9944 -data4 0x1eb13686, 0x9eb907eb, 0x1e059589, 0x1cbd0f93 -data4 0x9eb7e6ae, 0x1e9fa175, 0x1ee5bdf4, 0x1e8052f7 -data4 0x9c80d1e3, 0x1bfbe28e, 0x9e672b3b, 0x9ecacf19 -data4 0x9e3c04be, 0x1dfe8c5c, 0x1e1ba9cb, 0x1eb40b1e -data4 0x1ec7e7f6, 0x9d0d45b3, 0x1ef0113b, 0x9a155fa3 -data4 0x1e28ec3b, 0x1e7ca8df, 0x9d2f91b4, 0x1eccd9ed -data4 0x9ed943bc, 0x9ccaab19, 0x9e8a5c58, 0x1ec3bca8 -data4 0x1ed78dc7, 0x9ed391a8, 0x9e938f6e, 0x9ec4a030 -data4 0x9e80346e, 0x1e7a4686, 0x9e284315, 0x9e39584c -data4 0x1ebdc9b4, 0x9e9cfce5, 0x9ef55c65, 0x1e2941e7 -data4 0x9efbe59f, 0x1d87c41b, 0x1e40befc, 0x1e3d05b5 -data4 0x1de9ea67, 0x1ec9a21c, 0x1decb69a, 0x1df6e75a -data4 0x9e8030ab, 0x9db20540, 0x9ef1e977, 0x1e3cdc43 -data4 0x1e0492b0, 0x9e91d872, 0x1e775346, 0x9e939978 -data4 0x1eb2714e, 0x1e49a203, 0x9e10195a, 0x1ef1ffc3 -data4 0x9ea8b709, 0x9e832e27, 0x1ed5ac3b, 0x1edb20a6 -data4 0x1e4dbd4e, 0x1efbb932, 0x1d8170ec, 0x1e6c4849 -data4 0x1f008e17, 0x1e8000c4, 0x1d855ecf, 0x9e37cb85 -data4 0x1ecffdf5, 0x1eba6519, 0x9edbe600, 0x1ea3e5e7 -data4 0x1ed4fb39, 0x1f00be77, 0x1e6f4484, 0x9e9e7107 -data4 0x9e30b29d, 0x9ee6e174, 0x1e3a2656, 0x9dd72f3f -data4 0x9ee12138, 0x1ed16fed, 0x9ece8a02, 0x9ca5b249 -data4 0x9eafd508, 0x9ef0e9fc, 
0x1d1307ac, 0x1eecee20 -data4 0x1cf60c6f, 0x9d556216, 0x9eaed175, 0x9ec919f4 -data4 0x1ec2c988, 0x1cd82772, 0x9dc99456, 0x1eab0467 -data4 0x1e89b36f, 0x1c757944, 0x1eef9abd, 0x9e98664d + data4 0x1e50f488, 0x1ebdc559, 0x1e649ec1, 0x9eed9b2c + data4 0x9e511c44, 0x9ec6d551, 0x9eefe248, 0x9e313854 + data4 0x9f54ff18, 0x9d231411, 0x1ee5d63c, 0x9edf6b95 + data4 0x9f332aaa, 0x1dc92a84, 0x1f73fb7b, 0x1e32f100 + data4 0x9ea636f5, 0x9f6c3353, 0x9f405552, 0x1f33fd97 + data4 0x1e975291, 0x9e59a11e, 0x1e47b0ba, 0x9d8ad33e + data4 0x1ea51bf6, 0x1f25d782, 0x9ecf534d, 0x1f55436f + data4 0x1d0975e4, 0x9f0633a1, 0x1f3e840a, 0x1f523a4c + data4 0x9f53cbbc, 0x9c8b5661, 0x9f6bc8eb, 0x1f4f6c7b + data4 0x9ed9b376, 0x9f5b30b6, 0x1f64fa5e, 0x1cbcc3e0 + data4 0x1f343548, 0x1f62a6a2, 0x9f336abb, 0x9f1d15af + data4 0x1f476c83, 0x1ea86421, 0x1f33b2cf, 0x9e8f1348 + data4 0x1f6fa829, 0x9f30ee3a, 0x9ebd6146, 0x1f2db598 + data4 0x1ef9600d, 0x1f5b1427, 0x9edd741b, 0x1f51ef4e + data4 0x9f1aa57d, 0x9ee9b5e0, 0x9f17ecd7, 0x1ead71ff + data4 0x1f6c910e, 0x9e1837df, 0x9f0f17d9, 0x9e8350dd + data4 0x9d292f1b, 0x9e33b3ab, 0x9d6f0fe8, 0x9ed8c7cc + data4 0x9ec598c8, 0x9d56758c, 0x1e090c1e, 0x9ed4b941 + data4 0x9f1fc4cf, 0x1f63513a, 0x9edd0abc, 0x1e3924dd + data4 0x1f60d56f, 0x1ea84424, 0x9e88f4fb, 0x1f205c09 + data4 0x1ec9ae4e, 0x1d2d5738, 0x9f2c9f6d, 0x1e0765c2 + data4 0x1e8bbdd7, 0x9f16d9f1, 0x9ea62627, 0x1f13904c + data4 0x1e566ab8, 0x9dca3d1a, 0x9e91f2a1, 0x9f14641c + data4 0x9f278946, 0x1f490c1e, 0x1f575eb6, 0x1f50b3fd + data4 0x9da32efb, 0x1ea95e59, 0x9e41e058, 0x9eada15f + data4 0x9e4fe66c, 0x1f3abc98, 0x1f1b8d1e, 0x9ece97e4 + data4 0x1d188aed, 0x9e89b6ee, 0x1f287478, 0x9e8a161a + data4 0x1e4749f7, 0x9e68084a, 0x1e867f33, 0x9f462b63 + data4 0x1db30792, 0x1f59a767, 0x9d1da4ae, 0x9f472a33 + data4 0x1d1e91cd, 0x9f414824, 0x9f473d4f, 0x1f4b5783 + data4 0x9f5b04b8, 0x9f5c205b, 0x1f309617, 0x9f0d6852 + data4 0x9d96a609, 0x9f0965c2, 0x9e23f467, 0x9f089884 + data4 0x9ec71458, 0x9ed6e955, 0x1e5e8691, 0x1f5b2bbc 
+ data4 0x9f128268, 0x1ed40f5b, 0x1dc430ce, 0x1f345986 + data4 0x1d778f72, 0x1e9b11d6, 0x9f5a40be, 0x9e07f61a + data4 0x9ed641a7, 0x9f334787, 0x1e952fd0, 0x1edeb5e2 + data4 0x9e9f3eb1, 0x9e379fd9, 0x1f13102a, 0x9e5e80e1 + data4 0x1c757944, 0x1dae2260, 0x1f183ab7, 0x1e55d576 + data4 0x9e6bb99f, 0x9f52d7cb, 0x9e73a0f5, 0x1d4e1d14 + data4 0x9dd05b53, 0x1f2261e4, 0x9d4ee73d, 0x1ede515e + data4 0x1f22a573, 0x9ecac348, 0x1e6a2ac0, 0x1e2787d2 + data4 0x9eb64b87, 0x1f0c69c6, 0x9f470a01, 0x9d7c1686 + data4 0x1e468ebe, 0x9f21ee2f, 0x9ee52116, 0x9e20f715 + data4 0x1ed18533, 0x9f005b38, 0x9f20cb95, 0x1da72967 + data4 0x1f1ba5d7, 0x1e2f8b16, 0x9c794f96, 0x9ca74ea3 + data4 0x1f410555, 0x9eff2b96, 0x1ce8f0b1, 0x1f0cee77 + data4 0x1f191edd, 0x9ed5fcbc, 0x1f30f242, 0x9e0ad369 + data4 0x1ed8f3c8, 0x1f52bb0e, 0x9e9ce408, 0x1f18907f + data4 0x9ecdad40, 0x9e8af91d, 0x1d46698a, 0x9f4b93d6 + data4 0x9f3f5d33, 0x1e2e52f7, 0x9f13aeec, 0x9f3b1969 + data4 0x1f0996f4, 0x9f2a03df, 0x1e264767, 0x1f3ab1fb + data4 0x9f3193c9, 0x9f21ce22, 0x9eab624c, 0x9ecd8fb1 + data4 0x1eaf9a85, 0x1f0c6a2c, 0x1eecbe61, 0x1f3fead9 + data4 0x1f1d3a29, 0x1e9099ce, 0x1eadd875, 0x1e4dbfb8 + data4 0x9dc640d2, 0x1f413680, 0x9f3f57b3, 0x1dfa1553 + data4 0x1ec71c6b, 0x1e00cc00, 0x9f271e55, 0x1e5a88bb + data4 0x1f46cc2b, 0x1ee80ff9, 0x9e29c6f3, 0x1f15e229 + data4 0x9ea83d66, 0x1f37408e, 0x9dacb66e, 0x1e6f6259 + data4 0x9f106973, 0x1dd4e5ac, 0x1cbfdcc8, 0x9f231c9f + data4 0x9e8677e4, 0x9e9e695a, 0x1efd782b, 0x9dd26959 + data4 0x9e80af69, 0x1f386fb3, 0x1f022e8c, 0x9e839967 + data4 0x1ce6796f, 0x1e4c22c2, 0x1e57ef24, 0x1e919804 + data4 0x9d7ea090, 0x1e40140a, 0x1f261b46, 0x1db75be2 + data4 0x1f145019, 0x9e3102b9, 0x9e22507b, 0x1eae813c + data4 0x1f117e97, 0x1f282296, 0x1f3814b3, 0x1e17977b + data4 0x1f39d6ff, 0x9f1c81b9, 0x9eb5bcad, 0x1f0f596e + data4 0x1e757fd5, 0x9f090daa, 0x9f2532fc, 0x9eebafbb + data4 0x1f086556, 0x9eeedde8, 0x9f32e174, 0x1e33c030 + data4 0x1f1f145a, 0x1e6e556c, 0x1e419ffb, 0x9eb6019a + data4 0x9e872a2e, 
0x1e113136, 0x1e93096f, 0x1f39be40 + data4 0x1f1665ad, 0x9db81d7d, 0x9cd29091, 0x1e3f4af7 + data4 0x9f23176c, 0x9eccf9b3, 0x1f34fc6c, 0x9ed36894 + data4 0x1ef08e06, 0x9f3b46bb, 0x9f2c850b, 0x1f1565a4 + data4 0x1e887bc3, 0x1e92629c, 0x9f11ac9e, 0x9e5579f3 + data4 0x1e4d5790, 0x9ee1c3d1, 0x9e916aec, 0x9eb8d9b8 + data4 0x1db46105, 0x1e168663, 0x1f26a942, 0x9f0f0383 + data4 0x9f079032, 0x9ecae1d8, 0x1ed3b34c, 0x9edc5ee6 + data4 0x9e8a75a7, 0x1f3c3de2, 0x9ee5041e, 0x1f08c727 + data4 0x1d02d7ae, 0x9f36adda, 0x9ef9a857, 0x9ef5cb3a + data4 0x9eee73da, 0x9da5d629, 0x1e0e99be, 0x1e5159b9 + data4 0x1f2eac89, 0x9e8eedc5, 0x1dd0ec90, 0x1f229aff + data4 0x1ed9c3e6, 0x1e95c55a, 0x9f0c24e4, 0x1e8afed6 + data4 0x1e599a96, 0x1e881b21, 0x1eab84b9, 0x9ba2bb0e + data4 0x9e33ab10, 0x1f1710b5, 0x1ebfa271, 0x9e90bbc5 + data4 0x9f32515b, 0x9b32aae8, 0x1eda455c, 0x1da8186e + data4 0x9e8917ff, 0x1ec4d08e, 0x1c90069d, 0x9f2f1d29 + data4 0x9ecee86d, 0x9f234d1f, 0x1f370724, 0x1da87496 + data4 0x1e7959f0, 0x9e8ada34, 0x1f1c7f6f, 0x1edd576b + data4 0x9de91e8b, 0x1ec4ef89, 0x1f32078a, 0x1e9925e2 + data4 0x9d8eeccb, 0x9ea3d011, 0x1f231fdf, 0x9f1dbdfa + data4 0x1e7507a3, 0x1ec42614, 0x9e8693cb, 0x9ec68398 + data4 0x1d5b05fb, 0x1de32119, 0x9f003429, 0x9ec16d92 + data4 0x9f095315, 0x9f119d2c, 0x9ed0c984, 0x9f090662 + data4 0x9e59aa1f, 0x9ed4e64a, 0x9f2798a7, 0x9f23624d + data4 0x1e0467d9, 0x1f22e7e7, 0x1e915256, 0x9cb4df70 + data4 0x9e6f687c, 0x9e3c35e5, 0x9e5757ab, 0x9f031fa1 + data4 0x1f25bff7, 0x1f0e58c2, 0x1ef3ce04, 0x1f002ecb + data4 0x9ebdc836, 0x9ed657dd, 0x9f149441, 0x9e8544b2 + data4 0x1cd8ff1e, 0x1e9bb463, 0x1eaa1c5c, 0x1f200c1a + data4 0x1edbfbaf, 0x1f18724d, 0x9ed63c22, 0x9f08e045 + data4 0x1f13ad07, 0x9e949311, 0x9f0c50d4, 0x1e824516 + data4 0x1d5e52ba, 0x1d583fbd, 0x1e3b60a9, 0x9effe6d3 + data4 0x1f0d0508, 0x1f00be77, 0x9e404bfa, 0x9e1ca381 + data4 0x9f084dd8, 0x9e6db85d, 0x1db698e4, 0x9ebd1871 + data4 0x9ecc2679, 0x1ee68442, 0x1edb1050, 0x9dbc96a4 + data4 0x9f27c1f4, 0x1c99b756, 
0x1eb4400a, 0x9f24390a + data4 0x1d927875, 0x9f074faa, 0x1e9dc2c3, 0x1f13c0d2 + data4 0x1e3c9685, 0x9e6b6f75, 0x9db9cb31, 0x1ea5f3aa + data4 0x9d992c61, 0x1f1015e4, 0x1f194f70, 0x9e19d2b3 + data4 0x9d89116c, 0x1f23cd35, 0x1e33d3a2, 0x1ee331b8 + data4 0x1d5ba7ec, 0x9f273788, 0x9e6907f4, 0x9ed5f912 + data4 0x9edd458d, 0x1e2ca7b2, 0x1ef81fe4, 0x1dc7ade6 + data4 0x1e876e51, 0x9f04ec89, 0x1f1da63a, 0x1ec02bd0 + data4 0x9e71326f, 0x1e7847b4, 0x1f0de618, 0x9e036cb6 + data4 0x1eec61e2, 0x1ef1758b, 0x9ee880a3, 0x1ed269d7 + data4 0x1e27edd3, 0x9e8a81a1, 0x1eacb84d, 0x9e1aad37 + data4 0x1f1aa8f7, 0x1e9bbd90, 0x1ea1b61f, 0x9ed41c2f + data4 0x1dbb5dd6, 0x1f0ec733, 0x9df06b1b, 0x1e06fef1 + data4 0x9edede3a, 0x1edeb5e2, 0x1f0e63ee, 0x9db316bb + data4 0x9efc1ad3, 0x1f01fbb5, 0x9cc0d078, 0x1ea28b36 + data4 0x9e9dd205, 0x9e791534, 0x1da1c8d5, 0x9e8195cc + data4 0x1f0681a4, 0x1eeaf1e2, 0x9ef83b37, 0x9f22a92b + data4 0x1eabc4ce, 0x1f10eefb, 0x1e06d9aa, 0x1e7cacd5 + data4 0x1f1ea087, 0x1eb21983, 0x9f100c78, 0x1e840abe + data4 0x9efab66c, 0x1f183fa8, 0x9e84ee68, 0x9eea083d + data4 0x9ee23a74, 0x1f1351d7, 0x9ec5d42a, 0x9f071f57 + data4 0x9ef578d9, 0x9f1aa7e7, 0x1eb02044, 0x1f151a2e + data4 0x9c0dc8b2, 0x9ef4087a, 0x1ec12b93, 0x1c1a946b + data4 0x1e89946f, 0x9dafe8c3, 0x1d295288, 0x9e8497ab + data4 0x1ec000c6, 0x1e102f29, 0x1e542256, 0x1e67d44d + data4 0x1ef688d8, 0x1f0e0f29, 0x1e67861f, 0x1e869748 + data4 0x1ee6aa6e, 0x9e4d228b, 0x9e50be5b, 0x1e9fe225 + data4 0x9ea34102, 0x9e628a3b, 0x9ed9fd83, 0x1ecd7109 + data4 0x1f1864ff, 0x1ea19b76, 0x1db0d1c9, 0x9dff519b + data4 0x1e8fea71, 0x9ee82e9a, 0x9f08919b, 0x9ef5c8ae + data4 0x9ee446a4, 0x1ea59444, 0x1eb74230, 0x1ea13fbf + data4 0x9ea6a3ea, 0x1e5f2797, 0x9e0adb07, 0x9d3adadd + data4 0x1ebf2ee2, 0x1da19bfa, 0x1e8dea6d, 0x1ec4fea9 + data4 0x1e669f22, 0x1dc5f919, 0x9ed25caa, 0x1ee475b1 + data4 0x1ed0603e, 0x9eacb35c, 0x1dc00b27, 0x1e2f9991 + data4 0x1e7b0406, 0x1eaa3387, 0x9d865bde, 0x1eb78a48 + data4 0x1c40ae2e, 0x1ee9838b, 0x9f0f0d7f, 
0x1e3e5d26 + data4 0x1e99e7a6, 0x9e681ccf, 0x9e93ed65, 0x9eeb6a66 + data4 0x1e29e9af, 0x9e96f923, 0x9e74f11d, 0x9f1474da + data4 0x1eec2ea7, 0x1ebf7aa3, 0x9c25dcca, 0x9f0553c2 + data4 0x9e599efd, 0x1d2ab490, 0x1e95d7cd, 0x9ee4b20e + data4 0x9d988ce5, 0x9ef9787e, 0x9dbbba5b, 0x9f12c304 + data4 0x1e3b9d70, 0x1e7bcae8, 0x9d98bb6e, 0x9e8e6b01 + data4 0x9f07d03b, 0x9d67c822, 0x9f0ef69e, 0x1c7c0fe3 + data4 0x9e9bfbb9, 0x9e83b84b, 0x1efbf15e, 0x9ecfa6a6 + data4 0x9c91158e, 0x9ecf6770, 0x1ee1e3a8, 0x9dc95ec0 + data4 0x1ef603f7, 0x1d5e52ba, 0x1c477d1b, 0x9e955cd8 + data4 0x1ed665b0, 0x9e8376c4, 0x9c0ee88e, 0x1e8c989e + data4 0x1ea2df29, 0x9d961e5c, 0x1e101813, 0x1e7fffff + data4 0x9e5abff4, 0x1dbddd71, 0x1eb69100, 0x1e71f114 + data4 0x1e9ca798, 0x1ef62c8d, 0x9db4e55a, 0x1dbe69ce + data4 0x9ef1c01f, 0x1f044a2a, 0x9eb9e0d7, 0x9ee59745 + data4 0x9e874803, 0x1ea0b418, 0x9e13572a, 0x1ddbb3a2 + data4 0x9ec0e391, 0x1e89fba1, 0x1ee8b261, 0x9e5d25f0 + data4 0x9ef222cb, 0x9ef135ec, 0x1ea04b9a, 0x9f04291f + data4 0x9e969254, 0x9ee32f08, 0x9ed909d3, 0x9e362640 + data4 0x9ec20735, 0x1e50131b, 0x9ed4e049, 0x1ee8e817 + data4 0x1e1e09c0, 0x9ea643c5, 0x9e5a1ab6, 0x9e389059 + data4 0x1e560947, 0x1d02b877, 0x1e4475ab, 0x9ea9aaf6 + data4 0x1e95bc5e, 0x1eaf6afd, 0x1d43067d, 0x9d043821 + data4 0x9e97baa9, 0x1de5c4f9, 0x9e9a0069, 0x9e1b9944 + data4 0x1eb13686, 0x9eb907eb, 0x1e059589, 0x1cbd0f93 + data4 0x9eb7e6ae, 0x1e9fa175, 0x1ee5bdf4, 0x1e8052f7 + data4 0x9c80d1e3, 0x1bfbe28e, 0x9e672b3b, 0x9ecacf19 + data4 0x9e3c04be, 0x1dfe8c5c, 0x1e1ba9cb, 0x1eb40b1e + data4 0x1ec7e7f6, 0x9d0d45b3, 0x1ef0113b, 0x9a155fa3 + data4 0x1e28ec3b, 0x1e7ca8df, 0x9d2f91b4, 0x1eccd9ed + data4 0x9ed943bc, 0x9ccaab19, 0x9e8a5c58, 0x1ec3bca8 + data4 0x1ed78dc7, 0x9ed391a8, 0x9e938f6e, 0x9ec4a030 + data4 0x9e80346e, 0x1e7a4686, 0x9e284315, 0x9e39584c + data4 0x1ebdc9b4, 0x9e9cfce5, 0x9ef55c65, 0x1e2941e7 + data4 0x9efbe59f, 0x1d87c41b, 0x1e40befc, 0x1e3d05b5 + data4 0x1de9ea67, 0x1ec9a21c, 0x1decb69a, 0x1df6e75a + data4 
0x9e8030ab, 0x9db20540, 0x9ef1e977, 0x1e3cdc43 + data4 0x1e0492b0, 0x9e91d872, 0x1e775346, 0x9e939978 + data4 0x1eb2714e, 0x1e49a203, 0x9e10195a, 0x1ef1ffc3 + data4 0x9ea8b709, 0x9e832e27, 0x1ed5ac3b, 0x1edb20a6 + data4 0x1e4dbd4e, 0x1efbb932, 0x1d8170ec, 0x1e6c4849 + data4 0x1f008e17, 0x1e8000c4, 0x1d855ecf, 0x9e37cb85 + data4 0x1ecffdf5, 0x1eba6519, 0x9edbe600, 0x1ea3e5e7 + data4 0x1ed4fb39, 0x1f00be77, 0x1e6f4484, 0x9e9e7107 + data4 0x9e30b29d, 0x9ee6e174, 0x1e3a2656, 0x9dd72f3f + data4 0x9ee12138, 0x1ed16fed, 0x9ece8a02, 0x9ca5b249 + data4 0x9eafd508, 0x9ef0e9fc, 0x1d1307ac, 0x1eecee20 + data4 0x1cf60c6f, 0x9d556216, 0x9eaed175, 0x9ec919f4 + data4 0x1ec2c988, 0x1cd82772, 0x9dc99456, 0x1eab0467 + data4 0x1e89b36f, 0x1c757944, 0x1eef9abd, 0x9e98664d LOCAL_OBJECT_END(D_table) @@ -709,184 +749,238 @@ LOCAL_OBJECT_END(D_table) GLOBAL_LIBM_ENTRY(cbrtl) { .mfi - getf.sig r3=f8 - // will continue only for normal/denormal numbers - fclass.nm.unc p12,p7 = f8, 0x1b - // r2 = pointer to C_1...C_6 followed by T_table - addl r2 = @ltoff(poly_coeffs), gp;; + getf.sig GR_ARGSIG = f8 + // will continue on main path only for normal/denormal numbers + // all other values will be filtered out and will exit early + fclass.nm.unc p12, p7 = f8, 0x1b + // GR_ADDR = pointer to C_1...C_6 followed by T_table + addl GR_ADDR = @ltoff(poly_coeffs), gp } -{.mfi - // r29=2/3*bias -63=0xaaaa-0x3f=0xaa6b - mov r29=0xaa6b - // normalize a - fma.s1 f14=f8,f1,f0 - // r27 = pointer to D table - addl r27 = @ltoff(D_table), gp;; +{ .mfi + // GR_BIAS23 = 2/3*bias -63 = 0xaaaa-0x3f = 0xaa6b + mov GR_BIAS23 = 0xaa6b + // normalize a + fma.s1 FR_XNORM = f8, f1, f0 + // GR_D_ADDR = pointer to D table + addl GR_D_ADDR = @ltoff(D_table), gp } -{.mib - nop.m 0 - (p7) cmp.eq p12,p0=r3,r0 - nop.b 0;; +;; + +{ .mmf + // load start address for C_1...C_6 followed by T_table + ld8 GR_C_START = [ GR_ADDR ] + // load start address of D table + ld8 GR_D_START = [ GR_D_ADDR ] + // y = frcpa(a) + frcpa.s1 FR_RCP, p6 = 
f1, f8 } -{.mfb - // load start address for C_1...C_6 followed by T_table - ld8 r2=[r2] - (p12) fma.s0 f8=f8,f1,f0 - (p12) br.ret.spnt b0;; +;; + +{ .mmi + // get normalized significand + getf.sig GR_NORMSIG = FR_XNORM + // get exponent + getf.exp GR_NORMEXPSGN = FR_XNORM + (p7) cmp.eq p12, p0 = GR_ARGSIG, r0 } -{.mmf - // load C_1 - ldfe f7=[r2],16 - // load start address of D table - ld8 r27=[r27] - // y=frcpa(a) - frcpa.s0 f8,p6=f1,f8;; +;; + +{ .mii + // load C_1 + ldfe FR_C1 = [ GR_C_START ], 16 + mov GR_SGNMASK = 0x20000 + nop.i 0 } -{.mmi - // load C_2 - ldfe f9=[r2],16;; - // load C_3, C_4 - ldfpd f10,f11=[r2],16 - nop.i 0;; +;; + +{ .mfb + // load C_2 + ldfe FR_C2 = [ GR_C_START ], 16 + (p12) fma.s0 f8 = f8, f1, f0 + // NaN/Infinities exit early + (p12) br.ret.spnt b0 } -{.mmi - // get normalized significand - getf.sig r23=f14 - // get exponent - getf.exp r24=f14 - mov r25=0x20000;; +;; + +{ .mfi + // load C_3, C_4 + ldfpd FR_C3, FR_C4 = [ GR_C_START ], 16 + // y = frcpa(a), set flags and result when argument is 0 + // only used when p6=0 + frcpa.s0 f8, p0 = f1, f8 + nop.i 0 } -{.mii - // get r26=sign - and r26=r24,r25 - // eliminate leading 1 from r23=2nd table index - shl r23=r23,1 - // eliminate sign from exponent (r25) - andcm r25=r24,r25;; +;; + +{ .mii + // get GR_SIGN = sign + and GR_SIGN = GR_NORMEXPSGN, GR_SGNMASK + // eliminate leading 1 from GR_NORMSIG = 2nd table index + shl GR_INDEX2 = GR_NORMSIG, 1 + // eliminate sign from exponent + andcm GR_NORMEXP = GR_NORMEXPSGN, GR_SGNMASK } -{.mfi - // load C_5,C_6 - (p6) ldfpd f12,f13=[r2],16 - // r=1-a*y - (p6) fnma.s1 f6=f8,f14,f1 - // 1: exponent*=5; // (2^{16}-1)/3=0x5555 - shladd r24=r25,2,r25;; +;; + +{ .mfi + // load C_5, C_6 + (p6) ldfpd FR_C5, FR_C6 = [ GR_C_START ], 16 + // r = 1-a*y + (p6) fnma.s1 FR_R = FR_RCP, FR_XNORM, f1 + // Start computation of floor(exponent/3) by + // computing (2^20+2)/3*exponent = exponent*0x55556 + // 1: exponent* = 5; + // (2^{16}-1)/3 = 0x5555: + // will form 
0x5555*exponent by using shladd's + shladd GR_EXP5 = GR_NORMEXP, 2, GR_NORMEXP } -{.mib - // r30=(5*expon)*16 - shladd r30=r24,4,r0 - // r28=3*exponent - shladd r28=r25,1,r25 - nop.b 0;; +;; + +{ .mib + // Next several integer steps compute floor(exponent/3) + // GR_TMP1 = (5*expon)*16 + shladd GR_TMP1 = GR_EXP5, 4, r0 + // GR_EXP3 = 3*exponent + shladd GR_EXP3 = GR_NORMEXP, 1, GR_NORMEXP + nop.b 0 } -{.mmi - // r28=6*exponent - shladd r28=r28,1,r0 - // r24=17*expon - add r24=r24,r30 - // r23=2nd table index (8 bits) - shr.u r23=r23,56;; +;; + +{ .mmi + // GR_EXP6 = 6*exponent + shladd GR_EXP6 = GR_EXP3, 1, r0 + // GR_EXP17 = 17*expon + add GR_EXP17 = GR_EXP5, GR_TMP1 + // GR_IX2 = 2nd table index (8 bits) + shr.u GR_IX2 = GR_INDEX2, 56 } -{.mmi - // adjust T_table pointer by 2nd index - shladd r2=r23,3,r2 - // adjust D_table pointer by 2nd index - shladd r27=r23,2,r27 - // r30=(17*expon)*16^2 - shl r30=r24,8;; +;; + +{ .mmi + // adjust T_table pointer by 2nd index + shladd GR_T_INDEX = GR_IX2, 3, GR_C_START + // adjust D_table pointer by 2nd index + shladd GR_D_INDEX = GR_IX2, 2, GR_D_START + // GR_TMP2 = (17*expon)*16^2 + shl GR_TMP2 = GR_EXP17, 8 } -{.mmi - // r24=expon*(2^16-1)/3 - add r24=r24,r30;; - // r24=expon*(2^20+2)/3=expon*0x55556 - shladd r24=r24,4,r28 - nop.i 0;; +;; + +{ .mmi + // GR_TMP3 = expon*(2^16-1)/3 + add GR_TMP3 = GR_EXP17, GR_TMP2 +;; + // GR_TMP4 = expon*(2^20+2)/3 = expon*0x55556 + shladd GR_TMP4 = GR_TMP3, 4, GR_EXP6 + nop.i 0 } -{.mii - nop.m 0 - // r24=floor(expon/3) - shr.u r24=r24,20 - nop.i 0;; +;; + +{ .mii + nop.m 0 + // GR_EXP_RES = floor(expon/3) + shr.u GR_EXP_RES = GR_TMP4, 20 + nop.i 0 } -{.mmi - nop.m 0 - // r28=3*exponent - shladd r28=r24,1,r24 - // bias exponent - add r24=r29,r24;; +;; + +{ .mmi + nop.m 0 + // r16 = 3*exponent + shladd r16 = GR_EXP_RES, 1, GR_EXP_RES + // bias exponent + add GR_EXPBIAS = GR_BIAS23, GR_EXP_RES } -{.mmi - // get remainder of exponent/3 - sub r25=r25,r28;; - // add sign to exponent - or 
r24=r24,r26 - // remainder <<=8 - shl r25=r25,8;; -} -{.mfi - // adjust D_table pointer by 1st index - shladd r27=r25,2,r27 - // P_1=C_1+C_2*r - (p6) fma.s1 f7=f9,f6,f7 - // adjust T_table pointer by 1st index - shladd r2=r25,3,r2 +;; + +{ .mmi + // get remainder of exponent/3 + sub GR_EXP_MOD_3 = GR_NORMEXP, r16 +;; + // add sign to exponent + or GR_EXPSIGNRES = GR_EXPBIAS, GR_SIGN + // remainder << = 8 + shl GR_REMTMP = GR_EXP_MOD_3, 8 } -{.mfi - // f14=sign*2^{exponent/3} - (p6) setf.exp f14=r24 - // r2=r*r - (p6) fma.s1 f9=f6,f6,f0 - nop.i 0;; +;; + +{ .mfi + // adjust D_table pointer by 1st index + shladd GR_IX_D = GR_REMTMP, 2, GR_D_INDEX + // P_1 = C_1+C_2*r + (p6) fma.s1 FR_P1 = FR_C2, FR_R, FR_C1 + // adjust T_table pointer by 1st index + shladd GR_IX_T = GR_REMTMP, 3, GR_T_INDEX } -{.mfi - // load D - (p6) ldfs f15=[r27] - // P_2=C_3+C_4*r - (p6) fma.s1 f10=f11,f6,f10 - nop.i 0 +{ .mfi + // FR_SGNEXP = sign*2^{exponent/3} + (p6) setf.exp FR_SGNEXP = GR_EXPSIGNRES + // r^2 = r*r + (p6) fma.s1 FR_R2 = FR_R, FR_R, f0 + nop.i 0 } -{.mfi - // load T - (p6) ldf8 f8=[r2] - // P_3=C_5+C_6*r - (p6) fma.s1 f12=f13,f6,f12 - nop.i 0;; +;; + +{ .mfi + // load D + (p6) ldfs FR_D = [ GR_IX_D ] + // P_2 = C_3+C_4*r + (p6) fma.s1 FR_P2 = FR_C4, FR_R, FR_C3 + nop.i 0 } -{.mfi - nop.m 0 - // P_4=D-r*P_1 - (p6) fnma.s1 f15=f6,f7,f15 - nop.i 0 +{ .mfi + // load T + (p6) ldf8 FR_T = [ GR_IX_T ] + // P_3 = C_5+C_6*r + (p6) fma.s1 FR_P3 = FR_C6, FR_R, FR_C5 + nop.i 0 } -{.mfi - nop.m 0 - // r3=r*r2 - (p6) fma.s1 f6=f6,f9,f0 - nop.i 0;; +;; + +{ .mfi + nop.m 0 + // P_4 = D-r*P_1 + (p6) fnma.s1 FR_P4 = FR_R, FR_P1, FR_D + nop.i 0 } -{.mfi - nop.m 0 - // P_5=P_2+r2*P_3 - (p6) fma.s1 f10=f9,f12,f10 - nop.i 0;; +{ .mfi + nop.m 0 + // r^3 = r*r^2 + (p6) fma.s1 FR_R3 = FR_R, FR_R2, f0 + nop.i 0 } -{.mfi - nop.m 0 - // T=T*(sign*2^{exponent/3}) - (p6) fma.s1 f8=f8,f14,f0 - nop.i 0 +;; + +{ .mfi + nop.m 0 + // P_5 = P_2+r2*P_3 + (p6) fma.s1 FR_P5 = FR_R2, FR_P3, FR_P2 + nop.i 0 } -{.mfi 
- nop.m 0 - // P=P_4-r3*P_5 - (p6) fnma.s1 f6=f6,f10,f15 - nop.i 0;; +;; + +{ .mfi + nop.m 0 + // T = T*(sign*2^{exponent/3}) + (p6) fma.s1 FR_TF = FR_T, FR_SGNEXP, f0 + nop.i 0 } -{.mfb - nop.m 0 - // result=T+T*p - (p6) fma.s0 f8=f8,f6,f8 - br.ret.sptk b0;; +{ .mfi + nop.m 0 + // P = P_4-r3*P_5 + (p6) fnma.s1 FR_P = FR_R3, FR_P5, FR_P4 + nop.i 0 } +;; + +{ .mfb + nop.m 0 + // result = T+T*p + (p6) fma.s0 f8 = FR_TF, FR_P, FR_TF + br.ret.sptk b0 +} +;; + GLOBAL_LIBM_END(cbrtl) + diff --git a/sysdeps/ia64/fpu/s_cos.S b/sysdeps/ia64/fpu/s_cos.S index 84c177a..bf8997b 100644 --- a/sysdeps/ia64/fpu/s_cos.S +++ b/sysdeps/ia64/fpu/s_cos.S @@ -1,7 +1,7 @@ .file "sincos.s" -// Copyright (c) 2000 - 2003, Intel Corporation +// Copyright (c) 2000 - 2004, Intel Corporation // All rights reserved. // // Contributed 2000 by the Intel Numerics Group, Intel Corporation @@ -51,6 +51,8 @@ // 06/03/02 Insure inexact flag set for large arg result // 09/05/02 Work range is widened by reduction strengthen (3 parts of Pi/16) // 02/10/03 Reordered header: .section, .global, .proc, .align +// 08/08/03 Improved performance +// 10/28/04 Saved sincos_r_sincos to avoid clobber by dynamic loader // API //============================================================== @@ -170,11 +172,11 @@ // Registers used //============================================================== // general input registers: -// r14 -> r19 -// r32 -> r45 +// r14 -> r26 +// r32 -> r35 // predicate registers used: -// p6 -> p14 +// p6 -> p11 // floating-point registers used // f9 -> f15 @@ -236,16 +238,6 @@ fp_tmp = f61 ///////////////////////////////////////////////////////////// -sincos_AD_1 = r33 -sincos_AD_2 = r34 -sincos_exp_limit = r35 -sincos_r_signexp = r36 -sincos_AD_beta_table = r37 -sincos_r_sincos = r38 - -sincos_r_exp = r39 -sincos_r_17_ones = r40 - sincos_GR_sig_inv_pi_by_16 = r14 sincos_GR_rshf_2to61 = r15 sincos_GR_rshf = r16 @@ -254,11 +246,18 @@ sincos_GR_n = r18 sincos_GR_m = r19 sincos_GR_32m = r19 
sincos_GR_all_ones = r19 +sincos_AD_1 = r20 +sincos_AD_2 = r21 +sincos_exp_limit = r22 +sincos_r_signexp = r23 +sincos_r_17_ones = r24 +sincos_r_sincos = r25 +sincos_r_exp = r26 -gr_tmp = r41 -GR_SAVE_PFS = r41 -GR_SAVE_B0 = r42 -GR_SAVE_GP = r43 +GR_SAVE_PFS = r33 +GR_SAVE_B0 = r34 +GR_SAVE_GP = r35 +GR_SAVE_r_sincos = r36 RODATA @@ -405,7 +404,7 @@ LOCAL_OBJECT_END(double_sin_cos_beta_k4) GLOBAL_IEEE754_ENTRY(sin) { .mlx - alloc r32 = ar.pfs, 1, 13, 0, 0 + getf.exp sincos_r_signexp = f8 movl sincos_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A // signd of 16/pi } { .mlx @@ -427,10 +426,11 @@ GLOBAL_IEEE754_ENTRY(sin) ;; GLOBAL_IEEE754_END(sin) + GLOBAL_IEEE754_ENTRY(cos) { .mlx - alloc r32 = ar.pfs, 1, 13, 0, 0 + getf.exp sincos_r_signexp = f8 movl sincos_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A // signd of 16/pi } { .mlx @@ -464,7 +464,6 @@ _SINCOS_COMMON: // Form two constants we need // 16/pi * 2^-2 * 2^63, scaled by 2^61 since we just loaded the significand // 1.1000...000 * 2^(63+63-2) to right shift int(W) into the low significand -// fcmp used to set denormal, and invalid on snans { .mfi setf.sig sincos_SIG_INV_PI_BY_16_2TO61 = sincos_GR_sig_inv_pi_by_16 fclass.m p6,p0 = f8, 0xe7 // if x = 0,inf,nan @@ -480,10 +479,15 @@ _SINCOS_COMMON: // 2^-61 for scaling Nfloat // 0x1001a is register_bias + 27. 
// So if f8 >= 2^27, go to large argument routines -{ .mmi - getf.exp sincos_r_signexp = f8 +{ .mfi + alloc r32 = ar.pfs, 1, 4, 0, 0 + fclass.m p11,p0 = f8, 0x0b // Test for x=unorm + mov sincos_GR_all_ones = -1 // For "inexect" constant create +} +{ .mib setf.exp sincos_2TOM61 = sincos_GR_exp_2tom61 - addl gr_tmp = -1,r0 // For "inexect" constant create + nop.i 999 +(p6) br.cond.spnt _SINCOS_SPECIAL_ARGS } ;; @@ -493,41 +497,31 @@ _SINCOS_COMMON: { .mmb ldfe sincos_Pi_by_16_1 = [sincos_AD_1],16 setf.d sincos_RSHF = sincos_GR_rshf -(p6) br.cond.spnt _SINCOS_SPECIAL_ARGS +(p11) br.cond.spnt _SINCOS_UNORM // Branch if x=unorm } ;; +_SINCOS_COMMON2: +// Return here if x=unorm +// Create constant used to set inexact { .mmi ldfe sincos_Pi_by_16_2 = [sincos_AD_1],16 - setf.sig fp_tmp = gr_tmp // constant for inexact set + setf.sig fp_tmp = sincos_GR_all_ones nop.i 999 };; +// Select exponent (17 lsb) { .mfi ldfe sincos_Pi_by_16_3 = [sincos_AD_1],16 nop.f 999 - nop.i 999 + dep.z sincos_r_exp = sincos_r_signexp, 0, 17 };; // Polynomial coefficients (Q4, P4, Q3, P3, Q2, Q1, P2, P1) loading -{ .mmi - ldfpd sincos_P4,sincos_Q4 = [sincos_AD_1],16 - nop.m 999 - nop.i 999 -};; - -// Select exponent (17 lsb) -{ .mmi - ldfpd sincos_P3,sincos_Q3 = [sincos_AD_1],16 - nop.m 999 - dep.z sincos_r_exp = sincos_r_signexp, 0, 17 -} -;; - // p10 is true if we must call routines to handle larger arguments // p10 is true if f8 exp is >= 0x1001a (2^27) { .mmb - ldfpd sincos_P2,sincos_Q2 = [sincos_AD_1],16 + ldfpd sincos_P4,sincos_Q4 = [sincos_AD_1],16 cmp.ge p10,p0 = sincos_r_exp,sincos_exp_limit (p10) br.cond.spnt _SINCOS_LARGE_ARGS // Go to "large args" routine };; @@ -536,66 +530,61 @@ _SINCOS_COMMON: // Multiply x by scaled 16/pi and add large const to shift integer part of W to // rightmost bits of significand { .mfi - ldfpd sincos_P1,sincos_Q1 = [sincos_AD_1],16 + ldfpd sincos_P3,sincos_Q3 = [sincos_AD_1],16 fma.s1 sincos_W_2TO61_RSH = 
sincos_NORM_f8,sincos_SIG_INV_PI_BY_16_2TO61,sincos_RSHF_2TO61 nop.i 999 };; +// get N = (int)sincos_int_Nfloat // sincos_NFLOAT = Round_Int_Nearest(sincos_W) // This is done by scaling back by 2^-61 and subtracting the shift constant -{ .mfi - nop.m 999 +{ .mmf + getf.sig sincos_GR_n = sincos_W_2TO61_RSH + ldfpd sincos_P2,sincos_Q2 = [sincos_AD_1],16 fms.s1 sincos_NFLOAT = sincos_W_2TO61_RSH,sincos_2TOM61,sincos_RSHF - nop.i 999 };; - -// get N = (int)sincos_int_Nfloat +// sincos_r = -sincos_Nfloat * sincos_Pi_by_16_1 + x { .mfi - getf.sig sincos_GR_n = sincos_W_2TO61_RSH - nop.f 999 + ldfpd sincos_P1,sincos_Q1 = [sincos_AD_1],16 + fnma.s1 sincos_r = sincos_NFLOAT, sincos_Pi_by_16_1, sincos_NORM_f8 nop.i 999 };; // Add 2^(k-1) (which is in sincos_r_sincos) to N -// sincos_r = -sincos_Nfloat * sincos_Pi_by_16_1 + x -{ .mfi +{ .mmi add sincos_GR_n = sincos_GR_n, sincos_r_sincos - fnma.s1 sincos_r = sincos_NFLOAT, sincos_Pi_by_16_1, sincos_NORM_f8 +;; +// Get M (least k+1 bits of N) + and sincos_GR_m = 0x1f,sincos_GR_n nop.i 999 };; -// Get M (least k+1 bits of N) -{ .mmi - and sincos_GR_m = 0x1f,sincos_GR_n;; +// sincos_r = sincos_r -sincos_Nfloat * sincos_Pi_by_16_2 +{ .mfi nop.m 999 + fnma.s1 sincos_r = sincos_NFLOAT, sincos_Pi_by_16_2, sincos_r shl sincos_GR_32m = sincos_GR_m,5 };; // Add 32*M to address of sin_cos_beta table +// For sin denorm. - set uflow { .mfi add sincos_AD_2 = sincos_GR_32m, sincos_AD_1 -(p8) fclass.m.unc p10,p0 = f8,0x0b // For sin denorm. 
- set uflow +(p8) fclass.m.unc p10,p0 = f8,0x0b nop.i 999 };; // Load Sin and Cos table value using obtained index m (sincosf_AD_2) { .mfi ldfe sincos_Sm = [sincos_AD_2],16 -(p9) fclass.m.unc p11,p0 = f8,0x0b // For cos denorm - set denorm - nop.i 999 -};; - -// sincos_r = sincos_r -sincos_Nfloat * sincos_Pi_by_16_2 -{ .mfi - ldfe sincos_Cm = [sincos_AD_2] - fnma.s1 sincos_r = sincos_NFLOAT, sincos_Pi_by_16_2, sincos_r + nop.f 999 nop.i 999 };; // get rsq = r*r { .mfi - nop.m 999 + ldfe sincos_Cm = [sincos_AD_2] fma.s1 sincos_rsq = sincos_r, sincos_r, f0 // r^2 = r*r nop.i 999 } @@ -660,7 +649,6 @@ _SINCOS_COMMON: fma.s1 sincos_Q = sincos_rsq, sincos_Q_temp2, sincos_Q1 nop.i 999 } - { .mfi nop.m 999 fma.s1 sincos_P = sincos_rsq, sincos_P_temp2, sincos_P1 @@ -675,7 +663,6 @@ _SINCOS_COMMON: fma.s1 sincos_Q = sincos_srsq,sincos_Q, sincos_Sm nop.i 999 } - { .mfi nop.m 999 fma.s1 sincos_P = sincos_rcub,sincos_P, sincos_r_exact @@ -683,19 +670,12 @@ _SINCOS_COMMON: };; // If sin(denormal), force underflow to be set -.pred.rel "mutex",p10,p11 { .mfi nop.m 999 -(p10) fmpy.d.s0 fp_tmp = f8,f8 // forces underflow flag - nop.i 999 // for denormal sine args -} -{ .mfi - nop.m 999 -(p11) fma.d.s0 fp_tmp = f8,f1, f8 // forces denormal flag - nop.i 999 // for denormal cosine args +(p10) fmpy.d.s0 fp_tmp = sincos_NORM_f8,sincos_NORM_f8 + nop.i 999 };; - // Final calculation // result = C[m]*P + Q { .mfb @@ -724,13 +704,22 @@ _SINCOS_SPECIAL_ARGS: br.ret.sptk b0 // Exit for x = 0/Inf/NaN path };; +_SINCOS_UNORM: +// Here if x=unorm +{ .mfb + getf.exp sincos_r_signexp = sincos_NORM_f8 // Get signexp of x + fcmp.eq.s0 p11,p0 = f8, f0 // Dummy op to set denorm flag + br.cond.sptk _SINCOS_COMMON2 // Return to main path +};; + GLOBAL_IEEE754_END(cos) + //////////// x >= 2^27 - large arguments routine call //////////// LOCAL_LIBM_ENTRY(__libm_callout_sincos) _SINCOS_LARGE_ARGS: .prologue { .mfi - mov sincos_GR_all_ones = -1 // 0xffffffff + mov GR_SAVE_r_sincos = sincos_r_sincos // Save 
sin or cos nop.f 999 .save ar.pfs,GR_SAVE_PFS mov GR_SAVE_PFS = ar.pfs @@ -753,7 +742,7 @@ _SINCOS_LARGE_ARGS: };; { .mbb - cmp.ne p9,p0 = sincos_r_sincos, r0 // set p9 if cos + cmp.ne p9,p0 = GR_SAVE_r_sincos, r0 // set p9 if cos nop.b 999 (p9) br.call.sptk.many b0 = __libm_cos_large# // cos(large_X) };; diff --git a/sysdeps/ia64/fpu/s_cosf.S b/sysdeps/ia64/fpu/s_cosf.S index 89cf823..a588938 100644 --- a/sysdeps/ia64/fpu/s_cosf.S +++ b/sysdeps/ia64/fpu/s_cosf.S @@ -408,6 +408,7 @@ GLOBAL_IEEE754_ENTRY(sinf) };; GLOBAL_IEEE754_END(sinf) + GLOBAL_IEEE754_ENTRY(cosf) { .mlx @@ -657,6 +658,7 @@ _SINCOSF_SPECIAL_ARGS: };; GLOBAL_IEEE754_END(cosf) + //////////// x >= 2^24 - large arguments routine call //////////// LOCAL_LIBM_ENTRY(__libm_callout_sincosf) _SINCOSF_LARGE_ARGS: diff --git a/sysdeps/ia64/fpu/s_cosl.S b/sysdeps/ia64/fpu/s_cosl.S index 374e822..8d71e50 100644 --- a/sysdeps/ia64/fpu/s_cosl.S +++ b/sysdeps/ia64/fpu/s_cosl.S @@ -1,7 +1,7 @@ .file "sincosl.s" -// Copyright (c) 2000 - 2003, Intel Corporation +// Copyright (c) 2000 - 2004, Intel Corporation // All rights reserved. 
// // Contributed 2000 by the Intel Numerics Group, Intel Corporation @@ -47,6 +47,8 @@ // 05/13/02 Changed interface to __libm_pi_by_2_reduce // 02/10/03 Reordered header: .section, .global, .proc, .align; // used data8 for long double table values +// 10/13/03 Corrected final .endp name to match .proc +// 10/26/04 Avoided using r14-31 as scratch so not clobbered by dynamic loader // //********************************************************************* // @@ -63,8 +65,7 @@ // f32-f99 // // General Purpose Registers: -// r32-r43 -// r44-r45 (Used to pass arguments to pi_by_2 reduce routine) +// r32-r58 // // Predicate Registers: p6-p13 // @@ -715,20 +716,6 @@ FR_PP_1_lo = f98 FR_ArgPrime = f99 FR_inexact = f100 -GR_sig_inv_pi = r14 -GR_rshf_2to64 = r15 -GR_exp_2tom64 = r16 -GR_rshf = r17 -GR_ad_p = r18 -GR_ad_d = r19 -GR_ad_pp = r20 -GR_ad_qq = r21 -GR_ad_c = r22 -GR_ad_s = r23 -GR_ad_ce = r24 -GR_ad_se = r25 -GR_ad_m14 = r26 -GR_ad_s1 = r27 GR_exp_m2_to_m3= r36 GR_N_Inc = r37 GR_Sin_or_Cos = r38 @@ -739,6 +726,21 @@ GR_exp_2_to_63 = r42 GR_exp_2_to_m3 = r43 GR_exp_2_to_24 = r44 +GR_sig_inv_pi = r45 +GR_rshf_2to64 = r46 +GR_exp_2tom64 = r47 +GR_rshf = r48 +GR_ad_p = r49 +GR_ad_d = r50 +GR_ad_pp = r51 +GR_ad_qq = r52 +GR_ad_c = r53 +GR_ad_s = r54 +GR_ad_ce = r55 +GR_ad_se = r56 +GR_ad_m14 = r57 +GR_ad_s1 = r58 + // Added for unwind support GR_SAVE_B0 = r39 @@ -750,7 +752,7 @@ GR_SAVE_PFS = r41 GLOBAL_IEEE754_ENTRY(sinl) { .mlx - alloc r32 = ar.pfs,0,12,2,0 + alloc r32 = ar.pfs,0,27,2,0 movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi } { .mlx @@ -772,9 +774,10 @@ GLOBAL_IEEE754_ENTRY(sinl) ;; GLOBAL_IEEE754_END(sinl) + GLOBAL_IEEE754_ENTRY(cosl) { .mlx - alloc r32 = ar.pfs,0,12,2,0 + alloc r32 = ar.pfs,0,27,2,0 movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi } { .mlx @@ -2285,6 +2288,7 @@ SINCOSL_SPECIAL: } GLOBAL_IEEE754_END(cosl) + // ******************************************************************* // 
******************************************************************* // ******************************************************************* @@ -2299,7 +2303,7 @@ GLOBAL_IEEE754_END(cosl) // c is in f9 // N is in r8 // Be sure to allocate at least 2 GP registers as output registers for -// __libm_pi_by_2_reduce. This routine uses r49-50. These are used as +// __libm_pi_by_2_reduce. This routine uses r59-60. These are used as // scratch registers within the __libm_pi_by_2_reduce routine (for speed). // // We know also that __libm_pi_by_2_reduce preserves f10-15, f71-127. We @@ -2356,6 +2360,6 @@ SINCOSL_ARG_TOO_LARGE: br.cond.sptk SINCOSL_NORMAL_R // Branch if |r|>=2^-3 for |x| >= 2^63 };; -.endp +LOCAL_LIBM_END(__libm_callout) .type __libm_pi_by_2_reduce#,@function .global __libm_pi_by_2_reduce# diff --git a/sysdeps/ia64/fpu/s_erf.S b/sysdeps/ia64/fpu/s_erf.S index 8b8cc7f..3abcd3e 100644 --- a/sysdeps/ia64/fpu/s_erf.S +++ b/sysdeps/ia64/fpu/s_erf.S @@ -922,3 +922,4 @@ erf_denormal: GLOBAL_LIBM_END(erf) + diff --git a/sysdeps/ia64/fpu/s_erfc.S b/sysdeps/ia64/fpu/s_erfc.S index 8b22327..3b1b583 100644 --- a/sysdeps/ia64/fpu/s_erfc.S +++ b/sysdeps/ia64/fpu/s_erfc.S @@ -1135,6 +1135,7 @@ GLOBAL_LIBM_ENTRY(erfc) };; GLOBAL_LIBM_END(erfc) + // call via (p15) br.cond.spnt __libm_error_region // for x > ARG_ASYMP = 28.0 // or diff --git a/sysdeps/ia64/fpu/s_erfcf.S b/sysdeps/ia64/fpu/s_erfcf.S index 7d9e2a9..e09ce98 100644 --- a/sysdeps/ia64/fpu/s_erfcf.S +++ b/sysdeps/ia64/fpu/s_erfcf.S @@ -918,6 +918,7 @@ SPECIAL: GLOBAL_LIBM_END(erfcf) + // Call via (p10) br.cond.spnt __libm_error_region // for UnfBound < x < = POS_ARG_ASYMP // and diff --git a/sysdeps/ia64/fpu/s_erfcl.S b/sysdeps/ia64/fpu/s_erfcl.S index f06e26f..11f66bc 100644 --- a/sysdeps/ia64/fpu/s_erfcl.S +++ b/sysdeps/ia64/fpu/s_erfcl.S @@ -2002,6 +2002,7 @@ ERFC_RESULT: };; GLOBAL_LIBM_END(erfcl) + // call via (p15) br.cond.spnt __libm_error_region // for x > POS_ARG_ASYMP // or diff --git 
a/sysdeps/ia64/fpu/s_erfl.S b/sysdeps/ia64/fpu/s_erfl.S index 902539b..10da22c 100644 --- a/sysdeps/ia64/fpu/s_erfl.S +++ b/sysdeps/ia64/fpu/s_erfl.S @@ -1237,3 +1237,4 @@ _0_to_1o8: GLOBAL_LIBM_END(erfl) + diff --git a/sysdeps/ia64/fpu/s_expm1.S b/sysdeps/ia64/fpu/s_expm1.S index 41b9954..5d1fd8c 100644 --- a/sysdeps/ia64/fpu/s_expm1.S +++ b/sysdeps/ia64/fpu/s_expm1.S @@ -831,6 +831,7 @@ EXPM1_CERTAIN_MINUS_ONE: GLOBAL_IEEE754_END(expm1) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/s_expm1f.S b/sysdeps/ia64/fpu/s_expm1f.S index 0c5f2e6..311be06 100644 --- a/sysdeps/ia64/fpu/s_expm1f.S +++ b/sysdeps/ia64/fpu/s_expm1f.S @@ -612,6 +612,7 @@ EXPM1_CERTAIN_OVERFLOW: GLOBAL_IEEE754_END(expm1f) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/s_expm1l.S b/sysdeps/ia64/fpu/s_expm1l.S index 069856d..5f135fa 100644 --- a/sysdeps/ia64/fpu/s_expm1l.S +++ b/sysdeps/ia64/fpu/s_expm1l.S @@ -49,6 +49,7 @@ // used data8 for long double table values // 03/11/03 Improved accuracy and performance, corrected missing inexact flags // 04/17/03 Eliminated misplaced and unused data label +// 12/15/03 Eliminated call to error support on expm1l underflow // //********************************************************************* // @@ -677,6 +678,7 @@ GLOBAL_IEEE754_ENTRY(expm1l) GLOBAL_IEEE754_END(expm1l) + GLOBAL_IEEE754_ENTRY(expl) // // Set p7 false for exp, p6 true @@ -1287,28 +1289,14 @@ EXP_POSSIBLE_UNDERFLOW: { .mfi nop.m 999 -(p7) fclass.m.unc p12, p0 = FR_ftz, 0x00F // If expm1, FTZ result denorm, zero? - nop.i 999 -} -;; - -{ .mfi - nop.m 999 (p6) fclass.m.unc p11, p0 = FR_ftz, 0x00F // If exp, FTZ result denorm or zero? 
nop.i 999 } ;; { .mfb -(p12) mov GR_Parameter_TAG = 40 // expm1 underflow - fmerge.s FR_X = f8,f8 // Save x for error call -(p12) br.cond.spnt __libm_error_region // Branch on expm1 underflow -} -;; - -{ .mib (p11) mov GR_Parameter_TAG = 13 // exp underflow - nop.i 999 + fmerge.s FR_X = f8,f8 // Save x for error call (p11) br.cond.spnt __libm_error_region // Branch on exp underflow } ;; @@ -1389,6 +1377,7 @@ EXP_64_UNSUPPORTED: ;; GLOBAL_IEEE754_END(expl) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/s_fdim.S b/sysdeps/ia64/fpu/s_fdim.S index 96ff67b..eff290c 100644 --- a/sysdeps/ia64/fpu/s_fdim.S +++ b/sysdeps/ia64/fpu/s_fdim.S @@ -164,6 +164,7 @@ FDIM_OVERFLOW: GLOBAL_LIBM_END(fdim) + LOCAL_LIBM_ENTRY(__libm_error_region) // Call error support to report possible range error .prologue diff --git a/sysdeps/ia64/fpu/s_fdimf.S b/sysdeps/ia64/fpu/s_fdimf.S index 19e14d3..76d69d1 100644 --- a/sysdeps/ia64/fpu/s_fdimf.S +++ b/sysdeps/ia64/fpu/s_fdimf.S @@ -164,6 +164,7 @@ FDIM_OVERFLOW: GLOBAL_LIBM_END(fdimf) + LOCAL_LIBM_ENTRY(__libm_error_region) // Call error support to report possible range error .prologue diff --git a/sysdeps/ia64/fpu/s_fdiml.S b/sysdeps/ia64/fpu/s_fdiml.S index 00876c3..2227deb 100644 --- a/sysdeps/ia64/fpu/s_fdiml.S +++ b/sysdeps/ia64/fpu/s_fdiml.S @@ -164,6 +164,7 @@ FDIM_OVERFLOW: GLOBAL_LIBM_END(fdiml) + LOCAL_LIBM_ENTRY(__libm_error_region) // Call error support to report possible range error .prologue diff --git a/sysdeps/ia64/fpu/s_frexp.c b/sysdeps/ia64/fpu/s_frexp.c index c675006..7d90213 100644 --- a/sysdeps/ia64/fpu/s_frexp.c +++ b/sysdeps/ia64/fpu/s_frexp.c @@ -4,8 +4,7 @@ // Copyright (c) 2000-2002, Intel Corporation // All rights reserved. // -// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story, -// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation. 
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are diff --git a/sysdeps/ia64/fpu/s_frexpf.c b/sysdeps/ia64/fpu/s_frexpf.c index c21a21d..920f09d 100644 --- a/sysdeps/ia64/fpu/s_frexpf.c +++ b/sysdeps/ia64/fpu/s_frexpf.c @@ -4,8 +4,7 @@ // Copyright (c) 2000-2002, Intel Corporation // All rights reserved. // -// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story, -// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation. +// Contributed 2000 by the Intel Numerics Group, Intel Corporation // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are diff --git a/sysdeps/ia64/fpu/s_frexpl.c b/sysdeps/ia64/fpu/s_frexpl.c index 13d44ab..968cc32 100644 --- a/sysdeps/ia64/fpu/s_frexpl.c +++ b/sysdeps/ia64/fpu/s_frexpl.c @@ -4,8 +4,7 @@ // Copyright (c) 2000-2002, Intel Corporation // All rights reserved. // -// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story, -// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation. 
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are diff --git a/sysdeps/ia64/fpu/s_ilogb.S b/sysdeps/ia64/fpu/s_ilogb.S index 3f2733c..0102370 100644 --- a/sysdeps/ia64/fpu/s_ilogb.S +++ b/sysdeps/ia64/fpu/s_ilogb.S @@ -206,6 +206,7 @@ ILOGB_ZERO: GLOBAL_LIBM_END(ilogb) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/s_ilogbf.S b/sysdeps/ia64/fpu/s_ilogbf.S index 1b6ade6..9e971bc 100644 --- a/sysdeps/ia64/fpu/s_ilogbf.S +++ b/sysdeps/ia64/fpu/s_ilogbf.S @@ -206,6 +206,7 @@ ILOGB_ZERO: GLOBAL_LIBM_END(ilogbf) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/s_ilogbl.S b/sysdeps/ia64/fpu/s_ilogbl.S index e462fb7..8a6c9dc 100644 --- a/sysdeps/ia64/fpu/s_ilogbl.S +++ b/sysdeps/ia64/fpu/s_ilogbl.S @@ -206,6 +206,7 @@ ILOGB_ZERO: GLOBAL_LIBM_END(ilogbl) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/s_ldexp.c b/sysdeps/ia64/fpu/s_ldexp.c index 015b650..a0bc14c 100644 --- a/sysdeps/ia64/fpu/s_ldexp.c +++ b/sysdeps/ia64/fpu/s_ldexp.c @@ -4,8 +4,7 @@ // Copyright (c) 2000, 2001, Intel Corporation // All rights reserved. // -// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story, -// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation. +// Contributed 2000 by the Intel Numerics Group, Intel Corporation // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are diff --git a/sysdeps/ia64/fpu/s_ldexpf.c b/sysdeps/ia64/fpu/s_ldexpf.c index eae4051..ad083fa 100644 --- a/sysdeps/ia64/fpu/s_ldexpf.c +++ b/sysdeps/ia64/fpu/s_ldexpf.c @@ -4,8 +4,7 @@ // Copyright (c) 2000, 2001, Intel Corporation // All rights reserved. 
// -// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story, -// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation. +// Contributed 2000 by the Intel Numerics Group, Intel Corporation // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are diff --git a/sysdeps/ia64/fpu/s_ldexpl.c b/sysdeps/ia64/fpu/s_ldexpl.c index 91d8268..61dfd21 100644 --- a/sysdeps/ia64/fpu/s_ldexpl.c +++ b/sysdeps/ia64/fpu/s_ldexpl.c @@ -4,8 +4,7 @@ // Copyright (c) 2000, 2001, Intel Corporation // All rights reserved. // -// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story, -// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation. +// Contributed 2000 by the Intel Numerics Group, Intel Corporation // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are diff --git a/sysdeps/ia64/fpu/s_libm_ldexp.S b/sysdeps/ia64/fpu/s_libm_ldexp.S index 1fc2c3f..2aaf2c3 100644 --- a/sysdeps/ia64/fpu/s_libm_ldexp.S +++ b/sysdeps/ia64/fpu/s_libm_ldexp.S @@ -21,33 +21,34 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// // History //============================================================== // 02/02/00 Initial version -// 01/26/01 ldexp completely reworked and now standalone version +// 01/26/01 ldexp completely reworked and now standalone version // 01/04/02 Added handling for int 32 or 64 bits // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align +// 08/04/03 Improved performance // // API //============================================================== -// double __libm_ldexp (double x, int n, int int_type) +// double __libm_ldexp (double x, int n, int int_type) // input floating point f8 and int n (r33), int int_type (r34) // output floating point f8 // @@ -55,27 +56,41 @@ // int_type = 1 if int is 64 bits // // Returns x* 2**n using an fma and detects overflow -// and underflow. +// and underflow. // // +// Strategy: +// Compute biased exponent of result exp_Result = N + exp_X +// Break into ranges: +// exp_Result > 0x103fe -> Certain overflow +// exp_Result = 0x103fe -> Possible overflow +// 0x0fc01 <= exp_Result < 0x103fe -> No over/underflow (main path) +// 0x0fc01 - 52 <= exp_Result < 0x0fc01 -> Possible underflow +// exp_Result < 0x0fc01 - 52 -> Certain underflow FR_Big = f6 FR_NBig = f7 FR_Floating_X = f8 FR_Result = f8 FR_Result2 = f9 -FR_Result3 = f11 -FR_Norm_X = f12 -FR_Two_N = f14 -FR_Two_to_Big = f15 +FR_Result3 = f10 +FR_Norm_X = f11 +FR_Two_N = f12 +GR_neg_ov_limit= r14 GR_N_Biased = r15 GR_Big = r16 GR_NBig = r17 -GR_Scratch = r18 -GR_Scratch1 = r19 +GR_exp_Result = r18 +GR_pos_ov_limit= r19 GR_Bias = r20 GR_N_as_int = r21 +GR_signexp_X = r22 +GR_exp_X = r23 +GR_exp_mask = r24 +GR_max_exp = r25 +GR_min_exp = r26 +GR_min_den_exp = r27 GR_SAVE_B0 = r32 GR_SAVE_GP = r33 @@ -93,243 +108,298 @@ GLOBAL_LIBM_ENTRY(__libm_ldexp) // Build the exponent Bias // { .mfi - alloc r32=ar.pfs,3,0,4,0 - fclass.m p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero - addl GR_Bias = 0x0FFFF,r0 + getf.exp 
GR_signexp_X = FR_Floating_X // Get signexp of x + fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero + mov GR_Bias = 0x0ffff } - // -// Is N zero? // Normalize x // Is integer type 32 bits? // { .mfi - cmp.eq p6,p0 = r33,r0 - fnorm.s1 FR_Norm_X = FR_Floating_X - cmp.eq p8,p9 = r34,r0 + mov GR_Big = 35000 // If N this big then certain overflow + fnorm.s1 FR_Norm_X = FR_Floating_X + cmp.eq p8,p9 = r34,r0 } ;; // Sign extend N if int is 32 bits { .mfi (p9) mov GR_N_as_int = r33 // Copy N if int is 64 bits - nop.f 0 + fclass.m p9,p0 = FR_Floating_X, 0x0b // Test for x=unorm (p8) sxt4 GR_N_as_int = r33 // Sign extend N if int is 32 bits } +{ .mfi + mov GR_NBig = -35000 // If N this small then certain underflow + nop.f 0 + mov GR_max_exp = 0x103fe // Exponent of maximum double +} +;; + +// Create biased exponent for 2**N +{ .mfi + add GR_N_Biased = GR_Bias,GR_N_as_int + nop.f 0 + cmp.ge p7, p0 = GR_N_as_int, GR_Big // Certain overflow? +} +{ .mib + cmp.le p8, p0 = GR_N_as_int, GR_NBig // Certain underflow? + mov GR_min_exp = 0x0fc01 // Exponent of minimum double +(p9) br.cond.spnt LDEXP_UNORM // Branch if x=unorm +} +;; + +LDEXP_COMMON: +// Main path continues. Also return here from x=unorm path. +// Create 2**N +.pred.rel "mutex",p7,p8 +{ .mfi + setf.exp FR_Two_N = GR_N_Biased + nop.f 0 +(p7) mov GR_N_as_int = GR_Big // Limit max N +} +{ .mfi +(p8) mov GR_N_as_int = GR_NBig // Limit min N + nop.f 0 +(p8) cmp.eq p7,p0 = r0,r0 // Set p7 if |N| big +} ;; // -// Branch and return special values. -// Create -35000 -// Create 35000 +// Create biased exponent for 2**N for N big +// Is N zero? 
// { .mfi - addl GR_Big = 35000,r0 +(p7) add GR_N_Biased = GR_Bias,GR_N_as_int nop.f 0 - add GR_N_Biased = GR_Bias,GR_N_as_int + cmp.eq.or p6,p0 = r33,r0 } -{ .mfb - addl GR_NBig = -35000,r0 -(p7) fma.d.s0 FR_Result = FR_Floating_X,f1, f0 -(p7) br.ret.spnt b0 -};; +{ .mfi + mov GR_pos_ov_limit = 0x103ff // Exponent for positive overflow + nop.f 0 + mov GR_exp_mask = 0x1ffff // Exponent mask +} +;; // -// Build the exponent Bias -// Return x when N = 0 +// Create 2**N for N big +// Return x when N = 0 or X = Nan, Inf, Zero // { .mfi - setf.exp FR_Two_N = GR_N_Biased +(p7) setf.exp FR_Two_N = GR_N_Biased nop.f 0 - addl GR_Scratch1 = 0x063BF,r0 + mov GR_min_den_exp = 0x0fc01 - 52 // Exponent of min denorm dble } { .mfb - addl GR_Scratch = 0x019C3F,r0 -(p6) fma.d.s0 FR_Result = FR_Floating_X,f1, f0 -(p6) br.ret.spnt b0 -};; + and GR_exp_X = GR_exp_mask, GR_signexp_X +(p6) fma.d.s0 FR_Result = FR_Floating_X, f1, f0 +(p6) br.ret.spnt b0 +} +;; // -// Create 2*big -// Create 2**-big -// Is N > 35000 -// Is N < -35000 // Raise Denormal operand flag with compare -// Main path, create 2**N +// Compute biased result exponent // { .mfi - setf.exp FR_NBig = GR_Scratch1 - nop.f 0 - cmp.ge p6, p0 = GR_N_as_int, GR_Big -} -{ .mfi - setf.exp FR_Big = GR_Scratch + add GR_exp_Result = GR_exp_X, GR_N_as_int fcmp.ge.s0 p0,p11 = FR_Floating_X,f0 - cmp.le p8, p0 = GR_N_as_int, GR_NBig -};; + mov GR_neg_ov_limit = 0x303ff // Exponent for negative overflow +} +;; // -// Adjust 2**N if N was very small or very large +// Do final operation // { .mfi - nop.m 0 -(p6) fma.s1 FR_Two_N = FR_Big,f1,f0 - nop.i 0 + cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow + fma.d.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 + cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow } -{ .mlx - nop.m 999 - movl GR_Scratch = 0x00000000000303FF -};; +{ .mfb + nop.m 0 + nop.f 0 +(p9) br.cond.spnt LDEXP_UNDERFLOW // Branch if certain underflow +} +;; +{ .mib +(p6) cmp.gt.unc p6,p8 = GR_exp_Result, 
GR_max_exp // Test sure overflow +(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow +(p7) br.ret.sptk b0 // Return from main path +} +;; -{ .mfi - nop.m 0 -(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0 - nop.i 0 +{ .bbb +(p6) br.cond.spnt LDEXP_OVERFLOW // Branch if certain overflow +(p8) br.cond.spnt LDEXP_POSSIBLE_OVERFLOW // Branch if possible overflow +(p9) br.cond.spnt LDEXP_POSSIBLE_UNDERFLOW // Branch if possible underflow } -{ .mlx - nop.m 999 - movl GR_Scratch1= 0x00000000000103FF -};; +;; -// Set up necessary status fields +// Here if possible underflow. +// Resulting exponent: 0x0fc01-52 <= exp_Result < 0x0fc01 +LDEXP_POSSIBLE_UNDERFLOW: +// +// Here if possible overflow. +// Resulting exponent: 0x103fe = exp_Result +LDEXP_POSSIBLE_OVERFLOW: + +// Set up necessary status fields // // S0 user supplied status // S2 user supplied status + WRE + TD (Overflows) // S3 user supplied status + FZ + TD (Underflows) // { .mfi - nop.m 999 + nop.m 0 fsetc.s3 0x7F,0x41 - nop.i 999 + nop.i 0 } { .mfi - nop.m 999 + nop.m 0 fsetc.s2 0x7F,0x42 - nop.i 999 -};; + nop.i 0 +} +;; // -// Do final operation +// Do final operation with s2 and s3 // { .mfi - setf.exp FR_NBig = GR_Scratch - fma.d.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 - nop.i 999 + setf.exp FR_NBig = GR_neg_ov_limit + fma.d.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 + nop.i 0 } { .mfi - nop.m 999 - fma.d.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 - nop.i 999 -};; -{ .mfi - setf.exp FR_Big = GR_Scratch1 - fma.d.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 - nop.i 999 -};; + setf.exp FR_Big = GR_pos_ov_limit + fma.d.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 + nop.i 0 +} +;; -// // Check for overflow or underflow. // Restore s3 // Restore s2 // { .mfi - nop.m 0 + nop.m 0 fsetc.s3 0x7F,0x40 - nop.i 999 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fsetc.s2 0x7F,0x40 - nop.i 999 -};; + nop.i 0 +} +;; // // Is the result zero? 
// { .mfi - nop.m 999 + nop.m 0 fclass.m p6, p0 = FR_Result3, 0x007 - nop.i 999 -} + nop.i 0 +} { .mfi - addl GR_Tag = 146, r0 + nop.m 0 fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big - nop.i 0 -};; + nop.i 0 +} +;; // // Detect masked underflow - Tiny + Inexact Only // { .mfi - nop.m 999 + nop.m 0 (p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2 - nop.i 999 -};; + nop.i 0 +} +;; // // Is result bigger the allowed range? // Branch out for underflow // { .mfb -(p6) addl GR_Tag = 147, r0 + nop.m 0 (p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig -(p6) br.cond.spnt LDEXP_UNDERFLOW -};; +(p6) br.cond.spnt LDEXP_UNDERFLOW +} +;; // // Branch out for overflow // -{ .mbb - nop.m 0 -(p7) br.cond.spnt LDEXP_OVERFLOW -(p9) br.cond.spnt LDEXP_OVERFLOW -};; +{ .bbb +(p7) br.cond.spnt LDEXP_OVERFLOW +(p9) br.cond.spnt LDEXP_OVERFLOW + br.ret.sptk b0 // Return from main path. +} +;; -// -// Return from main path. -// -{ .mfb - nop.m 999 - nop.f 0 - br.ret.sptk b0;; +// Here if result overflows +LDEXP_OVERFLOW: +{ .mib + alloc r32=ar.pfs,3,0,4,0 + addl GR_Tag = 146, r0 // Set error tag for overflow + br.cond.sptk __libm_error_region // Call error support for overflow +} +;; + +// Here if result underflows +LDEXP_UNDERFLOW: +{ .mib + alloc r32=ar.pfs,3,0,4,0 + addl GR_Tag = 147, r0 // Set error tag for underflow + br.cond.sptk __libm_error_region // Call error support for underflow } +;; -GLOBAL_LIBM_END(__libm_ldexp) -__libm_error_region: +// Here if x=unorm +LDEXP_UNORM: +{ .mib + getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x + nop.i 0 + br.cond.sptk LDEXP_COMMON // Return to main path +} +;; -LDEXP_OVERFLOW: -LDEXP_UNDERFLOW: + +GLOBAL_LIBM_END(__libm_ldexp) +LOCAL_LIBM_ENTRY(__libm_error_region) // // Get stack address of N // .prologue { .mfi - add GR_Parameter_Y=-32,sp + add GR_Parameter_Y=-32,sp nop.f 0 .save ar.pfs,GR_SAVE_PFS - mov GR_SAVE_PFS=ar.pfs + mov GR_SAVE_PFS=ar.pfs } // -// Adjust sp +// Adjust sp // { .mfi .fframe 64 - add sp=-64,sp + add 
sp=-64,sp nop.f 0 - mov GR_SAVE_GP=gp + mov GR_SAVE_GP=gp };; // -// Store N on stack in correct position +// Store N on stack in correct position // Locate the address of x on stack // { .mmi - st8 [GR_Parameter_Y] = GR_N_as_int,16 - add GR_Parameter_X = 16,sp + st8 [GR_Parameter_Y] = GR_N_as_int,16 + add GR_Parameter_X = 16,sp .save b0, GR_SAVE_B0 - mov GR_SAVE_B0=b0 + mov GR_SAVE_B0=b0 };; // @@ -338,42 +408,42 @@ LDEXP_UNDERFLOW: // .body { .mib - stfd [GR_Parameter_X] = FR_Norm_X - add GR_Parameter_RESULT = 0,GR_Parameter_Y + stfd [GR_Parameter_X] = FR_Norm_X + add GR_Parameter_RESULT = 0,GR_Parameter_Y nop.b 0 } { .mib - stfd [GR_Parameter_Y] = FR_Result + stfd [GR_Parameter_Y] = FR_Result add GR_Parameter_Y = -16,GR_Parameter_Y - br.call.sptk b0=__libm_error_support# + br.call.sptk b0=__libm_error_support# };; // // Get location of result on stack // { .mmi + add GR_Parameter_RESULT = 48,sp nop.m 0 - nop.m 0 - add GR_Parameter_RESULT = 48,sp + nop.i 0 };; // -// Get the new result +// Get the new result // { .mmi - ldfd FR_Result = [GR_Parameter_RESULT] + ldfd FR_Result = [GR_Parameter_RESULT] .restore sp - add sp = 64,sp - mov b0 = GR_SAVE_B0 + add sp = 64,sp + mov b0 = GR_SAVE_B0 };; // // Restore gp, ar.pfs and return // { .mib - mov gp = GR_SAVE_GP - mov ar.pfs = GR_SAVE_PFS - br.ret.sptk b0 + mov gp = GR_SAVE_GP + mov ar.pfs = GR_SAVE_PFS + br.ret.sptk b0 };; LOCAL_LIBM_END(__libm_error_region) diff --git a/sysdeps/ia64/fpu/s_libm_ldexpf.S b/sysdeps/ia64/fpu/s_libm_ldexpf.S index d7f161c..1326a14 100644 --- a/sysdeps/ia64/fpu/s_libm_ldexpf.S +++ b/sysdeps/ia64/fpu/s_libm_ldexpf.S @@ -21,33 +21,34 @@ // products derived from this software without specific prior written // permission. 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// // History //============================================================== // 02/02/00 Initial version -// 01/26/01 ldexpf completely reworked and now standalone version +// 01/26/01 ldexpf completely reworked and now standalone version // 01/04/02 Added handling for int 32 or 64 bits // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align +// 08/04/03 Improved performance // // API //============================================================== -// float __libm_ldexpf (float x, int n, int int_type) +// float __libm_ldexpf (float x, int n, int int_type) // input floating point f8 and int n (r33), int int_type (r34) // output floating point f8 // @@ -55,27 +56,41 @@ // int_type = 1 if int is 64 bits // // Returns x* 2**n using an fma and detects overflow -// and underflow. +// and underflow. // // +// Strategy: +// Compute biased exponent of result exp_Result = N + exp_X +// Break into ranges: +// exp_Result > 0x1007e -> Certain overflow +// exp_Result = 0x1007e -> Possible overflow +// 0x0ff81 <= exp_Result < 0x1007e -> No over/underflow (main path) +// 0x0ff81 - 23 <= exp_Result < 0x0ff81 -> Possible underflow +// exp_Result < 0x0ff81 - 23 -> Certain underflow FR_Big = f6 FR_NBig = f7 FR_Floating_X = f8 FR_Result = f8 FR_Result2 = f9 -FR_Result3 = f11 -FR_Norm_X = f12 -FR_Two_N = f14 -FR_Two_to_Big = f15 +FR_Result3 = f10 +FR_Norm_X = f11 +FR_Two_N = f12 +GR_neg_ov_limit= r14 GR_N_Biased = r15 GR_Big = r16 GR_NBig = r17 -GR_Scratch = r18 -GR_Scratch1 = r19 +GR_exp_Result = r18 +GR_pos_ov_limit= r19 GR_Bias = r20 GR_N_as_int = r21 +GR_signexp_X = r22 +GR_exp_X = r23 +GR_exp_mask = r24 +GR_max_exp = r25 +GR_min_exp = r26 +GR_min_den_exp = r27 GR_SAVE_B0 = r32 GR_SAVE_GP = r33 @@ -93,241 +108,298 @@ GLOBAL_LIBM_ENTRY(__libm_ldexpf) // Build the exponent Bias // { .mfi - alloc r32=ar.pfs,3,0,4,0 - fclass.m p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero - addl GR_Bias = 0x0FFFF,r0 + getf.exp 
GR_signexp_X = FR_Floating_X // Get signexp of x + fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero + mov GR_Bias = 0x0ffff } // -// Is N zero? // Normalize x // Is integer type 32 bits? // { .mfi - cmp.eq p6,p0 = r33,r0 - fnorm.s1 FR_Norm_X = FR_Floating_X - cmp.eq p8,p9 = r34,r0 + mov GR_Big = 35000 // If N this big then certain overflow + fnorm.s1 FR_Norm_X = FR_Floating_X + cmp.eq p8,p9 = r34,r0 } ;; // Sign extend N if int is 32 bits { .mfi (p9) mov GR_N_as_int = r33 // Copy N if int is 64 bits - nop.f 0 + fclass.m p9,p0 = FR_Floating_X, 0x0b // Test for x=unorm (p8) sxt4 GR_N_as_int = r33 // Sign extend N if int is 32 bits } +{ .mfi + mov GR_NBig = -35000 // If N this small then certain underflow + nop.f 0 + mov GR_max_exp = 0x1007e // Exponent of maximum float +} +;; + +// Create biased exponent for 2**N +{ .mfi + add GR_N_Biased = GR_Bias,GR_N_as_int + nop.f 0 + cmp.ge p7, p0 = GR_N_as_int, GR_Big // Certain overflow? +} +{ .mib + cmp.le p8, p0 = GR_N_as_int, GR_NBig // Certain underflow? + mov GR_min_exp = 0x0ff81 // Exponent of minimum float +(p9) br.cond.spnt LDEXPF_UNORM // Branch if x=unorm +} +;; + +LDEXPF_COMMON: +// Main path continues. Also return here from x=unorm path. +// Create 2**N +.pred.rel "mutex",p7,p8 +{ .mfi + setf.exp FR_Two_N = GR_N_Biased + nop.f 0 +(p7) mov GR_N_as_int = GR_Big // Limit max N +} +{ .mfi +(p8) mov GR_N_as_int = GR_NBig // Limit min N + nop.f 0 +(p8) cmp.eq p7,p0 = r0,r0 // Set p7 if |N| big +} ;; // -// Branch and return special values. -// Create -35000 -// Create 35000 +// Create biased exponent for 2**N for N big +// Is N zero? 
// { .mfi - addl GR_Big = 35000,r0 +(p7) add GR_N_Biased = GR_Bias,GR_N_as_int nop.f 0 - add GR_N_Biased = GR_Bias,GR_N_as_int + cmp.eq.or p6,p0 = r33,r0 } -{ .mfb - addl GR_NBig = -35000,r0 -(p7) fma.s.s0 FR_Result = FR_Floating_X,f1, f0 -(p7) br.ret.spnt b0 -};; +{ .mfi + mov GR_pos_ov_limit = 0x1007f // Exponent for positive overflow + nop.f 0 + mov GR_exp_mask = 0x1ffff // Exponent mask +} +;; // -// Build the exponent Bias -// Return x when N = 0 +// Create 2**N for N big +// Return x when N = 0 or X = Nan, Inf, Zero // { .mfi - setf.exp FR_Two_N = GR_N_Biased +(p7) setf.exp FR_Two_N = GR_N_Biased nop.f 0 - addl GR_Scratch1 = 0x063BF,r0 + mov GR_min_den_exp = 0x0ff81 - 23 // Exponent of min denorm float } { .mfb - addl GR_Scratch = 0x019C3F,r0 -(p6) fma.s.s0 FR_Result = FR_Floating_X,f1, f0 -(p6) br.ret.spnt b0 -};; + and GR_exp_X = GR_exp_mask, GR_signexp_X +(p6) fma.s.s0 FR_Result = FR_Floating_X, f1, f0 +(p6) br.ret.spnt b0 +} +;; // -// Create 2*big -// Create 2**-big -// Is N > 35000 -// Is N < -35000 // Raise Denormal operand flag with compare -// Main path, create 2**N +// Compute biased result exponent // { .mfi - setf.exp FR_NBig = GR_Scratch1 - nop.f 0 - cmp.ge p6, p0 = GR_N_as_int, GR_Big -} -{ .mfi - setf.exp FR_Big = GR_Scratch + add GR_exp_Result = GR_exp_X, GR_N_as_int fcmp.ge.s0 p0,p11 = FR_Floating_X,f0 - cmp.le p8, p0 = GR_N_as_int, GR_NBig -};; + mov GR_neg_ov_limit = 0x3007f // Exponent for negative overflow +} +;; // -// Adjust 2**N if N was very small or very large +// Do final operation // { .mfi - nop.m 0 -(p6) fma.s1 FR_Two_N = FR_Big,f1,f0 - nop.i 0 + cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow + fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 + cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow } -{ .mlx - nop.m 999 - movl GR_Scratch = 0x000000000003007F -};; +{ .mfb + nop.m 0 + nop.f 0 +(p9) br.cond.spnt LDEXPF_UNDERFLOW // Branch if certain underflow +} +;; +{ .mib +(p6) cmp.gt.unc p6,p8 = 
GR_exp_Result, GR_max_exp // Test sure overflow +(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow +(p7) br.ret.sptk b0 // Return from main path +} +;; -{ .mfi - nop.m 0 -(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0 - nop.i 0 +{ .bbb +(p6) br.cond.spnt LDEXPF_OVERFLOW // Branch if certain overflow +(p8) br.cond.spnt LDEXPF_POSSIBLE_OVERFLOW // Branch if possible overflow +(p9) br.cond.spnt LDEXPF_POSSIBLE_UNDERFLOW // Branch if possible underflow } -{ .mlx - nop.m 999 - movl GR_Scratch1= 0x000000000001007F -};; +;; -// Set up necessary status fields +// Here if possible underflow. +// Resulting exponent: 0x0ff81-23 <= exp_Result < 0x0ff81 +LDEXPF_POSSIBLE_UNDERFLOW: +// +// Here if possible overflow. +// Resulting exponent: 0x1007e = exp_Result +LDEXPF_POSSIBLE_OVERFLOW: + +// Set up necessary status fields // // S0 user supplied status // S2 user supplied status + WRE + TD (Overflows) // S3 user supplied status + FZ + TD (Underflows) // { .mfi - nop.m 999 + nop.m 0 fsetc.s3 0x7F,0x41 - nop.i 999 + nop.i 0 } { .mfi - nop.m 999 + nop.m 0 fsetc.s2 0x7F,0x42 - nop.i 999 -};; + nop.i 0 +} +;; // -// Do final operation +// Do final operation with s2 and s3 // { .mfi - setf.exp FR_NBig = GR_Scratch - fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 - nop.i 999 + setf.exp FR_NBig = GR_neg_ov_limit + fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 + nop.i 0 } { .mfi - nop.m 999 - fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 - nop.i 999 -};; -{ .mfi - setf.exp FR_Big = GR_Scratch1 - fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 - nop.i 999 -};; + setf.exp FR_Big = GR_pos_ov_limit + fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 + nop.i 0 +} +;; // Check for overflow or underflow. // Restore s3 // Restore s2 // { .mfi - nop.m 0 + nop.m 0 fsetc.s3 0x7F,0x40 - nop.i 999 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fsetc.s2 0x7F,0x40 - nop.i 999 -};; + nop.i 0 +} +;; // // Is the result zero? 
// { .mfi - nop.m 999 + nop.m 0 fclass.m p6, p0 = FR_Result3, 0x007 - nop.i 999 -} + nop.i 0 +} { .mfi - addl GR_Tag = 148, r0 + nop.m 0 fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big - nop.i 0 -};; + nop.i 0 +} +;; // // Detect masked underflow - Tiny + Inexact Only // { .mfi - nop.m 999 + nop.m 0 (p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2 - nop.i 999 -};; + nop.i 0 +} +;; // // Is result bigger the allowed range? // Branch out for underflow // { .mfb -(p6) addl GR_Tag = 149, r0 + nop.m 0 (p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig -(p6) br.cond.spnt LDEXPF_UNDERFLOW -};; +(p6) br.cond.spnt LDEXPF_UNDERFLOW +} +;; // // Branch out for overflow // -{ .mbb - nop.m 0 -(p7) br.cond.spnt LDEXPF_OVERFLOW -(p9) br.cond.spnt LDEXPF_OVERFLOW -};; +{ .bbb +(p7) br.cond.spnt LDEXPF_OVERFLOW +(p9) br.cond.spnt LDEXPF_OVERFLOW + br.ret.sptk b0 // Return from main path. +} +;; -// -// Return from main path. -// -{ .mfb - nop.m 999 - nop.f 0 - br.ret.sptk b0;; +// Here if result overflows +LDEXPF_OVERFLOW: +{ .mib + alloc r32=ar.pfs,3,0,4,0 + addl GR_Tag = 148, r0 // Set error tag for overflow + br.cond.sptk __libm_error_region // Call error support for overflow } +;; -GLOBAL_LIBM_END(__libm_ldexpf) -__libm_error_region: +// Here if result underflows +LDEXPF_UNDERFLOW: +{ .mib + alloc r32=ar.pfs,3,0,4,0 + addl GR_Tag = 149, r0 // Set error tag for underflow + br.cond.sptk __libm_error_region // Call error support for underflow +} +;; + +// Here if x=unorm +LDEXPF_UNORM: +{ .mib + getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x + nop.i 0 + br.cond.sptk LDEXPF_COMMON // Return to main path +} +;; -LDEXPF_OVERFLOW: -LDEXPF_UNDERFLOW: + +GLOBAL_LIBM_END(__libm_ldexpf) +LOCAL_LIBM_ENTRY(__libm_error_region) // // Get stack address of N // .prologue { .mfi - add GR_Parameter_Y=-32,sp + add GR_Parameter_Y=-32,sp nop.f 0 .save ar.pfs,GR_SAVE_PFS - mov GR_SAVE_PFS=ar.pfs + mov GR_SAVE_PFS=ar.pfs } // -// Adjust sp +// Adjust sp // { .mfi .fframe 64 - add 
sp=-64,sp + add sp=-64,sp nop.f 0 - mov GR_SAVE_GP=gp + mov GR_SAVE_GP=gp };; // -// Store N on stack in correct position +// Store N on stack in correct position // Locate the address of x on stack // { .mmi - st8 [GR_Parameter_Y] = GR_N_as_int,16 - add GR_Parameter_X = 16,sp + st8 [GR_Parameter_Y] = GR_N_as_int,16 + add GR_Parameter_X = 16,sp .save b0, GR_SAVE_B0 - mov GR_SAVE_B0=b0 + mov GR_SAVE_B0=b0 };; // @@ -336,42 +408,42 @@ LDEXPF_UNDERFLOW: // .body { .mib - stfs [GR_Parameter_X] = FR_Norm_X - add GR_Parameter_RESULT = 0,GR_Parameter_Y + stfs [GR_Parameter_X] = FR_Norm_X + add GR_Parameter_RESULT = 0,GR_Parameter_Y nop.b 0 } { .mib - stfs [GR_Parameter_Y] = FR_Result + stfs [GR_Parameter_Y] = FR_Result add GR_Parameter_Y = -16,GR_Parameter_Y - br.call.sptk b0=__libm_error_support# + br.call.sptk b0=__libm_error_support# };; // // Get location of result on stack // { .mmi + add GR_Parameter_RESULT = 48,sp nop.m 0 - nop.m 0 - add GR_Parameter_RESULT = 48,sp + nop.i 0 };; // -// Get the new result +// Get the new result // { .mmi - ldfs FR_Result = [GR_Parameter_RESULT] + ldfs FR_Result = [GR_Parameter_RESULT] .restore sp - add sp = 64,sp - mov b0 = GR_SAVE_B0 + add sp = 64,sp + mov b0 = GR_SAVE_B0 };; // // Restore gp, ar.pfs and return // { .mib - mov gp = GR_SAVE_GP - mov ar.pfs = GR_SAVE_PFS - br.ret.sptk b0 + mov gp = GR_SAVE_GP + mov ar.pfs = GR_SAVE_PFS + br.ret.sptk b0 };; LOCAL_LIBM_END(__libm_error_region) diff --git a/sysdeps/ia64/fpu/s_libm_ldexpl.S b/sysdeps/ia64/fpu/s_libm_ldexpl.S index 72d4560..fffda9e 100644 --- a/sysdeps/ia64/fpu/s_libm_ldexpl.S +++ b/sysdeps/ia64/fpu/s_libm_ldexpl.S @@ -21,33 +21,34 @@ // products derived from this software without specific prior written // permission. 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// // History //============================================================== // 02/02/00 Initial version -// 01/26/01 ldexpl completely reworked and now standalone version +// 01/26/01 ldexpl completely reworked and now standalone version // 01/04/02 Added handling for int 32 or 64 bits // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align +// 08/04/03 Improved performance // // API //============================================================== -// long double __libm_ldexpl (long double x, int n, int int_type) +// long double __libm_ldexpl (long double x, int n, int int_type) // input floating point f8 and int n (r34), int int_type (r35) // output floating point f8 // @@ -55,27 +56,41 @@ // int_type = 1 if int is 64 bits // // Returns x* 2**n using an fma and detects overflow -// and underflow. +// and underflow. // // +// Strategy: +// Compute biased exponent of result exp_Result = N + exp_X +// Break into ranges: +// exp_Result > 0x13ffe -> Certain overflow +// exp_Result = 0x13ffe -> Possible overflow +// 0x0c001 <= exp_Result < 0x13ffe -> No over/underflow (main path) +// 0x0c001 - 63 <= exp_Result < 0x0c001 -> Possible underflow +// exp_Result < 0x0c001 - 63 -> Certain underflow FR_Big = f6 FR_NBig = f7 FR_Floating_X = f8 FR_Result = f8 FR_Result2 = f9 -FR_Result3 = f11 -FR_Norm_X = f12 -FR_Two_N = f14 -FR_Two_to_Big = f15 +FR_Result3 = f10 +FR_Norm_X = f11 +FR_Two_N = f12 +GR_neg_ov_limit= r14 GR_N_Biased = r15 GR_Big = r16 GR_NBig = r17 -GR_Scratch = r18 -GR_Scratch1 = r19 +GR_exp_Result = r18 +GR_pos_ov_limit= r19 GR_Bias = r20 GR_N_as_int = r21 +GR_signexp_X = r22 +GR_exp_X = r23 +GR_exp_mask = r24 +GR_max_exp = r25 +GR_min_exp = r26 +GR_min_den_exp = r27 GR_SAVE_B0 = r32 GR_SAVE_GP = r33 @@ -93,242 +108,298 @@ GLOBAL_LIBM_ENTRY(__libm_ldexpl) // Build the exponent Bias // { .mfi - alloc r32=ar.pfs,3,0,4,0 - fclass.m p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero - addl GR_Bias = 
0x0FFFF,r0 + getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x + fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero + mov GR_Bias = 0x0ffff } - // -// Is N zero? // Normalize x // Is integer type 32 bits? // { .mfi - cmp.eq p6,p0 = r34,r0 - fnorm.s1 FR_Norm_X = FR_Floating_X - cmp.eq p8,p9 = r35,r0 + mov GR_Big = 35000 // If N this big then certain overflow + fnorm.s1 FR_Norm_X = FR_Floating_X + cmp.eq p8,p9 = r35,r0 } ;; // Sign extend N if int is 32 bits { .mfi (p9) mov GR_N_as_int = r34 // Copy N if int is 64 bits - nop.f 0 + fclass.m p9,p0 = FR_Floating_X, 0x0b // Test for x=unorm (p8) sxt4 GR_N_as_int = r34 // Sign extend N if int is 32 bits } +{ .mfi + mov GR_NBig = -35000 // If N this small then certain underflow + nop.f 0 + mov GR_max_exp = 0x13ffe // Exponent of maximum long double +} +;; + +// Create biased exponent for 2**N +{ .mfi + add GR_N_Biased = GR_Bias,GR_N_as_int + nop.f 0 + cmp.ge p7, p0 = GR_N_as_int, GR_Big // Certain overflow? +} +{ .mib + cmp.le p8, p0 = GR_N_as_int, GR_NBig // Certain underflow? + mov GR_min_exp = 0x0c001 // Exponent of minimum long double +(p9) br.cond.spnt LDEXPL_UNORM // Branch if x=unorm +} +;; + +LDEXPL_COMMON: +// Main path continues. Also return here from x=unorm path. +// Create 2**N +.pred.rel "mutex",p7,p8 +{ .mfi + setf.exp FR_Two_N = GR_N_Biased + nop.f 0 +(p7) mov GR_N_as_int = GR_Big // Limit max N +} +{ .mfi +(p8) mov GR_N_as_int = GR_NBig // Limit min N + nop.f 0 +(p8) cmp.eq p7,p0 = r0,r0 // Set p7 if |N| big +} ;; // -// Branch and return special values. -// Create -35000 -// Create 35000 +// Create biased exponent for 2**N for N big +// Is N zero? 
// { .mfi - addl GR_Big = 35000,r0 +(p7) add GR_N_Biased = GR_Bias,GR_N_as_int nop.f 0 - add GR_N_Biased = GR_Bias,GR_N_as_int + cmp.eq.or p6,p0 = r34,r0 } -{ .mfb - addl GR_NBig = -35000,r0 -(p7) fma.s0 FR_Result = FR_Floating_X,f1, f0 -(p7) br.ret.spnt b0 -};; +{ .mfi + mov GR_pos_ov_limit = 0x13fff // Exponent for positive overflow + nop.f 0 + mov GR_exp_mask = 0x1ffff // Exponent mask +} +;; // -// Build the exponent Bias -// Return x when N = 0 +// Create 2**N for N big +// Return x when N = 0 or X = Nan, Inf, Zero // { .mfi - setf.exp FR_Two_N = GR_N_Biased +(p7) setf.exp FR_Two_N = GR_N_Biased nop.f 0 - addl GR_Scratch1 = 0x063BF,r0 + mov GR_min_den_exp = 0x0c001 - 63 // Exp of min denorm long dble } { .mfb - addl GR_Scratch = 0x019C3F,r0 -(p6) fma.s0 FR_Result = FR_Floating_X,f1, f0 -(p6) br.ret.spnt b0 -};; + and GR_exp_X = GR_exp_mask, GR_signexp_X +(p6) fma.s0 FR_Result = FR_Floating_X, f1, f0 +(p6) br.ret.spnt b0 +} +;; // -// Create 2*big -// Create 2**-big -// Is N > 35000 -// Is N < -35000 // Raise Denormal operand flag with compare -// Main path, create 2**N +// Compute biased result exponent // { .mfi - setf.exp FR_NBig = GR_Scratch1 - nop.f 0 - cmp.ge p6, p0 = GR_N_as_int, GR_Big -} -{ .mfi - setf.exp FR_Big = GR_Scratch + add GR_exp_Result = GR_exp_X, GR_N_as_int fcmp.ge.s0 p0,p11 = FR_Floating_X,f0 - cmp.le p8, p0 = GR_N_as_int, GR_NBig -};; + mov GR_neg_ov_limit = 0x33fff // Exponent for negative overflow +} +;; // -// Adjust 2**N if N was very small or very large +// Do final operation // { .mfi - nop.m 0 -(p6) fma.s1 FR_Two_N = FR_Big,f1,f0 - nop.i 0 + cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow + fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 + cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow } -{ .mlx - nop.m 999 - movl GR_Scratch = 0x0000000000033FFF -};; +{ .mfb + nop.m 0 + nop.f 0 +(p9) br.cond.spnt LDEXPL_UNDERFLOW // Branch if certain underflow +} +;; +{ .mib +(p6) cmp.gt.unc p6,p8 = GR_exp_Result, 
GR_max_exp // Test sure overflow +(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow +(p7) br.ret.sptk b0 // Return from main path +} +;; -{ .mfi - nop.m 0 -(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0 - nop.i 0 +{ .bbb +(p6) br.cond.spnt LDEXPL_OVERFLOW // Branch if certain overflow +(p8) br.cond.spnt LDEXPL_POSSIBLE_OVERFLOW // Branch if possible overflow +(p9) br.cond.spnt LDEXPL_POSSIBLE_UNDERFLOW // Branch if possible underflow } -{ .mlx - nop.m 999 - movl GR_Scratch1= 0x0000000000013FFF -};; +;; -// Set up necessary status fields +// Here if possible underflow. +// Resulting exponent: 0x0c001-63 <= exp_Result < 0x0c001 +LDEXPL_POSSIBLE_UNDERFLOW: +// +// Here if possible overflow. +// Resulting exponent: 0x13ffe = exp_Result +LDEXPL_POSSIBLE_OVERFLOW: + +// Set up necessary status fields // // S0 user supplied status // S2 user supplied status + WRE + TD (Overflows) // S3 user supplied status + FZ + TD (Underflows) // { .mfi - nop.m 999 + nop.m 0 fsetc.s3 0x7F,0x41 - nop.i 999 + nop.i 0 } { .mfi - nop.m 999 + nop.m 0 fsetc.s2 0x7F,0x42 - nop.i 999 -};; + nop.i 0 +} +;; // -// Do final operation +// Do final operation with s2 and s3 // { .mfi - setf.exp FR_NBig = GR_Scratch - fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 - nop.i 999 + setf.exp FR_NBig = GR_neg_ov_limit + fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 + nop.i 0 } { .mfi - nop.m 999 - fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 - nop.i 999 -};; -{ .mfi - setf.exp FR_Big = GR_Scratch1 - fma.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 - nop.i 999 -};; + setf.exp FR_Big = GR_pos_ov_limit + fma.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 + nop.i 0 +} +;; // Check for overflow or underflow. // Restore s3 // Restore s2 // { .mfi - nop.m 0 + nop.m 0 fsetc.s3 0x7F,0x40 - nop.i 999 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fsetc.s2 0x7F,0x40 - nop.i 999 -};; + nop.i 0 +} +;; // // Is the result zero? 
// { .mfi - nop.m 999 + nop.m 0 fclass.m p6, p0 = FR_Result3, 0x007 - nop.i 999 -} + nop.i 0 +} { .mfi - addl GR_Tag = 144, r0 + nop.m 0 fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big - nop.i 0 -};; + nop.i 0 +} +;; // // Detect masked underflow - Tiny + Inexact Only // { .mfi - nop.m 999 + nop.m 0 (p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2 - nop.i 999 -};; + nop.i 0 +} +;; // // Is result bigger the allowed range? // Branch out for underflow // { .mfb -(p6) addl GR_Tag = 145, r0 + nop.m 0 (p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig -(p6) br.cond.spnt LDEXPL_UNDERFLOW -};; +(p6) br.cond.spnt LDEXPL_UNDERFLOW +} +;; // // Branch out for overflow // -{ .mbb - nop.m 0 -(p7) br.cond.spnt LDEXPL_OVERFLOW -(p9) br.cond.spnt LDEXPL_OVERFLOW -};; +{ .bbb +(p7) br.cond.spnt LDEXPL_OVERFLOW +(p9) br.cond.spnt LDEXPL_OVERFLOW + br.ret.sptk b0 // Return from main path. +} +;; -// -// Return from main path. -// -{ .mfb - nop.m 999 - nop.f 0 - br.ret.sptk b0;; +// Here if result overflows +LDEXPL_OVERFLOW: +{ .mib + alloc r32=ar.pfs,3,0,4,0 + addl GR_Tag = 144, r0 // Set error tag for overflow + br.cond.sptk __libm_error_region // Call error support for overflow } +;; -GLOBAL_LIBM_END(__libm_ldexpl) -__libm_error_region: +// Here if result underflows +LDEXPL_UNDERFLOW: +{ .mib + alloc r32=ar.pfs,3,0,4,0 + addl GR_Tag = 145, r0 // Set error tag for underflow + br.cond.sptk __libm_error_region // Call error support for underflow +} +;; + +// Here if x=unorm +LDEXPL_UNORM: +{ .mib + getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x + nop.i 0 + br.cond.sptk LDEXPL_COMMON // Return to main path +} +;; -LDEXPL_OVERFLOW: -LDEXPL_UNDERFLOW: + +GLOBAL_LIBM_END(__libm_ldexpl) +LOCAL_LIBM_ENTRY(__libm_error_region) // // Get stack address of N // .prologue { .mfi - add GR_Parameter_Y=-32,sp + add GR_Parameter_Y=-32,sp nop.f 0 .save ar.pfs,GR_SAVE_PFS - mov GR_SAVE_PFS=ar.pfs + mov GR_SAVE_PFS=ar.pfs } // -// Adjust sp +// Adjust sp // { .mfi .fframe 64 - add 
sp=-64,sp + add sp=-64,sp nop.f 0 - mov GR_SAVE_GP=gp + mov GR_SAVE_GP=gp };; // -// Store N on stack in correct position +// Store N on stack in correct position // Locate the address of x on stack // { .mmi - st8 [GR_Parameter_Y] = GR_N_as_int,16 - add GR_Parameter_X = 16,sp + st8 [GR_Parameter_Y] = GR_N_as_int,16 + add GR_Parameter_X = 16,sp .save b0, GR_SAVE_B0 - mov GR_SAVE_B0=b0 + mov GR_SAVE_B0=b0 };; // @@ -337,42 +408,42 @@ LDEXPL_UNDERFLOW: // .body { .mib - stfe [GR_Parameter_X] = FR_Norm_X - add GR_Parameter_RESULT = 0,GR_Parameter_Y + stfe [GR_Parameter_X] = FR_Norm_X + add GR_Parameter_RESULT = 0,GR_Parameter_Y nop.b 0 } { .mib - stfe [GR_Parameter_Y] = FR_Result + stfe [GR_Parameter_Y] = FR_Result add GR_Parameter_Y = -16,GR_Parameter_Y - br.call.sptk b0=__libm_error_support# + br.call.sptk b0=__libm_error_support# };; // // Get location of result on stack // { .mmi + add GR_Parameter_RESULT = 48,sp nop.m 0 - nop.m 0 - add GR_Parameter_RESULT = 48,sp + nop.i 0 };; // -// Get the new result +// Get the new result // { .mmi - ldfe FR_Result = [GR_Parameter_RESULT] + ldfe FR_Result = [GR_Parameter_RESULT] .restore sp - add sp = 64,sp - mov b0 = GR_SAVE_B0 + add sp = 64,sp + mov b0 = GR_SAVE_B0 };; // // Restore gp, ar.pfs and return // { .mib - mov gp = GR_SAVE_GP - mov ar.pfs = GR_SAVE_PFS - br.ret.sptk b0 + mov gp = GR_SAVE_GP + mov ar.pfs = GR_SAVE_PFS + br.ret.sptk b0 };; LOCAL_LIBM_END(__libm_error_region) diff --git a/sysdeps/ia64/fpu/s_libm_scalbn.S b/sysdeps/ia64/fpu/s_libm_scalbn.S index fb7ab93..eaccb7d 100644 --- a/sysdeps/ia64/fpu/s_libm_scalbn.S +++ b/sysdeps/ia64/fpu/s_libm_scalbn.S @@ -21,33 +21,34 @@ // products derived from this software without specific prior written // permission. 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// // History //============================================================== // 02/02/00 Initial version -// 01/26/01 Scalbn completely reworked and now standalone version +// 01/26/01 Scalbn completely reworked and now standalone version // 01/04/02 Added handling for int 32 or 64 bits // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align +// 08/04/03 Improved performance // // API //============================================================== -// double __libm_scalbn (double x, int n, int int_type) +// double __libm_scalbn (double x, int n, int int_type) // input floating point f8 and int n (r33), int int_type (r34) // output floating point f8 // @@ -55,27 +56,41 @@ // int_type = 1 if int is 64 bits // // Returns x* 2**n using an fma and detects overflow -// and underflow. +// and underflow. // // +// Strategy: +// Compute biased exponent of result exp_Result = N + exp_X +// Break into ranges: +// exp_Result > 0x103fe -> Certain overflow +// exp_Result = 0x103fe -> Possible overflow +// 0x0fc01 <= exp_Result < 0x103fe -> No over/underflow (main path) +// 0x0fc01 - 52 <= exp_Result < 0x0fc01 -> Possible underflow +// exp_Result < 0x0fc01 - 52 -> Certain underflow FR_Big = f6 FR_NBig = f7 FR_Floating_X = f8 FR_Result = f8 FR_Result2 = f9 -FR_Result3 = f11 -FR_Norm_X = f12 -FR_Two_N = f14 -FR_Two_to_Big = f15 +FR_Result3 = f10 +FR_Norm_X = f11 +FR_Two_N = f12 +GR_neg_ov_limit= r14 GR_N_Biased = r15 GR_Big = r16 GR_NBig = r17 -GR_Scratch = r18 -GR_Scratch1 = r19 +GR_exp_Result = r18 +GR_pos_ov_limit= r19 GR_Bias = r20 GR_N_as_int = r21 +GR_signexp_X = r22 +GR_exp_X = r23 +GR_exp_mask = r24 +GR_max_exp = r25 +GR_min_exp = r26 +GR_min_den_exp = r27 GR_SAVE_B0 = r32 GR_SAVE_GP = r33 @@ -93,242 +108,298 @@ GLOBAL_LIBM_ENTRY(__libm_scalbn) // Build the exponent Bias // { .mfi - alloc r32=ar.pfs,3,0,4,0 - fclass.m p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero - addl GR_Bias = 0x0FFFF,r0 + getf.exp 
GR_signexp_X = FR_Floating_X // Get signexp of x + fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero + mov GR_Bias = 0x0ffff } - // -// Is N zero? // Normalize x // Is integer type 32 bits? // { .mfi - cmp.eq p6,p0 = r33,r0 - fnorm.s1 FR_Norm_X = FR_Floating_X - cmp.eq p8,p9 = r34,r0 + mov GR_Big = 35000 // If N this big then certain overflow + fnorm.s1 FR_Norm_X = FR_Floating_X + cmp.eq p8,p9 = r34,r0 } ;; // Sign extend N if int is 32 bits { .mfi (p9) mov GR_N_as_int = r33 // Copy N if int is 64 bits - nop.f 0 + fclass.m p9,p0 = FR_Floating_X, 0x0b // Test for x=unorm (p8) sxt4 GR_N_as_int = r33 // Sign extend N if int is 32 bits } +{ .mfi + mov GR_NBig = -35000 // If N this small then certain underflow + nop.f 0 + mov GR_max_exp = 0x103fe // Exponent of maximum double +} +;; + +// Create biased exponent for 2**N +{ .mfi + add GR_N_Biased = GR_Bias,GR_N_as_int + nop.f 0 + cmp.ge p7, p0 = GR_N_as_int, GR_Big // Certain overflow? +} +{ .mib + cmp.le p8, p0 = GR_N_as_int, GR_NBig // Certain underflow? + mov GR_min_exp = 0x0fc01 // Exponent of minimum double +(p9) br.cond.spnt SCALBN_UNORM // Branch if x=unorm +} +;; + +SCALBN_COMMON: +// Main path continues. Also return here from x=unorm path. +// Create 2**N +.pred.rel "mutex",p7,p8 +{ .mfi + setf.exp FR_Two_N = GR_N_Biased + nop.f 0 +(p7) mov GR_N_as_int = GR_Big // Limit max N +} +{ .mfi +(p8) mov GR_N_as_int = GR_NBig // Limit min N + nop.f 0 +(p8) cmp.eq p7,p0 = r0,r0 // Set p7 if |N| big +} ;; // -// Branch and return special values. -// Create -35000 -// Create 35000 +// Create biased exponent for 2**N for N big +// Is N zero? 
// { .mfi - addl GR_Big = 35000,r0 +(p7) add GR_N_Biased = GR_Bias,GR_N_as_int nop.f 0 - add GR_N_Biased = GR_Bias,GR_N_as_int + cmp.eq.or p6,p0 = r33,r0 } -{ .mfb - addl GR_NBig = -35000,r0 -(p7) fma.d.s0 FR_Result = FR_Floating_X,f1, f0 -(p7) br.ret.spnt b0 -};; +{ .mfi + mov GR_pos_ov_limit = 0x103ff // Exponent for positive overflow + nop.f 0 + mov GR_exp_mask = 0x1ffff // Exponent mask +} +;; // -// Build the exponent Bias -// Return x when N = 0 +// Create 2**N for N big +// Return x when N = 0 or X = Nan, Inf, Zero // { .mfi - setf.exp FR_Two_N = GR_N_Biased +(p7) setf.exp FR_Two_N = GR_N_Biased nop.f 0 - addl GR_Scratch1 = 0x063BF,r0 + mov GR_min_den_exp = 0x0fc01 - 52 // Exponent of min denorm dble } { .mfb - addl GR_Scratch = 0x019C3F,r0 -(p6) fma.d.s0 FR_Result = FR_Floating_X,f1, f0 -(p6) br.ret.spnt b0 -};; + and GR_exp_X = GR_exp_mask, GR_signexp_X +(p6) fma.d.s0 FR_Result = FR_Floating_X, f1, f0 +(p6) br.ret.spnt b0 +} +;; // -// Create 2*big -// Create 2**-big -// Is N > 35000 -// Is N < -35000 // Raise Denormal operand flag with compare -// Main path, create 2**N +// Compute biased result exponent // { .mfi - setf.exp FR_NBig = GR_Scratch1 - nop.f 0 - cmp.ge p6, p0 = GR_N_as_int, GR_Big -} -{ .mfi - setf.exp FR_Big = GR_Scratch + add GR_exp_Result = GR_exp_X, GR_N_as_int fcmp.ge.s0 p0,p11 = FR_Floating_X,f0 - cmp.le p8, p0 = GR_N_as_int, GR_NBig -};; + mov GR_neg_ov_limit = 0x303ff // Exponent for negative overflow +} +;; // -// Adjust 2**N if N was very small or very large +// Do final operation // { .mfi - nop.m 0 -(p6) fma.s1 FR_Two_N = FR_Big,f1,f0 - nop.i 0 + cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow + fma.d.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 + cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow } -{ .mlx - nop.m 999 - movl GR_Scratch = 0x00000000000303FF -};; +{ .mfb + nop.m 0 + nop.f 0 +(p9) br.cond.spnt SCALBN_UNDERFLOW // Branch if certain underflow +} +;; +{ .mib +(p6) cmp.gt.unc p6,p8 = GR_exp_Result, 
GR_max_exp // Test sure overflow +(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow +(p7) br.ret.sptk b0 // Return from main path +} +;; -{ .mfi - nop.m 0 -(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0 - nop.i 0 +{ .bbb +(p6) br.cond.spnt SCALBN_OVERFLOW // Branch if certain overflow +(p8) br.cond.spnt SCALBN_POSSIBLE_OVERFLOW // Branch if possible overflow +(p9) br.cond.spnt SCALBN_POSSIBLE_UNDERFLOW // Branch if possible underflow } -{ .mlx - nop.m 999 - movl GR_Scratch1= 0x00000000000103FF -};; +;; -// Set up necessary status fields +// Here if possible underflow. +// Resulting exponent: 0x0fc01-52 <= exp_Result < 0x0fc01 +SCALBN_POSSIBLE_UNDERFLOW: +// +// Here if possible overflow. +// Resulting exponent: 0x103fe = exp_Result +SCALBN_POSSIBLE_OVERFLOW: + +// Set up necessary status fields // // S0 user supplied status // S2 user supplied status + WRE + TD (Overflows) // S3 user supplied status + FZ + TD (Underflows) // { .mfi - nop.m 999 + nop.m 0 fsetc.s3 0x7F,0x41 - nop.i 999 + nop.i 0 } { .mfi - nop.m 999 + nop.m 0 fsetc.s2 0x7F,0x42 - nop.i 999 -};; + nop.i 0 +} +;; // -// Do final operation +// Do final operation with s2 and s3 // { .mfi - setf.exp FR_NBig = GR_Scratch - fma.d.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 - nop.i 999 + setf.exp FR_NBig = GR_neg_ov_limit + fma.d.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 + nop.i 0 } { .mfi - nop.m 999 - fma.d.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 - nop.i 999 -};; -{ .mfi - setf.exp FR_Big = GR_Scratch1 - fma.d.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 - nop.i 999 -};; + setf.exp FR_Big = GR_pos_ov_limit + fma.d.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 + nop.i 0 +} +;; // Check for overflow or underflow. // Restore s3 // Restore s2 // { .mfi - nop.m 0 + nop.m 0 fsetc.s3 0x7F,0x40 - nop.i 999 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fsetc.s2 0x7F,0x40 - nop.i 999 -};; + nop.i 0 +} +;; // // Is the result zero? 
// { .mfi - nop.m 999 + nop.m 0 fclass.m p6, p0 = FR_Result3, 0x007 - nop.i 999 -} + nop.i 0 +} { .mfi - addl GR_Tag = 176, r0 + nop.m 0 fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big - nop.i 0 -};; + nop.i 0 +} +;; // // Detect masked underflow - Tiny + Inexact Only // { .mfi - nop.m 999 + nop.m 0 (p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2 - nop.i 999 -};; + nop.i 0 +} +;; // // Is result bigger the allowed range? // Branch out for underflow // { .mfb -(p6) addl GR_Tag = 177, r0 + nop.m 0 (p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig -(p6) br.cond.spnt SCALBN_UNDERFLOW -};; +(p6) br.cond.spnt SCALBN_UNDERFLOW +} +;; // // Branch out for overflow // -{ .mbb - nop.m 0 -(p7) br.cond.spnt SCALBN_OVERFLOW -(p9) br.cond.spnt SCALBN_OVERFLOW -};; +{ .bbb +(p7) br.cond.spnt SCALBN_OVERFLOW +(p9) br.cond.spnt SCALBN_OVERFLOW + br.ret.sptk b0 // Return from main path. +} +;; -// -// Return from main path. -// -{ .mfb - nop.m 999 - nop.f 0 - br.ret.sptk b0;; +// Here if result overflows +SCALBN_OVERFLOW: +{ .mib + alloc r32=ar.pfs,3,0,4,0 + addl GR_Tag = 176, r0 // Set error tag for overflow + br.cond.sptk __libm_error_region // Call error support for overflow } +;; -GLOBAL_LIBM_END(__libm_scalbn) -__libm_error_region: +// Here if result underflows +SCALBN_UNDERFLOW: +{ .mib + alloc r32=ar.pfs,3,0,4,0 + addl GR_Tag = 177, r0 // Set error tag for underflow + br.cond.sptk __libm_error_region // Call error support for underflow +} +;; + +// Here if x=unorm +SCALBN_UNORM: +{ .mib + getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x + nop.i 0 + br.cond.sptk SCALBN_COMMON // Return to main path +} +;; -SCALBN_OVERFLOW: -SCALBN_UNDERFLOW: + +GLOBAL_LIBM_END(__libm_scalbn) +LOCAL_LIBM_ENTRY(__libm_error_region) // // Get stack address of N // .prologue { .mfi - add GR_Parameter_Y=-32,sp + add GR_Parameter_Y=-32,sp nop.f 0 .save ar.pfs,GR_SAVE_PFS - mov GR_SAVE_PFS=ar.pfs + mov GR_SAVE_PFS=ar.pfs } // -// Adjust sp +// Adjust sp // { .mfi .fframe 64 - add 
sp=-64,sp + add sp=-64,sp nop.f 0 - mov GR_SAVE_GP=gp + mov GR_SAVE_GP=gp };; // -// Store N on stack in correct position +// Store N on stack in correct position // Locate the address of x on stack // { .mmi - st8 [GR_Parameter_Y] = GR_N_as_int,16 - add GR_Parameter_X = 16,sp + st8 [GR_Parameter_Y] = GR_N_as_int,16 + add GR_Parameter_X = 16,sp .save b0, GR_SAVE_B0 - mov GR_SAVE_B0=b0 + mov GR_SAVE_B0=b0 };; // @@ -337,42 +408,42 @@ SCALBN_UNDERFLOW: // .body { .mib - stfd [GR_Parameter_X] = FR_Norm_X - add GR_Parameter_RESULT = 0,GR_Parameter_Y + stfd [GR_Parameter_X] = FR_Norm_X + add GR_Parameter_RESULT = 0,GR_Parameter_Y nop.b 0 } { .mib - stfd [GR_Parameter_Y] = FR_Result + stfd [GR_Parameter_Y] = FR_Result add GR_Parameter_Y = -16,GR_Parameter_Y - br.call.sptk b0=__libm_error_support# + br.call.sptk b0=__libm_error_support# };; // // Get location of result on stack // { .mmi + add GR_Parameter_RESULT = 48,sp nop.m 0 - nop.m 0 - add GR_Parameter_RESULT = 48,sp + nop.i 0 };; // -// Get the new result +// Get the new result // { .mmi - ldfd FR_Result = [GR_Parameter_RESULT] + ldfd FR_Result = [GR_Parameter_RESULT] .restore sp - add sp = 64,sp - mov b0 = GR_SAVE_B0 + add sp = 64,sp + mov b0 = GR_SAVE_B0 };; // // Restore gp, ar.pfs and return // { .mib - mov gp = GR_SAVE_GP - mov ar.pfs = GR_SAVE_PFS - br.ret.sptk b0 + mov gp = GR_SAVE_GP + mov ar.pfs = GR_SAVE_PFS + br.ret.sptk b0 };; LOCAL_LIBM_END(__libm_error_region) diff --git a/sysdeps/ia64/fpu/s_libm_scalbnf.S b/sysdeps/ia64/fpu/s_libm_scalbnf.S index 57ab2cc..e00997a 100644 --- a/sysdeps/ia64/fpu/s_libm_scalbnf.S +++ b/sysdeps/ia64/fpu/s_libm_scalbnf.S @@ -21,33 +21,34 @@ // products derived from this software without specific prior written // permission. 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// // History //============================================================== // 02/02/00 Initial version -// 01/26/01 scalbnf completely reworked and now standalone version +// 01/26/01 Scalbnf completely reworked and now standalone version // 01/04/02 Added handling for int 32 or 64 bits // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align +// 08/04/03 Improved performance // // API //============================================================== -// float __libm_scalbnf (float x, int n, int int_type) +// float __libm_scalbnf (float x, int n, int int_type) // input floating point f8 and int n (r33), int int_type (r34) // output floating point f8 // @@ -55,27 +56,41 @@ // int_type = 1 if int is 64 bits // // Returns x* 2**n using an fma and detects overflow -// and underflow. +// and underflow. // // +// Strategy: +// Compute biased exponent of result exp_Result = N + exp_X +// Break into ranges: +// exp_Result > 0x1007e -> Certain overflow +// exp_Result = 0x1007e -> Possible overflow +// 0x0ff81 <= exp_Result < 0x1007e -> No over/underflow (main path) +// 0x0ff81 - 23 <= exp_Result < 0x0ff81 -> Possible underflow +// exp_Result < 0x0ff81 - 23 -> Certain underflow FR_Big = f6 FR_NBig = f7 FR_Floating_X = f8 FR_Result = f8 FR_Result2 = f9 -FR_Result3 = f11 -FR_Norm_X = f12 -FR_Two_N = f14 -FR_Two_to_Big = f15 +FR_Result3 = f10 +FR_Norm_X = f11 +FR_Two_N = f12 +GR_neg_ov_limit= r14 GR_N_Biased = r15 GR_Big = r16 GR_NBig = r17 -GR_Scratch = r18 -GR_Scratch1 = r19 +GR_exp_Result = r18 +GR_pos_ov_limit= r19 GR_Bias = r20 GR_N_as_int = r21 +GR_signexp_X = r22 +GR_exp_X = r23 +GR_exp_mask = r24 +GR_max_exp = r25 +GR_min_exp = r26 +GR_min_den_exp = r27 GR_SAVE_B0 = r32 GR_SAVE_GP = r33 @@ -93,242 +108,298 @@ GLOBAL_LIBM_ENTRY(__libm_scalbnf) // Build the exponent Bias // { .mfi - alloc r32=ar.pfs,3,0,4,0 - fclass.m p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero - addl GR_Bias = 0x0FFFF,r0 + getf.exp 
GR_signexp_X = FR_Floating_X // Get signexp of x + fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero + mov GR_Bias = 0x0ffff } - // -// Is N zero? // Normalize x // Is integer type 32 bits? // { .mfi - cmp.eq p6,p0 = r33,r0 - fnorm.s1 FR_Norm_X = FR_Floating_X - cmp.eq p8,p9 = r34,r0 + mov GR_Big = 35000 // If N this big then certain overflow + fnorm.s1 FR_Norm_X = FR_Floating_X + cmp.eq p8,p9 = r34,r0 } ;; // Sign extend N if int is 32 bits { .mfi (p9) mov GR_N_as_int = r33 // Copy N if int is 64 bits - nop.f 0 + fclass.m p9,p0 = FR_Floating_X, 0x0b // Test for x=unorm (p8) sxt4 GR_N_as_int = r33 // Sign extend N if int is 32 bits } +{ .mfi + mov GR_NBig = -35000 // If N this small then certain underflow + nop.f 0 + mov GR_max_exp = 0x1007e // Exponent of maximum float +} +;; + +// Create biased exponent for 2**N +{ .mfi + add GR_N_Biased = GR_Bias,GR_N_as_int + nop.f 0 + cmp.ge p7, p0 = GR_N_as_int, GR_Big // Certain overflow? +} +{ .mib + cmp.le p8, p0 = GR_N_as_int, GR_NBig // Certain underflow? + mov GR_min_exp = 0x0ff81 // Exponent of minimum float +(p9) br.cond.spnt SCALBNF_UNORM // Branch if x=unorm +} +;; + +SCALBNF_COMMON: +// Main path continues. Also return here from x=unorm path. +// Create 2**N +.pred.rel "mutex",p7,p8 +{ .mfi + setf.exp FR_Two_N = GR_N_Biased + nop.f 0 +(p7) mov GR_N_as_int = GR_Big // Limit max N +} +{ .mfi +(p8) mov GR_N_as_int = GR_NBig // Limit min N + nop.f 0 +(p8) cmp.eq p7,p0 = r0,r0 // Set p7 if |N| big +} ;; // -// Branch and return special values. -// Create -35000 -// Create 35000 +// Create biased exponent for 2**N for N big +// Is N zero? 
// { .mfi - addl GR_Big = 35000,r0 +(p7) add GR_N_Biased = GR_Bias,GR_N_as_int nop.f 0 - add GR_N_Biased = GR_Bias,GR_N_as_int + cmp.eq.or p6,p0 = r33,r0 } -{ .mfb - addl GR_NBig = -35000,r0 -(p7) fma.s.s0 FR_Result = FR_Floating_X,f1, f0 -(p7) br.ret.spnt b0 -};; +{ .mfi + mov GR_pos_ov_limit = 0x1007f // Exponent for positive overflow + nop.f 0 + mov GR_exp_mask = 0x1ffff // Exponent mask +} +;; // -// Build the exponent Bias -// Return x when N = 0 +// Create 2**N for N big +// Return x when N = 0 or X = Nan, Inf, Zero // { .mfi - setf.exp FR_Two_N = GR_N_Biased +(p7) setf.exp FR_Two_N = GR_N_Biased nop.f 0 - addl GR_Scratch1 = 0x063BF,r0 + mov GR_min_den_exp = 0x0ff81 - 23 // Exponent of min denorm float } { .mfb - addl GR_Scratch = 0x019C3F,r0 -(p6) fma.s.s0 FR_Result = FR_Floating_X,f1, f0 -(p6) br.ret.spnt b0 -};; + and GR_exp_X = GR_exp_mask, GR_signexp_X +(p6) fma.s.s0 FR_Result = FR_Floating_X, f1, f0 +(p6) br.ret.spnt b0 +} +;; // -// Create 2*big -// Create 2**-big -// Is N > 35000 -// Is N < -35000 // Raise Denormal operand flag with compare -// Main path, create 2**N +// Compute biased result exponent // { .mfi - setf.exp FR_NBig = GR_Scratch1 - nop.f 0 - cmp.ge p6, p0 = GR_N_as_int, GR_Big -} -{ .mfi - setf.exp FR_Big = GR_Scratch + add GR_exp_Result = GR_exp_X, GR_N_as_int fcmp.ge.s0 p0,p11 = FR_Floating_X,f0 - cmp.le p8, p0 = GR_N_as_int, GR_NBig -};; + mov GR_neg_ov_limit = 0x3007f // Exponent for negative overflow +} +;; // -// Adjust 2**N if N was very small or very large +// Do final operation // { .mfi - nop.m 0 -(p6) fma.s1 FR_Two_N = FR_Big,f1,f0 - nop.i 0 + cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow + fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 + cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow } -{ .mlx - nop.m 999 - movl GR_Scratch = 0x000000000003007F -};; +{ .mfb + nop.m 0 + nop.f 0 +(p9) br.cond.spnt SCALBNF_UNDERFLOW // Branch if certain underflow +} +;; +{ .mib +(p6) cmp.gt.unc p6,p8 = 
GR_exp_Result, GR_max_exp // Test sure overflow +(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow +(p7) br.ret.sptk b0 // Return from main path +} +;; -{ .mfi - nop.m 0 -(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0 - nop.i 0 +{ .bbb +(p6) br.cond.spnt SCALBNF_OVERFLOW // Branch if certain overflow +(p8) br.cond.spnt SCALBNF_POSSIBLE_OVERFLOW // Branch if possible overflow +(p9) br.cond.spnt SCALBNF_POSSIBLE_UNDERFLOW // Branch if possible underflow } -{ .mlx - nop.m 999 - movl GR_Scratch1= 0x000000000001007F -};; +;; -// Set up necessary status fields +// Here if possible underflow. +// Resulting exponent: 0x0ff81-23 <= exp_Result < 0x0ff81 +SCALBNF_POSSIBLE_UNDERFLOW: +// +// Here if possible overflow. +// Resulting exponent: 0x1007e = exp_Result +SCALBNF_POSSIBLE_OVERFLOW: + +// Set up necessary status fields // // S0 user supplied status // S2 user supplied status + WRE + TD (Overflows) // S3 user supplied status + FZ + TD (Underflows) // { .mfi - nop.m 999 + nop.m 0 fsetc.s3 0x7F,0x41 - nop.i 999 + nop.i 0 } { .mfi - nop.m 999 + nop.m 0 fsetc.s2 0x7F,0x42 - nop.i 999 -};; + nop.i 0 +} +;; // -// Do final operation +// Do final operation with s2 and s3 // { .mfi - setf.exp FR_NBig = GR_Scratch - fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 - nop.i 999 + setf.exp FR_NBig = GR_neg_ov_limit + fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 + nop.i 0 } { .mfi - nop.m 999 - fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 - nop.i 999 -};; -{ .mfi - setf.exp FR_Big = GR_Scratch1 - fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 - nop.i 999 -};; + setf.exp FR_Big = GR_pos_ov_limit + fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 + nop.i 0 +} +;; // Check for overflow or underflow. // Restore s3 // Restore s2 // { .mfi - nop.m 0 + nop.m 0 fsetc.s3 0x7F,0x40 - nop.i 999 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fsetc.s2 0x7F,0x40 - nop.i 999 -};; + nop.i 0 +} +;; // // Is the result zero? 
// { .mfi - nop.m 999 + nop.m 0 fclass.m p6, p0 = FR_Result3, 0x007 - nop.i 999 -} + nop.i 0 +} { .mfi - addl GR_Tag = 178, r0 + nop.m 0 fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big - nop.i 0 -};; + nop.i 0 +} +;; // // Detect masked underflow - Tiny + Inexact Only // { .mfi - nop.m 999 + nop.m 0 (p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2 - nop.i 999 -};; + nop.i 0 +} +;; // // Is result bigger the allowed range? // Branch out for underflow // { .mfb -(p6) addl GR_Tag = 179, r0 + nop.m 0 (p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig -(p6) br.cond.spnt SCALBNF_UNDERFLOW -};; +(p6) br.cond.spnt SCALBNF_UNDERFLOW +} +;; // // Branch out for overflow // -{ .mbb - nop.m 0 -(p7) br.cond.spnt SCALBNF_OVERFLOW -(p9) br.cond.spnt SCALBNF_OVERFLOW -};; +{ .bbb +(p7) br.cond.spnt SCALBNF_OVERFLOW +(p9) br.cond.spnt SCALBNF_OVERFLOW + br.ret.sptk b0 // Return from main path. +} +;; -// -// Return from main path. -// -{ .mfb - nop.m 999 - nop.f 0 - br.ret.sptk b0;; +// Here if result overflows +SCALBNF_OVERFLOW: +{ .mib + alloc r32=ar.pfs,3,0,4,0 + addl GR_Tag = 178, r0 // Set error tag for overflow + br.cond.sptk __libm_error_region // Call error support for overflow } +;; -GLOBAL_LIBM_END(__libm_scalbnf) -__libm_error_region: +// Here if result underflows +SCALBNF_UNDERFLOW: +{ .mib + alloc r32=ar.pfs,3,0,4,0 + addl GR_Tag = 179, r0 // Set error tag for underflow + br.cond.sptk __libm_error_region // Call error support for underflow +} +;; + +// Here if x=unorm +SCALBNF_UNORM: +{ .mib + getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x + nop.i 0 + br.cond.sptk SCALBNF_COMMON // Return to main path +} +;; -SCALBNF_OVERFLOW: -SCALBNF_UNDERFLOW: + +GLOBAL_LIBM_END(__libm_scalbnf) +LOCAL_LIBM_ENTRY(__libm_error_region) // // Get stack address of N // .prologue { .mfi - add GR_Parameter_Y=-32,sp + add GR_Parameter_Y=-32,sp nop.f 0 .save ar.pfs,GR_SAVE_PFS - mov GR_SAVE_PFS=ar.pfs + mov GR_SAVE_PFS=ar.pfs } // -// Adjust sp +// Adjust sp // { .mfi .fframe 
64 - add sp=-64,sp + add sp=-64,sp nop.f 0 - mov GR_SAVE_GP=gp + mov GR_SAVE_GP=gp };; // -// Store N on stack in correct position +// Store N on stack in correct position // Locate the address of x on stack // { .mmi - st8 [GR_Parameter_Y] = GR_N_as_int,16 - add GR_Parameter_X = 16,sp + st8 [GR_Parameter_Y] = GR_N_as_int,16 + add GR_Parameter_X = 16,sp .save b0, GR_SAVE_B0 - mov GR_SAVE_B0=b0 + mov GR_SAVE_B0=b0 };; // @@ -337,42 +408,42 @@ SCALBNF_UNDERFLOW: // .body { .mib - stfs [GR_Parameter_X] = FR_Norm_X - add GR_Parameter_RESULT = 0,GR_Parameter_Y + stfs [GR_Parameter_X] = FR_Norm_X + add GR_Parameter_RESULT = 0,GR_Parameter_Y nop.b 0 } { .mib - stfs [GR_Parameter_Y] = FR_Result + stfs [GR_Parameter_Y] = FR_Result add GR_Parameter_Y = -16,GR_Parameter_Y - br.call.sptk b0=__libm_error_support# + br.call.sptk b0=__libm_error_support# };; // // Get location of result on stack // { .mmi + add GR_Parameter_RESULT = 48,sp nop.m 0 - nop.m 0 - add GR_Parameter_RESULT = 48,sp + nop.i 0 };; // -// Get the new result +// Get the new result // { .mmi - ldfs FR_Result = [GR_Parameter_RESULT] + ldfs FR_Result = [GR_Parameter_RESULT] .restore sp - add sp = 64,sp - mov b0 = GR_SAVE_B0 + add sp = 64,sp + mov b0 = GR_SAVE_B0 };; // // Restore gp, ar.pfs and return // { .mib - mov gp = GR_SAVE_GP - mov ar.pfs = GR_SAVE_PFS - br.ret.sptk b0 + mov gp = GR_SAVE_GP + mov ar.pfs = GR_SAVE_PFS + br.ret.sptk b0 };; LOCAL_LIBM_END(__libm_error_region) diff --git a/sysdeps/ia64/fpu/s_libm_scalbnl.S b/sysdeps/ia64/fpu/s_libm_scalbnl.S index 6eb6e17..1edf9a0 100644 --- a/sysdeps/ia64/fpu/s_libm_scalbnl.S +++ b/sysdeps/ia64/fpu/s_libm_scalbnl.S @@ -21,33 +21,34 @@ // products derived from this software without specific prior written // permission. 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. 
// // History //============================================================== // 02/02/00 Initial version -// 01/26/01 scalbnl completely reworked and now standalone version +// 01/26/01 Scalbnl completely reworked and now standalone version // 01/04/02 Added handling for int 32 or 64 bits // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align +// 08/04/03 Improved performance // // API //============================================================== -// long double __libm_scalbnl (long double x, int n, int int_type) +// long double __libm_scalbnl (long double x, int n, int int_type) // input floating point f8 and int n (r34), int int_type (r35) // output floating point f8 // @@ -55,27 +56,41 @@ // int_type = 1 if int is 64 bits // // Returns x* 2**n using an fma and detects overflow -// and underflow. +// and underflow. // // +// Strategy: +// Compute biased exponent of result exp_Result = N + exp_X +// Break into ranges: +// exp_Result > 0x13ffe -> Certain overflow +// exp_Result = 0x13ffe -> Possible overflow +// 0x0c001 <= exp_Result < 0x13ffe -> No over/underflow (main path) +// 0x0c001 - 63 <= exp_Result < 0x0c001 -> Possible underflow +// exp_Result < 0x0c001 - 63 -> Certain underflow FR_Big = f6 FR_NBig = f7 FR_Floating_X = f8 FR_Result = f8 FR_Result2 = f9 -FR_Result3 = f11 -FR_Norm_X = f12 -FR_Two_N = f14 -FR_Two_to_Big = f15 +FR_Result3 = f10 +FR_Norm_X = f11 +FR_Two_N = f12 +GR_neg_ov_limit= r14 GR_N_Biased = r15 GR_Big = r16 GR_NBig = r17 -GR_Scratch = r18 -GR_Scratch1 = r19 +GR_exp_Result = r18 +GR_pos_ov_limit= r19 GR_Bias = r20 GR_N_as_int = r21 +GR_signexp_X = r22 +GR_exp_X = r23 +GR_exp_mask = r24 +GR_max_exp = r25 +GR_min_exp = r26 +GR_min_den_exp = r27 GR_SAVE_B0 = r32 GR_SAVE_GP = r33 @@ -93,242 +108,298 @@ GLOBAL_LIBM_ENTRY(__libm_scalbnl) // Build the exponent Bias // { .mfi - alloc r32=ar.pfs,3,0,4,0 - fclass.m p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero - addl GR_Bias = 
0x0FFFF,r0 + getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x + fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero + mov GR_Bias = 0x0ffff } - // -// Is N zero? // Normalize x // Is integer type 32 bits? // { .mfi - cmp.eq p6,p0 = r34,r0 - fnorm.s1 FR_Norm_X = FR_Floating_X - cmp.eq p8,p9 = r35,r0 + mov GR_Big = 35000 // If N this big then certain overflow + fnorm.s1 FR_Norm_X = FR_Floating_X + cmp.eq p8,p9 = r35,r0 } ;; // Sign extend N if int is 32 bits { .mfi (p9) mov GR_N_as_int = r34 // Copy N if int is 64 bits - nop.f 0 + fclass.m p9,p0 = FR_Floating_X, 0x0b // Test for x=unorm (p8) sxt4 GR_N_as_int = r34 // Sign extend N if int is 32 bits } +{ .mfi + mov GR_NBig = -35000 // If N this small then certain underflow + nop.f 0 + mov GR_max_exp = 0x13ffe // Exponent of maximum long double +} +;; + +// Create biased exponent for 2**N +{ .mfi + add GR_N_Biased = GR_Bias,GR_N_as_int + nop.f 0 + cmp.ge p7, p0 = GR_N_as_int, GR_Big // Certain overflow? +} +{ .mib + cmp.le p8, p0 = GR_N_as_int, GR_NBig // Certain underflow? + mov GR_min_exp = 0x0c001 // Exponent of minimum long double +(p9) br.cond.spnt SCALBNL_UNORM // Branch if x=unorm +} +;; + +SCALBNL_COMMON: +// Main path continues. Also return here from x=unorm path. +// Create 2**N +.pred.rel "mutex",p7,p8 +{ .mfi + setf.exp FR_Two_N = GR_N_Biased + nop.f 0 +(p7) mov GR_N_as_int = GR_Big // Limit max N +} +{ .mfi +(p8) mov GR_N_as_int = GR_NBig // Limit min N + nop.f 0 +(p8) cmp.eq p7,p0 = r0,r0 // Set p7 if |N| big +} ;; // -// Branch and return special values. -// Create -35000 -// Create 35000 +// Create biased exponent for 2**N for N big +// Is N zero? 
// { .mfi - addl GR_Big = 35000,r0 +(p7) add GR_N_Biased = GR_Bias,GR_N_as_int nop.f 0 - add GR_N_Biased = GR_Bias,GR_N_as_int + cmp.eq.or p6,p0 = r34,r0 } -{ .mfb - addl GR_NBig = -35000,r0 -(p7) fma.s0 FR_Result = FR_Floating_X,f1, f0 -(p7) br.ret.spnt b0 -};; +{ .mfi + mov GR_pos_ov_limit = 0x13fff // Exponent for positive overflow + nop.f 0 + mov GR_exp_mask = 0x1ffff // Exponent mask +} +;; // -// Build the exponent Bias -// Return x when N = 0 +// Create 2**N for N big +// Return x when N = 0 or X = Nan, Inf, Zero // { .mfi - setf.exp FR_Two_N = GR_N_Biased +(p7) setf.exp FR_Two_N = GR_N_Biased nop.f 0 - addl GR_Scratch1 = 0x063BF,r0 + mov GR_min_den_exp = 0x0c001 - 63 // Exp of min denorm long dble } { .mfb - addl GR_Scratch = 0x019C3F,r0 -(p6) fma.s0 FR_Result = FR_Floating_X,f1, f0 -(p6) br.ret.spnt b0 -};; + and GR_exp_X = GR_exp_mask, GR_signexp_X +(p6) fma.s0 FR_Result = FR_Floating_X, f1, f0 +(p6) br.ret.spnt b0 +} +;; // -// Create 2*big -// Create 2**-big -// Is N > 35000 -// Is N < -35000 // Raise Denormal operand flag with compare -// Main path, create 2**N +// Compute biased result exponent // { .mfi - setf.exp FR_NBig = GR_Scratch1 - nop.f 0 - cmp.ge p6, p0 = GR_N_as_int, GR_Big -} -{ .mfi - setf.exp FR_Big = GR_Scratch + add GR_exp_Result = GR_exp_X, GR_N_as_int fcmp.ge.s0 p0,p11 = FR_Floating_X,f0 - cmp.le p8, p0 = GR_N_as_int, GR_NBig -};; + mov GR_neg_ov_limit = 0x33fff // Exponent for negative overflow +} +;; // -// Adjust 2**N if N was very small or very large +// Do final operation // { .mfi - nop.m 0 -(p6) fma.s1 FR_Two_N = FR_Big,f1,f0 - nop.i 0 + cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow + fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 + cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow } -{ .mlx - nop.m 999 - movl GR_Scratch = 0x0000000000033FFF -};; +{ .mfb + nop.m 0 + nop.f 0 +(p9) br.cond.spnt SCALBNL_UNDERFLOW // Branch if certain underflow +} +;; +{ .mib +(p6) cmp.gt.unc p6,p8 = GR_exp_Result, 
GR_max_exp // Test sure overflow +(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow +(p7) br.ret.sptk b0 // Return from main path +} +;; -{ .mfi - nop.m 0 -(p8) fma.s1 FR_Two_N = FR_NBig,f1,f0 - nop.i 0 +{ .bbb +(p6) br.cond.spnt SCALBNL_OVERFLOW // Branch if certain overflow +(p8) br.cond.spnt SCALBNL_POSSIBLE_OVERFLOW // Branch if possible overflow +(p9) br.cond.spnt SCALBNL_POSSIBLE_UNDERFLOW // Branch if possible underflow } -{ .mlx - nop.m 999 - movl GR_Scratch1= 0x0000000000013FFF -};; +;; -// Set up necessary status fields +// Here if possible underflow. +// Resulting exponent: 0x0c001-63 <= exp_Result < 0x0c001 +SCALBNL_POSSIBLE_UNDERFLOW: +// +// Here if possible overflow. +// Resulting exponent: 0x13ffe = exp_Result +SCALBNL_POSSIBLE_OVERFLOW: + +// Set up necessary status fields // // S0 user supplied status // S2 user supplied status + WRE + TD (Overflows) // S3 user supplied status + FZ + TD (Underflows) // { .mfi - nop.m 999 + nop.m 0 fsetc.s3 0x7F,0x41 - nop.i 999 + nop.i 0 } { .mfi - nop.m 999 + nop.m 0 fsetc.s2 0x7F,0x42 - nop.i 999 -};; + nop.i 0 +} +;; // -// Do final operation +// Do final operation with s2 and s3 // { .mfi - setf.exp FR_NBig = GR_Scratch - fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 - nop.i 999 + setf.exp FR_NBig = GR_neg_ov_limit + fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 + nop.i 0 } { .mfi - nop.m 999 - fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 - nop.i 999 -};; -{ .mfi - setf.exp FR_Big = GR_Scratch1 - fma.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 - nop.i 999 -};; + setf.exp FR_Big = GR_pos_ov_limit + fma.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 + nop.i 0 +} +;; // Check for overflow or underflow. // Restore s3 // Restore s2 // { .mfi - nop.m 0 + nop.m 0 fsetc.s3 0x7F,0x40 - nop.i 999 + nop.i 0 } { .mfi - nop.m 0 + nop.m 0 fsetc.s2 0x7F,0x40 - nop.i 999 -};; + nop.i 0 +} +;; // // Is the result zero? 
// { .mfi - nop.m 999 + nop.m 0 fclass.m p6, p0 = FR_Result3, 0x007 - nop.i 999 -} + nop.i 0 +} { .mfi - addl GR_Tag = 174, r0 + nop.m 0 fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big - nop.i 0 -};; + nop.i 0 +} +;; // // Detect masked underflow - Tiny + Inexact Only // { .mfi - nop.m 999 + nop.m 0 (p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2 - nop.i 999 -};; + nop.i 0 +} +;; // // Is result bigger the allowed range? // Branch out for underflow // { .mfb -(p6) addl GR_Tag = 175, r0 + nop.m 0 (p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig -(p6) br.cond.spnt SCALBNL_UNDERFLOW -};; +(p6) br.cond.spnt SCALBNL_UNDERFLOW +} +;; // // Branch out for overflow // -{ .mbb - nop.m 0 -(p7) br.cond.spnt SCALBNL_OVERFLOW -(p9) br.cond.spnt SCALBNL_OVERFLOW -};; +{ .bbb +(p7) br.cond.spnt SCALBNL_OVERFLOW +(p9) br.cond.spnt SCALBNL_OVERFLOW + br.ret.sptk b0 // Return from main path. +} +;; -// -// Return from main path. -// -{ .mfb - nop.m 999 - nop.f 0 - br.ret.sptk b0;; +// Here if result overflows +SCALBNL_OVERFLOW: +{ .mib + alloc r32=ar.pfs,3,0,4,0 + addl GR_Tag = 174, r0 // Set error tag for overflow + br.cond.sptk __libm_error_region // Call error support for overflow } +;; -GLOBAL_LIBM_END(__libm_scalbnl) -__libm_error_region: +// Here if result underflows +SCALBNL_UNDERFLOW: +{ .mib + alloc r32=ar.pfs,3,0,4,0 + addl GR_Tag = 175, r0 // Set error tag for underflow + br.cond.sptk __libm_error_region // Call error support for underflow +} +;; + +// Here if x=unorm +SCALBNL_UNORM: +{ .mib + getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x + nop.i 0 + br.cond.sptk SCALBNL_COMMON // Return to main path +} +;; -SCALBNL_OVERFLOW: -SCALBNL_UNDERFLOW: + +GLOBAL_LIBM_END(__libm_scalbnl) +LOCAL_LIBM_ENTRY(__libm_error_region) // // Get stack address of N // .prologue { .mfi - add GR_Parameter_Y=-32,sp + add GR_Parameter_Y=-32,sp nop.f 0 .save ar.pfs,GR_SAVE_PFS - mov GR_SAVE_PFS=ar.pfs + mov GR_SAVE_PFS=ar.pfs } // -// Adjust sp +// Adjust sp // { .mfi .fframe 
64 - add sp=-64,sp + add sp=-64,sp nop.f 0 - mov GR_SAVE_GP=gp + mov GR_SAVE_GP=gp };; // -// Store N on stack in correct position +// Store N on stack in correct position // Locate the address of x on stack // { .mmi - st8 [GR_Parameter_Y] = GR_N_as_int,16 - add GR_Parameter_X = 16,sp + st8 [GR_Parameter_Y] = GR_N_as_int,16 + add GR_Parameter_X = 16,sp .save b0, GR_SAVE_B0 - mov GR_SAVE_B0=b0 + mov GR_SAVE_B0=b0 };; // @@ -337,42 +408,42 @@ SCALBNL_UNDERFLOW: // .body { .mib - stfe [GR_Parameter_X] = FR_Norm_X - add GR_Parameter_RESULT = 0,GR_Parameter_Y + stfe [GR_Parameter_X] = FR_Norm_X + add GR_Parameter_RESULT = 0,GR_Parameter_Y nop.b 0 } { .mib - stfe [GR_Parameter_Y] = FR_Result + stfe [GR_Parameter_Y] = FR_Result add GR_Parameter_Y = -16,GR_Parameter_Y - br.call.sptk b0=__libm_error_support# + br.call.sptk b0=__libm_error_support# };; // // Get location of result on stack // { .mmi + add GR_Parameter_RESULT = 48,sp nop.m 0 - nop.m 0 - add GR_Parameter_RESULT = 48,sp + nop.i 0 };; // -// Get the new result +// Get the new result // { .mmi - ldfe FR_Result = [GR_Parameter_RESULT] + ldfe FR_Result = [GR_Parameter_RESULT] .restore sp - add sp = 64,sp - mov b0 = GR_SAVE_B0 + add sp = 64,sp + mov b0 = GR_SAVE_B0 };; // // Restore gp, ar.pfs and return // { .mib - mov gp = GR_SAVE_GP - mov ar.pfs = GR_SAVE_PFS - br.ret.sptk b0 + mov gp = GR_SAVE_GP + mov ar.pfs = GR_SAVE_PFS + br.ret.sptk b0 };; LOCAL_LIBM_END(__libm_error_region) diff --git a/sysdeps/ia64/fpu/s_log1p.S b/sysdeps/ia64/fpu/s_log1p.S index cd35519..ccf0c31 100644 --- a/sysdeps/ia64/fpu/s_log1p.S +++ b/sysdeps/ia64/fpu/s_log1p.S @@ -1047,6 +1047,7 @@ log_libm_err: GLOBAL_IEEE754_END(log1p) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/s_log1pf.S b/sysdeps/ia64/fpu/s_log1pf.S index a148d4b..77e79c3 100644 --- a/sysdeps/ia64/fpu/s_log1pf.S +++ b/sysdeps/ia64/fpu/s_log1pf.S @@ -48,6 +48,7 @@ // 10/02/02 Improved performance by basing on log algorithm // 02/10/03 
Reordered header: .section, .global, .proc, .align // 04/18/03 Eliminate possible WAW dependency warning +// 12/16/03 Fixed parameter passing to/from error handling routine // // API //============================================================== @@ -733,6 +734,7 @@ log_libm_err: GLOBAL_IEEE754_END(log1pf) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/s_log1pl.S b/sysdeps/ia64/fpu/s_log1pl.S index d392a58..9654265 100644 --- a/sysdeps/ia64/fpu/s_log1pl.S +++ b/sysdeps/ia64/fpu/s_log1pl.S @@ -1145,6 +1145,7 @@ LOG1P_LT_Minus_1: GLOBAL_IEEE754_END(log1pl) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/s_logb.S b/sysdeps/ia64/fpu/s_logb.S index dfe581a..7ee8987 100644 --- a/sysdeps/ia64/fpu/s_logb.S +++ b/sysdeps/ia64/fpu/s_logb.S @@ -219,6 +219,7 @@ LOGB_ZERO: GLOBAL_LIBM_END(logb) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/s_logbf.S b/sysdeps/ia64/fpu/s_logbf.S index 1d605cd..eefa270 100644 --- a/sysdeps/ia64/fpu/s_logbf.S +++ b/sysdeps/ia64/fpu/s_logbf.S @@ -219,6 +219,7 @@ LOGB_ZERO: GLOBAL_LIBM_END(logbf) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/s_logbl.S b/sysdeps/ia64/fpu/s_logbl.S index 6a08e94..e312c1b 100644 --- a/sysdeps/ia64/fpu/s_logbl.S +++ b/sysdeps/ia64/fpu/s_logbl.S @@ -219,6 +219,7 @@ LOGB_ZERO: GLOBAL_LIBM_END(logbl) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/s_nearbyint.S b/sysdeps/ia64/fpu/s_nearbyint.S index cba74e6..ec1ff22 100644 --- a/sysdeps/ia64/fpu/s_nearbyint.S +++ b/sysdeps/ia64/fpu/s_nearbyint.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History @@ -43,48 +43,44 @@ // 02/08/01 Corrected behavior for all rounding modes. 
// 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align +// 07/25/03 Improved performance //============================================================== -// + // API //============================================================== // double nearbyint(double x) -// -// general registers used: -// +//============================================================== -nearbyint_GR_signexp = r14 -nearbyint_GR_exponent = r15 -nearbyint_GR_17ones = r16 -nearbyint_GR_10033 = r17 -nearbyint_GR_fpsr = r18 -nearbyint_GR_rcs0 = r19 -nearbyint_GR_rcs0_mask = r20 +// general input registers: +// r14 - r21 +rSignexp = r14 +rExp = r15 +rExpMask = r16 +rBigexp = r17 +rFpsr = r19 +rRcs0 = r20 +rRcs0Mask = r21 -// predicate registers used: -// p6-11 +// floating-point registers: +// f8 - f10 -// floating-point registers used: +fXInt = f9 +fNormX = f10 -NEARBYINT_NORM_f8 = f9 -NEARBYINT_FLOAT_INT_f8 = f10 -NEARBYINT_INT_f8 = f11 +// predicate registers used: +// p6 - p10 // Overview of operation //============================================================== - // double nearbyint(double x) -// Return an integer value (represented as a double) that is x rounded to integer in current -// rounding mode -// ******************************************************************************* - -// Set denormal flag for denormal input and -// and take denormal fault if necessary. - -// Is the input an integer value already? +// Return an integer value (represented as a double) that is x +// rounded to integer in current rounding mode +// Inexact is not set, otherwise result identical with rint. +//============================================================== // double_extended -// if the exponent is >= 1003e => 3F(true) = 63(decimal) +// if the exponent is > 1003e => 3F(true) = 63(decimal) // we have a significand of 64 bits 1.63-bits. // If we multiply by 2^63, we no longer have a fractional part // So input is an integer value already. 
@@ -97,128 +93,125 @@ NEARBYINT_INT_f8 = f11 // So input is an integer value already. // single -// if the exponent is >= 10016 => 17(true) = 23(decimal) -// we have a significand of 53 bits 1.52-bits. (implicit 1) -// If we multiply by 2^52, we no longer have a fractional part +// if the exponent is > 10016 => 17(true) = 23(decimal) +// we have a significand of 24 bits 1.23-bits. (implicit 1) +// If we multiply by 2^23, we no longer have a fractional part // So input is an integer value already. -// If x is NAN, ZERO, or INFINITY, then return - -// qnan snan inf norm unorm 0 -+ -// 1 1 1 0 0 1 11 0xe7 - - .section .text GLOBAL_LIBM_ENTRY(nearbyint) { .mfi - mov nearbyint_GR_fpsr = ar40 // Read the fpsr--need to check rc.s0 - fcvt.fx.s1 NEARBYINT_INT_f8 = f8 - addl nearbyint_GR_10033 = 0x10033, r0 + getf.exp rSignexp = f8 // Get signexp, recompute if unorm + fclass.m p7,p0 = f8, 0x0b // Test x unorm + addl rBigexp = 0x10033, r0 // Set exponent at which is integer } { .mfi - nop.m 999 - fnorm.s1 NEARBYINT_NORM_f8 = f8 - mov nearbyint_GR_17ones = 0x1FFFF -;; + nop.m 0 + fcvt.fx.s1 fXInt = f8 // Convert to int in significand + mov rExpMask = 0x1FFFF // Form exponent mask } +;; { .mfi - nop.m 999 - fclass.m.unc p6,p0 = f8, 0xe7 - mov nearbyint_GR_rcs0_mask = 0x0c00 -;; + mov rFpsr = ar40 // Read fpsr -- check rc.s0 + fclass.m p6,p0 = f8, 0x1e3 // Test x natval, nan, inf + nop.i 0 } - { .mfb - nop.m 999 -(p6) fnorm.d.s0 f8 = f8 -(p6) br.ret.spnt b0 // Exit if x nan, inf, zero + nop.m 0 + fnorm.s1 fNormX = f8 // Normalize input +(p7) br.cond.spnt RINT_UNORM // Branch if x unorm +} ;; + + +RINT_COMMON: +// Return here from RINT_UNORM +{ .mfb + and rExp = rSignexp, rExpMask // Get biased exponent +(p6) fma.d.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf +(p6) br.ret.spnt b0 // Exit if x natval, nan, inf } +;; { .mfi - nop.m 999 - fcvt.xf NEARBYINT_FLOAT_INT_f8 = NEARBYINT_INT_f8 - nop.i 999 -;; + mov rRcs0Mask = 0x0c00 // Mask for rc.s0 + fcvt.xf f8 = fXInt // Result 
assume |x| < 2^52 + cmp.ge p7,p8 = rExp, rBigexp // Is |x| >= 2^52? } +;; +// We must correct result if |x| >= 2^52 { .mfi - getf.exp nearbyint_GR_signexp = NEARBYINT_NORM_f8 - fcmp.eq.s0 p8,p0 = f8,f0 // Dummy op to set denormal - nop.i 999 -;; + nop.m 0 +(p7) fma.d.s0 f8 = fNormX, f1, f0 // If |x| >= 2^52, result x + nop.i 0 } - - -{ .mii - nop.m 999 - nop.i 999 - and nearbyint_GR_exponent = nearbyint_GR_signexp, nearbyint_GR_17ones ;; -} -{ .mmi - cmp.ge.unc p7,p6 = nearbyint_GR_exponent, nearbyint_GR_10033 - and nearbyint_GR_rcs0 = nearbyint_GR_rcs0_mask, nearbyint_GR_fpsr - nop.i 999 -;; +{ .mfi + nop.m 0 +(p8) fmerge.s f8 = fNormX, f8 // Make sign nearbyint(x) = sign x + nop.i 0 } - -// Check to see if s0 rounding mode is round to nearest. If not then set s2 -// rounding mode to that of s0 and repeat conversions. -NEARBYINT_COMMON: -{ .mfb - cmp.ne p11,p0 = nearbyint_GR_rcs0, r0 -(p6) fclass.m.unc p9,p10 = NEARBYINT_FLOAT_INT_f8, 0x07 // Test for result=0 -(p11) br.cond.spnt NEARBYINT_NOT_ROUND_NEAREST // Branch if not round to nearest ;; -} { .mfi - nop.m 999 -(p7) fnorm.d.s0 f8 = f8 - nop.i 999 -;; +(p8) and rRcs0 = rFpsr, rRcs0Mask // Get rounding mode for sf0 + nop.f 0 + nop.i 0 } +;; -// If result is zero, merge sign of input -{ .mfi - nop.m 999 -(p9) fmerge.s f8 = f8, NEARBYINT_FLOAT_INT_f8 - nop.i 999 +// If |x| < 2^52 we must test for other rounding modes +{ .mbb +(p8) cmp.ne.unc p10,p0 = rRcs0, r0 // Test for other rounding modes +(p10) br.cond.spnt RINT_NOT_ROUND_NEAREST // Branch if not round nearest + br.ret.sptk b0 // Exit main path if round nearest } +;; + + +RINT_UNORM: +// Here if x unorm { .mfb - nop.m 999 -(p10) fnorm.d.s0 f8 = NEARBYINT_FLOAT_INT_f8 - br.ret.sptk b0 -;; + getf.exp rSignexp = fNormX // Get signexp, recompute if unorm + fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag + br.cond.sptk RINT_COMMON // Return to main path } +;; - -NEARBYINT_NOT_ROUND_NEAREST: -// Set rounding mode of s2 to that of s0 
+RINT_NOT_ROUND_NEAREST: +// Here if not round to nearest, and |x| < 2^52 +// Set rounding mode of s2 to that of s0, and repeat the conversion using s2 { .mfi - mov nearbyint_GR_rcs0 = r0 // Clear so we don't come back here - fsetc.s2 0x7f, 0x40 - nop.i 999 -;; + nop.m 0 + fsetc.s2 0x7f, 0x40 + nop.i 0 } +;; { .mfi - nop.m 999 - fcvt.fx.s2 NEARBYINT_INT_f8 = f8 - nop.i 999 + nop.m 0 + fcvt.fx.s2 fXInt = fNormX // Convert to int in significand + nop.i 0 +} ;; + +{ .mfi + nop.m 0 + fcvt.xf f8 = fXInt // Expected result + nop.i 0 } +;; +// Be sure sign of result = sign of input. Fixes cases where result is 0. { .mfb - nop.m 999 - fcvt.xf NEARBYINT_FLOAT_INT_f8 = NEARBYINT_INT_f8 - br.cond.sptk NEARBYINT_COMMON -;; + nop.m 0 + fmerge.s f8 = fNormX, f8 + br.ret.sptk b0 // Exit main path } - +;; GLOBAL_LIBM_END(nearbyint) diff --git a/sysdeps/ia64/fpu/s_nearbyintf.S b/sysdeps/ia64/fpu/s_nearbyintf.S index 6471232..aac7b5c 100644 --- a/sysdeps/ia64/fpu/s_nearbyintf.S +++ b/sysdeps/ia64/fpu/s_nearbyintf.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History @@ -43,48 +43,44 @@ // 02/08/01 Corrected behavior for all rounding modes. 
// 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align +// 07/25/03 Improved performance //============================================================== -// + // API //============================================================== // float nearbyintf(float x) -// -// general registers used: -// +//============================================================== -nearbyint_GR_signexp = r14 -nearbyint_GR_exponent = r15 -nearbyint_GR_17ones = r16 -nearbyint_GR_10033 = r17 -nearbyint_GR_fpsr = r18 -nearbyint_GR_rcs0 = r19 -nearbyint_GR_rcs0_mask = r20 +// general input registers: +// r14 - r21 +rSignexp = r14 +rExp = r15 +rExpMask = r16 +rBigexp = r17 +rFpsr = r19 +rRcs0 = r20 +rRcs0Mask = r21 -// predicate registers used: -// p6-11 +// floating-point registers: +// f8 - f10 -// floating-point registers used: +fXInt = f9 +fNormX = f10 -NEARBYINT_NORM_f8 = f9 -NEARBYINT_FLOAT_INT_f8 = f10 -NEARBYINT_INT_f8 = f11 +// predicate registers used: +// p6 - p10 // Overview of operation //============================================================== - // float nearbyintf(float x) -// Return an integer value (represented as a float) that is x rounded to integer in current -// rounding mode -// ******************************************************************************* - -// Set denormal flag for denormal input and -// and take denormal fault if necessary. - -// Is the input an integer value already? +// Return an integer value (represented as a float) that is x +// rounded to integer in current rounding mode +// Inexact is not set, otherwise result identical with rint. +//============================================================== // double_extended -// if the exponent is >= 1003e => 3F(true) = 63(decimal) +// if the exponent is > 1003e => 3F(true) = 63(decimal) // we have a significand of 64 bits 1.63-bits. // If we multiply by 2^63, we no longer have a fractional part // So input is an integer value already. 
@@ -97,128 +93,125 @@ NEARBYINT_INT_f8 = f11 // So input is an integer value already. // single -// if the exponent is >= 10016 => 17(true) = 23(decimal) -// we have a significand of 53 bits 1.52-bits. (implicit 1) -// If we multiply by 2^52, we no longer have a fractional part +// if the exponent is > 10016 => 17(true) = 23(decimal) +// we have a significand of 24 bits 1.23-bits. (implicit 1) +// If we multiply by 2^23, we no longer have a fractional part // So input is an integer value already. -// If x is NAN, ZERO, or INFINITY, then return - -// qnan snan inf norm unorm 0 -+ -// 1 1 1 0 0 1 11 0xe7 - - .section .text GLOBAL_LIBM_ENTRY(nearbyintf) { .mfi - mov nearbyint_GR_fpsr = ar40 // Read the fpsr--need to check rc.s0 - fcvt.fx.s1 NEARBYINT_INT_f8 = f8 - addl nearbyint_GR_10033 = 0x10016, r0 + getf.exp rSignexp = f8 // Get signexp, recompute if unorm + fclass.m p7,p0 = f8, 0x0b // Test x unorm + addl rBigexp = 0x10016, r0 // Set exponent at which is integer } { .mfi - nop.m 999 - fnorm.s1 NEARBYINT_NORM_f8 = f8 - mov nearbyint_GR_17ones = 0x1FFFF -;; + nop.m 0 + fcvt.fx.s1 fXInt = f8 // Convert to int in significand + mov rExpMask = 0x1FFFF // Form exponent mask } +;; { .mfi - nop.m 999 - fclass.m.unc p6,p0 = f8, 0xe7 - mov nearbyint_GR_rcs0_mask = 0x0c00 -;; + mov rFpsr = ar40 // Read fpsr -- check rc.s0 + fclass.m p6,p0 = f8, 0x1e3 // Test x natval, nan, inf + nop.i 0 } - { .mfb - nop.m 999 -(p6) fnorm.s.s0 f8 = f8 -(p6) br.ret.spnt b0 // Exit if x nan, inf, zero + nop.m 0 + fnorm.s1 fNormX = f8 // Normalize input +(p7) br.cond.spnt RINT_UNORM // Branch if x unorm +} ;; + + +RINT_COMMON: +// Return here from RINT_UNORM +{ .mfb + and rExp = rSignexp, rExpMask // Get biased exponent +(p6) fma.s.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf +(p6) br.ret.spnt b0 // Exit if x natval, nan, inf } +;; { .mfi - nop.m 999 - fcvt.xf NEARBYINT_FLOAT_INT_f8 = NEARBYINT_INT_f8 - nop.i 999 -;; + mov rRcs0Mask = 0x0c00 // Mask for rc.s0 + fcvt.xf f8 = fXInt // Result 
assume |x| < 2^23 + cmp.ge p7,p8 = rExp, rBigexp // Is |x| >= 2^23? } +;; +// We must correct result if |x| >= 2^23 { .mfi - getf.exp nearbyint_GR_signexp = NEARBYINT_NORM_f8 - fcmp.eq.s0 p8,p0 = f8,f0 // Dummy op to set denormal - nop.i 999 -;; + nop.m 0 +(p7) fma.s.s0 f8 = fNormX, f1, f0 // If |x| >= 2^23, result x + nop.i 0 } - - -{ .mii - nop.m 999 - nop.i 999 - and nearbyint_GR_exponent = nearbyint_GR_signexp, nearbyint_GR_17ones ;; + +{ .mfi + nop.m 0 +(p8) fmerge.s f8 = fNormX, f8 // Make sign nearbyintf(x)= sign x + nop.i 0 } +;; -{ .mmi - cmp.ge.unc p7,p6 = nearbyint_GR_exponent, nearbyint_GR_10033 - and nearbyint_GR_rcs0 = nearbyint_GR_rcs0_mask, nearbyint_GR_fpsr - nop.i 999 +{ .mfi +(p8) and rRcs0 = rFpsr, rRcs0Mask // Get rounding mode for sf0 + nop.f 0 + nop.i 0 +} ;; + +// If |x| < 2^23 we must test for other rounding modes +{ .mbb +(p8) cmp.ne.unc p10,p0 = rRcs0, r0 // Test for other rounding modes +(p10) br.cond.spnt RINT_NOT_ROUND_NEAREST // Branch if not round nearest + br.ret.sptk b0 // Exit main path if round nearest } +;; -// Check to see if s0 rounding mode is round to nearest. If not then set s2 -// rounding mode to that of s0 and repeat conversions. 
-NEARBYINT_COMMON: + +RINT_UNORM: +// Here if x unorm { .mfb - cmp.ne p11,p0 = nearbyint_GR_rcs0, r0 -(p6) fclass.m.unc p9,p10 = NEARBYINT_FLOAT_INT_f8, 0x07 // Test for result=0 -(p11) br.cond.spnt NEARBYINT_NOT_ROUND_NEAREST // Branch if not round to nearest -;; + getf.exp rSignexp = fNormX // Get signexp, recompute if unorm + fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag + br.cond.sptk RINT_COMMON // Return to main path } - -{ .mfi - nop.m 999 -(p7) fnorm.s.s0 f8 = f8 - nop.i 999 ;; -} -// If result is zero, merge sign of input +RINT_NOT_ROUND_NEAREST: +// Here if not round to nearest, and |x| < 2^23 +// Set rounding mode of s2 to that of s0, and repeat the conversion using s2 { .mfi - nop.m 999 -(p9) fmerge.s f8 = f8, NEARBYINT_FLOAT_INT_f8 - nop.i 999 + nop.m 0 + fsetc.s2 0x7f, 0x40 + nop.i 0 } -{ .mfb - nop.m 999 -(p10) fnorm.s.s0 f8 = NEARBYINT_FLOAT_INT_f8 - br.ret.sptk b0 ;; -} - -NEARBYINT_NOT_ROUND_NEAREST: -// Set rounding mode of s2 to that of s0 { .mfi - mov nearbyint_GR_rcs0 = r0 // Clear so we don't come back here - fsetc.s2 0x7f, 0x40 - nop.i 999 -;; + nop.m 0 + fcvt.fx.s2 fXInt = fNormX // Convert to int in significand + nop.i 0 } +;; { .mfi - nop.m 999 - fcvt.fx.s2 NEARBYINT_INT_f8 = f8 - nop.i 999 -;; + nop.m 0 + fcvt.xf f8 = fXInt // Expected result + nop.i 0 } +;; +// Be sure sign of result = sign of input. Fixes cases where result is 0. { .mfb - nop.m 999 - fcvt.xf NEARBYINT_FLOAT_INT_f8 = NEARBYINT_INT_f8 - br.cond.sptk NEARBYINT_COMMON -;; + nop.m 0 + fmerge.s f8 = fNormX, f8 + br.ret.sptk b0 // Exit main path } - +;; GLOBAL_LIBM_END(nearbyintf) diff --git a/sysdeps/ia64/fpu/s_nearbyintl.S b/sysdeps/ia64/fpu/s_nearbyintl.S index 9c4c2e4..ee6159c 100644 --- a/sysdeps/ia64/fpu/s_nearbyintl.S +++ b/sysdeps/ia64/fpu/s_nearbyintl.S @@ -21,20 +21,20 @@ // products derived from this software without specific prior written // permission. 
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// // Intel Corporation is the author of this code, and requests that all -// problem reports or change requests be submitted to it directly at +// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm. // // History @@ -43,49 +43,44 @@ // 02/08/01 Corrected behavior for all rounding modes. 
// 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align +// 07/25/03 Improved performance //============================================================== -// + // API //============================================================== // long double nearbyintl(long double x) -// -// general registers used: -// +//============================================================== -nearbyint_GR_signexp = r14 -nearbyint_GR_exponent = r15 -nearbyint_GR_17ones = r16 -nearbyint_GR_10033 = r17 -nearbyint_GR_fpsr = r18 -nearbyint_GR_rcs0 = r19 -nearbyint_GR_rcs0_mask = r20 +// general input registers: +// r14 - r21 +rSignexp = r14 +rExp = r15 +rExpMask = r16 +rBigexp = r17 +rFpsr = r19 +rRcs0 = r20 +rRcs0Mask = r21 -// predicate registers used: -// p6-11 +// floating-point registers: +// f8 - f10 -// floating-point registers used: +fXInt = f9 +fNormX = f10 -NEARBYINT_NORM_f8 = f9 -NEARBYINT_FLOAT_INT_f8 = f10 -NEARBYINT_INT_f8 = f11 -NEARBYINT_SIGNED_FLOAT_INT_f8 = f12 +// predicate registers used: +// p6 - p10 // Overview of operation //============================================================== - // long double nearbyintl(long double x) -// Return an integer value (represented as a long double) that is -// x rounded to integer in current rounding mode -// ******************************************************************************* - -// Set denormal flag for denormal input and -// and take denormal fault if necessary. - -// Is the input an integer value already? +// Return an integer value (represented as a long double) that is x +// rounded to integer in current rounding mode +// Inexact is not set, otherwise result identical with rint. +//============================================================== // double_extended -// if the exponent is >= 1003e => 3F(true) = 63(decimal) +// if the exponent is > 1003e => 3F(true) = 63(decimal) // we have a significand of 64 bits 1.63-bits. 
// If we multiply by 2^63, we no longer have a fractional part // So input is an integer value already. @@ -98,124 +93,125 @@ NEARBYINT_SIGNED_FLOAT_INT_f8 = f12 // So input is an integer value already. // single -// if the exponent is >= 10016 => 17(true) = 23(decimal) -// we have a significand of 53 bits 1.52-bits. (implicit 1) -// If we multiply by 2^52, we no longer have a fractional part +// if the exponent is > 10016 => 17(true) = 23(decimal) +// we have a significand of 24 bits 1.23-bits. (implicit 1) +// If we multiply by 2^23, we no longer have a fractional part // So input is an integer value already. -// If x is NAN, ZERO, or INFINITY, then return - -// qnan snan inf norm unorm 0 -+ -// 1 1 1 0 0 1 11 0xe7 - - .section .text GLOBAL_LIBM_ENTRY(nearbyintl) { .mfi - mov nearbyint_GR_fpsr = ar40 // Read the fpsr--need to check rc.s0 - fcvt.fx.s1 NEARBYINT_INT_f8 = f8 - addl nearbyint_GR_10033 = 0x1003e, r0 + getf.exp rSignexp = f8 // Get signexp, recompute if unorm + fclass.m p7,p0 = f8, 0x0b // Test x unorm + addl rBigexp = 0x1003e, r0 // Set exponent at which is integer } { .mfi - nop.m 999 - fnorm.s1 NEARBYINT_NORM_f8 = f8 - mov nearbyint_GR_17ones = 0x1FFFF -;; + nop.m 0 + fcvt.fx.s1 fXInt = f8 // Convert to int in significand + mov rExpMask = 0x1FFFF // Form exponent mask } +;; { .mfi - nop.m 999 - fclass.m.unc p6,p0 = f8, 0xe7 - mov nearbyint_GR_rcs0_mask = 0x0c00 -;; + mov rFpsr = ar40 // Read fpsr -- check rc.s0 + fclass.m p6,p0 = f8, 0x1e3 // Test x natval, nan, inf + nop.i 0 } - { .mfb - nop.m 999 -(p6) fnorm.s0 f8 = f8 -(p6) br.ret.spnt b0 // Exit if x nan, inf, zero + nop.m 0 + fnorm.s1 fNormX = f8 // Normalize input +(p7) br.cond.spnt RINT_UNORM // Branch if x unorm +} ;; + + +RINT_COMMON: +// Return here from RINT_UNORM +{ .mfb + and rExp = rSignexp, rExpMask // Get biased exponent +(p6) fma.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf +(p6) br.ret.spnt b0 // Exit if x natval, nan, inf } +;; { .mfi - nop.m 999 - fcvt.xf 
NEARBYINT_FLOAT_INT_f8 = NEARBYINT_INT_f8 - nop.i 999 -;; + mov rRcs0Mask = 0x0c00 // Mask for rc.s0 + fcvt.xf f8 = fXInt // Result assume |x| < 2^63 + cmp.ge p7,p8 = rExp, rBigexp // Is |x| >= 2^63? } +;; +// We must correct result if |x| >= 2^63 { .mfi - getf.exp nearbyint_GR_signexp = NEARBYINT_NORM_f8 - fcmp.eq.s0 p8,p0 = f8,f0 // Dummy op to set denormal - nop.i 999 -;; + nop.m 0 +(p7) fma.s0 f8 = fNormX, f1, f0 // If |x| >= 2^63, result x + nop.i 0 } - - -{ .mii - nop.m 999 - nop.i 999 - and nearbyint_GR_exponent = nearbyint_GR_signexp, nearbyint_GR_17ones ;; -} -{ .mmi - cmp.ge.unc p7,p6 = nearbyint_GR_exponent, nearbyint_GR_10033 - and nearbyint_GR_rcs0 = nearbyint_GR_rcs0_mask, nearbyint_GR_fpsr - nop.i 999 -;; +{ .mfi + nop.m 0 +(p8) fmerge.s f8 = fNormX, f8 // Make sign nearbyintl(x)= sign x + nop.i 0 } - -// Check to see if s0 rounding mode is round to nearest. If not then set s2 -// rounding mode to that of s0 and repeat conversions. -// Must merge the original sign for cases where the result is zero or the input -// is the largest that still has a fraction (0x1007dfffffffffff) -NEARBYINT_COMMON: -{ .mfb - cmp.ne p11,p0 = nearbyint_GR_rcs0, r0 -(p6) fmerge.s NEARBYINT_SIGNED_FLOAT_INT_f8 = f8, NEARBYINT_FLOAT_INT_f8 -(p11) br.cond.spnt NEARBYINT_NOT_ROUND_NEAREST // Branch if not round to nearest ;; -} { .mfi - nop.m 999 -(p7) fnorm.s0 f8 = f8 - nop.i 999 +(p8) and rRcs0 = rFpsr, rRcs0Mask // Get rounding mode for sf0 + nop.f 0 + nop.i 0 +} ;; + +// If |x| < 2^63 we must test for other rounding modes +{ .mbb +(p8) cmp.ne.unc p10,p0 = rRcs0, r0 // Test for other rounding modes +(p10) br.cond.spnt RINT_NOT_ROUND_NEAREST // Branch if not round nearest + br.ret.sptk b0 // Exit main path if round nearest } +;; + +RINT_UNORM: +// Here if x unorm { .mfb - nop.m 999 -(p6) fnorm.s0 f8 = NEARBYINT_SIGNED_FLOAT_INT_f8 - br.ret.sptk b0 -;; + getf.exp rSignexp = fNormX // Get signexp, recompute if unorm + fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag + 
br.cond.sptk RINT_COMMON // Return to main path } +;; - -NEARBYINT_NOT_ROUND_NEAREST: -// Set rounding mode of s2 to that of s0 +RINT_NOT_ROUND_NEAREST: +// Here if not round to nearest, and |x| < 2^63 +// Set rounding mode of s2 to that of s0, and repeat the conversion using s2 { .mfi - mov nearbyint_GR_rcs0 = r0 // Clear so we don't come back here - fsetc.s2 0x7f, 0x40 - nop.i 999 -;; + nop.m 0 + fsetc.s2 0x7f, 0x40 + nop.i 0 } +;; { .mfi - nop.m 999 - fcvt.fx.s2 NEARBYINT_INT_f8 = f8 - nop.i 999 + nop.m 0 + fcvt.fx.s2 fXInt = fNormX // Convert to int in significand + nop.i 0 +} ;; + +{ .mfi + nop.m 0 + fcvt.xf f8 = fXInt // Expected result + nop.i 0 } +;; +// Be sure sign of result = sign of input. Fixes cases where result is 0. { .mfb - nop.m 999 - fcvt.xf NEARBYINT_FLOAT_INT_f8 = NEARBYINT_INT_f8 - br.cond.sptk NEARBYINT_COMMON -;; + nop.m 0 + fmerge.s f8 = fNormX, f8 + br.ret.sptk b0 // Exit main path } - +;; GLOBAL_LIBM_END(nearbyintl) diff --git a/sysdeps/ia64/fpu/s_nextafter.S b/sysdeps/ia64/fpu/s_nextafter.S index 8c77aa4..6635a31 100644 --- a/sysdeps/ia64/fpu/s_nextafter.S +++ b/sysdeps/ia64/fpu/s_nextafter.S @@ -1,7 +1,7 @@ .file "nextafter.s" -// Copyright (c) 2000 - 2003, Intel Corporation +// Copyright (c) 2000 - 2004, Intel Corporation // All rights reserved. // // Contributed 2000 by the Intel Numerics Group, Intel Corporation @@ -51,6 +51,7 @@ // fixed flag settings for several cases // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align +// 12/14/04 Added error handling on underflow. 
// // API //============================================================== @@ -60,21 +61,21 @@ // // Registers used //============================================================== -nextafter_GR_max_pexp = r14 -nextafter_GR_min_pexp = r15 -nextafter_GR_exp = r16 -nextafter_GR_sig = r17 -nextafter_GR_lnorm_sig = r18 -nextafter_GR_sign_mask = r19 -nextafter_GR_exp_mask = r20 -nextafter_GR_sden_sig = r21 -nextafter_GR_new_sig = r22 -nextafter_GR_new_exp = r23 -nextafter_GR_lden_sig = r24 -nextafter_GR_snorm_sig = r25 -nextafter_GR_exp1 = r26 -nextafter_GR_x_exp = r27 -nextafter_GR_min_den_rexp = r28 +GR_max_pexp = r14 +GR_min_pexp = r15 +GR_exp = r16 +GR_sig = r17 +GR_lnorm_sig = r18 +GR_sign_mask = r19 +GR_exp_mask = r20 +GR_sden_sig = r21 +GR_new_sig = r22 +GR_new_exp = r23 +GR_lden_sig = r24 +GR_snorm_sig = r25 +GR_exp1 = r26 +GR_x_exp = r27 +GR_min_den_rexp = r28 // r36-39 parameters for libm_error_support GR_SAVE_B0 = r34 @@ -84,20 +85,21 @@ GR_SAVE_PFS = r32 GR_Parameter_X = r36 GR_Parameter_Y = r37 GR_Parameter_RESULT = r38 - -NEXTAFTER_lnorm_sig = f10 -NEXTAFTER_lnorm_exp = f11 -NEXTAFTER_lnorm = f12 -NEXTAFTER_sden_sig = f13 -NEXTAFTER_sden_exp = f14 -NEXTAFTER_sden = f15 -NEXTAFTER_save_f8 = f33 -NEXTAFTER_new_exp = f34 -NEXTAFTER_new_sig = f35 -NEXTAFTER_lden_sig = f36 -NEXTAFTER_snorm_sig = f37 -NEXTAFTER_exp1 = f38 -NEXTAFTER_tmp = f39 +GR_Parameter_TAG = r39 + +FR_lnorm_sig = f10 +FR_lnorm_exp = f11 +FR_lnorm = f12 +FR_sden_sig = f13 +FR_sden_exp = f14 +FR_sden = f15 +FR_save_f8 = f33 +FR_new_exp = f34 +FR_new_sig = f35 +FR_lden_sig = f36 +FR_snorm_sig = f37 +FR_exp1 = f38 +FR_tmp = f39 // // Overview of operation @@ -113,30 +115,30 @@ GLOBAL_LIBM_ENTRY(nextafter) // Is x < y ? 
p10 if yes, p11 if no // Form smallest denormal significand = ulp size { .mfi - getf.exp nextafter_GR_exp = f8 + getf.exp GR_exp = f8 fcmp.lt.s1 p10,p11 = f8, f9 - addl nextafter_GR_sden_sig = 0x800, r0 + addl GR_sden_sig = 0x800, r0 } // Form largest normal significand 0xfffffffffffff800 // Form smallest normal exponent { .mfi - addl nextafter_GR_lnorm_sig = -0x800,r0 + addl GR_lnorm_sig = -0x800,r0 nop.f 999 - addl nextafter_GR_min_pexp = 0x0fc01, r0 ;; + addl GR_min_pexp = 0x0fc01, r0 ;; } // Extract significand from x // Is x=y? // Form largest normal exponent { .mfi - getf.sig nextafter_GR_sig = f8 + getf.sig GR_sig = f8 fcmp.eq.s0 p6,p0 = f8, f9 - addl nextafter_GR_max_pexp = 0x103fe, r0 + addl GR_max_pexp = 0x103fe, r0 } // Move largest normal significand to fp reg for special cases { .mfi - setf.sig NEXTAFTER_lnorm_sig = nextafter_GR_lnorm_sig + setf.sig FR_lnorm_sig = GR_lnorm_sig nop.f 999 - addl nextafter_GR_sign_mask = 0x20000, r0 ;; + addl GR_sign_mask = 0x20000, r0 ;; } // Move smallest denormal significand and signexp to fp regs @@ -145,13 +147,13 @@ GLOBAL_LIBM_ENTRY(nextafter) // It increases (p12 set) if x<y and x>=0 or if x>y and x<0 // It decreases (p13 set) if x<y and x<0 or if x>y and x>=0 { .mfi - setf.sig NEXTAFTER_sden_sig = nextafter_GR_sden_sig + setf.sig FR_sden_sig = GR_sden_sig fclass.m p8,p0 = f8, 0xc3 -(p10) cmp.lt p12,p13 = nextafter_GR_exp, nextafter_GR_sign_mask +(p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask } { .mfi - setf.exp NEXTAFTER_sden_exp = nextafter_GR_min_pexp -(p11) cmp.ge p12,p13 = nextafter_GR_exp, nextafter_GR_sign_mask ;; + setf.exp FR_sden_exp = GR_min_pexp +(p11) cmp.ge p12,p13 = GR_exp, GR_sign_mask ;; } .pred.rel "mutex",p12,p13 @@ -160,33 +162,33 @@ GLOBAL_LIBM_ENTRY(nextafter) // If x=y set result to y // Form smallest normal significand and largest denormal significand { .mfi -(p12) add nextafter_GR_new_sig = nextafter_GR_sig, nextafter_GR_sden_sig +(p12) add GR_new_sig = GR_sig, GR_sden_sig (p6) fmerge.s 
f8=f9,f9 - dep.z nextafter_GR_snorm_sig = 1,63,1 // 0x8000000000000000 + dep.z GR_snorm_sig = 1,63,1 // 0x8000000000000000 } { .mlx -(p13) sub nextafter_GR_new_sig = nextafter_GR_sig, nextafter_GR_sden_sig - movl nextafter_GR_lden_sig = 0x7ffffffffffff800 ;; +(p13) sub GR_new_sig = GR_sig, GR_sden_sig + movl GR_lden_sig = 0x7ffffffffffff800 ;; } // Move expected result significand and signexp to fp regs // Is y=nan? // Form new exponent in case result exponent needs incrementing or decrementing { .mfi - setf.exp NEXTAFTER_new_exp = nextafter_GR_exp + setf.exp FR_new_exp = GR_exp fclass.m p9,p0 = f9, 0xc3 -(p12) add nextafter_GR_exp1 = 1, nextafter_GR_exp +(p12) add GR_exp1 = 1, GR_exp } { .mib - setf.sig NEXTAFTER_new_sig = nextafter_GR_new_sig -(p13) add nextafter_GR_exp1 = -1, nextafter_GR_exp + setf.sig FR_new_sig = GR_new_sig +(p13) add GR_exp1 = -1, GR_exp (p6) br.ret.spnt b0 ;; // Exit if x=y } // Move largest normal signexp to fp reg for special cases // Is x=zero? { .mfi - setf.exp NEXTAFTER_lnorm_exp = nextafter_GR_max_pexp + setf.exp FR_lnorm_exp = GR_max_pexp fclass.m p7,p0 = f8, 0x7 nop.i 999 } @@ -199,12 +201,12 @@ GLOBAL_LIBM_ENTRY(nextafter) // Move exp+-1 and smallest normal significand to fp regs for special cases // Is x=inf? 
{ .mfi - setf.exp NEXTAFTER_exp1 = nextafter_GR_exp1 + setf.exp FR_exp1 = GR_exp1 fclass.m p6,p0 = f8, 0x23 - addl nextafter_GR_exp_mask = 0x1ffff, r0 + addl GR_exp_mask = 0x1ffff, r0 } { .mfb - setf.sig NEXTAFTER_snorm_sig = nextafter_GR_snorm_sig + setf.sig FR_snorm_sig = GR_snorm_sig (p9) fma.s0 f8 = f8,f1,f9 (p9) br.ret.spnt b0 ;; // Exit if y=nan } @@ -212,16 +214,16 @@ GLOBAL_LIBM_ENTRY(nextafter) // Move largest denormal significand to fp regs for special cases // Save x { .mfb - setf.sig NEXTAFTER_lden_sig = nextafter_GR_lden_sig - mov NEXTAFTER_save_f8 = f8 -(p7) br.cond.spnt NEXTAFTER_ZERO ;; // Exit if x=0 + setf.sig FR_lden_sig = GR_lden_sig + mov FR_save_f8 = f8 +(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0 } // Mask off the sign to get x_exp { .mfb - and nextafter_GR_x_exp = nextafter_GR_exp_mask, nextafter_GR_exp + and GR_x_exp = GR_exp_mask, GR_exp nop.f 999 -(p6) br.cond.spnt NEXTAFTER_INF ;; // Exit if x=inf +(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf } // Check 6 special cases when significand rolls over: @@ -240,35 +242,35 @@ GLOBAL_LIBM_ENTRY(nextafter) // // Form exponent of smallest double denormal (if normalized register format) { .mmi - adds nextafter_GR_min_den_rexp = -52, nextafter_GR_min_pexp -(p12) cmp.eq.unc p6,p0 = nextafter_GR_new_sig, r0 -(p13) cmp.eq.unc p8,p10 = nextafter_GR_new_sig, nextafter_GR_lden_sig ;; + adds GR_min_den_rexp = -52, GR_min_pexp +(p12) cmp.eq.unc p6,p0 = GR_new_sig, r0 +(p13) cmp.eq.unc p8,p10 = GR_new_sig, GR_lden_sig ;; } { .mmi -(p6) cmp.lt.unc p6,p7 = nextafter_GR_x_exp, nextafter_GR_max_pexp -(p8) cmp.gt.unc p8,p9 = nextafter_GR_x_exp, nextafter_GR_min_pexp -(p10) cmp.eq.unc p10,p0 = nextafter_GR_new_sig, r0 ;; +(p6) cmp.lt.unc p6,p7 = GR_x_exp, GR_max_pexp +(p8) cmp.gt.unc p8,p9 = GR_x_exp, GR_min_pexp +(p10) cmp.eq.unc p10,p0 = GR_new_sig, r0 ;; } // Create small normal in case need to generate underflow flag { .mfi -(p10) cmp.le.unc p10,p0 = nextafter_GR_x_exp, nextafter_GR_min_pexp - fmerge.se 
NEXTAFTER_tmp = NEXTAFTER_sden_exp, NEXTAFTER_lnorm_sig -(p9) cmp.gt.unc p9,p14 = nextafter_GR_x_exp, nextafter_GR_min_den_rexp +(p10) cmp.le.unc p10,p0 = GR_x_exp, GR_min_pexp + fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig +(p9) cmp.gt.unc p9,p14 = GR_x_exp, GR_min_den_rexp } // Branch if cases 1, 2, 3 { .bbb -(p6) br.cond.spnt NEXTAFTER_EXPUP -(p7) br.cond.spnt NEXTAFTER_OVERFLOW -(p8) br.cond.spnt NEXTAFTER_EXPDOWN ;; +(p6) br.cond.spnt NEXT_EXPUP +(p7) br.cond.spnt NEXT_OVERFLOW +(p8) br.cond.spnt NEXT_EXPDOWN ;; } // Branch if cases 4, 5, 6 { .bbb -(p9) br.cond.spnt NEXTAFTER_NORM_TO_DENORM -(p10) br.cond.spnt NEXTAFTER_UNDERFLOW_TO_ZERO -(p14) br.cond.spnt NEXTAFTER_UNDERFLOW_TO_ZERO ;; +(p9) br.cond.spnt NEXT_NORM_TO_DENORM +(p10) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO +(p14) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO ;; } // Here if no special cases @@ -276,68 +278,72 @@ GLOBAL_LIBM_ENTRY(nextafter) // Case 1: x_exp=min_exp, x_sig=unnormalized // Case 2: x_exp<min_exp { .mfi - cmp.lt p6,p7 = nextafter_GR_x_exp, nextafter_GR_min_pexp - fmerge.se f8 = NEXTAFTER_new_exp, NEXTAFTER_new_sig + cmp.lt p6,p7 = GR_x_exp, GR_min_pexp + fmerge.se f8 = FR_new_exp, FR_new_sig nop.i 999 ;; } { .mfi nop.m 999 nop.f 999 -(p7) tbit.z p6,p0 = nextafter_GR_new_sig, 63 ;; +(p7) tbit.z p6,p0 = GR_new_sig, 63 ;; } -NEXTAFTER_COMMON_FINISH: +NEXT_COMMON_FINISH: // Force underflow and inexact if denormal result { .mfi nop.m 999 -(p6) fma.d.s0 NEXTAFTER_tmp = NEXTAFTER_tmp,NEXTAFTER_tmp,f0 - nop.i 999 ;; +(p6) fma.d.s0 FR_tmp = FR_tmp,FR_tmp,f0 + nop.i 999 +} +{ .mfb + nop.m 999 + fnorm.d.s0 f8 = f8 // Final normalization to result precision +(p6) br.cond.spnt NEXT_UNDERFLOW ;; } -// Final normalization to result precision and exit { .mfb nop.m 999 - fnorm.d.s0 f8 = f8 + nop.f 999 br.ret.sptk b0;; } //Special cases -NEXTAFTER_EXPUP: +NEXT_EXPUP: { .mfb - cmp.lt p6,p7 = nextafter_GR_x_exp, nextafter_GR_min_pexp - fmerge.se f8 = NEXTAFTER_exp1, NEXTAFTER_snorm_sig - br.cond.sptk 
NEXTAFTER_COMMON_FINISH ;; + cmp.lt p6,p7 = GR_x_exp, GR_min_pexp + fmerge.se f8 = FR_exp1, FR_snorm_sig + br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXTAFTER_EXPDOWN: +NEXT_EXPDOWN: { .mfb - cmp.lt p6,p7 = nextafter_GR_x_exp, nextafter_GR_min_pexp - fmerge.se f8 = NEXTAFTER_exp1, NEXTAFTER_lnorm_sig - br.cond.sptk NEXTAFTER_COMMON_FINISH ;; + cmp.lt p6,p7 = GR_x_exp, GR_min_pexp + fmerge.se f8 = FR_exp1, FR_lnorm_sig + br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXTAFTER_NORM_TO_DENORM: +NEXT_NORM_TO_DENORM: { .mfi nop.m 999 - fmerge.se f8 = NEXTAFTER_new_exp, NEXTAFTER_lden_sig + fmerge.se f8 = FR_new_exp, FR_lden_sig nop.i 999 } // Force underflow and inexact if denormal result { .mfb nop.m 999 - fma.d.s0 NEXTAFTER_tmp = NEXTAFTER_tmp,NEXTAFTER_tmp,f0 - br.ret.sptk b0 ;; + fma.d.s0 FR_tmp = FR_tmp,FR_tmp,f0 + br.cond.sptk NEXT_UNDERFLOW ;; } -NEXTAFTER_UNDERFLOW_TO_ZERO: +NEXT_UNDERFLOW_TO_ZERO: { .mfb cmp.eq p6,p0 = r0,r0 - fmerge.s f8 = NEXTAFTER_save_f8,f0 - br.cond.sptk NEXTAFTER_COMMON_FINISH ;; + fmerge.s f8 = FR_save_f8,f0 + br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXTAFTER_INF: +NEXT_INF: // Here if f8 is +- infinity // INF // if f8 is +inf, no matter what y is return largest double @@ -345,17 +351,17 @@ NEXTAFTER_INF: { .mfi nop.m 999 - fmerge.se NEXTAFTER_lnorm = NEXTAFTER_lnorm_exp,NEXTAFTER_lnorm_sig + fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig nop.i 999 ;; } { .mfb nop.m 999 - fmerge.s f8 = f8,NEXTAFTER_lnorm + fmerge.s f8 = f8,FR_lnorm br.ret.sptk b0 ;; } -NEXTAFTER_ZERO: +NEXT_ZERO: // Here if f8 is +- zero // ZERO @@ -364,75 +370,72 @@ NEXTAFTER_ZERO: { .mfi nop.m 999 - fmerge.se NEXTAFTER_sden = NEXTAFTER_sden_exp,NEXTAFTER_sden_sig + fmerge.se FR_sden = FR_sden_exp,FR_sden_sig nop.i 999 ;; } // Create small normal to generate underflow flag { .mfi nop.m 999 - fmerge.se NEXTAFTER_tmp = NEXTAFTER_sden_exp, NEXTAFTER_lnorm_sig + fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig nop.i 999 ;; } // Add correct sign from direction arg { .mfi nop.m 999 - fmerge.s 
f8 = f9,NEXTAFTER_sden + fmerge.s f8 = f9,FR_sden nop.i 999 ;; } +// Force underflow and inexact flags { .mfb nop.m 999 - fma.d.s0 NEXTAFTER_tmp = NEXTAFTER_tmp,NEXTAFTER_tmp,f0 - br.ret.sptk b0 ;; + fma.d.s0 FR_tmp = FR_tmp,FR_tmp,f0 + br.cond.sptk NEXT_UNDERFLOW ;; } -GLOBAL_LIBM_END(nextafter) -// Stack operations when calling error support. -// (1) (2) (3) (call) (4) -// sp -> + psp -> + psp -> + sp -> + -// | | | | -// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8 -// | | | | -// | <-GR_Y Y2->| Y2 ->| <- GR_Y | -// | | | | -// | | <- GR_X X1 ->| | -// | | | | -// sp-64 -> + sp -> + sp -> + + -// save ar.pfs save b0 restore gp -// save gp restore ar.pfs - - +NEXT_UNDERFLOW: +// Here if result is a denorm, or input is finite and result is zero +// Call error support to report possible range error +{ .mib + alloc r32=ar.pfs,2,2,4,0 + mov GR_Parameter_TAG = 268 // Error code + br.cond.sptk __libm_error_region // Branch to error call +} +;; -LOCAL_LIBM_ENTRY(__libm_error_region) -NEXTAFTER_OVERFLOW: -// Here if f8 is finite, but result will be infinite +NEXT_OVERFLOW: +// Here if input is finite, but result will be infinite // Use frcpa to generate infinity of correct sign // Call error support to report possible range error -.prologue - { .mfi alloc r32=ar.pfs,2,2,4,0 - frcpa.s1 f8,p6 = NEXTAFTER_save_f8, f0 + frcpa.s1 f8,p6 = FR_save_f8, f0 nop.i 999 ;; } // Create largest double { .mfi nop.m 999 - fmerge.se NEXTAFTER_lnorm = NEXTAFTER_lnorm_exp,NEXTAFTER_lnorm_sig + fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig nop.i 999 ;; } // Force overflow and inexact flags to be set -{ .mfi - mov r39 = 154 // Error code - fma.d.s0 NEXTAFTER_tmp = NEXTAFTER_lnorm,NEXTAFTER_lnorm,f0 - nop.i 999 +{ .mfb + mov GR_Parameter_TAG = 154 // Error code + fma.d.s0 FR_tmp = FR_lnorm,FR_lnorm,f0 + br.cond.sptk __libm_error_region // Branch to error call } ;; +GLOBAL_LIBM_END(nextafter) + + +LOCAL_LIBM_ENTRY(__libm_error_region) +.prologue + // (1) { .mfi add GR_Parameter_Y=-32,sp // 
Parameter 2 value @@ -459,7 +462,7 @@ NEXTAFTER_OVERFLOW: .body // (3) { .mib - stfd [GR_Parameter_X] = NEXTAFTER_save_f8 // STORE Parameter 1 on stack + stfd [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address nop.b 0 } diff --git a/sysdeps/ia64/fpu/s_nextafterf.S b/sysdeps/ia64/fpu/s_nextafterf.S index 6d2a927..0c269ec 100644 --- a/sysdeps/ia64/fpu/s_nextafterf.S +++ b/sysdeps/ia64/fpu/s_nextafterf.S @@ -1,7 +1,7 @@ .file "nextafterf.s" -// Copyright (c) 2000 - 2003, Intel Corporation +// Copyright (c) 2000 - 2004, Intel Corporation // All rights reserved. // // Contributed 2000 by the Intel Numerics Group, Intel Corporation @@ -51,6 +51,7 @@ // fixed flag settings for several cases // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align +// 12/14/04 Added error handling on underflow. // // API //============================================================== @@ -60,21 +61,21 @@ // // Registers used //============================================================== -nextafter_GR_max_pexp = r14 -nextafter_GR_min_pexp = r15 -nextafter_GR_exp = r16 -nextafter_GR_sig = r17 -nextafter_GR_lnorm_sig = r18 -nextafter_GR_sign_mask = r19 -nextafter_GR_exp_mask = r20 -nextafter_GR_sden_sig = r21 -nextafter_GR_new_sig = r22 -nextafter_GR_new_exp = r23 -nextafter_GR_lden_sig = r24 -nextafter_GR_snorm_sig = r25 -nextafter_GR_exp1 = r26 -nextafter_GR_x_exp = r27 -nextafter_GR_min_den_rexp = r28 +GR_max_pexp = r14 +GR_min_pexp = r15 +GR_exp = r16 +GR_sig = r17 +GR_lnorm_sig = r18 +GR_sign_mask = r19 +GR_exp_mask = r20 +GR_sden_sig = r21 +GR_new_sig = r22 +GR_new_exp = r23 +GR_lden_sig = r24 +GR_snorm_sig = r25 +GR_exp1 = r26 +GR_x_exp = r27 +GR_min_den_rexp = r28 // r36-39 parameters for libm_error_support GR_SAVE_B0 = r34 @@ -84,20 +85,21 @@ GR_SAVE_PFS = r32 GR_Parameter_X = r36 GR_Parameter_Y = r37 GR_Parameter_RESULT = r38 - -NEXTAFTER_lnorm_sig = f10 
-NEXTAFTER_lnorm_exp = f11 -NEXTAFTER_lnorm = f12 -NEXTAFTER_sden_sig = f13 -NEXTAFTER_sden_exp = f14 -NEXTAFTER_sden = f15 -NEXTAFTER_save_f8 = f33 -NEXTAFTER_new_exp = f34 -NEXTAFTER_new_sig = f35 -NEXTAFTER_lden_sig = f36 -NEXTAFTER_snorm_sig = f37 -NEXTAFTER_exp1 = f38 -NEXTAFTER_tmp = f39 +GR_Parameter_TAG = r39 + +FR_lnorm_sig = f10 +FR_lnorm_exp = f11 +FR_lnorm = f12 +FR_sden_sig = f13 +FR_sden_exp = f14 +FR_sden = f15 +FR_save_f8 = f33 +FR_new_exp = f34 +FR_new_sig = f35 +FR_lden_sig = f36 +FR_snorm_sig = f37 +FR_exp1 = f38 +FR_tmp = f39 // // Overview of operation @@ -112,21 +114,21 @@ GLOBAL_LIBM_ENTRY(nextafterf) // Extract signexp from x // Form smallest denormal significand = ulp size { .mlx - getf.exp nextafter_GR_exp = f8 - movl nextafter_GR_sden_sig = 0x0000010000000000 + getf.exp GR_exp = f8 + movl GR_sden_sig = 0x0000010000000000 } // Form largest normal exponent // Is x < y ? p10 if yes, p11 if no // Form smallest normal exponent { .mfi - addl nextafter_GR_max_pexp = 0x1007e, r0 + addl GR_max_pexp = 0x1007e, r0 fcmp.lt.s1 p10,p11 = f8, f9 - addl nextafter_GR_min_pexp = 0x0ff81, r0 ;; + addl GR_min_pexp = 0x0ff81, r0 ;; } // Is x=y? 
{ .mfi - getf.sig nextafter_GR_sig = f8 + getf.sig GR_sig = f8 fcmp.eq.s0 p6,p0 = f8, f9 nop.i 0 } @@ -134,14 +136,14 @@ GLOBAL_LIBM_ENTRY(nextafterf) // Form largest normal significand { .mlx nop.m 0 - movl nextafter_GR_lnorm_sig = 0xffffff0000000000 ;; + movl GR_lnorm_sig = 0xffffff0000000000 ;; } // Move largest normal significand to fp reg for special cases { .mfi - setf.sig NEXTAFTER_lnorm_sig = nextafter_GR_lnorm_sig + setf.sig FR_lnorm_sig = GR_lnorm_sig nop.f 0 - addl nextafter_GR_sign_mask = 0x20000, r0 ;; + addl GR_sign_mask = 0x20000, r0 ;; } // Move smallest denormal significand and signexp to fp regs @@ -150,14 +152,14 @@ GLOBAL_LIBM_ENTRY(nextafterf) // It increases (p12 set) if x<y and x>=0 or if x>y and x<0 // It decreases (p13 set) if x<y and x<0 or if x>y and x>=0 { .mfi - setf.sig NEXTAFTER_sden_sig = nextafter_GR_sden_sig + setf.sig FR_sden_sig = GR_sden_sig fclass.m p8,p0 = f8, 0xc3 -(p10) cmp.lt p12,p13 = nextafter_GR_exp, nextafter_GR_sign_mask +(p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask } { .mfi - setf.exp NEXTAFTER_sden_exp = nextafter_GR_min_pexp + setf.exp FR_sden_exp = GR_min_pexp nop.f 999 -(p11) cmp.ge p12,p13 = nextafter_GR_exp, nextafter_GR_sign_mask ;; +(p11) cmp.ge p12,p13 = GR_exp, GR_sign_mask ;; } .pred.rel "mutex",p12,p13 @@ -166,33 +168,33 @@ GLOBAL_LIBM_ENTRY(nextafterf) // If x=y set result to y // Form smallest normal significand and largest denormal significand { .mfi -(p12) add nextafter_GR_new_sig = nextafter_GR_sig, nextafter_GR_sden_sig +(p12) add GR_new_sig = GR_sig, GR_sden_sig (p6) fmerge.s f8=f9,f9 - dep.z nextafter_GR_snorm_sig = 1,63,1 // 0x8000000000000000 + dep.z GR_snorm_sig = 1,63,1 // 0x8000000000000000 } { .mlx -(p13) sub nextafter_GR_new_sig = nextafter_GR_sig, nextafter_GR_sden_sig - movl nextafter_GR_lden_sig = 0x7fffff0000000000 ;; +(p13) sub GR_new_sig = GR_sig, GR_sden_sig + movl GR_lden_sig = 0x7fffff0000000000 ;; } // Move expected result significand and signexp to fp regs // Is y=nan? 
// Form new exponent in case result exponent needs incrementing or decrementing { .mfi - setf.exp NEXTAFTER_new_exp = nextafter_GR_exp + setf.exp FR_new_exp = GR_exp fclass.m p9,p0 = f9, 0xc3 -(p12) add nextafter_GR_exp1 = 1, nextafter_GR_exp +(p12) add GR_exp1 = 1, GR_exp } { .mib - setf.sig NEXTAFTER_new_sig = nextafter_GR_new_sig -(p13) add nextafter_GR_exp1 = -1, nextafter_GR_exp + setf.sig FR_new_sig = GR_new_sig +(p13) add GR_exp1 = -1, GR_exp (p6) br.ret.spnt b0 ;; // Exit if x=y } // Move largest normal signexp to fp reg for special cases // Is x=zero? { .mfi - setf.exp NEXTAFTER_lnorm_exp = nextafter_GR_max_pexp + setf.exp FR_lnorm_exp = GR_max_pexp fclass.m p7,p0 = f8, 0x7 nop.i 999 } @@ -205,12 +207,12 @@ GLOBAL_LIBM_ENTRY(nextafterf) // Move exp+-1 and smallest normal significand to fp regs for special cases // Is x=inf? { .mfi - setf.exp NEXTAFTER_exp1 = nextafter_GR_exp1 + setf.exp FR_exp1 = GR_exp1 fclass.m p6,p0 = f8, 0x23 - addl nextafter_GR_exp_mask = 0x1ffff, r0 + addl GR_exp_mask = 0x1ffff, r0 } { .mfb - setf.sig NEXTAFTER_snorm_sig = nextafter_GR_snorm_sig + setf.sig FR_snorm_sig = GR_snorm_sig (p9) fma.s0 f8 = f8,f1,f9 (p9) br.ret.spnt b0 ;; // Exit if y=nan } @@ -218,16 +220,16 @@ GLOBAL_LIBM_ENTRY(nextafterf) // Move largest denormal significand to fp regs for special cases // Save x { .mfb - setf.sig NEXTAFTER_lden_sig = nextafter_GR_lden_sig - mov NEXTAFTER_save_f8 = f8 -(p7) br.cond.spnt NEXTAFTER_ZERO ;; // Exit if x=0 + setf.sig FR_lden_sig = GR_lden_sig + mov FR_save_f8 = f8 +(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0 } // Mask off the sign to get x_exp { .mfb - and nextafter_GR_x_exp = nextafter_GR_exp_mask, nextafter_GR_exp + and GR_x_exp = GR_exp_mask, GR_exp nop.f 999 -(p6) br.cond.spnt NEXTAFTER_INF ;; // Exit if x=inf +(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf } // Check 6 special cases when significand rolls over: @@ -246,35 +248,35 @@ GLOBAL_LIBM_ENTRY(nextafterf) // // Form exponent of smallest float denormal (if 
normalized register format) { .mmi - adds nextafter_GR_min_den_rexp = -23, nextafter_GR_min_pexp -(p12) cmp.eq.unc p6,p0 = nextafter_GR_new_sig, r0 -(p13) cmp.eq.unc p8,p10 = nextafter_GR_new_sig, nextafter_GR_lden_sig ;; + adds GR_min_den_rexp = -23, GR_min_pexp +(p12) cmp.eq.unc p6,p0 = GR_new_sig, r0 +(p13) cmp.eq.unc p8,p10 = GR_new_sig, GR_lden_sig ;; } { .mmi -(p6) cmp.lt.unc p6,p7 = nextafter_GR_x_exp, nextafter_GR_max_pexp -(p8) cmp.gt.unc p8,p9 = nextafter_GR_x_exp, nextafter_GR_min_pexp -(p10) cmp.eq.unc p10,p0 = nextafter_GR_new_sig, r0 ;; +(p6) cmp.lt.unc p6,p7 = GR_x_exp, GR_max_pexp +(p8) cmp.gt.unc p8,p9 = GR_x_exp, GR_min_pexp +(p10) cmp.eq.unc p10,p0 = GR_new_sig, r0 ;; } // Create small normal in case need to generate underflow flag { .mfi -(p10) cmp.le.unc p10,p0 = nextafter_GR_x_exp, nextafter_GR_min_pexp - fmerge.se NEXTAFTER_tmp = NEXTAFTER_sden_exp, NEXTAFTER_lnorm_sig -(p9) cmp.gt.unc p9,p14 = nextafter_GR_x_exp, nextafter_GR_min_den_rexp +(p10) cmp.le.unc p10,p0 = GR_x_exp, GR_min_pexp + fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig +(p9) cmp.gt.unc p9,p14 = GR_x_exp, GR_min_den_rexp } // Branch if cases 1, 2, 3 { .bbb -(p6) br.cond.spnt NEXTAFTER_EXPUP -(p7) br.cond.spnt NEXTAFTER_OVERFLOW -(p8) br.cond.spnt NEXTAFTER_EXPDOWN ;; +(p6) br.cond.spnt NEXT_EXPUP +(p7) br.cond.spnt NEXT_OVERFLOW +(p8) br.cond.spnt NEXT_EXPDOWN ;; } // Branch if cases 4, 5, 6 { .bbb -(p9) br.cond.spnt NEXTAFTER_NORM_TO_DENORM -(p10) br.cond.spnt NEXTAFTER_UNDERFLOW_TO_ZERO -(p14) br.cond.spnt NEXTAFTER_UNDERFLOW_TO_ZERO ;; +(p9) br.cond.spnt NEXT_NORM_TO_DENORM +(p10) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO +(p14) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO ;; } // Here if no special cases @@ -282,68 +284,72 @@ GLOBAL_LIBM_ENTRY(nextafterf) // Case 1: x_exp=min_exp, x_sig=unnormalized // Case 2: x_exp<min_exp { .mfi - cmp.lt p6,p7 = nextafter_GR_x_exp, nextafter_GR_min_pexp - fmerge.se f8 = NEXTAFTER_new_exp, NEXTAFTER_new_sig + cmp.lt p6,p7 = GR_x_exp, GR_min_pexp + 
fmerge.se f8 = FR_new_exp, FR_new_sig nop.i 999 ;; } { .mfi nop.m 999 nop.f 999 -(p7) tbit.z p6,p0 = nextafter_GR_new_sig, 63 ;; +(p7) tbit.z p6,p0 = GR_new_sig, 63 ;; } -NEXTAFTER_COMMON_FINISH: +NEXT_COMMON_FINISH: // Force underflow and inexact if denormal result { .mfi nop.m 999 -(p6) fma.s.s0 NEXTAFTER_tmp = NEXTAFTER_tmp,NEXTAFTER_tmp,f0 - nop.i 999 ;; +(p6) fma.s.s0 FR_tmp = FR_tmp,FR_tmp,f0 + nop.i 999 +} +{ .mfb + nop.m 999 + fnorm.s.s0 f8 = f8 // Final normalization to result precision +(p6) br.cond.spnt NEXT_UNDERFLOW ;; } -// Final normalization to result precision and exit { .mfb nop.m 999 - fnorm.s.s0 f8 = f8 + nop.f 999 br.ret.sptk b0;; } //Special cases -NEXTAFTER_EXPUP: +NEXT_EXPUP: { .mfb - cmp.lt p6,p7 = nextafter_GR_x_exp, nextafter_GR_min_pexp - fmerge.se f8 = NEXTAFTER_exp1, NEXTAFTER_snorm_sig - br.cond.sptk NEXTAFTER_COMMON_FINISH ;; + cmp.lt p6,p7 = GR_x_exp, GR_min_pexp + fmerge.se f8 = FR_exp1, FR_snorm_sig + br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXTAFTER_EXPDOWN: +NEXT_EXPDOWN: { .mfb - cmp.lt p6,p7 = nextafter_GR_x_exp, nextafter_GR_min_pexp - fmerge.se f8 = NEXTAFTER_exp1, NEXTAFTER_lnorm_sig - br.cond.sptk NEXTAFTER_COMMON_FINISH ;; + cmp.lt p6,p7 = GR_x_exp, GR_min_pexp + fmerge.se f8 = FR_exp1, FR_lnorm_sig + br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXTAFTER_NORM_TO_DENORM: +NEXT_NORM_TO_DENORM: { .mfi nop.m 999 - fmerge.se f8 = NEXTAFTER_new_exp, NEXTAFTER_lden_sig + fmerge.se f8 = FR_new_exp, FR_lden_sig nop.i 999 } // Force underflow and inexact { .mfb nop.m 999 - fma.s.s0 NEXTAFTER_tmp = NEXTAFTER_tmp,NEXTAFTER_tmp,f0 - br.ret.sptk b0 ;; + fma.s.s0 FR_tmp = FR_tmp,FR_tmp,f0 + br.cond.sptk NEXT_UNDERFLOW ;; } -NEXTAFTER_UNDERFLOW_TO_ZERO: +NEXT_UNDERFLOW_TO_ZERO: { .mfb cmp.eq p6,p0 = r0,r0 - fmerge.s f8 = NEXTAFTER_save_f8,f0 - br.cond.sptk NEXTAFTER_COMMON_FINISH ;; + fmerge.s f8 = FR_save_f8,f0 + br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXTAFTER_INF: +NEXT_INF: // Here if f8 is +- infinity // INF // if f8 is +inf, no matter what 
y is return largest float @@ -351,17 +357,17 @@ NEXTAFTER_INF: { .mfi nop.m 999 - fmerge.se NEXTAFTER_lnorm = NEXTAFTER_lnorm_exp,NEXTAFTER_lnorm_sig + fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig nop.i 999 ;; } { .mfb nop.m 999 - fmerge.s f8 = f8,NEXTAFTER_lnorm + fmerge.s f8 = f8,FR_lnorm br.ret.sptk b0 ;; } -NEXTAFTER_ZERO: +NEXT_ZERO: // Here if f8 is +- zero // ZERO @@ -370,76 +376,72 @@ NEXTAFTER_ZERO: { .mfi nop.m 999 - fmerge.se NEXTAFTER_sden = NEXTAFTER_sden_exp,NEXTAFTER_sden_sig + fmerge.se FR_sden = FR_sden_exp,FR_sden_sig nop.i 999 ;; } // Create small normal to generate underflow flag { .mfi nop.m 999 - fmerge.se NEXTAFTER_tmp = NEXTAFTER_sden_exp, NEXTAFTER_lnorm_sig + fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig nop.i 999 ;; } // Add correct sign from direction arg { .mfi nop.m 999 - fmerge.s f8 = f9,NEXTAFTER_sden + fmerge.s f8 = f9,FR_sden nop.i 999 ;; } // Force underflow and inexact flags { .mfb nop.m 999 - fma.s.s0 NEXTAFTER_tmp = NEXTAFTER_tmp,NEXTAFTER_tmp,f0 - br.ret.sptk b0 ;; + fma.s.s0 FR_tmp = FR_tmp,FR_tmp,f0 + br.cond.sptk NEXT_UNDERFLOW ;; } -GLOBAL_LIBM_END(nextafterf) -// Stack operations when calling error support. 
-// (1) (2) (3) (call) (4) -// sp -> + psp -> + psp -> + sp -> + -// | | | | -// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8 -// | | | | -// | <-GR_Y Y2->| Y2 ->| <- GR_Y | -// | | | | -// | | <- GR_X X1 ->| | -// | | | | -// sp-64 -> + sp -> + sp -> + + -// save ar.pfs save b0 restore gp -// save gp restore ar.pfs - - +NEXT_UNDERFLOW: +// Here if result is a denorm, or input is finite and result is zero +// Call error support to report possible range error +{ .mib + alloc r32=ar.pfs,2,2,4,0 + mov GR_Parameter_TAG = 269 // Error code + br.cond.sptk __libm_error_region // Branch to error call +} +;; -LOCAL_LIBM_ENTRY(__libm_error_region) -NEXTAFTER_OVERFLOW: -// Here if f8 is finite, but result will be infinite +NEXT_OVERFLOW: +// Here if input is finite, but result will be infinite // Use frcpa to generate infinity of correct sign // Call error support to report possible range error -.prologue - { .mfi alloc r32=ar.pfs,2,2,4,0 - frcpa.s1 f8,p6 = NEXTAFTER_save_f8, f0 - nop.i 999 + frcpa.s1 f8,p6 = FR_save_f8, f0 + nop.i 999 ;; } -// Create largest float +// Create largest float { .mfi nop.m 999 - fmerge.se NEXTAFTER_lnorm = NEXTAFTER_lnorm_exp,NEXTAFTER_lnorm_sig + fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig nop.i 999 ;; } // Force overflow and inexact flags to be set -{ .mfi - mov r39 = 155 // Error code - fma.s.s0 NEXTAFTER_tmp = NEXTAFTER_lnorm,NEXTAFTER_lnorm,f0 - nop.i 999 +{ .mfb + mov GR_Parameter_TAG = 155 // Error code + fma.s.s0 FR_tmp = FR_lnorm,FR_lnorm,f0 + br.cond.sptk __libm_error_region // Branch to error call } ;; +GLOBAL_LIBM_END(nextafterf) + + +LOCAL_LIBM_ENTRY(__libm_error_region) +.prologue + // (1) { .mfi add GR_Parameter_Y=-32,sp // Parameter 2 value @@ -466,7 +468,7 @@ NEXTAFTER_OVERFLOW: .body // (3) { .mib - stfs [GR_Parameter_X] = NEXTAFTER_save_f8 // STORE Parameter 1 on stack + stfs [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address nop.b 0 } diff --git 
a/sysdeps/ia64/fpu/s_nextafterl.S b/sysdeps/ia64/fpu/s_nextafterl.S index 05bdd9c..20c927b 100644 --- a/sysdeps/ia64/fpu/s_nextafterl.S +++ b/sysdeps/ia64/fpu/s_nextafterl.S @@ -1,7 +1,7 @@ .file "nextafterl.s" -// Copyright (c) 2000 - 2003, Intel Corporation +// Copyright (c) 2000 - 2004, Intel Corporation // All rights reserved. // // Contributed 2000 by the Intel Numerics Group, Intel Corporation @@ -52,6 +52,7 @@ // for several cases // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align +// 12/14/04 Added error handling on underflow. // // API //============================================================== @@ -61,20 +62,20 @@ // // Registers used //============================================================== -nextafter_GR_max_pexp = r14 -nextafter_GR_min_pexp = r15 -nextafter_GR_exp = r16 -nextafter_GR_sig = r17 -nextafter_GR_lnorm_sig = r18 -nextafter_GR_sign_mask = r19 -nextafter_GR_exp_mask = r20 -nextafter_GR_sden_sig = r21 -nextafter_GR_new_sig = r22 -nextafter_GR_new_exp = r23 -nextafter_GR_lden_sig = r24 -nextafter_GR_snorm_sig = r25 -nextafter_GR_exp1 = r26 -nextafter_GR_x_exp = r27 +GR_max_pexp = r14 +GR_min_pexp = r15 +GR_exp = r16 +GR_sig = r17 +GR_lnorm_sig = r18 +GR_sign_mask = r19 +GR_exp_mask = r20 +GR_sden_sig = r21 +GR_new_sig = r22 +GR_new_exp = r23 +GR_lden_sig = r24 +GR_snorm_sig = r25 +GR_exp1 = r26 +GR_x_exp = r27 // r36-39 parameters for libm_error_support GR_SAVE_B0 = r34 @@ -84,21 +85,22 @@ GR_SAVE_PFS = r32 GR_Parameter_X = r36 GR_Parameter_Y = r37 GR_Parameter_RESULT = r38 - -NEXTAFTER_lnorm_sig = f10 -NEXTAFTER_lnorm_exp = f11 -NEXTAFTER_lnorm = f12 -NEXTAFTER_sden_sig = f13 -NEXTAFTER_den_exp = f14 -NEXTAFTER_sden = f15 -NEXTAFTER_snorm_exp = f32 -NEXTAFTER_save_f8 = f33 -NEXTAFTER_new_exp = f34 -NEXTAFTER_new_sig = f35 -NEXTAFTER_lden_sig = f36 -NEXTAFTER_snorm_sig = f37 -NEXTAFTER_exp1 = f38 -NEXTAFTER_tmp = f39 +GR_Parameter_TAG = r39 + +FR_lnorm_sig = f10 +FR_lnorm_exp 
= f11 +FR_lnorm = f12 +FR_sden_sig = f13 +FR_den_exp = f14 +FR_sden = f15 +FR_snorm_exp = f32 +FR_save_f8 = f33 +FR_new_exp = f34 +FR_new_sig = f35 +FR_lden_sig = f36 +FR_snorm_sig = f37 +FR_exp1 = f38 +FR_tmp = f39 // // Overview of operation @@ -114,31 +116,31 @@ GLOBAL_LIBM_ENTRY(nextafterl) // Is x < y ? p10 if yes, p11 if no // Form smallest denormal significand = ulp size { .mfi - getf.exp nextafter_GR_exp = f8 + getf.exp GR_exp = f8 fcmp.lt.s1 p10,p11 = f8, f9 - addl nextafter_GR_sden_sig = 0x1, r0 + addl GR_sden_sig = 0x1, r0 } // Form largest normal significand 0xffffffffffffffff // Form smallest normal exponent { .mfi - addl nextafter_GR_lnorm_sig = -0x1,r0 + addl GR_lnorm_sig = -0x1,r0 nop.f 999 - addl nextafter_GR_min_pexp = 0x0c001, r0 ;; + addl GR_min_pexp = 0x0c001, r0 ;; } // Extract significand from x // Is x=y? This fcmp also sets Invalid and Denormal if required // Form largest normal exponent { .mfi - getf.sig nextafter_GR_sig = f8 + getf.sig GR_sig = f8 fcmp.eq.s0 p6,p0 = f8, f9 - addl nextafter_GR_max_pexp = 0x13ffe, r0 + addl GR_max_pexp = 0x13ffe, r0 } // Move largest normal significand to fp reg for special cases { .mfi - setf.sig NEXTAFTER_lnorm_sig = nextafter_GR_lnorm_sig + setf.sig FR_lnorm_sig = GR_lnorm_sig nop.f 999 - addl nextafter_GR_sign_mask = 0x20000, r0 ;; + addl GR_sign_mask = 0x20000, r0 ;; } // Move smallest denormal significand and exp to fp regs @@ -147,15 +149,15 @@ GLOBAL_LIBM_ENTRY(nextafterl) // It increases (p12 set) if x<y and x>=0 or if x>y and x<0 // It decreases (p13 set) if x<y and x<0 or if x>y and x>=0 { .mfi - setf.sig NEXTAFTER_sden_sig = nextafter_GR_sden_sig + setf.sig FR_sden_sig = GR_sden_sig fclass.m p8,p0 = f8, 0xc3 -(p10) cmp.lt p12,p13 = nextafter_GR_exp, nextafter_GR_sign_mask +(p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask } // Move smallest normal exp to fp regs { .mfi - setf.exp NEXTAFTER_snorm_exp = nextafter_GR_min_pexp + setf.exp FR_snorm_exp = GR_min_pexp nop.f 999 -(p11) cmp.ge p12,p13 = 
nextafter_GR_exp, nextafter_GR_sign_mask ;; +(p11) cmp.ge p12,p13 = GR_exp, GR_sign_mask ;; } .pred.rel "mutex",p12,p13 @@ -164,38 +166,38 @@ GLOBAL_LIBM_ENTRY(nextafterl) // If x=y set result to y // Form smallest normal significand and largest denormal significand { .mfi -(p12) add nextafter_GR_new_sig = nextafter_GR_sig, nextafter_GR_sden_sig +(p12) add GR_new_sig = GR_sig, GR_sden_sig (p6) fmerge.s f8=f9,f9 - dep.z nextafter_GR_snorm_sig = 1,63,1 // 0x8000000000000000 + dep.z GR_snorm_sig = 1,63,1 // 0x8000000000000000 } { .mlx -(p13) sub nextafter_GR_new_sig = nextafter_GR_sig, nextafter_GR_sden_sig - movl nextafter_GR_lden_sig = 0x7fffffffffffffff ;; +(p13) sub GR_new_sig = GR_sig, GR_sden_sig + movl GR_lden_sig = 0x7fffffffffffffff ;; } // Move expected result significand and signexp to fp regs // Is y=nan? // Form new exponent in case result exponent needs incrementing or decrementing { .mfi - setf.exp NEXTAFTER_new_exp = nextafter_GR_exp + setf.exp FR_new_exp = GR_exp fclass.m p9,p0 = f9, 0xc3 -(p12) add nextafter_GR_exp1 = 1, nextafter_GR_exp +(p12) add GR_exp1 = 1, GR_exp } { .mib - setf.sig NEXTAFTER_new_sig = nextafter_GR_new_sig -(p13) add nextafter_GR_exp1 = -1, nextafter_GR_exp + setf.sig FR_new_sig = GR_new_sig +(p13) add GR_exp1 = -1, GR_exp (p6) br.ret.spnt b0 ;; // Exit if x=y } // Move largest normal signexp to fp reg for special cases // Is x=zero? { .mfi - setf.exp NEXTAFTER_lnorm_exp = nextafter_GR_max_pexp + setf.exp FR_lnorm_exp = GR_max_pexp fclass.m p7,p0 = f8, 0x7 nop.i 999 } { .mfb - setf.exp NEXTAFTER_den_exp = nextafter_GR_min_pexp + setf.exp FR_den_exp = GR_min_pexp (p8) fma.s0 f8 = f8,f1,f9 (p8) br.ret.spnt b0 ;; // Exit if x=nan } @@ -203,12 +205,12 @@ GLOBAL_LIBM_ENTRY(nextafterl) // Move exp+-1 and smallest normal significand to fp regs for special cases // Is x=inf? 
{ .mfi - setf.exp NEXTAFTER_exp1 = nextafter_GR_exp1 + setf.exp FR_exp1 = GR_exp1 fclass.m p6,p0 = f8, 0x23 - addl nextafter_GR_exp_mask = 0x1ffff, r0 + addl GR_exp_mask = 0x1ffff, r0 } { .mfb - setf.sig NEXTAFTER_snorm_sig = nextafter_GR_snorm_sig + setf.sig FR_snorm_sig = GR_snorm_sig (p9) fma.s0 f8 = f8,f1,f9 (p9) br.ret.spnt b0 ;; // Exit if y=nan } @@ -216,16 +218,16 @@ GLOBAL_LIBM_ENTRY(nextafterl) // Move largest denormal significand to fp regs for special cases // Save x { .mfb - setf.sig NEXTAFTER_lden_sig = nextafter_GR_lden_sig - mov NEXTAFTER_save_f8 = f8 -(p7) br.cond.spnt NEXTAFTER_ZERO ;; // Exit if x=0 + setf.sig FR_lden_sig = GR_lden_sig + mov FR_save_f8 = f8 +(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0 } // Mask off the sign to get x_exp { .mfb - and nextafter_GR_x_exp = nextafter_GR_exp_mask, nextafter_GR_exp + and GR_x_exp = GR_exp_mask, GR_exp nop.f 999 -(p6) br.cond.spnt NEXTAFTER_INF ;; // Exit if x=inf +(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf } // Check 5 special cases when significand rolls over: @@ -241,37 +243,37 @@ GLOBAL_LIBM_ENTRY(nextafterl) // Set p10, result is zero, sign of x, signal underflow and inexact // { .mmi -(p12) cmp.eq.unc p6,p0 = nextafter_GR_new_sig, r0 -(p13) cmp.eq.unc p9,p10 = nextafter_GR_new_sig, nextafter_GR_lden_sig +(p12) cmp.eq.unc p6,p0 = GR_new_sig, r0 +(p13) cmp.eq.unc p9,p10 = GR_new_sig, GR_lden_sig nop.i 999 ;; } { .mmi -(p6) cmp.lt.unc p6,p7 = nextafter_GR_x_exp, nextafter_GR_max_pexp -(p10) cmp.eq.unc p10,p0 = nextafter_GR_new_sig, r0 -(p9) cmp.le.unc p9,p8 = nextafter_GR_x_exp, nextafter_GR_min_pexp +(p6) cmp.lt.unc p6,p7 = GR_x_exp, GR_max_pexp +(p10) cmp.eq.unc p10,p0 = GR_new_sig, r0 +(p9) cmp.le.unc p9,p8 = GR_x_exp, GR_min_pexp ;; } // Create small normal in case need to generate underflow flag { .mfi nop.m 999 - fmerge.se NEXTAFTER_tmp = NEXTAFTER_snorm_exp, NEXTAFTER_lnorm_sig + fmerge.se FR_tmp = FR_snorm_exp, FR_lnorm_sig nop.i 999 } // Branch if cases 1, 2, 3 { .bbb -(p6) 
br.cond.spnt NEXTAFTER_EXPUP -(p7) br.cond.spnt NEXTAFTER_OVERFLOW -(p8) br.cond.spnt NEXTAFTER_EXPDOWN ;; +(p6) br.cond.spnt NEXT_EXPUP +(p7) br.cond.spnt NEXT_OVERFLOW +(p8) br.cond.spnt NEXT_EXPDOWN ;; } // Branch if cases 4, 5 { .mbb nop.m 999 -(p9) br.cond.spnt NEXTAFTER_NORM_TO_DENORM -(p10) br.cond.spnt NEXTAFTER_UNDERFLOW_TO_ZERO +(p9) br.cond.spnt NEXT_NORM_TO_DENORM +(p10) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO ;; } @@ -280,68 +282,72 @@ GLOBAL_LIBM_ENTRY(nextafterl) // Case 1: x_exp=min_exp, x_sig=unnormalized // Case 2: x_exp<min_exp { .mfi - cmp.lt p6,p7 = nextafter_GR_x_exp, nextafter_GR_min_pexp - fmerge.se f8 = NEXTAFTER_new_exp, NEXTAFTER_new_sig + cmp.lt p6,p7 = GR_x_exp, GR_min_pexp + fmerge.se f8 = FR_new_exp, FR_new_sig nop.i 999 ;; } { .mfi nop.m 999 nop.f 999 -(p6) tbit.z p6,p0 = nextafter_GR_new_sig, 63 ;; +(p6) tbit.z p6,p0 = GR_new_sig, 63 ;; } -NEXTAFTER_COMMON_FINISH: +NEXT_COMMON_FINISH: // Force underflow and inexact if denormal result { .mfi nop.m 999 -(p6) fma.s0 NEXTAFTER_tmp = NEXTAFTER_tmp,NEXTAFTER_tmp,f0 - nop.i 999 ;; +(p6) fma.s0 FR_tmp = FR_tmp,FR_tmp,f0 + nop.i 999 +} +{ .mfb + nop.m 999 + fnorm.s0 f8 = f8 // Final normalization to result precision +(p6) br.cond.spnt NEXT_UNDERFLOW ;; } -// Final normalization to result precision and exit { .mfb nop.m 999 - fnorm.s0 f8 = f8 + nop.f 999 br.ret.sptk b0;; } //Special cases -NEXTAFTER_EXPUP: +NEXT_EXPUP: { .mfb - cmp.lt p6,p7 = nextafter_GR_x_exp, nextafter_GR_min_pexp - fmerge.se f8 = NEXTAFTER_exp1, NEXTAFTER_snorm_sig - br.cond.sptk NEXTAFTER_COMMON_FINISH ;; + cmp.lt p6,p7 = GR_x_exp, GR_min_pexp + fmerge.se f8 = FR_exp1, FR_snorm_sig + br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXTAFTER_EXPDOWN: +NEXT_EXPDOWN: { .mfb - cmp.lt p6,p7 = nextafter_GR_x_exp, nextafter_GR_min_pexp - fmerge.se f8 = NEXTAFTER_exp1, NEXTAFTER_lnorm_sig - br.cond.sptk NEXTAFTER_COMMON_FINISH ;; + cmp.lt p6,p7 = GR_x_exp, GR_min_pexp + fmerge.se f8 = FR_exp1, FR_lnorm_sig + br.cond.sptk NEXT_COMMON_FINISH ;; 
} -NEXTAFTER_NORM_TO_DENORM: +NEXT_NORM_TO_DENORM: { .mfi nop.m 999 - fmerge.se f8 = NEXTAFTER_exp1, NEXTAFTER_lden_sig + fmerge.se f8 = FR_exp1, FR_lden_sig nop.i 999 } // Force underflow and inexact { .mfb nop.m 999 - fma.s0 NEXTAFTER_tmp = NEXTAFTER_tmp,NEXTAFTER_tmp,f0 - br.ret.sptk b0 ;; + fma.s0 FR_tmp = FR_tmp,FR_tmp,f0 + br.cond.sptk NEXT_UNDERFLOW ;; } -NEXTAFTER_UNDERFLOW_TO_ZERO: +NEXT_UNDERFLOW_TO_ZERO: { .mfb cmp.eq p6,p0 = r0,r0 - fmerge.s f8 = NEXTAFTER_save_f8,f0 - br.cond.sptk NEXTAFTER_COMMON_FINISH ;; + fmerge.s f8 = FR_save_f8,f0 + br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXTAFTER_INF: +NEXT_INF: // Here if f8 is +- infinity // INF // if f8 is +inf, no matter what y is return largest long double @@ -350,17 +356,17 @@ NEXTAFTER_INF: // Create largest long double { .mfi nop.m 999 - fmerge.se NEXTAFTER_lnorm = NEXTAFTER_lnorm_exp,NEXTAFTER_lnorm_sig + fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig nop.i 999 ;; } { .mfb nop.m 999 - fmerge.s f8 = f8,NEXTAFTER_lnorm + fmerge.s f8 = f8,FR_lnorm br.ret.sptk b0 ;; } -NEXTAFTER_ZERO: +NEXT_ZERO: // Here if f8 is +- zero // ZERO @@ -369,76 +375,72 @@ NEXTAFTER_ZERO: { .mfi nop.m 999 - fmerge.se NEXTAFTER_sden = f0,NEXTAFTER_sden_sig + fmerge.se FR_sden = f0,FR_sden_sig nop.i 999 ;; } // Create small normal to generate underflow flag { .mfi nop.m 999 - fmerge.se NEXTAFTER_tmp = NEXTAFTER_snorm_exp, NEXTAFTER_lnorm_sig + fmerge.se FR_tmp = FR_snorm_exp, FR_lnorm_sig nop.i 999 ;; } // Add correct sign from direction arg { .mfi nop.m 999 - fmerge.s f8 = f9,NEXTAFTER_sden + fmerge.s f8 = f9,FR_sden nop.i 999 ;; } // Force underflow and inexact flags { .mfb nop.m 999 - fma.s0 NEXTAFTER_tmp = NEXTAFTER_tmp,NEXTAFTER_tmp,f0 - br.ret.sptk b0 ;; + fma.s0 FR_tmp = FR_tmp,FR_tmp,f0 + br.cond.sptk NEXT_UNDERFLOW ;; } -GLOBAL_LIBM_END(nextafterl) -// Stack operations when calling error support. 
-// (1) (2) (3) (call) (4) -// sp -> + psp -> + psp -> + sp -> + -// | | | | -// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8 -// | | | | -// | <-GR_Y Y2->| Y2 ->| <- GR_Y | -// | | | | -// | | <- GR_X X1 ->| | -// | | | | -// sp-64 -> + sp -> + sp -> + + -// save ar.pfs save b0 restore gp -// save gp restore ar.pfs - - +NEXT_UNDERFLOW: +// Here if result is a denorm, or input is finite and result is zero +// Call error support to report possible range error +{ .mib + alloc r32=ar.pfs,2,2,4,0 + mov GR_Parameter_TAG = 267 // Error code + br.cond.sptk __libm_error_region // Branch to error call +} +;; -LOCAL_LIBM_ENTRY(__libm_error_region) -NEXTAFTER_OVERFLOW: -// Here if f8 is finite, but result will be infinite +NEXT_OVERFLOW: +// Here if input is finite, but result will be infinite // Use frcpa to generate infinity of correct sign // Call error support to report possible range error -.prologue - { .mfi alloc r32=ar.pfs,2,2,4,0 - frcpa.s1 f8,p6 = NEXTAFTER_save_f8, f0 + frcpa.s1 f8,p6 = FR_save_f8, f0 nop.i 999 ;; } -// Create largest long double +// Create largest long double { .mfi nop.m 999 - fmerge.se NEXTAFTER_lnorm = NEXTAFTER_lnorm_exp,NEXTAFTER_lnorm_sig + fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig nop.i 999 ;; } // Force overflow and inexact flags to be set -{ .mfi - mov r39 = 153 // Error code - fma.s0 NEXTAFTER_tmp = NEXTAFTER_lnorm,NEXTAFTER_lnorm,f0 - nop.i 999 +{ .mfb + mov GR_Parameter_TAG = 153 // Error code + fma.s0 FR_tmp = FR_lnorm,FR_lnorm,f0 + br.cond.sptk __libm_error_region // Branch to error call } ;; +GLOBAL_LIBM_END(nextafterl) + + +LOCAL_LIBM_ENTRY(__libm_error_region) +.prologue + // (1) { .mfi add GR_Parameter_Y=-32,sp // Parameter 2 value @@ -465,7 +467,7 @@ NEXTAFTER_OVERFLOW: .body // (3) { .mib - stfe [GR_Parameter_X] = NEXTAFTER_save_f8 // STORE Parameter 1 on stack + stfe [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address nop.b 0 } diff --git 
a/sysdeps/ia64/fpu/s_nexttoward.S b/sysdeps/ia64/fpu/s_nexttoward.S index f8fac1e..741fea0 100644 --- a/sysdeps/ia64/fpu/s_nexttoward.S +++ b/sysdeps/ia64/fpu/s_nexttoward.S @@ -1,7 +1,7 @@ .file "nexttoward.s" -// Copyright (c) 2001 - 2003, Intel Corporation +// Copyright (c) 2001 - 2004, Intel Corporation // All rights reserved. // // Contributed 2001 by the Intel Numerics Group, Intel Corporation @@ -43,6 +43,7 @@ // 08/23/01 Corrected error tag number // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align +// 12/14/04 Added error handling on underflow. // // API //============================================================== @@ -52,21 +53,21 @@ // // Registers used //============================================================== -nexttoward_GR_max_pexp = r14 -nexttoward_GR_min_pexp = r15 -nexttoward_GR_exp = r16 -nexttoward_GR_sig = r17 -nexttoward_GR_lnorm_sig = r18 -nexttoward_GR_sign_mask = r19 -nexttoward_GR_exp_mask = r20 -nexttoward_GR_sden_sig = r21 -nexttoward_GR_new_sig = r22 -nexttoward_GR_new_exp = r23 -nexttoward_GR_lden_sig = r24 -nexttoward_GR_snorm_sig = r25 -nexttoward_GR_exp1 = r26 -nexttoward_GR_x_exp = r27 -nexttoward_GR_min_den_rexp = r28 +GR_max_pexp = r14 +GR_min_pexp = r15 +GR_exp = r16 +GR_sig = r17 +GR_lnorm_sig = r18 +GR_sign_mask = r19 +GR_exp_mask = r20 +GR_sden_sig = r21 +GR_new_sig = r22 +GR_new_exp = r23 +GR_lden_sig = r24 +GR_snorm_sig = r25 +GR_exp1 = r26 +GR_x_exp = r27 +GR_min_den_rexp = r28 // r36-39 parameters for libm_error_support GR_SAVE_B0 = r34 @@ -76,20 +77,21 @@ GR_SAVE_PFS = r32 GR_Parameter_X = r36 GR_Parameter_Y = r37 GR_Parameter_RESULT = r38 - -NEXTTOWARD_lnorm_sig = f10 -NEXTTOWARD_lnorm_exp = f11 -NEXTTOWARD_lnorm = f12 -NEXTTOWARD_sden_sig = f13 -NEXTTOWARD_sden_exp = f14 -NEXTTOWARD_sden = f15 -NEXTTOWARD_save_f8 = f33 -NEXTTOWARD_new_exp = f34 -NEXTTOWARD_new_sig = f35 -NEXTTOWARD_lden_sig = f36 -NEXTTOWARD_snorm_sig = f37 -NEXTTOWARD_exp1 = f38 
-NEXTTOWARD_tmp = f39 +GR_Parameter_TAG = r39 + +FR_lnorm_sig = f10 +FR_lnorm_exp = f11 +FR_lnorm = f12 +FR_sden_sig = f13 +FR_sden_exp = f14 +FR_sden = f15 +FR_save_f8 = f33 +FR_new_exp = f34 +FR_new_sig = f35 +FR_lden_sig = f36 +FR_snorm_sig = f37 +FR_exp1 = f38 +FR_tmp = f39 // // Overview of operation @@ -105,30 +107,30 @@ GLOBAL_LIBM_ENTRY(nexttoward) // Is x < y ? p10 if yes, p11 if no // Form smallest denormal significand = ulp size { .mfi - getf.exp nexttoward_GR_exp = f8 + getf.exp GR_exp = f8 fcmp.lt.s1 p10,p11 = f8, f9 - addl nexttoward_GR_sden_sig = 0x800, r0 + addl GR_sden_sig = 0x800, r0 } // Form largest normal significand 0xfffffffffffff800 // Form smallest normal exponent { .mfi - addl nexttoward_GR_lnorm_sig = -0x800,r0 + addl GR_lnorm_sig = -0x800,r0 nop.f 999 - addl nexttoward_GR_min_pexp = 0x0fc01, r0 ;; + addl GR_min_pexp = 0x0fc01, r0 ;; } // Extract significand from x // Is x=y? // Form largest normal exponent { .mfi - getf.sig nexttoward_GR_sig = f8 + getf.sig GR_sig = f8 fcmp.eq.s0 p6,p0 = f8, f9 - addl nexttoward_GR_max_pexp = 0x103fe, r0 + addl GR_max_pexp = 0x103fe, r0 } // Move largest normal significand to fp reg for special cases { .mfi - setf.sig NEXTTOWARD_lnorm_sig = nexttoward_GR_lnorm_sig + setf.sig FR_lnorm_sig = GR_lnorm_sig nop.f 999 - addl nexttoward_GR_sign_mask = 0x20000, r0 ;; + addl GR_sign_mask = 0x20000, r0 ;; } // Move smallest denormal significand and signexp to fp regs @@ -137,13 +139,13 @@ GLOBAL_LIBM_ENTRY(nexttoward) // It increases (p12 set) if x<y and x>=0 or if x>y and x<0 // It decreases (p13 set) if x<y and x<0 or if x>y and x>=0 { .mfi - setf.sig NEXTTOWARD_sden_sig = nexttoward_GR_sden_sig + setf.sig FR_sden_sig = GR_sden_sig fclass.m p8,p0 = f8, 0xc3 -(p10) cmp.lt p12,p13 = nexttoward_GR_exp, nexttoward_GR_sign_mask +(p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask } { .mfi - setf.exp NEXTTOWARD_sden_exp = nexttoward_GR_min_pexp -(p11) cmp.ge p12,p13 = nexttoward_GR_exp, nexttoward_GR_sign_mask ;; + setf.exp 
FR_sden_exp = GR_min_pexp +(p11) cmp.ge p12,p13 = GR_exp, GR_sign_mask ;; } .pred.rel "mutex",p12,p13 @@ -152,33 +154,33 @@ GLOBAL_LIBM_ENTRY(nexttoward) // If x=y set result to y // Form smallest normal significand and largest denormal significand { .mfi -(p12) add nexttoward_GR_new_sig = nexttoward_GR_sig, nexttoward_GR_sden_sig +(p12) add GR_new_sig = GR_sig, GR_sden_sig (p6) fnorm.d.s0 f8=f9 //Normalise - dep.z nexttoward_GR_snorm_sig = 1,63,1 // 0x8000000000000000 + dep.z GR_snorm_sig = 1,63,1 // 0x8000000000000000 } { .mlx -(p13) sub nexttoward_GR_new_sig = nexttoward_GR_sig, nexttoward_GR_sden_sig - movl nexttoward_GR_lden_sig = 0x7ffffffffffff800 ;; +(p13) sub GR_new_sig = GR_sig, GR_sden_sig + movl GR_lden_sig = 0x7ffffffffffff800 ;; } // Move expected result significand and signexp to fp regs // Is y=nan? // Form new exponent in case result exponent needs incrementing or decrementing { .mfi - setf.exp NEXTTOWARD_new_exp = nexttoward_GR_exp + setf.exp FR_new_exp = GR_exp fclass.m p9,p0 = f9, 0xc3 -(p12) add nexttoward_GR_exp1 = 1, nexttoward_GR_exp +(p12) add GR_exp1 = 1, GR_exp } { .mib - setf.sig NEXTTOWARD_new_sig = nexttoward_GR_new_sig -(p13) add nexttoward_GR_exp1 = -1, nexttoward_GR_exp + setf.sig FR_new_sig = GR_new_sig +(p13) add GR_exp1 = -1, GR_exp (p6) br.ret.spnt b0 ;; // Exit if x=y } // Move largest normal signexp to fp reg for special cases // Is x=zero? { .mfi - setf.exp NEXTTOWARD_lnorm_exp = nexttoward_GR_max_pexp + setf.exp FR_lnorm_exp = GR_max_pexp fclass.m p7,p0 = f8, 0x7 nop.i 999 } @@ -191,12 +193,12 @@ GLOBAL_LIBM_ENTRY(nexttoward) // Move exp+-1 and smallest normal significand to fp regs for special cases // Is x=inf? 
{ .mfi - setf.exp NEXTTOWARD_exp1 = nexttoward_GR_exp1 + setf.exp FR_exp1 = GR_exp1 fclass.m p6,p0 = f8, 0x23 - addl nexttoward_GR_exp_mask = 0x1ffff, r0 + addl GR_exp_mask = 0x1ffff, r0 } { .mfb - setf.sig NEXTTOWARD_snorm_sig = nexttoward_GR_snorm_sig + setf.sig FR_snorm_sig = GR_snorm_sig (p9) fma.s0 f8 = f8,f1,f9 (p9) br.ret.spnt b0 ;; // Exit if y=nan } @@ -204,16 +206,16 @@ GLOBAL_LIBM_ENTRY(nexttoward) // Move largest denormal significand to fp regs for special cases // Save x { .mfb - setf.sig NEXTTOWARD_lden_sig = nexttoward_GR_lden_sig - mov NEXTTOWARD_save_f8 = f8 -(p7) br.cond.spnt NEXTTOWARD_ZERO ;; // Exit if x=0 + setf.sig FR_lden_sig = GR_lden_sig + mov FR_save_f8 = f8 +(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0 } // Mask off the sign to get x_exp { .mfb - and nexttoward_GR_x_exp = nexttoward_GR_exp_mask, nexttoward_GR_exp + and GR_x_exp = GR_exp_mask, GR_exp nop.f 999 -(p6) br.cond.spnt NEXTTOWARD_INF ;; // Exit if x=inf +(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf } // Check 6 special cases when significand rolls over: @@ -232,35 +234,35 @@ GLOBAL_LIBM_ENTRY(nexttoward) // // Form exponent of smallest double denormal (if normalized register format) { .mmi - adds nexttoward_GR_min_den_rexp = -52, nexttoward_GR_min_pexp -(p12) cmp.eq.unc p6,p0 = nexttoward_GR_new_sig, r0 -(p13) cmp.eq.unc p8,p10 = nexttoward_GR_new_sig, nexttoward_GR_lden_sig ;; + adds GR_min_den_rexp = -52, GR_min_pexp +(p12) cmp.eq.unc p6,p0 = GR_new_sig, r0 +(p13) cmp.eq.unc p8,p10 = GR_new_sig, GR_lden_sig ;; } { .mmi -(p6) cmp.lt.unc p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_max_pexp -(p8) cmp.gt.unc p8,p9 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp -(p10) cmp.eq.unc p10,p0 = nexttoward_GR_new_sig, r0 ;; +(p6) cmp.lt.unc p6,p7 = GR_x_exp, GR_max_pexp +(p8) cmp.gt.unc p8,p9 = GR_x_exp, GR_min_pexp +(p10) cmp.eq.unc p10,p0 = GR_new_sig, r0 ;; } // Create small normal in case need to generate underflow flag { .mfi -(p10) cmp.le.unc p10,p0 = nexttoward_GR_x_exp, 
nexttoward_GR_min_pexp - fmerge.se NEXTTOWARD_tmp = NEXTTOWARD_sden_exp, NEXTTOWARD_lnorm_sig -(p9) cmp.gt.unc p9,p14 = nexttoward_GR_x_exp, nexttoward_GR_min_den_rexp +(p10) cmp.le.unc p10,p0 = GR_x_exp, GR_min_pexp + fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig +(p9) cmp.gt.unc p9,p14 = GR_x_exp, GR_min_den_rexp } // Branch if cases 1, 2, 3 { .bbb -(p6) br.cond.spnt NEXTTOWARD_EXPUP -(p7) br.cond.spnt NEXTTOWARD_OVERFLOW -(p8) br.cond.spnt NEXTTOWARD_EXPDOWN ;; +(p6) br.cond.spnt NEXT_EXPUP +(p7) br.cond.spnt NEXT_OVERFLOW +(p8) br.cond.spnt NEXT_EXPDOWN ;; } // Branch if cases 4, 5, 6 { .bbb -(p9) br.cond.spnt NEXTTOWARD_NORM_TO_DENORM -(p10) br.cond.spnt NEXTTOWARD_UNDERFLOW_TO_ZERO -(p14) br.cond.spnt NEXTTOWARD_UNDERFLOW_TO_ZERO ;; +(p9) br.cond.spnt NEXT_NORM_TO_DENORM +(p10) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO +(p14) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO ;; } // Here if no special cases @@ -268,68 +270,72 @@ GLOBAL_LIBM_ENTRY(nexttoward) // Case 1: x_exp=min_exp, x_sig=unnormalized // Case 2: x_exp<min_exp { .mfi - cmp.lt p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp - fmerge.se f8 = NEXTTOWARD_new_exp, NEXTTOWARD_new_sig + cmp.lt p6,p7 = GR_x_exp, GR_min_pexp + fmerge.se f8 = FR_new_exp, FR_new_sig nop.i 999 ;; } { .mfi nop.m 999 nop.f 999 -(p7) tbit.z p6,p0 = nexttoward_GR_new_sig, 63 ;; +(p7) tbit.z p6,p0 = GR_new_sig, 63 ;; } -NEXTTOWARD_COMMON_FINISH: +NEXT_COMMON_FINISH: // Force underflow and inexact if denormal result { .mfi nop.m 999 -(p6) fma.d.s0 NEXTTOWARD_tmp = NEXTTOWARD_tmp,NEXTTOWARD_tmp,f0 - nop.i 999 ;; +(p6) fma.d.s0 FR_tmp = FR_tmp,FR_tmp,f0 + nop.i 999 +} +{ .mfb + nop.m 999 + fnorm.d.s0 f8 = f8 // Final normalization to result precision +(p6) br.cond.spnt NEXT_UNDERFLOW ;; } -// Final normalization to result precision and exit { .mfb nop.m 999 - fnorm.d.s0 f8 = f8 + nop.f 999 br.ret.sptk b0;; } //Special cases -NEXTTOWARD_EXPUP: +NEXT_EXPUP: { .mfb - cmp.lt p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp - fmerge.se f8 = 
NEXTTOWARD_exp1, NEXTTOWARD_snorm_sig - br.cond.sptk NEXTTOWARD_COMMON_FINISH ;; + cmp.lt p6,p7 = GR_x_exp, GR_min_pexp + fmerge.se f8 = FR_exp1, FR_snorm_sig + br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXTTOWARD_EXPDOWN: +NEXT_EXPDOWN: { .mfb - cmp.lt p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp - fmerge.se f8 = NEXTTOWARD_exp1, NEXTTOWARD_lnorm_sig - br.cond.sptk NEXTTOWARD_COMMON_FINISH ;; + cmp.lt p6,p7 = GR_x_exp, GR_min_pexp + fmerge.se f8 = FR_exp1, FR_lnorm_sig + br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXTTOWARD_NORM_TO_DENORM: +NEXT_NORM_TO_DENORM: { .mfi nop.m 999 - fmerge.se f8 = NEXTTOWARD_new_exp, NEXTTOWARD_lden_sig + fmerge.se f8 = FR_new_exp, FR_lden_sig nop.i 999 } // Force underflow and inexact if denormal result { .mfb nop.m 999 - fma.d.s0 NEXTTOWARD_tmp = NEXTTOWARD_tmp,NEXTTOWARD_tmp,f0 - br.ret.sptk b0 ;; + fma.d.s0 FR_tmp = FR_tmp,FR_tmp,f0 + br.cond.sptk NEXT_UNDERFLOW ;; } -NEXTTOWARD_UNDERFLOW_TO_ZERO: +NEXT_UNDERFLOW_TO_ZERO: { .mfb cmp.eq p6,p0 = r0,r0 - fmerge.s f8 = NEXTTOWARD_save_f8,f0 - br.cond.sptk NEXTTOWARD_COMMON_FINISH ;; + fmerge.s f8 = FR_save_f8,f0 + br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXTTOWARD_INF: +NEXT_INF: // Here if f8 is +- infinity // INF // if f8 is +inf, no matter what y is return largest double @@ -337,17 +343,17 @@ NEXTTOWARD_INF: { .mfi nop.m 999 - fmerge.se NEXTTOWARD_lnorm = NEXTTOWARD_lnorm_exp,NEXTTOWARD_lnorm_sig + fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig nop.i 999 ;; } { .mfb nop.m 999 - fmerge.s f8 = f8,NEXTTOWARD_lnorm + fmerge.s f8 = f8,FR_lnorm br.ret.sptk b0 ;; } -NEXTTOWARD_ZERO: +NEXT_ZERO: // Here if f8 is +- zero // ZERO @@ -356,76 +362,72 @@ NEXTTOWARD_ZERO: { .mfi nop.m 999 - fmerge.se NEXTTOWARD_sden = NEXTTOWARD_sden_exp,NEXTTOWARD_sden_sig + fmerge.se FR_sden = FR_sden_exp,FR_sden_sig nop.i 999 ;; } // Create small normal to generate underflow flag { .mfi nop.m 999 - fmerge.se NEXTTOWARD_tmp = NEXTTOWARD_sden_exp, NEXTTOWARD_lnorm_sig + fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig 
nop.i 999 ;; } // Add correct sign from direction arg { .mfi nop.m 999 - fmerge.s f8 = f9,NEXTTOWARD_sden + fmerge.s f8 = f9,FR_sden nop.i 999 ;; } // Force underflow and inexact flags { .mfb nop.m 999 - fma.d.s0 NEXTTOWARD_tmp = NEXTTOWARD_tmp,NEXTTOWARD_tmp,f0 - br.ret.sptk b0 ;; + fma.d.s0 FR_tmp = FR_tmp,FR_tmp,f0 + br.cond.sptk NEXT_UNDERFLOW ;; } -GLOBAL_LIBM_END(nexttoward) -// Stack operations when calling error support. -// (1) (2) (3) (call) (4) -// sp -> + psp -> + psp -> + sp -> + -// | | | | -// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8 -// | | | | -// | <-GR_Y Y2->| Y2 ->| <- GR_Y | -// | | | | -// | | <- GR_X X1 ->| | -// | | | | -// sp-64 -> + sp -> + sp -> + + -// save ar.pfs save b0 restore gp -// save gp restore ar.pfs - - +NEXT_UNDERFLOW: +// Here if result is a denorm, or input is finite and result is zero +// Call error support to report possible range error +{ .mib + alloc r32=ar.pfs,2,2,4,0 + mov GR_Parameter_TAG = 271 // Error code + br.cond.sptk __libm_error_region // Branch to error call +} +;; -LOCAL_LIBM_ENTRY(__libm_error_region) -NEXTTOWARD_OVERFLOW: -// Here if f8 is finite, but result will be infinite +NEXT_OVERFLOW: +// Here if input is finite, but result will be infinite // Use frcpa to generate infinity of correct sign // Call error support to report possible range error -.prologue - { .mfi alloc r32=ar.pfs,2,2,4,0 - frcpa.s1 f8,p6 = NEXTTOWARD_save_f8, f0 + frcpa.s1 f8,p6 = FR_save_f8, f0 nop.i 999 ;; } // Create largest double { .mfi nop.m 999 - fmerge.se NEXTTOWARD_lnorm = NEXTTOWARD_lnorm_exp,NEXTTOWARD_lnorm_sig + fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig nop.i 999 ;; } // Force overflow and inexact flags to be set -{ .mfi - mov r39 = 199 // Error code - fma.d.s0 NEXTTOWARD_tmp = NEXTTOWARD_lnorm,NEXTTOWARD_lnorm,f0 - nop.i 999 +{ .mfb + mov GR_Parameter_TAG = 199 // Error code + fma.d.s0 FR_tmp = FR_lnorm,FR_lnorm,f0 + br.cond.sptk __libm_error_region // Branch to error call } ;; +GLOBAL_LIBM_END(nexttoward) + + 
+LOCAL_LIBM_ENTRY(__libm_error_region) +.prologue + // (1) { .mfi add GR_Parameter_Y=-32,sp // Parameter 2 value @@ -452,7 +454,7 @@ NEXTTOWARD_OVERFLOW: .body // (3) { .mib - stfd [GR_Parameter_X] = NEXTTOWARD_save_f8 // STORE Parameter 1 on stack + stfd [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address nop.b 0 } diff --git a/sysdeps/ia64/fpu/s_nexttowardf.S b/sysdeps/ia64/fpu/s_nexttowardf.S index fb1adae..b8b9762 100644 --- a/sysdeps/ia64/fpu/s_nexttowardf.S +++ b/sysdeps/ia64/fpu/s_nexttowardf.S @@ -1,7 +1,7 @@ .file "nexttowardf.s" -// Copyright (c) 2001 - 2003, Intel Corporation +// Copyright (c) 2001 - 2004, Intel Corporation // All rights reserved. // // Contributed 2001 by the Intel Numerics Group, Intel Corporation @@ -43,6 +43,7 @@ // 08/23/01 Corrected error tag number // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align +// 12/14/04 Added error handling on underflow. 
// // API //============================================================== @@ -52,21 +53,21 @@ // // Registers used //============================================================== -nexttoward_GR_max_pexp = r14 -nexttoward_GR_min_pexp = r15 -nexttoward_GR_exp = r16 -nexttoward_GR_sig = r17 -nexttoward_GR_lnorm_sig = r18 -nexttoward_GR_sign_mask = r19 -nexttoward_GR_exp_mask = r20 -nexttoward_GR_sden_sig = r21 -nexttoward_GR_new_sig = r22 -nexttoward_GR_new_exp = r23 -nexttoward_GR_lden_sig = r24 -nexttoward_GR_snorm_sig = r25 -nexttoward_GR_exp1 = r26 -nexttoward_GR_x_exp = r27 -nexttoward_GR_min_den_rexp = r28 +GR_max_pexp = r14 +GR_min_pexp = r15 +GR_exp = r16 +GR_sig = r17 +GR_lnorm_sig = r18 +GR_sign_mask = r19 +GR_exp_mask = r20 +GR_sden_sig = r21 +GR_new_sig = r22 +GR_new_exp = r23 +GR_lden_sig = r24 +GR_snorm_sig = r25 +GR_exp1 = r26 +GR_x_exp = r27 +GR_min_den_rexp = r28 // r36-39 parameters for libm_error_support GR_SAVE_B0 = r34 @@ -76,20 +77,21 @@ GR_SAVE_PFS = r32 GR_Parameter_X = r36 GR_Parameter_Y = r37 GR_Parameter_RESULT = r38 - -NEXTTOWARD_lnorm_sig = f10 -NEXTTOWARD_lnorm_exp = f11 -NEXTTOWARD_lnorm = f12 -NEXTTOWARD_sden_sig = f13 -NEXTTOWARD_sden_exp = f14 -NEXTTOWARD_sden = f15 -NEXTTOWARD_save_f8 = f33 -NEXTTOWARD_new_exp = f34 -NEXTTOWARD_new_sig = f35 -NEXTTOWARD_lden_sig = f36 -NEXTTOWARD_snorm_sig = f37 -NEXTTOWARD_exp1 = f38 -NEXTTOWARD_tmp = f39 +GR_Parameter_TAG = r39 + +FR_lnorm_sig = f10 +FR_lnorm_exp = f11 +FR_lnorm = f12 +FR_sden_sig = f13 +FR_sden_exp = f14 +FR_sden = f15 +FR_save_f8 = f33 +FR_new_exp = f34 +FR_new_sig = f35 +FR_lden_sig = f36 +FR_snorm_sig = f37 +FR_exp1 = f38 +FR_tmp = f39 // // Overview of operation @@ -104,21 +106,21 @@ GLOBAL_LIBM_ENTRY(nexttowardf) // Extract signexp from x // Form smallest denormal significand = ulp size { .mlx - getf.exp nexttoward_GR_exp = f8 - movl nexttoward_GR_sden_sig = 0x0000010000000000 + getf.exp GR_exp = f8 + movl GR_sden_sig = 0x0000010000000000 } // Form largest normal exponent 
// Is x < y ? p10 if yes, p11 if no // Form smallest normal exponent { .mfi - addl nexttoward_GR_max_pexp = 0x1007e, r0 + addl GR_max_pexp = 0x1007e, r0 fcmp.lt.s1 p10,p11 = f8, f9 - addl nexttoward_GR_min_pexp = 0x0ff81, r0 ;; + addl GR_min_pexp = 0x0ff81, r0 ;; } // Is x=y? { .mfi - getf.sig nexttoward_GR_sig = f8 + getf.sig GR_sig = f8 fcmp.eq.s0 p6,p0 = f8, f9 nop.i 0 } @@ -126,14 +128,14 @@ GLOBAL_LIBM_ENTRY(nexttowardf) // Form largest normal significand { .mlx nop.m 0 - movl nexttoward_GR_lnorm_sig = 0xffffff0000000000 ;; + movl GR_lnorm_sig = 0xffffff0000000000 ;; } // Move largest normal significand to fp reg for special cases { .mfi - setf.sig NEXTTOWARD_lnorm_sig = nexttoward_GR_lnorm_sig + setf.sig FR_lnorm_sig = GR_lnorm_sig nop.f 0 - addl nexttoward_GR_sign_mask = 0x20000, r0 ;; + addl GR_sign_mask = 0x20000, r0 ;; } // Move smallest denormal significand and signexp to fp regs @@ -142,14 +144,14 @@ GLOBAL_LIBM_ENTRY(nexttowardf) // It increases (p12 set) if x<y and x>=0 or if x>y and x<0 // It decreases (p13 set) if x<y and x<0 or if x>y and x>=0 { .mfi - setf.sig NEXTTOWARD_sden_sig = nexttoward_GR_sden_sig + setf.sig FR_sden_sig = GR_sden_sig fclass.m p8,p0 = f8, 0xc3 -(p10) cmp.lt p12,p13 = nexttoward_GR_exp, nexttoward_GR_sign_mask +(p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask } { .mfi - setf.exp NEXTTOWARD_sden_exp = nexttoward_GR_min_pexp + setf.exp FR_sden_exp = GR_min_pexp nop.f 999 -(p11) cmp.ge p12,p13 = nexttoward_GR_exp, nexttoward_GR_sign_mask ;; +(p11) cmp.ge p12,p13 = GR_exp, GR_sign_mask ;; } .pred.rel "mutex",p12,p13 @@ -158,33 +160,33 @@ GLOBAL_LIBM_ENTRY(nexttowardf) // If x=y set result to y // Form smallest normal significand and largest denormal significand { .mfi -(p12) add nexttoward_GR_new_sig = nexttoward_GR_sig, nexttoward_GR_sden_sig +(p12) add GR_new_sig = GR_sig, GR_sden_sig (p6) fnorm.s.s0 f8=f9 //Normalise - dep.z nexttoward_GR_snorm_sig = 1,63,1 // 0x8000000000000000 + dep.z GR_snorm_sig = 1,63,1 // 0x8000000000000000 } 
{ .mlx -(p13) sub nexttoward_GR_new_sig = nexttoward_GR_sig, nexttoward_GR_sden_sig - movl nexttoward_GR_lden_sig = 0x7fffff0000000000 ;; +(p13) sub GR_new_sig = GR_sig, GR_sden_sig + movl GR_lden_sig = 0x7fffff0000000000 ;; } // Move expected result significand and signexp to fp regs // Is y=nan? // Form new exponent in case result exponent needs incrementing or decrementing { .mfi - setf.exp NEXTTOWARD_new_exp = nexttoward_GR_exp + setf.exp FR_new_exp = GR_exp fclass.m p9,p0 = f9, 0xc3 -(p12) add nexttoward_GR_exp1 = 1, nexttoward_GR_exp +(p12) add GR_exp1 = 1, GR_exp } { .mib - setf.sig NEXTTOWARD_new_sig = nexttoward_GR_new_sig -(p13) add nexttoward_GR_exp1 = -1, nexttoward_GR_exp + setf.sig FR_new_sig = GR_new_sig +(p13) add GR_exp1 = -1, GR_exp (p6) br.ret.spnt b0 ;; // Exit if x=y } // Move largest normal signexp to fp reg for special cases // Is x=zero? { .mfi - setf.exp NEXTTOWARD_lnorm_exp = nexttoward_GR_max_pexp + setf.exp FR_lnorm_exp = GR_max_pexp fclass.m p7,p0 = f8, 0x7 nop.i 999 } @@ -197,12 +199,12 @@ GLOBAL_LIBM_ENTRY(nexttowardf) // Move exp+-1 and smallest normal significand to fp regs for special cases // Is x=inf? 
{ .mfi - setf.exp NEXTTOWARD_exp1 = nexttoward_GR_exp1 + setf.exp FR_exp1 = GR_exp1 fclass.m p6,p0 = f8, 0x23 - addl nexttoward_GR_exp_mask = 0x1ffff, r0 + addl GR_exp_mask = 0x1ffff, r0 } { .mfb - setf.sig NEXTTOWARD_snorm_sig = nexttoward_GR_snorm_sig + setf.sig FR_snorm_sig = GR_snorm_sig (p9) fma.s0 f8 = f8,f1,f9 (p9) br.ret.spnt b0 ;; // Exit if y=nan } @@ -210,16 +212,16 @@ GLOBAL_LIBM_ENTRY(nexttowardf) // Move largest denormal significand to fp regs for special cases // Save x { .mfb - setf.sig NEXTTOWARD_lden_sig = nexttoward_GR_lden_sig - mov NEXTTOWARD_save_f8 = f8 -(p7) br.cond.spnt NEXTTOWARD_ZERO ;; // Exit if x=0 + setf.sig FR_lden_sig = GR_lden_sig + mov FR_save_f8 = f8 +(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0 } // Mask off the sign to get x_exp { .mfb - and nexttoward_GR_x_exp = nexttoward_GR_exp_mask, nexttoward_GR_exp + and GR_x_exp = GR_exp_mask, GR_exp nop.f 999 -(p6) br.cond.spnt NEXTTOWARD_INF ;; // Exit if x=inf +(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf } // Check 6 special cases when significand rolls over: @@ -238,35 +240,35 @@ GLOBAL_LIBM_ENTRY(nexttowardf) // // Form exponent of smallest float denormal (if normalized register format) { .mmi - adds nexttoward_GR_min_den_rexp = -23, nexttoward_GR_min_pexp -(p12) cmp.eq.unc p6,p0 = nexttoward_GR_new_sig, r0 -(p13) cmp.eq.unc p8,p10 = nexttoward_GR_new_sig, nexttoward_GR_lden_sig ;; + adds GR_min_den_rexp = -23, GR_min_pexp +(p12) cmp.eq.unc p6,p0 = GR_new_sig, r0 +(p13) cmp.eq.unc p8,p10 = GR_new_sig, GR_lden_sig ;; } { .mmi -(p6) cmp.lt.unc p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_max_pexp -(p8) cmp.gt.unc p8,p9 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp -(p10) cmp.eq.unc p10,p0 = nexttoward_GR_new_sig, r0 ;; +(p6) cmp.lt.unc p6,p7 = GR_x_exp, GR_max_pexp +(p8) cmp.gt.unc p8,p9 = GR_x_exp, GR_min_pexp +(p10) cmp.eq.unc p10,p0 = GR_new_sig, r0 ;; } // Create small normal in case need to generate underflow flag { .mfi -(p10) cmp.le.unc p10,p0 = nexttoward_GR_x_exp, 
nexttoward_GR_min_pexp - fmerge.se NEXTTOWARD_tmp = NEXTTOWARD_sden_exp, NEXTTOWARD_lnorm_sig -(p9) cmp.gt.unc p9,p14 = nexttoward_GR_x_exp, nexttoward_GR_min_den_rexp +(p10) cmp.le.unc p10,p0 = GR_x_exp, GR_min_pexp + fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig +(p9) cmp.gt.unc p9,p14 = GR_x_exp, GR_min_den_rexp } // Branch if cases 1, 2, 3 { .bbb -(p6) br.cond.spnt NEXTTOWARD_EXPUP -(p7) br.cond.spnt NEXTTOWARD_OVERFLOW -(p8) br.cond.spnt NEXTTOWARD_EXPDOWN ;; +(p6) br.cond.spnt NEXT_EXPUP +(p7) br.cond.spnt NEXT_OVERFLOW +(p8) br.cond.spnt NEXT_EXPDOWN ;; } // Branch if cases 4, 5, 6 { .bbb -(p9) br.cond.spnt NEXTTOWARD_NORM_TO_DENORM -(p10) br.cond.spnt NEXTTOWARD_UNDERFLOW_TO_ZERO -(p14) br.cond.spnt NEXTTOWARD_UNDERFLOW_TO_ZERO ;; +(p9) br.cond.spnt NEXT_NORM_TO_DENORM +(p10) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO +(p14) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO ;; } // Here if no special cases @@ -274,68 +276,72 @@ GLOBAL_LIBM_ENTRY(nexttowardf) // Case 1: x_exp=min_exp, x_sig=unnormalized // Case 2: x_exp<min_exp { .mfi - cmp.lt p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp - fmerge.se f8 = NEXTTOWARD_new_exp, NEXTTOWARD_new_sig + cmp.lt p6,p7 = GR_x_exp, GR_min_pexp + fmerge.se f8 = FR_new_exp, FR_new_sig nop.i 999 ;; } { .mfi nop.m 999 nop.f 999 -(p7) tbit.z p6,p0 = nexttoward_GR_new_sig, 63 ;; +(p7) tbit.z p6,p0 = GR_new_sig, 63 ;; } -NEXTTOWARD_COMMON_FINISH: +NEXT_COMMON_FINISH: // Force underflow and inexact if denormal result { .mfi nop.m 999 -(p6) fma.s.s0 NEXTTOWARD_tmp = NEXTTOWARD_tmp,NEXTTOWARD_tmp,f0 - nop.i 999 ;; +(p6) fma.s.s0 FR_tmp = FR_tmp,FR_tmp,f0 + nop.i 999 +} +{ .mfb + nop.m 999 + fnorm.s.s0 f8 = f8 // Final normalization to result precision +(p6) br.cond.spnt NEXT_UNDERFLOW ;; } -// Final normalization to result precision and exit { .mfb nop.m 999 - fnorm.s.s0 f8 = f8 + nop.f 999 br.ret.sptk b0;; } //Special cases -NEXTTOWARD_EXPUP: +NEXT_EXPUP: { .mfb - cmp.lt p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp - fmerge.se f8 = 
NEXTTOWARD_exp1, NEXTTOWARD_snorm_sig - br.cond.sptk NEXTTOWARD_COMMON_FINISH ;; + cmp.lt p6,p7 = GR_x_exp, GR_min_pexp + fmerge.se f8 = FR_exp1, FR_snorm_sig + br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXTTOWARD_EXPDOWN: +NEXT_EXPDOWN: { .mfb - cmp.lt p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp - fmerge.se f8 = NEXTTOWARD_exp1, NEXTTOWARD_lnorm_sig - br.cond.sptk NEXTTOWARD_COMMON_FINISH ;; + cmp.lt p6,p7 = GR_x_exp, GR_min_pexp + fmerge.se f8 = FR_exp1, FR_lnorm_sig + br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXTTOWARD_NORM_TO_DENORM: +NEXT_NORM_TO_DENORM: { .mfi nop.m 999 - fmerge.se f8 = NEXTTOWARD_new_exp, NEXTTOWARD_lden_sig + fmerge.se f8 = FR_new_exp, FR_lden_sig nop.i 999 } // Force underflow and inexact { .mfb nop.m 999 - fma.s.s0 NEXTTOWARD_tmp = NEXTTOWARD_tmp,NEXTTOWARD_tmp,f0 - br.ret.sptk b0 ;; + fma.s.s0 FR_tmp = FR_tmp,FR_tmp,f0 + br.cond.sptk NEXT_UNDERFLOW ;; } -NEXTTOWARD_UNDERFLOW_TO_ZERO: +NEXT_UNDERFLOW_TO_ZERO: { .mfb cmp.eq p6,p0 = r0,r0 - fmerge.s f8 = NEXTTOWARD_save_f8,f0 - br.cond.sptk NEXTTOWARD_COMMON_FINISH ;; + fmerge.s f8 = FR_save_f8,f0 + br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXTTOWARD_INF: +NEXT_INF: // Here if f8 is +- infinity // INF // if f8 is +inf, no matter what y is return largest float @@ -343,17 +349,17 @@ NEXTTOWARD_INF: { .mfi nop.m 999 - fmerge.se NEXTTOWARD_lnorm = NEXTTOWARD_lnorm_exp,NEXTTOWARD_lnorm_sig + fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig nop.i 999 ;; } { .mfb nop.m 999 - fmerge.s f8 = f8,NEXTTOWARD_lnorm + fmerge.s f8 = f8,FR_lnorm br.ret.sptk b0 ;; } -NEXTTOWARD_ZERO: +NEXT_ZERO: // Here if f8 is +- zero // ZERO @@ -362,76 +368,72 @@ NEXTTOWARD_ZERO: { .mfi nop.m 999 - fmerge.se NEXTTOWARD_sden = NEXTTOWARD_sden_exp,NEXTTOWARD_sden_sig + fmerge.se FR_sden = FR_sden_exp,FR_sden_sig nop.i 999 ;; } // Create small normal to generate underflow flag { .mfi nop.m 999 - fmerge.se NEXTTOWARD_tmp = NEXTTOWARD_sden_exp, NEXTTOWARD_lnorm_sig + fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig nop.i 999 ;; } // 
Add correct sign from direction arg { .mfi nop.m 999 - fmerge.s f8 = f9,NEXTTOWARD_sden - nop.i 999;; + fmerge.s f8 = f9,FR_sden + nop.i 999 ;; } // Force underflow and inexact flags { .mfb nop.m 999 - fma.s.s0 NEXTTOWARD_tmp = NEXTTOWARD_tmp,NEXTTOWARD_tmp,f0 - br.ret.sptk b0 ;; + fma.s.s0 FR_tmp = FR_tmp,FR_tmp,f0 + br.cond.sptk NEXT_UNDERFLOW ;; } -GLOBAL_LIBM_END(nexttowardf) -// Stack operations when calling error support. -// (1) (2) (3) (call) (4) -// sp -> + psp -> + psp -> + sp -> + -// | | | | -// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8 -// | | | | -// | <-GR_Y Y2->| Y2 ->| <- GR_Y | -// | | | | -// | | <- GR_X X1 ->| | -// | | | | -// sp-64 -> + sp -> + sp -> + + -// save ar.pfs save b0 restore gp -// save gp restore ar.pfs - - +NEXT_UNDERFLOW: +// Here if result is a denorm, or input is finite and result is zero +// Call error support to report possible range error +{ .mib + alloc r32=ar.pfs,2,2,4,0 + mov GR_Parameter_TAG = 272 // Error code + br.cond.sptk __libm_error_region // Branch to error call +} +;; -LOCAL_LIBM_ENTRY(__libm_error_region) -NEXTTOWARD_OVERFLOW: -// Here if f8 is finite, but result will be infinite +NEXT_OVERFLOW: +// Here if input is finite, but result will be infinite // Use frcpa to generate infinity of correct sign // Call error support to report possible range error -.prologue - { .mfi alloc r32=ar.pfs,2,2,4,0 - frcpa.s1 f8,p6 = NEXTTOWARD_save_f8, f0 - nop.i 999 + frcpa.s1 f8,p6 = FR_save_f8, f0 + nop.i 999 ;; } -// Create largest float +// Create largest double { .mfi nop.m 999 - fmerge.se NEXTTOWARD_lnorm = NEXTTOWARD_lnorm_exp,NEXTTOWARD_lnorm_sig + fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig nop.i 999 ;; } // Force overflow and inexact flags to be set -{ .mfi - mov r39 = 200 // Error code - fma.s.s0 NEXTTOWARD_tmp = NEXTTOWARD_lnorm,NEXTTOWARD_lnorm,f0 - nop.i 999 +{ .mfb + mov GR_Parameter_TAG = 200 // Error code + fma.s.s0 FR_tmp = FR_lnorm,FR_lnorm,f0 + br.cond.sptk __libm_error_region // Branch to error call } ;; 
+GLOBAL_LIBM_END(nexttowardf) + + +LOCAL_LIBM_ENTRY(__libm_error_region) +.prologue + // (1) { .mfi add GR_Parameter_Y=-32,sp // Parameter 2 value @@ -458,7 +460,7 @@ NEXTTOWARD_OVERFLOW: .body // (3) { .mib - stfs [GR_Parameter_X] = NEXTTOWARD_save_f8 // STORE Parameter 1 on stack + stfs [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address nop.b 0 } diff --git a/sysdeps/ia64/fpu/s_nexttowardl.S b/sysdeps/ia64/fpu/s_nexttowardl.S index 9c79f2c..fa2db12 100644 --- a/sysdeps/ia64/fpu/s_nexttowardl.S +++ b/sysdeps/ia64/fpu/s_nexttowardl.S @@ -1,7 +1,7 @@ .file "nexttowardl.s" -// Copyright (c) 2001 - 2003, Intel Corporation +// Copyright (c) 2001 - 2004, Intel Corporation // All rights reserved. // // Contributed 2001 by the Intel Numerics Group, Intel Corporation @@ -43,6 +43,7 @@ // 08/23/01 Corrected error tag number // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align +// 12/14/04 Added error handling on underflow. 
// // API //============================================================== @@ -52,20 +53,20 @@ // // Registers used //============================================================== -nexttoward_GR_max_pexp = r14 -nexttoward_GR_min_pexp = r15 -nexttoward_GR_exp = r16 -nexttoward_GR_sig = r17 -nexttoward_GR_lnorm_sig = r18 -nexttoward_GR_sign_mask = r19 -nexttoward_GR_exp_mask = r20 -nexttoward_GR_sden_sig = r21 -nexttoward_GR_new_sig = r22 -nexttoward_GR_new_exp = r23 -nexttoward_GR_lden_sig = r24 -nexttoward_GR_snorm_sig = r25 -nexttoward_GR_exp1 = r26 -nexttoward_GR_x_exp = r27 +GR_max_pexp = r14 +GR_min_pexp = r15 +GR_exp = r16 +GR_sig = r17 +GR_lnorm_sig = r18 +GR_sign_mask = r19 +GR_exp_mask = r20 +GR_sden_sig = r21 +GR_new_sig = r22 +GR_new_exp = r23 +GR_lden_sig = r24 +GR_snorm_sig = r25 +GR_exp1 = r26 +GR_x_exp = r27 // r36-39 parameters for libm_error_support GR_SAVE_B0 = r34 @@ -75,21 +76,22 @@ GR_SAVE_PFS = r32 GR_Parameter_X = r36 GR_Parameter_Y = r37 GR_Parameter_RESULT = r38 - -NEXTTOWARD_lnorm_sig = f10 -NEXTTOWARD_lnorm_exp = f11 -NEXTTOWARD_lnorm = f12 -NEXTTOWARD_sden_sig = f13 -NEXTTOWARD_den_exp = f14 -NEXTTOWARD_sden = f15 -NEXTTOWARD_snorm_exp = f32 -NEXTTOWARD_save_f8 = f33 -NEXTTOWARD_new_exp = f34 -NEXTTOWARD_new_sig = f35 -NEXTTOWARD_lden_sig = f36 -NEXTTOWARD_snorm_sig = f37 -NEXTTOWARD_exp1 = f38 -NEXTTOWARD_tmp = f39 +GR_Parameter_TAG = r39 + +FR_lnorm_sig = f10 +FR_lnorm_exp = f11 +FR_lnorm = f12 +FR_sden_sig = f13 +FR_den_exp = f14 +FR_sden = f15 +FR_snorm_exp = f32 +FR_save_f8 = f33 +FR_new_exp = f34 +FR_new_sig = f35 +FR_lden_sig = f36 +FR_snorm_sig = f37 +FR_exp1 = f38 +FR_tmp = f39 // // Overview of operation @@ -105,31 +107,31 @@ GLOBAL_LIBM_ENTRY(nexttowardl) // Is x < y ? 
p10 if yes, p11 if no // Form smallest denormal significand = ulp size { .mfi - getf.exp nexttoward_GR_exp = f8 + getf.exp GR_exp = f8 fcmp.lt.s1 p10,p11 = f8, f9 - addl nexttoward_GR_sden_sig = 0x1, r0 + addl GR_sden_sig = 0x1, r0 } // Form largest normal significand 0xffffffffffffffff // Form smallest normal exponent { .mfi - addl nexttoward_GR_lnorm_sig = -0x1,r0 + addl GR_lnorm_sig = -0x1,r0 nop.f 999 - addl nexttoward_GR_min_pexp = 0x0c001, r0 ;; + addl GR_min_pexp = 0x0c001, r0 ;; } // Extract significand from x // Is x=y? This fcmp also sets Invalid and Denormal if required // Form largest normal exponent { .mfi - getf.sig nexttoward_GR_sig = f8 + getf.sig GR_sig = f8 fcmp.eq.s0 p6,p0 = f8, f9 - addl nexttoward_GR_max_pexp = 0x13ffe, r0 + addl GR_max_pexp = 0x13ffe, r0 } // Move largest normal significand to fp reg for special cases { .mfi - setf.sig NEXTTOWARD_lnorm_sig = nexttoward_GR_lnorm_sig + setf.sig FR_lnorm_sig = GR_lnorm_sig nop.f 999 - addl nexttoward_GR_sign_mask = 0x20000, r0 ;; + addl GR_sign_mask = 0x20000, r0 ;; } // Move smallest denormal significand and exp to fp regs @@ -138,15 +140,15 @@ GLOBAL_LIBM_ENTRY(nexttowardl) // It increases (p12 set) if x<y and x>=0 or if x>y and x<0 // It decreases (p13 set) if x<y and x<0 or if x>y and x>=0 { .mfi - setf.sig NEXTTOWARD_sden_sig = nexttoward_GR_sden_sig + setf.sig FR_sden_sig = GR_sden_sig fclass.m p8,p0 = f8, 0xc3 -(p10) cmp.lt p12,p13 = nexttoward_GR_exp, nexttoward_GR_sign_mask +(p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask } // Move smallest normal exp to fp regs { .mfi - setf.exp NEXTTOWARD_snorm_exp = nexttoward_GR_min_pexp + setf.exp FR_snorm_exp = GR_min_pexp nop.f 999 -(p11) cmp.ge p12,p13 = nexttoward_GR_exp, nexttoward_GR_sign_mask ;; +(p11) cmp.ge p12,p13 = GR_exp, GR_sign_mask ;; } .pred.rel "mutex",p12,p13 @@ -155,38 +157,38 @@ GLOBAL_LIBM_ENTRY(nexttowardl) // If x=y set result to y // Form smallest normal significand and largest denormal significand { .mfi -(p12) add 
nexttoward_GR_new_sig = nexttoward_GR_sig, nexttoward_GR_sden_sig +(p12) add GR_new_sig = GR_sig, GR_sden_sig (p6) fmerge.s f8=f9,f9 - dep.z nexttoward_GR_snorm_sig = 1,63,1 // 0x8000000000000000 + dep.z GR_snorm_sig = 1,63,1 // 0x8000000000000000 } { .mlx -(p13) sub nexttoward_GR_new_sig = nexttoward_GR_sig, nexttoward_GR_sden_sig - movl nexttoward_GR_lden_sig = 0x7fffffffffffffff ;; +(p13) sub GR_new_sig = GR_sig, GR_sden_sig + movl GR_lden_sig = 0x7fffffffffffffff ;; } // Move expected result significand and signexp to fp regs // Is y=nan? // Form new exponent in case result exponent needs incrementing or decrementing { .mfi - setf.exp NEXTTOWARD_new_exp = nexttoward_GR_exp + setf.exp FR_new_exp = GR_exp fclass.m p9,p0 = f9, 0xc3 -(p12) add nexttoward_GR_exp1 = 1, nexttoward_GR_exp +(p12) add GR_exp1 = 1, GR_exp } { .mib - setf.sig NEXTTOWARD_new_sig = nexttoward_GR_new_sig -(p13) add nexttoward_GR_exp1 = -1, nexttoward_GR_exp + setf.sig FR_new_sig = GR_new_sig +(p13) add GR_exp1 = -1, GR_exp (p6) br.ret.spnt b0 ;; // Exit if x=y } // Move largest normal signexp to fp reg for special cases // Is x=zero? { .mfi - setf.exp NEXTTOWARD_lnorm_exp = nexttoward_GR_max_pexp + setf.exp FR_lnorm_exp = GR_max_pexp fclass.m p7,p0 = f8, 0x7 nop.i 999 } { .mfb - setf.exp NEXTTOWARD_den_exp = nexttoward_GR_min_pexp + setf.exp FR_den_exp = GR_min_pexp (p8) fma.s0 f8 = f8,f1,f9 (p8) br.ret.spnt b0 ;; // Exit if x=nan } @@ -194,12 +196,12 @@ GLOBAL_LIBM_ENTRY(nexttowardl) // Move exp+-1 and smallest normal significand to fp regs for special cases // Is x=inf? 
{ .mfi - setf.exp NEXTTOWARD_exp1 = nexttoward_GR_exp1 + setf.exp FR_exp1 = GR_exp1 fclass.m p6,p0 = f8, 0x23 - addl nexttoward_GR_exp_mask = 0x1ffff, r0 + addl GR_exp_mask = 0x1ffff, r0 } { .mfb - setf.sig NEXTTOWARD_snorm_sig = nexttoward_GR_snorm_sig + setf.sig FR_snorm_sig = GR_snorm_sig (p9) fma.s0 f8 = f8,f1,f9 (p9) br.ret.spnt b0 ;; // Exit if y=nan } @@ -207,16 +209,16 @@ GLOBAL_LIBM_ENTRY(nexttowardl) // Move largest denormal significand to fp regs for special cases // Save x { .mfb - setf.sig NEXTTOWARD_lden_sig = nexttoward_GR_lden_sig - mov NEXTTOWARD_save_f8 = f8 -(p7) br.cond.spnt NEXTTOWARD_ZERO ;; // Exit if x=0 + setf.sig FR_lden_sig = GR_lden_sig + mov FR_save_f8 = f8 +(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0 } // Mask off the sign to get x_exp { .mfb - and nexttoward_GR_x_exp = nexttoward_GR_exp_mask, nexttoward_GR_exp + and GR_x_exp = GR_exp_mask, GR_exp nop.f 999 -(p6) br.cond.spnt NEXTTOWARD_INF ;; // Exit if x=inf +(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf } // Check 5 special cases when significand rolls over: @@ -232,37 +234,37 @@ GLOBAL_LIBM_ENTRY(nexttowardl) // Set p10, result is zero, sign of x, signal underflow and inexact // { .mmi -(p12) cmp.eq.unc p6,p0 = nexttoward_GR_new_sig, r0 -(p13) cmp.eq.unc p9,p10 = nexttoward_GR_new_sig, nexttoward_GR_lden_sig +(p12) cmp.eq.unc p6,p0 = GR_new_sig, r0 +(p13) cmp.eq.unc p9,p10 = GR_new_sig, GR_lden_sig nop.i 999 ;; } { .mmi -(p6) cmp.lt.unc p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_max_pexp -(p10) cmp.eq.unc p10,p0 = nexttoward_GR_new_sig, r0 -(p9) cmp.le.unc p9,p8 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp +(p6) cmp.lt.unc p6,p7 = GR_x_exp, GR_max_pexp +(p10) cmp.eq.unc p10,p0 = GR_new_sig, r0 +(p9) cmp.le.unc p9,p8 = GR_x_exp, GR_min_pexp ;; } // Create small normal in case need to generate underflow flag { .mfi nop.m 999 - fmerge.se NEXTTOWARD_tmp = NEXTTOWARD_snorm_exp, NEXTTOWARD_lnorm_sig + fmerge.se FR_tmp = FR_snorm_exp, FR_lnorm_sig nop.i 999 } // Branch if cases 1, 2, 
3 { .bbb -(p6) br.cond.spnt NEXTTOWARD_EXPUP -(p7) br.cond.spnt NEXTTOWARD_OVERFLOW -(p8) br.cond.spnt NEXTTOWARD_EXPDOWN ;; +(p6) br.cond.spnt NEXT_EXPUP +(p7) br.cond.spnt NEXT_OVERFLOW +(p8) br.cond.spnt NEXT_EXPDOWN ;; } // Branch if cases 4, 5 { .mbb nop.m 999 -(p9) br.cond.spnt NEXTTOWARD_NORM_TO_DENORM -(p10) br.cond.spnt NEXTTOWARD_UNDERFLOW_TO_ZERO +(p9) br.cond.spnt NEXT_NORM_TO_DENORM +(p10) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO ;; } @@ -271,68 +273,72 @@ GLOBAL_LIBM_ENTRY(nexttowardl) // Case 1: x_exp=min_exp, x_sig=unnormalized // Case 2: x_exp<min_exp { .mfi - cmp.lt p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp - fmerge.se f8 = NEXTTOWARD_new_exp, NEXTTOWARD_new_sig + cmp.lt p6,p7 = GR_x_exp, GR_min_pexp + fmerge.se f8 = FR_new_exp, FR_new_sig nop.i 999 ;; } { .mfi nop.m 999 nop.f 999 -(p6) tbit.z p6,p0 = nexttoward_GR_new_sig, 63 ;; +(p6) tbit.z p6,p0 = GR_new_sig, 63 ;; } -NEXTTOWARD_COMMON_FINISH: +NEXT_COMMON_FINISH: // Force underflow and inexact if denormal result { .mfi nop.m 999 -(p6) fma.s0 NEXTTOWARD_tmp = NEXTTOWARD_tmp,NEXTTOWARD_tmp,f0 - nop.i 999 ;; +(p6) fma.s0 FR_tmp = FR_tmp,FR_tmp,f0 + nop.i 999 +} +{ .mfb + nop.m 999 + fnorm.s0 f8 = f8 // Final normalization to result precision +(p6) br.cond.spnt NEXT_UNDERFLOW ;; } -// Final normalization to result precision and exit { .mfb nop.m 999 - fnorm.s0 f8 = f8 + nop.f 999 br.ret.sptk b0;; } //Special cases -NEXTTOWARD_EXPUP: +NEXT_EXPUP: { .mfb - cmp.lt p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp - fmerge.se f8 = NEXTTOWARD_exp1, NEXTTOWARD_snorm_sig - br.cond.sptk NEXTTOWARD_COMMON_FINISH ;; + cmp.lt p6,p7 = GR_x_exp, GR_min_pexp + fmerge.se f8 = FR_exp1, FR_snorm_sig + br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXTTOWARD_EXPDOWN: +NEXT_EXPDOWN: { .mfb - cmp.lt p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp - fmerge.se f8 = NEXTTOWARD_exp1, NEXTTOWARD_lnorm_sig - br.cond.sptk NEXTTOWARD_COMMON_FINISH ;; + cmp.lt p6,p7 = GR_x_exp, GR_min_pexp + fmerge.se f8 = FR_exp1, 
FR_lnorm_sig + br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXTTOWARD_NORM_TO_DENORM: +NEXT_NORM_TO_DENORM: { .mfi nop.m 999 - fmerge.se f8 = NEXTTOWARD_exp1, NEXTTOWARD_lden_sig + fmerge.se f8 = FR_exp1, FR_lden_sig nop.i 999 } // Force underflow and inexact { .mfb nop.m 999 - fma.s0 NEXTTOWARD_tmp = NEXTTOWARD_tmp,NEXTTOWARD_tmp,f0 - br.ret.sptk b0 ;; + fma.s0 FR_tmp = FR_tmp,FR_tmp,f0 + br.cond.sptk NEXT_UNDERFLOW ;; } -NEXTTOWARD_UNDERFLOW_TO_ZERO: +NEXT_UNDERFLOW_TO_ZERO: { .mfb cmp.eq p6,p0 = r0,r0 - fmerge.s f8 = NEXTTOWARD_save_f8,f0 - br.cond.sptk NEXTTOWARD_COMMON_FINISH ;; + fmerge.s f8 = FR_save_f8,f0 + br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXTTOWARD_INF: +NEXT_INF: // Here if f8 is +- infinity // INF // if f8 is +inf, no matter what y is return largest long double @@ -341,17 +347,17 @@ NEXTTOWARD_INF: // Create largest long double { .mfi nop.m 999 - fmerge.se NEXTTOWARD_lnorm = NEXTTOWARD_lnorm_exp,NEXTTOWARD_lnorm_sig + fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig nop.i 999 ;; } { .mfb nop.m 999 - fmerge.s f8 = f8,NEXTTOWARD_lnorm + fmerge.s f8 = f8,FR_lnorm br.ret.sptk b0 ;; } -NEXTTOWARD_ZERO: +NEXT_ZERO: // Here if f8 is +- zero // ZERO @@ -360,76 +366,72 @@ NEXTTOWARD_ZERO: { .mfi nop.m 999 - fmerge.se NEXTTOWARD_sden = f0,NEXTTOWARD_sden_sig + fmerge.se FR_sden = f0,FR_sden_sig nop.i 999 ;; } // Create small normal to generate underflow flag { .mfi nop.m 999 - fmerge.se NEXTTOWARD_tmp = NEXTTOWARD_snorm_exp, NEXTTOWARD_lnorm_sig + fmerge.se FR_tmp = FR_snorm_exp, FR_lnorm_sig nop.i 999 ;; } // Add correct sign from direction arg { .mfi nop.m 999 - fmerge.s f8 = f9,NEXTTOWARD_sden + fmerge.s f8 = f9,FR_sden nop.i 999 ;; } // Force underflow and inexact flags { .mfb nop.m 999 - fma.s0 NEXTTOWARD_tmp = NEXTTOWARD_tmp,NEXTTOWARD_tmp,f0 - br.ret.sptk b0 ;; + fma.s0 FR_tmp = FR_tmp,FR_tmp,f0 + br.cond.sptk NEXT_UNDERFLOW ;; } -GLOBAL_LIBM_END(nexttowardl) -// Stack operations when calling error support. 
-// (1) (2) (3) (call) (4) -// sp -> + psp -> + psp -> + sp -> + -// | | | | -// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8 -// | | | | -// | <-GR_Y Y2->| Y2 ->| <- GR_Y | -// | | | | -// | | <- GR_X X1 ->| | -// | | | | -// sp-64 -> + sp -> + sp -> + + -// save ar.pfs save b0 restore gp -// save gp restore ar.pfs - - +NEXT_UNDERFLOW: +// Here if result is a denorm, or input is finite and result is zero +// Call error support to report possible range error +{ .mib + alloc r32=ar.pfs,2,2,4,0 + mov GR_Parameter_TAG = 270 // Error code + br.cond.sptk __libm_error_region // Branch to error call +} +;; -LOCAL_LIBM_ENTRY(__libm_error_region) -NEXTTOWARD_OVERFLOW: -// Here if f8 is finite, but result will be infinite +NEXT_OVERFLOW: +// Here if input is finite, but result will be infinite // Use frcpa to generate infinity of correct sign // Call error support to report possible range error -.prologue - { .mfi alloc r32=ar.pfs,2,2,4,0 - frcpa.s1 f8,p6 = NEXTTOWARD_save_f8, f0 + frcpa.s1 f8,p6 = FR_save_f8, f0 nop.i 999 ;; } -// Create largest long double +// Create largest double { .mfi nop.m 999 - fmerge.se NEXTTOWARD_lnorm = NEXTTOWARD_lnorm_exp,NEXTTOWARD_lnorm_sig + fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig nop.i 999 ;; } // Force overflow and inexact flags to be set -{ .mfi - mov r39 = 198 // Error code - fma.s0 NEXTTOWARD_tmp = NEXTTOWARD_lnorm,NEXTTOWARD_lnorm,f0 - nop.i 999 +{ .mfb + mov GR_Parameter_TAG = 198 // Error code + fma.s0 FR_tmp = FR_lnorm,FR_lnorm,f0 + br.cond.sptk __libm_error_region // Branch to error call } ;; +GLOBAL_LIBM_END(nexttowardl) + + +LOCAL_LIBM_ENTRY(__libm_error_region) +.prologue + // (1) { .mfi add GR_Parameter_Y=-32,sp // Parameter 2 value @@ -456,7 +458,7 @@ NEXTTOWARD_OVERFLOW: .body // (3) { .mib - stfe [GR_Parameter_X] = NEXTTOWARD_save_f8 // STORE Parameter 1 on stack + stfe [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address nop.b 0 } diff --git 
a/sysdeps/ia64/fpu/s_round.S b/sysdeps/ia64/fpu/s_round.S index 04033b4..ed5ffae 100644 --- a/sysdeps/ia64/fpu/s_round.S +++ b/sysdeps/ia64/fpu/s_round.S @@ -44,6 +44,7 @@ // 05/20/02 Cleaned up namespace and sf0 syntax // 01/20/03 Improved performance and reduced code size // 04/18/03 Eliminate possible WAW dependency warning +// 09/03/03 Improved performance //============================================================== // API @@ -52,14 +53,13 @@ //============================================================== // general input registers: -// r14 - r19 +// r14 - r18 rSignexp = r14 rExp = r15 rExpMask = r16 rBigexp = r17 rExpHalf = r18 -rExpMHalf = r19 // floating-point registers: // f8 - f13 @@ -67,7 +67,7 @@ rExpMHalf = r19 fXtruncInt = f9 fNormX = f10 fHalf = f11 -fMHalf = f12 +fInc = f12 fRem = f13 // predicate registers used: @@ -119,15 +119,15 @@ GLOBAL_LIBM_ENTRY(round) } ;; -{ .mmf +{ .mfi setf.exp fHalf = rExpHalf // Form 0.5 - mov rExpMHalf = 0x2FFFE // Form sign and exponent of -0.5 fclass.m p7,p0 = f8, 0x0b // Test x unorm + nop.i 0 } ;; { .mfb - setf.exp fMHalf = rExpMHalf // Form -0.5 + nop.m 0 fclass.m p6,p0 = f8, 0x1e3 // Test x natval, nan, inf (p7) br.cond.spnt ROUND_UNORM // Branch if x unorm } @@ -135,27 +135,31 @@ GLOBAL_LIBM_ENTRY(round) ROUND_COMMON: // Return here from ROUND_UNORM -{ .mfi +{ .mfb nop.m 0 fcmp.lt.s1 p8,p9 = f8, f0 // Test if x < 0 +(p6) br.cond.spnt ROUND_SPECIAL // Exit if x natval, nan, inf +} +;; + +{ .mfi + nop.m 0 + fcvt.xf f8 = fXtruncInt // Pre-Result if 0.5 <= |x| < 2^52 nop.i 0 } -{ .mfb +;; + +{ .mfi and rExp = rSignexp, rExpMask // Get biased exponent -(p6) fma.d.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf -(p6) br.ret.spnt b0 // Exit if x natval, nan, inf + fmerge.s fInc = fNormX, f1 // Form increment if |rem| >= 0.5 + nop.i 0 } ;; -{ .mfi +{ .mmi cmp.lt p6,p0 = rExp, rExpHalf // Is |x| < 0.5? - fcvt.xf f8 = fXtruncInt // Pre-Result if 0.5 <= |x| < 2^52 cmp.ge p7,p0 = rExp, rBigexp // Is |x| >= 2^52? 
-} -{ .mfi cmp.lt p10,p0 = rExp, rExpHalf // Is |x| < 0.5? - nop.f 0 - nop.i 0 } ;; @@ -176,44 +180,52 @@ ROUND_COMMON: // Here if 0.5 <= |x| < 2^52 { .mfi nop.m 0 - fms.s1 fRem = fNormX, f1, f8 // Get remainder = x - trunc(x) +(p9) fms.s1 fRem = fNormX, f1, f8 // Get remainder = x - trunc(x) nop.i 0 } -;; - { .mfi nop.m 0 -(p8) fcmp.le.s1 p8,p0 = fRem, fMHalf +(p8) fms.s1 fRem = f8, f1, fNormX // Get remainder = trunc(x) - x nop.i 0 } +;; + { .mfi nop.m 0 -(p9) fcmp.ge.s1 p9,p0 = fRem, fHalf + fcmp.ge.s1 p9,p0 = fRem, fHalf // Test |rem| >= 0.5 nop.i 0 } ;; // If x < 0 and remainder <= -0.5, then subtract 1 from result // If x > 0 and remainder >= +0.5, then add 1 to result -.pred.rel "mutex",p8,p9 -{ .mfi +{ .mfb nop.m 0 -(p8) fms.d.s0 f8 = f8, f1, f1 - nop.i 0 +(p9) fma.d.s0 f8 = f8, f1, fInc + br.ret.sptk b0 } +;; + + +ROUND_SPECIAL: +// Here if x natval, nan, inf { .mfb nop.m 0 -(p9) fma.d.s0 f8 = f8, f1, f1 + fma.d.s0 f8 = f8, f1, f0 br.ret.sptk b0 } ;; - ROUND_UNORM: // Here if x unorm -{ .mfb +{ .mfi getf.exp rSignexp = fNormX // Get signexp, recompute if unorm fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag + nop.i 0 +} +{ .mfb + nop.m 0 + fcvt.fx.trunc.s1 fXtruncInt = fNormX // Convert to int in significand br.cond.sptk ROUND_COMMON // Return to main path } ;; diff --git a/sysdeps/ia64/fpu/s_roundf.S b/sysdeps/ia64/fpu/s_roundf.S index 1e8dc78..7cec860 100644 --- a/sysdeps/ia64/fpu/s_roundf.S +++ b/sysdeps/ia64/fpu/s_roundf.S @@ -44,6 +44,7 @@ // 05/20/02 Cleaned up namespace and sf0 syntax // 01/20/03 Improved performance and reduced code size // 04/18/03 Eliminate possible WAW dependency warning +// 09/03/03 Improved performance //============================================================== // API @@ -52,14 +53,13 @@ //============================================================== // general input registers: -// r14 - r19 +// r14 - r18 rSignexp = r14 rExp = r15 rExpMask = r16 rBigexp = r17 rExpHalf = r18 -rExpMHalf = r19 // floating-point 
registers: // f8 - f13 @@ -67,7 +67,7 @@ rExpMHalf = r19 fXtruncInt = f9 fNormX = f10 fHalf = f11 -fMHalf = f12 +fInc = f12 fRem = f13 // predicate registers used: @@ -119,15 +119,15 @@ GLOBAL_LIBM_ENTRY(roundf) } ;; -{ .mmf +{ .mfi setf.exp fHalf = rExpHalf // Form 0.5 - mov rExpMHalf = 0x2FFFE // Form sign and exponent of -0.5 fclass.m p7,p0 = f8, 0x0b // Test x unorm + nop.i 0 } ;; { .mfb - setf.exp fMHalf = rExpMHalf // Form -0.5 + nop.m 0 fclass.m p6,p0 = f8, 0x1e3 // Test x natval, nan, inf (p7) br.cond.spnt ROUND_UNORM // Branch if x unorm } @@ -135,27 +135,31 @@ GLOBAL_LIBM_ENTRY(roundf) ROUND_COMMON: // Return here from ROUND_UNORM -{ .mfi +{ .mfb nop.m 0 fcmp.lt.s1 p8,p9 = f8, f0 // Test if x < 0 +(p6) br.cond.spnt ROUND_SPECIAL // Exit if x natval, nan, inf +} +;; + +{ .mfi + nop.m 0 + fcvt.xf f8 = fXtruncInt // Pre-Result if 0.5 <= |x| < 2^23 nop.i 0 } -{ .mfb +;; + +{ .mfi and rExp = rSignexp, rExpMask // Get biased exponent -(p6) fma.s.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf -(p6) br.ret.spnt b0 // Exit if x natval, nan, inf + fmerge.s fInc = fNormX, f1 // Form increment if |rem| >= 0.5 + nop.i 0 } ;; -{ .mfi +{ .mmi cmp.lt p6,p0 = rExp, rExpHalf // Is |x| < 0.5? - fcvt.xf f8 = fXtruncInt // Pre-Result if 0.5 <= |x| < 2^23 cmp.ge p7,p0 = rExp, rBigexp // Is |x| >= 2^23? -} -{ .mfi cmp.lt p10,p0 = rExp, rExpHalf // Is |x| < 0.5? 
- nop.f 0 - nop.i 0 } ;; @@ -176,44 +180,52 @@ ROUND_COMMON: // Here if 0.5 <= |x| < 2^23 { .mfi nop.m 0 - fms.s1 fRem = fNormX, f1, f8 // Get remainder = x - trunc(x) +(p9) fms.s1 fRem = fNormX, f1, f8 // Get remainder = x - trunc(x) nop.i 0 } -;; - { .mfi nop.m 0 -(p8) fcmp.le.s1 p8,p0 = fRem, fMHalf +(p8) fms.s1 fRem = f8, f1, fNormX // Get remainder = trunc(x) - x nop.i 0 } +;; + { .mfi nop.m 0 -(p9) fcmp.ge.s1 p9,p0 = fRem, fHalf + fcmp.ge.s1 p9,p0 = fRem, fHalf // Test |rem| >= 0.5 nop.i 0 } ;; // If x < 0 and remainder <= -0.5, then subtract 1 from result // If x > 0 and remainder >= +0.5, then add 1 to result -.pred.rel "mutex",p8,p9 -{ .mfi +{ .mfb nop.m 0 -(p8) fms.s.s0 f8 = f8, f1, f1 - nop.i 0 +(p9) fma.s.s0 f8 = f8, f1, fInc + br.ret.sptk b0 } +;; + + +ROUND_SPECIAL: +// Here if x natval, nan, inf { .mfb nop.m 0 -(p9) fma.s.s0 f8 = f8, f1, f1 + fma.s.s0 f8 = f8, f1, f0 br.ret.sptk b0 } ;; - ROUND_UNORM: // Here if x unorm -{ .mfb +{ .mfi getf.exp rSignexp = fNormX // Get signexp, recompute if unorm fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag + nop.i 0 +} +{ .mfb + nop.m 0 + fcvt.fx.trunc.s1 fXtruncInt = fNormX // Convert to int in significand br.cond.sptk ROUND_COMMON // Return to main path } ;; diff --git a/sysdeps/ia64/fpu/s_roundl.S b/sysdeps/ia64/fpu/s_roundl.S index 79dff00..da6cbfe 100644 --- a/sysdeps/ia64/fpu/s_roundl.S +++ b/sysdeps/ia64/fpu/s_roundl.S @@ -44,6 +44,7 @@ // 05/20/02 Cleaned up namespace and sf0 syntax // 01/20/03 Improved performance and reduced code size // 04/18/03 Eliminate possible WAW dependency warning +// 09/03/03 Improved performance //============================================================== // API @@ -52,14 +53,13 @@ //============================================================== // general input registers: -// r14 - r19 +// r14 - r18 rSignexp = r14 rExp = r15 rExpMask = r16 rBigexp = r17 rExpHalf = r18 -rExpMHalf = r19 // floating-point registers: // f8 - f13 @@ -67,7 +67,7 @@ rExpMHalf = r19 
fXtruncInt = f9 fNormX = f10 fHalf = f11 -fMHalf = f12 +fInc = f12 fRem = f13 // predicate registers used: @@ -119,15 +119,15 @@ GLOBAL_LIBM_ENTRY(roundl) } ;; -{ .mmf +{ .mfi setf.exp fHalf = rExpHalf // Form 0.5 - mov rExpMHalf = 0x2FFFE // Form sign and exponent of -0.5 fclass.m p7,p0 = f8, 0x0b // Test x unorm + nop.i 0 } ;; { .mfb - setf.exp fMHalf = rExpMHalf // Form -0.5 + nop.m 0 fclass.m p6,p0 = f8, 0x1e3 // Test x natval, nan, inf (p7) br.cond.spnt ROUND_UNORM // Branch if x unorm } @@ -135,27 +135,31 @@ GLOBAL_LIBM_ENTRY(roundl) ROUND_COMMON: // Return here from ROUND_UNORM -{ .mfi +{ .mfb nop.m 0 fcmp.lt.s1 p8,p9 = f8, f0 // Test if x < 0 +(p6) br.cond.spnt ROUND_SPECIAL // Exit if x natval, nan, inf +} +;; + +{ .mfi + nop.m 0 + fcvt.xf f8 = fXtruncInt // Pre-Result if 0.5 <= |x| < 2^63 nop.i 0 } -{ .mfb +;; + +{ .mfi and rExp = rSignexp, rExpMask // Get biased exponent -(p6) fma.s0 f8 = f8, f1, f0 // Result if x natval, nan, inf -(p6) br.ret.spnt b0 // Exit if x natval, nan, inf + fmerge.s fInc = fNormX, f1 // Form increment if |rem| >= 0.5 + nop.i 0 } ;; -{ .mfi +{ .mmi cmp.lt p6,p0 = rExp, rExpHalf // Is |x| < 0.5? - fcvt.xf f8 = fXtruncInt // Pre-Result if 0.5 <= |x| < 2^63 cmp.ge p7,p0 = rExp, rBigexp // Is |x| >= 2^63? -} -{ .mfi cmp.lt p10,p0 = rExp, rExpHalf // Is |x| < 0.5? 
- nop.f 0 - nop.i 0 } ;; @@ -176,44 +180,52 @@ ROUND_COMMON: // Here if 0.5 <= |x| < 2^63 { .mfi nop.m 0 - fms.s1 fRem = fNormX, f1, f8 // Get remainder = x - trunc(x) +(p9) fms.s1 fRem = fNormX, f1, f8 // Get remainder = x - trunc(x) nop.i 0 } -;; - { .mfi nop.m 0 -(p8) fcmp.le.s1 p8,p0 = fRem, fMHalf +(p8) fms.s1 fRem = f8, f1, fNormX // Get remainder = trunc(x) - x nop.i 0 } +;; + { .mfi nop.m 0 -(p9) fcmp.ge.s1 p9,p0 = fRem, fHalf + fcmp.ge.s1 p9,p0 = fRem, fHalf // Test |rem| >= 0.5 nop.i 0 } ;; // If x < 0 and remainder <= -0.5, then subtract 1 from result // If x > 0 and remainder >= +0.5, then add 1 to result -.pred.rel "mutex",p8,p9 -{ .mfi +{ .mfb nop.m 0 -(p8) fms.s0 f8 = f8, f1, f1 - nop.i 0 +(p9) fma.s0 f8 = f8, f1, fInc + br.ret.sptk b0 } +;; + + +ROUND_SPECIAL: +// Here if x natval, nan, inf { .mfb nop.m 0 -(p9) fma.s0 f8 = f8, f1, f1 + fma.s0 f8 = f8, f1, f0 br.ret.sptk b0 } ;; - ROUND_UNORM: // Here if x unorm -{ .mfb +{ .mfi getf.exp rSignexp = fNormX // Get signexp, recompute if unorm fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag + nop.i 0 +} +{ .mfb + nop.m 0 + fcvt.fx.trunc.s1 fXtruncInt = fNormX // Convert to int in significand br.cond.sptk ROUND_COMMON // Return to main path } ;; diff --git a/sysdeps/ia64/fpu/s_scalblnf.c b/sysdeps/ia64/fpu/s_scalblnf.c index 97de090..2fa51ba 100644 --- a/sysdeps/ia64/fpu/s_scalblnf.c +++ b/sysdeps/ia64/fpu/s_scalblnf.c @@ -4,8 +4,7 @@ // Copyright (c) 2000, 2001, Intel Corporation // All rights reserved. // -// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story, -// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation. 
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are diff --git a/sysdeps/ia64/fpu/s_scalbn.c b/sysdeps/ia64/fpu/s_scalbn.c index b0bd44a..1f57141 100644 --- a/sysdeps/ia64/fpu/s_scalbn.c +++ b/sysdeps/ia64/fpu/s_scalbn.c @@ -4,8 +4,7 @@ // Copyright (c) 2000, 2001, Intel Corporation // All rights reserved. // -// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story, -// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation. +// Contributed 2000 by the Intel Numerics Group, Intel Corporation // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are diff --git a/sysdeps/ia64/fpu/s_scalbnf.c b/sysdeps/ia64/fpu/s_scalbnf.c index 176c2ed..97c06da 100644 --- a/sysdeps/ia64/fpu/s_scalbnf.c +++ b/sysdeps/ia64/fpu/s_scalbnf.c @@ -4,8 +4,7 @@ // Copyright (c) 2000, 2001, Intel Corporation // All rights reserved. // -// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story, -// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation. +// Contributed 2000 by the Intel Numerics Group, Intel Corporation // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are diff --git a/sysdeps/ia64/fpu/s_scalbnl.c b/sysdeps/ia64/fpu/s_scalbnl.c index d19ddd3..d7a81df 100644 --- a/sysdeps/ia64/fpu/s_scalbnl.c +++ b/sysdeps/ia64/fpu/s_scalbnl.c @@ -4,8 +4,7 @@ // Copyright (c) 2000, 2001, Intel Corporation // All rights reserved. // -// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story, -// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation. 
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are diff --git a/sysdeps/ia64/fpu/s_tan.S b/sysdeps/ia64/fpu/s_tan.S index 3000f5e..a2f80c8 100644 --- a/sysdeps/ia64/fpu/s_tan.S +++ b/sysdeps/ia64/fpu/s_tan.S @@ -282,6 +282,7 @@ LOCAL_LIBM_ENTRY(cot) LOCAL_LIBM_END(cot) + GLOBAL_IEEE754_ENTRY(tan) // The initial fnorm will take any unmasked faults and // normalize any single/double unorms @@ -737,6 +738,7 @@ COMMON_PATH: } GLOBAL_IEEE754_END(tan) + LOCAL_LIBM_ENTRY(__libm_callout) TAN_DBX: .prologue diff --git a/sysdeps/ia64/fpu/s_tanf.S b/sysdeps/ia64/fpu/s_tanf.S index 48f8234..98e3f76 100644 --- a/sysdeps/ia64/fpu/s_tanf.S +++ b/sysdeps/ia64/fpu/s_tanf.S @@ -247,6 +247,7 @@ LOCAL_LIBM_ENTRY(cotf) LOCAL_LIBM_END(cotf) + GLOBAL_IEEE754_ENTRY(tanf) { .mlx @@ -549,6 +550,7 @@ Return_From_Huges: GLOBAL_IEEE754_END(tanf) + LOCAL_LIBM_ENTRY(__libm_callout) Huge_Argument: .prologue diff --git a/sysdeps/ia64/fpu/s_tanh.S b/sysdeps/ia64/fpu/s_tanh.S index c858398..5e0c407 100644 --- a/sysdeps/ia64/fpu/s_tanh.S +++ b/sysdeps/ia64/fpu/s_tanh.S @@ -985,3 +985,4 @@ _tanh_spec: GLOBAL_LIBM_END(tanh) + diff --git a/sysdeps/ia64/fpu/s_tanhl.S b/sysdeps/ia64/fpu/s_tanhl.S index ab00994..3435f43 100644 --- a/sysdeps/ia64/fpu/s_tanhl.S +++ b/sysdeps/ia64/fpu/s_tanhl.S @@ -1345,3 +1345,4 @@ GLOBAL_LIBM_END(tanhl) + diff --git a/sysdeps/ia64/fpu/s_tanl.S b/sysdeps/ia64/fpu/s_tanl.S index 345a059..607a271 100644 --- a/sysdeps/ia64/fpu/s_tanl.S +++ b/sysdeps/ia64/fpu/s_tanl.S @@ -1,7 +1,7 @@ .file "tancotl.s" -// Copyright (c) 2000 - 2003, Intel Corporation +// Copyright (c) 2000 - 2004, Intel Corporation // All rights reserved. 
// // Contributed 2000 by the Intel Numerics Group, Intel Corporation @@ -50,6 +50,7 @@ // 02/10/03 Reordered header: .section, .global, .proc, .align; // used data8 for long double table values // 05/15/03 Reformatted data tables +// 10/26/04 Avoided using r14-31 as scratch so not clobbered by dynamic loader // //********************************************************************* // @@ -65,7 +66,7 @@ // f32-f121 // // General Purpose Registers: -// r14-r26,r32-r57 +// r32-r70 // // Predicate Registers: p6-p15 // @@ -1171,20 +1172,6 @@ TWO_TO_NEG65 = f119 fp_tmp = f120 mOne = f121 -GR_sig_inv_pi = r14 -GR_rshf_2to64 = r15 -GR_exp_2tom64 = r16 -GR_rshf = r17 -GR_exp_2_to_63 = r18 -GR_exp_2_to_24 = r19 -GR_signexp_x = r20 -GR_exp_x = r21 -GR_exp_mask = r22 -GR_exp_2tom14 = r23 -GR_exp_m2tom14 = r24 -GR_exp_2tom33 = r25 -GR_exp_m2tom33 = r26 - GR_SAVE_B0 = r33 GR_SAVE_GP = r34 GR_SAVE_PFS = r35 @@ -1204,13 +1191,28 @@ bmask2 = r48 gr_tmp = r49 cot_flag = r50 -GR_SAVE_B0 = r51 -GR_SAVE_PFS = r52 -GR_SAVE_GP = r53 -GR_Parameter_X = r54 -GR_Parameter_Y = r55 -GR_Parameter_RESULT = r56 -GR_Parameter_Tag = r57 +GR_sig_inv_pi = r51 +GR_rshf_2to64 = r52 +GR_exp_2tom64 = r53 +GR_rshf = r54 +GR_exp_2_to_63 = r55 +GR_exp_2_to_24 = r56 +GR_signexp_x = r57 +GR_exp_x = r58 +GR_exp_mask = r59 +GR_exp_2tom14 = r60 +GR_exp_m2tom14 = r61 +GR_exp_2tom33 = r62 +GR_exp_m2tom33 = r63 + +GR_SAVE_B0 = r64 +GR_SAVE_PFS = r65 +GR_SAVE_GP = r66 + +GR_Parameter_X = r67 +GR_Parameter_Y = r68 +GR_Parameter_RESULT = r69 +GR_Parameter_Tag = r70 .section .text @@ -1223,7 +1225,7 @@ __libm_cotl: LOCAL_LIBM_ENTRY(cotl) { .mlx - alloc r32 = ar.pfs, 0,22,4,0 + alloc r32 = ar.pfs, 0,35,4,0 movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi } { .mlx @@ -1246,13 +1248,14 @@ LOCAL_LIBM_ENTRY(cotl) LOCAL_LIBM_END(cotl) + .proc __libm_tanl# __libm_tanl: .endp __libm_tanl# GLOBAL_IEEE754_ENTRY(tanl) { .mlx - alloc r32 = ar.pfs, 0,22,4,0 + alloc r32 = ar.pfs, 0,35,4,0 movl GR_sig_inv_pi = 
0xa2f9836e4e44152a // significand of 1/pi } { .mlx @@ -3089,6 +3092,7 @@ TANL_UNSUPPORTED: GLOBAL_IEEE754_END(tanl) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue diff --git a/sysdeps/ia64/fpu/w_lgamma.c b/sysdeps/ia64/fpu/w_lgamma.c index fb799df..f16256e 100644 --- a/sysdeps/ia64/fpu/w_lgamma.c +++ b/sysdeps/ia64/fpu/w_lgamma.c @@ -1,5 +1,6 @@ /* file: lgamma.c */ + // Copyright (c) 2002 Intel Corporation // All rights reserved. // @@ -20,7 +21,6 @@ // products derived from this software without specific prior written // permission. -// WARRANTY DISCLAIMER // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT diff --git a/sysdeps/ia64/fpu/w_lgammaf.c b/sysdeps/ia64/fpu/w_lgammaf.c index bda3741..5ac3b82 100644 --- a/sysdeps/ia64/fpu/w_lgammaf.c +++ b/sysdeps/ia64/fpu/w_lgammaf.c @@ -1,5 +1,6 @@ /* file: lgammaf.c */ + // Copyright (c) 2002 Intel Corporation // All rights reserved. // @@ -20,7 +21,6 @@ // products derived from this software without specific prior written // permission. -// WARRANTY DISCLAIMER // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT diff --git a/sysdeps/ia64/fpu/w_lgammal.c b/sysdeps/ia64/fpu/w_lgammal.c index 9f9f356..8ddbb74 100644 --- a/sysdeps/ia64/fpu/w_lgammal.c +++ b/sysdeps/ia64/fpu/w_lgammal.c @@ -1,5 +1,6 @@ /* file: lgammal.c */ + // Copyright (c) 2002 Intel Corporation // All rights reserved. // @@ -20,7 +21,6 @@ // products derived from this software without specific prior written // permission. 
-// WARRANTY DISCLAIMER // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT diff --git a/sysdeps/ia64/fpu/w_tgamma.S b/sysdeps/ia64/fpu/w_tgamma.S index 7d654d0..e55e4e3 100644 --- a/sysdeps/ia64/fpu/w_tgamma.S +++ b/sysdeps/ia64/fpu/w_tgamma.S @@ -1781,6 +1781,7 @@ tgamma_libm_err: };; GLOBAL_LIBM_END(tgamma) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi diff --git a/sysdeps/ia64/fpu/w_tgammaf.S b/sysdeps/ia64/fpu/w_tgammaf.S index 4363ca2..64421ab 100644 --- a/sysdeps/ia64/fpu/w_tgammaf.S +++ b/sysdeps/ia64/fpu/w_tgammaf.S @@ -45,6 +45,7 @@ // 02/10/03 Reordered header: .section, .global, .proc, .align // 04/04/03 Changed error codes for overflow and negative integers // 04/10/03 Changed code for overflow near zero handling +// 12/16/03 Fixed parameter passing to/from error handling routine // //********************************************************************* // @@ -1274,6 +1275,7 @@ tgammaf_libm_err: };; GLOBAL_LIBM_END(tgammaf) + LOCAL_LIBM_ENTRY(__libm_error_region) .prologue { .mfi @@ -1289,19 +1291,19 @@ LOCAL_LIBM_ENTRY(__libm_error_region) mov GR_SAVE_GP=gp // Save gp };; { .mmi - stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack + stfs [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack add GR_Parameter_X = 16,sp // Parameter 1 address .save b0, GR_SAVE_B0 mov GR_SAVE_B0=b0 // Save b0 };; .body { .mib - stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack + stfs [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address nop.b 0 } { .mib - stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack + stfs [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack add GR_Parameter_Y = -16,GR_Parameter_Y br.call.sptk b0=__libm_error_support# // Call error handling function };; @@ -1311,7 +1313,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region) add GR_Parameter_RESULT = 48,sp 
};; { .mmi - ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack + ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack .restore sp add sp = 64,sp // Restore stack pointer mov b0 = GR_SAVE_B0 // Restore return address diff --git a/sysdeps/ia64/fpu/w_tgammal.S b/sysdeps/ia64/fpu/w_tgammal.S index 75b1069..d801ba0 100644 --- a/sysdeps/ia64/fpu/w_tgammal.S +++ b/sysdeps/ia64/fpu/w_tgammal.S @@ -4428,6 +4428,7 @@ GLOBAL_LIBM_END(tgammal) + ////////////////// Tgammal error handler /////////////////////////////////////// //------------------------------------------------------------------------------ LOCAL_LIBM_ENTRY(__libm_error_region) |